fix(api): resolve db-only stale incidents
This commit is contained in:
@@ -1168,12 +1168,14 @@ class IncidentService:
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
# 1. 從 Working Memory 讀取;Redis TTL 過期時退回 PostgreSQL。
|
||||
hydrated_from_episodic = False
|
||||
incident = await self.get_from_working_memory(incident_id)
|
||||
if incident is None:
|
||||
incident = await self.get_from_episodic_memory(incident_id)
|
||||
if incident is None:
|
||||
logger.warning("incident_not_found_for_resolve", incident_id=incident_id)
|
||||
return None
|
||||
hydrated_from_episodic = True
|
||||
logger.info(
|
||||
"incident_resolve_hydrated_from_episodic_memory",
|
||||
incident_id=incident_id,
|
||||
@@ -1197,11 +1199,15 @@ class IncidentService:
|
||||
incident.resolved_at = now_taipei()
|
||||
incident.updated_at = now_taipei()
|
||||
|
||||
# 3. 寫入 Working Memory
|
||||
redis_success = await self.save_to_working_memory(incident)
|
||||
if not redis_success:
|
||||
logger.error("resolve_redis_write_failed", incident_id=incident_id)
|
||||
return None
|
||||
# 3. 寫入 Working Memory。Redis TTL 已過的歷史 DB-only 事件只更新 DB,
|
||||
# 不重新灌回 Redis working memory,避免舊事件回流成 active workload。
|
||||
if hydrated_from_episodic:
|
||||
logger.info("resolve_db_only_incident", incident_id=incident_id)
|
||||
else:
|
||||
redis_success = await self.save_to_working_memory(incident)
|
||||
if not redis_success:
|
||||
logger.error("resolve_redis_write_failed", incident_id=incident_id)
|
||||
return None
|
||||
|
||||
# 4. 同步到 Episodic Memory
|
||||
try:
|
||||
|
||||
@@ -12,6 +12,7 @@ test_incident_service_resolve_idempotency
|
||||
重新放大「resolve_incident 重複觸發 postmortem 洗版」的舊風險。
|
||||
"""
|
||||
|
||||
import inspect
|
||||
from datetime import UTC, datetime
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock
|
||||
@@ -27,6 +28,42 @@ from src.services.incident_service import (
|
||||
)
|
||||
|
||||
|
||||
def _patch_resolve_side_effects(monkeypatch):
    """Stub out the downstream work that resolving an active incident triggers.

    Lets a resolve test verify only the resolve contract. Every replacement
    is installed through ``monkeypatch``:

    * ``asyncio.create_task`` closes the coroutine it is handed instead of
      scheduling it, and returns an empty ``SimpleNamespace``.
    * ``get_knowledge_extractor`` and ``get_km_conversion_service`` return
      stand-ins whose ``extract_from_incident`` / ``convert`` are
      ``AsyncMock`` objects resolving to ``None``.
    * ``AnomalyCounter`` becomes a namespace whose
      ``derive_key_from_incident`` always returns ``None``, and
      ``get_anomaly_counter`` returns an empty namespace.
    """
    import asyncio

    import src.services.anomaly_counter as counter_mod
    import src.services.km_conversion_service as km_mod
    import src.services.knowledge_extractor_service as extractor_mod

    def _discard_task(coro):
        # Close the coroutine instead of scheduling it.
        if inspect.iscoroutine(coro):
            coro.close()
        return SimpleNamespace()

    def _fake_extractor():
        return SimpleNamespace(extract_from_incident=AsyncMock(return_value=None))

    def _fake_km_service():
        return SimpleNamespace(convert=AsyncMock(return_value=None))

    def _no_counter_key(_incident):
        return None

    def _fake_counter():
        return SimpleNamespace()

    fake_counter_cls = SimpleNamespace(
        derive_key_from_incident=staticmethod(_no_counter_key),
    )

    # (object to patch, attribute name, replacement), applied in this order.
    replacements = (
        (asyncio, "create_task", _discard_task),
        (extractor_mod, "get_knowledge_extractor", _fake_extractor),
        (km_mod, "get_km_conversion_service", _fake_km_service),
        (counter_mod, "AnomalyCounter", fake_counter_cls),
        (counter_mod, "get_anomaly_counter", _fake_counter),
    )
    for target, attribute, stand_in in replacements:
        monkeypatch.setattr(target, attribute, stand_in)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_resolve_incident_skips_when_already_resolved(monkeypatch):
|
||||
"""RESOLVED 的 incident 重複 resolve 應 idempotent。"""
|
||||
@@ -53,6 +90,42 @@ async def test_resolve_incident_skips_when_already_resolved(monkeypatch):
|
||||
save_mock.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_resolve_incident_can_close_db_only_active_incident(monkeypatch):
    """An active incident found only in episodic memory must still resolve.

    The working-memory lookup is mocked to return ``None`` and the
    episodic-memory lookup returns an ``INVESTIGATING`` incident. Resolving
    it must mark it ``RESOLVED`` with ``resolved_at`` set, and must not
    write it back through ``save_to_working_memory``.
    """
    incident_id = "INC-DB-FALLBACK-ACTIVE-001"
    stale_incident = SimpleNamespace(
        incident_id=incident_id,
        status=IncidentStatus.INVESTIGATING,
        resolved_at=None,
        updated_at=None,
        signals=[],
        affected_services=[],
        decision_chain=None,
        outcome=None,
    )

    service = IncidentService()
    working_lookup = AsyncMock(return_value=None)
    episodic_lookup = AsyncMock(return_value=stale_incident)
    working_save = AsyncMock(return_value=True)
    for method_name, stub in (
        ("get_from_working_memory", working_lookup),
        ("get_from_episodic_memory", episodic_lookup),
        ("save_to_working_memory", working_save),
    ):
        monkeypatch.setattr(service, method_name, stub)
    _patch_resolve_side_effects(monkeypatch)

    resolved = await service.resolve_incident(incident_id, emit_postmortem=False)

    assert resolved is stale_incident
    assert stale_incident.status is IncidentStatus.RESOLVED
    assert stale_incident.resolved_at is not None
    episodic_lookup.assert_awaited_once_with(incident_id)
    # The incident came from the episodic fallback, so it is not pushed
    # back into working memory.
    working_save.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_resolve_incident_returns_none_when_not_found(monkeypatch):
|
||||
"""incident 不存在時 return None。確保 guard 不影響 not-found 路徑。"""
|
||||
|
||||
Reference in New Issue
Block a user