diff --git a/apps/api/src/services/incident_service.py b/apps/api/src/services/incident_service.py index 8d3aabc4..d1290b23 100644 --- a/apps/api/src/services/incident_service.py +++ b/apps/api/src/services/incident_service.py @@ -1168,12 +1168,14 @@ class IncidentService: from src.utils.timezone import now_taipei # 1. 從 Working Memory 讀取;Redis TTL 過期時退回 PostgreSQL。 + hydrated_from_episodic = False incident = await self.get_from_working_memory(incident_id) if incident is None: incident = await self.get_from_episodic_memory(incident_id) if incident is None: logger.warning("incident_not_found_for_resolve", incident_id=incident_id) return None + hydrated_from_episodic = True logger.info( "incident_resolve_hydrated_from_episodic_memory", incident_id=incident_id, @@ -1197,11 +1199,15 @@ class IncidentService: incident.resolved_at = now_taipei() incident.updated_at = now_taipei() - # 3. 寫入 Working Memory - redis_success = await self.save_to_working_memory(incident) - if not redis_success: - logger.error("resolve_redis_write_failed", incident_id=incident_id) - return None + # 3. 寫入 Working Memory。Redis TTL 已過的歷史 DB-only 事件只更新 DB, + # 不重新灌回 Redis working memory,避免舊事件回流成 active workload。 + if hydrated_from_episodic: + logger.info("resolve_db_only_incident", incident_id=incident_id) + else: + redis_success = await self.save_to_working_memory(incident) + if not redis_success: + logger.error("resolve_redis_write_failed", incident_id=incident_id) + return None # 4. 
def _patch_resolve_side_effects(monkeypatch):
    """Stub the downstream work that resolving an active incident can trigger.

    Lets a test check only the ``resolve_incident`` contract without really
    scheduling background tasks or calling the knowledge-extraction,
    KM-conversion, or anomaly-counter services.

    Replaced names (all via ``monkeypatch.setattr``, so pytest undoes them at
    test teardown):

    * ``asyncio.create_task``
    * ``knowledge_extractor_service.get_knowledge_extractor``
    * ``km_conversion_service.get_km_conversion_service``
    * ``anomaly_counter.AnomalyCounter``
    * ``anomaly_counter.get_anomaly_counter``

    Args:
        monkeypatch: The pytest ``monkeypatch`` fixture (any object exposing a
            compatible ``setattr(target, name, value)`` works).
    """
    import asyncio

    import src.services.anomaly_counter as anomaly_counter
    import src.services.km_conversion_service as km_conversion_service
    import src.services.knowledge_extractor_service as knowledge_extractor_service

    def close_create_task(coro, **_kwargs):
        # Accept the keyword-only options of the real asyncio.create_task
        # (name=, context=) so call sites that pass them do not fail with a
        # TypeError that only exists under this stub.
        if inspect.iscoroutine(coro):
            # Close instead of scheduling: the coroutine never runs, and
            # Python does not warn that it "was never awaited".
            coro.close()
        # NOTE(review): the placeholder has no Task API (add_done_callback,
        # cancel, ...); extend it if the code under test uses the return value.
        return SimpleNamespace()

    monkeypatch.setattr(asyncio, "create_task", close_create_task)
    monkeypatch.setattr(
        knowledge_extractor_service,
        "get_knowledge_extractor",
        lambda: SimpleNamespace(extract_from_incident=AsyncMock(return_value=None)),
    )
    monkeypatch.setattr(
        km_conversion_service,
        "get_km_conversion_service",
        lambda: SimpleNamespace(convert=AsyncMock(return_value=None)),
    )
    monkeypatch.setattr(
        anomaly_counter,
        "AnomalyCounter",
        # A plain callable is enough: attributes stored on a SimpleNamespace
        # instance are never bound, so no staticmethod wrapper is needed.
        SimpleNamespace(derive_key_from_incident=lambda _incident: None),
    )
    monkeypatch.setattr(
        anomaly_counter,
        "get_anomaly_counter",
        lambda: SimpleNamespace(),
    )
@pytest.mark.asyncio
async def test_resolve_incident_can_close_db_only_active_incident(monkeypatch):
    """An incident gone from Redis but still active in the DB must resolve.

    The working-memory lookup misses and the episodic-memory lookup returns an
    INVESTIGATING incident. Resolving it must return that same object marked
    RESOLVED with ``resolved_at`` set, and must not write it back to working
    memory.
    """
    incident_id = "INC-DB-FALLBACK-ACTIVE-001"
    stale_incident = SimpleNamespace(
        incident_id=incident_id,
        status=IncidentStatus.INVESTIGATING,
        resolved_at=None,
        updated_at=None,
        signals=[],
        affected_services=[],
        decision_chain=None,
        outcome=None,
    )

    service = IncidentService()
    # Working memory misses, episodic memory still holds the incident.
    working_memory_lookup = AsyncMock(return_value=None)
    episodic_lookup = AsyncMock(return_value=stale_incident)
    working_memory_save = AsyncMock(return_value=True)
    replacements = (
        ("get_from_working_memory", working_memory_lookup),
        ("get_from_episodic_memory", episodic_lookup),
        ("save_to_working_memory", working_memory_save),
    )
    for attr_name, stub in replacements:
        monkeypatch.setattr(service, attr_name, stub)
    _patch_resolve_side_effects(monkeypatch)

    resolved = await service.resolve_incident(incident_id, emit_postmortem=False)

    assert resolved is stale_incident
    assert stale_incident.status is IncidentStatus.RESOLVED
    assert stale_incident.resolved_at is not None
    episodic_lookup.assert_awaited_once_with(incident_id)
    # The DB-only path must skip the working-memory write entirely.
    working_memory_save.assert_not_called()