Files
awoooi/apps/api/tests/test_incident_service_resolve_idempotency.py
Your Name 92316dda04
All checks were successful
CD Pipeline / tests (push) Successful in 1m33s
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / build-and-deploy (push) Successful in 4m54s
CD Pipeline / post-deploy-checks (push) Successful in 2m8s
fix(api): resolve db-only stale incidents
2026-05-29 11:15:46 +08:00

245 lines
8.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
test_incident_service_resolve_idempotency
==========================================
驗證 `IncidentService.resolve_incident` 對已經 RESOLVED 的 incident 必須 idempotent
- 直接 return existing incident
- 不呼叫 save_to_working_memory避免重複 Redis write
- 不呼叫 incident_repository.update_status避免重複 DB write
- 不觸發 postmortem / KB extract / KM convert / disposition 副作用
對應 critic 必修 #2 — 沒這個單測,未來有人挪 guard 位置會悄悄破功,
重新放大「resolve_incident 重複觸發 postmortem 洗版」的舊風險。
"""
import inspect
from datetime import UTC, datetime
from types import SimpleNamespace
from unittest.mock import AsyncMock
import pytest
from src.models.incident import IncidentStatus
from src.services.incident_service import (
IncidentService,
normalize_status,
parse_decision_chain,
parse_incident_outcome,
)
def _patch_resolve_side_effects(monkeypatch):
    """Stub the downstream work reached when an active incident is resolved.

    Lets the active-incident resolve tests verify only the resolve contract
    without actually triggering downstream work. Everything is installed
    through ``monkeypatch``, so pytest undoes it when the test finishes.

    Replaced names:
    - ``asyncio.create_task``: closes the coroutine instead of scheduling it.
    - ``knowledge_extractor_service.get_knowledge_extractor``: returns an object
      whose ``extract_from_incident`` is an ``AsyncMock`` returning ``None``.
    - ``km_conversion_service.get_km_conversion_service``: returns an object
      whose ``convert`` is an ``AsyncMock`` returning ``None``.
    - ``anomaly_counter.AnomalyCounter``: stand-in whose
      ``derive_key_from_incident`` always returns ``None``.
    - ``anomaly_counter.get_anomaly_counter``: returns an empty namespace.

    Args:
        monkeypatch: the pytest ``monkeypatch`` fixture of the calling test.
    """
    import asyncio

    import src.services.anomaly_counter as anomaly_counter
    import src.services.km_conversion_service as km_conversion_service
    import src.services.knowledge_extractor_service as knowledge_extractor_service

    def close_create_task(coro, **_task_kwargs):
        # The real asyncio.create_task takes keyword-only options (``name``,
        # ``context``); accept and ignore them so a caller that names its
        # background task does not blow up with TypeError inside this stub.
        if inspect.iscoroutine(coro):
            # Close rather than schedule: the background work never runs.
            coro.close()
        # Bare placeholder returned in place of a Task.
        return SimpleNamespace()

    monkeypatch.setattr(asyncio, "create_task", close_create_task)
    monkeypatch.setattr(
        knowledge_extractor_service,
        "get_knowledge_extractor",
        lambda: SimpleNamespace(extract_from_incident=AsyncMock(return_value=None)),
    )
    monkeypatch.setattr(
        km_conversion_service,
        "get_km_conversion_service",
        lambda: SimpleNamespace(convert=AsyncMock(return_value=None)),
    )
    monkeypatch.setattr(
        anomaly_counter,
        "AnomalyCounter",
        SimpleNamespace(derive_key_from_incident=staticmethod(lambda _incident: None)),
    )
    monkeypatch.setattr(
        anomaly_counter,
        "get_anomaly_counter",
        lambda: SimpleNamespace(),
    )
@pytest.mark.asyncio
async def test_resolve_incident_skips_when_already_resolved(monkeypatch):
    """Resolving an incident that is already RESOLVED must be idempotent."""
    incident_id = "INC-IDEMPO-001"
    already_resolved = SimpleNamespace(
        incident_id=incident_id,
        status=IncidentStatus.RESOLVED,
    )
    working_memory_read = AsyncMock(return_value=already_resolved)
    working_memory_write = AsyncMock(return_value=True)

    service = IncidentService()
    # The entry-point read hands back the RESOLVED incident.
    monkeypatch.setattr(service, "get_from_working_memory", working_memory_read)
    # The write is replaced by an AsyncMock so any call to it is observable.
    monkeypatch.setattr(service, "save_to_working_memory", working_memory_write)

    returned = await service.resolve_incident(incident_id)

    # The existing incident object itself must come back.
    assert returned is already_resolved
    # No side effect may fire: the guard has to run before the status mutation.
    # (The original note pins that mutation to "line 1117", presumably of the
    # service module — not verifiable from this file.)
    working_memory_write.assert_not_called()
@pytest.mark.asyncio
async def test_resolve_incident_can_close_db_only_active_incident(monkeypatch):
    """An old incident whose Redis TTL expired but is still active in the DB
    must be resolvable, without being written back into Redis."""
    incident_id = "INC-DB-FALLBACK-ACTIVE-001"
    db_only_incident = SimpleNamespace(
        incident_id=incident_id,
        status=IncidentStatus.INVESTIGATING,
        resolved_at=None,
        updated_at=None,
        signals=[],
        affected_services=[],
        decision_chain=None,
        outcome=None,
    )
    episodic_read = AsyncMock(return_value=db_only_incident)
    working_memory_write = AsyncMock(return_value=True)

    service = IncidentService()
    # Working memory misses, so the lookup has to fall back to episodic memory.
    replacements = {
        "get_from_working_memory": AsyncMock(return_value=None),
        "get_from_episodic_memory": episodic_read,
        "save_to_working_memory": working_memory_write,
    }
    for attr_name, replacement in replacements.items():
        monkeypatch.setattr(service, attr_name, replacement)
    _patch_resolve_side_effects(monkeypatch)

    returned = await service.resolve_incident(incident_id, emit_postmortem=False)

    # Same object back, now marked RESOLVED with a resolution timestamp.
    assert returned is db_only_incident
    assert db_only_incident.status is IncidentStatus.RESOLVED
    assert db_only_incident.resolved_at is not None
    # The fallback was consulted exactly once and nothing was saved to working memory.
    episodic_read.assert_awaited_once_with(incident_id)
    working_memory_write.assert_not_called()
@pytest.mark.asyncio
async def test_resolve_incident_returns_none_when_not_found(monkeypatch):
    """Return None when the incident does not exist.

    Ensures the idempotency guard does not affect the not-found path.
    """
    working_memory_write = AsyncMock(return_value=True)

    service = IncidentService()
    # Both lookups miss.
    for lookup_name in ("get_from_working_memory", "get_from_episodic_memory"):
        monkeypatch.setattr(service, lookup_name, AsyncMock(return_value=None))
    monkeypatch.setattr(service, "save_to_working_memory", working_memory_write)

    returned = await service.resolve_incident("INC-NOT-EXIST")

    assert returned is None
    working_memory_write.assert_not_called()
@pytest.mark.asyncio
async def test_resolve_incident_uses_episodic_memory_for_idempotent_fallback(monkeypatch):
    """When the Redis TTL has expired but the DB already says RESOLVED, resolve
    should fall back to the DB and stay idempotent."""
    incident_id = "INC-DB-FALLBACK-001"
    resolved_in_db = SimpleNamespace(
        incident_id=incident_id,
        status=IncidentStatus.RESOLVED,
    )
    episodic_read = AsyncMock(return_value=resolved_in_db)
    working_memory_write = AsyncMock(return_value=True)

    service = IncidentService()
    # Working memory misses; episodic memory supplies the RESOLVED incident.
    monkeypatch.setattr(
        service, "get_from_working_memory", AsyncMock(return_value=None)
    )
    monkeypatch.setattr(service, "get_from_episodic_memory", episodic_read)
    monkeypatch.setattr(service, "save_to_working_memory", working_memory_write)

    returned = await service.resolve_incident(incident_id)

    assert returned is resolved_in_db
    episodic_read.assert_awaited_once_with(incident_id)
    working_memory_write.assert_not_called()
def test_normalize_status_accepts_db_enum_name() -> None:
    """PostgreSQL SQLEnum stores the Enum name; on read-back it must be
    normalized to the Pydantic value."""
    expectations = (
        ("INVESTIGATING", "investigating"),
        (IncidentStatus.CLOSED, "closed"),
    )
    for raw_status, normalized in expectations:
        assert normalize_status(raw_status) == normalized
def test_parse_decision_chain_skips_legacy_list_payload() -> None:
    """Legacy data may store decision_chain as a list; that must not block
    incident hydrate / resolve."""
    legacy_chain = [{"stage": "router"}]
    parsed = parse_decision_chain(legacy_chain, "INC-LEGACY-CHAIN")
    assert parsed is None
def test_parse_incident_outcome_skips_legacy_string_payload() -> None:
    """Legacy data may store outcome as a string; that must not block
    incident hydrate / resolve."""
    legacy_outcome = "resolved"
    parsed = parse_incident_outcome(legacy_outcome, "INC-LEGACY-OUTCOME")
    assert parsed is None
def test_record_to_incident_tolerates_legacy_decision_chain_list() -> None:
    """The DB fallback must be able to read back an old incident even when its
    decision_chain does not follow the new schema."""
    timestamp = datetime.now(tz=UTC)
    legacy_fields = {
        "incident_id": "INC-LEGACY-CHAIN",
        "status": "INVESTIGATING",
        "severity": "P2",
        "signals": [],
        "affected_services": [],
        # Legacy shape under test: a list.
        "decision_chain": [{"stage": "router"}],
        "proposal_ids": [],
        "outcome": None,
        "created_at": timestamp,
        "updated_at": timestamp,
        "resolved_at": None,
        "closed_at": None,
        "ttl_days": 7,
        "vectorized": False,
        "notification_type": "TYPE-3",
        "alert_category": "host_resource",
    }
    legacy_record = SimpleNamespace(**legacy_fields)

    hydrated = IncidentService()._record_to_incident(legacy_record)

    assert hydrated.incident_id == "INC-LEGACY-CHAIN"
    assert hydrated.status == IncidentStatus.INVESTIGATING
    # The unparseable legacy chain comes back as None.
    assert hydrated.decision_chain is None
def test_record_to_incident_tolerates_legacy_outcome_string() -> None:
    """The DB fallback must be able to read back an old incident even when its
    outcome does not follow the new schema."""
    timestamp = datetime.now(tz=UTC)
    legacy_fields = {
        "incident_id": "INC-LEGACY-OUTCOME",
        "status": "INVESTIGATING",
        "severity": "P2",
        "signals": [],
        "affected_services": [],
        "decision_chain": None,
        "proposal_ids": [],
        # Legacy shape under test: a bare string.
        "outcome": "resolved",
        "created_at": timestamp,
        "updated_at": timestamp,
        "resolved_at": None,
        "closed_at": None,
        "ttl_days": 7,
        "vectorized": False,
        "notification_type": "TYPE-3",
        "alert_category": "host_resource",
    }
    legacy_record = SimpleNamespace(**legacy_fields)

    hydrated = IncidentService()._record_to_incident(legacy_record)

    assert hydrated.incident_id == "INC-LEGACY-OUTCOME"
    assert hydrated.status == IncidentStatus.INVESTIGATING
    # The unparseable legacy outcome comes back as None.
    assert hydrated.outcome is None