fix(api): suppress batch reconcile postmortems
This commit is contained in:
@@ -76,6 +76,7 @@ async def reconcile_stuck_incidents(limit: int = BATCH_LIMIT) -> tuple[int, int]
|
||||
result = await incident_service.resolve_incident(
|
||||
candidate.incident_id,
|
||||
resolution_type=candidate.resolution_type,
|
||||
emit_postmortem=False,
|
||||
)
|
||||
if result is not None:
|
||||
resolved += 1
|
||||
|
||||
@@ -1143,6 +1143,7 @@ class IncidentService:
|
||||
self,
|
||||
incident_id: str,
|
||||
resolution_type: str = "manual",
|
||||
emit_postmortem: bool = True,
|
||||
) -> Incident | None:
|
||||
"""
|
||||
將 Incident 狀態更新為 RESOLVED
|
||||
@@ -1152,6 +1153,8 @@ class IncidentService:
|
||||
Args:
|
||||
incident_id: 事件 ID
|
||||
resolution_type: "manual"(預設)| "timeout"(Approval 48h 逾期自動結案)
|
||||
emit_postmortem: 是否送出使用者可見 Postmortem。批次歷史 reconciler
|
||||
會關閉此開關,避免一次補關大量舊 incident 時洗版 Telegram。
|
||||
|
||||
ADR-073 補丁 2026-04-15 ogt + Claude Sonnet 4.6:
|
||||
新增 resolution_type="timeout" 路徑 — Approval EXPIRED 時由
|
||||
@@ -1323,47 +1326,54 @@ class IncidentService:
|
||||
except Exception as _disp_e:
|
||||
logger.warning("disposition_manual_resolve_failed", error=str(_disp_e))
|
||||
|
||||
# MASTER Task 4.2 (2026-04-14 Claude Sonnet 4.6): Postmortem 自動組裝
|
||||
# Incident duration > POSTMORTEM_MIN_DURATION_MINUTES(10min) 時自動生成
|
||||
# 孤兒 report_generation_service.trigger_postmortem 本次接上 resolve 路徑
|
||||
try:
|
||||
import asyncio
|
||||
if emit_postmortem:
|
||||
# MASTER Task 4.2 (2026-04-14 Claude Sonnet 4.6): Postmortem 自動組裝
|
||||
# Incident duration > POSTMORTEM_MIN_DURATION_MINUTES(10min) 時自動生成
|
||||
# 孤兒 report_generation_service.trigger_postmortem 本次接上 resolve 路徑
|
||||
try:
|
||||
import asyncio
|
||||
|
||||
from src.services.report_generation_service import (
|
||||
get_report_generation_service,
|
||||
)
|
||||
|
||||
alertname = (
|
||||
incident.signals[0].labels.get("alertname", "UnknownAlert")
|
||||
if incident.signals else "UnknownAlert"
|
||||
)
|
||||
title = f"{alertname} — {', '.join(incident.affected_services or ['N/A'])}"
|
||||
root_cause = None
|
||||
resolution_action = None
|
||||
ai_provider = None
|
||||
auto_repaired = False
|
||||
if incident.decision_chain:
|
||||
root_cause = incident.decision_chain.hypothesis
|
||||
ai_provider = incident.decision_chain.model_used
|
||||
if incident.outcome:
|
||||
resolution_action = (incident.outcome.learning_notes or None)
|
||||
auto_repaired = bool(incident.outcome.execution_success)
|
||||
|
||||
asyncio.create_task(
|
||||
get_report_generation_service().trigger_postmortem(
|
||||
incident_id=incident.incident_id,
|
||||
title=title,
|
||||
created_at=incident.signals[0].fired_at if incident.signals else incident.resolved_at,
|
||||
resolved_at=incident.resolved_at,
|
||||
root_cause=root_cause,
|
||||
resolution_action=resolution_action,
|
||||
ai_provider=ai_provider,
|
||||
auto_repaired=auto_repaired,
|
||||
from src.services.report_generation_service import (
|
||||
get_report_generation_service,
|
||||
)
|
||||
|
||||
alertname = (
|
||||
incident.signals[0].labels.get("alertname", "UnknownAlert")
|
||||
if incident.signals else "UnknownAlert"
|
||||
)
|
||||
title = f"{alertname} — {', '.join(incident.affected_services or ['N/A'])}"
|
||||
root_cause = None
|
||||
resolution_action = None
|
||||
ai_provider = None
|
||||
auto_repaired = False
|
||||
if incident.decision_chain:
|
||||
root_cause = incident.decision_chain.hypothesis
|
||||
ai_provider = incident.decision_chain.model_used
|
||||
if incident.outcome:
|
||||
resolution_action = (incident.outcome.learning_notes or None)
|
||||
auto_repaired = bool(incident.outcome.execution_success)
|
||||
|
||||
asyncio.create_task(
|
||||
get_report_generation_service().trigger_postmortem(
|
||||
incident_id=incident.incident_id,
|
||||
title=title,
|
||||
created_at=incident.signals[0].fired_at if incident.signals else incident.resolved_at,
|
||||
resolved_at=incident.resolved_at,
|
||||
root_cause=root_cause,
|
||||
resolution_action=resolution_action,
|
||||
ai_provider=ai_provider,
|
||||
auto_repaired=auto_repaired,
|
||||
)
|
||||
)
|
||||
except Exception as _pm_e:
|
||||
logger.exception("postmortem_trigger_failed",
|
||||
incident_id=incident_id, error=str(_pm_e))
|
||||
else:
|
||||
logger.info(
|
||||
"postmortem_suppressed_for_batch_reconcile",
|
||||
incident_id=incident_id,
|
||||
resolution_type=resolution_type,
|
||||
)
|
||||
except Exception as _pm_e:
|
||||
logger.exception("postmortem_trigger_failed",
|
||||
incident_id=incident_id, error=str(_pm_e))
|
||||
|
||||
return incident
|
||||
|
||||
|
||||
@@ -43,8 +43,10 @@ async def test_reconcile_stuck_incidents_resolves_strong_evidence(monkeypatch):
|
||||
)
|
||||
assert service.resolve_incident.await_args_list[0].kwargs == {
|
||||
"resolution_type": "auto_repair",
|
||||
"emit_postmortem": False,
|
||||
}
|
||||
assert service.resolve_incident.await_args_list[1].args == ("INC-TIMEOUT",)
|
||||
assert service.resolve_incident.await_args_list[1].kwargs == {
|
||||
"resolution_type": "timeout",
|
||||
"emit_postmortem": False,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user