fix(awooop): show diagnostic ops as non repair
All checks were successful
CD Pipeline / tests (push) Successful in 1m19s
Code Review / ai-code-review (push) Successful in 13s
CD Pipeline / build-and-deploy (push) Successful in 3m38s
CD Pipeline / post-deploy-checks (push) Successful in 2m5s

This commit is contained in:
Your Name
2026-05-31 14:50:01 +08:00
parent 0db345418f
commit 697fff96d8
4 changed files with 58 additions and 3 deletions

View File

@@ -2753,6 +2753,20 @@ def _safe_int(value: Any) -> int:
return 0
def _has_repair_execution_evidence(facts: dict[str, Any]) -> bool:
    """Report whether ``facts`` records at least one repair execution.

    Returns True when either the ``auto_repair_execution_records`` or the
    ``effective_execution_records`` counter, after being passed through
    ``_safe_int``, is greater than zero.
    """
    # Checked in this order; evaluation stops at the first positive counter.
    repair_counter_keys = (
        "auto_repair_execution_records",
        "effective_execution_records",
    )
    return any(_safe_int(facts.get(key)) > 0 for key in repair_counter_keys)
def _has_nonrepair_operation_evidence(facts: dict[str, Any]) -> bool:
    """Report whether ``facts`` holds operation records but no repair execution.

    Returns True only when ``automation_operation_records`` (via ``_safe_int``)
    is positive and ``_has_repair_execution_evidence`` is False for the same
    ``facts``.
    """
    operation_count = _safe_int(facts.get("automation_operation_records"))
    if operation_count > 0:
        # Operations exist; they only count as non-repair when no repair
        # execution evidence accompanies them.
        return not _has_repair_execution_evidence(facts)
    return False
def _latest_remediation_history_item(
history: dict[str, Any] | None,
) -> dict[str, Any]:
@@ -3509,6 +3523,8 @@ def _build_awooop_status_chain(
)
auto_repair_records = _safe_int(facts.get("auto_repair_execution_records"))
operation_records = _safe_int(facts.get("automation_operation_records"))
has_repair_execution = _has_repair_execution_evidence(facts)
has_nonrepair_operation = _has_nonrepair_operation_evidence(facts)
gateway_total = _safe_int(facts.get("mcp_gateway_total"))
km_entries = _safe_int(facts.get("knowledge_entries"))
needs_human = bool(truth_status.get("needs_human"))
@@ -3516,13 +3532,16 @@ def _build_awooop_status_chain(
if verdict == "auto_repaired_verified":
repair_state = "auto_repaired_verified"
next_step = "monitor_for_regression"
elif auto_repair_records > 0 or operation_records > 0:
elif has_repair_execution:
repair_state = (
"executed_pending_verification"
if str(verification) == "missing"
else "executed"
)
next_step = "verify_execution_result"
elif has_nonrepair_operation:
repair_state = "diagnostic_or_audit_recorded"
next_step = "manual_review_or_collect_repair_evidence"
elif remediation_state == "read_only":
repair_state = "read_only_dry_run"
next_step = "approve_or_escalate_from_awooop"

View File

@@ -1614,6 +1614,39 @@ def test_awooop_status_chain_marks_read_only_manual_gate() -> None:
assert chain["blockers"] == ["pending_human_approval"]
def test_awooop_status_chain_does_not_treat_audit_ops_as_repair() -> None:
    """A lone operation record with zero repair executions is diagnostic-only."""
    incident_id = "INC-20260530-88D960"

    # One raw operation record, but no auto-repair or effective execution.
    audit_only_facts = {
        "auto_repair_execution_records": 0,
        "automation_operation_records": 1,
        "effective_execution_records": 0,
        "verification_result": "degraded",
        "mcp_gateway_total": 22,
        "knowledge_entries": 4,
    }
    truth_status = {
        "current_stage": "execution_succeeded",
        "stage_status": "success",
        "needs_human": False,
        "blockers": [],
    }
    automation_quality = {
        "verdict": "auto_repaired_verification_degraded",
        "facts": audit_only_facts,
        "blockers": ["verification_recorded"],
    }

    chain = _build_awooop_status_chain(
        incident_ids=[incident_id],
        source_id=incident_id,
        truth_chain={
            "truth_status": truth_status,
            "automation_quality": automation_quality,
        },
        remediation_history={"total": 0},
    )

    assert chain["repair_state"] == "diagnostic_or_audit_recorded"
    assert chain["next_step"] == "manual_review_or_collect_repair_evidence"
    evidence = chain["evidence"]
    assert evidence["operation_records"] == 1
    assert evidence["auto_repair_records"] == 0
def test_legacy_mcp_timeline_summary_surfaces_tool_context() -> None:
record = {
"incident_id": "INC-20260514-F85F21",

View File

@@ -124,6 +124,7 @@ DB / worker evidence:
- `incident_timeline_service` 讀取 `approval_records.extra_metadata.execution_kind / repair_executed`；`execution_success + repair_executed=false` 改成 `info`，標題為 observation recorded，不再顯示 repair success。
- Telegram 首屏 `_automation_status_summary()` 與 AwoooP status-chain / callback snapshot 改以 `auto_repair_execution_records` 或 `effective_execution_records` 判斷「真的有 repair execution」。
- `/api/v1/platform/status-chain` 的前台共用 API 同步改用 `effective_execution_records`；只有 raw operation log 時回 `repair_state=diagnostic_or_audit_recorded`，不再回 `executed`。
- 若只有 `automation_operation_records` 且 `effective_execution_records=0`，顯示 `diagnostic_recorded` / `diagnostic_or_audit_recorded`，文案改為「已記錄診斷/觀察，尚未證明修復」。
- `build_incident_reconciliation()` 也改用 `_effective_execution_ops()` 計算 executed ops，避免 NO_ACTION / audit-only row 觸發 `incident_open_after_successful_execution`。
- Production backfill 兩筆舊 incident
@@ -140,6 +141,8 @@ ruff check --select E9,F401,F821,F841 modified services/tests
-> pass
pytest test_incident_timeline_service.py test_awooop_truth_chain_service.py test_telegram_ai_automation_block.py test_telegram_message_templates.py -q
-> 101 passed
pytest test_awooop_operator_timeline_labels.py -q
-> status-chain diagnostic/audit-only repair_state guard covered
production DB readback:
INC-20260530-88D960 approval extra_metadata.execution_kind=diagnostic repair_executed=false
INC-20260531-88394F approval extra_metadata.execution_kind=no_action repair_executed=false

View File

@@ -2679,9 +2679,9 @@ Phase 6 完成後
**T154b Telegram display truth + historical backfill（2026-05-31 台北）**
- 觸發：T154 補了新流量 execution metadata，但前台/Telegram 顯示層仍以 raw `automation_operation_records > 0` 顯示「已自動執行」，會把 diagnostic / audit-only operation 誤認成 repair。`incident_timeline_service` 也會把 `execution_success` 一律畫成 executor success，舊 incident 即使補 metadata 仍會誤導 operator。
- 修正:`incident_timeline_service``approval_records.extra_metadata.execution_kind / repair_executed``execution_success + repair_executed=false` 改成 info / observation recorded。Telegram 首屏、AwoooP status-chain、callback snapshot 改用 `auto_repair_execution_records``effective_execution_records` 判斷 repair execution只有 raw operation records 時顯示 `diagnostic_recorded` / `diagnostic_or_audit_recorded`,文案為「已記錄診斷/觀察,尚未證明修復」。`build_incident_reconciliation()` 改用 `_effective_execution_ops()`NO_ACTION / audit-only 不再觸發 successful execution mismatch。
- 修正：`incident_timeline_service` 讀取 `approval_records.extra_metadata.execution_kind / repair_executed`；`execution_success + repair_executed=false` 改成 info / observation recorded。Telegram 首屏、AwoooP status-chain、callback snapshot、`/api/v1/platform/status-chain` 共用前台 API 改用 `auto_repair_execution_records` 或 `effective_execution_records` 判斷 repair execution；只有 raw operation records 時顯示 `diagnostic_recorded` / `diagnostic_or_audit_recorded`，文案為「已記錄診斷/觀察，尚未證明修復」。`build_incident_reconciliation()` 改用 `_effective_execution_ops()`，NO_ACTION / audit-only 不再觸發 successful execution mismatch。
- Production backfill：`INC-20260530-88D960` approval 標記 `execution_kind=diagnostic, repair_executed=false`；`INC-20260531-88394F` approval 標記 `execution_kind=no_action, repair_executed=false`。兩筆均新增 `alert_operation_log.EXECUTION_COMPLETED`、postmortem `knowledge_entries(entry_type=POSTMORTEM,path_type=postmortem)`、`KM_CONVERTED`、`truth_backfill_id=telegram_execution_truth_backfill_20260531_t154b`。未重跑任何修復動作。
- Verification`py_compile` passtargeted `ruff --select E9,F401,F821,F841` pass`test_incident_timeline_service.py` + `test_awooop_truth_chain_service.py` + `test_telegram_ai_automation_block.py` + `test_telegram_message_templates.py` -> 101 passed。Production DB readback confirms both approvals have `repair_executed=false` plus execution/postmortem/KM backfill rows.
- Verification：`py_compile` pass；targeted `ruff --select E9,F401,F821,F841` pass；`test_incident_timeline_service.py` + `test_awooop_truth_chain_service.py` + `test_telegram_ai_automation_block.py` + `test_telegram_message_templates.py` -> 101 passed；`test_awooop_operator_timeline_labels.py` covers status-chain diagnostic/audit-only repair_state。Production DB readback confirms both approvals have `repair_executed=false` plus execution/postmortem/KM backfill rows.
- 判讀：可宣稱 repair 的條件是 `effective_execution_records > 0` 或 `auto_repair_execution_records > 0`，不是 operation log 數量。舊資料修正後，operator 看到「已記錄診斷/觀察」時不能再視為自動修復完成。
**T152 Ansible runtime readiness surfaced（2026-05-24 台北）**