Files
awoooi/apps/api/src/services/operator_outcome.py
Your Name a21f94ced1
Some checks failed
CD Pipeline / tests (push) Successful in 1m17s
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / build-and-deploy (push) Successful in 4m11s
CD Pipeline / post-deploy-checks (push) Has been cancelled
fix(alerts): clarify execution result verdict
2026-05-31 17:28:55 +08:00

388 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Operator-facing alert outcome contract.
This module is intentionally generic: it converts the existing truth-chain
stage, automation quality verdict, and remediation evidence state into one
small contract that Telegram, AwoooP, and result notifications can all share.
"""
from __future__ import annotations
from typing import Any
# Notification channels selected when the outcome needs a human to act.
_ACTION_REQUIRED_CHANNELS = ("telegram_sre_war_room", "awooop_operator_console")
# Notification channels selected when the outcome only reports a result.
_RESULT_ONLY_CHANNELS = ("telegram_result_reply", "awooop_operator_console")
def _safe_int(value: Any) -> int:
try:
return int(value or 0)
except (TypeError, ValueError):
return 0
def _first_text(values: list[Any]) -> str | None:
for value in values:
if value:
return str(value)
return None
def _build_notification(
*,
mode: str,
channels: tuple[str, ...],
reason: str,
source_id: str | None,
) -> dict[str, Any]:
return {
"mode": mode,
"channels": list(channels),
"reason": reason,
"source_id": source_id,
"telegram": (
"reply_to_original_or_standalone_action_required"
if mode == "action_required"
else "reply_to_original_or_standalone_result"
),
"awooop": "status_chain_panel",
}
def _build_execution_result(
*,
state: str,
verdict: str,
stage: str,
has_repair_execution: bool,
has_nonrepair_operation: bool,
verification: str,
) -> dict[str, Any]:
"""Describe execution completion separately from remediation outcome."""
approval_status = "unknown"
completion_status = "unknown"
command_status = "unknown"
repair_status = "unknown"
failure_status = "unknown"
summary = "尚未能判定執行是否完成或失敗"
terminal = False
if state == "completed_verified":
approval_status = "completed"
completion_status = "completed_verified"
command_status = "succeeded"
repair_status = "verified_repaired"
failure_status = "no_failure"
summary = "已完成:修復指令成功,且驗證通過"
terminal = True
elif state == "execution_failed_manual_required":
approval_status = "completed"
completion_status = "failed"
command_status = "failed"
repair_status = "failed"
failure_status = "command_failed"
summary = "已失敗:修復指令執行失敗,需人工接手"
terminal = True
elif state == "diagnostic_only_manual_review":
approval_status = "completed"
completion_status = "completed_no_repair"
command_status = "diagnostic_completed" if has_nonrepair_operation else "skipped_no_action"
repair_status = "not_executed"
failure_status = "no_command_failed"
summary = "流程已完成:只完成診斷/觀察,沒有修復指令成功或失敗"
terminal = True
elif state == "verification_degraded_manual_required":
approval_status = "completed"
completion_status = "completed_verification_degraded"
command_status = "succeeded" if has_repair_execution else "diagnostic_completed"
repair_status = "verification_degraded"
failure_status = "no_command_failed"
summary = "已執行,但驗證結果退化;需人工確認是否真的修復"
terminal = False
elif state == "execution_unverified_manual_required":
approval_status = "completed"
completion_status = "completed_unverified"
command_status = "succeeded"
repair_status = "unverified"
failure_status = "no_command_failed"
summary = "已執行成功,但缺少修復驗證結果"
terminal = False
elif state == "no_action_manual_review":
approval_status = "pending_manual_review"
completion_status = "not_started_no_action"
command_status = "not_started"
repair_status = "not_executed"
failure_status = "not_applicable"
summary = "尚未執行AI 建議不修復,等待人工決定是否接手"
terminal = False
elif state == "approval_rejected_no_execution":
approval_status = "rejected"
completion_status = "closed_no_execution"
command_status = "not_run"
repair_status = "not_executed"
failure_status = "not_applicable"
summary = "已拒絕:審批結案,未執行任何修復指令"
terminal = True
elif state == "approval_expired_manual_review":
approval_status = "expired"
completion_status = "expired_no_execution"
command_status = "not_run"
repair_status = "not_executed"
failure_status = "not_applicable"
summary = "審批逾期:未執行修復,需人工重新審查"
terminal = False
elif state == "approval_required":
approval_status = "pending"
completion_status = "pending_approval"
command_status = "not_started"
repair_status = "not_executed"
failure_status = "not_applicable"
summary = "尚未執行:等待人工批准"
terminal = False
elif state == "read_only_dry_run_manual_gate":
approval_status = "manual_gate"
completion_status = "dry_run_completed"
command_status = "dry_run_succeeded"
repair_status = "not_executed"
failure_status = "not_applicable"
summary = "只讀試跑完成,尚未執行修復"
terminal = False
elif state == "observed_not_executed":
approval_status = "not_required"
completion_status = "observed_not_executed"
command_status = "not_run"
repair_status = "not_executed"
failure_status = "not_applicable"
summary = "只完成收件/觀測,尚未進入執行"
terminal = False
elif has_repair_execution:
approval_status = "completed"
completion_status = "completed_needs_review"
command_status = "succeeded" if verification != "missing" else "completed"
repair_status = "needs_review"
failure_status = "no_command_failed"
summary = "已有執行紀錄,但仍需人工確認最終修復結果"
terminal = False
elif verdict in {"received_only", "observed_not_executed"} or stage == "received":
approval_status = "not_started"
completion_status = "not_started"
command_status = "not_started"
repair_status = "not_executed"
failure_status = "not_applicable"
summary = "尚未執行修復指令"
terminal = False
return {
"approval_status": approval_status,
"completion_status": completion_status,
"command_status": command_status,
"repair_status": repair_status,
"failure_status": failure_status,
"terminal": terminal,
"summary_zh": summary,
}
def build_operator_outcome(
    *,
    truth_status: dict[str, Any] | None = None,
    automation_quality: dict[str, Any] | None = None,
    remediation_state: str | None = None,
    fetch_error: str | None = None,
    source_id: str | None = None,
) -> dict[str, Any]:
    """Build a normalized operator outcome for an alert/incident.

    The output deliberately answers three questions:

    1. What happened?
    2. Does a human need to intervene?
    3. How will that human be notified / where should they act?

    Args:
        truth_status: Mapping read for ``current_stage``, ``stage_status``,
            ``blockers`` and ``needs_human``. Anything that is not a mapping
            is treated as empty.
        automation_quality: Mapping read for ``verdict``, ``facts`` and
            ``blockers``. Anything that is not a mapping is treated as empty.
        remediation_state: Compared against ``"read_only"``,
            ``"write_observed"``, ``"blocked"`` and ``"fetch_failed"``.
        fetch_error: When truthy, takes precedence over every other signal;
            its first 240 characters become the human-action reason.
        source_id: Passed through unchanged into the notification section.

    Returns:
        A dict tagged ``schema_version == "operator_outcome_v1"`` holding the
        state, severity, summary, human-action fields, execution result,
        notification routing, evidence counters and up to 8 blockers.
    """
    from collections.abc import Mapping

    # Guard both inputs like ``facts`` below, so a stray string or list
    # degrades to "no data" instead of raising AttributeError on ``.get``.
    if not isinstance(truth_status, Mapping):
        truth_status = {}
    if not isinstance(automation_quality, Mapping):
        automation_quality = {}
    facts = automation_quality.get("facts")
    if not isinstance(facts, dict):
        facts = {}

    verdict = str(automation_quality.get("verdict") or "unknown")
    stage = str(truth_status.get("current_stage") or "unknown")
    stage_status = str(truth_status.get("stage_status") or "unknown")

    # Truth-chain blockers come first, then automation-quality blockers;
    # non-list values and falsy entries are ignored.
    blockers: list[str] = []
    for blocker_source in (
        truth_status.get("blockers"),
        automation_quality.get("blockers"),
    ):
        if isinstance(blocker_source, list):
            blockers.extend(str(item) for item in blocker_source if item)

    verification = str(facts.get("verification_result") or "missing")

    # Normalize the record counters once; they drive both the decision
    # flags and the evidence section of the result.
    auto_repair_records = _safe_int(facts.get("auto_repair_execution_records"))
    effective_records = _safe_int(facts.get("effective_execution_records"))
    operation_records = _safe_int(facts.get("automation_operation_records"))
    has_repair_execution = effective_records > 0 or auto_repair_records > 0
    has_nonrepair_operation = operation_records > 0 and not has_repair_execution

    needs_human_from_truth = bool(truth_status.get("needs_human"))
    first_blocker = _first_text(blockers)

    # The branch order below is the precedence between signals: the first
    # matching branch decides the state.
    if fetch_error:
        state = "truth_chain_unavailable"
        severity = "warning"
        needs_human = True
        next_action = "open_awooop_and_review_source_records"
        summary = "真相鏈查詢失敗,需人工確認處置結果"
        reason = str(fetch_error)[:240]
    elif verdict == "auto_repaired_verified":
        state = "completed_verified"
        severity = "success"
        needs_human = False
        next_action = "monitor_for_regression"
        summary = "已驗證自動修復完成"
        reason = "execution_and_verification_succeeded"
    elif verdict == "execution_failed" or stage == "execution_failed":
        state = "execution_failed_manual_required"
        severity = "critical"
        needs_human = True
        next_action = "manual_fix_or_rollback"
        summary = "執行失敗,需人工介入"
        reason = first_blocker or "execution_failed"
    elif verdict == "manual_required_diagnostic_only" or has_nonrepair_operation:
        state = "diagnostic_only_manual_review"
        severity = "warning"
        needs_human = True
        next_action = "manual_review_or_collect_repair_evidence"
        summary = "只完成診斷/觀察,尚未證明修復"
        reason = first_blocker or "diagnostic_or_audit_only"
    elif verdict == "auto_repaired_verification_degraded":
        state = "verification_degraded_manual_required"
        severity = "warning"
        needs_human = True
        next_action = "manual_verify_or_repair"
        summary = "已執行但驗證退化,需人工確認"
        reason = first_blocker or f"verification={verification}"
    elif verdict == "execution_unverified" or (
        has_repair_execution and verification == "missing"
    ):
        state = "execution_unverified_manual_required"
        severity = "warning"
        needs_human = True
        next_action = "run_or_review_post_execution_verification"
        summary = "已執行但缺少驗證結果,需人工確認"
        reason = first_blocker or "execution_without_verification_result"
    elif verdict == "manual_required_no_action":
        state = "no_action_manual_review"
        severity = "warning"
        needs_human = True
        next_action = "manual_review_no_action_decision"
        summary = "AI 選擇不執行修復,需人工判斷是否接手"
        reason = first_blocker or "no_action_or_observe"
    elif verdict == "approval_rejected_no_execution" or stage == "approval_rejected":
        state = "approval_rejected_no_execution"
        severity = "info"
        needs_human = False
        next_action = "monitor_or_reopen_if_alert_recurs"
        summary = "已拒絕處置,未執行修復"
        reason = "approval_rejected"
    elif verdict == "approval_expired_manual_review" or stage == "approval_expired":
        state = "approval_expired_manual_review"
        severity = "warning"
        needs_human = True
        next_action = "reopen_close_or_escalate_expired_approval"
        summary = "審批已逾期,需人工重新審查"
        reason = first_blocker or "approval_expired"
    elif remediation_state == "read_only":
        state = "read_only_dry_run_manual_gate"
        severity = "warning"
        needs_human = True
        next_action = "approve_or_escalate_from_awooop"
        summary = "只讀試跑完成,等待人工放行或轉交"
        reason = first_blocker or "read_only_dry_run"
    elif remediation_state == "write_observed":
        state = "write_observed_manual_review"
        severity = "critical"
        needs_human = True
        next_action = "review_write_evidence"
        summary = "補救證據出現寫入旗標,需人工確認"
        reason = first_blocker or "write_observed"
    elif remediation_state in {"blocked", "fetch_failed"}:
        state = "blocked_manual_required"
        severity = "critical" if remediation_state == "blocked" else "warning"
        needs_human = True
        next_action = "manual_investigation"
        summary = "自動化流程受阻,需人工處理"
        reason = first_blocker or remediation_state
    elif verdict == "approval_required" or stage == "approval_required":
        state = "approval_required"
        severity = "warning"
        needs_human = True
        next_action = "approve_reject_or_escalate"
        summary = "等待人工審批,尚未執行"
        reason = first_blocker or "pending_human_approval"
    elif needs_human_from_truth:
        state = "manual_required"
        severity = "warning"
        needs_human = True
        next_action = "manual_investigation"
        summary = "真相鏈判定需人工介入"
        reason = first_blocker or f"{stage}/{stage_status}"
    elif verdict in {"observed_not_executed", "received_only"}:
        state = "observed_not_executed"
        severity = "info"
        needs_human = False
        next_action = "collect_evidence_or_wait"
        summary = "已收到/觀測,尚未進入修復執行"
        reason = first_blocker or verdict
    else:
        state = "unknown_pending_observation"
        severity = "warning"
        # With no recognised signal, a human is asked for only if something
        # was reported as blocking.
        needs_human = bool(blockers)
        next_action = "review_status_chain"
        summary = "處置結果尚未形成明確結論"
        reason = first_blocker or f"{verdict}:{stage}/{stage_status}"

    execution_result = _build_execution_result(
        state=state,
        verdict=verdict,
        stage=stage,
        has_repair_execution=has_repair_execution,
        has_nonrepair_operation=has_nonrepair_operation,
        verification=verification,
    )
    mode = "action_required" if needs_human else "result_only"
    channels = _ACTION_REQUIRED_CHANNELS if needs_human else _RESULT_ONLY_CHANNELS
    return {
        "schema_version": "operator_outcome_v1",
        "state": state,
        "severity": severity,
        "summary_zh": summary,
        "needs_human": needs_human,
        "human_action_required": needs_human,
        "human_action_reason": reason,
        "next_action": next_action,
        "execution_result": execution_result,
        "notification": _build_notification(
            mode=mode,
            channels=channels,
            reason=reason,
            source_id=source_id,
        ),
        "evidence": {
            "verdict": verdict,
            "current_stage": stage,
            "stage_status": stage_status,
            "verification": verification,
            "auto_repair_execution_records": auto_repair_records,
            "effective_execution_records": effective_records,
            "automation_operation_records": operation_records,
            "mcp_gateway_total": _safe_int(facts.get("mcp_gateway_total")),
            "knowledge_entries": _safe_int(facts.get("knowledge_entries")),
        },
        "blockers": blockers[:8],
    }