388 lines
15 KiB
Python
388 lines
15 KiB
Python
"""Operator-facing alert outcome contract.

This module is intentionally generic: it converts the existing truth-chain
stage, automation quality verdict, and remediation evidence state into one
small contract that Telegram, AwoooP, and result notifications can all share.
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import Any
|
||
|
||
_ACTION_REQUIRED_CHANNELS = ("telegram_sre_war_room", "awooop_operator_console")
|
||
_RESULT_ONLY_CHANNELS = ("telegram_result_reply", "awooop_operator_console")
|
||
|
||
|
||
def _safe_int(value: Any) -> int:
|
||
try:
|
||
return int(value or 0)
|
||
except (TypeError, ValueError):
|
||
return 0
|
||
|
||
|
||
def _first_text(values: list[Any]) -> str | None:
|
||
for value in values:
|
||
if value:
|
||
return str(value)
|
||
return None
|
||
|
||
|
||
def _build_notification(
|
||
*,
|
||
mode: str,
|
||
channels: tuple[str, ...],
|
||
reason: str,
|
||
source_id: str | None,
|
||
) -> dict[str, Any]:
|
||
return {
|
||
"mode": mode,
|
||
"channels": list(channels),
|
||
"reason": reason,
|
||
"source_id": source_id,
|
||
"telegram": (
|
||
"reply_to_original_or_standalone_action_required"
|
||
if mode == "action_required"
|
||
else "reply_to_original_or_standalone_result"
|
||
),
|
||
"awooop": "status_chain_panel",
|
||
}
|
||
|
||
|
||
def _build_execution_result(
|
||
*,
|
||
state: str,
|
||
verdict: str,
|
||
stage: str,
|
||
has_repair_execution: bool,
|
||
has_nonrepair_operation: bool,
|
||
verification: str,
|
||
) -> dict[str, Any]:
|
||
"""Describe execution completion separately from remediation outcome."""
|
||
approval_status = "unknown"
|
||
completion_status = "unknown"
|
||
command_status = "unknown"
|
||
repair_status = "unknown"
|
||
failure_status = "unknown"
|
||
summary = "尚未能判定執行是否完成或失敗"
|
||
terminal = False
|
||
|
||
if state == "completed_verified":
|
||
approval_status = "completed"
|
||
completion_status = "completed_verified"
|
||
command_status = "succeeded"
|
||
repair_status = "verified_repaired"
|
||
failure_status = "no_failure"
|
||
summary = "已完成:修復指令成功,且驗證通過"
|
||
terminal = True
|
||
elif state == "execution_failed_manual_required":
|
||
approval_status = "completed"
|
||
completion_status = "failed"
|
||
command_status = "failed"
|
||
repair_status = "failed"
|
||
failure_status = "command_failed"
|
||
summary = "已失敗:修復指令執行失敗,需人工接手"
|
||
terminal = True
|
||
elif state == "diagnostic_only_manual_review":
|
||
approval_status = "completed"
|
||
completion_status = "completed_no_repair"
|
||
command_status = "diagnostic_completed" if has_nonrepair_operation else "skipped_no_action"
|
||
repair_status = "not_executed"
|
||
failure_status = "no_command_failed"
|
||
summary = "流程已完成:只完成診斷/觀察,沒有修復指令成功或失敗"
|
||
terminal = True
|
||
elif state == "verification_degraded_manual_required":
|
||
approval_status = "completed"
|
||
completion_status = "completed_verification_degraded"
|
||
command_status = "succeeded" if has_repair_execution else "diagnostic_completed"
|
||
repair_status = "verification_degraded"
|
||
failure_status = "no_command_failed"
|
||
summary = "已執行,但驗證結果退化;需人工確認是否真的修復"
|
||
terminal = False
|
||
elif state == "execution_unverified_manual_required":
|
||
approval_status = "completed"
|
||
completion_status = "completed_unverified"
|
||
command_status = "succeeded"
|
||
repair_status = "unverified"
|
||
failure_status = "no_command_failed"
|
||
summary = "已執行成功,但缺少修復驗證結果"
|
||
terminal = False
|
||
elif state == "no_action_manual_review":
|
||
approval_status = "pending_manual_review"
|
||
completion_status = "not_started_no_action"
|
||
command_status = "not_started"
|
||
repair_status = "not_executed"
|
||
failure_status = "not_applicable"
|
||
summary = "尚未執行:AI 建議不修復,等待人工決定是否接手"
|
||
terminal = False
|
||
elif state == "approval_rejected_no_execution":
|
||
approval_status = "rejected"
|
||
completion_status = "closed_no_execution"
|
||
command_status = "not_run"
|
||
repair_status = "not_executed"
|
||
failure_status = "not_applicable"
|
||
summary = "已拒絕:審批結案,未執行任何修復指令"
|
||
terminal = True
|
||
elif state == "approval_expired_manual_review":
|
||
approval_status = "expired"
|
||
completion_status = "expired_no_execution"
|
||
command_status = "not_run"
|
||
repair_status = "not_executed"
|
||
failure_status = "not_applicable"
|
||
summary = "審批逾期:未執行修復,需人工重新審查"
|
||
terminal = False
|
||
elif state == "approval_required":
|
||
approval_status = "pending"
|
||
completion_status = "pending_approval"
|
||
command_status = "not_started"
|
||
repair_status = "not_executed"
|
||
failure_status = "not_applicable"
|
||
summary = "尚未執行:等待人工批准"
|
||
terminal = False
|
||
elif state == "read_only_dry_run_manual_gate":
|
||
approval_status = "manual_gate"
|
||
completion_status = "dry_run_completed"
|
||
command_status = "dry_run_succeeded"
|
||
repair_status = "not_executed"
|
||
failure_status = "not_applicable"
|
||
summary = "只讀試跑完成,尚未執行修復"
|
||
terminal = False
|
||
elif state == "observed_not_executed":
|
||
approval_status = "not_required"
|
||
completion_status = "observed_not_executed"
|
||
command_status = "not_run"
|
||
repair_status = "not_executed"
|
||
failure_status = "not_applicable"
|
||
summary = "只完成收件/觀測,尚未進入執行"
|
||
terminal = False
|
||
elif has_repair_execution:
|
||
approval_status = "completed"
|
||
completion_status = "completed_needs_review"
|
||
command_status = "succeeded" if verification != "missing" else "completed"
|
||
repair_status = "needs_review"
|
||
failure_status = "no_command_failed"
|
||
summary = "已有執行紀錄,但仍需人工確認最終修復結果"
|
||
terminal = False
|
||
elif verdict in {"received_only", "observed_not_executed"} or stage == "received":
|
||
approval_status = "not_started"
|
||
completion_status = "not_started"
|
||
command_status = "not_started"
|
||
repair_status = "not_executed"
|
||
failure_status = "not_applicable"
|
||
summary = "尚未執行修復指令"
|
||
terminal = False
|
||
|
||
return {
|
||
"approval_status": approval_status,
|
||
"completion_status": completion_status,
|
||
"command_status": command_status,
|
||
"repair_status": repair_status,
|
||
"failure_status": failure_status,
|
||
"terminal": terminal,
|
||
"summary_zh": summary,
|
||
}
|
||
|
||
|
||
def build_operator_outcome(
    *,
    truth_status: dict[str, Any] | None = None,
    automation_quality: dict[str, Any] | None = None,
    remediation_state: str | None = None,
    fetch_error: str | None = None,
    source_id: str | None = None,
) -> dict[str, Any]:
    """Build a normalized operator outcome for an alert/incident.

    The result is meant to answer three questions:
      1. What happened?
      2. Does a human need to intervene?
      3. How will that human be notified / where should they act?

    Args:
        truth_status: Read for ``current_stage``, ``stage_status``,
            ``blockers`` and ``needs_human``; ``None`` is treated as empty.
        automation_quality: Read for ``verdict``, ``blockers`` and the
            ``facts`` sub-dict; ``None`` is treated as empty.
        remediation_state: Optional remediation evidence state.
        fetch_error: When truthy, short-circuits to the
            ``truth_chain_unavailable`` state.
        source_id: Passed through to the notification section.

    Returns:
        A dict tagged ``schema_version == "operator_outcome_v1"`` with the
        state, severity, human-action flags, execution result, notification
        routing, evidence counters and up to eight blockers.
    """
    truth = truth_status or {}
    quality = automation_quality or {}
    raw_facts = quality.get("facts")
    facts = raw_facts if isinstance(raw_facts, dict) else {}

    verdict = str(quality.get("verdict") or "unknown")
    stage = str(truth.get("current_stage") or "unknown")
    stage_status = str(truth.get("stage_status") or "unknown")
    verification = str(facts.get("verification_result") or "missing")

    # Truth-chain blockers come first, then automation-quality blockers;
    # non-list values and falsy entries are ignored.
    blockers: list[str] = []
    for origin in (truth, quality):
        listed = origin.get("blockers")
        if isinstance(listed, list):
            blockers.extend(str(entry) for entry in listed if entry)
    first_blocker = _first_text(blockers)

    auto_repair_runs = _safe_int(facts.get("auto_repair_execution_records"))
    effective_runs = _safe_int(facts.get("effective_execution_records"))
    operation_runs = _safe_int(facts.get("automation_operation_records"))
    has_repair_execution = effective_runs > 0 or auto_repair_runs > 0
    # Operations only count as "non-repair" when no repair execution exists.
    has_nonrepair_operation = operation_runs > 0 and not has_repair_execution
    needs_human_from_truth = bool(truth.get("needs_human"))

    def _decide() -> tuple[str, str, bool, str, str, str]:
        """Return (state, severity, needs_human, next_action, summary, reason).

        The checks are order-sensitive: the first matching rule wins.
        """
        if fetch_error:
            return (
                "truth_chain_unavailable",
                "warning",
                True,
                "open_awooop_and_review_source_records",
                "真相鏈查詢失敗,需人工確認處置結果",
                str(fetch_error)[:240],
            )
        if verdict == "auto_repaired_verified":
            return (
                "completed_verified",
                "success",
                False,
                "monitor_for_regression",
                "已驗證自動修復完成",
                "execution_and_verification_succeeded",
            )
        if verdict == "execution_failed" or stage == "execution_failed":
            return (
                "execution_failed_manual_required",
                "critical",
                True,
                "manual_fix_or_rollback",
                "執行失敗,需人工介入",
                first_blocker or "execution_failed",
            )
        if verdict == "manual_required_diagnostic_only" or has_nonrepair_operation:
            return (
                "diagnostic_only_manual_review",
                "warning",
                True,
                "manual_review_or_collect_repair_evidence",
                "只完成診斷/觀察,尚未證明修復",
                first_blocker or "diagnostic_or_audit_only",
            )
        if verdict == "auto_repaired_verification_degraded":
            return (
                "verification_degraded_manual_required",
                "warning",
                True,
                "manual_verify_or_repair",
                "已執行但驗證退化,需人工確認",
                first_blocker or f"verification={verification}",
            )
        if verdict == "execution_unverified" or (
            has_repair_execution and verification == "missing"
        ):
            return (
                "execution_unverified_manual_required",
                "warning",
                True,
                "run_or_review_post_execution_verification",
                "已執行但缺少驗證結果,需人工確認",
                first_blocker or "execution_without_verification_result",
            )
        if verdict == "manual_required_no_action":
            return (
                "no_action_manual_review",
                "warning",
                True,
                "manual_review_no_action_decision",
                "AI 選擇不執行修復,需人工判斷是否接手",
                first_blocker or "no_action_or_observe",
            )
        if verdict == "approval_rejected_no_execution" or stage == "approval_rejected":
            return (
                "approval_rejected_no_execution",
                "info",
                False,
                "monitor_or_reopen_if_alert_recurs",
                "已拒絕處置,未執行修復",
                "approval_rejected",
            )
        if verdict == "approval_expired_manual_review" or stage == "approval_expired":
            return (
                "approval_expired_manual_review",
                "warning",
                True,
                "reopen_close_or_escalate_expired_approval",
                "審批已逾期,需人工重新審查",
                first_blocker or "approval_expired",
            )
        if remediation_state == "read_only":
            return (
                "read_only_dry_run_manual_gate",
                "warning",
                True,
                "approve_or_escalate_from_awooop",
                "只讀試跑完成,等待人工放行或轉交",
                first_blocker or "read_only_dry_run",
            )
        if remediation_state == "write_observed":
            return (
                "write_observed_manual_review",
                "critical",
                True,
                "review_write_evidence",
                "補救證據出現寫入旗標,需人工確認",
                first_blocker or "write_observed",
            )
        if remediation_state in {"blocked", "fetch_failed"}:
            if remediation_state == "blocked":
                blocked_severity = "critical"
            else:
                blocked_severity = "warning"
            return (
                "blocked_manual_required",
                blocked_severity,
                True,
                "manual_investigation",
                "自動化流程受阻,需人工處理",
                first_blocker or remediation_state,
            )
        if verdict == "approval_required" or stage == "approval_required":
            return (
                "approval_required",
                "warning",
                True,
                "approve_reject_or_escalate",
                "等待人工審批,尚未執行",
                first_blocker or "pending_human_approval",
            )
        if needs_human_from_truth:
            return (
                "manual_required",
                "warning",
                True,
                "manual_investigation",
                "真相鏈判定需人工介入",
                first_blocker or f"{stage}/{stage_status}",
            )
        if verdict in {"observed_not_executed", "received_only"}:
            return (
                "observed_not_executed",
                "info",
                False,
                "collect_evidence_or_wait",
                "已收到/觀測,尚未進入修復執行",
                first_blocker or verdict,
            )
        # No rule matched: a human is needed only if there are blockers.
        return (
            "unknown_pending_observation",
            "warning",
            bool(blockers),
            "review_status_chain",
            "處置結果尚未形成明確結論",
            first_blocker or f"{verdict}:{stage}/{stage_status}",
        )

    state, severity, needs_human, next_action, summary, reason = _decide()

    execution_result = _build_execution_result(
        state=state,
        verdict=verdict,
        stage=stage,
        has_repair_execution=has_repair_execution,
        has_nonrepair_operation=has_nonrepair_operation,
        verification=verification,
    )

    if needs_human:
        mode = "action_required"
        channels = _ACTION_REQUIRED_CHANNELS
    else:
        mode = "result_only"
        channels = _RESULT_ONLY_CHANNELS
    notification = _build_notification(
        mode=mode,
        channels=channels,
        reason=reason,
        source_id=source_id,
    )

    evidence = {
        "verdict": verdict,
        "current_stage": stage,
        "stage_status": stage_status,
        "verification": verification,
        "auto_repair_execution_records": auto_repair_runs,
        "effective_execution_records": effective_runs,
        "automation_operation_records": operation_runs,
        "mcp_gateway_total": _safe_int(facts.get("mcp_gateway_total")),
        "knowledge_entries": _safe_int(facts.get("knowledge_entries")),
    }

    return {
        "schema_version": "operator_outcome_v1",
        "state": state,
        "severity": severity,
        "summary_zh": summary,
        "needs_human": needs_human,
        "human_action_required": needs_human,
        "human_action_reason": reason,
        "next_action": next_action,
        "execution_result": execution_result,
        "notification": notification,
        "evidence": evidence,
        "blockers": blockers[:8],
    }
|