# awoooi/apps/api/src/services/operator_outcome.py

"""Operator-facing alert outcome contract.

This module is intentionally generic: it converts the existing truth-chain
stage, automation quality verdict, and remediation evidence state into one
small contract that Telegram, AwoooP, and result notifications can all share.
"""

from __future__ import annotations

from typing import Any

# Channels listed in the notification when the outcome needs a human to act.
_ACTION_REQUIRED_CHANNELS = ("telegram_sre_war_room", "awooop_operator_console")
# Channels listed in the notification when the outcome is result-only.
_RESULT_ONLY_CHANNELS = ("telegram_result_reply", "awooop_operator_console")


def _safe_int(value: Any) -> int:
    """Coerce *value* to an int, returning 0 when it cannot be converted.

    Falsy inputs (``None``, ``""``, ``0``) map to 0. Values that ``int()``
    rejects -- unparseable strings, unsupported types, NaN and infinite
    floats -- also map to 0 instead of raising.
    """
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        # OverflowError is what int() raises for float("inf") / float("-inf").
        return 0


def _first_text(values: list[Any]) -> str | None:
    """Return the first truthy entry of *values* as a string, or None if none is truthy."""
    return next((str(candidate) for candidate in values if candidate), None)


def _build_notification(
    *,
    mode: str,
    channels: tuple[str, ...],
    reason: str,
    source_id: str | None,
) -> dict[str, Any]:
    """Assemble the notification section of the operator outcome.

    Args:
        mode: Notification mode; ``"action_required"`` selects the
            action-required Telegram strategy, any other value selects the
            result strategy.
        channels: Channel names, copied into a new list.
        reason: Reason string, passed through unchanged.
        source_id: Source identifier, passed through unchanged (may be None).

    Returns:
        A dict with keys ``mode``, ``channels``, ``reason``, ``source_id``,
        ``telegram`` and ``awooop``.
    """
    if mode == "action_required":
        telegram_strategy = "reply_to_original_or_standalone_action_required"
    else:
        telegram_strategy = "reply_to_original_or_standalone_result"

    payload: dict[str, Any] = {
        "mode": mode,
        "channels": [*channels],
        "reason": reason,
        "source_id": source_id,
    }
    payload["telegram"] = telegram_strategy
    payload["awooop"] = "status_chain_panel"
    return payload


def _build_execution_result(
    *,
    state: str,
    verdict: str,
    stage: str,
    has_repair_execution: bool,
    has_nonrepair_operation: bool,
    verification: str,
) -> dict[str, Any]:
    """Describe execution completion separately from remediation outcome.

    Known ``state`` values are resolved through a lookup table. Any other
    state falls back, in order, to: a generic "executed, needs review" row
    when a repair execution exists; a "not started" row when the verdict or
    stage shows the alert was only received/observed; otherwise an
    all-``"unknown"`` row.

    Returns:
        A dict with keys ``approval_status``, ``completion_status``,
        ``command_status``, ``repair_status``, ``failure_status``,
        ``terminal`` and ``summary_zh``.
    """
    # Row layout:
    # (approval, completion, command, repair, failure, summary_zh, terminal)
    rows_by_state: dict[str, tuple[str, str, str, str, str, str, bool]] = {
        "completed_verified": (
            "completed",
            "completed_verified",
            "succeeded",
            "verified_repaired",
            "no_failure",
            "已完成：修復指令成功，且驗證通過",
            True,
        ),
        "execution_failed_manual_required": (
            "completed",
            "failed",
            "failed",
            "failed",
            "command_failed",
            "已失敗：修復指令執行失敗，需人工接手",
            True,
        ),
        "diagnostic_only_manual_review": (
            "completed",
            "completed_no_repair",
            # The command status depends on whether a non-repair operation ran.
            "diagnostic_completed" if has_nonrepair_operation else "skipped_no_action",
            "not_executed",
            "no_command_failed",
            "流程已完成：只完成診斷/觀察，沒有修復指令成功或失敗",
            True,
        ),
        "verification_degraded_manual_required": (
            "completed",
            "completed_verification_degraded",
            # Without a repair execution the command counts as diagnostic only.
            "succeeded" if has_repair_execution else "diagnostic_completed",
            "verification_degraded",
            "no_command_failed",
            "已執行，但驗證結果退化；需人工確認是否真的修復",
            False,
        ),
        "execution_unverified_manual_required": (
            "completed",
            "completed_unverified",
            "succeeded",
            "unverified",
            "no_command_failed",
            "已執行成功，但缺少修復驗證結果",
            False,
        ),
        "no_action_manual_review": (
            "pending_manual_review",
            "not_started_no_action",
            "not_started",
            "not_executed",
            "not_applicable",
            "尚未執行：AI 建議不修復，等待人工決定是否接手",
            False,
        ),
        "approval_rejected_no_execution": (
            "rejected",
            "closed_no_execution",
            "not_run",
            "not_executed",
            "not_applicable",
            "已拒絕：審批結案，未執行任何修復指令",
            True,
        ),
        "approval_expired_manual_review": (
            "expired",
            "expired_no_execution",
            "not_run",
            "not_executed",
            "not_applicable",
            "審批逾期：未執行修復，需人工重新審查",
            False,
        ),
        "approval_required": (
            "pending",
            "pending_approval",
            "not_started",
            "not_executed",
            "not_applicable",
            "尚未執行：等待人工批准",
            False,
        ),
        "read_only_dry_run_manual_gate": (
            "manual_gate",
            "dry_run_completed",
            "dry_run_succeeded",
            "not_executed",
            "not_applicable",
            "只讀試跑完成，尚未執行修復",
            False,
        ),
        "observed_not_executed": (
            "not_required",
            "observed_not_executed",
            "not_run",
            "not_executed",
            "not_applicable",
            "只完成收件/觀測，尚未進入執行",
            False,
        ),
    }

    row = rows_by_state.get(state)
    if row is None:
        if has_repair_execution:
            # Unrecognised state, but something was executed.
            row = (
                "completed",
                "completed_needs_review",
                "succeeded" if verification != "missing" else "completed",
                "needs_review",
                "no_command_failed",
                "已有執行紀錄，但仍需人工確認最終修復結果",
                False,
            )
        elif verdict in {"received_only", "observed_not_executed"} or stage == "received":
            row = (
                "not_started",
                "not_started",
                "not_started",
                "not_executed",
                "not_applicable",
                "尚未執行修復指令",
                False,
            )
        else:
            row = (
                "unknown",
                "unknown",
                "unknown",
                "unknown",
                "unknown",
                "尚未能判定執行是否完成或失敗",
                False,
            )

    approval, completion, command, repair, failure, summary_text, is_terminal = row
    return {
        "approval_status": approval,
        "completion_status": completion,
        "command_status": command,
        "repair_status": repair,
        "failure_status": failure,
        "terminal": is_terminal,
        "summary_zh": summary_text,
    }


def _classify_outcome(
    *,
    fetch_error: str | None,
    verdict: str,
    stage: str,
    stage_status: str,
    remediation_state: str | None,
    verification: str,
    has_repair_execution: bool,
    has_nonrepair_operation: bool,
    needs_human_from_truth: bool,
    blockers: list[str],
) -> tuple[str, str, bool, str, str, str]:
    """Pick the first matching outcome rule; the order of the checks is the priority.

    Returns:
        ``(state, severity, needs_human, next_action, summary_zh, reason)``.
    """
    first_blocker = _first_text(blockers)

    if fetch_error:
        return (
            "truth_chain_unavailable",
            "warning",
            True,
            "open_awooop_and_review_source_records",
            "真相鏈查詢失敗，需人工確認處置結果",
            # The reason is capped at 240 characters of the error text.
            str(fetch_error)[:240],
        )

    if verdict == "auto_repaired_verified":
        return (
            "completed_verified",
            "success",
            False,
            "monitor_for_regression",
            "已驗證自動修復完成",
            "execution_and_verification_succeeded",
        )

    if verdict == "execution_failed" or stage == "execution_failed":
        return (
            "execution_failed_manual_required",
            "critical",
            True,
            "manual_fix_or_rollback",
            "執行失敗，需人工介入",
            first_blocker or "execution_failed",
        )

    if verdict == "manual_required_diagnostic_only" or has_nonrepair_operation:
        return (
            "diagnostic_only_manual_review",
            "warning",
            True,
            "manual_review_or_collect_repair_evidence",
            "只完成診斷/觀察，尚未證明修復",
            first_blocker or "diagnostic_or_audit_only",
        )

    if verdict == "auto_repaired_verification_degraded":
        return (
            "verification_degraded_manual_required",
            "warning",
            True,
            "manual_verify_or_repair",
            "已執行但驗證退化，需人工確認",
            first_blocker or f"verification={verification}",
        )

    if verdict == "execution_unverified" or (
        has_repair_execution and verification == "missing"
    ):
        return (
            "execution_unverified_manual_required",
            "warning",
            True,
            "run_or_review_post_execution_verification",
            "已執行但缺少驗證結果，需人工確認",
            first_blocker or "execution_without_verification_result",
        )

    if verdict == "manual_required_no_action":
        return (
            "no_action_manual_review",
            "warning",
            True,
            "manual_review_no_action_decision",
            "AI 選擇不執行修復，需人工判斷是否接手",
            first_blocker or "no_action_or_observe",
        )

    if verdict == "approval_rejected_no_execution" or stage == "approval_rejected":
        return (
            "approval_rejected_no_execution",
            "info",
            False,
            "monitor_or_reopen_if_alert_recurs",
            "已拒絕處置，未執行修復",
            "approval_rejected",
        )

    if verdict == "approval_expired_manual_review" or stage == "approval_expired":
        return (
            "approval_expired_manual_review",
            "warning",
            True,
            "reopen_close_or_escalate_expired_approval",
            "審批已逾期，需人工重新審查",
            first_blocker or "approval_expired",
        )

    if remediation_state == "read_only":
        return (
            "read_only_dry_run_manual_gate",
            "warning",
            True,
            "approve_or_escalate_from_awooop",
            "只讀試跑完成，等待人工放行或轉交",
            first_blocker or "read_only_dry_run",
        )

    if remediation_state == "write_observed":
        return (
            "write_observed_manual_review",
            "critical",
            True,
            "review_write_evidence",
            "補救證據出現寫入旗標，需人工確認",
            first_blocker or "write_observed",
        )

    if remediation_state in {"blocked", "fetch_failed"}:
        return (
            "blocked_manual_required",
            "critical" if remediation_state == "blocked" else "warning",
            True,
            "manual_investigation",
            "自動化流程受阻，需人工處理",
            first_blocker or remediation_state,
        )

    if verdict == "approval_required" or stage == "approval_required":
        return (
            "approval_required",
            "warning",
            True,
            "approve_reject_or_escalate",
            "等待人工審批，尚未執行",
            first_blocker or "pending_human_approval",
        )

    if needs_human_from_truth:
        return (
            "manual_required",
            "warning",
            True,
            "manual_investigation",
            "真相鏈判定需人工介入",
            first_blocker or f"{stage}/{stage_status}",
        )

    if verdict in {"observed_not_executed", "received_only"}:
        return (
            "observed_not_executed",
            "info",
            False,
            "collect_evidence_or_wait",
            "已收到/觀測，尚未進入修復執行",
            first_blocker or verdict,
        )

    # Nothing matched: a human is needed only if some blocker was reported.
    return (
        "unknown_pending_observation",
        "warning",
        bool(blockers),
        "review_status_chain",
        "處置結果尚未形成明確結論",
        first_blocker or f"{verdict}:{stage}/{stage_status}",
    )


def build_operator_outcome(
    *,
    truth_status: dict[str, Any] | None = None,
    automation_quality: dict[str, Any] | None = None,
    remediation_state: str | None = None,
    fetch_error: str | None = None,
    source_id: str | None = None,
) -> dict[str, Any]:
    """Build a normalized operator outcome for an alert/incident.

    The result answers three questions: what happened, whether a human has
    to intervene, and how that human is notified / where they should act.

    Args:
        truth_status: Truth-chain status; ``current_stage``, ``stage_status``,
            ``blockers`` and ``needs_human`` are read from it.
        automation_quality: Automation quality verdict; ``verdict``,
            ``blockers`` and the ``facts`` dict are read from it.
        remediation_state: Remediation evidence state, if any.
        fetch_error: When truthy, the outcome is reported as
            ``truth_chain_unavailable`` regardless of the other inputs.
        source_id: Identifier copied into the notification section.

    Returns:
        A dict tagged ``schema_version == "operator_outcome_v1"`` containing
        the state, severity, summary, human-action flags, next action,
        execution result, notification, evidence and up to 8 blockers.
    """
    truth = truth_status or {}
    quality = automation_quality or {}
    raw_facts = quality.get("facts")
    facts: dict[str, Any] = raw_facts if isinstance(raw_facts, dict) else {}

    verdict = str(quality.get("verdict") or "unknown")
    stage = str(truth.get("current_stage") or "unknown")
    stage_status = str(truth.get("stage_status") or "unknown")
    verification = str(facts.get("verification_result") or "missing")

    # Truth-chain blockers come first, then automation-quality blockers;
    # falsy entries are dropped and the rest are stringified.
    blockers: list[str] = []
    for holder in (truth, quality):
        listed = holder.get("blockers")
        if isinstance(listed, list):
            blockers.extend(str(entry) for entry in listed if entry)

    auto_repair_count = _safe_int(facts.get("auto_repair_execution_records"))
    effective_count = _safe_int(facts.get("effective_execution_records"))
    operation_count = _safe_int(facts.get("automation_operation_records"))
    has_repair_execution = effective_count > 0 or auto_repair_count > 0
    # Automation operations only count as "non-repair" when no repair ran.
    has_nonrepair_operation = operation_count > 0 and not has_repair_execution

    state, severity, needs_human, next_action, summary, reason = _classify_outcome(
        fetch_error=fetch_error,
        verdict=verdict,
        stage=stage,
        stage_status=stage_status,
        remediation_state=remediation_state,
        verification=verification,
        has_repair_execution=has_repair_execution,
        has_nonrepair_operation=has_nonrepair_operation,
        needs_human_from_truth=bool(truth.get("needs_human")),
        blockers=blockers,
    )

    execution_result = _build_execution_result(
        state=state,
        verdict=verdict,
        stage=stage,
        has_repair_execution=has_repair_execution,
        has_nonrepair_operation=has_nonrepair_operation,
        verification=verification,
    )

    if needs_human:
        mode, channels = "action_required", _ACTION_REQUIRED_CHANNELS
    else:
        mode, channels = "result_only", _RESULT_ONLY_CHANNELS
    notification = _build_notification(
        mode=mode,
        channels=channels,
        reason=reason,
        source_id=source_id,
    )

    evidence = {
        "verdict": verdict,
        "current_stage": stage,
        "stage_status": stage_status,
        "verification": verification,
        "auto_repair_execution_records": auto_repair_count,
        "effective_execution_records": effective_count,
        "automation_operation_records": operation_count,
        "mcp_gateway_total": _safe_int(facts.get("mcp_gateway_total")),
        "knowledge_entries": _safe_int(facts.get("knowledge_entries")),
    }

    return {
        "schema_version": "operator_outcome_v1",
        "state": state,
        "severity": severity,
        "summary_zh": summary,
        "needs_human": needs_human,
        "human_action_required": needs_human,
        "human_action_reason": reason,
        "next_action": next_action,
        "execution_result": execution_result,
        "notification": notification,
        "evidence": evidence,
        "blockers": blockers[:8],
    }