# awoooi/apps/api/src/agents/critic_agent.py

"""
AWOOOI AIOps Phase 2 — Critic Agent（質疑者）
=============================================
職責：刻意唱反調，防止幻覺與 echo chamber

輸入：DiagnosisReport + ActionPlan（兩者都看）
輸出：CriticReport（challenges[] 列表 + overall_assessment）

設計原則：
1. Critic 的工作是找漏洞，不是說好話（防 sycophancy）
2. prompt 強制要求批判性思維：「如果診斷是錯的，還有哪 3 種可能？」
3. challenge_count > 0 是 Phase 2 退出條件之一
4. Critic 連續 3 次找到 Diagnostician 嚴重漏洞 → 觸發 Diagnostician 狀態不穩（Phase 4 實作）
5. 熔斷降級：LLM 失敗 → 輸出空 challenges（不阻塞 Coordinator）
6. Critic 和 Reviewer 並行執行（都不阻塞對方）

ADR-082: Phase 2 多 Agent 協作
2026-04-15 ogt + Claude Sonnet 4.6（亞太）: Phase 2 初始建立
"""

from __future__ import annotations

import asyncio
import hashlib
import os
import time
from typing import Any

import structlog

from src.agents.base import BaseAgent
from src.agents.protocol import (
    ActionPlan,
    AgentRole,
    AgentVote,
    Challenge,
    CriticReport,
    DiagnosisReport,
)
from src.observability.agent_step_metrics import observe_agent_step
from src.services.sanitization_service import sanitize

logger = structlog.get_logger(__name__)

# Upper bound on the number of challenges kept per review
# (guards against the LLM producing an unbounded list of objections).
MAX_CHALLENGES = 5

# A1 (2026-04-27, Claude Sonnet 4.6): per-agent timeout split + step metric
# (North Star §1.2 "Observable by Default").
# Background: for alerts INC-20260425-8D17BB / 3B6C39 the AI confidence dropped to 20%.
#   OpenClaw NIM (192.168.0.188:8088) was measured at 2-27s while every step shared
#   PHASE2_STEP_TIMEOUT_SEC=20.0.
#   The Critic only performs a critical review (shortest prompt, simplest output), so it
#   gets the smallest budget (15s), leaving the global budget to Diagnostician/Solver.
#   Env override: the value can be tuned at deploy time through a K8s ConfigMap
#   without rebuilding the image.
AGENT_CRITIC_TIMEOUT_SEC: float = float(os.getenv("AGENT_CRITIC_TIMEOUT_SEC", "15.0"))

# Backward-compatible alias.
# DEPRECATED (2026-04-27): use AGENT_CRITIC_TIMEOUT_SEC; this alias will be removed
# in the next sprint.
PHASE2_STEP_TIMEOUT_SEC = AGENT_CRITIC_TIMEOUT_SEC


class CriticAgent(BaseAgent):
    """
    Critic Agent — the systematic sceptic.

    Takes the top diagnosis hypothesis and the top action candidate, asks the
    LLM to argue against both, and returns the resulting challenges together
    with a vote (REJECT when any challenge is critical, APPROVE otherwise,
    ABSTAIN when the review is degraded).

    Usage:
        agent = CriticAgent()
        report = await agent.run(diagnosis, plan)
    """

    AGENT_NAME = AgentRole.CRITIC.value
    AGENT_DESCRIPTION = (
        "Devil's advocate. Challenges diagnosis and proposed actions to prevent "
        "hallucination and echo chamber effects."
    )

    async def run(
        self,
        diagnosis: DiagnosisReport,
        plan: ActionPlan,
        timeout_sec: float = 0.0,  # noqa: ARG002 — deprecated, kept for signature compatibility
    ) -> CriticReport:
        """
        Critically review a diagnosis and its proposed plan.

        Args:
            diagnosis: Output of the Diagnostician.
            plan: Output of the Solver.
            timeout_sec: Deprecated and ignored. The LLM step is bounded by the
                module-level ``AGENT_CRITIC_TIMEOUT_SEC`` instead.

        Returns:
            A CriticReport with ``latency_ms`` filled in. A timeout, a failed
            or empty LLM reply, an unparseable reply, or any other
            ``Exception`` produces a report with ``degraded=True`` instead of
            propagating the error.
        """
        start_ms = int(time.monotonic() * 1000)

        try:
            report = await self._critique(diagnosis, plan)
            # _critique leaves latency at 0 (also on its degraded paths);
            # the wall-clock latency of the whole review is stamped here.
            report.latency_ms = int(time.monotonic() * 1000) - start_ms
            logger.info(
                "critic_done",
                challenges=report.challenge_count,
                has_critical=report.has_critical_challenge,
                vote=report.vote,
                latency_ms=report.latency_ms,
            )
            return report

        except Exception:
            # Boundary handler: the Critic must never block the caller, so any
            # unexpected failure is logged with its traceback and degraded.
            latency = int(time.monotonic() * 1000) - start_ms
            logger.exception("critic_error")
            return self._degraded_report(latency, "error")

    async def _critique(
        self,
        diagnosis: DiagnosisReport,
        plan: ActionPlan,
    ) -> CriticReport:
        """
        Run the LLM critique and convert its reply into a CriticReport.

        Args:
            diagnosis: Diagnosis whose ``top_hypothesis`` is challenged.
            plan: Plan whose ``top_candidate`` is challenged.

        Returns:
            A CriticReport with ``latency_ms=0`` (the caller fills it in).
            Degraded reports are returned for: step timeout
            (``step_timeout``), unsuccessful or empty LLM reply
            (``llm_failed``), and a reply that does not parse into a dict
            (``parse_failed``).
        """
        top_hypothesis = diagnosis.top_hypothesis
        top_candidate = plan.top_candidate

        prompt = self._build_prompt({
            "hypothesis": top_hypothesis.description if top_hypothesis else "（無假設）",
            "action": top_candidate.action if top_candidate else "（無方案）",
            "confidence": top_hypothesis.confidence if top_hypothesis else 0.0,
        })

        # Compact summary (each part capped at 300 chars) passed as the single
        # "signal" of the alert context handed to OpenClaw.
        _critic_signal = (
            f"hypothesis={top_hypothesis.description[:300] if top_hypothesis else 'none'}; "
            f"action={top_candidate.action[:300] if top_candidate else 'none'}"
        )
        alert_context = {
            "incident_id": diagnosis.evidence_snapshot_id or "UNKNOWN",
            "severity": "P3",
            "signals": [{"alert_name": "critic_review", "description": _critic_signal}],
            "affected_services": [],
            "intent_hint": "diagnose",
        }

        # Imported at call time, as in the original code
        # (presumably to avoid an import cycle — confirm before hoisting).
        from src.services.openclaw import get_openclaw
        openclaw = get_openclaw()
        _step_start = time.monotonic()
        try:
            response_text, _provider, success = await asyncio.wait_for(
                openclaw.call(prompt, alert_context=alert_context),
                timeout=AGENT_CRITIC_TIMEOUT_SEC,
            )
            # A1 (2026-04-27): step metric for a call that returned in time.
            # NOTE(review): this is recorded as "success" even when the
            # `success` flag below is False — confirm whether a separate
            # outcome label should be used for failed replies.
            observe_agent_step("critic", "success", time.monotonic() - _step_start)
        except asyncio.TimeoutError:
            # A1 (2026-04-27): step metric for the timeout path.
            observe_agent_step("critic", "timeout", time.monotonic() - _step_start)
            logger.warning(
                "critic_step_timeout",
                snapshot_id=diagnosis.evidence_snapshot_id,
                timeout_sec=AGENT_CRITIC_TIMEOUT_SEC,
            )
            return self._degraded_report(0, "step_timeout")

        if not success or not response_text:
            return self._degraded_report(0, "llm_failed")

        parsed = self._parse_response(sanitize(response_text, "critic_output"))
        if not isinstance(parsed, dict):
            # Degrade explicitly instead of relying on an AttributeError being
            # swallowed by the broad handler in run().
            logger.warning(
                "critic_parse_failed",
                snapshot_id=diagnosis.evidence_snapshot_id,
            )
            return self._degraded_report(0, "parse_failed")

        challenges = _extract_challenges(parsed)

        # Any critical challenge turns the vote into REJECT.
        vote = (
            AgentVote.REJECT
            if any(c.severity == "critical" for c in challenges)
            else AgentVote.APPROVE
        )

        # A JSON null must not end up as the literal string "None".
        assessment = parsed.get("overall_assessment")
        overall_assessment = "" if assessment is None else str(assessment)[:1000]

        return CriticReport(
            challenges=challenges,
            overall_assessment=overall_assessment,
            latency_ms=0,
            vote=vote,
        )

    def _build_prompt(self, context: dict[str, Any]) -> str:
        """
        Render the critic prompt.

        Args:
            context: Mapping with the keys ``hypothesis``, ``action`` and
                ``confidence`` (a number, rendered as a percentage).

        Returns:
            The prompt text, which instructs the LLM to answer with a JSON
            object containing ``challenges`` and ``overall_assessment``.
        """
        return f"""你是 AWOOOI SRE 系統的質疑者 Agent（Critic）。

你的工作是：找出診斷和方案的弱點。不是說好話，是找漏洞。

當前診斷：{context.get("hypothesis", "")}
當前方案：{context.get("action", "")}
診斷信心：{context.get("confidence", 0.0):.0%}

必須回答以下問題（每個問題產出一個 challenge）：
1. 如果這個診斷是錯的，還有哪些可能的根因？
2. 這個方案有什麼副作用或風險？
3. 是否有更好的替代方案被忽略了？

每個 challenge 標記嚴重度：
- "minor"：小瑕疵，不影響執行
- "major"：值得 Coordinator 考慮，但不是阻擋條件
- "critical"：嚴重邏輯漏洞，必須阻止此方案執行

以 JSON 回覆：
{{
  "challenges": [
    {{
      "target": "diagnosis",
      "argument": "可能是 OOM 但也可能是 code bug，需要看 GC logs 確認",
      "severity": "major"
    }}
  ],
  "overall_assessment": "診斷可信但方案風險偏高"
}}"""

    def _parse_response(self, response: str) -> dict[str, Any]:
        """Parse the sanitized LLM reply by delegating to ``self._extract_json``
        (not defined in this class; presumably inherited from BaseAgent)."""
        return self._extract_json(response)

    def analyze(self, context: dict[str, Any]) -> Any:
        """Unsupported entry point; Phase 2 agents are driven through ``run()``.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Use run() for Phase 2 agents")

    def _degraded_report(
        self,
        latency_ms: int,
        reason: str = "unknown",
    ) -> CriticReport:
        """
        Circuit-breaker fallback: a report with no challenges.

        Args:
            latency_ms: Latency to record on the report.
            reason: Short machine-readable cause embedded in the assessment text.

        Returns:
            A CriticReport with empty ``challenges``, an ABSTAIN vote and
            ``degraded=True``, so the caller is not blocked by the failure.
        """
        return CriticReport(
            challenges=[],
            overall_assessment=f"[降級] Critic LLM 失敗（{reason}），跳過批判性審查",
            latency_ms=latency_ms,
            vote=AgentVote.ABSTAIN,
            degraded=True,
        )


# ─────────────────────────────────────────────────────────────────────────────
# Helpers
# ─────────────────────────────────────────────────────────────────────────────

def _extract_challenges(parsed: dict[str, Any]) -> list[Challenge]:
    """Build the severity-ordered challenge list from a parsed LLM reply.

    The reply is untrusted model output, so malformed shapes are tolerated
    rather than raised on:

    * ``challenges`` missing, ``null`` or not a list -> no challenges;
    * entries that are not objects are skipped;
    * ``severity`` is matched case-insensitively (surrounding whitespace
      ignored); anything unrecognised, including non-string values, is
      treated as ``"minor"``;
    * a ``null`` ``target`` / ``argument`` falls back to ``"unknown"`` / ``""``.

    Args:
        parsed: Decoded JSON object returned by the LLM.

    Returns:
        At most ``MAX_CHALLENGES`` challenges, ordered critical -> major ->
        minor (stable within the same severity). ``target`` is capped at 50
        characters and ``argument`` at 500.
    """
    raw = parsed.get("challenges") if isinstance(parsed, dict) else None
    if not isinstance(raw, (list, tuple)):
        return []

    severity_order = {"critical": 0, "major": 1, "minor": 2}
    challenges: list[Challenge] = []

    for item in raw:
        if not isinstance(item, dict):
            continue

        # Normalise before the lookup: "Critical" must not be silently
        # downgraded to "minor" (that would turn a REJECT into an APPROVE),
        # and unhashable values must not break the membership test.
        severity = item.get("severity")
        severity = severity.strip().lower() if isinstance(severity, str) else "minor"
        if severity not in severity_order:
            severity = "minor"

        target = item.get("target")
        argument = item.get("argument")
        challenges.append(
            Challenge(
                target="unknown" if target is None else str(target)[:50],
                argument="" if argument is None else str(argument)[:500],
                severity=severity,
            )
        )

    # Sort before truncating so the most severe challenges survive the cap.
    challenges.sort(key=lambda c: severity_order.get(c.severity, 2))
    return challenges[:MAX_CHALLENGES]


def compute_input_hash(diagnosis: DiagnosisReport, plan: ActionPlan) -> str:
    """Return a short, deterministic fingerprint of the Critic's inputs.

    The fingerprint covers the evidence snapshot id, the description of the
    top hypothesis and the action of the top candidate. Missing pieces (no
    hypothesis, no candidate, or a ``None``/empty field) contribute an empty
    string instead of raising.

    Args:
        diagnosis: Diagnosis providing ``evidence_snapshot_id`` and ``top_hypothesis``.
        plan: Plan providing ``top_candidate``.

    Returns:
        The first 16 hex characters of the SHA-256 digest of the concatenated
        fields (UTF-8 encoded).
    """
    hypothesis = diagnosis.top_hypothesis
    candidate = plan.top_candidate
    parts = (
        diagnosis.evidence_snapshot_id,
        hypothesis.description if hypothesis else None,
        candidate.action if candidate else None,
    )
    # NOTE(review): fields are joined without a separator, exactly as before,
    # so digests for existing inputs are unchanged.
    key = "".join(part or "" for part in parts)
    return hashlib.sha256(key.encode()).hexdigest()[:16]


# ─────────────────────────────────────────────────────────────────────────────
# Singleton
# ─────────────────────────────────────────────────────────────────────────────

# Module-wide CriticAgent instance; stays None until first requested.
_agent: CriticAgent | None = None


def get_critic_agent() -> CriticAgent:
    """Return the shared CriticAgent, constructing it on the first call."""
    global _agent
    if _agent is not None:
        return _agent
    _agent = CriticAgent()
    return _agent