fix(diagnostician): 修復 'AI 深度診斷' 垃圾根因顯示

根因四層鏈：
1. openclaw.call(prompt) 不傳 context
2. OPENCLAW_NEMO fallback 把 prompt[:500]（系統說明文字）當 signal description
3. Nemo LLM 回傳 action_title="調查 AWOOOI SRE 系統的偵探 Agent"（任務描述）
4. _extract_hypotheses() 用 action_title 作為根因假設描述 → Telegram 顯示垃圾

修復：
- openclaw.call() 新增 alert_context 可選參數，透傳給 _call_with_fallback
- diagnostician._analyze() 建立 alert_context（incident_id + evidence_summary as signal） → nemo 使用結構化 API 收到真實感應器資料而非系統說明文字
- _extract_hypotheses() nemo 格式轉換：優先用 reasoning（為什麼）作為假設描述而非 action_title（做什麼）— reasoning 更接近根因分析

2026-04-16 ogt + Claude Sonnet 4.6 (台北時區)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 22:34:48 +08:00
parent 54d6818b8d
commit 7eb837567d
2 changed files with 31 additions and 4 deletions
--- a/apps/api/src/agents/diagnostician_agent.py
+++ b/apps/api/src/agents/diagnostician_agent.py
@@ -102,9 +102,21 @@ class DiagnosticianAgent(BaseAgent):
            "anomaly_context": snapshot.anomaly_context,
        })

+        # 2026-04-16 ogt + Claude Sonnet 4.6: 傳遞 snapshot 結構化資料給 OPENCLAW_NEMO
+        # 根因：原本 call(prompt) 不傳 context → nemo fallback 把 prompt[:500]（系統說明）
+        #       當 signal description → LLM 回傳 "調查 AWOOOI SRE 系統的偵探 Agent" 垃圾
+        # 修復：把 snapshot.evidence_summary 放進 alert_context.signals 讓 nemo 看到真實資料
+        _evidence = (snapshot.evidence_summary or "（待感應器資料）")[:800]
+        alert_context = {
+            "incident_id": snapshot.snapshot_id or "UNKNOWN",
+            "severity": "P3",
+            "signals": [{"alert_name": "evidence_snapshot", "description": _evidence}],
+            "affected_services": [],
+        }
+
        from src.services.openclaw import get_openclaw
        openclaw = get_openclaw()
-        response_text, _provider, success = await openclaw.call(prompt)
+        response_text, _provider, success = await openclaw.call(prompt, alert_context=alert_context)

        if not success or not response_text:
            return self._degraded_report(snapshot, 0, reason="llm_failed")
@@ -223,8 +235,13 @@ def _extract_hypotheses(parsed: dict[str, Any]) -> list[Hypothesis]:
                       "medium": "ModerateIssue", "low": "LowRisk"}
        category = risk_to_cat.get(risk_level.lower(), "Unknown")
        if action_title and confidence > 0:
+            # 2026-04-16 ogt + Claude Sonnet 4.6: 優先用 reasoning 作為假設描述
+            # reasoning（解釋「為什麼」採取行動）比 action_title（「做什麼」）更接近根因
+            # 例: reasoning="CPU 95%, 系統過載" vs action_title="重啟 Pod"
+            nemo_reasoning = str(parsed.get("reasoning", "")).strip()
+            description = nemo_reasoning[:500] if len(nemo_reasoning) > 20 else action_title[:500]
            return [Hypothesis(
-                description=action_title[:500],
+                description=description,
                confidence=confidence,
                evidence_chain=[],
                category=category,
--- a/apps/api/src/services/openclaw.py
+++ b/apps/api/src/services/openclaw.py
@@ -810,7 +810,11 @@ class OpenClawService:
    # Public LLM Interface (ILLMProvider Protocol)
    # =========================================================================

-    async def call(self, prompt: str) -> tuple[str, str, bool]:
+    async def call(
+        self,
+        prompt: str,
+        alert_context: dict | None = None,
+    ) -> tuple[str, str, bool]:
        """
        呼叫 LLM (ILLMProvider Protocol 實作)

@@ -818,14 +822,20 @@ class OpenClawService:

        Args:
            prompt: 完整的 prompt
+            alert_context: 可選的告警上下文（含 incident_id/signals，
+                供 OPENCLAW_NEMO provider 使用結構化 API）

        Returns:
            (response, provider, success)

        2026-04-16 ogt + Claude Sonnet 4.6: 修復 — _call_with_fallback 回傳 5 值，
            call() 回傳 3 值，避免呼叫端 (diagnostician_agent:107) ValueError
+        2026-04-16 ogt + Claude Sonnet 4.6: 加入 alert_context — 讓 diagnostician
+            能傳 snapshot 結構化資料給 OPENCLAW_NEMO，避免 prompt 被截斷為 garbage
        """
-        response, provider, success, _tokens, _cost = await self._call_with_fallback(prompt)
+        response, provider, success, _tokens, _cost = await self._call_with_fallback(
+            prompt, alert_context=alert_context
+        )
        return response, provider, success

    # =========================================================================