From 7eb837567dd574e099b6a8c8d8b7be5fa325b0cf Mon Sep 17 00:00:00 2001 From: OG T Date: Thu, 16 Apr 2026 22:34:48 +0800 Subject: [PATCH] =?UTF-8?q?fix(diagnostician):=20=E4=BF=AE=E5=BE=A9=20'AI?= =?UTF-8?q?=20=E6=B7=B1=E5=BA=A6=E8=A8=BA=E6=96=B7'=20=E5=9E=83=E5=9C=BE?= =?UTF-8?q?=E6=A0=B9=E5=9B=A0=E9=A1=AF=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 根因三層鏈: 1. openclaw.call(prompt) 不傳 context 2. OPENCLAW_NEMO fallback 把 prompt[:500](系統說明文字)當 signal description 3. Nemo LLM 回傳 action_title="調查 AWOOOI SRE 系統的偵探 Agent"(任務描述) 4. _extract_hypotheses() 用 action_title 作為根因假設描述 → Telegram 顯示垃圾 修復: - openclaw.call() 新增 alert_context 可選參數,透傳給 _call_with_fallback - diagnostician._analyze() 建立 alert_context(incident_id + evidence_summary as signal) → nemo 使用結構化 API 收到真實感應器資料而非系統說明文字 - _extract_hypotheses() nemo 格式轉換:優先用 reasoning(為什麼)作為假設描述 而非 action_title(做什麼)— reasoning 更接近根因分析 2026-04-16 ogt + Claude Sonnet 4.6 (台北時區) Co-Authored-By: Claude Sonnet 4.6 --- apps/api/src/agents/diagnostician_agent.py | 21 +++++++++++++++++++-- apps/api/src/services/openclaw.py | 14 ++++++++++++-- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/apps/api/src/agents/diagnostician_agent.py b/apps/api/src/agents/diagnostician_agent.py index aed2f8b2..28990cd1 100644 --- a/apps/api/src/agents/diagnostician_agent.py +++ b/apps/api/src/agents/diagnostician_agent.py @@ -102,9 +102,21 @@ class DiagnosticianAgent(BaseAgent): "anomaly_context": snapshot.anomaly_context, }) + # 2026-04-16 ogt + Claude Sonnet 4.6: 傳遞 snapshot 結構化資料給 OPENCLAW_NEMO + # 根因:原本 call(prompt) 不傳 context → nemo fallback 把 prompt[:500](系統說明) + # 當 signal description → LLM 回傳 "調查 AWOOOI SRE 系統的偵探 Agent" 垃圾 + # 修復:把 snapshot.evidence_summary 放進 alert_context.signals 讓 nemo 看到真實資料 + _evidence = (snapshot.evidence_summary or "(待感應器資料)")[:800] + alert_context = { + "incident_id": snapshot.snapshot_id or "UNKNOWN", + "severity": "P3", + "signals": [{"alert_name": "evidence_snapshot", "description": _evidence}], + "affected_services": [], + } + from src.services.openclaw import get_openclaw openclaw = get_openclaw() - response_text, _provider, success = await openclaw.call(prompt) + response_text, _provider, success = await openclaw.call(prompt, alert_context=alert_context) if not success or not response_text: return self._degraded_report(snapshot, 0, reason="llm_failed") @@ -223,8 +235,13 @@ def _extract_hypotheses(parsed: dict[str, Any]) -> list[Hypothesis]: "medium": "ModerateIssue", "low": "LowRisk"} category = risk_to_cat.get(risk_level.lower(), "Unknown") if action_title and confidence > 0: + # 2026-04-16 ogt + Claude Sonnet 4.6: 優先用 reasoning 作為假設描述 + # reasoning(解釋「為什麼」採取行動)比 action_title(「做什麼」)更接近根因 + # 例: reasoning="CPU 95%, 系統過載" vs action_title="重啟 Pod" + nemo_reasoning = str(parsed.get("reasoning", "")).strip() + description = nemo_reasoning[:500] if len(nemo_reasoning) > 20 else action_title[:500] return [Hypothesis( - description=action_title[:500], + description=description, confidence=confidence, evidence_chain=[], category=category, diff --git a/apps/api/src/services/openclaw.py b/apps/api/src/services/openclaw.py index 0b01441a..46c30859 100644 --- a/apps/api/src/services/openclaw.py +++ b/apps/api/src/services/openclaw.py @@ -810,7 +810,11 @@ class OpenClawService: # Public LLM Interface (ILLMProvider Protocol) # ========================================================================= - async def call(self, prompt: str) -> tuple[str, str, bool]: + async def call( + self, + prompt: str, + alert_context: dict | None = None, + ) -> tuple[str, str, bool]: """ 呼叫 LLM (ILLMProvider Protocol 實作) @@ -818,14 +822,20 @@ class OpenClawService: Args: prompt: 完整的 prompt + alert_context: 可選的告警上下文(含 incident_id/signals, + 供 OPENCLAW_NEMO provider 使用結構化 API) Returns: (response, provider, success) 2026-04-16 ogt + Claude Sonnet 4.6: 修復 — _call_with_fallback 回傳 5 值, call() 回傳 3 值,避免呼叫端 (diagnostician_agent:107) ValueError + 2026-04-16 ogt + Claude Sonnet 4.6: 加入 alert_context — 讓 diagnostician + 能傳 snapshot 結構化資料給 OPENCLAW_NEMO,避免 prompt 被截斷為 garbage """ - response, provider, success, _tokens, _cost = await self._call_with_fallback(prompt) + response, provider, success, _tokens, _cost = await self._call_with_fallback( + prompt, alert_context=alert_context + ) return response, provider, success # =========================================================================