From 68a42a3c97e7c1ba8126b2f38c2f546dda13227f Mon Sep 17 00:00:00 2001
From: OG T
Date: Sun, 19 Apr 2026 01:11:09 +0800
Subject: [PATCH] =?UTF-8?q?fix(openclaw):=20=E5=B9=BB=E8=A6=BA=E9=A9=97?=
 =?UTF-8?q?=E8=AD=89=E9=9B=99=E8=B7=AF=E5=BE=91=E8=A6=86=E8=93=8B=20+=20?=
 =?UTF-8?q?=E6=8A=BD=E5=87=BA=E5=85=B1=E7=94=A8=20helper?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

2026-04-19 凌晨(台北時區)— ogt + Claude Opus 4.7 (1M)

根因:
commit 7e9448f 的 Python hallucination validator 只裝在 `analyze_alert`
(webhook path),但 incident sweeper 走 `generate_incident_proposal`
(line 1552) 沒裝驗證 → 00:23 PostgreSQLDiskGrowthRate 卡片出現
"deployment/awoooi-prod" 幻覺未攔截。

修:
1. 抽出 `_validate_deployment_inventory(result, inventory, ns)` 共用方法
2. `analyze_alert` (line 1322 area) 呼叫此 helper — 原行內邏輯消除
3. `generate_incident_proposal` (line 1552) 動態抓 inventory + 呼叫 helper
4. helper 補:
   - result.action_title = '[安全降級] 調查 {ns} 真實資源狀態'
     (之前只改 description,action_title 沒變 → DB action 欄位仍殘留舊文字)
   - 每個欄位賦值 try/except 保底,單欄失敗不影響其他

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 apps/api/src/services/openclaw.py | 129 +++++++++++++++++++++---------
 1 file changed, 91 insertions(+), 38 deletions(-)

diff --git a/apps/api/src/services/openclaw.py b/apps/api/src/services/openclaw.py
index 0bd6569c..6495452d 100644
--- a/apps/api/src/services/openclaw.py
+++ b/apps/api/src/services/openclaw.py
@@ -1144,6 +1144,86 @@ class OpenClawService:
 
         return None
 
+    def _validate_deployment_inventory(
+        self,
+        result: "OpenClawDecision | None",
+        k8s_inventory: str,
+        k8s_ns: str,
+    ) -> None:
+        """
+        Detect a hallucinated deployment name and downgrade the decision in place.
+
+        2026-04-19 ogt + Claude Opus 4.7 (extracted from analyze_alert); shared
+        by both paths (analyze_alert + generate_incident_proposal).
+
+        Root cause: even with the inventory in the prompt, NEMOTRON still uses the
+        namespace as the deployment name, so running
+        kubectl rollout restart deployment/awoooi-prod fails with "not found".
+        Fix: pull the deployment name out of the kubectl command with a regex and
+        check it against the inventory whitelist; when it is absent, downgrade to
+        NO_ACTION, switch to a read-only get deploy, and set confidence to 0.
+
+        Args:
+            result: Decision to check; mutated in place. Falsy means no-op.
+            k8s_inventory: Comma-separated deployment names. Empty means no-op.
+            k8s_ns: Namespace used in the fallback command, title and log event.
+        """
+        if not result or not k8s_inventory:
+            return
+        _inventory_names = {n.strip() for n in k8s_inventory.split(",") if n.strip()}
+        if not _inventory_names:
+            return
+        _kcmd = (result.kubectl_command or "").lower()
+        import re as _re
+        # Also accept the deploy / deployments / deployment.apps spellings that
+        # kubectl allows, so an aliased resource type cannot bypass the check.
+        _m = _re.search(
+            r"\bdeploy(?:ments?)?(?:\.apps)?[/\s]+([a-z0-9][a-z0-9-]*)", _kcmd
+        )
+        if not _m:
+            return
+        _deploy_guess = _m.group(1)
+        if _deploy_guess in _inventory_names:
+            return
+
+        logger.warning(
+            "openclaw_deployment_hallucination_detected",
+            hallucinated=_deploy_guess,
+            inventory=sorted(_inventory_names),
+            original_kubectl_cmd=result.kubectl_command,
+            original_action=(
+                result.suggested_action.value
+                if hasattr(result.suggested_action, "value")
+                else str(result.suggested_action)
+            ),
+            namespace=k8s_ns,
+        )
+        # Downgrade to a safe, read-only investigation; nothing destructive runs.
+        _safe_values = (
+            ("kubectl_command", f"kubectl get deploy -n {k8s_ns}"),
+            ("target_resource", "unknown(hallucinated)"),
+            ("suggested_action", SuggestedAction.NO_ACTION),
+            ("action_title", f"[安全降級] 調查 {k8s_ns} 真實資源狀態"),
+            (
+                "description",
+                f"[安全降級] 原 LLM 建議的 deployment '{_deploy_guess}' 不在叢集 inventory "
+                f"({', '.join(sorted(_inventory_names))})。"
+                f"已降級為純調查動作(kubectl get deploy),請手動確認實際問題資源。",
+            ),
+            ("confidence", 0.0),
+        )
+        for _field, _value in _safe_values:
+            # Guard each field on its own so one rejected assignment cannot stop
+            # the remaining fields from being downgraded; log instead of hiding it.
+            try:
+                setattr(result, _field, _value)
+            except Exception as _exc:
+                logger.warning(
+                    "openclaw_deployment_downgrade_field_failed",
+                    field=_field,
+                    error=str(_exc),
+                )
+
     def _parse_analysis_result(self, raw_response: str) -> OpenClawDecision | None:
         """
         解析 LLM 分析結果 - 使用 Pydantic Schema Enforcement
@@ -1322,44 +1402,8 @@ Trace URL: {signoz_trace_url}
             # 解析結果
             result = self._parse_analysis_result(raw_response)
 
-            # 2026-04-18 ogt + Claude Opus 4.7: 幻覺 deployment 名偵測與降級 (Checkpoint-3)
-            # 根因: NEMOTRON 即使 prompt 有 inventory 仍會拿 namespace "awoooi-prod" 當 deployment 名
-            # → 執行時 kubectl rollout restart deployment/awoooi-prod → "not found"
-            # 修復: LLM 回應後 Python 驗證 kubectl_command 中的 deployment 名是否在 inventory
-            # 不在 → 降級為 NO_ACTION + 改成投查 kubectl get deploy(無破壞,只排查)
-            if result and _k8s_inventory:
-                _inventory_names = {n.strip() for n in _k8s_inventory.split(",") if n.strip()}
-                _kcmd = (result.kubectl_command or "").lower()
-                import re as _re
-                _m = _re.search(r"deployment[/\s]+([a-z0-9][a-z0-9-]*)", _kcmd)
-                if _m:
-                    _deploy_guess = _m.group(1)
-                    if _deploy_guess not in _inventory_names:
-                        logger.warning(
-                            "openclaw_deployment_hallucination_detected",
-                            hallucinated=_deploy_guess,
-                            inventory=sorted(_inventory_names),
-                            original_kubectl_cmd=result.kubectl_command,
-                            original_action=result.suggested_action.value if hasattr(result.suggested_action, 'value') else str(result.suggested_action),
-                        )
-                        # 降級為安全調查動作,不執行破壞性操作
-                        result.kubectl_command = f"kubectl get deploy -n {_k8s_ns}"
-                        result.target_resource = "unknown(hallucinated)"
-                        # Pydantic enum 處理 — SuggestedAction 已在檔頂 import (line 34)
-                        try:
-                            result.suggested_action = SuggestedAction.NO_ACTION
-                        except Exception:
-                            pass
-                        result.description = (
-                            f"[安全降級] 原 LLM 建議的 deployment '{_deploy_guess}' 不在叢集 inventory "
-                            f"({', '.join(sorted(_inventory_names))})。"
-                            f"已降級為純調查動作,請手動確認實際問題資源。"
-                        )
-                        # 信心度歸零
-                        try:
-                            result.confidence = 0.0
-                        except Exception:
-                            pass
+            # 2026-04-18 → 2026-04-19: 幻覺 deployment 名偵測與降級 (共用 helper)
+            self._validate_deployment_inventory(result, _k8s_inventory, _k8s_ns)
 
             if result:
                 logger.info(
@@ -1551,6 +1595,15 @@ Focus on:
             # 解析 LLM 結果
             result = self._parse_analysis_result(raw_response)
 
+            # 2026-04-19 ogt + Claude Opus 4.7: 同 analyze_alert 也需幻覺驗證
+            # 此路徑沒有 inventory 預抓,動態抓
+            _k8s_ns_for_validate = alert_context.get("namespace", "awoooi-prod") if "alert_context" in dir() else "awoooi-prod"
+            try:
+                _k8s_inv = await _fetch_k8s_inventory_for_openclaw(namespace=_k8s_ns_for_validate)
+            except Exception:
+                _k8s_inv = ""
+            self._validate_deployment_inventory(result, _k8s_inv, _k8s_ns_for_validate)
+
             if result:
                 logger.info(
                     "proposal_generation_complete",