From 93205ceab0072cdcf4d8d805f5c83d4f95f130ad Mon Sep 17 00:00:00 2001
From: OG T
Date: Fri, 17 Apr 2026 14:49:53 +0800
Subject: [PATCH] =?UTF-8?q?fix(auto=5Fapprove+solver):=20P1=20kubectl=20ga?=
 =?UTF-8?q?te=20+=20P2=20Nemo=20path=20kubectl=20=E5=BC=B7=E5=88=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

P1 安全漏洞 (auto_approve.py):
- 新增條件 1d:action 必須含 kubectl 關鍵字才可自動執行
- Solver 經 OpenClaw Nemo 路徑輸出自然語言 → 條件 1c 通過但無法執行
- 修復:自然語言 action → 降級人工審核(NO_PLAYBOOK reason)

P2 執行障礙 (solver_agent.py):
- Nemo 格式路徑:action_title 不含 kubectl → return [] → 觸發 _degraded_plan
- _default_action_for_category:舊自然語言 → 真實 kubectl 調查指令
- 降級路徑現在輸出 kubectl get/top/exec 等唯讀指令,可被 auto_approve 1d 正確評估

ADR-082: Phase 2 多 Agent 協作
2026-04-17 ogt + Claude Sonnet 4.6(亞太): P1+P2 hotfix

Co-Authored-By: Claude Sonnet 4.6
---
 apps/api/src/agents/solver_agent.py   | 33 +++++++++++++++++++--------
 apps/api/src/services/auto_approve.py | 16 +++++++++++++
 2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/apps/api/src/agents/solver_agent.py b/apps/api/src/agents/solver_agent.py
index 6678b97c..2702527d 100644
--- a/apps/api/src/agents/solver_agent.py
+++ b/apps/api/src/agents/solver_agent.py
@@ -231,8 +231,14 @@ def _extract_candidates(parsed: dict[str, Any]) -> list[CandidateAction]:
     2026-04-16 ogt + Claude Sonnet 4.6: 與 diagnostician 同步,修復 openclaw_nemo 格式不相容
     """
     # OpenClaw Nemo 格式轉換
+    # 2026-04-17 ogt + Claude Sonnet 4.6: kubectl validation on the Nemo path
+    # Root cause: Nemo returns natural language, e.g. {"action_title": "重啟 Crash Looping Pod"} ("Restart Crash Looping Pod")
+    #       action_title was used directly as the action -> no kubectl -> auto_approve wrongly passed -> dead loop
+    # Fix: action_title without the substring "kubectl" -> return [] (intended to trigger _degraded_plan, which emits real kubectl)
     if "action_title" in parsed and "candidates" not in parsed:
         action_title = str(parsed.get("action_title", ""))
+        if "kubectl" not in action_title.lower():
+            return []  # hand over to _degraded_plan, which is expected to emit real kubectl investigation commands
         confidence = float(parsed.get("confidence", 0.5))
         risk_level = str(parsed.get("risk_level", "medium"))
         risk_to_blast = {"critical": 60, "high": 40, "medium": 25, "low": 10}
@@ -265,17 +271,24 @@ def _extract_candidates(parsed: dict[str, Any]) -> list[CandidateAction]:
 
 
 def _default_action_for_category(category: str) -> str:
-    """降級時的預設動作(最保守的 restart)"""
+    """Return the default investigation command used when degrading; it must be a real kubectl command (investigate first, no destructive operations).
+
+    2026-04-17 ogt + Claude Sonnet 4.6: switched to real kubectl commands
+    Old: natural-language labels such as "restart_pod" / "check_disk_usage" -> could not be executed by auto_approve
+    New: kubectl investigation commands -> executable and intended to be non-mutating (NOTE(review): the disk branch uses `kubectl exec ... -- df -h`, which runs a command inside the pod; confirm this counts as read-only)
+    """
     category_lower = category.lower()
-    if "pod" in category_lower or "kube" in category_lower:
-        return "restart_pod"
-    if "disk" in category_lower:
-        return "check_disk_usage"
-    if "cpu" in category_lower:
-        return "check_cpu_usage"
-    if "network" in category_lower:
-        return "check_network_connectivity"
-    return "restart_service"
+    if "pod" in category_lower or "kube" in category_lower or "crash" in category_lower:
+        return "kubectl get pods -n awoooi-prod -o wide"
+    if "disk" in category_lower or "storage" in category_lower or "pvc" in category_lower:
+        return "kubectl exec -n awoooi-prod deployment/postgresql -- df -h"
+    if "cpu" in category_lower or "load" in category_lower:
+        return "kubectl top pods -n awoooi-prod --sort-by=cpu"
+    if "memory" in category_lower or "oom" in category_lower:
+        return "kubectl top pods -n awoooi-prod --sort-by=memory"
+    if "network" in category_lower or "connect" in category_lower:
+        return "kubectl get services -n awoooi-prod"
+    return "kubectl get pods -n awoooi-prod"
 
 
 def compute_input_hash(diagnosis: DiagnosisReport) -> str:
diff --git a/apps/api/src/services/auto_approve.py b/apps/api/src/services/auto_approve.py
index 3d103098..d15fce86 100644
--- a/apps/api/src/services/auto_approve.py
+++ b/apps/api/src/services/auto_approve.py
@@ -290,6 +290,22 @@ class AutoApprovePolicy:
                 confidence=confidence,
             )
 
+        # Condition 1d: natural-language descriptions must not be auto-executed (2026-04-17 ogt + Claude Sonnet 4.6)
+        # Root cause: via the OpenClaw Nemo path the Solver emits natural language such as "重啟 Crash Looping Pod" ("Restart Crash Looping Pod")
+        #       action is non-empty -> condition 1c passes -> auto_approved=True
+        #       but kubectl_command is empty -> nothing can actually run -> incident stays stuck in "investigating"
+        # Fix: auto-execution requires the substring "kubectl" (case-insensitive) in action or kubectl_command; otherwise reject for human review
+        _kubectl_cmd = proposal_data.get("kubectl_command", "") or ""
+        _has_kubectl = "kubectl" in action.lower() or "kubectl" in _kubectl_cmd.lower()
+        if not _has_kubectl:
+            return self._reject(
+                reason=AutoApproveReason.NO_PLAYBOOK,
+                detail=f"Action '{action[:60]}' is natural language — no kubectl command, requires human review",
+                risk_level=risk_level,
+                trust_score=trust_score,
+                confidence=confidence,
+            )
+
         # 條件 2: 風險等級必須在允許列表中
         if risk_level not in self.config.allowed_risk_levels:
             return self._reject(