From b2dfcf9b0d1e55b3b7c4a4b4436a5ebeffec8ae4 Mon Sep 17 00:00:00 2001 From: OG T Date: Sat, 11 Apr 2026 21:31:42 +0800 Subject: [PATCH] =?UTF-8?q?fix(telegram):=20safety=20guard=20=E6=94=94?= =?UTF-8?q?=E6=88=AA=E6=94=B9=E7=99=BC=E4=BA=BA=E5=B7=A5=E5=AF=A9=E6=A0=B8?= =?UTF-8?q?=E5=8D=A1=E7=89=87=EF=BC=8C=E4=B8=8D=E5=86=8D=E7=99=BC=20?= =?UTF-8?q?=E2=9D=8C=20=E5=A4=B1=E6=95=97=E8=A8=8A=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 問題:AI 無法確認 deployment name 時,每次告警都發一條 「❌ 自動修復失敗 kubectl scale deployment unknown」的垃圾訊息 修復: - safety guard 攔截 → token.state 回 READY(非 ERROR) - 改呼叫 _push_decision_to_telegram,發 TYPE-4 人工審核卡片 - mcp_all_failed=True 讓 classify_notification 選 TYPE-4 - K8s 找不到 target 的路徑同樣處理 效果:統帥看到的是「需要人工介入的審核卡片」而非「修復失敗」錯誤訊息 Co-Authored-By: Claude Sonnet 4.6 --- apps/api/src/services/decision_manager.py | 25 +++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/apps/api/src/services/decision_manager.py b/apps/api/src/services/decision_manager.py index d84eebe3..d062cbe6 100644 --- a/apps/api/src/services/decision_manager.py +++ b/apps/api/src/services/decision_manager.py @@ -1219,12 +1219,17 @@ class DecisionManager: target=_target, reason="action 含未解析的 placeholder、unknown、target==alertname、或危險字元,拒絕執行", ) - token.state = DecisionState.ERROR - token.error = f"Auto-execute blocked: unresolved placeholder in action: {action[:80]}" + # Safety guard 攔截 → 降級為人工審核,而非「修復失敗」 + # 2026-04-11 Claude Sonnet 4.6: 不發 ❌ 失敗訊息,改發人工審核卡片 + # 讓統帥看到告警並決定如何處理,而不是重複收到「無法確認 deployment 名稱」 + token.state = DecisionState.READY # 回到 READY,讓人工審核 + token.error = f"Auto-execute blocked (safety guard): {action[:80]}" + token.proposal_data["decision_state"] = DecisionState.READY.value + token.proposal_data["auto_executed"] = False + token.proposal_data["mcp_all_failed"] = True # 標記讓 classify_notification → TYPE-4 await self._save_token(token) _fire_and_forget( - _push_auto_repair_result(incident, action, success=False, - error="無法確認 deployment 名稱,請人工確認後手動執行") + _push_decision_to_telegram(incident, token.proposal_data) ) return @@ -1241,12 +1246,16 @@ class DecisionManager: namespace=_ns, reason="K8s 中找不到此 deployment/pod,拒絕執行", ) - token.state = DecisionState.ERROR - token.error = f"Auto-execute blocked: deployment '{_target}' not found in K8s namespace '{_ns}'" + # K8s 找不到 target → 降級為人工審核,同 safety guard 策略 + # 2026-04-11 Claude Sonnet 4.6: 不發 ❌ 失敗,改發 TYPE-4 人工審核卡片 + token.state = DecisionState.READY + token.error = f"Auto-execute blocked: deployment '{_target}' not found in K8s" + token.proposal_data["decision_state"] = DecisionState.READY.value + token.proposal_data["auto_executed"] = False + token.proposal_data["mcp_all_failed"] = True await self._save_token(token) _fire_and_forget( - _push_auto_repair_result(incident, action, success=False, - error=f"K8s 中找不到 deployment '{_target}',請人工確認後手動執行") + _push_decision_to_telegram(incident, token.proposal_data) ) return