From 6c7f648b60e26c65bb0d7f4b35e14f3fbdd95bd6 Mon Sep 17 00:00:00 2001 From: OG T Date: Wed, 15 Apr 2026 10:56:48 +0800 Subject: [PATCH] =?UTF-8?q?fix:=203=20=E5=80=8B=E9=A3=9B=E8=BC=AA=E6=B2=89?= =?UTF-8?q?=E9=BB=98=E6=9C=AA=E6=89=93=E9=80=9A=E7=AF=80=E9=BB=9E=20?= =?UTF-8?q?=E2=80=94=20=E7=B5=B1=E5=B8=A5=E6=88=AA=E5=9C=96=E7=9B=A4?= =?UTF-8?q?=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 統帥截圖證據 (Telegram MEDIUM 告警仍走人工審核): INC-20260411-A03B2E / A2BB29 顯示「[規則匹配]」+ action=unknown-service 節點 1: AutoApprovePolicy 擋下規則匹配 (飛輪主因) - ADR-073 規則匹配 confidence=0.0 (防偽造) - AutoApprovePolicy.min_confidence=0.50 → 擋下 - 結果: MEDIUM 規則匹配永遠人工審核,飛輪不轉 修復: auto_approve.py 加 _is_rule_based 判斷 (is_rule_based / source=expert_system / rule_id / matched_rule) → bypass min_confidence 檢查 → 驗證: should_auto_approve=True ✅ 節點 2: _is_bad_target 漏 unknown-service magic string - _resolve_target_from_k8s fallback 產 unknown-service / unknown-pod - GAP-A4 Phase 1/2 只擋 'unknown' 而非前綴 修復: alert_rule_engine.py 加 unknown-/none-/null-/undefined- 前綴黑名單 → 驗證: 4 個 magic 全 bad ✅ 節點 3: stale_ready_tokens_resend 無時效過濾 - 截圖是 2026-04-11 (4 天前) 告警 - 舊 labels 過期,重 process 也產不出新 target - 壓爆 Ollama + 污染 Telegram 卡片 修復: decision_manager.py 跳過 > 3 天的 stale incident → skip + log stale_ready_token_skipped_too_old 回歸: 113/113 Co-Authored-By: Claude Haiku 4.5 --- apps/api/src/services/alert_rule_engine.py | 6 ++++++ apps/api/src/services/auto_approve.py | 12 +++++++++++- apps/api/src/services/decision_manager.py | 21 +++++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/apps/api/src/services/alert_rule_engine.py b/apps/api/src/services/alert_rule_engine.py index fced1c05..38a55780 100644 --- a/apps/api/src/services/alert_rule_engine.py +++ b/apps/api/src/services/alert_rule_engine.py @@ -143,6 +143,12 @@ def _is_bad_target(target: str, alertname: str) -> bool: """ if not target or target in ("unknown", "none", "null", ""): return True + # 2026-04-15 Claude Sonnet 4.6 (GAP-A4 Phase 3): 擴充 fallback magic string + # 截圖實證:Telegram 卡顯示 target=unknown-service 通過 _is_bad_target + # _resolve_target_from_k8s fallback 也會產 unknown-pod/unknown-container + _BAD_MAGIC_PREFIXES = ("unknown-", "none-", "null-", "undefined-") + if any(target.startswith(p) for p in _BAD_MAGIC_PREFIXES): + return True if target == alertname: return True if any(c in target for c in (" ", ":", "(", ")", '"', "'", "<", ">", "{", "}")): diff --git a/apps/api/src/services/auto_approve.py b/apps/api/src/services/auto_approve.py index 35f9e9e6..0dd00144 100644 --- a/apps/api/src/services/auto_approve.py +++ b/apps/api/src/services/auto_approve.py @@ -298,7 +298,17 @@ class AutoApprovePolicy: ) # 條件 4: AI 信心度 - if confidence < self.config.min_confidence: + # 2026-04-15 Claude Sonnet 4.6 (飛輪沉默節點 1 修復): + # 規則匹配的 confidence 固定 0.0(ADR-073 防偽造),會被此條件擋下 + # 但 YAML 規則是人工審核過的,應直接信任 → bypass min_confidence + # 改用「Playbook 成功率」或「規則 source」判斷可信度 + _is_rule_based = ( + proposal_data.get("is_rule_based") is True + or proposal_data.get("source") == "expert_system" + or (proposal_data.get("rule_id") or "") != "" + or (proposal_data.get("matched_rule") or "") != "" + ) + if not _is_rule_based and confidence < self.config.min_confidence: return self._reject( reason=AutoApproveReason.LOW_TRUST, detail=f"Confidence {confidence:.0%} < {self.config.min_confidence:.0%}", diff --git a/apps/api/src/services/decision_manager.py b/apps/api/src/services/decision_manager.py index 0843add9..42c65a31 100644 --- a/apps/api/src/services/decision_manager.py +++ b/apps/api/src/services/decision_manager.py @@ -2150,6 +2150,27 @@ class DecisionManager: if str(getattr(incident, "status", "")).lower() in ("resolved", "closed"): continue + # 2026-04-15 Claude Sonnet 4.6 (節點 3 修復): + # 跳過 > 3 天的 stale incident — labels 已過時,重 process 無意義 + # 只會壓爆 Ollama + 污染 Telegram 卡片(截圖:4/11 的卡片今天還在彈) + _STALE_DAYS = 3 + _created_at = getattr(incident, "created_at", None) + if _created_at: + from datetime import datetime as _dt, timedelta as _td, timezone as _tz + _now = _dt.now(_tz.utc) + _cutoff = _now - _td(days=_STALE_DAYS) + # 確保 _created_at 有時區 + if _created_at.tzinfo is None: + _created_at = _created_at.replace(tzinfo=_tz.utc) + if _created_at < _cutoff: + logger.debug( + "stale_ready_token_skipped_too_old", + incident_id=incident_id, + age_days=(_now - _created_at).days, + cutoff_days=_STALE_DAYS, + ) + continue + proposal_data = data.get("proposal_data") or {} if not proposal_data: continue