fix: 3 個飛輪沉默未打通節點 — 統帥截圖盤出
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 18m56s
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 18m56s
統帥截圖證據 (Telegram MEDIUM 告警仍走人工審核):
INC-20260411-A03B2E / A2BB29 顯示「[規則匹配]」+ action=unknown-service
節點 1: AutoApprovePolicy 擋下規則匹配 (飛輪主因)
- ADR-073 規則匹配 confidence=0.0 (防偽造)
- AutoApprovePolicy.min_confidence=0.50 → 擋下
- 結果: MEDIUM 規則匹配永遠人工審核,飛輪不轉
修復: auto_approve.py 加 _is_rule_based 判斷
(is_rule_based / source=expert_system / rule_id / matched_rule)
→ bypass min_confidence 檢查
→ 驗證: should_auto_approve=True ✅
節點 2: _is_bad_target 漏 unknown-service magic string
- _resolve_target_from_k8s fallback 產 unknown-service / unknown-pod
- GAP-A4 Phase 1/2 只擋完全等於 'unknown' 的字串,沒有擋 'unknown-' 開頭的前綴
修復: alert_rule_engine.py 加 unknown-/none-/null-/undefined- 前綴黑名單
→ 驗證: 4 個 magic 前綴 (unknown-/none-/null-/undefined-) 開頭的 target 全部判定為 bad ✅
節點 3: stale_ready_tokens_resend 無時效過濾
- 截圖是 2026-04-11 (4 天前) 告警
- 舊 labels 過期,重 process 也產不出新 target
- 壓爆 Ollama + 污染 Telegram 卡片
修復: decision_manager.py 跳過 > 3 天的 stale incident
→ skip (continue) + 以 debug 等級記錄 stale_ready_token_skipped_too_old
回歸: 113/113
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -143,6 +143,12 @@ def _is_bad_target(target: str, alertname: str) -> bool:
|
||||
"""
|
||||
if not target or target in ("unknown", "none", "null", ""):
|
||||
return True
|
||||
# 2026-04-15 Claude Sonnet 4.6 (GAP-A4 Phase 3): 擴充 fallback magic string
|
||||
# 截圖實證:Telegram 卡顯示 target=unknown-service 通過 _is_bad_target
|
||||
# _resolve_target_from_k8s fallback 也會產 unknown-pod/unknown-container
|
||||
_BAD_MAGIC_PREFIXES = ("unknown-", "none-", "null-", "undefined-")
|
||||
if any(target.startswith(p) for p in _BAD_MAGIC_PREFIXES):
|
||||
return True
|
||||
if target == alertname:
|
||||
return True
|
||||
if any(c in target for c in (" ", ":", "(", ")", '"', "'", "<", ">", "{", "}")):
|
||||
|
||||
@@ -298,7 +298,17 @@ class AutoApprovePolicy:
|
||||
)
|
||||
|
||||
# 條件 4: AI 信心度
|
||||
if confidence < self.config.min_confidence:
|
||||
# 2026-04-15 Claude Sonnet 4.6 (飛輪沉默節點 1 修復):
|
||||
# 規則匹配的 confidence 固定 0.0(ADR-073 防偽造),會被此條件擋下
|
||||
# 但 YAML 規則是人工審核過的,應直接信任 → bypass min_confidence
|
||||
# 改用「Playbook 成功率」或「規則 source」判斷可信度
|
||||
_is_rule_based = (
|
||||
proposal_data.get("is_rule_based") is True
|
||||
or proposal_data.get("source") == "expert_system"
|
||||
or (proposal_data.get("rule_id") or "") != ""
|
||||
or (proposal_data.get("matched_rule") or "") != ""
|
||||
)
|
||||
if not _is_rule_based and confidence < self.config.min_confidence:
|
||||
return self._reject(
|
||||
reason=AutoApproveReason.LOW_TRUST,
|
||||
detail=f"Confidence {confidence:.0%} < {self.config.min_confidence:.0%}",
|
||||
|
||||
@@ -2150,6 +2150,27 @@ class DecisionManager:
|
||||
if str(getattr(incident, "status", "")).lower() in ("resolved", "closed"):
|
||||
continue
|
||||
|
||||
# 2026-04-15 Claude Sonnet 4.6 (節點 3 修復):
|
||||
# 跳過 > 3 天的 stale incident — labels 已過時,重 process 無意義
|
||||
# 只會壓爆 Ollama + 污染 Telegram 卡片(截圖:4/11 的卡片今天還在彈)
|
||||
_STALE_DAYS = 3
|
||||
_created_at = getattr(incident, "created_at", None)
|
||||
if _created_at:
|
||||
from datetime import datetime as _dt, timedelta as _td, timezone as _tz
|
||||
_now = _dt.now(_tz.utc)
|
||||
_cutoff = _now - _td(days=_STALE_DAYS)
|
||||
# 確保 _created_at 有時區
|
||||
if _created_at.tzinfo is None:
|
||||
_created_at = _created_at.replace(tzinfo=_tz.utc)
|
||||
if _created_at < _cutoff:
|
||||
logger.debug(
|
||||
"stale_ready_token_skipped_too_old",
|
||||
incident_id=incident_id,
|
||||
age_days=(_now - _created_at).days,
|
||||
cutoff_days=_STALE_DAYS,
|
||||
)
|
||||
continue
|
||||
|
||||
proposal_data = data.get("proposal_data") or {}
|
||||
if not proposal_data:
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user