From 97ce5ea658ffa4dc1b717586e7ddf7547a67e4b8 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 24 Apr 2026 15:57:30 +0800 Subject: [PATCH] =?UTF-8?q?feat(p2.6):=20trust=5Fdrift=5Fdetector=20?= =?UTF-8?q?=E6=8E=A5=E5=85=A5=20ai=5Fslo=5Fwatchdog=5Fjob=20W-6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P2.6 接入 2026-04-24 ogt + Claude Sonnet 4.6 問題: trust_drift_detector.py 是孤立服務(零引用),Playbook 信任度 偏態(盲目樂觀/學習鎖死)從未被任何監控機制感知 修復: ai_slo_watchdog_job._check_once() 新增 W-6 Trust Drift 檢查 - 呼叫 get_trust_drift_detector().run()(偵測 + 寫 ai_governance_events) - 偵測到偏態時加入 violations 清單 → 觸發 TYPE-8M Meta-System 告警 - checks 計數從 5 → 6 覆蓋案例: - optimism_bias: >70% Playbook trust_score >0.9 → PostExecutionVerifier 可能失效 - confidence_collapse: >70% Playbook trust_score <0.3 → EWMA 計算/執行誤判 Co-Authored-By: Claude Sonnet 4.6 --- apps/api/src/jobs/ai_slo_watchdog_job.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/apps/api/src/jobs/ai_slo_watchdog_job.py b/apps/api/src/jobs/ai_slo_watchdog_job.py index d4aff21b..dd512120 100644 --- a/apps/api/src/jobs/ai_slo_watchdog_job.py +++ b/apps/api/src/jobs/ai_slo_watchdog_job.py @@ -112,8 +112,26 @@ async def _check_once() -> None: except Exception as e: logger.warning("watchdog_w5_stuck_analysis_check_failed", error=str(e)) + # W-6: Trust Drift 偵測(Playbook 信任度分布偏態) + # P2.6 接入 2026-04-24 ogt + Claude Sonnet 4.6 + # trust_drift_detector 是孤立服務,此處首次接入 watchdog 自動觸發 + try: + from src.services.trust_drift_detector import get_trust_drift_detector + dist = await get_trust_drift_detector().run() + if dist.drift_detected: + drift_labels = { + "optimism_bias": "盲目樂觀 — PostExecutionVerifier 可能失效或 RAG 資料污染", + "confidence_collapse": "學習鎖死 — EWMA 計算異常或所有執行誤判失敗", + } + label = drift_labels.get(dist.drift_type or "", dist.drift_type or "未知") + violations.append( + f"Trust Drift 偵測到 {label}(高分 {dist.high_ratio:.0%} / 低分 {dist.low_ratio:.0%},共 {dist.total} 個 Playbook)" + ) + except Exception as e: + logger.warning("watchdog_w6_trust_drift_check_failed", error=str(e)) + if not violations: - logger.debug("ai_slo_watchdog_all_ok", checks=5) + logger.debug("ai_slo_watchdog_all_ok", checks=6) return # 去重:violations 相同內容 1 小時內不重複發