Files
awoooi/apps/api/src/services/trust_drift_detector.py
Your Name ff30c61c4c
All checks were successful
Code Review / ai-code-review (push) Successful in 21s
CD Pipeline / tests (push) Successful in 1m20s
CD Pipeline / build-and-deploy (push) Successful in 4m15s
CD Pipeline / post-deploy-checks (push) Successful in 1m58s
fix(rls): 收斂 API DB access context
2026-05-12 19:55:13 +08:00

259 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
AWOOOI AIOps Phase 6 — Trust Drift Detector(信任度漂移偵測器)
===============================================================
【LIB ONLY — NO SIDE EFFECTS】
2026-05-02 ogt + Claude Sonnet 4.6(亞太): 整併雙寫路徑
背景:原本 watchdog W-6 呼叫 detector.run() 會直接寫 event_type=trust_drift 到
ai_governance_events;governance_agent.check_trust_drift() 每 1h 也寫同一 event_type。
造成雙寫、語義混淆,下游 consumer 無法區分 source-of-truth。
整併決策:governance_agent.check_trust_drift() 為唯一 source-of-truth(功能更完整,
含 auto-deprecate + Telegram 推送)。本模組降為純統計 lib,不再自行寫 PG。
職責(整併後):純統計 lib,偵測 Playbook trust_score 分布的兩種極端偏態:
極端 A「盲目樂觀」> 70% Playbook trust_score > 0.9
→ 可能是 PostExecutionVerifier 失效,或 RAG 資料被污染,讓所有 AI 都以為「我很棒」
→ 真正的好系統不會所有 Playbook 都高分
極端 B「學習鎖死」> 70% Playbook trust_score < 0.3
→ 可能是 EWMA 計算出錯,或所有執行都被誤判失敗,讓 AI 對自己完全沒信心
→ 學習機制可能卡死
設計原則(整併後):
1. 只讀 DB,不修改任何數據
2. detect() / run() 只回傳 TrustDistribution,不寫 ai_governance_events
3. save_drift_event() 保留供呼叫方(如需要分布事件)顯式呼叫,不在 run() 內自動觸發
4. 樣本不足(< 10 個 approved Playbook)→ 跳過偵測,不告警
5. AI 治理事件的唯一寫入點:governance_agent.check_trust_drift()
ADR-087: AI 自我治理閉環
2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 6 初始建立
2026-05-02 ogt + Claude Sonnet 4.6(亞太): 降為 lib only,移除 run() 自動 PG 寫入
"""
from __future__ import annotations
from dataclasses import dataclass
import structlog
from sqlalchemy import func, select
from src.db.base import get_db_context
from src.db.models import AiGovernanceEvent, PlaybookRecord
from src.utils.timezone import now_taipei
logger = structlog.get_logger(__name__)
# ─────────────────────────────────────────────────────────────────────────────
# Detection thresholds (MASTER §3.6; changing them requires an ADR-087 update)
# ─────────────────────────────────────────────────────────────────────────────
DRIFT_HIGH_THRESHOLD: float = 0.9 # trust_score strictly above this counts as "too high"
DRIFT_LOW_THRESHOLD: float = 0.3 # trust_score strictly below this counts as "too low"
DRIFT_RATIO_TRIGGER: float = 0.70 # share of playbooks in one extreme that raises the alarm (compared with >=)
DRIFT_MIN_SAMPLES: int = 10 # minimum number of approved playbooks required before detection runs
# ─────────────────────────────────────────────────────────────────────────────
# Data Types
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class TrustDistribution:
"""Playbook 信任度分布快照"""
total: int
high_count: int # trust_score > 0.9
low_count: int # trust_score < 0.3
mid_count: int # 0.3 <= trust_score <= 0.9(正常區間)
high_ratio: float
low_ratio: float
mean_trust: float
drift_type: str | None # "optimism_bias" / "confidence_collapse" / None
drift_detected: bool
def to_dict(self) -> dict:
return {
"total": self.total,
"high_count": self.high_count,
"low_count": self.low_count,
"mid_count": self.mid_count,
"high_ratio": round(self.high_ratio, 4),
"low_ratio": round(self.low_ratio, 4),
"mean_trust": round(self.mean_trust, 4),
"drift_type": self.drift_type,
"drift_detected": self.drift_detected,
"thresholds": {
"high": DRIFT_HIGH_THRESHOLD,
"low": DRIFT_LOW_THRESHOLD,
"ratio_trigger": DRIFT_RATIO_TRIGGER,
"min_samples": DRIFT_MIN_SAMPLES,
},
}
# ─────────────────────────────────────────────────────────────────────────────
# Main Service
# ─────────────────────────────────────────────────────────────────────────────
class TrustDriftDetector:
    """
    Trust-drift detector: statistics over the trust scores of approved playbooks.

    Usage:
        detector = TrustDriftDetector()
        dist = await detector.detect()
        if dist.drift_detected:
            await detector.save_drift_event(dist)
    """

    @staticmethod
    def _empty_distribution(total: int = 0) -> TrustDistribution:
        """Build the "nothing to report" result: zeroed statistics, no drift."""
        return TrustDistribution(
            total=total,
            high_count=0,
            low_count=0,
            mid_count=0,
            high_ratio=0.0,
            low_ratio=0.0,
            mean_trust=0.0,
            drift_type=None,
            drift_detected=False,
        )

    @staticmethod
    def _classify_drift(
        high_ratio: float,
        low_ratio: float,
        trigger: float | None = None,
    ) -> str | None:
        """Map the two extreme-bucket ratios to a drift label.

        Args:
            high_ratio: Share of playbooks in the "too high" bucket.
            low_ratio: Share of playbooks in the "too low" bucket.
            trigger: Ratio at or above which a bucket counts as drift;
                defaults to ``DRIFT_RATIO_TRIGGER``.

        Returns:
            ``"optimism_bias"`` when the high bucket reaches the trigger,
            otherwise ``"confidence_collapse"`` when the low bucket does,
            otherwise ``None``. The high bucket is checked first.
        """
        if trigger is None:
            trigger = DRIFT_RATIO_TRIGGER
        # NOTE(review): the comparison is inclusive (>=) while the module
        # docs say "> 70%" — confirm which one is intended.
        if high_ratio >= trigger:
            return "optimism_bias"  # nearly every playbook rates itself highly → suspicious
        if low_ratio >= trigger:
            return "confidence_collapse"  # scores collapsed → learning may be stuck
        return None

    @staticmethod
    async def _count_approved(session, *criteria) -> int:
        """Count playbooks with status "approved" that also match ``criteria``."""
        result = await session.execute(
            select(func.count()).where(
                PlaybookRecord.status == "approved",
                *criteria,
            )
        )
        return result.scalar() or 0

    async def detect(self) -> TrustDistribution:
        """
        Read the approved playbooks, compute the trust-score distribution
        and classify drift.

        Returns:
            TrustDistribution. ``drift_detected`` is False when fewer than
            ``DRIFT_MIN_SAMPLES`` playbooks are approved (only ``total`` is
            filled in) and also when any exception occurs (everything zero).

        Any exception is logged and swallowed; nothing is raised to the caller.
        """
        try:
            async with get_db_context() as session:
                # Only playbooks in the "approved" state are considered.
                # NOTE(review): the figures below come from separate
                # statements, so they may not describe one consistent
                # snapshot if rows change in between — confirm the
                # isolation level get_db_context() provides.
                total: int = await self._count_approved(session)
                if total < DRIFT_MIN_SAMPLES:
                    logger.info(
                        "trust_drift_skip_insufficient_samples",
                        total=total,
                        required=DRIFT_MIN_SAMPLES,
                    )
                    return self._empty_distribution(total)

                high_count: int = await self._count_approved(
                    session,
                    PlaybookRecord.trust_score > DRIFT_HIGH_THRESHOLD,
                )
                low_count: int = await self._count_approved(
                    session,
                    PlaybookRecord.trust_score < DRIFT_LOW_THRESHOLD,
                )
                mean_q = await session.execute(
                    select(func.avg(PlaybookRecord.trust_score)).where(
                        PlaybookRecord.status == "approved"
                    )
                )
                mean_trust: float = float(mean_q.scalar() or 0.0)

                # Whatever is neither high nor low is reported as "mid".
                # NOTE(review): if trust_score is nullable, NULL rows match
                # neither comparison and therefore end up here — verify.
                mid_count = total - high_count - low_count
                high_ratio = high_count / total
                low_ratio = low_count / total

                drift_type = self._classify_drift(high_ratio, low_ratio)
                dist = TrustDistribution(
                    total=total,
                    high_count=high_count,
                    low_count=low_count,
                    mid_count=mid_count,
                    high_ratio=high_ratio,
                    low_ratio=low_ratio,
                    mean_trust=mean_trust,
                    drift_type=drift_type,
                    drift_detected=drift_type is not None,
                )

                if dist.drift_detected:
                    logger.warning(
                        "trust_drift_detected",
                        drift_type=drift_type,
                        high_ratio=round(high_ratio, 3),
                        low_ratio=round(low_ratio, 3),
                        mean_trust=round(mean_trust, 3),
                        total=total,
                    )
                else:
                    logger.info(
                        "trust_drift_ok",
                        mean_trust=round(mean_trust, 3),
                        total=total,
                        high_ratio=round(high_ratio, 3),
                    )
                return dist
        except Exception as e:
            # exc_info keeps the traceback; str(e) alone is often not enough
            # to locate a failing query.
            logger.error("trust_drift_detect_error", error=str(e), exc_info=True)
            # Conservative: a failed detection raises no alarm (not knowing
            # is better than a false alert).
            return self._empty_distribution()

    async def save_drift_event(self, dist: TrustDistribution) -> None:
        """Persist ``dist`` as an ``AiGovernanceEvent`` with event_type "trust_drift".

        The event details are ``dist.to_dict()`` plus a ``detected_at``
        timestamp taken from ``now_taipei()``. Failures are logged and
        swallowed; nothing is raised to the caller.
        """
        try:
            async with get_db_context() as session:
                event = AiGovernanceEvent(
                    event_type="trust_drift",
                    details={
                        **dist.to_dict(),
                        "detected_at": now_taipei().isoformat(),
                    },
                    resolved=False,
                )
                session.add(event)
                await session.commit()
                logger.warning(
                    "trust_drift_event_saved",
                    drift_type=dist.drift_type,
                )
        except Exception as e:
            logger.error("trust_drift_event_save_error", error=str(e), exc_info=True)

    async def run(self) -> TrustDistribution:
        """Run the statistical detection (LIB ONLY): return the distribution, write nothing.

        2026-05-02 ogt + Claude Sonnet 4.6 (APAC): merged the double-write path.
        Previous behaviour: after detect(), a detected drift automatically
        called save_drift_event() and wrote to PG.
        Now: only the result is returned and the caller decides whether to
        persist it. Per the module notes, the single writer of
        ai_governance_events is governance_agent.check_trust_drift().
        """
        return await self.detect()
# ─────────────────────────────────────────────────────────────────────────────
# Singleton
# ─────────────────────────────────────────────────────────────────────────────
_detector: TrustDriftDetector | None = None


def get_trust_drift_detector() -> TrustDriftDetector:
    """Return the module-level detector, creating it on the first call."""
    global _detector
    instance = _detector
    if instance is None:
        # First use: build the detector and cache it for later callers.
        instance = _detector = TrustDriftDetector()
    return instance