feat(governance): auto-deprecate low-trust unused playbooks (>30d)
trust_drift previously fired alerts forever for playbooks stuck below the 0.2 threshold. With user authorization for governance-class auto-fixes, check_trust_drift now retires playbooks that have been unused for 30+ days (or never used and created 30+ days ago) by flipping status to 'deprecated' before alerting. Alerts now report drifted_count, auto_deprecated_count, and the kept playbook_ids that still need human review (those in their 30d trial window). Existing alert noise from the four currently-drifted playbooks should drop to whatever fraction is genuinely in trial. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -39,6 +39,10 @@ logger = structlog.get_logger(__name__)
|
||||
# 閾值常數
|
||||
# =============================================================================
|
||||
TRUST_DRIFT_THRESHOLD = 0.2 # playbook trust_score 低於此值 → 告警
|
||||
# 2026-05-02 ogt + Claude Sonnet 4.6: trust_drift auto-deprecate
|
||||
# trust < 0.2 + (last_used_at 早於 N 天前 OR 從沒用過 + created_at 早於 N 天前) → 自動 deprecate
|
||||
# 這個 N 設 30 天,給 playbook 充足試用期,避免新提案被早期幾次失敗就廢棄
|
||||
TRUST_DRIFT_AUTO_DEPRECATE_AFTER_DAYS = 30
|
||||
KM_STALE_DAYS = 7 # 知識條目超過幾天未更新視為陳舊
|
||||
KM_STALE_RATIO = 0.20 # 陳舊比例超過此值 → 告警
|
||||
HALLUCINATION_RATE_THRESHOLD = 0.10 # LLM verification failed 比例超過此值 → 告警
|
||||
@@ -69,9 +73,12 @@ class GovernanceAgent:
|
||||
# =========================================================================
|
||||
|
||||
async def check_trust_drift(self) -> dict[str, Any]:
|
||||
"""Playbook trust_score < 0.2 → 告警建議廢棄
|
||||
"""Playbook trust_score < 0.2 → 告警建議廢棄;30 天沒用過的直接 auto-deprecate
|
||||
|
||||
2026-04-26 P2.2 by Claude
|
||||
2026-05-02 ogt + Claude Sonnet 4.6: 加 auto_deprecate_low_trust_unused 自治路徑
|
||||
守衛條件:trust < 0.2 AND (last_used_at < 30 天前 OR 從未使用且創建超過 30 天)
|
||||
→ status 改 'deprecated',alert 改報「N 個告警 + M 個 auto-deprecated」
|
||||
"""
|
||||
async with get_db_context() as db:
|
||||
result = await db.execute(
|
||||
@@ -81,9 +88,31 @@ class GovernanceAgent:
|
||||
)
|
||||
all_records = result.scalars().all()
|
||||
|
||||
total = len(all_records)
|
||||
drifted = [r for r in all_records if float(r.trust_score) < TRUST_DRIFT_THRESHOLD]
|
||||
drifted_ids = [r.playbook_id for r in drifted[:10]]
|
||||
total = len(all_records)
|
||||
drifted = [r for r in all_records if float(r.trust_score) < TRUST_DRIFT_THRESHOLD]
|
||||
|
||||
# auto-deprecate eligibility
|
||||
cutoff = now_taipei() - timedelta(days=TRUST_DRIFT_AUTO_DEPRECATE_AFTER_DAYS)
|
||||
auto_deprecated_ids: list[str] = []
|
||||
kept_ids: list[str] = []
|
||||
for r in drifted:
|
||||
last = r.last_used_at
|
||||
created = r.created_at
|
||||
# 沒用過 → 用 created_at 作為「進入系統時間」
|
||||
ref_time = last if last is not None else created
|
||||
if ref_time is not None and ref_time < cutoff:
|
||||
r.status = "deprecated"
|
||||
auto_deprecated_ids.append(r.playbook_id)
|
||||
else:
|
||||
kept_ids.append(r.playbook_id)
|
||||
|
||||
if auto_deprecated_ids:
|
||||
await db.commit()
|
||||
logger.info(
|
||||
"governance_trust_drift_auto_deprecated",
|
||||
count=len(auto_deprecated_ids),
|
||||
ids=auto_deprecated_ids[:10],
|
||||
)
|
||||
|
||||
if drifted:
|
||||
await self._alert(
|
||||
@@ -91,8 +120,11 @@ class GovernanceAgent:
|
||||
{
|
||||
"drifted_count": len(drifted),
|
||||
"total_playbooks": total,
|
||||
"playbook_ids": drifted_ids,
|
||||
"playbook_ids": kept_ids[:10],
|
||||
"auto_deprecated_count": len(auto_deprecated_ids),
|
||||
"auto_deprecated_ids": auto_deprecated_ids[:10],
|
||||
"threshold": TRUST_DRIFT_THRESHOLD,
|
||||
"auto_deprecate_after_days": TRUST_DRIFT_AUTO_DEPRECATE_AFTER_DAYS,
|
||||
},
|
||||
)
|
||||
|
||||
@@ -100,8 +132,15 @@ class GovernanceAgent:
|
||||
"governance_trust_drift_checked",
|
||||
total=total,
|
||||
drifted=len(drifted),
|
||||
auto_deprecated=len(auto_deprecated_ids),
|
||||
kept=len(kept_ids),
|
||||
)
|
||||
return {"checked": total, "drifted": len(drifted)}
|
||||
return {
|
||||
"checked": total,
|
||||
"drifted": len(drifted),
|
||||
"auto_deprecated": len(auto_deprecated_ids),
|
||||
"kept": len(kept_ids),
|
||||
}
|
||||
|
||||
# =========================================================================
|
||||
# 2. 知識庫衰退
|
||||
|
||||
@@ -58,12 +58,15 @@ class TestCheckTrustDrift:
|
||||
mock_record = MagicMock()
|
||||
mock_record.trust_score = 0.8
|
||||
mock_record.playbook_id = "PB-001"
|
||||
mock_record.last_used_at = None
|
||||
mock_record.created_at = None
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.scalars.return_value.all.return_value = [mock_record]
|
||||
|
||||
mock_db = AsyncMock()
|
||||
mock_db.execute = AsyncMock(return_value=mock_result)
|
||||
mock_db.commit = AsyncMock()
|
||||
|
||||
alerter = AsyncMock()
|
||||
alerter.alert_governance = AsyncMock()
|
||||
@@ -81,20 +84,28 @@ class TestCheckTrustDrift:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_drifted_playbooks_trigger_alert(self):
|
||||
"""有 playbook trust_score < 0.2 → 觸發告警"""
|
||||
"""有 playbook trust_score < 0.2 + 最近用過 → 觸發告警,不 auto-deprecate"""
|
||||
from datetime import datetime, timezone
|
||||
|
||||
recent = datetime.now(timezone.utc)
|
||||
low_record = MagicMock()
|
||||
low_record.trust_score = 0.05
|
||||
low_record.playbook_id = "PB-LOW"
|
||||
low_record.last_used_at = recent # 最近用過 → kept
|
||||
low_record.created_at = recent
|
||||
|
||||
ok_record = MagicMock()
|
||||
ok_record.trust_score = 0.9
|
||||
ok_record.playbook_id = "PB-OK"
|
||||
ok_record.last_used_at = recent
|
||||
ok_record.created_at = recent
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.scalars.return_value.all.return_value = [low_record, ok_record]
|
||||
|
||||
mock_db = AsyncMock()
|
||||
mock_db.execute = AsyncMock(return_value=mock_result)
|
||||
mock_db.commit = AsyncMock()
|
||||
|
||||
alerter = AsyncMock()
|
||||
alerter.alert_governance = AsyncMock()
|
||||
@@ -110,9 +121,81 @@ class TestCheckTrustDrift:
|
||||
call_args = alerter.alert_governance.call_args
|
||||
assert call_args[0][0] == "trust_drift"
|
||||
assert call_args[0][1]["drifted_count"] == 1
|
||||
assert call_args[0][1]["auto_deprecated_count"] == 0
|
||||
assert result["drifted"] == 1
|
||||
assert result["auto_deprecated"] == 0
|
||||
assert result["checked"] == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_low_trust_unused_30d_auto_deprecates(self):
|
||||
"""trust < 0.2 + last_used > 30 天前 → 自動 status='deprecated'
|
||||
|
||||
2026-05-02 ogt + Claude Sonnet 4.6: 飛輪自治新路徑
|
||||
"""
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
old = datetime.now(timezone.utc) - timedelta(days=45)
|
||||
recent = datetime.now(timezone.utc)
|
||||
|
||||
stale_low = MagicMock()
|
||||
stale_low.trust_score = 0.1
|
||||
stale_low.playbook_id = "PB-STALE"
|
||||
stale_low.status = "approved"
|
||||
stale_low.last_used_at = old
|
||||
stale_low.created_at = old
|
||||
|
||||
fresh_low = MagicMock()
|
||||
fresh_low.trust_score = 0.1
|
||||
fresh_low.playbook_id = "PB-FRESH"
|
||||
fresh_low.status = "approved"
|
||||
fresh_low.last_used_at = recent # 30 天試用期內
|
||||
fresh_low.created_at = recent
|
||||
|
||||
never_used_old = MagicMock()
|
||||
never_used_old.trust_score = 0.05
|
||||
never_used_old.playbook_id = "PB-NEVER-USED-OLD"
|
||||
never_used_old.status = "approved"
|
||||
never_used_old.last_used_at = None # 從沒用過
|
||||
never_used_old.created_at = old # 但創建超過 30 天 → 該 deprecate
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.scalars.return_value.all.return_value = [
|
||||
stale_low, fresh_low, never_used_old,
|
||||
]
|
||||
|
||||
mock_db = AsyncMock()
|
||||
mock_db.execute = AsyncMock(return_value=mock_result)
|
||||
mock_db.commit = AsyncMock()
|
||||
|
||||
alerter = AsyncMock()
|
||||
alerter.alert_governance = AsyncMock()
|
||||
agent = _make_agent(alerter=alerter)
|
||||
|
||||
with patch("src.services.governance_agent.get_db_context") as mock_ctx:
|
||||
mock_ctx.return_value.__aenter__ = AsyncMock(return_value=mock_db)
|
||||
mock_ctx.return_value.__aexit__ = AsyncMock(return_value=False)
|
||||
|
||||
result = await agent.check_trust_drift()
|
||||
|
||||
# 兩個老的都被 deprecate
|
||||
assert stale_low.status == "deprecated"
|
||||
assert never_used_old.status == "deprecated"
|
||||
# 新的不動
|
||||
assert fresh_low.status == "approved"
|
||||
|
||||
# commit 必須至少被呼叫一次(assert_awaited 只驗證「有被 await」,不檢查次數)
|
||||
mock_db.commit.assert_awaited()
|
||||
|
||||
# alert payload 反映自治結果
|
||||
call_args = alerter.alert_governance.call_args[0][1]
|
||||
assert call_args["drifted_count"] == 3
|
||||
assert call_args["auto_deprecated_count"] == 2
|
||||
assert set(call_args["auto_deprecated_ids"]) == {"PB-STALE", "PB-NEVER-USED-OLD"}
|
||||
assert call_args["playbook_ids"] == ["PB-FRESH"]
|
||||
|
||||
assert result["auto_deprecated"] == 2
|
||||
assert result["kept"] == 1
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# check_knowledge_degradation
|
||||
|
||||
@@ -6,6 +6,27 @@
|
||||
|
||||
---
|
||||
|
||||
## 2026-05-02 | trust_drift 飛輪自治:低信任未使用 playbook 自動 deprecate
|
||||
|
||||
承接統帥對 governance 類告警的全面授權。trust_drift 過去只發 Telegram 告警,4 個低信任 playbook 一直在告警表內噴噪音。
|
||||
|
||||
### 完成
|
||||
- 新增閾值 `TRUST_DRIFT_AUTO_DEPRECATE_AFTER_DAYS = 30`。
|
||||
- 改寫 `governance_agent.check_trust_drift`:trust < 0.2 且 (`last_used_at` 早於 30 天前 或 從沒用過 + `created_at` 早於 30 天前) → 直接 `status = 'deprecated'` 並 commit。
|
||||
- alert payload 加 `auto_deprecated_count` / `auto_deprecated_ids`,`playbook_ids` 只列剩下需人工複核的(在試用期內)。
|
||||
- 試用期內(< 30 天)的低信任 playbook 仍會出現在 alert,給 SRE 手動覆核空間。
|
||||
|
||||
### 驗證
|
||||
- `pytest tests/test_governance_agent.py` → 20 passed。
|
||||
- 新增/更新共 3 個 case(1 個新增;2 個為既有測試,補上 `last_used_at` / `created_at` 與新的斷言):
|
||||
- 全部 ≥ 0.2 → 不告警,不 deprecate
|
||||
- 低信任 + 最近用過 → 告警但不 deprecate
|
||||
- 低信任 + 30 天沒用 / 創建 30 天從沒用過 → 自動 deprecate
|
||||
|
||||
### 後續
|
||||
- 觀察 1 週看 deprecate 比例,若仍多需重新檢視 0.2 閾值或 EWMA 退化曲線。
|
||||
- knowledge_degradation(63% stale)/ governance_slo_data_gap 需獨立設計(refresh job + ADR-100 emitter),下一輪處理。
|
||||
|
||||
## 2026-05-02 | 手動批准路徑 SSH action 解析修補
|
||||
|
||||
承接同日早上 docker prune 飛輪部署後,使用者反饋仍有 incident 點「批准」後執行失敗。AOL 顯示 `Could not parse operation type`,根因是 `parse_operation_from_action` 只懂 kubectl 與中文重啟,不認識 `ssh ...` action,所有 SSH 修復動作從 K8s executor 退場。
|
||||
|
||||
Reference in New Issue
Block a user