feat(governance): auto-deprecate low-trust unused playbooks (>30d)
Some checks failed
Code Review / ai-code-review (push) Successful in 41s
CD Pipeline / tests (push) Successful in 3m29s
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled

trust_drift previously fired alerts forever for playbooks stuck below
the 0.2 threshold. With user authorization for governance-class
auto-fixes, check_trust_drift now retires playbooks that have been
unused for 30+ days (or never used and created 30+ days ago) by
flipping status to 'deprecated' before alerting.

Alerts now report drifted_count, auto_deprecated_count, and the kept
playbook_ids that still need human review (those in their 30d trial
window). Existing alert noise from the four currently-drifted
playbooks should drop to whatever fraction is genuinely in trial.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Your Name
2026-05-02 12:07:32 +08:00
parent 607358c4dd
commit 3059897318
3 changed files with 150 additions and 7 deletions

View File

@@ -39,6 +39,10 @@ logger = structlog.get_logger(__name__)
# 閾值常數
# =============================================================================
TRUST_DRIFT_THRESHOLD = 0.2 # playbook trust_score 低於此值 → 告警
# 2026-05-02 ogt + Claude Sonnet 4.6: trust_drift auto-deprecate
# trust < 0.2 + (last_used > N 天前 OR 從沒用過 + 創建超過 N 天) → 自動 deprecate
# 這個 N 設 30 天,給 playbook 充足試用期,避免新提案被早期幾次失敗就廢棄
TRUST_DRIFT_AUTO_DEPRECATE_AFTER_DAYS = 30
KM_STALE_DAYS = 7 # 知識條目超過幾天未更新視為陳舊
KM_STALE_RATIO = 0.20 # 陳舊比例超過此值 → 告警
HALLUCINATION_RATE_THRESHOLD = 0.10 # LLM verification failed 比例超過此值 → 告警
@@ -69,9 +73,12 @@ class GovernanceAgent:
# =========================================================================
async def check_trust_drift(self) -> dict[str, Any]:
"""Playbook trust_score < 0.2 → 告警建議廢棄
"""Playbook trust_score < 0.2 → 告警建議廢棄30 天沒用過的直接 auto-deprecate
2026-04-26 P2.2 by Claude
2026-05-02 ogt + Claude Sonnet 4.6: 加 auto_deprecate_low_trust_unused 自治路徑
守衛條件trust < 0.2 AND (last_used_at < 30 天前 OR 從未使用且創建超過 30 天)
→ status 改 'deprecated'alert 改報「N 個告警 + M 個 auto-deprecated」
"""
async with get_db_context() as db:
result = await db.execute(
@@ -81,9 +88,31 @@ class GovernanceAgent:
)
all_records = result.scalars().all()
total = len(all_records)
drifted = [r for r in all_records if float(r.trust_score) < TRUST_DRIFT_THRESHOLD]
drifted_ids = [r.playbook_id for r in drifted[:10]]
total = len(all_records)
drifted = [r for r in all_records if float(r.trust_score) < TRUST_DRIFT_THRESHOLD]
# auto-deprecate eligibility
cutoff = now_taipei() - timedelta(days=TRUST_DRIFT_AUTO_DEPRECATE_AFTER_DAYS)
auto_deprecated_ids: list[str] = []
kept_ids: list[str] = []
for r in drifted:
last = r.last_used_at
created = r.created_at
# 沒用過 → 用 created_at 作為「進入系統時間」
ref_time = last if last is not None else created
if ref_time is not None and ref_time < cutoff:
r.status = "deprecated"
auto_deprecated_ids.append(r.playbook_id)
else:
kept_ids.append(r.playbook_id)
if auto_deprecated_ids:
await db.commit()
logger.info(
"governance_trust_drift_auto_deprecated",
count=len(auto_deprecated_ids),
ids=auto_deprecated_ids[:10],
)
if drifted:
await self._alert(
@@ -91,8 +120,11 @@ class GovernanceAgent:
{
"drifted_count": len(drifted),
"total_playbooks": total,
"playbook_ids": drifted_ids,
"playbook_ids": kept_ids[:10],
"auto_deprecated_count": len(auto_deprecated_ids),
"auto_deprecated_ids": auto_deprecated_ids[:10],
"threshold": TRUST_DRIFT_THRESHOLD,
"auto_deprecate_after_days": TRUST_DRIFT_AUTO_DEPRECATE_AFTER_DAYS,
},
)
@@ -100,8 +132,15 @@ class GovernanceAgent:
"governance_trust_drift_checked",
total=total,
drifted=len(drifted),
auto_deprecated=len(auto_deprecated_ids),
kept=len(kept_ids),
)
return {"checked": total, "drifted": len(drifted)}
return {
"checked": total,
"drifted": len(drifted),
"auto_deprecated": len(auto_deprecated_ids),
"kept": len(kept_ids),
}
# =========================================================================
# 2. 知識庫衰退

View File

@@ -58,12 +58,15 @@ class TestCheckTrustDrift:
mock_record = MagicMock()
mock_record.trust_score = 0.8
mock_record.playbook_id = "PB-001"
mock_record.last_used_at = None
mock_record.created_at = None
mock_result = MagicMock()
mock_result.scalars.return_value.all.return_value = [mock_record]
mock_db = AsyncMock()
mock_db.execute = AsyncMock(return_value=mock_result)
mock_db.commit = AsyncMock()
alerter = AsyncMock()
alerter.alert_governance = AsyncMock()
@@ -81,20 +84,28 @@ class TestCheckTrustDrift:
@pytest.mark.asyncio
async def test_drifted_playbooks_trigger_alert(self):
"""有 playbook trust_score < 0.2 → 觸發告警"""
"""有 playbook trust_score < 0.2 + 最近用過 → 觸發告警,不 auto-deprecate"""
from datetime import datetime, timezone
recent = datetime.now(timezone.utc)
low_record = MagicMock()
low_record.trust_score = 0.05
low_record.playbook_id = "PB-LOW"
low_record.last_used_at = recent # 最近用過 → kept
low_record.created_at = recent
ok_record = MagicMock()
ok_record.trust_score = 0.9
ok_record.playbook_id = "PB-OK"
ok_record.last_used_at = recent
ok_record.created_at = recent
mock_result = MagicMock()
mock_result.scalars.return_value.all.return_value = [low_record, ok_record]
mock_db = AsyncMock()
mock_db.execute = AsyncMock(return_value=mock_result)
mock_db.commit = AsyncMock()
alerter = AsyncMock()
alerter.alert_governance = AsyncMock()
@@ -110,9 +121,81 @@ class TestCheckTrustDrift:
call_args = alerter.alert_governance.call_args
assert call_args[0][0] == "trust_drift"
assert call_args[0][1]["drifted_count"] == 1
assert call_args[0][1]["auto_deprecated_count"] == 0
assert result["drifted"] == 1
assert result["auto_deprecated"] == 0
assert result["checked"] == 2
@pytest.mark.asyncio
async def test_low_trust_unused_30d_auto_deprecates(self):
    """Low-trust playbooks that have been idle for 30+ days get status='deprecated'.

    Three records with trust_score below the drift threshold are fed to
    check_trust_drift:

    * PB-STALE          -- last used 45 days ago      -> expected deprecated
    * PB-FRESH          -- last used just now         -> expected untouched
    * PB-NEVER-USED-OLD -- never used, created 45 days ago -> expected deprecated

    2026-05-02 ogt + Claude Sonnet 4.6: new self-governance ("flywheel") path.
    """
    from datetime import datetime, timedelta, timezone

    # 45 days is comfortably past the 30-day auto-deprecate cutoff.
    old = datetime.now(timezone.utc) - timedelta(days=45)
    recent = datetime.now(timezone.utc)

    stale_low = MagicMock()
    stale_low.trust_score = 0.1
    stale_low.playbook_id = "PB-STALE"
    stale_low.status = "approved"
    stale_low.last_used_at = old
    stale_low.created_at = old

    fresh_low = MagicMock()
    fresh_low.trust_score = 0.1
    fresh_low.playbook_id = "PB-FRESH"
    fresh_low.status = "approved"
    fresh_low.last_used_at = recent  # still inside the 30-day trial window
    fresh_low.created_at = recent

    never_used_old = MagicMock()
    never_used_old.trust_score = 0.05
    never_used_old.playbook_id = "PB-NEVER-USED-OLD"
    never_used_old.status = "approved"
    never_used_old.last_used_at = None  # never used
    never_used_old.created_at = old  # but created 30+ days ago -> should be deprecated

    mock_result = MagicMock()
    mock_result.scalars.return_value.all.return_value = [
        stale_low, fresh_low, never_used_old,
    ]
    mock_db = AsyncMock()
    mock_db.execute = AsyncMock(return_value=mock_result)
    mock_db.commit = AsyncMock()

    alerter = AsyncMock()
    alerter.alert_governance = AsyncMock()
    agent = _make_agent(alerter=alerter)

    with patch("src.services.governance_agent.get_db_context") as mock_ctx:
        mock_ctx.return_value.__aenter__ = AsyncMock(return_value=mock_db)
        mock_ctx.return_value.__aexit__ = AsyncMock(return_value=False)
        result = await agent.check_trust_drift()

    # Both old records are deprecated.
    assert stale_low.status == "deprecated"
    assert never_used_old.status == "deprecated"
    # The recently used record is left alone.
    assert fresh_low.status == "approved"
    # The status changes must be persisted with exactly one commit.
    mock_db.commit.assert_awaited_once()

    # The alert payload reflects what was auto-deprecated and what was kept.
    call_args = alerter.alert_governance.call_args[0][1]
    assert call_args["drifted_count"] == 3
    assert call_args["auto_deprecated_count"] == 2
    assert set(call_args["auto_deprecated_ids"]) == {"PB-STALE", "PB-NEVER-USED-OLD"}
    assert call_args["playbook_ids"] == ["PB-FRESH"]
    assert result["auto_deprecated"] == 2
    assert result["kept"] == 1
# =============================================================================
# check_knowledge_degradation

View File

@@ -6,6 +6,27 @@
---
## 2026-05-02 | trust_drift 飛輪自治:低信任未使用 playbook 自動 deprecate
承接統帥對 governance 類告警的全面授權。trust_drift 過去只發 Telegram 告警,4 個低信任 playbook 一直在告警表內噴噪音。
### 完成
- 新增閾值 `TRUST_DRIFT_AUTO_DEPRECATE_AFTER_DAYS = 30`
- 改寫 `governance_agent.check_trust_drift`:trust < 0.2 且(`last_used_at` 早於 30 天前,或從沒用過且 `created_at` 早於 30 天前)→ 直接 `status = 'deprecated'` 並 commit。
- alert payload 加 `auto_deprecated_count` / `auto_deprecated_ids`;`playbook_ids` 只列剩下需人工複核的(在試用期內)。
- 試用期內(< 30 天)的低信任 playbook 仍會出現在 alert,給 SRE 手動覆核空間。
### 驗證
- `pytest tests/test_governance_agent.py` → 20 passed。
- 涵蓋 3 個 case(1 個新增、2 個既有案例更新):
- 全部 ≥ 0.2 → 不告警,不 deprecate
- 低信任 + 最近用過 → 告警但不 deprecate
- 低信任 + 30 天沒用 / 創建 30 天從沒用過 → 自動 deprecate
### 後續
- 觀察 1 週看 deprecate 比例,若仍多需重新檢視 0.2 閾值或 EWMA 退化曲線。
- knowledge_degradation(63% stale)/ governance_slo_data_gap 需獨立設計(refresh job + ADR-100 emitter),下一輪處理。
## 2026-05-02 | 手動批准路徑 SSH action 解析修補
承接同日早上 docker prune 飛輪部署後,使用者反饋仍有 incident 點「批准」後執行失敗。AOL 顯示 `Could not parse operation type`,根因是 `parse_operation_from_action` 只懂 kubectl 與中文重啟,不認識 `ssh ...` action,所有 SSH 修復動作從 K8s executor 退場。