diff --git a/apps/api/src/services/governance_agent.py b/apps/api/src/services/governance_agent.py index bf8e9ab9..a84802bc 100644 --- a/apps/api/src/services/governance_agent.py +++ b/apps/api/src/services/governance_agent.py @@ -39,6 +39,10 @@ logger = structlog.get_logger(__name__) # 閾值常數 # ============================================================================= TRUST_DRIFT_THRESHOLD = 0.2 # playbook trust_score 低於此值 → 告警 +# 2026-05-02 ogt + Claude Sonnet 4.6: trust_drift auto-deprecate +# trust < 0.2 + (last_used > N 天前 OR 從沒用過 + 創建超過 N 天) → 自動 deprecate +# 這個 N 設 30 天,給 playbook 充足試用期,避免新提案被早期幾次失敗就廢棄 +TRUST_DRIFT_AUTO_DEPRECATE_AFTER_DAYS = 30 KM_STALE_DAYS = 7 # 知識條目超過幾天未更新視為陳舊 KM_STALE_RATIO = 0.20 # 陳舊比例超過此值 → 告警 HALLUCINATION_RATE_THRESHOLD = 0.10 # LLM verification failed 比例超過此值 → 告警 @@ -69,9 +73,12 @@ class GovernanceAgent: # ========================================================================= async def check_trust_drift(self) -> dict[str, Any]: - """Playbook trust_score < 0.2 → 告警建議廢棄 + """Playbook trust_score < 0.2 → 告警建議廢棄;30 天沒用過的直接 auto-deprecate 2026-04-26 P2.2 by Claude + 2026-05-02 ogt + Claude Sonnet 4.6: 加 auto_deprecate_low_trust_unused 自治路徑 + 守衛條件:trust < 0.2 AND (last_used_at < 30 天前 OR 從未使用且創建超過 30 天) + → status 改 'deprecated',alert 改報「N 個告警 + M 個 auto-deprecated」 """ async with get_db_context() as db: result = await db.execute( @@ -81,9 +88,31 @@ class GovernanceAgent: ) all_records = result.scalars().all() - total = len(all_records) - drifted = [r for r in all_records if float(r.trust_score) < TRUST_DRIFT_THRESHOLD] - drifted_ids = [r.playbook_id for r in drifted[:10]] + total = len(all_records) + drifted = [r for r in all_records if float(r.trust_score) < TRUST_DRIFT_THRESHOLD] + + # auto-deprecate eligibility + cutoff = now_taipei() - timedelta(days=TRUST_DRIFT_AUTO_DEPRECATE_AFTER_DAYS) + auto_deprecated_ids: list[str] = [] + kept_ids: list[str] = [] + for r in drifted: + last = r.last_used_at + created = r.created_at + # 沒用過 
→ 用 created_at 作為「進入系統時間」 + ref_time = last if last is not None else created + if ref_time is not None and ref_time < cutoff: + r.status = "deprecated" + auto_deprecated_ids.append(r.playbook_id) + else: + kept_ids.append(r.playbook_id) + + if auto_deprecated_ids: + await db.commit() + logger.info( + "governance_trust_drift_auto_deprecated", + count=len(auto_deprecated_ids), + ids=auto_deprecated_ids[:10], + ) if drifted: await self._alert( @@ -91,8 +120,11 @@ class GovernanceAgent: { "drifted_count": len(drifted), "total_playbooks": total, - "playbook_ids": drifted_ids, + "playbook_ids": kept_ids[:10], + "auto_deprecated_count": len(auto_deprecated_ids), + "auto_deprecated_ids": auto_deprecated_ids[:10], "threshold": TRUST_DRIFT_THRESHOLD, + "auto_deprecate_after_days": TRUST_DRIFT_AUTO_DEPRECATE_AFTER_DAYS, }, ) @@ -100,8 +132,15 @@ class GovernanceAgent: "governance_trust_drift_checked", total=total, drifted=len(drifted), + auto_deprecated=len(auto_deprecated_ids), + kept=len(kept_ids), ) - return {"checked": total, "drifted": len(drifted)} + return { + "checked": total, + "drifted": len(drifted), + "auto_deprecated": len(auto_deprecated_ids), + "kept": len(kept_ids), + } # ========================================================================= # 2. 
知識庫衰退 diff --git a/apps/api/tests/test_governance_agent.py b/apps/api/tests/test_governance_agent.py index edb59437..15a05a93 100644 --- a/apps/api/tests/test_governance_agent.py +++ b/apps/api/tests/test_governance_agent.py @@ -58,12 +58,15 @@ class TestCheckTrustDrift: mock_record = MagicMock() mock_record.trust_score = 0.8 mock_record.playbook_id = "PB-001" + mock_record.last_used_at = None + mock_record.created_at = None mock_result = MagicMock() mock_result.scalars.return_value.all.return_value = [mock_record] mock_db = AsyncMock() mock_db.execute = AsyncMock(return_value=mock_result) + mock_db.commit = AsyncMock() alerter = AsyncMock() alerter.alert_governance = AsyncMock() @@ -81,20 +84,28 @@ class TestCheckTrustDrift: @pytest.mark.asyncio async def test_drifted_playbooks_trigger_alert(self): - """有 playbook trust_score < 0.2 → 觸發告警""" + """有 playbook trust_score < 0.2 + 最近用過 → 觸發告警,不 auto-deprecate""" + from datetime import datetime, timezone + + recent = datetime.now(timezone.utc) low_record = MagicMock() low_record.trust_score = 0.05 low_record.playbook_id = "PB-LOW" + low_record.last_used_at = recent # 最近用過 → kept + low_record.created_at = recent ok_record = MagicMock() ok_record.trust_score = 0.9 ok_record.playbook_id = "PB-OK" + ok_record.last_used_at = recent + ok_record.created_at = recent mock_result = MagicMock() mock_result.scalars.return_value.all.return_value = [low_record, ok_record] mock_db = AsyncMock() mock_db.execute = AsyncMock(return_value=mock_result) + mock_db.commit = AsyncMock() alerter = AsyncMock() alerter.alert_governance = AsyncMock() @@ -110,9 +121,81 @@ class TestCheckTrustDrift: call_args = alerter.alert_governance.call_args assert call_args[0][0] == "trust_drift" assert call_args[0][1]["drifted_count"] == 1 + assert call_args[0][1]["auto_deprecated_count"] == 0 assert result["drifted"] == 1 + assert result["auto_deprecated"] == 0 assert result["checked"] == 2 + @pytest.mark.asyncio + async def 
test_low_trust_unused_30d_auto_deprecates(self): + """trust < 0.2 + last_used > 30 天前 → 自動 status='deprecated' + + 2026-05-02 ogt + Claude Sonnet 4.6: 飛輪自治新路徑 + """ + from datetime import datetime, timedelta, timezone + + old = datetime.now(timezone.utc) - timedelta(days=45) + recent = datetime.now(timezone.utc) + + stale_low = MagicMock() + stale_low.trust_score = 0.1 + stale_low.playbook_id = "PB-STALE" + stale_low.status = "approved" + stale_low.last_used_at = old + stale_low.created_at = old + + fresh_low = MagicMock() + fresh_low.trust_score = 0.1 + fresh_low.playbook_id = "PB-FRESH" + fresh_low.status = "approved" + fresh_low.last_used_at = recent # 7 天試用期內 + fresh_low.created_at = recent + + never_used_old = MagicMock() + never_used_old.trust_score = 0.05 + never_used_old.playbook_id = "PB-NEVER-USED-OLD" + never_used_old.status = "approved" + never_used_old.last_used_at = None # 從沒用過 + never_used_old.created_at = old # 但創建超過 30 天 → 該 deprecate + + mock_result = MagicMock() + mock_result.scalars.return_value.all.return_value = [ + stale_low, fresh_low, never_used_old, + ] + + mock_db = AsyncMock() + mock_db.execute = AsyncMock(return_value=mock_result) + mock_db.commit = AsyncMock() + + alerter = AsyncMock() + alerter.alert_governance = AsyncMock() + agent = _make_agent(alerter=alerter) + + with patch("src.services.governance_agent.get_db_context") as mock_ctx: + mock_ctx.return_value.__aenter__ = AsyncMock(return_value=mock_db) + mock_ctx.return_value.__aexit__ = AsyncMock(return_value=False) + + result = await agent.check_trust_drift() + + # 兩個老的都被 deprecate + assert stale_low.status == "deprecated" + assert never_used_old.status == "deprecated" + # 新的不動 + assert fresh_low.status == "approved" + + # commit 必須被呼叫一次 + mock_db.commit.assert_awaited() + + # alert payload 反映自治結果 + call_args = alerter.alert_governance.call_args[0][1] + assert call_args["drifted_count"] == 3 + assert call_args["auto_deprecated_count"] == 2 + assert 
set(call_args["auto_deprecated_ids"]) == {"PB-STALE", "PB-NEVER-USED-OLD"} + assert call_args["playbook_ids"] == ["PB-FRESH"] + + assert result["auto_deprecated"] == 2 + assert result["kept"] == 1 + # ============================================================================= # check_knowledge_degradation diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index cf39e60f..05a8cb27 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -6,6 +6,27 @@ --- +## 2026-05-02 | trust_drift 飛輪自治:低信任未使用 playbook 自動 deprecate + +承接統帥對 governance 類告警的全面授權。trust_drift 過去只發 Telegram 告警,4 個低信任 playbook 一直在告警表內噴噪音。 + +### 完成 +- 新增閾值 `TRUST_DRIFT_AUTO_DEPRECATE_AFTER_DAYS = 30`。 +- 改寫 `governance_agent.check_trust_drift`:trust < 0.2 且 (`last_used_at` 早於 30 天前 或 從沒用過 + `created_at` 早於 30 天前) → 直接 `status = 'deprecated'` 並 commit。 +- alert payload 加 `auto_deprecated_count` / `auto_deprecated_ids`,`playbook_ids` 只列剩下需人工複核的(在試用期內)。 +- 試用期內(< 30 天)的低信任 playbook 仍會出現在 alert,給 SRE 手動複核空間。 + +### 驗證 +- `pytest tests/test_governance_agent.py` → 20 passed。 +- `check_trust_drift` 共涵蓋 3 個 case(調整既有 2 個、新增 1 個): + - (既有,補欄位)全部 ≥ 0.2 → 不告警,不 deprecate + - (既有,調整)低信任 + 最近用過 → 告警但不 deprecate + - (新增)低信任 + 超過 30 天沒用,或創建超過 30 天且從沒用過 → 自動 deprecate + +### 後續 +- 觀察 1 週看 deprecate 比例,若仍多需重新檢視 0.2 閾值或 EWMA 退化曲線。 +- knowledge_degradation(63% stale)/ governance_slo_data_gap 需獨立設計(refresh job + ADR-100 emitter),下一輪處理。 + ## 2026-05-02 | 手動批准路徑 SSH action 解析修補 承接同日早上 docker prune 飛輪部署後,使用者反饋仍有 incident 點「批准」後執行失敗。AOL 顯示 `Could not parse operation type`,根因是 `parse_operation_from_action` 只懂 kubectl 與中文重啟,不認識 `ssh ...` action,所有 SSH 修復動作從 K8s executor 退場。