Files
awoooi/apps/api/tests/test_governance_agent.py
Your Name cc547736ab feat(wave6-8): P2.1 fusion + P2.2 governance + P2.4 consensus + Wave 7/8 BLOCKER 修復
承接 Wave 6/7/8 多 engineer 在 agent 限額前完成的代碼,補 commit 解 production
HEAD 隱性 import error(decision_fusion 已被 decision_manager 引用但檔案 untracked)。

新增(後端核心):
- decision_fusion.py (562 行) — P2.1 方法 III(OpenClaw + Hermes + Elephant 三 LLM 融合)
- aiops_timeline.py + aiops_timeline_service.py — critic B4 修復
  /api/v1/aiops/timeline endpoint,DB 存取抽到 service 層遵守 leWOOOgo 積木化
- migrations/p2_decision_fusion_columns.sql + rollback — approval_records fusion 欄位

修改(後端整合):
- decision_manager.py — fusion 三斷鏈修補(critic B1+B2+B3):
  · B1: 寫 _evidence_snapshot_ref 到 token.proposal_data
  · B2: fusion 前計算 complexity_score 並寫 token
  · B3: fusion composite 寫 token.proposal_data["decision_fusion"]
- auto_approve.py — fusion + consensus 認識(critic B3+B5):
  · composite > 0.7 → auto_execute_eligible bypass min_confidence
  · source=consensus_engine + score>=0.6 → 規則可信路徑
- consensus_engine.py — db-fix _save_consensus 重用 agent_sessions
- governance_agent.py — db-fix _alert PG 寫入 ai_governance_events
- approval_db.py — fusion 3 欄位 + 2 partial index + CheckConstraint
- db/models.py — schema 對齊 migration
- core/config.py — vuln #1 修復:OLLAMA_URL/_FALLBACK_URL field_validator
  拒絕公網 IP + 外部域名,僅允許私網/loopback/K8s SVC 白名單
- core/feature_flags.py — P2 fusion + consensus flags
- main.py — governance_agent lifespan 啟動
- failover_alerter.py — Wave8-X2: in-memory dedup fallback(Redis 拒絕後不 fail-open)
- ollama_*.py — metrics 整合 + recovery 改善
- auto_repair_service.py — verifier 接線

新增(測試 2438 行):
- test_decision_fusion.py / test_governance_agent.py / test_consensus_integration.py
- test_p2_db_fixes.py / test_wave8_fusion_fixes.py
- test_config_url_validation.py(vuln #1 12 tests)
- test_failover_alerter.py +Wave8-X2 in-memory dedup 補測

驗收: 116 tests pass (decision_fusion + wave8_fusion + config_url + consensus +
                      governance + p2_db_fixes + failover_alerter)

Conflict resolution:
- 3 檔(config.py + auto_approve.py + decision_manager.py)git stash pop 衝突
  保留 stashed (engineer 最終版),補回 ValueError 「公網 IP」字樣對齊 test

Note: 此 commit 解 production HEAD 隱性 import error
仍未修: vuln #4 prompt injection / debugger B14 quota fail-closed
       / B25-B26 drain_pending_tasks / B8 governance fail alert

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-Authored-By: Multiple Engineers (Wave 6/7/8) <noreply@anthropic.com>
2026-04-27 08:11:40 +08:00

560 lines
22 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# apps/api/tests/test_governance_agent.py | 2026-04-26 @ Asia/Taipei
# 2026-04-26 P2.2 by Claude — GovernanceAgent 單元測試
"""
GovernanceAgent 單元測試 — P2.2
================================
測試覆蓋:
- check_trust_drift : 觸發 / 不觸發
- check_knowledge_degradation : 觸發 / 不觸發
- check_llm_hallucination : 觸發 / 不觸發 / 空資料
- check_execution_blast_radius : 觸發 / 不觸發 / 空資料
- run_self_check : 全跑 + exception 隔離(單一 check 拋例外不影響其他)
- alert_governance : FailoverAlerter dedup 邏輯
測試分類：unit（全部 mock DB / alerter，無真實 PG 依賴）
"""
from __future__ import annotations
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from src.services.governance_agent import (
GovernanceAgent,
get_governance_agent,
reset_governance_agent,
run_governance_loop,
EXECUTION_FAIL_RATE_THRESHOLD,
HALLUCINATION_RATE_THRESHOLD,
KM_STALE_RATIO,
TRUST_DRIFT_THRESHOLD,
)
# =============================================================================
# Helpers
# =============================================================================
def _make_agent(alerter=None) -> GovernanceAgent:
    """Return a GovernanceAgent wired to ``alerter``.

    When no alerter is supplied, a fresh ``AsyncMock`` with an awaitable
    ``alert_governance`` attribute is injected instead.
    """
    if alerter is not None:
        return GovernanceAgent(alerter=alerter)
    stub_alerter = AsyncMock()
    stub_alerter.alert_governance = AsyncMock()
    return GovernanceAgent(alerter=stub_alerter)
# =============================================================================
# check_trust_drift
# =============================================================================
class TestCheckTrustDrift:
    """check_trust_drift: playbook trust-score drift detection."""

    @staticmethod
    def _playbook(playbook_id, trust_score):
        """Build a stand-in playbook row exposing the two attributes set here."""
        row = MagicMock()
        row.trust_score = trust_score
        row.playbook_id = playbook_id
        return row

    @staticmethod
    def _db_returning(rows):
        """Build an async DB mock whose execute() result yields ``rows`` via scalars().all()."""
        query_result = MagicMock()
        query_result.scalars.return_value.all.return_value = rows
        db = AsyncMock()
        db.execute = AsyncMock(return_value=query_result)
        return db

    @pytest.mark.asyncio
    async def test_no_drifted_playbooks_no_alert(self):
        """Every playbook is at or above the drift threshold (0.2 per the original note): no alert."""
        db = self._db_returning([self._playbook("PB-001", 0.8)])
        alerter = AsyncMock()
        alerter.alert_governance = AsyncMock()
        agent = _make_agent(alerter=alerter)

        with patch("src.services.governance_agent.get_db_context") as ctx_factory:
            # Make the patched get_db_context() usable as `async with ... as db`.
            session_cm = ctx_factory.return_value
            session_cm.__aenter__ = AsyncMock(return_value=db)
            session_cm.__aexit__ = AsyncMock(return_value=False)
            outcome = await agent.check_trust_drift()

        alerter.alert_governance.assert_not_called()
        assert outcome["drifted"] == 0
        assert outcome["checked"] == 1

    @pytest.mark.asyncio
    async def test_drifted_playbooks_trigger_alert(self):
        """One playbook below the drift threshold (0.2 per the original note): alert is sent."""
        db = self._db_returning(
            [
                self._playbook("PB-LOW", 0.05),
                self._playbook("PB-OK", 0.9),
            ]
        )
        alerter = AsyncMock()
        alerter.alert_governance = AsyncMock()
        agent = _make_agent(alerter=alerter)

        with patch("src.services.governance_agent.get_db_context") as ctx_factory:
            session_cm = ctx_factory.return_value
            session_cm.__aenter__ = AsyncMock(return_value=db)
            session_cm.__aexit__ = AsyncMock(return_value=False)
            outcome = await agent.check_trust_drift()

        alerter.alert_governance.assert_called_once()
        # The alert is expected as positional (event_type, payload).
        positional = alerter.alert_governance.call_args[0]
        assert positional[0] == "trust_drift"
        assert positional[1]["drifted_count"] == 1
        assert outcome["drifted"] == 1
        assert outcome["checked"] == 2
# =============================================================================
# check_knowledge_degradation
# =============================================================================
class TestCheckKnowledgeDegradation:
    """check_knowledge_degradation: stale-entry ratio of the knowledge base."""

    @pytest.mark.asyncio
    async def test_stale_ratio_below_threshold_no_alert(self):
        """A stale ratio under the threshold (20% per the original note) sends no alert."""
        # total=10, stale=1 -> ratio=0.1
        mock_db = AsyncMock()
        total_mock = MagicMock()
        total_mock.scalar.return_value = 10
        stale_mock = MagicMock()
        stale_mock.scalar.return_value = 1
        # side_effect order: the first execute() yields the total count,
        # the second yields the stale count.
        mock_db.execute = AsyncMock(side_effect=[total_mock, stale_mock])
        alerter = AsyncMock()
        alerter.alert_governance = AsyncMock()
        agent = _make_agent(alerter=alerter)

        with patch("src.services.governance_agent.get_db_context") as mock_ctx:
            # Make the patched get_db_context() usable as `async with ... as db`.
            mock_ctx.return_value.__aenter__ = AsyncMock(return_value=mock_db)
            mock_ctx.return_value.__aexit__ = AsyncMock(return_value=False)
            result = await agent.check_knowledge_degradation()

        alerter.alert_governance.assert_not_called()
        assert result["stale"] == 1
        assert result["total"] == 10
        # The ratio is a computed float; compare with a tolerance so the test
        # does not depend on the exact arithmetic or rounding used.
        assert result["ratio"] == pytest.approx(0.1)

    @pytest.mark.asyncio
    async def test_stale_ratio_above_threshold_triggers_alert(self):
        """A stale ratio over the threshold (20% per the original note) sends an alert."""
        # total=10, stale=3 -> ratio=0.3
        mock_db = AsyncMock()
        total_mock = MagicMock()
        total_mock.scalar.return_value = 10
        stale_mock = MagicMock()
        stale_mock.scalar.return_value = 3
        mock_db.execute = AsyncMock(side_effect=[total_mock, stale_mock])
        alerter = AsyncMock()
        alerter.alert_governance = AsyncMock()
        agent = _make_agent(alerter=alerter)

        with patch("src.services.governance_agent.get_db_context") as mock_ctx:
            mock_ctx.return_value.__aenter__ = AsyncMock(return_value=mock_db)
            mock_ctx.return_value.__aexit__ = AsyncMock(return_value=False)
            result = await agent.check_knowledge_degradation()

        alerter.alert_governance.assert_called_once()
        call_args = alerter.alert_governance.call_args
        # First positional argument of the alert is the event type.
        assert call_args[0][0] == "knowledge_degradation"
        assert result["stale"] == 3
        assert result["ratio"] == pytest.approx(0.3)
# =============================================================================
# check_llm_hallucination
# =============================================================================
class TestCheckLlmHallucination:
    """check_llm_hallucination: share of failed evidence records."""

    @pytest.mark.asyncio
    async def test_empty_evidence_no_alert(self):
        """No evidence rows: no alert, and the rate is reported as 0."""
        mock_result = MagicMock()
        mock_result.scalars.return_value.all.return_value = []
        mock_db = AsyncMock()
        mock_db.execute = AsyncMock(return_value=mock_result)
        alerter = AsyncMock()
        alerter.alert_governance = AsyncMock()
        agent = _make_agent(alerter=alerter)

        with patch("src.services.governance_agent.get_db_context") as mock_ctx:
            # Make the patched get_db_context() usable as `async with ... as db`.
            mock_ctx.return_value.__aenter__ = AsyncMock(return_value=mock_db)
            mock_ctx.return_value.__aexit__ = AsyncMock(return_value=False)
            result = await agent.check_llm_hallucination()

        alerter.alert_governance.assert_not_called()
        assert result["rate"] == 0.0
        assert result["total"] == 0

    @pytest.mark.asyncio
    async def test_hallucination_below_threshold_no_alert(self):
        """Failed share under the threshold (10% per the original note) sends no alert."""
        # 8 failed out of 100 -> 8%
        rows = ["success"] * 92 + ["failed"] * 8
        mock_result = MagicMock()
        mock_result.scalars.return_value.all.return_value = rows
        mock_db = AsyncMock()
        mock_db.execute = AsyncMock(return_value=mock_result)
        alerter = AsyncMock()
        alerter.alert_governance = AsyncMock()
        agent = _make_agent(alerter=alerter)

        with patch("src.services.governance_agent.get_db_context") as mock_ctx:
            mock_ctx.return_value.__aenter__ = AsyncMock(return_value=mock_db)
            mock_ctx.return_value.__aexit__ = AsyncMock(return_value=False)
            result = await agent.check_llm_hallucination()

        alerter.alert_governance.assert_not_called()
        assert result["failed"] == 8
        # The rate is a computed float; compare with a tolerance so the test
        # does not depend on the exact arithmetic or rounding used.
        assert result["rate"] == pytest.approx(0.08)

    @pytest.mark.asyncio
    async def test_hallucination_above_threshold_triggers_alert(self):
        """Failed share over the threshold (10% per the original note) sends an alert."""
        # 15 failed out of 100 -> 15%
        rows = ["success"] * 85 + ["failed"] * 15
        mock_result = MagicMock()
        mock_result.scalars.return_value.all.return_value = rows
        mock_db = AsyncMock()
        mock_db.execute = AsyncMock(return_value=mock_result)
        alerter = AsyncMock()
        alerter.alert_governance = AsyncMock()
        agent = _make_agent(alerter=alerter)

        with patch("src.services.governance_agent.get_db_context") as mock_ctx:
            mock_ctx.return_value.__aenter__ = AsyncMock(return_value=mock_db)
            mock_ctx.return_value.__aexit__ = AsyncMock(return_value=False)
            result = await agent.check_llm_hallucination()

        alerter.alert_governance.assert_called_once()
        call_args = alerter.alert_governance.call_args
        # First positional argument of the alert is the event type.
        assert call_args[0][0] == "llm_hallucination"
        assert result["failed"] == 15
        assert result["rate"] == pytest.approx(0.15)
# =============================================================================
# check_execution_blast_radius
# =============================================================================
class TestCheckExecutionBlastRadius:
    """check_execution_blast_radius: share of failed executions."""

    @pytest.mark.asyncio
    async def test_empty_executions_no_alert(self):
        """No execution rows: no alert is sent."""
        mock_result = MagicMock()
        mock_result.scalars.return_value.all.return_value = []
        mock_db = AsyncMock()
        mock_db.execute = AsyncMock(return_value=mock_result)
        alerter = AsyncMock()
        alerter.alert_governance = AsyncMock()
        agent = _make_agent(alerter=alerter)

        with patch("src.services.governance_agent.get_db_context") as mock_ctx:
            # Make the patched get_db_context() usable as `async with ... as db`.
            mock_ctx.return_value.__aenter__ = AsyncMock(return_value=mock_db)
            mock_ctx.return_value.__aexit__ = AsyncMock(return_value=False)
            result = await agent.check_execution_blast_radius()

        alerter.alert_governance.assert_not_called()
        assert result["total"] == 0
        assert result["rate"] == 0.0

    @pytest.mark.asyncio
    async def test_failure_rate_below_threshold_no_alert(self):
        """Failure share under the threshold (15% per the original note) sends no alert."""
        # 10 False out of 100 -> 10%
        rows = [True] * 90 + [False] * 10
        mock_result = MagicMock()
        mock_result.scalars.return_value.all.return_value = rows
        mock_db = AsyncMock()
        mock_db.execute = AsyncMock(return_value=mock_result)
        alerter = AsyncMock()
        alerter.alert_governance = AsyncMock()
        agent = _make_agent(alerter=alerter)

        with patch("src.services.governance_agent.get_db_context") as mock_ctx:
            mock_ctx.return_value.__aenter__ = AsyncMock(return_value=mock_db)
            mock_ctx.return_value.__aexit__ = AsyncMock(return_value=False)
            result = await agent.check_execution_blast_radius()

        alerter.alert_governance.assert_not_called()
        assert result["failed"] == 10
        # The rate is a computed float; compare with a tolerance so the test
        # does not depend on the exact arithmetic or rounding used.
        assert result["rate"] == pytest.approx(0.1)

    @pytest.mark.asyncio
    async def test_failure_rate_above_threshold_triggers_alert(self):
        """Failure share over the threshold (15% per the original note) sends an alert."""
        # 20 False out of 100 -> 20%
        rows = [True] * 80 + [False] * 20
        mock_result = MagicMock()
        mock_result.scalars.return_value.all.return_value = rows
        mock_db = AsyncMock()
        mock_db.execute = AsyncMock(return_value=mock_result)
        alerter = AsyncMock()
        alerter.alert_governance = AsyncMock()
        agent = _make_agent(alerter=alerter)

        with patch("src.services.governance_agent.get_db_context") as mock_ctx:
            mock_ctx.return_value.__aenter__ = AsyncMock(return_value=mock_db)
            mock_ctx.return_value.__aexit__ = AsyncMock(return_value=False)
            result = await agent.check_execution_blast_radius()

        alerter.alert_governance.assert_called_once()
        call_args = alerter.alert_governance.call_args
        # First positional argument of the alert is the event type.
        assert call_args[0][0] == "execution_blast_radius"
        assert result["failed"] == 20
        assert result["rate"] == pytest.approx(0.2)
# =============================================================================
# run_self_check — exception 隔離
# =============================================================================
class TestRunSelfCheck:
    """run_self_check: runs every check and isolates per-check exceptions."""

    @pytest.mark.asyncio
    async def test_all_checks_run_successfully(self):
        """All four checks succeed: every result key is present and none carries an error."""
        agent = _make_agent()
        # Stub each of the four checks with canned data.
        agent.check_trust_drift = AsyncMock(return_value={"checked": 5, "drifted": 0})
        agent.check_knowledge_degradation = AsyncMock(return_value={"total": 10, "stale": 1, "ratio": 0.1})
        agent.check_llm_hallucination = AsyncMock(return_value={"total": 100, "failed": 5, "rate": 0.05})
        agent.check_execution_blast_radius = AsyncMock(return_value={"total": 100, "failed": 8, "rate": 0.08})

        results = await agent.run_self_check()

        # Verify the "no error" claim for every check, not just the first one.
        for key in ("trust_drift", "knowledge_degradation",
                    "llm_hallucination", "execution_blast_radius"):
            assert key in results
            assert "error" not in results[key]

    @pytest.mark.asyncio
    async def test_one_check_fails_others_still_run(self):
        """One check raises: the others still run and only the failed one has an error key."""
        agent = _make_agent()
        agent.check_trust_drift = AsyncMock(side_effect=RuntimeError("DB connection failed"))
        agent.check_knowledge_degradation = AsyncMock(return_value={"total": 5, "stale": 0, "ratio": 0.0})
        agent.check_llm_hallucination = AsyncMock(return_value={"total": 50, "failed": 2, "rate": 0.04})
        agent.check_execution_blast_radius = AsyncMock(return_value={"total": 50, "failed": 3, "rate": 0.06})

        results = await agent.run_self_check()

        # The failing check reports its exception message under "error".
        assert "error" in results["trust_drift"]
        assert "DB connection failed" in results["trust_drift"]["error"]
        # The other three checks are unaffected.
        assert results["knowledge_degradation"]["total"] == 5
        assert results["llm_hallucination"]["total"] == 50
        assert results["execution_blast_radius"]["total"] == 50
        for key in ("knowledge_degradation", "llm_hallucination", "execution_blast_radius"):
            assert "error" not in results[key]

    @pytest.mark.asyncio
    async def test_all_checks_fail_returns_all_errors(self):
        """Every check raises: all four result keys carry an error."""
        agent = _make_agent()
        for attr in ["check_trust_drift", "check_knowledge_degradation",
                     "check_llm_hallucination", "check_execution_blast_radius"]:
            setattr(agent, attr, AsyncMock(side_effect=Exception("mock failure")))

        results = await agent.run_self_check()

        assert len(results) == 4
        for key in ["trust_drift", "knowledge_degradation", "llm_hallucination", "execution_blast_radius"]:
            assert "error" in results[key]
# =============================================================================
# FailoverAlerter.alert_governance — dedup 邏輯
# =============================================================================
class TestAlertGovernance:
    """FailoverAlerter.alert_governance: Redis-backed dedup behaviour."""

    @pytest.mark.asyncio
    async def test_first_call_sends_message(self):
        """Dedup miss (first occurrence): the alert is sent."""
        from src.services.failover_alerter import FailoverAlerter

        redis_stub = AsyncMock()
        # A truthy SET result stands for "key was newly set" (first time).
        redis_stub.set = AsyncMock(return_value=True)
        failover = FailoverAlerter(redis_client=redis_stub)

        with patch.object(failover, "_send", new_callable=AsyncMock) as send_spy:
            await failover.alert_governance("trust_drift", {"drifted_count": 2})

        send_spy.assert_called_once()

    @pytest.mark.asyncio
    async def test_dedup_blocks_second_call(self):
        """Dedup hit (already sent): the alert is not sent again."""
        from src.services.failover_alerter import FailoverAlerter

        redis_stub = AsyncMock()
        # A None SET result stands for "key already exists".
        redis_stub.set = AsyncMock(return_value=None)
        failover = FailoverAlerter(redis_client=redis_stub)

        with patch.object(failover, "_send", new_callable=AsyncMock) as send_spy:
            await failover.alert_governance("trust_drift", {"drifted_count": 2})

        send_spy.assert_not_called()

    @pytest.mark.asyncio
    async def test_different_event_types_independent_dedup(self):
        """Each event_type gets its own dedup key."""
        from src.services.failover_alerter import FailoverAlerter

        seen_keys = []

        async def record_set(key, value, ex, nx):
            # Record the dedup key and always report "first time".
            seen_keys.append(key)
            return True

        redis_stub = AsyncMock()
        redis_stub.set = record_set
        failover = FailoverAlerter(redis_client=redis_stub)

        with patch.object(failover, "_send", new_callable=AsyncMock):
            for event_type in ("trust_drift", "llm_hallucination"):
                await failover.alert_governance(event_type, {})

        # One SET per alert, each key mentioning its own event type.
        assert len(seen_keys) == 2
        assert any("trust_drift" in key for key in seen_keys)
        assert any("llm_hallucination" in key for key in seen_keys)
# =============================================================================
# B8 — run_self_check 全失敗聚合告警
# 2026-04-27 Wave8-X3 by Claude — governance silent failure alert
# =============================================================================
class TestRunSelfCheckGlobalFailureAlert:
    """When 3 or more checks fail, a governance_self_failure alert must be sent."""

    @pytest.mark.asyncio
    async def test_three_checks_fail_triggers_governance_self_failure_alert(self):
        """Three failing checks trigger the governance_self_failure alert."""
        alerter = AsyncMock()
        alerter.alert_governance = AsyncMock()
        agent = _make_agent(alerter=alerter)
        failing = {
            "check_trust_drift": "db error 1",
            "check_knowledge_degradation": "db error 2",
            "check_llm_hallucination": "db error 3",
        }
        for method_name, message in failing.items():
            setattr(agent, method_name, AsyncMock(side_effect=Exception(message)))
        agent.check_execution_blast_radius = AsyncMock(
            return_value={"total": 10, "failed": 0, "rate": 0.0}
        )

        with patch("src.services.governance_agent.get_db_context") as ctx_factory:
            # Make the patched get_db_context() usable as `async with ... as db`.
            session_cm = ctx_factory.return_value
            session_cm.__aenter__ = AsyncMock(return_value=AsyncMock())
            session_cm.__aexit__ = AsyncMock(return_value=False)
            results = await agent.run_self_check()

        # Alerts go out through alerter.alert_governance; collect the event types.
        event_types = [sent.args[0] for sent in alerter.alert_governance.call_args_list]
        assert "governance_self_failure" in event_types
        # The three failed checks each carry an error.
        for key in ["trust_drift", "knowledge_degradation", "llm_hallucination"]:
            assert "error" in results[key]
        # The one successful check does not.
        assert "error" not in results["execution_blast_radius"]

    @pytest.mark.asyncio
    async def test_all_four_checks_fail_triggers_alert_with_four_failed(self):
        """All four checks fail: the alert payload lists all four under failed_checks."""
        alerter = AsyncMock()
        alerter.alert_governance = AsyncMock()
        agent = _make_agent(alerter=alerter)
        for method_name in ["check_trust_drift", "check_knowledge_degradation",
                            "check_llm_hallucination", "check_execution_blast_radius"]:
            setattr(agent, method_name, AsyncMock(side_effect=Exception("all down")))

        with patch("src.services.governance_agent.get_db_context") as ctx_factory:
            session_cm = ctx_factory.return_value
            session_cm.__aenter__ = AsyncMock(return_value=AsyncMock())
            session_cm.__aexit__ = AsyncMock(return_value=False)
            await agent.run_self_check()

        self_failure_alerts = [
            sent
            for sent in alerter.alert_governance.call_args_list
            if sent.args[0] == "governance_self_failure"
        ]
        assert len(self_failure_alerts) >= 1
        # Second positional argument of the alert is the payload dict.
        payload = self_failure_alerts[0].args[1]
        assert payload["total_checks"] == 4
        assert len(payload["failed_checks"]) == 4

    @pytest.mark.asyncio
    async def test_two_checks_fail_does_not_trigger_governance_self_failure(self):
        """Only two failing checks: below the 3-failure bar, so no governance_self_failure alert."""
        alerter = AsyncMock()
        alerter.alert_governance = AsyncMock()
        agent = _make_agent(alerter=alerter)
        for method_name in ("check_trust_drift", "check_knowledge_degradation"):
            setattr(agent, method_name, AsyncMock(side_effect=Exception("err")))
        for method_name in ("check_llm_hallucination", "check_execution_blast_radius"):
            setattr(
                agent,
                method_name,
                AsyncMock(return_value={"total": 10, "failed": 0, "rate": 0.0}),
            )

        with patch("src.services.governance_agent.get_db_context") as ctx_factory:
            session_cm = ctx_factory.return_value
            session_cm.__aenter__ = AsyncMock(return_value=AsyncMock())
            session_cm.__aexit__ = AsyncMock(return_value=False)
            await agent.run_self_check()

        event_types = [sent.args[0] for sent in alerter.alert_governance.call_args_list]
        assert "governance_self_failure" not in event_types