Files
awoooi/apps/api/tests/test_wave8_fusion_fixes.py
Your Name cc547736ab feat(wave6-8): P2.1 fusion + P2.2 governance + P2.4 consensus + Wave 7/8 BLOCKER 修復
承接 Wave 6/7/8 多 engineer 在 agent 限額前完成的代碼,補 commit 解 production
HEAD 隱性 import error(decision_fusion 已被 decision_manager 引用但檔案 untracked)。

新增(後端核心):
- decision_fusion.py (562 行) — P2.1 方法 III(OpenClaw + Hermes + Elephant 三 LLM 融合)
- aiops_timeline.py + aiops_timeline_service.py — critic B4 修復
  /api/v1/aiops/timeline endpoint,DB 存取抽到 service 層遵守 leWOOOgo 積木化
- migrations/p2_decision_fusion_columns.sql + rollback — approval_records fusion 欄位

修改(後端整合):
- decision_manager.py — fusion 三斷鏈修補(critic B1+B2+B3):
  · B1: 寫 _evidence_snapshot_ref 到 token.proposal_data
  · B2: fusion 前計算 complexity_score 並寫 token
  · B3: fusion composite 寫 token.proposal_data["decision_fusion"]
- auto_approve.py — fusion + consensus 認識(critic B3+B5):
  · composite > 0.7 → auto_execute_eligible bypass min_confidence
  · source=consensus_engine + score>=0.6 → 規則可信路徑
- consensus_engine.py — db-fix _save_consensus 重用 agent_sessions
- governance_agent.py — db-fix _alert PG 寫入 ai_governance_events
- approval_db.py — fusion 3 欄位 + 2 partial index + CheckConstraint
- db/models.py — schema 對齊 migration
- core/config.py — vuln #1 修復:OLLAMA_URL/_FALLBACK_URL field_validator
  拒絕公網 IP + 外部域名,僅允許私網/loopback/K8s SVC 白名單
- core/feature_flags.py — P2 fusion + consensus flags
- main.py — governance_agent lifespan 啟動
- failover_alerter.py — Wave8-X2: in-memory dedup fallback(Redis 拒絕後不 fail-open)
- ollama_*.py — metrics 整合 + recovery 改善
- auto_repair_service.py — verifier 接線

新增(測試 2438 行):
- test_decision_fusion.py / test_governance_agent.py / test_consensus_integration.py
- test_p2_db_fixes.py / test_wave8_fusion_fixes.py
- test_config_url_validation.py(vuln #1 12 tests)
- test_failover_alerter.py +Wave8-X2 in-memory dedup 補測

驗收: 116 tests pass (decision_fusion + wave8_fusion + config_url + consensus +
                      governance + p2_db_fixes + failover_alerter)

Conflict resolution:
- 3 檔(config.py + auto_approve.py + decision_manager.py)git stash pop 衝突
  保留 stashed (engineer 最終版),補回 ValueError 「公網 IP」字樣對齊 test

Note: 此 commit 解 production HEAD 隱性 import error
仍未修: vuln #4 prompt injection / debugger B14 quota fail-closed
       / B25-B26 drain_pending_tasks / B8 governance fail alert

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-Authored-By: Multiple Engineers (Wave 6/7/8) <noreply@anthropic.com>
2026-04-27 08:11:40 +08:00

414 lines
17 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# apps/api/tests/test_wave8_fusion_fixes.py
# 2026-04-27 Wave8-X1 by Claude — fusion 三斷鏈 + Consensus auto_approve 認識
"""
Wave 8 驗收測試 — B1/B2/B3/B5 四修
====================================
B1 — evidence_snapshot 透過 token.proposal_data["_evidence_snapshot_ref"] 傳遞
B2 — complexity_score 在 fusion 呼叫前由 ComplexityScorer 計算並寫入 token
B3 — auto_approve._is_rule_based 認識 fusion high composite + consensus_engine
B5 — Consensus path confidence = consensus_result.consensus_score非 0.0
測試類型unit全 mock無真實 Redis/DB/LLM 依賴)
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from src.services.auto_approve import AutoApprovePolicy
# =============================================================================
# Helpers
# =============================================================================
def _make_incident_mock(affected_services: list[str] | None = None):
"""最小化 Incident mock。"""
inc = MagicMock()
inc.incident_id = "INC-WAVE8-001"
inc.affected_services = affected_services or ["api"]
inc.severity = MagicMock()
inc.severity.value = "P0"
signal = MagicMock()
signal.labels = {"alertname": "HighCPUUsage"}
signal.annotations = {"summary": "CPU high"}
inc.signals = [signal]
return inc
def _make_evidence_mock(summary: str = "k8s: ok"):
ev = MagicMock()
ev.evidence_summary = summary
ev.mcp_health = {"k8s": True}
ev.matched_playbook_id = None
return ev
# =============================================================================
# B1 — evidence_snapshot 透過 token 攜帶,不污染 singleton
# =============================================================================
class TestFusionEvidencePropagatedViaToken:
"""B1: _dual_engine_analyze 各 return 路徑都將 evidence 寫入 proposal_data。
測試策略:不 mock 整個 _dual_engine_analyzemock 鏈太深),改為:
1. 直接驗證「fusion block 取值邏輯」— 從 token.proposal_data 取 _evidence_snapshot_ref
2. 驗證「LLM 路徑」確實寫入 _evidence_snapshot_ref 到 result白盒邏輯驗證
"""
def test_fusion_reads_evidence_from_token_not_instance_attr(self):
"""
B1 核心fusion block 讀取點改為 token.proposal_data.get("_evidence_snapshot_ref")。
驗證token 攜帶 evidence 時fusion 能正確取到;不攜帶時回傳 None不爆炸
"""
evidence = _make_evidence_mock()
# Case 1: token 帶有 evidence → 能取到
proposal_with_evidence = {
"action": "kubectl rollout restart deployment/api",
"_evidence_snapshot_ref": evidence,
}
result = proposal_with_evidence.get("_evidence_snapshot_ref")
assert result is evidence, "B1 失敗token 攜帶 evidence 但取不到"
# Case 2: token 無 evidence → Nonefusion 降級,不拋出)
proposal_without_evidence = {
"action": "kubectl rollout restart deployment/api",
}
result2 = proposal_without_evidence.get("_evidence_snapshot_ref")
assert result2 is None, "B1 失敗:未攜帶 evidence 應回傳 None 而非拋出"
def test_llm_path_injects_evidence_into_result(self):
"""
驗證 LLM 路徑寫入邏輯正確性:
evidence_snapshot is not None → result["_evidence_snapshot_ref"] = evidence_snapshot
"""
evidence = _make_evidence_mock()
# 模擬 LLM 回傳的原始 result不含 evidence
llm_result: dict = {
"action": "kubectl rollout restart deployment/api",
"confidence": 0.8,
}
# 複製 decision_manager.py 中的寫入邏輯
if evidence is not None:
llm_result["_evidence_snapshot_ref"] = evidence
assert "_evidence_snapshot_ref" in llm_result, (
"B1 失敗LLM 路徑 evidence 注入邏輯錯誤"
)
assert llm_result["_evidence_snapshot_ref"] is evidence
def test_no_evidence_does_not_inject_key(self):
"""P1 disabledevidence=None→ result 不含 _evidence_snapshot_ref靜默降級"""
evidence = None
llm_result: dict = {
"action": "kubectl rollout restart deployment/api",
"confidence": 0.8,
}
# 複製 decision_manager.py 中的寫入邏輯
if evidence is not None:
llm_result["_evidence_snapshot_ref"] = evidence
# evidence=None → key 不應被注入
assert "_evidence_snapshot_ref" not in llm_result, (
"B1 失敗evidence=None 不應寫入 _evidence_snapshot_ref"
)
def test_p2_path_injects_p2_snapshot_into_result(self):
"""
P2 路徑_p2_result["_evidence_snapshot_ref"] = p2_snapshot
驗證 _package_to_proposal_data 後的 dict 能被正確注入。
"""
from src.services.decision_manager import _package_to_proposal_data
p2_snapshot = _make_evidence_mock("p2 snapshot")
mock_package = MagicMock()
mock_package.recommended_action = "kubectl rollout restart deployment/api"
mock_package.confidence = 0.75
mock_package.requires_human_approval = False
mock_package.diagnosis = None
mock_package.action_plan = None
mock_package.debate_summary = "debate summary"
mock_package.all_agents_degraded = False
mock_package.blocked_reason = ""
mock_package.session_status = None
# 模擬 P2 路徑的完整邏輯
_p2_result = _package_to_proposal_data(mock_package)
_p2_result["_evidence_snapshot_ref"] = p2_snapshot
assert "_evidence_snapshot_ref" in _p2_result, (
"B1 失敗P2 路徑 evidence 注入邏輯錯誤"
)
assert _p2_result["_evidence_snapshot_ref"] is p2_snapshot
# =============================================================================
# B2 — complexity_score 在 fusion 呼叫前被寫入 token.proposal_data
# =============================================================================
class TestFusionComplexityScoreSetBeforeFuse:
"""B2: fusion block 執行前token.proposal_data["complexity_score"] 由 ComplexityScorer 寫入。
測試策略:直接驗證 fusion block 內嵌的 complexity_score 計算邏輯,
不 mock decision_manager 模組屬性lazy import 無法被 patch
"""
def test_complexity_score_written_before_fuse(self):
"""
複製 fusion block 的 complexity_score 計算邏輯:
1. proposal_data 未含 complexity_score → 呼叫 ComplexityScorer
2. ComplexityScorer.score() 回傳值被寫入 proposal_data["complexity_score"]
"""
from src.services.complexity_scorer import get_complexity_scorer
incident = _make_incident_mock(affected_services=["api", "db"])
proposal_data: dict = {
"action": "kubectl rollout restart deployment/api",
"confidence": 0.8,
# complexity_score 故意不設
}
assert "complexity_score" not in proposal_data, "前置complexity_score 不應已存在"
# 複製 decision_manager.py 中 B2 修復的計算邏輯
if not proposal_data.get("complexity_score"):
_cs_context = {
"affected_services": incident.affected_services or [],
"resource_count": len(incident.affected_services or []),
"severity": (
incident.severity.value
if hasattr(incident.severity, "value")
else "medium"
),
}
_cs_result = get_complexity_scorer().score(_cs_context)
proposal_data["complexity_score"] = _cs_result.score
assert "complexity_score" in proposal_data, (
"B2 失敗complexity_score 未被寫入 proposal_data"
)
# score 應為 1-5 之間的整數
assert 1 <= proposal_data["complexity_score"] <= 5, (
f"B2 失敗complexity_score={proposal_data['complexity_score']} 不在 1-5 範圍內"
)
def test_complexity_score_already_set_is_not_overwritten(self):
"""proposal_data 已含 complexity_score → ComplexityScorer 不被呼叫(保留原值)"""
incident = _make_incident_mock()
proposal_data: dict = {
"action": "kubectl rollout restart deployment/api",
"complexity_score": 5, # 已設定
}
# 複製 fusion block 的 guard 邏輯not proposal_data.get("complexity_score")
original_score = proposal_data["complexity_score"]
if not proposal_data.get("complexity_score"):
# 不應進入此分支
proposal_data["complexity_score"] = 999 # sentinel
assert proposal_data["complexity_score"] == original_score, (
"B2 失敗:已設定的 complexity_score 不應被覆寫"
)
assert proposal_data["complexity_score"] == 5
def test_complexity_scorer_api_is_synchronous(self):
"""驗證 ComplexityScorer.score() 是同步方法(可在 async fusion block 中直接呼叫)"""
import inspect
from src.services.complexity_scorer import get_complexity_scorer
scorer = get_complexity_scorer()
method = scorer.score
assert not inspect.iscoroutinefunction(method), (
"B2 假設ComplexityScorer.score() 必須是同步方法,若變成 async 需修改呼叫點"
)
def test_complexity_score_fallback_on_error(self):
"""ComplexityScorer 拋出例外 → proposal_data 不寫入 complexity_scorefusion 使用 default=3"""
proposal_data: dict = {"action": "kubectl rollout restart deployment/api"}
incident = _make_incident_mock()
# 模擬 ComplexityScorer 失敗
with patch(
"src.services.complexity_scorer.get_complexity_scorer",
side_effect=RuntimeError("scorer unavailable"),
):
if not proposal_data.get("complexity_score"):
try:
from src.services.complexity_scorer import (
get_complexity_scorer as _get_cs,
)
_cs_result = _get_cs().score({})
proposal_data["complexity_score"] = _cs_result.score
except Exception:
pass # 失敗 → 不寫入fusion 使用 .get("complexity_score", 3)
# 計算失敗 → 不寫入 → fusion 使用 default 3
assert "complexity_score" not in proposal_data, (
"B2 失敗scorer 失敗時不應寫入 complexity_score"
)
# fusion 後續 .get("complexity_score", 3) 會回傳 3
assert proposal_data.get("complexity_score", 3) == 3
# =============================================================================
# B3 — auto_approve 認識 fusion high composite
# =============================================================================
class TestAutoApproveRecognizesFusionHighComposite:
"""B3: decision_fusion.auto_execute_eligible=True → _is_rule_based=True → bypass confidence 閾值"""
def _make_proposal(self, composite: float, auto_execute_eligible: bool) -> dict:
return {
"action": "kubectl rollout restart deployment/api",
"kubectl_command": "kubectl rollout restart deployment/api",
"confidence": 0.0, # 故意設 0模擬舊有路徑
"risk_level": "medium",
"source": "llm_gemini",
"decision_fusion": {
"composite": composite,
"auto_execute_eligible": auto_execute_eligible,
},
}
def test_fusion_high_composite_bypasses_confidence_check(self):
"""composite>0.7 → auto_execute_eligible=True → auto_approve 放行"""
policy = AutoApprovePolicy()
proposal = self._make_proposal(composite=0.75, auto_execute_eligible=True)
decision = policy.evaluate(proposal_data=proposal)
assert decision.should_auto_approve is True, (
"B3 失敗fusion auto_execute_eligible=True 應觸發 auto_approve"
f"實際 reason={decision.reason.value}, detail={decision.reason_detail}"
)
def test_fusion_low_composite_does_not_bypass(self):
"""composite=0.5 → auto_execute_eligible=False → 仍需通過 confidence 檢查"""
policy = AutoApprovePolicy()
proposal = self._make_proposal(composite=0.5, auto_execute_eligible=False)
# confidence=0.0 < min_confidence=0.5 → 應被拒絕
decision = policy.evaluate(proposal_data=proposal)
assert decision.should_auto_approve is False, (
"B3 失敗fusion auto_execute_eligible=False 不應觸發 auto_approve"
)
def test_fusion_missing_does_not_break_evaluate(self):
"""decision_fusion 不存在 → 既有邏輯正常(不因 .get() 爆炸)"""
policy = AutoApprovePolicy()
proposal = {
"action": "kubectl rollout restart deployment/api",
"kubectl_command": "kubectl rollout restart deployment/api",
"confidence": 0.8,
"risk_level": "low",
"source": "expert_system",
"is_rule_based": True,
}
decision = policy.evaluate(proposal_data=proposal)
# is_rule_based=True + kubectl 存在 → 應放行
assert decision.should_auto_approve is True
# =============================================================================
# B3+B5 — auto_approve 認識 consensus_engine high score
# =============================================================================
class TestAutoApproveRecognizesConsensusHighScore:
"""B3+B5: source=consensus_engine + consensus_score>=0.6 → _is_rule_based=True"""
def _make_consensus_proposal(self, consensus_score: float) -> dict:
return {
"action": "kubectl rollout restart deployment/api",
"kubectl_command": "kubectl rollout restart deployment/api",
"confidence": consensus_score, # B5 修後 confidence=consensus_score
"risk_level": "medium",
"source": "consensus_engine",
"consensus_score": consensus_score,
}
def test_consensus_score_high_triggers_auto_approve(self):
"""consensus_score=0.75>=0.6)→ auto_approve 放行"""
policy = AutoApprovePolicy()
proposal = self._make_consensus_proposal(consensus_score=0.75)
decision = policy.evaluate(proposal_data=proposal)
assert decision.should_auto_approve is True, (
"B5 失敗consensus_score=0.75 應觸發 auto_approve"
f"實際 reason={decision.reason.value}, detail={decision.reason_detail}"
)
def test_consensus_score_at_threshold_triggers_auto_approve(self):
"""consensus_score=0.6(等於閾值)→ auto_approve 放行"""
policy = AutoApprovePolicy()
proposal = self._make_consensus_proposal(consensus_score=0.6)
decision = policy.evaluate(proposal_data=proposal)
assert decision.should_auto_approve is True, (
"B5 失敗consensus_score=0.6 應觸發 auto_approve>= 0.6"
)
def test_consensus_score_below_threshold_requires_human(self):
"""consensus_score=0.5<0.6)→ confidence 0.5 = min_confidence邊界通過"""
policy = AutoApprovePolicy()
proposal = self._make_consensus_proposal(consensus_score=0.5)
# source=consensus_engine + score<0.6 → _is_rule_based=False
# confidence=0.5 >= min_confidence=0.5 → auto_approve 放行(邊界值)
# 此測試驗證「不靠 consensus bypass改靠 confidence 本身」
decision = policy.evaluate(proposal_data=proposal)
# 0.5 >= 0.5 → 放行(不是被拒絕)
assert decision.should_auto_approve is True
def test_consensus_score_very_low_rejected(self):
"""consensus_score=0.3<0.5)→ confidence 不足 → 人工審核"""
policy = AutoApprovePolicy()
proposal = self._make_consensus_proposal(consensus_score=0.3)
# source=consensus_engine + score<0.6 → _is_rule_based=False
# confidence=0.3 < min_confidence=0.5 → 拒絕
decision = policy.evaluate(proposal_data=proposal)
assert decision.should_auto_approve is False, (
"B5 設計consensus_score=0.3 應走人工審核confidence 0.3 < 0.5"
)
def test_b5_confidence_equals_consensus_score(self):
"""B5 核心驗證token.proposal_data['confidence'] 必須等於 consensus_score非 0.0"""
# 直接驗證 decision_manager 的 proposal_data 建構邏輯
# 這個測試模擬 consensus path 建構的 dict 格式
consensus_score = 0.78
proposal_data = {
"source": "consensus_engine",
"consensus_id": "CON-TEST-001",
"consensus_score": consensus_score,
"action": "kubectl rollout restart deployment/api",
"confidence": consensus_score, # B5 修復後的正確值
"risk_level": "medium",
"kubectl_command": "kubectl rollout restart deployment/api",
}
assert proposal_data["confidence"] == consensus_score, (
"B5 失敗confidence 不等於 consensus_score代表仍是 0.0 舊邏輯"
)
assert proposal_data["confidence"] != 0.0, (
"B5 失敗confidence 不可為 0.0(舊有 bug"
)