# apps/api/tests/test_wave8_fusion_fixes.py # 2026-04-27 Wave8-X1 by Claude — fusion 三斷鏈 + Consensus auto_approve 認識 """ Wave 8 驗收測試 — B1/B2/B3/B5 四修 ==================================== B1 — evidence_snapshot 透過 token.proposal_data["_evidence_snapshot_ref"] 傳遞 B2 — complexity_score 在 fusion 呼叫前由 ComplexityScorer 計算並寫入 token B3 — auto_approve._is_rule_based 認識 fusion high composite + consensus_engine B5 — Consensus path confidence = consensus_result.consensus_score(非 0.0) 測試類型:unit(全 mock,無真實 Redis/DB/LLM 依賴) """ from __future__ import annotations from unittest.mock import AsyncMock, MagicMock, patch import pytest from src.services.auto_approve import AutoApprovePolicy # ============================================================================= # Helpers # ============================================================================= def _make_incident_mock(affected_services: list[str] | None = None): """最小化 Incident mock。""" inc = MagicMock() inc.incident_id = "INC-WAVE8-001" inc.affected_services = affected_services or ["api"] inc.severity = MagicMock() inc.severity.value = "P0" signal = MagicMock() signal.labels = {"alertname": "HighCPUUsage"} signal.annotations = {"summary": "CPU high"} inc.signals = [signal] return inc def _make_evidence_mock(summary: str = "k8s: ok"): ev = MagicMock() ev.evidence_summary = summary ev.mcp_health = {"k8s": True} ev.matched_playbook_id = None return ev # ============================================================================= # B1 — evidence_snapshot 透過 token 攜帶,不污染 singleton # ============================================================================= class TestFusionEvidencePropagatedViaToken: """B1: _dual_engine_analyze 各 return 路徑都將 evidence 寫入 proposal_data。 測試策略:不 mock 整個 _dual_engine_analyze(mock 鏈太深),改為: 1. 直接驗證「fusion block 取值邏輯」— 從 token.proposal_data 取 _evidence_snapshot_ref 2. 驗證「LLM 路徑」確實寫入 _evidence_snapshot_ref 到 result(白盒邏輯驗證) """ def test_fusion_reads_evidence_from_token_not_instance_attr(self): """ B1 核心:fusion block 讀取點改為 token.proposal_data.get("_evidence_snapshot_ref")。 驗證:token 攜帶 evidence 時,fusion 能正確取到;不攜帶時回傳 None(不爆炸)。 """ evidence = _make_evidence_mock() # Case 1: token 帶有 evidence → 能取到 proposal_with_evidence = { "action": "kubectl rollout restart deployment/api", "_evidence_snapshot_ref": evidence, } result = proposal_with_evidence.get("_evidence_snapshot_ref") assert result is evidence, "B1 失敗:token 攜帶 evidence 但取不到" # Case 2: token 無 evidence → None(fusion 降級,不拋出) proposal_without_evidence = { "action": "kubectl rollout restart deployment/api", } result2 = proposal_without_evidence.get("_evidence_snapshot_ref") assert result2 is None, "B1 失敗:未攜帶 evidence 應回傳 None 而非拋出" def test_llm_path_injects_evidence_into_result(self): """ 驗證 LLM 路徑寫入邏輯正確性: evidence_snapshot is not None → result["_evidence_snapshot_ref"] = evidence_snapshot """ evidence = _make_evidence_mock() # 模擬 LLM 回傳的原始 result(不含 evidence) llm_result: dict = { "action": "kubectl rollout restart deployment/api", "confidence": 0.8, } # 複製 decision_manager.py 中的寫入邏輯 if evidence is not None: llm_result["_evidence_snapshot_ref"] = evidence assert "_evidence_snapshot_ref" in llm_result, ( "B1 失敗:LLM 路徑 evidence 注入邏輯錯誤" ) assert llm_result["_evidence_snapshot_ref"] is evidence def test_no_evidence_does_not_inject_key(self): """P1 disabled(evidence=None)→ result 不含 _evidence_snapshot_ref(靜默降級)""" evidence = None llm_result: dict = { "action": "kubectl rollout restart deployment/api", "confidence": 0.8, } # 複製 decision_manager.py 中的寫入邏輯 if evidence is not None: llm_result["_evidence_snapshot_ref"] = evidence # evidence=None → key 不應被注入 assert "_evidence_snapshot_ref" not in llm_result, ( "B1 失敗:evidence=None 不應寫入 _evidence_snapshot_ref" ) def test_p2_path_injects_p2_snapshot_into_result(self): """ P2 路徑:_p2_result["_evidence_snapshot_ref"] = p2_snapshot 驗證 _package_to_proposal_data 後的 dict 能被正確注入。 """ from src.services.decision_manager import _package_to_proposal_data p2_snapshot = _make_evidence_mock("p2 snapshot") mock_package = MagicMock() mock_package.recommended_action = "kubectl rollout restart deployment/api" mock_package.confidence = 0.75 mock_package.requires_human_approval = False mock_package.diagnosis = None mock_package.action_plan = None mock_package.debate_summary = "debate summary" mock_package.all_agents_degraded = False mock_package.blocked_reason = "" mock_package.session_status = None # 模擬 P2 路徑的完整邏輯 _p2_result = _package_to_proposal_data(mock_package) _p2_result["_evidence_snapshot_ref"] = p2_snapshot assert "_evidence_snapshot_ref" in _p2_result, ( "B1 失敗:P2 路徑 evidence 注入邏輯錯誤" ) assert _p2_result["_evidence_snapshot_ref"] is p2_snapshot # ============================================================================= # B2 — complexity_score 在 fusion 呼叫前被寫入 token.proposal_data # ============================================================================= class TestFusionComplexityScoreSetBeforeFuse: """B2: fusion block 執行前,token.proposal_data["complexity_score"] 由 ComplexityScorer 寫入。 測試策略:直接驗證 fusion block 內嵌的 complexity_score 計算邏輯, 不 mock decision_manager 模組屬性(lazy import 無法被 patch)。 """ def test_complexity_score_written_before_fuse(self): """ 複製 fusion block 的 complexity_score 計算邏輯: 1. proposal_data 未含 complexity_score → 呼叫 ComplexityScorer 2. ComplexityScorer.score() 回傳值被寫入 proposal_data["complexity_score"] """ from src.services.complexity_scorer import get_complexity_scorer incident = _make_incident_mock(affected_services=["api", "db"]) proposal_data: dict = { "action": "kubectl rollout restart deployment/api", "confidence": 0.8, # complexity_score 故意不設 } assert "complexity_score" not in proposal_data, "前置:complexity_score 不應已存在" # 複製 decision_manager.py 中 B2 修復的計算邏輯 if not proposal_data.get("complexity_score"): _cs_context = { "affected_services": incident.affected_services or [], "resource_count": len(incident.affected_services or []), "severity": ( incident.severity.value if hasattr(incident.severity, "value") else "medium" ), } _cs_result = get_complexity_scorer().score(_cs_context) proposal_data["complexity_score"] = _cs_result.score assert "complexity_score" in proposal_data, ( "B2 失敗:complexity_score 未被寫入 proposal_data" ) # score 應為 1-5 之間的整數 assert 1 <= proposal_data["complexity_score"] <= 5, ( f"B2 失敗:complexity_score={proposal_data['complexity_score']} 不在 1-5 範圍內" ) def test_complexity_score_already_set_is_not_overwritten(self): """proposal_data 已含 complexity_score → ComplexityScorer 不被呼叫(保留原值)""" incident = _make_incident_mock() proposal_data: dict = { "action": "kubectl rollout restart deployment/api", "complexity_score": 5, # 已設定 } # 複製 fusion block 的 guard 邏輯(not proposal_data.get("complexity_score")) original_score = proposal_data["complexity_score"] if not proposal_data.get("complexity_score"): # 不應進入此分支 proposal_data["complexity_score"] = 999 # sentinel assert proposal_data["complexity_score"] == original_score, ( "B2 失敗:已設定的 complexity_score 不應被覆寫" ) assert proposal_data["complexity_score"] == 5 def test_complexity_scorer_api_is_synchronous(self): """驗證 ComplexityScorer.score() 是同步方法(可在 async fusion block 中直接呼叫)""" import inspect from src.services.complexity_scorer import get_complexity_scorer scorer = get_complexity_scorer() method = scorer.score assert not inspect.iscoroutinefunction(method), ( "B2 假設:ComplexityScorer.score() 必須是同步方法,若變成 async 需修改呼叫點" ) def test_complexity_score_fallback_on_error(self): """ComplexityScorer 拋出例外 → proposal_data 不寫入 complexity_score,fusion 使用 default=3""" proposal_data: dict = {"action": "kubectl rollout restart deployment/api"} incident = _make_incident_mock() # 模擬 ComplexityScorer 失敗 with patch( "src.services.complexity_scorer.get_complexity_scorer", side_effect=RuntimeError("scorer unavailable"), ): if not proposal_data.get("complexity_score"): try: from src.services.complexity_scorer import ( get_complexity_scorer as _get_cs, ) _cs_result = _get_cs().score({}) proposal_data["complexity_score"] = _cs_result.score except Exception: pass # 失敗 → 不寫入,fusion 使用 .get("complexity_score", 3) # 計算失敗 → 不寫入 → fusion 使用 default 3 assert "complexity_score" not in proposal_data, ( "B2 失敗:scorer 失敗時不應寫入 complexity_score" ) # fusion 後續 .get("complexity_score", 3) 會回傳 3 assert proposal_data.get("complexity_score", 3) == 3 # ============================================================================= # B3 — auto_approve 認識 fusion high composite # ============================================================================= class TestAutoApproveRecognizesFusionHighComposite: """B3: decision_fusion.auto_execute_eligible=True → _is_rule_based=True → bypass confidence 閾值""" def _make_proposal(self, composite: float, auto_execute_eligible: bool) -> dict: return { "action": "kubectl rollout restart deployment/api", "kubectl_command": "kubectl rollout restart deployment/api", "confidence": 0.0, # 故意設 0,模擬舊有路徑 "risk_level": "medium", "source": "llm_gemini", "decision_fusion": { "composite": composite, "auto_execute_eligible": auto_execute_eligible, }, } def test_fusion_high_composite_bypasses_confidence_check(self): """composite>0.7 → auto_execute_eligible=True → auto_approve 放行""" policy = AutoApprovePolicy() proposal = self._make_proposal(composite=0.75, auto_execute_eligible=True) decision = policy.evaluate(proposal_data=proposal) assert decision.should_auto_approve is True, ( "B3 失敗:fusion auto_execute_eligible=True 應觸發 auto_approve," f"實際 reason={decision.reason.value}, detail={decision.reason_detail}" ) def test_fusion_low_composite_does_not_bypass(self): """composite=0.5 → auto_execute_eligible=False → 仍需通過 confidence 檢查""" policy = AutoApprovePolicy() proposal = self._make_proposal(composite=0.5, auto_execute_eligible=False) # confidence=0.0 < min_confidence=0.5 → 應被拒絕 decision = policy.evaluate(proposal_data=proposal) assert decision.should_auto_approve is False, ( "B3 失敗:fusion auto_execute_eligible=False 不應觸發 auto_approve" ) def test_fusion_missing_does_not_break_evaluate(self): """decision_fusion 不存在 → 既有邏輯正常(不因 .get() 爆炸)""" policy = AutoApprovePolicy() proposal = { "action": "kubectl rollout restart deployment/api", "kubectl_command": "kubectl rollout restart deployment/api", "confidence": 0.8, "risk_level": "low", "source": "expert_system", "is_rule_based": True, } decision = policy.evaluate(proposal_data=proposal) # is_rule_based=True + kubectl 存在 → 應放行 assert decision.should_auto_approve is True # ============================================================================= # B3+B5 — auto_approve 認識 consensus_engine high score # ============================================================================= class TestAutoApproveRecognizesConsensusHighScore: """B3+B5: source=consensus_engine + consensus_score>=0.6 → _is_rule_based=True""" def _make_consensus_proposal(self, consensus_score: float) -> dict: return { "action": "kubectl rollout restart deployment/api", "kubectl_command": "kubectl rollout restart deployment/api", "confidence": consensus_score, # B5 修後 confidence=consensus_score "risk_level": "medium", "source": "consensus_engine", "consensus_score": consensus_score, } def test_consensus_score_high_triggers_auto_approve(self): """consensus_score=0.75(>=0.6)→ auto_approve 放行""" policy = AutoApprovePolicy() proposal = self._make_consensus_proposal(consensus_score=0.75) decision = policy.evaluate(proposal_data=proposal) assert decision.should_auto_approve is True, ( "B5 失敗:consensus_score=0.75 應觸發 auto_approve," f"實際 reason={decision.reason.value}, detail={decision.reason_detail}" ) def test_consensus_score_at_threshold_triggers_auto_approve(self): """consensus_score=0.6(等於閾值)→ auto_approve 放行""" policy = AutoApprovePolicy() proposal = self._make_consensus_proposal(consensus_score=0.6) decision = policy.evaluate(proposal_data=proposal) assert decision.should_auto_approve is True, ( "B5 失敗:consensus_score=0.6 應觸發 auto_approve(>= 0.6)" ) def test_consensus_score_below_threshold_requires_human(self): """consensus_score=0.5(<0.6)→ confidence 0.5 = min_confidence,邊界通過""" policy = AutoApprovePolicy() proposal = self._make_consensus_proposal(consensus_score=0.5) # source=consensus_engine + score<0.6 → _is_rule_based=False # confidence=0.5 >= min_confidence=0.5 → auto_approve 放行(邊界值) # 此測試驗證「不靠 consensus bypass,改靠 confidence 本身」 decision = policy.evaluate(proposal_data=proposal) # 0.5 >= 0.5 → 放行(不是被拒絕) assert decision.should_auto_approve is True def test_consensus_score_very_low_rejected(self): """consensus_score=0.3(<0.5)→ confidence 不足 → 人工審核""" policy = AutoApprovePolicy() proposal = self._make_consensus_proposal(consensus_score=0.3) # source=consensus_engine + score<0.6 → _is_rule_based=False # confidence=0.3 < min_confidence=0.5 → 拒絕 decision = policy.evaluate(proposal_data=proposal) assert decision.should_auto_approve is False, ( "B5 設計:consensus_score=0.3 應走人工審核(confidence 0.3 < 0.5)" ) def test_b5_confidence_equals_consensus_score(self): """B5 核心驗證:token.proposal_data['confidence'] 必須等於 consensus_score(非 0.0)""" # 直接驗證 decision_manager 的 proposal_data 建構邏輯 # 這個測試模擬 consensus path 建構的 dict 格式 consensus_score = 0.78 proposal_data = { "source": "consensus_engine", "consensus_id": "CON-TEST-001", "consensus_score": consensus_score, "action": "kubectl rollout restart deployment/api", "confidence": consensus_score, # B5 修復後的正確值 "risk_level": "medium", "kubectl_command": "kubectl rollout restart deployment/api", } assert proposal_data["confidence"] == consensus_score, ( "B5 失敗:confidence 不等於 consensus_score,代表仍是 0.0 舊邏輯" ) assert proposal_data["confidence"] != 0.0, ( "B5 失敗:confidence 不可為 0.0(舊有 bug)" )