""" PreDecisionInvestigator 測試 ============================ ADR-081: Phase 1 決策前情報調查員 測試項目: - 工具並行蒐集(多維度) - 工具部分失敗不阻塞(Graceful Degradation) - 工具逾時被丟棄 - EvidenceSnapshot 正確填入感官維度 - evidence_summary 組裝 + Token Budget 截斷 - fingerprint 計算一致性 - _fill_snapshot_dimension 正確路由 注意:不依賴真實 Redis / DB — 純邏輯測試 2026-04-15 Claude Sonnet 4.6 + ogt: Phase 1 初始建立 """ from __future__ import annotations import asyncio from typing import Any import pytest from src.plugins.mcp.interfaces import MCPTool, MCPToolProvider, MCPToolResult from src.plugins.mcp.registry import AuditedMCPToolProvider from src.services import pre_decision_investigator as pdi_module from src.services.evidence_snapshot import EvidenceSnapshot from src.services.mcp_tool_registry import ( RegisteredTool, SensorDimension, ) from src.services.pre_decision_investigator import ( PreDecisionInvestigator, _compute_fingerprint, _fill_snapshot_dimension, _build_tool_params, ) # ───────────────────────────────────────────────────────────────────────────── # Stubs # ───────────────────────────────────────────────────────────────────────────── def _make_tool(name: str) -> MCPTool: return MCPTool(name=name, description="", input_schema={}, server_name="test") class _SuccessProvider(MCPToolProvider): """永遠成功,回傳固定 output""" def __init__(self, output: dict | str | None = None) -> None: self._output = output if output is not None else {"status": "Running"} @property def name(self) -> str: return "success_provider" async def list_tools(self) -> list[MCPTool]: return [] async def execute(self, tool_name: str, parameters: dict) -> MCPToolResult: return MCPToolResult(success=True, execution_id="ok", output=self._output) class _FailProvider(MCPToolProvider): """永遠失敗""" @property def name(self) -> str: return "fail_provider" async def list_tools(self) -> list[MCPTool]: return [] async def execute(self, tool_name: str, parameters: dict) -> MCPToolResult: return MCPToolResult(success=False, execution_id="fail", error="connection refused") class _TimeoutProvider(MCPToolProvider): """永遠逾時""" @property def name(self) -> str: return "timeout_provider" async def list_tools(self) -> list[MCPTool]: return [] async def execute(self, tool_name: str, parameters: dict) -> MCPToolResult: await asyncio.sleep(9999) return MCPToolResult(success=True, execution_id="never", output={}) class _CaptureProvider(_SuccessProvider): """成功 provider,額外保存收到的參數。""" def __init__(self) -> None: super().__init__({"status": "Running"}) self.seen_parameters: dict | None = None async def execute(self, tool_name: str, parameters: dict) -> MCPToolResult: self.seen_parameters = dict(parameters) return await super().execute(tool_name, parameters) class _DbContext: async def __aenter__(self) -> object: return object() async def __aexit__(self, *_args: object) -> None: return None def _stub_incident( alertname: str = "KubePodCrashLooping", namespace: str = "awoooi-prod", pod: str = "api-xyz", severity: str = "critical", ) -> object: """返回最小 Incident stub(僅需 .signals[0].labels)""" class _Signal: labels = { "alertname": alertname, "namespace": namespace, "pod": pod, "severity": severity, } class _Incident: incident_id = f"INC-{alertname[:4]}" signals = [_Signal()] return _Incident() def _stub_host_incident() -> object: class _Signal: alert_name = "HostErrorLogFlood" labels = { "alertname": "HostErrorLogFlood", "error_count": "30", } class _Incident: incident_id = "INC-HOST" signals = [_Signal()] affected_services = ["ollama"] return _Incident() def _reg(tool_name: str, provider: MCPToolProvider, dim: SensorDimension) -> RegisteredTool: return RegisteredTool( tool=_make_tool(tool_name), provider=provider, dimensions=[dim], priority=5, ) def test_build_tool_params_uses_host_alias_and_service_from_affected_service() -> None: params = _build_tool_params(_stub_host_incident()) assert params["host"] == "192.168.0.188" assert params["target"] == "ollama" assert params["service_name"] == "ollama" assert params["container_name"] == "ollama" assert params["filter_name"] == "ollama" assert params["severity"] == "ERROR,FATAL,CRITICAL" assert params["search_text"] == "HostErrorLogFlood" assert params["time_window_minutes"] == 30 def test_build_tool_params_maps_wooo_alias_to_allowed_ssh_host() -> None: class _Signal: alert_name = "HostHighCpuLoad" labels = { "alertname": "HostHighCpuLoad", "instance": "wooo:9100", } class _Incident: incident_id = "INC-WOOO" signals = [_Signal()] affected_services = ["gitea"] params = _build_tool_params(_Incident()) assert params["host"] == "192.168.0.110" assert params["filter_name"] == "gitea" # ───────────────────────────────────────────────────────────────────────────── # _compute_fingerprint # ───────────────────────────────────────────────────────────────────────────── class TestComputeFingerprint: def test_same_labels_same_fingerprint(self): i1 = _stub_incident("Kube", "prod", "pod1", "critical") i2 = _stub_incident("Kube", "prod", "pod1", "critical") assert _compute_fingerprint(i1) == _compute_fingerprint(i2) def test_different_alertname_different_fingerprint(self): i1 = _stub_incident("Kube", "prod", "pod1", "critical") i2 = _stub_incident("Host", "prod", "pod1", "critical") assert _compute_fingerprint(i1) != _compute_fingerprint(i2) def test_fingerprint_length_16(self): i = _stub_incident() fp = _compute_fingerprint(i) assert len(fp) == 16 def test_fingerprint_hex_chars_only(self): i = _stub_incident() fp = _compute_fingerprint(i) assert all(c in "0123456789abcdef" for c in fp) # ───────────────────────────────────────────────────────────────────────────── # _build_tool_params # ───────────────────────────────────────────────────────────────────────────── class TestBuildToolParams: def test_namespace_extracted(self): p = _build_tool_params(_stub_incident(namespace="mynamespace")) assert p["namespace"] == "mynamespace" def test_pod_name_extracted(self): p = _build_tool_params(_stub_incident(pod="mypod")) assert p["pod_name"] == "mypod" def test_alertname_extracted(self): p = _build_tool_params(_stub_incident(alertname="MyAlert")) assert p["alertname"] == "MyAlert" def test_default_namespace_fallback(self): class _Signal: labels = {} class _Inc: incident_id = "x" signals = [_Signal()] p = _build_tool_params(_Inc()) assert p["namespace"] == "awoooi-prod" # ───────────────────────────────────────────────────────────────────────────── # _fill_snapshot_dimension # ───────────────────────────────────────────────────────────────────────────── class TestFillSnapshotDimension: def _reg(self, dim: SensorDimension) -> RegisteredTool: return RegisteredTool( tool=_make_tool("t"), provider=_SuccessProvider(), dimensions=[dim], priority=5, ) def test_d1_dict_fills_k8s_state(self): snap = EvidenceSnapshot(incident_id="x") _fill_snapshot_dimension(snap, self._reg(SensorDimension.D1_K8S_STATE), {"status": "Running"}) assert snap.k8s_state == {"status": "Running"} def test_d1_str_wraps_in_raw(self): snap = EvidenceSnapshot(incident_id="x") _fill_snapshot_dimension(snap, self._reg(SensorDimension.D1_K8S_STATE), "raw k8s output") assert snap.k8s_state == {"raw": "raw k8s output"} def test_d2_fills_recent_logs(self): snap = EvidenceSnapshot(incident_id="x") _fill_snapshot_dimension(snap, self._reg(SensorDimension.D2_LOGS), "log line 1\nlog line 2") assert "log line 1" in snap.recent_logs def test_d2_dict_serialized_to_string(self): snap = EvidenceSnapshot(incident_id="x") _fill_snapshot_dimension(snap, self._reg(SensorDimension.D2_LOGS), {"msg": "hello"}) assert "hello" in snap.recent_logs def test_d3_fills_metrics(self): snap = EvidenceSnapshot(incident_id="x") _fill_snapshot_dimension(snap, self._reg(SensorDimension.D3_METRICS), {"cpu": 95.2}) assert snap.metrics_snapshot == {"cpu": 95.2} def test_d4_list_fills_deployments(self): snap = EvidenceSnapshot(incident_id="x") _fill_snapshot_dimension(snap, self._reg(SensorDimension.D4_CHANGES), [{"rev": "abc"}]) assert snap.recent_deployments == [{"rev": "abc"}] def test_d4_dict_wrapped_in_list(self): snap = EvidenceSnapshot(incident_id="x") _fill_snapshot_dimension(snap, self._reg(SensorDimension.D4_CHANGES), {"rev": "abc"}) assert snap.recent_deployments == [{"rev": "abc"}] def test_d5_fills_business_metrics(self): snap = EvidenceSnapshot(incident_id="x") _fill_snapshot_dimension(snap, self._reg(SensorDimension.D5_BUSINESS), {"sli": 0.99}) assert snap.business_metrics == {"sli": 0.99} def test_d6_truncated_at_2000(self): snap = EvidenceSnapshot(incident_id="x") _fill_snapshot_dimension(snap, self._reg(SensorDimension.D6_HISTORY), "X" * 5000) assert len(snap.historical_context) <= 2100 # 2000 + possible truncation note def test_d7_fills_peer_health(self): snap = EvidenceSnapshot(incident_id="x") _fill_snapshot_dimension(snap, self._reg(SensorDimension.D7_PEERS), {"replica_0": "ok"}) assert snap.peer_health == {"replica_0": "ok"} def test_d8_fills_topology(self): snap = EvidenceSnapshot(incident_id="x") _fill_snapshot_dimension(snap, self._reg(SensorDimension.D8_TOPOLOGY), {"upstream": "db"}) assert snap.dependency_topology == {"upstream": "db"} def test_none_raw_is_noop(self): snap = EvidenceSnapshot(incident_id="x") _fill_snapshot_dimension(snap, self._reg(SensorDimension.D1_K8S_STATE), None) assert snap.k8s_state is None # 未被修改 # ───────────────────────────────────────────────────────────────────────────── # PreDecisionInvestigator._collect_one # ───────────────────────────────────────────────────────────────────────────── class TestCollectOne: """單工具蒐集行為(不需要 DB / Redis)""" @pytest.mark.asyncio async def test_success_fills_snapshot(self): investigator = PreDecisionInvestigator() snap = EvidenceSnapshot(incident_id="x") reg = _reg("kubectl_describe", _SuccessProvider({"status": "Running"}), SensorDimension.D1_K8S_STATE) await investigator._collect_one(snap, reg, {"namespace": "prod"}) assert snap.mcp_health["kubectl_describe"] is True assert snap.sensors_succeeded == 1 assert snap.k8s_state is not None @pytest.mark.asyncio async def test_collect_one_injects_mcp_audit_context(self): investigator = PreDecisionInvestigator() snap = EvidenceSnapshot(incident_id="INC-AUDIT") provider = _CaptureProvider() reg = _reg("kubectl_describe", provider, SensorDimension.D1_K8S_STATE) await investigator._collect_one(snap, reg, {"namespace": "prod"}) assert provider.seen_parameters is not None audit_context = provider.seen_parameters["_mcp_audit"] assert audit_context["incident_id"] == "INC-AUDIT" assert audit_context["session_id"] == "incident:INC-AUDIT:pre_decision" assert audit_context["flywheel_node"] == "sense" assert audit_context["agent_role"] == "pre_decision_investigator" @pytest.mark.asyncio async def test_collect_one_routes_audited_provider_through_gateway( self, monkeypatch: pytest.MonkeyPatch, ): investigator = PreDecisionInvestigator() snap = EvidenceSnapshot(incident_id="INC-GATEWAY") provider = _CaptureProvider() reg = _reg( "kubectl_describe", AuditedMCPToolProvider(provider), SensorDimension.D1_K8S_STATE, ) calls: list[dict[str, Any]] = [] class FakeGateway: def __init__(self, db: object) -> None: self.db = db async def call(self, ctx, parameters: dict[str, Any]) -> MCPToolResult: calls.append({"ctx": ctx, "parameters": parameters, "db": self.db}) return MCPToolResult(success=True, execution_id="gw", output={"status": "Running"}) monkeypatch.setattr(pdi_module, "get_db_context", lambda _project_id: _DbContext()) monkeypatch.setattr(pdi_module, "McpGateway", FakeGateway) await investigator._collect_one(snap, reg, {"namespace": "prod"}) assert snap.mcp_health["kubectl_describe"] is True assert snap.k8s_state is not None assert provider.seen_parameters is None assert calls ctx = calls[0]["ctx"] assert ctx.project_id == "awoooi" assert ctx.agent_id == "pre_decision_investigator" assert ctx.tool_name == "kubectl_describe" assert ctx.trace_id == "INC-GATEWAY" assert ctx.required_scope == "read" assert calls[0]["parameters"]["_mcp_audit"]["incident_id"] == "INC-GATEWAY" @pytest.mark.asyncio async def test_failed_tool_marks_health_false(self): investigator = PreDecisionInvestigator() snap = EvidenceSnapshot(incident_id="x") reg = _reg("kubectl_logs", _FailProvider(), SensorDimension.D2_LOGS) await investigator._collect_one(snap, reg, {}) assert snap.mcp_health["kubectl_logs"] is False assert snap.sensors_succeeded == 0 @pytest.mark.asyncio async def test_timeout_marks_health_false(self): """工具逾時必須被丟棄,不阻塞主路徑""" investigator = PreDecisionInvestigator() snap = EvidenceSnapshot(incident_id="x") reg = _reg("slow_tool", _TimeoutProvider(), SensorDimension.D3_METRICS) # _collect_one 本身有 MCP_TOOL_TIMEOUT_SEC=5 的 wait_for 保護 # 但在測試中我們直接驗證它不會 raise,只是設 health=False # 用一個足夠短的超時替代(patch不做,因為這是純邏輯驗證) with pytest.raises(Exception): # TimeoutProvider 會永久阻塞,直接觸發 asyncio.TimeoutError await asyncio.wait_for( investigator._collect_one(snap, reg, {}), timeout=0.1, ) # 超時後 health 預設 False(在 _collect_one 開頭設定) assert snap.mcp_health.get("slow_tool") is False # ───────────────────────────────────────────────────────────────────────────── # PreDecisionInvestigator._collect_all # ───────────────────────────────────────────────────────────────────────────── class TestCollectAll: """多工具並行蒐集 — Graceful Degradation""" @pytest.mark.asyncio async def test_partial_failure_does_not_block(self): """失敗工具不阻塞成功工具""" investigator = PreDecisionInvestigator() snap = EvidenceSnapshot(incident_id="x") snap.sensors_attempted = 2 tools = [ _reg("kubectl_describe", _SuccessProvider({"status": "Running"}), SensorDimension.D1_K8S_STATE), _reg("kubectl_logs", _FailProvider(), SensorDimension.D2_LOGS), ] incident = _stub_incident() await investigator._collect_all(snap, tools, incident) assert snap.mcp_health["kubectl_describe"] is True assert snap.mcp_health["kubectl_logs"] is False assert snap.sensors_succeeded == 1 assert snap.k8s_state is not None @pytest.mark.asyncio async def test_all_success_fills_multiple_dimensions(self): investigator = PreDecisionInvestigator() snap = EvidenceSnapshot(incident_id="x") tools = [ _reg("kubectl_describe", _SuccessProvider({"status": "Running"}), SensorDimension.D1_K8S_STATE), _reg("prometheus_query", _SuccessProvider({"cpu": 95.0}), SensorDimension.D3_METRICS), ] incident = _stub_incident() await investigator._collect_all(snap, tools, incident) assert snap.k8s_state is not None assert snap.metrics_snapshot is not None assert snap.sensors_succeeded == 2 @pytest.mark.asyncio async def test_empty_tools_produces_empty_snapshot(self): investigator = PreDecisionInvestigator() snap = EvidenceSnapshot(incident_id="x") await investigator._collect_all(snap, [], _stub_incident()) assert snap.sensors_succeeded == 0 assert snap.mcp_health == {}