"""
test_solver_recommended_actions.py
===================================
B1 — Solver structured actions: RecommendedAction schema validation plus
real-NIM end-to-end tests.

2026-04-27 Claude Sonnet 4.6: B1 — Solver structured actions
(North Star §1.1: remediation diversity >= 40%)

Test scope:
  Unit — schema validation
         (_validate_recommended_action / _extract_recommended_actions)
  Unit — reworked _degraded_plan:
         candidates=[], recommended_actions=[], degraded=True
  Unit — dynamic loading of the YAML MCP registry
  E2E  — real NIM (192.168.0.188:8088), three incident categories:
           HostDiskUsage / KubePodOOM / DatabaseConnectionFail
         assert: len(recommended_actions) >= 1
                 at least one non-restart action
                 (label/name contains neither "重啟" nor "restart")

Follows feedback_no_mock_testing.md:
  - MagicMock / AsyncMock / unittest.mock.patch must not be used to fake
    LLM calls
  - when the real NIM is unreachable the test calls pytest.skip()
    (the skip decision is based on the OpenClaw connection)
"""

from __future__ import annotations

import sys
import os

# Make sure the `src` package can be resolved when running from the tests dir.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../"))

import pytest
import asyncio

from src.agents.protocol import (
    AgentVote,
    DiagnosisReport,
    Hypothesis,
    RecommendedAction,
)
from src.agents.solver_agent import (
    _extract_recommended_actions,
    _load_mcp_tool_registry,
    _validate_recommended_action,
    get_solver_agent,
)


# =============================================================================
# Helpers
# =============================================================================

def _make_diagnosis(
    incident_id: str,
    hypothesis: str,
    category: str,
    confidence: float = 0.85,
) -> DiagnosisReport:
    """Build a standard DiagnosisReport stub.

    The LLM is not mocked; this only supplies the input structure that is
    handed to the solver. The report carries a single hypothesis and uses
    ``incident_id`` as the evidence snapshot id.
    """
    sole_hypothesis = Hypothesis(
        description=hypothesis,
        confidence=confidence,
        evidence_chain=["test_evidence"],
        category=category,
    )
    return DiagnosisReport(
        hypotheses=[sole_hypothesis],
        evidence_snapshot_id=incident_id,
        latency_ms=100,
        vote=AgentVote.APPROVE,
        degraded=False,
    )


async def _nim_reachable() -> bool:
    """Quickly probe whether NIM (192.168.0.188:8088) is reachable.

    Used as the skip condition for the E2E tests. Returns False (meaning the
    caller should skip) when either of the following holds:

    1. ``MOCK_MODE=true`` — per the original note, conftest.py sets this by
       default so the pytest suite avoids real calls.
    2. NIM cannot actually be reached (network down / server down), or the
       health endpoint answers with a 5xx status.

    To run the real-NIM e2e tests locally:
        MOCK_MODE=false pytest tests/test_solver_recommended_actions.py -k e2e_nim
    """
    # 1. MOCK_MODE check (original note: with conftest setting it to true,
    #    openclaw takes the mock path instead of NIM).
    mock_mode = os.environ.get("MOCK_MODE", "").lower()
    if mock_mode == "true":
        return False

    # 2. Connectivity probe. Any failure (including httpx being unavailable)
    #    is reported as "not reachable" rather than raised.
    try:
        import httpx

        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get("http://192.168.0.188:8088/health")
            return response.status_code < 500
    except Exception:
        return False


# =============================================================================
# Unit — _validate_recommended_action schema validation
# =============================================================================

class TestValidateRecommendedAction:
    """Unit tests for the schema validation in _validate_recommended_action."""

    def test_valid_action_passes(self):
        """A well-formed action dict yields a RecommendedAction."""
        payload = {
            "name": "check_pod_logs",
            "label": "查 Pod Log",
            "emoji": "📋",
            "mcp_provider": "k8s",
            "mcp_tool": "k8s_get_pod_logs",
            "params": {"namespace": "awoooi-prod", "pod": "{labels.pod}"},
            "risk": "low",
            "reasoning": "先查 log 確認根因",
        }

        action = _validate_recommended_action(payload)

        assert action is not None
        assert isinstance(action, RecommendedAction)
        assert action.name == "check_pod_logs"
        assert action.mcp_provider == "k8s"
        assert action.risk == "low"

    def test_missing_name_returns_none(self):
        """A dict without `name` yields None (nothing is fabricated)."""
        payload = {
            "label": "查 Pod Log",
            "emoji": "📋",
            "mcp_provider": "k8s",
            "mcp_tool": "k8s_get_pod_logs",
            "params": {},
            "risk": "low",
            "reasoning": "先查 log",
        }
        assert _validate_recommended_action(payload) is None

    def test_invalid_provider_returns_none(self):
        """An mcp_provider outside the whitelist yields None."""
        payload = {
            "name": "hack",
            "label": "惡意",
            "emoji": "⚠️",
            "mcp_provider": "evil_provider",  # not in the whitelist
            "mcp_tool": "run_rm_rf",
            "params": {},
            "risk": "low",
            "reasoning": "test",
        }
        assert _validate_recommended_action(payload) is None

    def test_invalid_risk_returns_none(self):
        """A risk level outside the whitelist yields None."""
        payload = {
            "name": "test_action",
            "label": "測試",
            "emoji": "🔍",
            "mcp_provider": "k8s",
            "mcp_tool": "k8s_get_pod_logs",
            "params": {},
            "risk": "apocalypse",  # not in the whitelist
            "reasoning": "test",
        }
        assert _validate_recommended_action(payload) is None

    def test_non_dict_input_returns_none(self):
        """Non-dict inputs yield None."""
        for not_a_mapping in ("not a dict", None, 42):
            assert _validate_recommended_action(not_a_mapping) is None

    def test_params_non_dict_coerced_to_empty(self):
        """A non-dict `params` is coerced to {} instead of crashing."""
        payload = {
            "name": "test_action",
            "label": "測試",
            "emoji": "🔍",
            "mcp_provider": "ssh",
            "mcp_tool": "ssh_get_top_processes",
            "params": "not_a_dict",  # wrong type on purpose
            "risk": "low",
            "reasoning": "test",
        }

        action = _validate_recommended_action(payload)

        assert action is not None
        assert action.params == {}

    def test_all_valid_providers(self):
        """Every legal mcp_provider passes validation."""
        for provider in ("k8s", "ssh", "prometheus", "signoz", "database", "internal"):
            payload = {
                "name": f"test_{provider}",
                "label": f"測試 {provider}",
                "emoji": "🔍",
                "mcp_provider": provider,
                "mcp_tool": "some_tool",
                "params": {},
                "risk": "low",
                "reasoning": "test",
            }
            action = _validate_recommended_action(payload)
            assert action is not None, f"provider={provider} 應通過驗證"

    def test_low_medium_high_risk_levels_accepted(self):
        """low/medium/high pass validation; critical goes to manual approval (F4)."""
        for risk in ("low", "medium", "high"):
            payload = {
                "name": f"test_{risk}",
                "label": f"測試 {risk}",
                "emoji": "🔍",
                "mcp_provider": "k8s",
                "mcp_tool": "some_tool",
                "params": {},
                "risk": risk,
                "reasoning": "test",
            }
            action = _validate_recommended_action(payload)
            assert action is not None, f"risk={risk} 應通過驗證"

    def test_critical_risk_rejected_for_button(self):
        """F4 fix: critical actions need manual approval and never become a Telegram button."""
        payload = {
            "name": "test_critical",
            "label": "測試 critical",
            "emoji": "🔍",
            "mcp_provider": "k8s",
            "mcp_tool": "some_tool",
            "params": {},
            "risk": "critical",
            "reasoning": "test",
        }
        action = _validate_recommended_action(payload)
        assert action is None, "critical risk 應被拒絕(F4:不可變按鈕,須走人工審批)"
# =============================================================================
# Unit — _extract_recommended_actions batch validation
# =============================================================================

class TestExtractRecommendedActions:
    """Unit tests for the batch validation in _extract_recommended_actions.

    Each test only inspects the first element of the returned pair (the list
    of accepted actions); the second element is ignored here.
    """

    def test_empty_list_returns_empty(self):
        """An empty recommended_actions list yields []."""
        result, _ = _extract_recommended_actions({"recommended_actions": []})
        assert result == []

    def test_missing_key_returns_empty(self):
        """A payload without the recommended_actions key yields []."""
        result, _ = _extract_recommended_actions({"candidates": []})
        assert result == []

    def test_non_list_returns_empty(self):
        """A non-list recommended_actions value yields []."""
        result, _ = _extract_recommended_actions({"recommended_actions": "not_a_list"})
        assert result == []

    def test_valid_actions_extracted(self):
        """All well-formed actions are extracted, in order."""
        parsed = {
            "recommended_actions": [
                {
                    "name": "check_pod_logs",
                    "label": "查 Pod Log",
                    "emoji": "📋",
                    "mcp_provider": "k8s",
                    "mcp_tool": "k8s_get_pod_logs",
                    "params": {"namespace": "awoooi-prod"},
                    "risk": "low",
                    "reasoning": "先查 log",
                },
                {
                    "name": "k8s_restart",
                    "label": "重啟",
                    "emoji": "🔄",
                    "mcp_provider": "k8s",
                    "mcp_tool": "kubectl_restart",
                    "params": {"namespace": "awoooi-prod"},
                    "risk": "medium",
                    "reasoning": "確認 OOM 後重啟",
                },
            ]
        }
        result, _ = _extract_recommended_actions(parsed)
        assert len(result) == 2
        assert result[0].name == "check_pod_logs"
        assert result[1].name == "k8s_restart"

    def test_mixed_valid_invalid_skips_invalid(self):
        """In a mixed batch the invalid entries are skipped, never fabricated."""
        parsed = {
            "recommended_actions": [
                {
                    "name": "valid_action",
                    "label": "合法",
                    "emoji": "✅",
                    "mcp_provider": "k8s",
                    "mcp_tool": "k8s_get_pod_logs",
                    "params": {},
                    "risk": "low",
                    "reasoning": "ok",
                },
                {
                    # mcp_tool is missing, so this entry is invalid
                    "name": "invalid_action",
                    "label": "非法",
                    "emoji": "❌",
                    "mcp_provider": "k8s",
                    "risk": "low",
                    "reasoning": "bad",
                },
                {
                    "name": "another_valid",
                    "label": "另一合法",
                    "emoji": "🔍",
                    "mcp_provider": "ssh",
                    "mcp_tool": "ssh_get_top_processes",
                    "params": {},
                    "risk": "low",
                    "reasoning": "ok",
                },
            ]
        }
        result, _ = _extract_recommended_actions(parsed)
        assert len(result) == 2, "應 skip 非法,只返回 2 個合法 action"
        names = {r.name for r in result}
        assert "valid_action" in names
        assert "another_valid" in names
        assert "invalid_action" not in names

    def test_max_3_actions_enforced(self):
        """When more than 3 actions are supplied, at most 3 are kept."""
        parsed = {
            "recommended_actions": [
                {
                    "name": f"action_{i}",
                    "label": f"動作 {i}",
                    "emoji": "🔍",
                    "mcp_provider": "k8s",
                    "mcp_tool": "k8s_get_pod_logs",
                    "params": {},
                    "risk": "low",
                    "reasoning": f"reason {i}",
                }
                for i in range(5)
            ]
        }
        result, _ = _extract_recommended_actions(parsed)
        assert len(result) == 3, "最多取 3 個 recommended_actions"


# =============================================================================
# Unit — reworked _degraded_plan
# =============================================================================

class TestDegradedPlanGraceful:
    """Verify the reworked _degraded_plan no longer fabricates a hard-coded RESTART."""

    @pytest.mark.asyncio
    async def test_degraded_plan_empty_candidates_and_actions(self):
        """_degraded_plan must produce an honest, empty plan.

        Required:
          - candidates == []            (no fabricated RESTART)
          - recommended_actions == []   (no fabricated actions)
          - degraded is True
          - vote == DEGRADED

        North Star §1.1: no hard-coded rules; degrading is an honest
        statement of the capability boundary.
        """
        from src.agents.solver_agent import SolverAgent

        agent = SolverAgent()
        diagnosis = _make_diagnosis(
            "TEST-DEGRADED-001",
            "磁碟使用率超過 90%",
            "HostDiskUsage",
        )
        plan = agent._degraded_plan(diagnosis, latency_ms=500, reason="test_unit")

        assert plan.degraded is True, "降級計畫必須標記 degraded=True"
        assert plan.vote == AgentVote.DEGRADED, "降級計畫 vote 必須為 DEGRADED"
        assert plan.candidates == [], (
            f"降級計畫 candidates 必須為空(不假造 RESTART),實際:{plan.candidates}"
        )
        assert plan.recommended_actions == [], (
            f"降級計畫 recommended_actions 必須為空(不假造動作),實際:{plan.recommended_actions}"
        )

    @pytest.mark.asyncio
    async def test_degraded_plan_no_hardcode_restart(self):
        """_degraded_plan carries no 'restart' / 'RESTART' / '重啟' candidate.

        North Star §1.1: no hard-coded rules.
        """
        from src.agents.solver_agent import SolverAgent

        agent = SolverAgent()
        for category in ["HostDiskUsage", "KubePodOOMKilled", "DatabaseConnectionFail"]:
            diagnosis = _make_diagnosis(
                f"TEST-NOREST-{category}",
                f"測試 {category} 降級",
                category,
            )
            plan = agent._degraded_plan(diagnosis, latency_ms=0, reason="no_restart_test")

            # No hard-coded restart candidate. Lower-casing covers both
            # 'restart' and 'RESTART'; the Chinese wording '重啟' is checked
            # explicitly so the test matches what its docstring promises.
            for c in plan.candidates:
                action_lower = c.action.lower()
                english_ok = (
                    "restart" not in action_lower or "rollout restart" in action_lower
                )
                assert english_ok and "重啟" not in c.action, (
                    f"category={category}: 降級不應有 hardcode restart 動作,實際:{c.action}"
                )

            # Most important: candidates is empty (nothing fabricated).
            assert plan.candidates == [], (
                f"category={category}: 降級 candidates 必須為空,實際:{plan.candidates}"
            )


# =============================================================================
# Unit — dynamic loading of the YAML MCP registry
# =============================================================================

class TestMcpRegistryLoader:
    """Tests for the dynamic loading done by _load_mcp_tool_registry."""

    def test_registry_loads_successfully(self):
        """The YAML loads and a non-empty dict is returned."""
        registry = _load_mcp_tool_registry()
        assert isinstance(registry, dict)
        assert len(registry) > 0, "callback_action_spec.yaml 應有 action 定義"

    def test_registry_contains_required_actions(self):
        """The required actions are present in the registry."""
        registry = _load_mcp_tool_registry()
        required_actions = ["check_pod_logs", "k8s_restart", "describe_pod"]
        for action in required_actions:
            assert action in registry, f"action '{action}' 應在 registry 中"

    def test_registry_entries_have_required_fields(self):
        """Every entry has provider / tool / risk / label."""
        registry = _load_mcp_tool_registry()
        for name, info in registry.items():
            for field in ("provider", "tool", "risk", "label"):
                assert field in info, f"action '{name}' 缺少 {field}"

    def test_registry_all_providers_valid(self):
        """Every entry's provider is a member of _VALID_MCP_PROVIDERS."""
        from src.agents.solver_agent import _VALID_MCP_PROVIDERS

        registry = _load_mcp_tool_registry()
        for name, info in registry.items():
            provider = info.get("provider", "")
            assert provider in _VALID_MCP_PROVIDERS, (
                f"action '{name}' 的 provider='{provider}' 不在合法清單 {_VALID_MCP_PROVIDERS}"
            )


# =============================================================================
# E2E — real NIM, three incident categories
# =============================================================================

@pytest.mark.asyncio
async def test_e2e_nim_host_disk_usage():
    """E2E: HostDiskUsage — disk usage too high.

    Calls the real NIM (192.168.0.188:8088); mocking is not allowed.

    Acceptance:
      - len(recommended_actions) >= 1
      - at least one non-restart action
      - the plan is not degraded (when NIM answers normally); a degraded
        plan is checked for cleanliness and then the test is skipped
    """
    if not await _nim_reachable():
        pytest.skip("NIM (192.168.0.188:8088) 不可達,跳過 E2E 測試")

    diagnosis = _make_diagnosis(
        "TEST-E2E-HOST-DISK-001",
        "主機 192.168.0.121 根目錄磁碟使用率達 91%,/var/lib/docker overlay2 目錄佔用最多",
        "HostDiskUsage",
        confidence=0.88,
    )

    agent = get_solver_agent()
    plan = await agent.run(diagnosis)

    # Basic sanity: the run did not blow up.
    assert plan is not None
    assert plan.diagnosis_report is not None

    if plan.degraded:
        # NIM degraded (timeout or failed response): make sure the degraded
        # plan is clean, then skip the acceptance checks.
        assert plan.candidates == [], f"降級時 candidates 必須為空:{plan.candidates}"
        assert plan.recommended_actions == [], f"降級時 recommended_actions 必須為空:{plan.recommended_actions}"
        pytest.skip(f"NIM 回應降級({plan.vote}),跳過 recommended_actions 驗收")

    # Normal-path acceptance.
    assert len(plan.recommended_actions) >= 1, (
        f"HostDiskUsage 應至少有 1 個 recommended_action,實際:{plan.recommended_actions}"
    )

    # At least one non-restart action (diversity >= 40%). Both name and label
    # are checked for "restart" and "重啟", as the acceptance criterion in the
    # module docstring is stated over label/name.
    non_restart_actions = [
        a for a in plan.recommended_actions
        if "restart" not in a.name.lower()
        and "restart" not in a.label.lower()
        and "重啟" not in a.name
        and "重啟" not in a.label
    ]
    assert len(non_restart_actions) >= 1, (
        f"HostDiskUsage 應至少 1 個非 restart 動作(多樣性要求),實際:{[a.name for a in plan.recommended_actions]}"
    )

    # Schema completeness.
    for action in plan.recommended_actions:
        assert action.name, "name 不可為空"
        assert action.label, "label 不可為空"
        assert action.mcp_provider in {"k8s", "ssh", "prometheus", "signoz", "database", "internal"}, (
            f"mcp_provider={action.mcp_provider} 不在白名單"
        )
        # "critical" is deliberately excluded: per F4
        # (test_critical_risk_rejected_for_button) the validator rejects
        # critical actions, so one showing up here means F4 was bypassed.
        assert action.risk in {"low", "medium", "high"}, (
            f"risk={action.risk} 不在白名單"
        )
@pytest.mark.asyncio
async def test_e2e_nim_kube_pod_oom():
    """E2E: KubePodOOM — pod OOM-killed.

    Calls the real NIM (192.168.0.188:8088); mocking is not allowed.

    Acceptance:
      - len(recommended_actions) >= 1
      - at least one non-restart action (e.g. check logs, check resource usage)
    """
    if not await _nim_reachable():
        pytest.skip("NIM (192.168.0.188:8088) 不可達,跳過 E2E 測試")

    diagnosis = _make_diagnosis(
        "TEST-E2E-OOM-001",
        "awoooi-api pod 在 awoooi-prod namespace 因記憶體超限被 OOM Killer 終止,"
        "記憶體限制 512Mi,實際峰值 587Mi,過去 1h 發生 3 次",
        "KubePodOOMKilled",
        confidence=0.92,
    )

    agent = get_solver_agent()
    plan = await agent.run(diagnosis)

    assert plan is not None

    if plan.degraded:
        # A degraded plan must be clean; acceptance checks are then skipped.
        assert plan.candidates == []
        assert plan.recommended_actions == []
        pytest.skip(f"NIM 回應降級({plan.vote}),跳過 recommended_actions 驗收")

    assert len(plan.recommended_actions) >= 1, (
        f"KubePodOOM 應至少有 1 個 recommended_action,實際:{plan.recommended_actions}"
    )

    # Both name and label are checked for "restart" and "重啟", as the
    # acceptance criterion in the module docstring covers label/name.
    non_restart_actions = [
        a for a in plan.recommended_actions
        if "restart" not in a.name.lower()
        and "restart" not in a.label.lower()
        and "重啟" not in a.name
        and "重啟" not in a.label
    ]
    assert len(non_restart_actions) >= 1, (
        f"KubePodOOM 應至少 1 個非 restart 動作,實際:{[a.name for a in plan.recommended_actions]}"
    )

    for action in plan.recommended_actions:
        assert action.mcp_provider in {"k8s", "ssh", "prometheus", "signoz", "database", "internal"}
        # "critical" is deliberately excluded: per F4
        # (test_critical_risk_rejected_for_button) the validator rejects
        # critical actions, so one showing up here means F4 was bypassed.
        assert action.risk in {"low", "medium", "high"}


@pytest.mark.asyncio
async def test_e2e_nim_database_connection_fail():
    """E2E: DatabaseConnectionFail — database connection failure.

    Calls the real NIM (192.168.0.188:8088); mocking is not allowed.

    Acceptance:
      - len(recommended_actions) >= 1
      - at least one non-restart action (e.g. check connections, check DB status)
    """
    if not await _nim_reachable():
        pytest.skip("NIM (192.168.0.188:8088) 不可達,跳過 E2E 測試")

    diagnosis = _make_diagnosis(
        "TEST-E2E-DB-CONN-001",
        "PostgreSQL 資料庫連線池耗盡,最大連線數 100/100 均被佔用,"
        "awoooi-api 回應 'connection pool exhausted',P95 latency 飆升至 12s",
        "DatabaseConnectionFail",
        confidence=0.86,
    )

    agent = get_solver_agent()
    plan = await agent.run(diagnosis)

    assert plan is not None

    if plan.degraded:
        # A degraded plan must be clean; acceptance checks are then skipped.
        assert plan.candidates == []
        assert plan.recommended_actions == []
        pytest.skip(f"NIM 回應降級({plan.vote}),跳過 recommended_actions 驗收")

    assert len(plan.recommended_actions) >= 1, (
        f"DatabaseConnectionFail 應至少有 1 個 recommended_action,實際:{plan.recommended_actions}"
    )

    # Both name and label are checked for "restart" and "重啟", as the
    # acceptance criterion in the module docstring covers label/name.
    non_restart_actions = [
        a for a in plan.recommended_actions
        if "restart" not in a.name.lower()
        and "restart" not in a.label.lower()
        and "重啟" not in a.name
        and "重啟" not in a.label
    ]
    assert len(non_restart_actions) >= 1, (
        f"DatabaseConnectionFail 應至少 1 個非 restart 動作,實際:{[a.name for a in plan.recommended_actions]}"
    )

    for action in plan.recommended_actions:
        assert action.mcp_provider in {"k8s", "ssh", "prometheus", "signoz", "database", "internal"}
        # "critical" is deliberately excluded: per F4
        # (test_critical_risk_rejected_for_button) the validator rejects
        # critical actions, so one showing up here means F4 was bypassed.
        assert action.risk in {"low", "medium", "high"}