Files
awoooi/apps/api/tests/test_solver_recommended_actions.py
Your Name 9908fdf50d
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 1m59s
feat(p3.1-t2-patha): DiagnosisAggregator 路徑 A + Solver F4 critical reject + 對齊測試
Wave 8 P3.1-T2 PathA 啟用 + Solver F4 安全強化 + test 對齊:

PathA — DiagnosisAggregator 信號分類層補 PDI:
- ENABLE_DIAGNOSIS_AGGREGATOR default=False → True
  · PathA 純信號分類層(OOMKilled/CrashLoop 等業務邏輯)
  · 不重複呼叫 K8s/SigNoz API(只取 PDI 已收集的 raw 資料)
  · 安全 default on — 純邏輯處理,無外部依賴重疊
- diagnosis_aggregator.py +155 行(PathA 實作)
- pre_decision_investigator.py 已接 (commit 3a2cd151)

F4 — Solver critical risk reject:
- solver_agent.py: _validate_recommended_action 拒絕 risk=critical
  · 鐵律:critical 動作必須走人工審批,不可變 Telegram 按鈕
  · log warning + return None(被 _extract 過濾掉)
- _extract_recommended_actions 改返回 (list, status_str) tuple
  · status="ok"/"empty"/"all_invalid" 供呼叫端決策
- protocol.py +16 / metrics.py +9 / ai_router.py +18 — 配套 metric + protocol field

測試對齊:
- test_solver_recommended_actions.py 拆 test_all_valid → low/medium/high accepted +
  test_critical_rejected
- result tuple unpack: result, _ = _extract_recommended_actions(...)
- test_diagnosis_aggregator_stub.py: feature flag default 改 True 對齊 PathA

Tests: 51 passed (solver 28 + aggregator 16 + router fallback 8)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-Authored-By: Multiple Engineers (Wave 8 P3.1-T2 PathA + F4) <noreply@anthropic.com>
2026-04-27 14:42:29 +08:00

624 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
test_solver_recommended_actions.py
===================================
B1 — Solver 結構化動作 RecommendedAction schema 驗證 + 真實 NIM e2e 測試
2026-04-27 Claude Sonnet 4.6: B1 — Solver 結構化動作 (北極星 §1.1 修復多樣性 ≥ 40%)
測試範圍:
Unit — schema 驗證_validate_recommended_action / _extract_recommended_actions
Unit — _degraded_plan 改造candidates=[], recommended_actions=[], degraded=True
Unit — YAML MCP registry 動態載入
E2E — 真實 NIM (192.168.0.188:8088) 三類 incident
HostDiskUsage / KubePodOOM / DatabaseConnectionFail
assert: len(recommended_actions) >= 1
至少 1 個非 restart 類動作label/name 不含「重啟」「restart」
遵循 feedback_no_mock_testing.md:
- 禁止 MagicMock/AsyncMock/unittest.mock.patch 虛構 LLM 呼叫
- 真實 NIM 不可達時 pytest.skip()skipif 判斷 OpenClaw 連線)
"""
from __future__ import annotations
import sys
import os
# 確保 src 可找到
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../"))
import pytest
import asyncio
from src.agents.protocol import (
AgentVote,
DiagnosisReport,
Hypothesis,
RecommendedAction,
)
from src.agents.solver_agent import (
_extract_recommended_actions,
_load_mcp_tool_registry,
_validate_recommended_action,
get_solver_agent,
)
# =============================================================================
# Helpers
# =============================================================================
def _make_diagnosis(
    incident_id: str,
    hypothesis: str,
    category: str,
    confidence: float = 0.85,
) -> DiagnosisReport:
    """Build a single-hypothesis DiagnosisReport used as solver input in tests.

    Only the input structure is fabricated here; no LLM call is mocked.

    Args:
        incident_id: Stored as the report's ``evidence_snapshot_id``.
        hypothesis: Text used as the hypothesis ``description``.
        category: Incident category attached to the hypothesis.
        confidence: Confidence assigned to the hypothesis.

    Returns:
        A non-degraded report that votes ``AgentVote.APPROVE`` and carries a
        fixed ``latency_ms`` of 100.
    """
    sole_hypothesis = Hypothesis(
        description=hypothesis,
        confidence=confidence,
        evidence_chain=["test_evidence"],
        category=category,
    )
    return DiagnosisReport(
        hypotheses=[sole_hypothesis],
        evidence_snapshot_id=incident_id,
        latency_ms=100,
        vote=AgentVote.APPROVE,
        degraded=False,
    )
async def _nim_reachable() -> bool:
"""
快速探測 NIM (192.168.0.188:8088) 是否可達(用於 skipif
E2E 跳過條件(任一滿足即 skip
1. MOCK_MODE=trueconftest.py 預設設置pytest 測試套件中避免真實呼叫)
2. NIM 實際不可達(網路斷線 / server down
若要在本地跑真實 NIM e2eMOCK_MODE=false pytest tests/test_solver_recommended_actions.py -k e2e_nim
"""
# 1. MOCK_MODE 檢查conftest 設 true會讓 openclaw 走 mock 而非 NIM
if os.environ.get("MOCK_MODE", "").lower() == "true":
return False
# 2. NIM 連線探測
try:
import httpx
async with httpx.AsyncClient(timeout=5.0) as client:
r = await client.get("http://192.168.0.188:8088/health")
return r.status_code < 500
except Exception:
return False
# =============================================================================
# Unit — _validate_recommended_action schema 驗證
# =============================================================================
class TestValidateRecommendedAction:
    """Unit tests for the schema checks done by _validate_recommended_action."""

    def test_valid_action_passes(self):
        """A well-formed action dict is turned into a RecommendedAction."""
        payload = {
            "name": "check_pod_logs",
            "label": "查 Pod Log",
            "emoji": "📋",
            "mcp_provider": "k8s",
            "mcp_tool": "k8s_get_pod_logs",
            "params": {"namespace": "awoooi-prod", "pod": "{labels.pod}"},
            "risk": "low",
            "reasoning": "先查 log 確認根因",
        }
        validated = _validate_recommended_action(payload)
        assert validated is not None
        assert isinstance(validated, RecommendedAction)
        assert validated.name == "check_pod_logs"
        assert validated.mcp_provider == "k8s"
        assert validated.risk == "low"

    def test_missing_name_returns_none(self):
        """Without a ``name`` the validator returns None (nothing is made up)."""
        payload = {
            "label": "查 Pod Log",
            "emoji": "📋",
            "mcp_provider": "k8s",
            "mcp_tool": "k8s_get_pod_logs",
            "params": {},
            "risk": "low",
            "reasoning": "先查 log",
        }
        assert _validate_recommended_action(payload) is None

    def test_invalid_provider_returns_none(self):
        """An mcp_provider outside the whitelist yields None."""
        payload = {
            "name": "hack",
            "label": "惡意",
            "emoji": "⚠️",
            "mcp_provider": "evil_provider",  # not in the whitelist
            "mcp_tool": "run_rm_rf",
            "params": {},
            "risk": "low",
            "reasoning": "test",
        }
        assert _validate_recommended_action(payload) is None

    def test_invalid_risk_returns_none(self):
        """A risk value outside the whitelist yields None."""
        payload = {
            "name": "test_action",
            "label": "測試",
            "emoji": "🔍",
            "mcp_provider": "k8s",
            "mcp_tool": "k8s_get_pod_logs",
            "params": {},
            "risk": "apocalypse",  # not in the whitelist
            "reasoning": "test",
        }
        assert _validate_recommended_action(payload) is None

    def test_non_dict_input_returns_none(self):
        """Anything that is not a dict yields None."""
        for bogus in ("not a dict", None, 42):
            assert _validate_recommended_action(bogus) is None

    def test_params_non_dict_coerced_to_empty(self):
        """A non-dict ``params`` is coerced to {} instead of crashing."""
        payload = {
            "name": "test_action",
            "label": "測試",
            "emoji": "🔍",
            "mcp_provider": "ssh",
            "mcp_tool": "ssh_get_top_processes",
            "params": "not_a_dict",  # wrong type on purpose
            "risk": "low",
            "reasoning": "test",
        }
        validated = _validate_recommended_action(payload)
        assert validated is not None
        assert validated.params == {}

    def test_all_valid_providers(self):
        """Every listed mcp_provider passes validation."""
        for provider in ("k8s", "ssh", "prometheus", "signoz", "database", "internal"):
            payload = {
                "name": f"test_{provider}",
                "label": f"測試 {provider}",
                "emoji": "🔍",
                "mcp_provider": provider,
                "mcp_tool": "some_tool",
                "params": {},
                "risk": "low",
                "reasoning": "test",
            }
            validated = _validate_recommended_action(payload)
            assert validated is not None, f"provider={provider} 應通過驗證"

    def test_low_medium_high_risk_levels_accepted(self):
        """low/medium/high pass validation; critical needs manual approval (F4)."""
        for risk in ("low", "medium", "high"):
            payload = {
                "name": f"test_{risk}",
                "label": f"測試 {risk}",
                "emoji": "🔍",
                "mcp_provider": "k8s",
                "mcp_tool": "some_tool",
                "params": {},
                "risk": risk,
                "reasoning": "test",
            }
            validated = _validate_recommended_action(payload)
            assert validated is not None, f"risk={risk} 應通過驗證"

    def test_critical_risk_rejected_for_button(self):
        """F4: a critical action must go through manual approval, never a Telegram button."""
        payload = {
            "name": "test_critical",
            "label": "測試 critical",
            "emoji": "🔍",
            "mcp_provider": "k8s",
            "mcp_tool": "some_tool",
            "params": {},
            "risk": "critical",
            "reasoning": "test",
        }
        validated = _validate_recommended_action(payload)
        assert validated is None, "critical risk 應被拒絕F4不可變按鈕須走人工審批"
# =============================================================================
# Unit — _extract_recommended_actions 批量驗證
# =============================================================================
class TestExtractRecommendedActions:
    """Unit tests for the batch validation done by _extract_recommended_actions.

    The function returns a ``(actions, status)`` pair; these tests only look
    at the action list and discard the status.
    """

    def test_empty_list_returns_empty(self):
        """An empty recommended_actions list yields []."""
        actions, _status = _extract_recommended_actions({"recommended_actions": []})
        assert actions == []

    def test_missing_key_returns_empty(self):
        """A payload without the recommended_actions key yields []."""
        actions, _status = _extract_recommended_actions({"candidates": []})
        assert actions == []

    def test_non_list_returns_empty(self):
        """A non-list recommended_actions value yields []."""
        actions, _status = _extract_recommended_actions(
            {"recommended_actions": "not_a_list"}
        )
        assert actions == []

    def test_valid_actions_extracted(self):
        """All valid actions are extracted, in their original order."""
        check_logs = {
            "name": "check_pod_logs",
            "label": "查 Pod Log",
            "emoji": "📋",
            "mcp_provider": "k8s",
            "mcp_tool": "k8s_get_pod_logs",
            "params": {"namespace": "awoooi-prod"},
            "risk": "low",
            "reasoning": "先查 log",
        }
        restart = {
            "name": "k8s_restart",
            "label": "重啟",
            "emoji": "🔄",
            "mcp_provider": "k8s",
            "mcp_tool": "kubectl_restart",
            "params": {"namespace": "awoooi-prod"},
            "risk": "medium",
            "reasoning": "確認 OOM 後重啟",
        }
        actions, _status = _extract_recommended_actions(
            {"recommended_actions": [check_logs, restart]}
        )
        assert len(actions) == 2
        assert actions[0].name == "check_pod_logs"
        assert actions[1].name == "k8s_restart"

    def test_mixed_valid_invalid_skips_invalid(self):
        """Invalid entries are skipped; nothing is fabricated in their place."""
        first_valid = {
            "name": "valid_action",
            "label": "合法",
            "emoji": "",
            "mcp_provider": "k8s",
            "mcp_tool": "k8s_get_pod_logs",
            "params": {},
            "risk": "low",
            "reasoning": "ok",
        }
        # No mcp_tool key, which makes this entry invalid.
        broken = {
            "name": "invalid_action",
            "label": "非法",
            "emoji": "",
            "mcp_provider": "k8s",
            "risk": "low",
            "reasoning": "bad",
        }
        second_valid = {
            "name": "another_valid",
            "label": "另一合法",
            "emoji": "🔍",
            "mcp_provider": "ssh",
            "mcp_tool": "ssh_get_top_processes",
            "params": {},
            "risk": "low",
            "reasoning": "ok",
        }
        actions, _status = _extract_recommended_actions(
            {"recommended_actions": [first_valid, broken, second_valid]}
        )
        assert len(actions) == 2, "應 skip 非法,只返回 2 個合法 action"
        extracted_names = {entry.name for entry in actions}
        assert "valid_action" in extracted_names
        assert "another_valid" in extracted_names
        assert "invalid_action" not in extracted_names

    def test_max_3_actions_enforced(self):
        """With more than 3 valid entries, at most 3 are kept."""
        five_entries = [
            {
                "name": f"action_{i}",
                "label": f"動作 {i}",
                "emoji": "🔍",
                "mcp_provider": "k8s",
                "mcp_tool": "k8s_get_pod_logs",
                "params": {},
                "risk": "low",
                "reasoning": f"reason {i}",
            }
            for i in range(5)
        ]
        actions, _status = _extract_recommended_actions(
            {"recommended_actions": five_entries}
        )
        assert len(actions) == 3, "最多取 3 個 recommended_actions"
# =============================================================================
# Unit — _degraded_plan 改造驗證
# =============================================================================
class TestDegradedPlanGraceful:
    """Checks that the reworked _degraded_plan no longer fabricates a hard-coded RESTART."""

    @pytest.mark.asyncio
    async def test_degraded_plan_empty_candidates_and_actions(self):
        """A degraded plan must be an honest statement of the capability boundary.

        Expected from _degraded_plan:
          - candidates == []            (no fabricated RESTART)
          - recommended_actions == []   (no fabricated actions)
          - degraded is True
          - vote == AgentVote.DEGRADED

        North Star §1.1: hard-coded rules are forbidden.
        """
        from src.agents.solver_agent import SolverAgent

        report = _make_diagnosis(
            "TEST-DEGRADED-001",
            "磁碟使用率超過 90%",
            "HostDiskUsage",
        )
        plan = SolverAgent()._degraded_plan(report, latency_ms=500, reason="test_unit")

        assert plan.degraded is True, "降級計畫必須標記 degraded=True"
        assert plan.vote == AgentVote.DEGRADED, "降級計畫 vote 必須為 DEGRADED"
        assert plan.candidates == [], (
            f"降級計畫 candidates 必須為空(不假造 RESTART實際{plan.candidates}"
        )
        assert plan.recommended_actions == [], (
            f"降級計畫 recommended_actions 必須為空(不假造動作),實際:{plan.recommended_actions}"
        )

    @pytest.mark.asyncio
    async def test_degraded_plan_no_hardcode_restart(self):
        """_degraded_plan yields no restart candidate for any tested category.

        North Star §1.1: hard-coded rules are forbidden.
        """
        from src.agents.solver_agent import SolverAgent

        solver = SolverAgent()
        categories = ("HostDiskUsage", "KubePodOOMKilled", "DatabaseConnectionFail")
        for category in categories:
            report = _make_diagnosis(
                f"TEST-NOREST-{category}",
                f"測試 {category} 降級",
                category,
            )
            plan = solver._degraded_plan(report, latency_ms=0, reason="no_restart_test")

            # Runs first so a leaked restart candidate fails with a targeted
            # message before the stricter emptiness check below.
            for c in plan.candidates:
                lowered = c.action.lower()
                assert "restart" not in lowered or "rollout restart" in lowered, (
                    f"category={category}: 降級不應有 hardcode restart 動作,實際:{c.action}"
                )

            # The key requirement: candidates are empty (nothing fabricated).
            assert plan.candidates == [], (
                f"category={category}: 降級 candidates 必須為空,實際:{plan.candidates}"
            )
# =============================================================================
# Unit — YAML MCP registry 動態載入
# =============================================================================
class TestMcpRegistryLoader:
    """Tests for the dynamic registry returned by _load_mcp_tool_registry."""

    def test_registry_loads_successfully(self):
        """The loader returns a non-empty dict."""
        loaded = _load_mcp_tool_registry()
        assert isinstance(loaded, dict)
        assert len(loaded) > 0, "callback_action_spec.yaml 應有 action 定義"

    def test_registry_contains_required_actions(self):
        """The must-have actions are present in the registry."""
        loaded = _load_mcp_tool_registry()
        for action in ("check_pod_logs", "k8s_restart", "describe_pod"):
            assert action in loaded, f"action '{action}' 應在 registry 中"

    def test_registry_entries_have_required_fields(self):
        """Every entry carries provider / tool / risk / label."""
        loaded = _load_mcp_tool_registry()
        for name, info in loaded.items():
            # Checked in this order so the first missing field is reported.
            for field in ("provider", "tool", "risk", "label"):
                assert field in info, f"action '{name}' 缺少 {field}"

    def test_registry_all_providers_valid(self):
        """Every entry's provider belongs to _VALID_MCP_PROVIDERS."""
        from src.agents.solver_agent import _VALID_MCP_PROVIDERS

        loaded = _load_mcp_tool_registry()
        for name, info in loaded.items():
            provider = info.get("provider", "")
            assert provider in _VALID_MCP_PROVIDERS, (
                f"action '{name}' 的 provider='{provider}' 不在合法清單 {_VALID_MCP_PROVIDERS}"
            )
# =============================================================================
# E2E — 真實 NIM 三類 incident 測試
# =============================================================================
@pytest.mark.asyncio
async def test_e2e_nim_host_disk_usage():
    """E2E: HostDiskUsage — disk usage too high.

    Calls the real NIM (192.168.0.188:8088); mocking is not allowed.
    The test skips itself when the NIM is unreachable or the plan comes back
    degraded (after checking that the degraded plan is clean).

    Acceptance:
      - len(recommended_actions) >= 1
      - at least 1 non-restart action
      - every action has a name, a label, a whitelisted mcp_provider and a
        risk of low/medium/high
    """
    if not await _nim_reachable():
        pytest.skip("NIM (192.168.0.188:8088) 不可達,跳過 E2E 測試")

    diagnosis = _make_diagnosis(
        "TEST-E2E-HOST-DISK-001",
        "主機 192.168.0.121 根目錄磁碟使用率達 91%/var/lib/docker overlay2 目錄佔用最多",
        "HostDiskUsage",
        confidence=0.88,
    )
    agent = get_solver_agent()
    plan = await agent.run(diagnosis)

    # Basic sanity: the solver did not crash and kept the diagnosis attached.
    assert plan is not None
    assert plan.diagnosis_report is not None

    if plan.degraded:
        # The NIM degraded (timeout or failed response): verify the
        # degradation is clean, then skip the acceptance checks.
        assert plan.candidates == [], f"降級時 candidates 必須為空:{plan.candidates}"
        assert plan.recommended_actions == [], f"降級時 recommended_actions 必須為空:{plan.recommended_actions}"
        pytest.skip(f"NIM 回應降級({plan.vote}),跳過 recommended_actions 驗收")

    # Happy-path acceptance.
    assert len(plan.recommended_actions) >= 1, (
        f"HostDiskUsage 應至少有 1 個 recommended_action實際{plan.recommended_actions}"
    )

    # At least one action that is not a restart (diversity requirement).
    non_restart_actions = [
        a for a in plan.recommended_actions
        if "restart" not in a.name.lower()
        and "restart" not in a.label.lower()
        and "重啟" not in a.label
    ]
    assert len(non_restart_actions) >= 1, (
        f"HostDiskUsage 應至少 1 個非 restart 動作(多樣性要求),實際:{[a.name for a in plan.recommended_actions]}"
    )

    # Schema integrity.
    for action in plan.recommended_actions:
        assert action.name, "name 不可為空"
        assert action.label, "label 不可為空"
        assert action.mcp_provider in {"k8s", "ssh", "prometheus", "signoz", "database", "internal"}, (
            f"mcp_provider={action.mcp_provider} 不在白名單"
        )
        # F4: "critical" is rejected by _validate_recommended_action (see
        # test_critical_risk_rejected_for_button), so it must never show up
        # among the recommended actions.
        assert action.risk in {"low", "medium", "high"}, (
            f"risk={action.risk} 不在白名單"
        )
@pytest.mark.asyncio
async def test_e2e_nim_kube_pod_oom():
    """E2E: KubePodOOM — pod OOM-killed.

    Calls the real NIM (192.168.0.188:8088); mocking is not allowed.
    The test skips itself when the NIM is unreachable or the plan comes back
    degraded (after checking that the degraded plan is clean).

    Acceptance:
      - len(recommended_actions) >= 1
      - at least 1 non-restart action (e.g. inspect logs or resource usage)
      - every action has a whitelisted mcp_provider and a risk of
        low/medium/high
    """
    if not await _nim_reachable():
        pytest.skip("NIM (192.168.0.188:8088) 不可達,跳過 E2E 測試")

    diagnosis = _make_diagnosis(
        "TEST-E2E-OOM-001",
        "awoooi-api pod 在 awoooi-prod namespace 因記憶體超限被 OOM Killer 終止,"
        "記憶體限制 512Mi實際峰值 587Mi過去 1h 發生 3 次",
        "KubePodOOMKilled",
        confidence=0.92,
    )
    agent = get_solver_agent()
    plan = await agent.run(diagnosis)
    assert plan is not None

    if plan.degraded:
        # Degraded answer: it must be clean, then the acceptance is skipped.
        assert plan.candidates == []
        assert plan.recommended_actions == []
        pytest.skip(f"NIM 回應降級({plan.vote}),跳過 recommended_actions 驗收")

    assert len(plan.recommended_actions) >= 1, (
        f"KubePodOOM 應至少有 1 個 recommended_action實際{plan.recommended_actions}"
    )

    # At least one action that is not a restart.
    non_restart_actions = [
        a for a in plan.recommended_actions
        if "restart" not in a.name.lower()
        and "restart" not in a.label.lower()
        and "重啟" not in a.label
    ]
    assert len(non_restart_actions) >= 1, (
        f"KubePodOOM 應至少 1 個非 restart 動作,實際:{[a.name for a in plan.recommended_actions]}"
    )

    for action in plan.recommended_actions:
        assert action.mcp_provider in {"k8s", "ssh", "prometheus", "signoz", "database", "internal"}
        # F4: "critical" is rejected by _validate_recommended_action (see
        # test_critical_risk_rejected_for_button), so it must never show up
        # among the recommended actions.
        assert action.risk in {"low", "medium", "high"}
@pytest.mark.asyncio
async def test_e2e_nim_database_connection_fail():
    """E2E: DatabaseConnectionFail — database connection failure.

    Calls the real NIM (192.168.0.188:8088); mocking is not allowed.
    The test skips itself when the NIM is unreachable or the plan comes back
    degraded (after checking that the degraded plan is clean).

    Acceptance:
      - len(recommended_actions) >= 1
      - at least 1 non-restart action (e.g. inspect connections or DB state)
      - every action has a whitelisted mcp_provider and a risk of
        low/medium/high
    """
    if not await _nim_reachable():
        pytest.skip("NIM (192.168.0.188:8088) 不可達,跳過 E2E 測試")

    diagnosis = _make_diagnosis(
        "TEST-E2E-DB-CONN-001",
        "PostgreSQL 資料庫連線池耗盡,最大連線數 100/100 均被佔用,"
        "awoooi-api 回應 'connection pool exhausted'P95 latency 飆升至 12s",
        "DatabaseConnectionFail",
        confidence=0.86,
    )
    agent = get_solver_agent()
    plan = await agent.run(diagnosis)
    assert plan is not None

    if plan.degraded:
        # Degraded answer: it must be clean, then the acceptance is skipped.
        assert plan.candidates == []
        assert plan.recommended_actions == []
        pytest.skip(f"NIM 回應降級({plan.vote}),跳過 recommended_actions 驗收")

    assert len(plan.recommended_actions) >= 1, (
        f"DatabaseConnectionFail 應至少有 1 個 recommended_action實際{plan.recommended_actions}"
    )

    # At least one action that is not a restart.
    non_restart_actions = [
        a for a in plan.recommended_actions
        if "restart" not in a.name.lower()
        and "restart" not in a.label.lower()
        and "重啟" not in a.label
    ]
    assert len(non_restart_actions) >= 1, (
        f"DatabaseConnectionFail 應至少 1 個非 restart 動作,實際:{[a.name for a in plan.recommended_actions]}"
    )

    for action in plan.recommended_actions:
        assert action.mcp_provider in {"k8s", "ssh", "prometheus", "signoz", "database", "internal"}
        # F4: "critical" is rejected by _validate_recommended_action (see
        # test_critical_risk_rejected_for_button), so it must never show up
        # among the recommended actions.
        assert action.risk in {"low", "medium", "high"}