ADR-092 P1 飛輪閉環的 Ollama 失敗轉移子系統,全部 Engineer-A2/C/C2 補上。 新服務 (1581 行): - ollama_health_monitor.py (356):3 層健康檢測(TCP/HTTP/推理) - ollama_failover_manager.py (571):111→188 自動切換 + Redis 持久化 + recovery callback - ollama_auto_recovery.py (436):30s 背景監控 + 連續 3 次 HEALTHY → 切回 + clear_cache - failover_alerter.py (218):P1.5 Telegram 容災告警 服務整合: - ai_router.py: AIProviderEnum.OLLAMA_188 + 120s budget + failover fallback chain - main.py lifespan: 啟動時 wire callback + start recovery,關閉時優雅 stop - config.py: OLLAMA_FALLBACK_URL / OLLAMA_HEALTH_CHECK_MODEL / GEMINI_DAILY_QUOTA(帳單熔斷) K8s 配置: - 04-configmap.yaml.patch-188-fallback:注入 OLLAMA_FALLBACK_URL=http://192.168.0.188:11434 測試 (2082 行): - test_ollama_health_monitor.py (402) - test_ollama_failover_manager.py (707) - test_ollama_auto_recovery.py (580) - test_ai_router_failover_integration.py (257) - test_lifespan_failover_wiring.py (136) 依賴鏈:service 三件套 + ai_router + main.py 一起 commit,缺一就 ImportError。 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
258 lines
10 KiB
Python
258 lines
10 KiB
Python
# apps/api/tests/test_ai_router_failover_integration.py | 2026-04-25 @ Asia/Taipei
|
||
# 2026-04-25 P1.2 by Claude Engineer-A2 — failover 整合到 ai_router + lifespan
|
||
"""
|
||
AIRouter × OllamaFailoverManager 整合測試
|
||
==========================================
|
||
測試覆蓋:
|
||
1. 初步路由選 OLLAMA → failover_manager 重評 → decision 使用 failover 結果
|
||
2. failover 回傳 GEMINI primary → decision.selected_provider == GEMINI
|
||
3. failover 的 fallback_chain 正確轉換到 decision.fallback_chain
|
||
4. Initial route selects CLAUDE (CRITICAL-risk intent) → failover_manager is not called
|
||
5. 初步路由選 OPENCLAW_NEMO → failover_manager 不被呼叫
|
||
6. failover_manager 發生例外 → fail-open,保留原始 provider
|
||
|
||
測試分類:unit(mock OllamaFailoverManager,無 Redis / DB 依賴)
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from unittest.mock import AsyncMock, MagicMock, patch
|
||
|
||
import pytest
|
||
|
||
from src.services.ai_router import AIProviderEnum, AIRouter, reset_ai_router
|
||
from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult
|
||
from src.services.ollama_health_monitor import HealthReport, HealthStatus
|
||
|
||
|
||
# =============================================================================
|
||
# Fixtures / Helpers
|
||
# =============================================================================
|
||
|
||
|
||
@pytest.fixture(autouse=True)
def reset_router_singleton():
    """Reset the AIRouter singleton before and after every test.

    Autouse fixture: it keeps a mocked failover manager installed by one
    test from leaking into another. The reset before ``yield`` covers state
    left behind by tests that ran earlier (possibly in other modules); the
    reset after ``yield`` cleans up whatever the current test installed.
    """
    reset_ai_router()
    yield
    reset_ai_router()
|
||
|
||
|
||
def _make_health(status: HealthStatus) -> HealthReport:
    """Build a HealthReport carrying *status* for the fixed 192.168.0.111 host.

    The host URL and the 500 ms latency are constant test values; only the
    status varies between callers.
    """
    report_fields = {
        "status": status,
        "host": "http://192.168.0.111:11434",
        "latency_ms": 500.0,
    }
    return HealthReport(**report_fields)
|
||
|
||
|
||
def _make_failover_result(
    primary_provider: str,
    primary_model: str,
    fallback: list[tuple[str, str]] | None = None,
) -> OllamaRoutingResult:
    """Build an OllamaRoutingResult test object.

    Args:
        primary_provider: Provider name placed on the primary endpoint.
        primary_model: Model name placed on the primary endpoint.
        fallback: Optional ``(provider_name, model)`` pairs, converted in
            order into the fallback chain. ``None`` yields an empty chain.

    Returns:
        A routing result whose endpoints all use an empty URL, whose
        ``health_111`` report is OFFLINE, and whose ``health_188`` is None.
    """
    pairs = fallback if fallback else []
    chain = [
        OllamaEndpoint(url="", provider_name=name, model=model)
        for name, model in pairs
    ]
    primary_endpoint = OllamaEndpoint(
        url="",
        provider_name=primary_provider,
        model=primary_model,
    )
    offline_report = _make_health(HealthStatus.OFFLINE)
    return OllamaRoutingResult(
        primary=primary_endpoint,
        fallback_chain=chain,
        routing_reason=f"test: {primary_provider}",
        health_111=offline_report,
        health_188=None,
    )
|
||
|
||
|
||
def _make_router_with_mock_failover(mock_failover_manager) -> AIRouter:
    """Create a fresh AIRouter and swap in the given failover manager.

    The private ``_failover_manager`` attribute is overwritten directly so
    that tests can observe how the router interacts with the mock.
    """
    patched_router = AIRouter()
    patched_router._failover_manager = mock_failover_manager
    return patched_router
|
||
|
||
|
||
# =============================================================================
|
||
# Test 1: OLLAMA 路由 → failover_manager 重評 → 使用 GEMINI
|
||
# =============================================================================
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_router_uses_failover_when_ollama_initial_provider():
    """An initial OLLAMA route is re-evaluated by the failover manager.

    The mocked failover manager answers with GEMINI as primary, so the final
    decision must carry GEMINI / gemini-1.5-flash and the manager must have
    been awaited exactly once.
    """
    # Function-local imports, as in the original test (only the names that
    # are actually used are imported).
    from src.services.complexity_scorer import ComplexityScore
    from src.services.intent_classifier import IntentResult, IntentType

    mock_fm = MagicMock()
    mock_fm.select_provider = AsyncMock(
        return_value=_make_failover_result(
            primary_provider="gemini",
            primary_model="gemini-1.5-flash",
            fallback=[
                ("ollama_188", "qwen2.5:7b-instruct"),
                ("nemotron", "nvidia/nemotron-mini-4b-instruct"),
            ],
        )
    )
    router = _make_router_with_mock_failover(mock_fm)

    # Pin the classifier and scorer outputs so routing is deterministic.
    # Per the original author's note, ALERT_TRIAGE with a low score takes the
    # fast path to OLLAMA as the initial provider.
    alert_intent = IntentResult(
        intent=IntentType.ALERT_TRIAGE,
        confidence=0.9,
        method="keyword",
        matched_keywords=["alert"],
        detected_resources=[],
        reasoning="test",
    )
    low_complexity = ComplexityScore(score=1, features={})

    with patch.object(
        router._intent_classifier, "classify", return_value=alert_intent
    ), patch.object(
        router._complexity_scorer, "score", return_value=low_complexity
    ):
        decision = await router.route("test alert message")

    assert decision.selected_provider == AIProviderEnum.GEMINI
    assert decision.selected_model == "gemini-1.5-flash"
    mock_fm.select_provider.assert_awaited_once()
|
||
|
||
|
||
# =============================================================================
|
||
# Test 2: fallback_chain 正確轉換
|
||
# =============================================================================
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_router_failover_fallback_chain_converted():
    """The failover manager's fallback chain is carried into the decision.

    The mock returns ollama_188, nemotron and claude as fallbacks; each must
    show up as the matching AIProviderEnum member in
    ``decision.fallback_chain``.
    """
    from src.services.complexity_scorer import ComplexityScore
    from src.services.intent_classifier import IntentResult, IntentType

    routing_result = _make_failover_result(
        primary_provider="gemini",
        primary_model="gemini-1.5-flash",
        fallback=[
            ("ollama_188", "qwen2.5:7b-instruct"),
            ("nemotron", "nvidia/nemotron-mini-4b-instruct"),
            ("claude", "claude-3-5-haiku-20241022"),
        ],
    )
    failover_stub = MagicMock()
    failover_stub.select_provider = AsyncMock(return_value=routing_result)
    router = _make_router_with_mock_failover(failover_stub)

    # Fixed classifier / scorer outputs keep the routing path deterministic.
    triage_intent = IntentResult(
        intent=IntentType.ALERT_TRIAGE,
        confidence=0.9,
        method="keyword",
        matched_keywords=["alert"],
        detected_resources=[],
        reasoning="test",
    )
    trivial_score = ComplexityScore(score=1, features={})

    with patch.object(
        router._intent_classifier, "classify", return_value=triage_intent
    ), patch.object(
        router._complexity_scorer, "score", return_value=trivial_score
    ):
        decision = await router.route("test alert message")

    # Each fallback entry is unpacked as a (provider, model) pair; only the
    # provider matters here.
    fb_providers = [entry_provider for entry_provider, _model in decision.fallback_chain]
    assert AIProviderEnum.OLLAMA_188 in fb_providers, (
        f"OLLAMA_188 not in fallback_chain: {fb_providers}"
    )
    assert AIProviderEnum.NEMOTRON in fb_providers
    assert AIProviderEnum.CLAUDE in fb_providers
|
||
|
||
|
||
# =============================================================================
|
||
# Test 3: CRITICAL-risk route (CLAUDE) → failover_manager is not called
|
||
# =============================================================================
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_router_does_not_use_failover_for_nemotron():
    """A non-OLLAMA initial route must not consult the failover manager.

    Despite the historical function name, this test does not route to
    NEMOTRON. According to the original author's notes, NEMOTRON is only
    reachable through ``route_tool_calling()``, not ``route()``. The test
    instead forces a CRITICAL-risk DELETE intent, asserts that the decision
    is CLAUDE, and checks that ``select_provider`` was never awaited.
    """
    from src.services.complexity_scorer import ComplexityScore
    from src.services.intent_classifier import IntentResult, IntentType, RiskLevel

    mock_fm = MagicMock()
    mock_fm.select_provider = AsyncMock()
    router = _make_router_with_mock_failover(mock_fm)

    # Force a CRITICAL-risk intent so the initial provider is not OLLAMA.
    critical_intent = IntentResult(
        intent=IntentType.DELETE,
        confidence=1.0,
        method="keyword",
        matched_keywords=["delete"],
        detected_resources=[],
        reasoning="test",
        risk_level=RiskLevel.CRITICAL,
    )
    high_complexity = ComplexityScore(score=5, features={})

    with patch.object(
        router._intent_classifier, "classify", return_value=critical_intent
    ), patch.object(
        router._complexity_scorer, "score", return_value=high_complexity
    ):
        decision = await router.route("delete this service")

    # CRITICAL risk → CLAUDE; the failover manager must stay untouched.
    assert decision.selected_provider == AIProviderEnum.CLAUDE
    mock_fm.select_provider.assert_not_awaited()
|
||
|
||
|
||
# =============================================================================
|
||
# Test 4: OPENCLAW_NEMO 路由 → failover_manager 不被呼叫
|
||
# =============================================================================
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_router_does_not_use_failover_for_openclaw_nemo():
    """A diagnose-hinted request routes to OPENCLAW_NEMO without failover.

    No classifier patching is needed here: the ``intent_hint`` entry in the
    context is what steers the router (the original comment refers to this
    as the "rule 3" override).
    """
    failover_stub = MagicMock(select_provider=AsyncMock())
    router = _make_router_with_mock_failover(failover_stub)

    hinted_context = {"intent_hint": "diagnose"}
    decision = await router.route("diagnose service crash", context=hinted_context)

    assert decision.selected_provider == AIProviderEnum.OPENCLAW_NEMO
    failover_stub.select_provider.assert_not_awaited()
|
||
|
||
|
||
# =============================================================================
|
||
# Test 5: failover_manager 發生例外 → fail-open,保留原始 OLLAMA
|
||
# =============================================================================
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_router_failopen_when_failover_manager_raises():
    """The router fails open when the failover manager raises.

    ``select_provider`` is mocked to raise ``RuntimeError``. ``route()`` must
    not propagate it; the decision keeps the initial OLLAMA provider and
    still has a non-empty fallback chain.
    """
    from src.services.complexity_scorer import ComplexityScore
    from src.services.intent_classifier import IntentResult, IntentType

    mock_fm = MagicMock()
    mock_fm.select_provider = AsyncMock(side_effect=RuntimeError("redis timeout"))
    router = _make_router_with_mock_failover(mock_fm)

    # Same classifier / scorer setup as the happy-path failover test, so the
    # initial provider is OLLAMA and the failover manager is consulted.
    alert_intent = IntentResult(
        intent=IntentType.ALERT_TRIAGE,
        confidence=0.9,
        method="keyword",
        matched_keywords=["alert"],
        detected_resources=[],
        reasoning="test",
    )
    low_complexity = ComplexityScore(score=1, features={})

    with patch.object(
        router._intent_classifier, "classify", return_value=alert_intent
    ), patch.object(
        router._complexity_scorer, "score", return_value=low_complexity
    ):
        # Must not raise: the router is expected to fail open.
        decision = await router.route("test alert message")

    # Guard against a vacuous pass: the failing manager must actually have
    # been invoked, otherwise the fail-open path was never exercised.
    mock_fm.select_provider.assert_awaited()

    # Fail-open → the original initial decision (OLLAMA) is kept.
    assert decision.selected_provider == AIProviderEnum.OLLAMA
    # The fallback chain is still populated (the original comment attributes
    # this to _build_fallback_chain).
    assert len(decision.fallback_chain) > 0
|