# apps/api/tests/test_ollama_failover_manager.py | 2026-04-27 @ Asia/Taipei # Created 2026-04-25 P1.1c by Claude Engineer-C # 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復(路由矩陣更新) # 2026-04-27 波次對齊 by Claude Sonnet 4.6 — 統帥鐵律:唯一 Ollama=111,188 完全移出 """ OllamaFailoverManager 單元測試 - P1.1c v3.0 ============================================= 測試覆蓋(新路由矩陣:統帥鐵律 2026-04-26,唯一 Ollama=111,備援只用 Gemini): - 111 HEALTHY → primary=ollama(111),fallback=[Gemini] - 111 SLOW → primary=Gemini,fallback=[111, Nemotron, Claude] - 111 DEGRADED → primary=Gemini,fallback=[Nemotron, Claude] - 111 OFFLINE → primary=Gemini,fallback=[Nemotron, Claude] - Gemini quota exceeded → primary=Nemotron,fallback=[Claude] - select_provider 只 check 111(不再並行 check 188) - clear_cache() / notify_recovery() 方法 - OllamaRoutingResult.health_188 保留為 optional(backward-compat) 測試分類:unit(mock OllamaHealthMonitor,無 DB 依賴) """ from __future__ import annotations from unittest.mock import AsyncMock, MagicMock, patch import pytest from src.services.ollama_health_monitor import HealthReport, HealthStatus from src.services.ollama_failover_manager import ( OllamaFailoverManager, OllamaRoutingResult, get_ollama_failover_manager, reset_ollama_failover_manager, ) # ============================================================================= # Fixtures # ============================================================================= URL_111 = "http://192.168.0.111:11434" URL_188 = "http://192.168.0.188:11434" @pytest.fixture(autouse=True) def reset_singleton(): yield reset_ollama_failover_manager() def _make_health(status: HealthStatus, url: str = URL_111) -> HealthReport: return HealthReport(status=status, host=url, latency_ms=500.0) def _make_manager(url_111: str = URL_111) -> OllamaFailoverManager: """建立 manager,settings mock 為指定 URL(188 已移除)""" mock_settings = MagicMock() mock_settings.OLLAMA_URL = url_111 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_monitor = MagicMock() manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings return manager # ============================================================================= # _decide_route 決策矩陣 # ============================================================================= class TestDecideRoute: """_decide_route 路由邏輯純函數測試(新簽名:只需 health_111, url_111)""" def _setup(self) -> OllamaFailoverManager: return _make_manager() # ------------------------------------------------------------------ # 111 HEALTHY # ------------------------------------------------------------------ def test_111_healthy_primary_is_ollama(self): manager = self._setup() h111 = _make_health(HealthStatus.HEALTHY, URL_111) result = manager._decide_route(h111, URL_111) assert result.primary.provider_name == "ollama" assert result.primary.url == URL_111 def test_111_healthy_fallback_is_gemini_only(self): """統帥鐵律:HEALTHY fallback 只有 Gemini,188/Nemotron 移出""" manager = self._setup() h111 = _make_health(HealthStatus.HEALTHY, URL_111) result = manager._decide_route(h111, URL_111) provider_names = [e.provider_name for e in result.fallback_chain] assert provider_names == ["gemini"] assert "ollama_188" not in provider_names assert "nemotron" not in provider_names def test_111_healthy_fallback_includes_gemini(self): manager = self._setup() h111 = _make_health(HealthStatus.HEALTHY, URL_111) result = manager._decide_route(h111, URL_111) provider_names = [e.provider_name for e in result.fallback_chain] assert "gemini" in provider_names def test_111_healthy_fallback_order_gemini_first(self): """統帥鐵律:Gemini 是唯一 fallback,排在 fallback_chain[0]""" manager = self._setup() h111 = _make_health(HealthStatus.HEALTHY, URL_111) result = manager._decide_route(h111, URL_111) assert result.fallback_chain[0].provider_name == "gemini" # ------------------------------------------------------------------ # 111 SLOW # ------------------------------------------------------------------ def test_111_slow_primary_is_gemini(self): """新矩陣:111 SLOW → primary=Gemini(111 eval ~0.09 token/s, ~111s,Gemini 更快)""" manager = self._setup() h111 = _make_health(HealthStatus.SLOW, URL_111) result = manager._decide_route(h111, URL_111) assert result.primary.provider_name == "gemini" def test_111_slow_fallback_includes_111_and_nemotron(self): """SLOW 時 111 + Nemotron 在 fallback(188 已移出)""" manager = self._setup() h111 = _make_health(HealthStatus.SLOW, URL_111) result = manager._decide_route(h111, URL_111) provider_names = [e.provider_name for e in result.fallback_chain] assert "ollama" in provider_names assert "nemotron" in provider_names assert "ollama_188" not in provider_names def test_111_slow_primary_is_gemini_no_188(self): """111 SLOW + 188 不存在 → primary=Gemini(新矩陣,188 完全移出)""" manager = _make_manager() h111 = _make_health(HealthStatus.SLOW, URL_111) result = manager._decide_route(h111, URL_111) assert result.primary.provider_name == "gemini" # ------------------------------------------------------------------ # 111 DEGRADED # ------------------------------------------------------------------ def test_111_degraded_primary_is_gemini(self): """新矩陣:111 DEGRADED → primary=Gemini""" manager = self._setup() h111 = _make_health(HealthStatus.DEGRADED, URL_111) result = manager._decide_route(h111, URL_111) assert result.primary.provider_name == "gemini" def test_111_degraded_fallback_no_111(self): """DEGRADED 時 111 不在 fallback(太差了)""" manager = self._setup() h111 = _make_health(HealthStatus.DEGRADED, URL_111) result = manager._decide_route(h111, URL_111) provider_names = [e.provider_name for e in result.fallback_chain] assert "ollama" not in provider_names def test_111_degraded_fallback_includes_nemotron_claude(self): """統帥鐵律:DEGRADED fallback = [Nemotron, Claude](188 已移出)""" manager = self._setup() h111 = _make_health(HealthStatus.DEGRADED, URL_111) result = manager._decide_route(h111, URL_111) provider_names = [e.provider_name for e in result.fallback_chain] assert "nemotron" in provider_names assert "claude" in provider_names assert "ollama_188" not in provider_names # ------------------------------------------------------------------ # 111 OFFLINE # ------------------------------------------------------------------ def test_111_offline_primary_is_gemini(self): """新矩陣:111 OFFLINE → primary=Gemini""" manager = self._setup() h111 = _make_health(HealthStatus.OFFLINE, URL_111) result = manager._decide_route(h111, URL_111) assert result.primary.provider_name == "gemini" def test_111_offline_fallback_includes_nemotron_claude(self): """111 OFFLINE 時,fallback=[Nemotron, Claude](無可用 Ollama)""" manager = self._setup() h111 = _make_health(HealthStatus.OFFLINE, URL_111) result = manager._decide_route(h111, URL_111) provider_names = [e.provider_name for e in result.fallback_chain] assert "nemotron" in provider_names assert "claude" in provider_names assert "ollama_188" not in provider_names def test_111_offline_primary_is_gemini_no_188(self): """新矩陣:111 OFFLINE → Gemini(188 不再列入考慮)""" manager = _make_manager() h111 = _make_health(HealthStatus.OFFLINE, URL_111) result = manager._decide_route(h111, URL_111) assert result.primary.provider_name == "gemini" # ------------------------------------------------------------------ # routing_reason 記錄 # ------------------------------------------------------------------ def test_routing_reason_contains_status(self): """routing_reason 應包含 111 的狀態資訊""" manager = self._setup() h111 = _make_health(HealthStatus.OFFLINE, URL_111) result = manager._decide_route(h111, URL_111) assert "offline" in result.routing_reason.lower() or "111" in result.routing_reason # ============================================================================= # select_provider():只 check 111 # ============================================================================= class TestSelectProvider: """select_provider() 只 check 111 邏輯(統帥鐵律:188 完全移出)""" @pytest.mark.asyncio async def test_select_provider_checks_111_only(self): """統帥鐵律:select_provider 只 check 111,call_count == 1""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock( return_value=_make_health(HealthStatus.HEALTHY, URL_111) ) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings with patch.object(manager, "_write_failover_audit", return_value=None): result = await manager.select_provider() # 只 check 111,不再並行 check 188 assert mock_monitor.check.call_count == 1 called_url = mock_monitor.check.call_args.args[0] assert called_url == URL_111 @pytest.mark.asyncio async def test_select_provider_single_node_primary_ollama(self): """111 HEALTHY → primary=ollama""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock(return_value=_make_health(HealthStatus.HEALTHY, URL_111)) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings with patch.object(manager, "_write_failover_audit", return_value=None): result = await manager.select_provider() assert mock_monitor.check.call_count == 1 assert result.primary.provider_name == "ollama" @pytest.mark.asyncio async def test_select_provider_returns_routing_result(self): """select_provider 返回 OllamaRoutingResult 類型(新矩陣:111 OFFLINE → Gemini)""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock( return_value=_make_health(HealthStatus.OFFLINE, URL_111) ) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings # 必須 mock Redis pool(_check_gemini_quota 走 fail-closed 路徑會切到 Nemotron 而非 Gemini) with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", AsyncMock(return_value=True)), \ patch( "src.services.failover_alerter.get_failover_alerter", return_value=MagicMock( alert_failover=AsyncMock(), alert_gemini_quota_exceeded=AsyncMock(), ), ): result = await manager.select_provider() assert isinstance(result, OllamaRoutingResult) # 新矩陣:111 OFFLINE + Gemini quota OK → primary=Gemini assert result.primary.provider_name == "gemini" @pytest.mark.asyncio async def test_audit_not_written_when_111_healthy(self): """111 正常時不觸發 failover audit""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock( return_value=_make_health(HealthStatus.HEALTHY, URL_111) ) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings audit_called = [False] async def _spy_audit(result): # _write_failover_audit 在 111 HEALTHY 時 early return,不寫 DB audit_called[0] = result.primary.provider_name != "ollama" with patch.object(manager, "_write_failover_audit", side_effect=_spy_audit): await manager.select_provider() # 111 HEALTHY,不應有 failover 事件 assert audit_called[0] is False # ============================================================================= # clear_cache() / notify_recovery() # ============================================================================= class TestRecoveryAPI: """clear_cache() / notify_recovery() 方法""" @pytest.mark.asyncio async def test_clear_cache_calls_redis_delete(self): """clear_cache() 呼叫 redis.delete 清除 health monitor 快取""" manager = _make_manager() mock_redis = AsyncMock() mock_redis.delete = AsyncMock() with patch("src.services.ollama_failover_manager.OllamaFailoverManager.clear_cache") as mock_clear: mock_clear.return_value = None await manager.clear_cache() mock_clear.assert_called_once() @pytest.mark.asyncio async def test_clear_cache_fails_gracefully(self): """Redis import 失敗時,clear_cache() 內部 try/except 攔截,靜默不 crash""" manager = _make_manager() # 模擬 get_redis 拋 ImportError(Redis 不可用) # clear_cache 有 try/except Exception,應靜默吸收 with patch( "src.services.ollama_failover_manager.get_redis", side_effect=ImportError("no redis"), create=True, ): # 不應 raise await manager.clear_cache() def test_notify_recovery_does_not_raise(self): """notify_recovery() 只寫 structlog,不應 raise""" manager = _make_manager() # 不應 raise manager.notify_recovery("ollama_111") # ============================================================================= # OllamaRoutingResult # ============================================================================= class TestOllamaRoutingResult: """OllamaRoutingResult 輔助方法""" def test_all_endpoints_in_order(self): from src.services.ollama_failover_manager import OllamaEndpoint primary = OllamaEndpoint(url=URL_111, provider_name="ollama", model="m1") fb1 = OllamaEndpoint(url="", provider_name="gemini", model="gemini-1.5-flash") fb2 = OllamaEndpoint(url="", provider_name="nemotron", model="m3") result = OllamaRoutingResult( primary=primary, fallback_chain=[fb1, fb2], routing_reason="test", health_111=_make_health(HealthStatus.HEALTHY), ) ordered = result.all_endpoints_in_order() assert ordered[0].provider_name == "ollama" assert ordered[1].provider_name == "gemini" assert ordered[2].provider_name == "nemotron" def test_to_dict_structure(self): from src.services.ollama_failover_manager import OllamaEndpoint primary = OllamaEndpoint(url=URL_111, provider_name="ollama", model="qwen") result = OllamaRoutingResult( primary=primary, fallback_chain=[], routing_reason="111 HEALTHY", health_111=_make_health(HealthStatus.HEALTHY), ) d = result.to_dict() assert d["primary"]["provider"] == "ollama" assert d["routing_reason"] == "111 HEALTHY" assert isinstance(d["fallback_chain"], list) def test_health_188_optional_field_backward_compat(self): """health_188 保留為 optional None(backward-compat,不傳也可以)""" from src.services.ollama_failover_manager import OllamaEndpoint primary = OllamaEndpoint(url=URL_111, provider_name="ollama", model="qwen") result = OllamaRoutingResult( primary=primary, fallback_chain=[], routing_reason="test", health_111=_make_health(HealthStatus.HEALTHY), # health_188 不傳,應為 None ) assert result.health_188 is None # ============================================================================= # Singleton # ============================================================================= def test_singleton_returns_same_instance(): m1 = get_ollama_failover_manager() m2 = get_ollama_failover_manager() assert m1 is m2 def test_reset_singleton_gives_new_instance(): m1 = get_ollama_failover_manager() reset_ollama_failover_manager() m2 = get_ollama_failover_manager() assert m1 is not m2 # ============================================================================= # B1: _write_failover_audit 改用 structlog(不再寫 DB) # 2026-04-25 critic-fix Part2 by Claude Engineer-C2 # ============================================================================= class TestWriteFailoverAudit: """B1 修復驗證:_write_failover_audit 使用 structlog,不依賴 AuditLog model""" @pytest.mark.asyncio async def test_audit_uses_structlog_not_db(self): """_write_failover_audit 應呼叫 structlog,不呼叫 DB""" import structlog manager = _make_manager() from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult result = OllamaRoutingResult( primary=OllamaEndpoint(url="", provider_name="gemini", model="gemini-1.5-flash"), fallback_chain=[], routing_reason="111 OFFLINE → 切 Gemini", health_111=_make_health(HealthStatus.OFFLINE), ) # 只要不 raise 就是成功(DB path 已移除,structlog path 無 DB 依賴) await manager._write_failover_audit(result) @pytest.mark.asyncio async def test_audit_skipped_when_111_healthy(self): """111 HEALTHY 時 early return,不記錄 failover""" manager = _make_manager() from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult result = OllamaRoutingResult( primary=OllamaEndpoint(url=URL_111, provider_name="ollama", model="qwen"), fallback_chain=[], routing_reason="111 HEALTHY → 主 111", health_111=_make_health(HealthStatus.HEALTHY), ) # primary=ollama → early return,不執行任何 DB/log await manager._write_failover_audit(result) # 不應 raise # ============================================================================= # B2: AIProviderEnum.OLLAMA_188 存在 # 2026-04-25 critic-fix Part2 by Claude Engineer-C2 # ============================================================================= class TestAIProviderEnumOllama188: """B2 修復驗證:AIProviderEnum.OLLAMA_188 存在且 PROVIDER_LATENCY_BUDGET 有對應值""" def test_ollama_188_enum_exists(self): from src.services.ai_router import AIProviderEnum assert AIProviderEnum.OLLAMA_188.value == "ollama_188" def test_ollama_188_in_latency_budget(self): from src.services.ai_router import AIProviderEnum, PROVIDER_LATENCY_BUDGET assert AIProviderEnum.OLLAMA_188 in PROVIDER_LATENCY_BUDGET assert PROVIDER_LATENCY_BUDGET[AIProviderEnum.OLLAMA_188] == 120000 # ============================================================================= # H4: asyncio.gather return_exceptions=True # 2026-04-25 critic-fix Part2 by Claude Engineer-C2 # ============================================================================= class TestGatherReturnExceptions: """H4 修復驗證:111 check 拋例外時不炸整個 select_provider""" @pytest.mark.asyncio async def test_gather_exception_in_111_treated_as_offline(self): """111 check 拋例外 → health_111=OFFLINE,select_provider 正常返回""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock( side_effect=RuntimeError("111 network error") ) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_settings.GEMINI_DAILY_QUOTA = 1000 manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", return_value=True): result = await manager.select_provider() # 111 exception → OFFLINE → primary=gemini(new matrix) assert result.primary.provider_name == "gemini" @pytest.mark.asyncio async def test_111_healthy_select_provider_primary_ollama(self): """111 HEALTHY → primary=ollama,select_provider 正常返回(取代舊的 188 exception 測試)""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock( return_value=_make_health(HealthStatus.HEALTHY, URL_111) ) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_settings.GEMINI_DAILY_QUOTA = 1000 manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", return_value=True): result = await manager.select_provider() # 111 HEALTHY → primary=ollama assert result.primary.provider_name == "ollama" # ============================================================================= # H7: Gemini 帳單熔斷 # 2026-04-25 critic-fix Part2 by Claude Engineer-C2 # ============================================================================= class TestGeminiQuota: """H7 修復驗證:Gemini 每日配額熔斷""" @pytest.mark.asyncio async def test_gemini_quota_under_limit(self): """count=500 < quota=1000 → 返回 True(允許走 Gemini) 2026-04-26 Wave5 B3-fix by Claude Engineer-A4 — 改用 pipeline mock(atomic 修復後) 原 GET/INCR/EXPIRE 三步已改為 pipeline.set(NX)+incr,mock 跟著更新。 """ manager = _make_manager() manager._settings.GEMINI_DAILY_QUOTA = 1000 # pipeline mock:SET NX 返回 True(首次),INCR 返回 501(500+1,未達 quota=1000) mock_pipe = MagicMock() mock_pipe.set = MagicMock(return_value=mock_pipe) mock_pipe.incr = MagicMock(return_value=mock_pipe) mock_pipe.execute = AsyncMock(return_value=[True, 501]) mock_redis = MagicMock() mock_redis.pipeline = MagicMock(return_value=mock_pipe) with patch("src.core.redis_client.get_redis", return_value=mock_redis): ok = await manager._check_gemini_quota() assert ok is True mock_pipe.execute.assert_awaited_once() @pytest.mark.asyncio async def test_gemini_quota_exactly_at_limit(self): """count=1001 > quota=1000 → 返回 False(熔斷,不再呼叫 Gemini) 2026-04-26 Wave5 B3-fix by Claude Engineer-A4 — 改用 pipeline mock(atomic 修復後) pipeline.incr 返回 1001(> quota=1000),應返回 False。 """ manager = _make_manager() manager._settings.GEMINI_DAILY_QUOTA = 1000 mock_pipe = MagicMock() mock_pipe.set = MagicMock(return_value=mock_pipe) mock_pipe.incr = MagicMock(return_value=mock_pipe) mock_pipe.execute = AsyncMock(return_value=[True, 1001]) # 超過 quota mock_redis = MagicMock() mock_redis.pipeline = MagicMock(return_value=mock_pipe) with patch("src.core.redis_client.get_redis", return_value=mock_redis): ok = await manager._check_gemini_quota() assert ok is False @pytest.mark.asyncio async def test_gemini_quota_redis_unavailable_fail_closed(self): """Redis 掛掉 → 返回 False(2026-04-27 Wave8-X2 fail-closed,違反費用鐵律的修復)""" manager = _make_manager() with patch( "src.core.redis_client.get_redis", side_effect=RuntimeError("Redis unavailable"), ), patch( "src.services.failover_alerter.get_failover_alerter", return_value=MagicMock(alert_gemini_quota_exceeded=AsyncMock()), ): ok = await manager._check_gemini_quota() # fail-closed:Redis 異常時拒絕 Gemini,避免費用失控(违反 feedback_cost_change_approval.md) assert ok is False @pytest.mark.asyncio async def test_select_provider_quota_exceeded_uses_nemotron(self): """select_provider:Gemini quota 超過 → primary 改為 Nemotron(統帥鐵律:188 移出)""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock( return_value=_make_health(HealthStatus.OFFLINE, URL_111) ) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_settings.GEMINI_DAILY_QUOTA = 1000 manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", return_value=False): result = await manager.select_provider() # quota 超過 → 不走 Gemini,改走 Nemotron(188 已移出) assert result.primary.provider_name == "nemotron" @pytest.mark.asyncio async def test_select_provider_quota_exceeded_no_188_uses_nemotron(self): """select_provider:Gemini quota 超過 + 188 不可用 → primary=Nemotron""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock(return_value=_make_health(HealthStatus.OFFLINE, URL_111)) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_settings.GEMINI_DAILY_QUOTA = 1000 manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", return_value=False): result = await manager.select_provider() assert result.primary.provider_name == "nemotron"