# apps/api/tests/test_ollama_failover_manager.py | 2026-04-27 @ Asia/Taipei # Created 2026-04-25 P1.1c by Claude Engineer-C # 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復(路由矩陣更新) # 2026-04-27 波次對齊 by Claude Sonnet 4.6 — 統帥鐵律:唯一 Ollama=111,188 完全移出 # 2026-05-03 ogt: ADR-110 GCP 三層容災架構,URL 常數更新為 GCP-A/B/Local,新增三層容災場景 # 2026-05-19 Codex: GCP-A healthy fast path 不等待 Local 111 health timeout """ OllamaFailoverManager 單元測試 - P1.1c v4.0 ============================================= 測試覆蓋(新路由矩陣:ADR-110 GCP 三層容災,2026-05-03): - GCP-A HEALTHY → primary=ollama_gcp_a - GCP-A OFFLINE + GCP-B HEALTHY → primary=ollama_gcp_b - GCP-A OFFLINE + GCP-B OFFLINE + Local HEALTHY → primary=ollama_local - 全部 OFFLINE → primary=Gemini - Gemini quota exceeded → primary=Nemotron,fallback=[Claude] - select_provider 只 check GCP-A(primary URL) - clear_cache() / notify_recovery() 方法 - OllamaRoutingResult.health_111 backward-compat property(實際欄位 health_gcp_a) 測試分類:unit(mock OllamaHealthMonitor,無 DB 依賴) """ from __future__ import annotations from unittest.mock import AsyncMock, MagicMock, patch import pytest from src.services.ollama_failover_manager import ( OllamaFailoverManager, OllamaRoutingResult, get_ollama_failover_manager, reset_ollama_failover_manager, ) from src.services.ollama_health_monitor import HealthReport, HealthStatus # ============================================================================= # Fixtures # ============================================================================= URL_GCP_A = "http://34.143.170.20:11434" # GCP-A Primary (SSD) URL_GCP_B = "http://34.21.145.224:11434" # GCP-B Secondary (SSD) URL_LOCAL = "http://192.168.0.111:11434" # Local HDD Fallback(後備) # 向下相容別名(舊測試引用 URL_111 時仍可用) URL_111 = URL_GCP_A @pytest.fixture(autouse=True) def reset_singleton(): yield reset_ollama_failover_manager() def _make_health(status: HealthStatus, url: str = URL_111) -> HealthReport: return HealthReport(status=status, host=url, latency_ms=500.0) def _make_manager( url_primary: str = URL_GCP_A, url_secondary: str = URL_GCP_B, url_fallback: str = URL_LOCAL, ) -> OllamaFailoverManager: """建立 manager,settings mock 為 GCP 三層容災 URL(ADR-110)""" mock_settings = MagicMock() mock_settings.OLLAMA_URL = url_primary mock_settings.OLLAMA_SECONDARY_URL = url_secondary mock_settings.OLLAMA_FALLBACK_URL = url_fallback mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_monitor = MagicMock() manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings return manager # ============================================================================= # _decide_route 決策矩陣 # ============================================================================= def _offline_health(url: str = URL_GCP_A) -> HealthReport: """建立 OFFLINE 的 HealthReport""" return HealthReport(status=HealthStatus.OFFLINE, host=url, latency_ms=0.0) class TestDecideRoute: """_decide_route 路由邏輯純函數測試(ADR-110 三層容災:GCP-A → GCP-B → Local → Gemini)""" def _setup(self) -> OllamaFailoverManager: return _make_manager() # ------------------------------------------------------------------ # GCP-A HEALTHY → primary=GCP-A # ------------------------------------------------------------------ def test_gcp_a_healthy_primary_is_ollama_gcp_a(self): """ADR-110:GCP-A HEALTHY → primary=ollama_gcp_a(SSD 主力)""" manager = self._setup() h_gcp_a = _make_health(HealthStatus.HEALTHY, URL_GCP_A) h_gcp_b = _offline_health(URL_GCP_B) h_local = _offline_health(URL_LOCAL) result = manager._decide_route( health_gcp_a=h_gcp_a, health_gcp_b=h_gcp_b, health_local=h_local, url_gcp_a=URL_GCP_A, url_gcp_b=URL_GCP_B, url_local=URL_LOCAL, ) assert result.primary.provider_name in ("ollama_gcp_a", "ollama") assert result.primary.url == URL_GCP_A def test_gcp_a_healthy_fallback_includes_gemini(self): """GCP-A HEALTHY 時 fallback 包含 Gemini""" manager = self._setup() h_gcp_a = _make_health(HealthStatus.HEALTHY, URL_GCP_A) h_gcp_b = _offline_health(URL_GCP_B) h_local = _offline_health(URL_LOCAL) result = manager._decide_route( health_gcp_a=h_gcp_a, health_gcp_b=h_gcp_b, health_local=h_local, url_gcp_a=URL_GCP_A, url_gcp_b=URL_GCP_B, url_local=URL_LOCAL, ) provider_names = [e.provider_name for e in result.fallback_chain] assert "gemini" in provider_names # ------------------------------------------------------------------ # GCP-A SLOW → primary=GCP-A(高負載時仍優先保住本地/代理 Ollama) # ------------------------------------------------------------------ def test_gcp_a_slow_primary_is_ollama_gcp_a(self): """GCP-A SLOW → primary=GCP-A,避免過早切 Gemini""" manager = self._setup() h_gcp_a = _make_health(HealthStatus.SLOW, URL_GCP_A) h_gcp_b = _offline_health(URL_GCP_B) h_local = _offline_health(URL_LOCAL) result = manager._decide_route( health_gcp_a=h_gcp_a, health_gcp_b=h_gcp_b, health_local=h_local, url_gcp_a=URL_GCP_A, url_gcp_b=URL_GCP_B, url_local=URL_LOCAL, ) assert result.primary.provider_name == "ollama_gcp_a" # ------------------------------------------------------------------ # GCP-A DEGRADED → primary=Gemini # ------------------------------------------------------------------ def test_gcp_a_degraded_primary_is_gemini(self): """GCP-A DEGRADED → primary=Gemini""" manager = self._setup() h_gcp_a = _make_health(HealthStatus.DEGRADED, URL_GCP_A) h_gcp_b = _offline_health(URL_GCP_B) h_local = _offline_health(URL_LOCAL) result = manager._decide_route( health_gcp_a=h_gcp_a, health_gcp_b=h_gcp_b, health_local=h_local, url_gcp_a=URL_GCP_A, url_gcp_b=URL_GCP_B, url_local=URL_LOCAL, ) assert result.primary.provider_name == "gemini" def test_gcp_a_degraded_fallback_includes_nemotron_claude(self): """GCP-A DEGRADED fallback 應包含 Nemotron 和 Claude""" manager = self._setup() h_gcp_a = _make_health(HealthStatus.DEGRADED, URL_GCP_A) h_gcp_b = _offline_health(URL_GCP_B) h_local = _offline_health(URL_LOCAL) result = manager._decide_route( health_gcp_a=h_gcp_a, health_gcp_b=h_gcp_b, health_local=h_local, url_gcp_a=URL_GCP_A, url_gcp_b=URL_GCP_B, url_local=URL_LOCAL, ) provider_names = [e.provider_name for e in result.fallback_chain] assert "nemotron" in provider_names assert "claude" in provider_names # ------------------------------------------------------------------ # GCP-A OFFLINE → primary=Gemini # ------------------------------------------------------------------ def test_gcp_a_offline_primary_is_gemini(self): """GCP-A OFFLINE → primary=Gemini""" manager = self._setup() h_gcp_a = _make_health(HealthStatus.OFFLINE, URL_GCP_A) h_gcp_b = _offline_health(URL_GCP_B) h_local = _offline_health(URL_LOCAL) result = manager._decide_route( health_gcp_a=h_gcp_a, health_gcp_b=h_gcp_b, health_local=h_local, url_gcp_a=URL_GCP_A, url_gcp_b=URL_GCP_B, url_local=URL_LOCAL, ) assert result.primary.provider_name == "gemini" def test_gcp_a_offline_fallback_includes_nemotron_claude(self): """GCP-A OFFLINE 時,fallback 包含 Nemotron, Claude""" manager = self._setup() h_gcp_a = _make_health(HealthStatus.OFFLINE, URL_GCP_A) h_gcp_b = _offline_health(URL_GCP_B) h_local = _offline_health(URL_LOCAL) result = manager._decide_route( health_gcp_a=h_gcp_a, health_gcp_b=h_gcp_b, health_local=h_local, url_gcp_a=URL_GCP_A, url_gcp_b=URL_GCP_B, url_local=URL_LOCAL, ) provider_names = [e.provider_name for e in result.fallback_chain] assert "nemotron" in provider_names assert "claude" in provider_names # ------------------------------------------------------------------ # routing_reason 記錄 # ------------------------------------------------------------------ def test_routing_reason_contains_status(self): """routing_reason 應包含 GCP-A 的狀態資訊""" manager = self._setup() h_gcp_a = _make_health(HealthStatus.OFFLINE, URL_GCP_A) h_gcp_b = _offline_health(URL_GCP_B) h_local = _offline_health(URL_LOCAL) result = manager._decide_route( health_gcp_a=h_gcp_a, health_gcp_b=h_gcp_b, health_local=h_local, url_gcp_a=URL_GCP_A, url_gcp_b=URL_GCP_B, url_local=URL_LOCAL, ) reason_lower = result.routing_reason.lower() assert ( "offline" in reason_lower or "gcp" in reason_lower or "gemini" in reason_lower ) # ============================================================================= # select_provider():GCP-A healthy fast path # ============================================================================= class TestSelectProvider: """select_provider() 三層容災健康檢查。""" def _make_three_layer_mock( self, gcp_a_status: HealthStatus = HealthStatus.HEALTHY, gcp_b_status: HealthStatus = HealthStatus.OFFLINE, local_status: HealthStatus = HealthStatus.OFFLINE, ): """建立三層健康 mock:按呼叫順序返回 GCP-A / GCP-B / Local 健康報告""" side_effect_map = { URL_GCP_A: _make_health(gcp_a_status, URL_GCP_A), URL_GCP_B: _make_health(gcp_b_status, URL_GCP_B), URL_LOCAL: _make_health(local_status, URL_LOCAL), } async def _check_side_effect(url): return side_effect_map.get(url, HealthReport(status=HealthStatus.OFFLINE, host=url, latency_ms=0.0)) mock_monitor = AsyncMock() mock_monitor.check = AsyncMock(side_effect=_check_side_effect) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_GCP_A mock_settings.OLLAMA_SECONDARY_URL = URL_GCP_B mock_settings.OLLAMA_FALLBACK_URL = URL_LOCAL mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings return manager, mock_monitor @pytest.mark.asyncio async def test_select_provider_gcp_a_healthy_checks_primary_only(self): """GCP-A healthy 時不等待 GCP-B / Local 111,避免 routing 被 111 timeout 拖慢。""" manager, mock_monitor = self._make_three_layer_mock( gcp_a_status=HealthStatus.HEALTHY, ) with patch.object(manager, "_write_failover_audit", return_value=None): result = await manager.select_provider() assert result.primary.provider_name == "ollama_gcp_a" assert result.health_gcp_b is None assert result.health_local is None assert mock_monitor.check.call_count == 1 called_urls = {call.args[0] for call in mock_monitor.check.call_args_list} assert URL_GCP_A in called_urls assert URL_GCP_B not in called_urls assert URL_LOCAL not in called_urls @pytest.mark.asyncio async def test_select_provider_checks_fallback_hosts_when_gcp_a_not_healthy(self): """GCP-A 不健康時仍檢查 GCP-B / Local,保留三層容災。""" manager, mock_monitor = self._make_three_layer_mock( gcp_a_status=HealthStatus.OFFLINE, gcp_b_status=HealthStatus.HEALTHY, local_status=HealthStatus.OFFLINE, ) with patch.object(manager, "_write_failover_audit", return_value=None): result = await manager.select_provider() assert result.primary.provider_name == "ollama_gcp_b" assert mock_monitor.check.call_count == 3 called_urls = {call.args[0] for call in mock_monitor.check.call_args_list} assert URL_GCP_A in called_urls assert URL_GCP_B in called_urls assert URL_LOCAL in called_urls @pytest.mark.asyncio async def test_select_provider_gcp_a_healthy_primary_ollama(self): """GCP-A HEALTHY → primary=ollama_gcp_a(或向下相容 ollama)""" manager, _ = self._make_three_layer_mock(gcp_a_status=HealthStatus.HEALTHY) with patch.object(manager, "_write_failover_audit", return_value=None): result = await manager.select_provider() assert result.primary.provider_name in ("ollama_gcp_a", "ollama") @pytest.mark.asyncio async def test_select_provider_returns_routing_result(self): """select_provider 返回 OllamaRoutingResult 類型(三層全 OFFLINE → Gemini)""" manager, _ = self._make_three_layer_mock( gcp_a_status=HealthStatus.OFFLINE, gcp_b_status=HealthStatus.OFFLINE, local_status=HealthStatus.OFFLINE, ) # 必須 mock Redis pool(_check_gemini_quota 走 fail-closed 路徑會切到 Nemotron 而非 Gemini) with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", AsyncMock(return_value=True)), \ patch( "src.services.failover_alerter.get_failover_alerter", return_value=MagicMock( alert_failover=AsyncMock(), alert_gemini_quota_exceeded=AsyncMock(), ), ): result = await manager.select_provider() assert isinstance(result, OllamaRoutingResult) # 三層全 OFFLINE + Gemini quota OK → primary=Gemini assert result.primary.provider_name == "gemini" @pytest.mark.asyncio async def test_audit_not_written_when_gcp_a_healthy(self): """GCP-A 正常時不觸發 failover audit""" manager, _ = self._make_three_layer_mock(gcp_a_status=HealthStatus.HEALTHY) audit_called = [False] async def _spy_audit(result): audit_called[0] = result.primary.provider_name not in ("ollama_gcp_a", "ollama") with patch.object(manager, "_write_failover_audit", side_effect=_spy_audit): await manager.select_provider() # GCP-A HEALTHY,不應有 failover 事件 assert audit_called[0] is False # ============================================================================= # clear_cache() / notify_recovery() # ============================================================================= class TestRecoveryAPI: """clear_cache() / notify_recovery() 方法""" @pytest.mark.asyncio async def test_clear_cache_calls_redis_delete(self): """clear_cache() 呼叫 redis.delete 清除 health monitor 快取""" manager = _make_manager() mock_redis = AsyncMock() mock_redis.delete = AsyncMock() with patch("src.services.ollama_failover_manager.OllamaFailoverManager.clear_cache") as mock_clear: mock_clear.return_value = None await manager.clear_cache() mock_clear.assert_called_once() @pytest.mark.asyncio async def test_clear_cache_fails_gracefully(self): """Redis import 失敗時,clear_cache() 內部 try/except 攔截,靜默不 crash""" manager = _make_manager() # 模擬 get_redis 拋 ImportError(Redis 不可用) # clear_cache 有 try/except Exception,應靜默吸收 with patch( "src.services.ollama_failover_manager.get_redis", side_effect=ImportError("no redis"), create=True, ): # 不應 raise await manager.clear_cache() def test_notify_recovery_does_not_raise(self): """notify_recovery() 只寫 structlog,不應 raise""" manager = _make_manager() # 不應 raise(舊呼叫方式仍支援) manager.notify_recovery("ollama_111") manager.notify_recovery("ollama_gcp_a") # ============================================================================= # OllamaRoutingResult # ============================================================================= class TestOllamaRoutingResult: """OllamaRoutingResult 輔助方法""" def test_all_endpoints_in_order(self): from src.services.ollama_failover_manager import OllamaEndpoint primary = OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="m1") fb1 = OllamaEndpoint(url="", provider_name="gemini", model="gemini-1.5-flash") fb2 = OllamaEndpoint(url="", provider_name="nemotron", model="m3") result = OllamaRoutingResult( primary=primary, fallback_chain=[fb1, fb2], routing_reason="test", health_gcp_a=_make_health(HealthStatus.HEALTHY), ) ordered = result.all_endpoints_in_order() assert ordered[0].provider_name == "ollama_gcp_a" assert ordered[1].provider_name == "gemini" assert ordered[2].provider_name == "nemotron" def test_to_dict_structure(self): from src.services.ollama_failover_manager import OllamaEndpoint primary = OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen") result = OllamaRoutingResult( primary=primary, fallback_chain=[], routing_reason="GCP-A HEALTHY", health_gcp_a=_make_health(HealthStatus.HEALTHY), ) d = result.to_dict() assert d["primary"]["provider"] == "ollama_gcp_a" assert d["routing_reason"] == "GCP-A HEALTHY" assert isinstance(d["fallback_chain"], list) def test_health_111_backward_compat_property(self): """health_111 是 backward-compat property,指向 health_gcp_a""" from src.services.ollama_failover_manager import OllamaEndpoint primary = OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen") h = _make_health(HealthStatus.HEALTHY) result = OllamaRoutingResult( primary=primary, fallback_chain=[], routing_reason="test", health_gcp_a=h, ) # health_111 property 應指向 health_gcp_a assert result.health_111 is result.health_gcp_a def test_health_gcp_b_and_local_optional(self): """health_gcp_b 和 health_local 為 optional None(未傳時)""" from src.services.ollama_failover_manager import OllamaEndpoint primary = OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen") result = OllamaRoutingResult( primary=primary, fallback_chain=[], routing_reason="test", health_gcp_a=_make_health(HealthStatus.HEALTHY), # health_gcp_b / health_local 不傳,應為 None ) assert result.health_gcp_b is None assert result.health_local is None # ============================================================================= # ADR-110 三層容災場景(2026-05-03 ogt 新增) # GCP-A → GCP-B → Local → Gemini 四段容災路由 # ============================================================================= class TestThreeLayerFailover: """ADR-110 三層容災場景:GCP-A → GCP-B → Local → Gemini""" def _make_manager_with_health( self, gcp_a: HealthStatus, gcp_b: HealthStatus, local: HealthStatus, ) -> OllamaFailoverManager: """建立三層健康 mock manager(按 URL 路由 health status)""" health_map = { URL_GCP_A: HealthReport(status=gcp_a, host=URL_GCP_A, latency_ms=500.0), URL_GCP_B: HealthReport(status=gcp_b, host=URL_GCP_B, latency_ms=500.0), URL_LOCAL: HealthReport(status=local, host=URL_LOCAL, latency_ms=500.0), } async def _check(url): return health_map.get(url, HealthReport(status=HealthStatus.OFFLINE, host=url)) mock_monitor = AsyncMock() mock_monitor.check = AsyncMock(side_effect=_check) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_GCP_A mock_settings.OLLAMA_SECONDARY_URL = URL_GCP_B mock_settings.OLLAMA_FALLBACK_URL = URL_LOCAL mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_settings.GEMINI_DAILY_QUOTA = 1000 manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings return manager @pytest.mark.asyncio async def test_gcp_a_healthy_uses_gcp_a(self): """場景1:GCP-A HEALTHY → primary=GCP-A(SSD 主力)""" manager = self._make_manager_with_health( gcp_a=HealthStatus.HEALTHY, gcp_b=HealthStatus.OFFLINE, local=HealthStatus.OFFLINE, ) with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", return_value=True): result = await manager.select_provider() assert result.primary.url == URL_GCP_A or result.primary.provider_name in ("ollama_gcp_a", "ollama") @pytest.mark.asyncio async def test_gcp_a_offline_gcp_b_healthy_uses_gcp_b(self): """場景2:GCP-A OFFLINE + GCP-B HEALTHY → primary=GCP-B""" manager = self._make_manager_with_health( gcp_a=HealthStatus.OFFLINE, gcp_b=HealthStatus.HEALTHY, local=HealthStatus.OFFLINE, ) with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", return_value=True): result = await manager.select_provider() # GCP-A 掛了,應切到 GCP-B assert result.primary.url == URL_GCP_B or result.primary.provider_name in ("ollama_gcp_b", "ollama_gcp_a") @pytest.mark.asyncio async def test_gcp_a_gcp_b_offline_local_healthy_uses_local(self): """場景3:GCP-A OFFLINE + GCP-B OFFLINE + Local HEALTHY → primary=Local(111)""" manager = self._make_manager_with_health( gcp_a=HealthStatus.OFFLINE, gcp_b=HealthStatus.OFFLINE, local=HealthStatus.HEALTHY, ) with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", return_value=True): result = await manager.select_provider() # GCP-A/B 皆掛,切到 Local assert result.primary.url == URL_LOCAL or result.primary.provider_name in ("ollama_local", "ollama") @pytest.mark.asyncio async def test_all_offline_uses_gemini(self): """場景4:三層全 OFFLINE → primary=Gemini(最終雲端備援)""" manager = self._make_manager_with_health( gcp_a=HealthStatus.OFFLINE, gcp_b=HealthStatus.OFFLINE, local=HealthStatus.OFFLINE, ) with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", return_value=True): result = await manager.select_provider() assert result.primary.provider_name == "gemini" @pytest.mark.asyncio async def test_all_offline_gemini_quota_exceeded_uses_nemotron(self): """場景5:三層全 OFFLINE + Gemini quota 耗盡 → primary=Nemotron""" manager = self._make_manager_with_health( gcp_a=HealthStatus.OFFLINE, gcp_b=HealthStatus.OFFLINE, local=HealthStatus.OFFLINE, ) with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", return_value=False): result = await manager.select_provider() assert result.primary.provider_name == "nemotron" # ============================================================================= # Singleton # ============================================================================= def test_singleton_returns_same_instance(): m1 = get_ollama_failover_manager() m2 = get_ollama_failover_manager() assert m1 is m2 def test_reset_singleton_gives_new_instance(): m1 = get_ollama_failover_manager() reset_ollama_failover_manager() m2 = get_ollama_failover_manager() assert m1 is not m2 # ============================================================================= # B1: _write_failover_audit 改用 structlog(不再寫 DB) # 2026-04-25 critic-fix Part2 by Claude Engineer-C2 # ============================================================================= class TestWriteFailoverAudit: """B1 修復驗證:_write_failover_audit 使用 structlog,不依賴 AuditLog model""" @pytest.mark.asyncio async def test_audit_uses_structlog_not_db(self): """_write_failover_audit 應呼叫 structlog,不呼叫 DB""" manager = _make_manager() from src.services.ollama_failover_manager import ( OllamaEndpoint, OllamaRoutingResult, ) result = OllamaRoutingResult( primary=OllamaEndpoint(url="", provider_name="gemini", model="gemini-1.5-flash"), fallback_chain=[], routing_reason="GCP-A OFFLINE → 切 Gemini", health_gcp_a=_make_health(HealthStatus.OFFLINE), ) # 只要不 raise 就是成功(DB path 已移除,structlog path 無 DB 依賴) await manager._write_failover_audit(result) @pytest.mark.asyncio async def test_audit_skipped_when_gcp_a_healthy(self): """GCP-A HEALTHY 時 early return,不記錄 failover""" manager = _make_manager() from src.services.ollama_failover_manager import ( OllamaEndpoint, OllamaRoutingResult, ) result = OllamaRoutingResult( primary=OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen"), fallback_chain=[], routing_reason="GCP-A HEALTHY → 主 GCP-A", health_gcp_a=_make_health(HealthStatus.HEALTHY), ) # primary=ollama_gcp_a → early return,不執行任何 DB/log await manager._write_failover_audit(result) # 不應 raise # ============================================================================= # B2: AIProviderEnum.OLLAMA_LOCAL 存在 # 2026-05-06 Codex — 188 不再作為 Ollama Provider # ============================================================================= class TestAIProviderEnumOllamaLocal: """B2 修復驗證:AIProviderEnum.OLLAMA_LOCAL 存在且 PROVIDER_LATENCY_BUDGET 有對應值""" def test_ollama_local_enum_exists(self): from src.services.ai_router import AIProviderEnum assert AIProviderEnum.OLLAMA_LOCAL.value == "ollama_local" def test_ollama_local_in_latency_budget(self): from src.services.ai_router import PROVIDER_LATENCY_BUDGET, AIProviderEnum assert AIProviderEnum.OLLAMA_LOCAL in PROVIDER_LATENCY_BUDGET assert PROVIDER_LATENCY_BUDGET[AIProviderEnum.OLLAMA_LOCAL] == 90000 # ============================================================================= # H4: asyncio.gather return_exceptions=True # 2026-04-25 critic-fix Part2 by Claude Engineer-C2 # ============================================================================= class TestGatherReturnExceptions: """H4 修復驗證:三層主機 check 拋例外時不炸整個 select_provider""" @pytest.mark.asyncio async def test_gather_exception_in_all_hosts_treated_as_offline(self): """三台主機 check 全部拋例外 → 視為 OFFLINE,select_provider 正常返回 Gemini""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock( side_effect=RuntimeError("network error") ) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_GCP_A mock_settings.OLLAMA_SECONDARY_URL = URL_GCP_B mock_settings.OLLAMA_FALLBACK_URL = URL_LOCAL mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_settings.GEMINI_DAILY_QUOTA = 1000 manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", return_value=True): result = await manager.select_provider() # 三層全部 exception → OFFLINE → primary=gemini assert result.primary.provider_name == "gemini" @pytest.mark.asyncio async def test_gcp_a_healthy_select_provider_primary_ollama(self): """GCP-A HEALTHY → primary=ollama_gcp_a,select_provider 正常返回""" async def _check_side_effect(url): if url == URL_GCP_A: return _make_health(HealthStatus.HEALTHY, URL_GCP_A) return HealthReport(status=HealthStatus.OFFLINE, host=url, latency_ms=0.0) mock_monitor = AsyncMock() mock_monitor.check = AsyncMock(side_effect=_check_side_effect) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_GCP_A mock_settings.OLLAMA_SECONDARY_URL = URL_GCP_B mock_settings.OLLAMA_FALLBACK_URL = URL_LOCAL mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_settings.GEMINI_DAILY_QUOTA = 1000 manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", return_value=True): result = await manager.select_provider() # GCP-A HEALTHY → primary=ollama_gcp_a(或 backward-compat ollama) assert result.primary.provider_name in ("ollama_gcp_a", "ollama") # ============================================================================= # H7: Gemini 帳單熔斷 # 2026-04-25 critic-fix Part2 by Claude Engineer-C2 # ============================================================================= class TestGeminiQuota: """H7 修復驗證:Gemini 每日配額熔斷""" @pytest.mark.asyncio async def test_gemini_quota_under_limit(self): """count=500 < quota=1000 → 返回 True(允許走 Gemini) 2026-04-26 Wave5 B3-fix by Claude Engineer-A4 — 改用 pipeline mock(atomic 修復後) 原 GET/INCR/EXPIRE 三步已改為 pipeline.set(NX)+incr,mock 跟著更新。 """ manager = _make_manager() manager._settings.GEMINI_DAILY_QUOTA = 1000 # pipeline mock:SET NX 返回 True(首次),INCR 返回 501(500+1,未達 quota=1000) mock_pipe = MagicMock() mock_pipe.set = MagicMock(return_value=mock_pipe) mock_pipe.incr = MagicMock(return_value=mock_pipe) mock_pipe.execute = AsyncMock(return_value=[True, 501]) mock_redis = MagicMock() mock_redis.pipeline = MagicMock(return_value=mock_pipe) with patch("src.core.redis_client.get_redis", return_value=mock_redis): ok = await manager._check_gemini_quota() assert ok is True mock_pipe.execute.assert_awaited_once() @pytest.mark.asyncio async def test_gemini_quota_exactly_at_limit(self): """count=1001 > quota=1000 → 返回 False(熔斷,不再呼叫 Gemini) 2026-04-26 Wave5 B3-fix by Claude Engineer-A4 — 改用 pipeline mock(atomic 修復後) pipeline.incr 返回 1001(> quota=1000),應返回 False。 """ manager = _make_manager() manager._settings.GEMINI_DAILY_QUOTA = 1000 mock_pipe = MagicMock() mock_pipe.set = MagicMock(return_value=mock_pipe) mock_pipe.incr = MagicMock(return_value=mock_pipe) mock_pipe.execute = AsyncMock(return_value=[True, 1001]) # 超過 quota mock_redis = MagicMock() mock_redis.pipeline = MagicMock(return_value=mock_pipe) with patch("src.core.redis_client.get_redis", return_value=mock_redis): ok = await manager._check_gemini_quota() assert ok is False @pytest.mark.asyncio async def test_gemini_quota_redis_unavailable_fail_closed(self): """Redis 掛掉 → 返回 False(2026-04-27 Wave8-X2 fail-closed,違反費用鐵律的修復)""" manager = _make_manager() with patch( "src.core.redis_client.get_redis", side_effect=RuntimeError("Redis unavailable"), ), patch( "src.services.failover_alerter.get_failover_alerter", return_value=MagicMock(alert_gemini_quota_exceeded=AsyncMock()), ): ok = await manager._check_gemini_quota() # fail-closed:Redis 異常時拒絕 Gemini,避免費用失控(违反 feedback_cost_change_approval.md) assert ok is False @pytest.mark.asyncio async def test_select_provider_quota_exceeded_uses_nemotron(self): """select_provider:Gemini quota 超過 → primary 改為 Nemotron(三層全 OFFLINE 情境)""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock( return_value=_make_health(HealthStatus.OFFLINE, URL_GCP_A) ) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_GCP_A mock_settings.OLLAMA_SECONDARY_URL = URL_GCP_B mock_settings.OLLAMA_FALLBACK_URL = URL_LOCAL mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_settings.GEMINI_DAILY_QUOTA = 1000 manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", return_value=False): result = await manager.select_provider() # quota 超過 → 不走 Gemini,改走 Nemotron assert result.primary.provider_name == "nemotron" @pytest.mark.asyncio async def test_select_provider_quota_exceeded_all_offline_uses_nemotron(self): """select_provider:Gemini quota 超過 + 三層全 OFFLINE → primary=Nemotron""" async def _all_offline(url): return HealthReport(status=HealthStatus.OFFLINE, host=url, latency_ms=0.0) mock_monitor = AsyncMock() mock_monitor.check = AsyncMock(side_effect=_all_offline) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_GCP_A mock_settings.OLLAMA_SECONDARY_URL = URL_GCP_B mock_settings.OLLAMA_FALLBACK_URL = URL_LOCAL mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_settings.GEMINI_DAILY_QUOTA = 1000 manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", return_value=False): result = await manager.select_provider() assert result.primary.provider_name == "nemotron"