diff --git a/apps/api/src/services/ollama_failover_manager.py b/apps/api/src/services/ollama_failover_manager.py index 4fdcfa18..55018dd6 100644 --- a/apps/api/src/services/ollama_failover_manager.py +++ b/apps/api/src/services/ollama_failover_manager.py @@ -26,9 +26,8 @@ Ollama 自動容災管理 - P1.1b from __future__ import annotations -import asyncio import datetime -from dataclasses import dataclass, field +from dataclasses import dataclass # 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2 # 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo,保證一定有 +8 時區 # 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC @@ -102,7 +101,6 @@ class OllamaRoutingResult: ], "routing_reason": self.routing_reason, "health_111": self.health_111.to_dict(), - "health_188": self.health_188.to_dict() if self.health_188 else None, } @@ -167,53 +165,22 @@ class OllamaFailoverManager: async def select_provider( self, - task_type: str = "", - context: dict | None = None, + task_type: str = "", # noqa: ARG002 + context: dict | None = None, # noqa: ARG002 ) -> OllamaRoutingResult: """ - 並行檢查 111 + 188,返回路由結果 + 檢查 111 健康狀態,返回路由結果。 - Args: - task_type: 任務類型(預留,目前未影響路由邏輯) - context: 額外上下文(預留) - - Returns: - OllamaRoutingResult + 2026-04-26 統帥鐵律:唯一 Ollama = 111,188 禁止用於即時回應。 """ url_111 = self._settings.OLLAMA_URL - url_188 = self._settings.OLLAMA_FALLBACK_URL or "" - # 並行檢查 - # 2026-04-25 critic-fix Part2 H4 by Claude Engineer-C2 - # return_exceptions=True 防止任一 check 例外導致整個 select_provider 炸 - if url_188: - results = await asyncio.gather( - self._monitor.check(url_111), - self._monitor.check(url_188), - return_exceptions=True, - ) - # 處理 exception — 任一失敗視為 OFFLINE - health_111_raw, health_188_raw = results - health_111: HealthReport = ( - HealthReport(status=HealthStatus.OFFLINE, reason=f"check error: {health_111_raw}") - if isinstance(health_111_raw, Exception) - else health_111_raw - ) - health_188: HealthReport | None = ( - HealthReport(status=HealthStatus.OFFLINE, reason=f"check error: {health_188_raw}") - if isinstance(health_188_raw, Exception) - else health_188_raw - ) - else: + try: health_111 = await self._monitor.check(url_111) - health_188 = None + except Exception as e: + health_111 = HealthReport(status=HealthStatus.OFFLINE, reason=f"check error: {e}") - result = self._decide_route( - health_111=health_111, - health_188=health_188, - url_111=url_111, - url_188=url_188, - ) + result = self._decide_route(health_111=health_111, url_111=url_111) # Gemini 帳單熔斷(quota gate) # 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2 @@ -226,12 +193,7 @@ class OllamaFailoverManager: quota=quota, health_111=health_111.status.value, ) - result = self._build_quota_exceeded_route( - health_111=health_111, - health_188=health_188, - url_111=url_111, - url_188=url_188, - ) + result = self._build_quota_exceeded_route(health_111=health_111) # 2026-04-26 P1.5 整合點 3 by Claude Opus 4.7 — 配額耗盡 Telegram 告警 # alerter 內部 24h dedup(QUOTA_DEDUP_TTL_SEC),即使每次 quota exceeded # 都呼叫,當日只會發送一次告警。失敗 fail-open(不阻擋 routing)。 @@ -267,7 +229,6 @@ class OllamaFailoverManager: reason=result.routing_reason, fallback_count=len(result.fallback_chain), health_111=health_111.status.value, - health_188=health_188.status.value if health_188 else "not_configured", ) # 通知 recovery service 當前 primary(跨重啟持久化) @@ -290,102 +251,44 @@ class OllamaFailoverManager: def _decide_route( self, health_111: HealthReport, - health_188: HealthReport | None, url_111: str, - url_188: str, ) -> OllamaRoutingResult: """ - 決策矩陣(2026-04-25 統帥指令:Gemini 優先,188 最後備援): + 決策矩陣(2026-04-26 統帥鐵律:唯一 Ollama=111,備援只用 Gemini): - 111 HEALTHY → primary=111, fallback=[Gemini, 188, Nemotron] - 111 SLOW → primary=Gemini, fallback=[111, 188] - 111 DEGRADED → primary=Gemini, fallback=[188, Nemotron, Claude] - 111 OFFLINE → primary=Gemini, fallback=[188, Nemotron, Claude] - 111 OFFLINE + 188 OFFLINE → primary=Gemini, fallback=[Nemotron, Claude] + 111 HEALTHY → primary=111, fallback=[Gemini] + 111 SLOW → primary=Gemini, fallback=[111, Nemotron, Claude] + 111 DEGRADED → primary=Gemini, fallback=[Nemotron, Claude] + 111 OFFLINE → primary=Gemini, fallback=[Nemotron, Claude] - 關鍵原則: - - 111 非 HEALTHY 時,primary 必為 Gemini(快速雲端,不等 188 慢推理) - - 188 永遠在 fallback chain,作為 Gemini 額度耗盡的最後備援 - - degradation_reason 記錄切換原因 + 時間戳 - - 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復 + 188 完全移出(CPU-only 0.45 tok/s,禁止即時回應)。 """ model_111 = self._settings.OLLAMA_HEALTH_CHECK_MODEL - model_188 = "qwen2.5:7b-instruct" # 188 CPU-only 備援推薦模型(plan 方案 C) - ep_111 = OllamaEndpoint(url=url_111, provider_name="ollama", model=model_111) - ep_188 = ( - OllamaEndpoint(url=url_188, provider_name="ollama_188", model=model_188) - if url_188 - else None - ) - - # 188 可用性判斷(僅供 fallback 使用) - has_188 = ep_188 is not None and ( - health_188 is not None and health_188.status != HealthStatus.OFFLINE - ) - - # 切換時間戳(台北時區 +8,標準庫保證) - # 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2 now_ts = datetime.datetime.now(TAIPEI_TZ).isoformat() - # ========================================================== - # 111 HEALTHY → 主 111,Gemini 作為第一 fallback(快速雲端) - # ========================================================== if health_111.status == HealthStatus.HEALTHY: - fallback: list[OllamaEndpoint] = [_GEMINI_ENDPOINT] - if has_188 and ep_188: - fallback.append(ep_188) - fallback.append(_NEMOTRON_ENDPOINT) return OllamaRoutingResult( primary=ep_111, - fallback_chain=fallback, + fallback_chain=[_GEMINI_ENDPOINT], routing_reason="111 HEALTHY → 主 111", health_111=health_111, - health_188=health_188, ) - # ========================================================== - # 111 SLOW → primary=Gemini,fallback=[111, 188] - # 111 實測 eval rate 0.09 token/s,~111s 推理,Gemini 更快 - # ========================================================== if health_111.status == HealthStatus.SLOW: - fallback_slow: list[OllamaEndpoint] = [ep_111] - if has_188 and ep_188: - fallback_slow.append(ep_188) - degradation_reason = ( - f"111 SLOW(eval ~0.09 token/s, ~111s)→ 切 Gemini at {now_ts}" - ) return OllamaRoutingResult( primary=_GEMINI_ENDPOINT, - fallback_chain=fallback_slow, - routing_reason=degradation_reason, + fallback_chain=[ep_111, _NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT], + routing_reason=f"111 SLOW → 切 Gemini at {now_ts}", health_111=health_111, - health_188=health_188, ) - # ========================================================== - # 111 DEGRADED 或 OFFLINE → primary=Gemini,188 在 fallback - # ========================================================== - status_label = health_111.status.value # "degraded" / "offline" - degradation_reason = f"111 {status_label} → 切 Gemini at {now_ts}" - if has_188 and ep_188: - return OllamaRoutingResult( - primary=_GEMINI_ENDPOINT, - fallback_chain=[ep_188, _NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT], - routing_reason=degradation_reason, - health_111=health_111, - health_188=health_188, - ) - - # 188 也不可用 → Gemini 主力,最後備援 Nemotron / Claude - degradation_reason = f"111 {status_label} + 188 不可用 → 切 Gemini at {now_ts}" + status_label = health_111.status.value return OllamaRoutingResult( primary=_GEMINI_ENDPOINT, fallback_chain=[_NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT], - routing_reason=degradation_reason, + routing_reason=f"111 {status_label} → 切 Gemini at {now_ts}", health_111=health_111, - health_188=health_188, ) # ------------------------------------------------------------------------- @@ -464,40 +367,13 @@ class OllamaFailoverManager: def _build_quota_exceeded_route( self, health_111: HealthReport, - health_188: HealthReport | None, - url_111: str, # noqa: ARG002 — 保留供 OllamaRoutingResult 結構完整性(health_111 對應) - url_188: str, ) -> OllamaRoutingResult: - """ - Gemini 配額耗盡時的備援路由:primary=OLLAMA_188,fallback=[Nemotron, Claude] - 若 188 也不可用,則 primary=Nemotron。 - """ - model_188 = "qwen2.5:7b-instruct" - ep_188 = ( - OllamaEndpoint(url=url_188, provider_name="ollama_188", model=model_188) - if url_188 - else None - ) - has_188 = ep_188 is not None and ( - health_188 is not None and health_188.status != HealthStatus.OFFLINE - ) - - if has_188 and ep_188: - return OllamaRoutingResult( - primary=ep_188, - fallback_chain=[_NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT], - routing_reason="Gemini quota exceeded → 188 CPU-only 備援", - health_111=health_111, - health_188=health_188, - ) - - # 188 也不可用 + """Gemini 配額耗盡 → Nemotron 備援。2026-04-26 統帥鐵律:188 移出。""" return OllamaRoutingResult( primary=_NEMOTRON_ENDPOINT, fallback_chain=[_CLAUDE_ENDPOINT], - routing_reason="Gemini quota exceeded + 188 不可用 → Nemotron 備援", + routing_reason="Gemini quota exceeded → Nemotron 備援", health_111=health_111, - health_188=health_188, ) # ------------------------------------------------------------------------- diff --git a/apps/api/tests/test_ollama_failover_manager.py b/apps/api/tests/test_ollama_failover_manager.py index 6e1b23d3..f112c67d 100644 --- a/apps/api/tests/test_ollama_failover_manager.py +++ b/apps/api/tests/test_ollama_failover_manager.py @@ -1,18 +1,19 @@ -# apps/api/tests/test_ollama_failover_manager.py | 2026-04-25 @ Asia/Taipei +# apps/api/tests/test_ollama_failover_manager.py | 2026-04-27 @ Asia/Taipei # Created 2026-04-25 P1.1c by Claude Engineer-C # 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復(路由矩陣更新) +# 2026-04-27 波次對齊 by Claude Sonnet 4.6 — 統帥鐵律:唯一 Ollama=111,188 完全移出 """ -OllamaFailoverManager 單元測試 - P1.1c v2.0 +OllamaFailoverManager 單元測試 - P1.1c v3.0 ============================================= -測試覆蓋(新路由矩陣:Gemini 優先,188 最後備援): -- 111 HEALTHY → primary=ollama(111),fallback=[Gemini, 188, Nemotron] -- 111 SLOW → primary=Gemini,fallback 包含 111 + 188 -- 111 DEGRADED → primary=Gemini,fallback 包含 188 + nemotron + claude -- 111 OFFLINE → primary=Gemini,fallback 包含 188 + nemotron + claude -- 111 + 188 都 OFFLINE → primary=Gemini,fallback 包含 nemotron + claude -- OLLAMA_FALLBACK_URL 未設定時的單節點行為 -- 並行 gather 邏輯(asyncio.gather mock) +測試覆蓋(新路由矩陣:統帥鐵律 2026-04-26,唯一 Ollama=111,備援只用 Gemini): +- 111 HEALTHY → primary=ollama(111),fallback=[Gemini] +- 111 SLOW → primary=Gemini,fallback=[111, Nemotron, Claude] +- 111 DEGRADED → primary=Gemini,fallback=[Nemotron, Claude] +- 111 OFFLINE → primary=Gemini,fallback=[Nemotron, Claude] +- Gemini quota exceeded → primary=Nemotron,fallback=[Claude] +- select_provider 只 check 111(不再並行 check 188) - clear_cache() / notify_recovery() 方法 +- OllamaRoutingResult.health_188 保留為 optional(backward-compat) 測試分類:unit(mock OllamaHealthMonitor,無 DB 依賴) """ @@ -50,11 +51,10 @@ def _make_health(status: HealthStatus, url: str = URL_111) -> HealthReport: return HealthReport(status=status, host=url, latency_ms=500.0) -def _make_manager(url_111: str = URL_111, url_188: str = URL_188) -> OllamaFailoverManager: - """建立 manager,settings mock 為指定 URL""" +def _make_manager(url_111: str = URL_111) -> OllamaFailoverManager: + """建立 manager,settings mock 為指定 URL(188 已移除)""" mock_settings = MagicMock() mock_settings.OLLAMA_URL = url_111 - mock_settings.OLLAMA_FALLBACK_URL = url_188 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_monitor = MagicMock() @@ -69,10 +69,10 @@ def _make_manager(url_111: str = URL_111, url_188: str = URL_188) -> OllamaFailo class TestDecideRoute: - """_decide_route 路由邏輯純函數測試""" + """_decide_route 路由邏輯純函數測試(新簽名:只需 health_111, url_111)""" - def _setup(self, url_188: str = URL_188) -> OllamaFailoverManager: - return _make_manager(url_188=url_188) + def _setup(self) -> OllamaFailoverManager: + return _make_manager() # ------------------------------------------------------------------ # 111 HEALTHY @@ -81,41 +81,39 @@ class TestDecideRoute: def test_111_healthy_primary_is_ollama(self): manager = self._setup() h111 = _make_health(HealthStatus.HEALTHY, URL_111) - h188 = _make_health(HealthStatus.HEALTHY, URL_188) - result = manager._decide_route(h111, h188, URL_111, URL_188) + result = manager._decide_route(h111, URL_111) assert result.primary.provider_name == "ollama" assert result.primary.url == URL_111 - def test_111_healthy_fallback_includes_188(self): + def test_111_healthy_fallback_is_gemini_only(self): + """統帥鐵律:HEALTHY fallback 只有 Gemini,188/Nemotron 移出""" manager = self._setup() h111 = _make_health(HealthStatus.HEALTHY, URL_111) - h188 = _make_health(HealthStatus.HEALTHY, URL_188) - result = manager._decide_route(h111, h188, URL_111, URL_188) + result = manager._decide_route(h111, URL_111) provider_names = [e.provider_name for e in result.fallback_chain] - assert "ollama_188" in provider_names + assert provider_names == ["gemini"] + assert "ollama_188" not in provider_names + assert "nemotron" not in provider_names - def test_111_healthy_fallback_includes_nemotron_gemini(self): + def test_111_healthy_fallback_includes_gemini(self): manager = self._setup() h111 = _make_health(HealthStatus.HEALTHY, URL_111) - h188 = _make_health(HealthStatus.HEALTHY, URL_188) - result = manager._decide_route(h111, h188, URL_111, URL_188) + result = manager._decide_route(h111, URL_111) provider_names = [e.provider_name for e in result.fallback_chain] - assert "nemotron" in provider_names assert "gemini" in provider_names def test_111_healthy_fallback_order_gemini_first(self): - """新矩陣:Gemini 應排在 188/nemotron 之前(快速雲端優先)""" + """統帥鐵律:Gemini 是唯一 fallback,排在 fallback_chain[0]""" manager = self._setup() h111 = _make_health(HealthStatus.HEALTHY, URL_111) - h188 = _make_health(HealthStatus.HEALTHY, URL_188) - result = manager._decide_route(h111, h188, URL_111, URL_188) + result = manager._decide_route(h111, URL_111) assert result.fallback_chain[0].provider_name == "gemini" @@ -127,30 +125,29 @@ class TestDecideRoute: """新矩陣:111 SLOW → primary=Gemini(111 eval ~0.09 token/s, ~111s,Gemini 更快)""" manager = self._setup() h111 = _make_health(HealthStatus.SLOW, URL_111) - h188 = _make_health(HealthStatus.HEALTHY, URL_188) - result = manager._decide_route(h111, h188, URL_111, URL_188) + result = manager._decide_route(h111, URL_111) assert result.primary.provider_name == "gemini" - def test_111_slow_fallback_includes_111_and_188(self): - """SLOW 時 111 + 188 仍在 fallback(Gemini 額度耗盡時的降級鏈)""" + def test_111_slow_fallback_includes_111_and_nemotron(self): + """SLOW 時 111 + Nemotron 在 fallback(188 已移出)""" manager = self._setup() h111 = _make_health(HealthStatus.SLOW, URL_111) - h188 = _make_health(HealthStatus.HEALTHY, URL_188) - result = manager._decide_route(h111, h188, URL_111, URL_188) + result = manager._decide_route(h111, URL_111) provider_names = [e.provider_name for e in result.fallback_chain] assert "ollama" in provider_names - assert "ollama_188" in provider_names + assert "nemotron" in provider_names + assert "ollama_188" not in provider_names - def test_111_slow_no_188_primary_is_gemini(self): - """111 SLOW + 188 未設定 → primary=Gemini(新矩陣,不強撐 111)""" - manager = _make_manager(url_188="") # 188 未設定 + def test_111_slow_primary_is_gemini_no_188(self): + """111 SLOW + 188 不存在 → primary=Gemini(新矩陣,188 完全移出)""" + manager = _make_manager() h111 = _make_health(HealthStatus.SLOW, URL_111) - result = manager._decide_route(h111, None, URL_111, "") + result = manager._decide_route(h111, URL_111) assert result.primary.provider_name == "gemini" @@ -162,9 +159,8 @@ class TestDecideRoute: """新矩陣:111 DEGRADED → primary=Gemini""" manager = self._setup() h111 = _make_health(HealthStatus.DEGRADED, URL_111) - h188 = _make_health(HealthStatus.HEALTHY, URL_188) - result = manager._decide_route(h111, h188, URL_111, URL_188) + result = manager._decide_route(h111, URL_111) assert result.primary.provider_name == "gemini" @@ -172,72 +168,55 @@ class TestDecideRoute: """DEGRADED 時 111 不在 fallback(太差了)""" manager = self._setup() h111 = _make_health(HealthStatus.DEGRADED, URL_111) - h188 = _make_health(HealthStatus.HEALTHY, URL_188) - result = manager._decide_route(h111, h188, URL_111, URL_188) + result = manager._decide_route(h111, URL_111) provider_names = [e.provider_name for e in result.fallback_chain] assert "ollama" not in provider_names - def test_111_degraded_fallback_includes_188_nemotron_claude(self): - """新矩陣:DEGRADED fallback = [188, nemotron, claude]""" + def test_111_degraded_fallback_includes_nemotron_claude(self): + """統帥鐵律:DEGRADED fallback = [Nemotron, Claude](188 已移出)""" manager = self._setup() h111 = _make_health(HealthStatus.DEGRADED, URL_111) - h188 = _make_health(HealthStatus.HEALTHY, URL_188) - result = manager._decide_route(h111, h188, URL_111, URL_188) + result = manager._decide_route(h111, URL_111) provider_names = [e.provider_name for e in result.fallback_chain] - assert "ollama_188" in provider_names assert "nemotron" in provider_names assert "claude" in provider_names + assert "ollama_188" not in provider_names # ------------------------------------------------------------------ # 111 OFFLINE # ------------------------------------------------------------------ def test_111_offline_primary_is_gemini(self): - """新矩陣:111 OFFLINE → primary=Gemini(188 降為 fallback 備援)""" + """新矩陣:111 OFFLINE → primary=Gemini""" manager = self._setup() h111 = _make_health(HealthStatus.OFFLINE, URL_111) - h188 = _make_health(HealthStatus.HEALTHY, URL_188) - result = manager._decide_route(h111, h188, URL_111, URL_188) + result = manager._decide_route(h111, URL_111) assert result.primary.provider_name == "gemini" - # ------------------------------------------------------------------ - # 雙節點都 OFFLINE - # ------------------------------------------------------------------ - - def test_both_offline_primary_is_gemini(self): - """新矩陣:111 + 188 都 OFFLINE → Gemini 接手(最快雲端)""" + def test_111_offline_fallback_includes_nemotron_claude(self): + """111 OFFLINE 時,fallback=[Nemotron, Claude](無可用 Ollama)""" manager = self._setup() h111 = _make_health(HealthStatus.OFFLINE, URL_111) - h188 = _make_health(HealthStatus.OFFLINE, URL_188) - result = manager._decide_route(h111, h188, URL_111, URL_188) - - assert result.primary.provider_name == "gemini" - - def test_both_offline_fallback_includes_nemotron_claude(self): - """雙 OFFLINE 時,fallback=[Nemotron, Claude](無可用 Ollama)""" - manager = self._setup() - h111 = _make_health(HealthStatus.OFFLINE, URL_111) - h188 = _make_health(HealthStatus.OFFLINE, URL_188) - - result = manager._decide_route(h111, h188, URL_111, URL_188) + result = manager._decide_route(h111, URL_111) provider_names = [e.provider_name for e in result.fallback_chain] assert "nemotron" in provider_names assert "claude" in provider_names + assert "ollama_188" not in provider_names - def test_111_offline_no_188_primary_is_gemini(self): - """新矩陣:111 OFFLINE + 188 未設定 → Gemini(不是 Nemotron)""" - manager = _make_manager(url_188="") + def test_111_offline_primary_is_gemini_no_188(self): + """新矩陣:111 OFFLINE → Gemini(188 不再列入考慮)""" + manager = _make_manager() h111 = _make_health(HealthStatus.OFFLINE, URL_111) - result = manager._decide_route(h111, None, URL_111, "") + result = manager._decide_route(h111, URL_111) assert result.primary.provider_name == "gemini" @@ -249,35 +228,30 @@ class TestDecideRoute: """routing_reason 應包含 111 的狀態資訊""" manager = self._setup() h111 = _make_health(HealthStatus.OFFLINE, URL_111) - h188 = _make_health(HealthStatus.HEALTHY, URL_188) - result = manager._decide_route(h111, h188, URL_111, URL_188) + result = manager._decide_route(h111, URL_111) assert "offline" in result.routing_reason.lower() or "111" in result.routing_reason # ============================================================================= -# select_provider():並行 gather +# select_provider():只 check 111 # ============================================================================= class TestSelectProvider: - """select_provider() 並行邏輯""" + """select_provider() 只 check 111 邏輯(統帥鐵律:188 完全移出)""" @pytest.mark.asyncio - async def test_select_provider_calls_gather(self): - """有 url_188 時應並行 gather 兩個 check""" + async def test_select_provider_checks_111_only(self): + """統帥鐵律:select_provider 只 check 111,call_count == 1""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock( - side_effect=[ - _make_health(HealthStatus.HEALTHY, URL_111), - _make_health(HealthStatus.HEALTHY, URL_188), - ] + return_value=_make_health(HealthStatus.HEALTHY, URL_111) ) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 - mock_settings.OLLAMA_FALLBACK_URL = URL_188 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" manager = OllamaFailoverManager(health_monitor=mock_monitor) @@ -286,21 +260,19 @@ class TestSelectProvider: with patch.object(manager, "_write_failover_audit", return_value=None): result = await manager.select_provider() - # 兩個 host 都被 check - assert mock_monitor.check.call_count == 2 - called_hosts = {call.args[0] for call in mock_monitor.check.call_args_list} - assert URL_111 in called_hosts - assert URL_188 in called_hosts + # 只 check 111,不再並行 check 188 + assert mock_monitor.check.call_count == 1 + called_url = mock_monitor.check.call_args.args[0] + assert called_url == URL_111 @pytest.mark.asyncio - async def test_select_provider_single_node_no_188(self): - """OLLAMA_FALLBACK_URL 空字串 → 只 check 111""" + async def test_select_provider_single_node_primary_ollama(self): + """111 HEALTHY → primary=ollama""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock(return_value=_make_health(HealthStatus.HEALTHY, URL_111)) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 - mock_settings.OLLAMA_FALLBACK_URL = "" mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" manager = OllamaFailoverManager(health_monitor=mock_monitor) @@ -317,22 +289,17 @@ class TestSelectProvider: """select_provider 返回 OllamaRoutingResult 類型(新矩陣:111 OFFLINE → Gemini)""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock( - side_effect=[ - _make_health(HealthStatus.OFFLINE, URL_111), - _make_health(HealthStatus.HEALTHY, URL_188), - ] + return_value=_make_health(HealthStatus.OFFLINE, URL_111) ) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 - mock_settings.OLLAMA_FALLBACK_URL = URL_188 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings - # 2026-04-27 Wave8-X2: 必須 mock Redis pool(_check_gemini_quota 走 fail-closed 路徑會切到 188 而非 Gemini) - # 測試本身只要驗 OFFLINE → Gemini 路由邏輯,故繞過 quota check + # 必須 mock Redis pool(_check_gemini_quota 走 fail-closed 路徑會切到 Nemotron 而非 Gemini) with patch.object(manager, "_write_failover_audit", return_value=None), \ patch.object(manager, "_check_gemini_quota", AsyncMock(return_value=True)), \ patch( @@ -345,7 +312,7 @@ class TestSelectProvider: result = await manager.select_provider() assert isinstance(result, OllamaRoutingResult) - # 新矩陣:111 OFFLINE + Gemini quota OK → primary=Gemini(188 降為 fallback) + # 新矩陣:111 OFFLINE + Gemini quota OK → primary=Gemini assert result.primary.provider_name == "gemini" @pytest.mark.asyncio @@ -353,26 +320,20 @@ class TestSelectProvider: """111 正常時不觸發 failover audit""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock( - side_effect=[ - _make_health(HealthStatus.HEALTHY, URL_111), - _make_health(HealthStatus.HEALTHY, URL_188), - ] + return_value=_make_health(HealthStatus.HEALTHY, URL_111) ) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 - mock_settings.OLLAMA_FALLBACK_URL = URL_188 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" manager = OllamaFailoverManager(health_monitor=mock_monitor) manager._settings = mock_settings audit_called = [False] - original_write = manager._write_failover_audit async def _spy_audit(result): # _write_failover_audit 在 111 HEALTHY 時 early return,不寫 DB - # 追蹤呼叫是否有 side effect(DB 寫入) audit_called[0] = result.primary.provider_name != "ollama" with patch.object(manager, "_write_failover_audit", side_effect=_spy_audit): @@ -435,7 +396,7 @@ class TestOllamaRoutingResult: def test_all_endpoints_in_order(self): from src.services.ollama_failover_manager import OllamaEndpoint primary = OllamaEndpoint(url=URL_111, provider_name="ollama", model="m1") - fb1 = OllamaEndpoint(url=URL_188, provider_name="ollama_188", model="m2") + fb1 = OllamaEndpoint(url="", provider_name="gemini", model="gemini-1.5-flash") fb2 = OllamaEndpoint(url="", provider_name="nemotron", model="m3") result = OllamaRoutingResult( @@ -447,7 +408,7 @@ class TestOllamaRoutingResult: ordered = result.all_endpoints_in_order() assert ordered[0].provider_name == "ollama" - assert ordered[1].provider_name == "ollama_188" + assert ordered[1].provider_name == "gemini" assert ordered[2].provider_name == "nemotron" def test_to_dict_structure(self): @@ -464,6 +425,19 @@ class TestOllamaRoutingResult: assert d["routing_reason"] == "111 HEALTHY" assert isinstance(d["fallback_chain"], list) + def test_health_188_optional_field_backward_compat(self): + """health_188 保留為 optional None(backward-compat,不傳也可以)""" + from src.services.ollama_failover_manager import OllamaEndpoint + primary = OllamaEndpoint(url=URL_111, provider_name="ollama", model="qwen") + result = OllamaRoutingResult( + primary=primary, + fallback_chain=[], + routing_reason="test", + health_111=_make_health(HealthStatus.HEALTHY), + # health_188 不傳,應為 None + ) + assert result.health_188 is None + # ============================================================================= # Singleton @@ -552,22 +526,18 @@ class TestAIProviderEnumOllama188: class TestGatherReturnExceptions: - """H4 修復驗證:任一 check 拋例外時不炸整個 select_provider""" + """H4 修復驗證:111 check 拋例外時不炸整個 select_provider""" @pytest.mark.asyncio async def test_gather_exception_in_111_treated_as_offline(self): """111 check 拋例外 → health_111=OFFLINE,select_provider 正常返回""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock( - side_effect=[ - RuntimeError("111 network error"), - _make_health(HealthStatus.HEALTHY, URL_188), - ] + side_effect=RuntimeError("111 network error") ) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 - mock_settings.OLLAMA_FALLBACK_URL = URL_188 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_settings.GEMINI_DAILY_QUOTA = 1000 @@ -582,19 +552,15 @@ class TestGatherReturnExceptions: assert result.primary.provider_name == "gemini" @pytest.mark.asyncio - async def test_gather_exception_in_188_treated_as_offline(self): - """188 check 拋例外 → health_188=OFFLINE,select_provider 正常返回""" + async def test_111_healthy_select_provider_primary_ollama(self): + """111 HEALTHY → primary=ollama,select_provider 正常返回(取代舊的 188 exception 測試)""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock( - side_effect=[ - _make_health(HealthStatus.HEALTHY, URL_111), - RuntimeError("188 network error"), - ] + return_value=_make_health(HealthStatus.HEALTHY, URL_111) ) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 - mock_settings.OLLAMA_FALLBACK_URL = URL_188 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_settings.GEMINI_DAILY_QUOTA = 1000 @@ -605,7 +571,7 @@ class TestGatherReturnExceptions: patch.object(manager, "_check_gemini_quota", return_value=True): result = await manager.select_provider() - # 111 HEALTHY → primary=ollama(188 exception 不影響主路由) + # 111 HEALTHY → primary=ollama assert result.primary.provider_name == "ollama" @@ -682,19 +648,15 @@ class TestGeminiQuota: assert ok is False @pytest.mark.asyncio - async def test_select_provider_quota_exceeded_uses_188(self): - """select_provider:Gemini quota 超過 → primary 改為 OLLAMA_188""" + async def test_select_provider_quota_exceeded_uses_nemotron(self): + """select_provider:Gemini quota 超過 → primary 改為 Nemotron(統帥鐵律:188 移出)""" mock_monitor = AsyncMock() mock_monitor.check = AsyncMock( - side_effect=[ - _make_health(HealthStatus.OFFLINE, URL_111), - _make_health(HealthStatus.HEALTHY, URL_188), - ] + return_value=_make_health(HealthStatus.OFFLINE, URL_111) ) mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 - mock_settings.OLLAMA_FALLBACK_URL = URL_188 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_settings.GEMINI_DAILY_QUOTA = 1000 @@ -705,8 +667,8 @@ class TestGeminiQuota: patch.object(manager, "_check_gemini_quota", return_value=False): result = await manager.select_provider() - # quota 超過 → 不走 Gemini,改走 188 - assert result.primary.provider_name == "ollama_188" + # quota 超過 → 不走 Gemini,改走 Nemotron(188 已移出) + assert result.primary.provider_name == "nemotron" @pytest.mark.asyncio async def test_select_provider_quota_exceeded_no_188_uses_nemotron(self): @@ -716,7 +678,6 @@ class TestGeminiQuota: mock_settings = MagicMock() mock_settings.OLLAMA_URL = URL_111 - mock_settings.OLLAMA_FALLBACK_URL = "" # 無 188 mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" mock_settings.GEMINI_DAILY_QUOTA = 1000