Files
awoooi/apps/api/tests/test_ollama_failover_manager.py
Your Name b432becd4e
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
fix(failover): 188 完全移出 routing chain,備援只用 Gemini
統帥鐵律 2026-04-26:
- 唯一 Ollama = 111(M1 Pro Metal 加速)
- 188 CPU-only (0.45 tok/s) 禁止即時回應,移出所有 fallback chain
- 111 HEALTHY → fallback=[Gemini]
- 111 非HEALTHY → primary=Gemini, fallback=[Nemotron, Claude]
- Gemini quota exceeded → Nemotron → Claude(不落 188)
- OllamaRoutingResult 移除 health_188 欄位
- select_provider 只 check 111(不再 asyncio.gather 兩節點)
- 測試全部對齊新規則(1451 passed)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-27 15:47:41 +08:00

692 lines
27 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# apps/api/tests/test_ollama_failover_manager.py | 2026-04-27 @ Asia/Taipei
# Created 2026-04-25 P1.1c by Claude Engineer-C
# 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復(路由矩陣更新)
# 2026-04-27 wave alignment by Claude Sonnet 4.6 — commander's iron rule: the only Ollama is 111; 188 is fully removed
"""
OllamaFailoverManager unit tests - P1.1c v3.0
=============================================
Coverage (new routing matrix; commander's iron rule 2026-04-26: the only Ollama is 111, backup is Gemini only):
- 111 HEALTHY → primary=ollama(111), fallback=[Gemini]
- 111 SLOW → primary=Gemini, fallback=[111, Nemotron, Claude]
- 111 DEGRADED → primary=Gemini, fallback=[Nemotron, Claude]
- 111 OFFLINE → primary=Gemini, fallback=[Nemotron, Claude]
- Gemini quota exceeded → primary=Nemotron, fallback=[Claude]
- select_provider checks 111 only (no longer checks 188 in parallel)
- clear_cache() / notify_recovery() methods
- OllamaRoutingResult.health_188 is kept as an optional field (backward-compat)
Test category: unit (OllamaHealthMonitor is mocked, no DB dependency)
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from src.services.ollama_health_monitor import HealthReport, HealthStatus
from src.services.ollama_failover_manager import (
OllamaFailoverManager,
OllamaRoutingResult,
get_ollama_failover_manager,
reset_ollama_failover_manager,
)
# =============================================================================
# Fixtures
# =============================================================================
# Base URL of the "111" Ollama node; default host for the helpers and tests below.
URL_111 = "http://192.168.0.111:11434"
# Base URL of the "188" node.
# NOTE(review): URL_188 is not referenced by any test visible in this file — confirm before removing.
URL_188 = "http://192.168.0.188:11434"
@pytest.fixture(autouse=True)
def reset_singleton():
    """Isolate every test from the module-level failover-manager singleton.

    The singleton is dropped both before and after each test, so an instance
    left behind by an earlier test (or another test module) cannot leak into
    the test that is about to run, and nothing created here leaks out.
    """
    reset_ollama_failover_manager()
    yield
    reset_ollama_failover_manager()
def _make_health(status: HealthStatus, url: str = URL_111) -> HealthReport:
    """Build a HealthReport for ``url`` with the given status and a fixed 500.0 latency_ms."""
    report_fields = {"status": status, "host": url, "latency_ms": 500.0}
    return HealthReport(**report_fields)
def _make_manager(url_111: str = URL_111) -> OllamaFailoverManager:
    """Create a manager whose settings are mocked to point at ``url_111``.

    The health monitor is a plain MagicMock; only the 111 URL and the
    health-check model are configured on the settings stub (188 is gone).
    """
    settings_stub = MagicMock(
        OLLAMA_URL=url_111,
        OLLAMA_HEALTH_CHECK_MODEL="qwen2.5:7b-instruct",
    )
    failover_manager = OllamaFailoverManager(health_monitor=MagicMock())
    # Inject the stub directly so no real settings are consulted.
    failover_manager._settings = settings_stub
    return failover_manager
# =============================================================================
# _decide_route 決策矩陣
# =============================================================================
class TestDecideRoute:
    """Pure-function tests of the _decide_route routing matrix.

    With the new signature only the 111 health report and the 111 URL are passed.
    """

    def _setup(self) -> OllamaFailoverManager:
        """Return a fresh manager backed by mocked settings."""
        return _make_manager()

    def _route(self, status):
        """Run _decide_route on a fresh manager for a 111 report carrying ``status``."""
        manager = self._setup()
        return manager._decide_route(_make_health(status, URL_111), URL_111)

    @staticmethod
    def _fallback_names(routing):
        """List the provider names of ``routing.fallback_chain`` in order."""
        return [endpoint.provider_name for endpoint in routing.fallback_chain]

    # ------------------------------------------------------------------
    # 111 HEALTHY
    # ------------------------------------------------------------------
    def test_111_healthy_primary_is_ollama(self):
        """111 HEALTHY → the primary endpoint is ollama at the 111 URL."""
        primary = self._route(HealthStatus.HEALTHY).primary
        assert primary.provider_name == "ollama"
        assert primary.url == URL_111

    def test_111_healthy_fallback_is_gemini_only(self):
        """Iron rule: the HEALTHY fallback is Gemini only; 188 and Nemotron are out."""
        names = self._fallback_names(self._route(HealthStatus.HEALTHY))
        assert names == ["gemini"]
        assert "ollama_188" not in names
        assert "nemotron" not in names

    def test_111_healthy_fallback_includes_gemini(self):
        """111 HEALTHY → Gemini appears in the fallback chain."""
        names = self._fallback_names(self._route(HealthStatus.HEALTHY))
        assert "gemini" in names

    def test_111_healthy_fallback_order_gemini_first(self):
        """Iron rule: Gemini is the only fallback and sits at fallback_chain[0]."""
        routing = self._route(HealthStatus.HEALTHY)
        assert routing.fallback_chain[0].provider_name == "gemini"

    # ------------------------------------------------------------------
    # 111 SLOW
    # ------------------------------------------------------------------
    def test_111_slow_primary_is_gemini(self):
        """New matrix: 111 SLOW → primary=Gemini (111 eval ~0.09 token/s, ~111s; Gemini is faster)."""
        routing = self._route(HealthStatus.SLOW)
        assert routing.primary.provider_name == "gemini"

    def test_111_slow_fallback_includes_111_and_nemotron(self):
        """When SLOW, 111 and Nemotron are in the fallback chain (188 is removed)."""
        names = self._fallback_names(self._route(HealthStatus.SLOW))
        assert "ollama" in names
        assert "nemotron" in names
        assert "ollama_188" not in names

    def test_111_slow_primary_is_gemini_no_188(self):
        """111 SLOW with no 188 → primary=Gemini (new matrix: 188 fully removed)."""
        routing = self._route(HealthStatus.SLOW)
        assert routing.primary.provider_name == "gemini"

    # ------------------------------------------------------------------
    # 111 DEGRADED
    # ------------------------------------------------------------------
    def test_111_degraded_primary_is_gemini(self):
        """New matrix: 111 DEGRADED → primary=Gemini."""
        routing = self._route(HealthStatus.DEGRADED)
        assert routing.primary.provider_name == "gemini"

    def test_111_degraded_fallback_no_111(self):
        """When DEGRADED, 111 is not in the fallback chain (too poor to use)."""
        names = self._fallback_names(self._route(HealthStatus.DEGRADED))
        assert "ollama" not in names

    def test_111_degraded_fallback_includes_nemotron_claude(self):
        """Iron rule: DEGRADED fallback contains Nemotron and Claude (188 is removed)."""
        names = self._fallback_names(self._route(HealthStatus.DEGRADED))
        assert "nemotron" in names
        assert "claude" in names
        assert "ollama_188" not in names

    # ------------------------------------------------------------------
    # 111 OFFLINE
    # ------------------------------------------------------------------
    def test_111_offline_primary_is_gemini(self):
        """New matrix: 111 OFFLINE → primary=Gemini."""
        routing = self._route(HealthStatus.OFFLINE)
        assert routing.primary.provider_name == "gemini"

    def test_111_offline_fallback_includes_nemotron_claude(self):
        """When 111 is OFFLINE the fallback contains Nemotron and Claude (no usable Ollama)."""
        names = self._fallback_names(self._route(HealthStatus.OFFLINE))
        assert "nemotron" in names
        assert "claude" in names
        assert "ollama_188" not in names

    def test_111_offline_primary_is_gemini_no_188(self):
        """New matrix: 111 OFFLINE → Gemini (188 is no longer considered)."""
        routing = self._route(HealthStatus.OFFLINE)
        assert routing.primary.provider_name == "gemini"

    # ------------------------------------------------------------------
    # routing_reason bookkeeping
    # ------------------------------------------------------------------
    def test_routing_reason_contains_status(self):
        """routing_reason should mention the 111 status (or at least the node name)."""
        reason = self._route(HealthStatus.OFFLINE).routing_reason
        assert "offline" in reason.lower() or "111" in reason
# =============================================================================
# select_provider():只 check 111
# =============================================================================
class TestSelectProvider:
    """select_provider() checks 111 only (iron rule: 188 is fully removed)."""

    @staticmethod
    def _manager_reporting(status):
        """Return ``(manager, monitor)`` where ``monitor.check`` resolves to a 111 report with ``status``."""
        monitor = AsyncMock()
        monitor.check = AsyncMock(return_value=_make_health(status, URL_111))
        settings_stub = MagicMock()
        settings_stub.OLLAMA_URL = URL_111
        settings_stub.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
        manager = OllamaFailoverManager(health_monitor=monitor)
        manager._settings = settings_stub
        return manager, monitor

    @pytest.mark.asyncio
    async def test_select_provider_checks_111_only(self):
        """Iron rule: select_provider checks 111 only, so check is called exactly once."""
        manager, monitor = self._manager_reporting(HealthStatus.HEALTHY)
        with patch.object(manager, "_write_failover_audit", return_value=None):
            await manager.select_provider()
        # A single health check, aimed at 111; no parallel check of 188.
        assert monitor.check.call_count == 1
        first_positional = monitor.check.call_args.args[0]
        assert first_positional == URL_111

    @pytest.mark.asyncio
    async def test_select_provider_single_node_primary_ollama(self):
        """111 HEALTHY → primary=ollama."""
        manager, monitor = self._manager_reporting(HealthStatus.HEALTHY)
        with patch.object(manager, "_write_failover_audit", return_value=None):
            routing = await manager.select_provider()
        assert monitor.check.call_count == 1
        assert routing.primary.provider_name == "ollama"

    @pytest.mark.asyncio
    async def test_select_provider_returns_routing_result(self):
        """select_provider returns an OllamaRoutingResult (new matrix: 111 OFFLINE → Gemini)."""
        manager, _ = self._manager_reporting(HealthStatus.OFFLINE)
        alerter_stub = MagicMock(
            alert_failover=AsyncMock(),
            alert_gemini_quota_exceeded=AsyncMock(),
        )
        audit_patch = patch.object(manager, "_write_failover_audit", return_value=None)
        # The quota check must be mocked; per the original note, otherwise
        # _check_gemini_quota takes its fail-closed path and Nemotron is chosen instead of Gemini.
        quota_patch = patch.object(
            manager, "_check_gemini_quota", AsyncMock(return_value=True)
        )
        alerter_patch = patch(
            "src.services.failover_alerter.get_failover_alerter",
            return_value=alerter_stub,
        )
        with audit_patch, quota_patch, alerter_patch:
            routing = await manager.select_provider()
        assert isinstance(routing, OllamaRoutingResult)
        # New matrix: 111 OFFLINE + Gemini quota OK → primary=Gemini.
        assert routing.primary.provider_name == "gemini"

    @pytest.mark.asyncio
    async def test_audit_not_written_when_111_healthy(self):
        """With 111 healthy, no failover result is handed to the audit hook."""
        manager, _ = self._manager_reporting(HealthStatus.HEALTHY)
        failover_audited = False

        async def _spy_audit(result):
            # Records whether the audited result routed away from ollama.
            nonlocal failover_audited
            failover_audited = result.primary.provider_name != "ollama"

        with patch.object(manager, "_write_failover_audit", side_effect=_spy_audit):
            await manager.select_provider()
        # 111 HEALTHY: there must be no failover event.
        assert failover_audited is False
# =============================================================================
# clear_cache() / notify_recovery()
# =============================================================================
class TestRecoveryAPI:
    """Tests for the clear_cache() / notify_recovery() recovery API."""

    @pytest.mark.asyncio
    async def test_clear_cache_calls_redis_delete(self):
        """clear_cache() can be awaited exactly once through a manager instance.

        NOTE(review): this test replaces clear_cache itself with a mock, so the
        real implementation never runs and the test cannot prove that
        redis.delete is called. Strengthen it by mocking the Redis accessor
        that clear_cache uses and asserting on ``delete`` once that accessor
        is confirmed against the implementation.
        """
        manager = _make_manager()
        with patch(
            "src.services.ollama_failover_manager.OllamaFailoverManager.clear_cache",
            new_callable=AsyncMock,  # explicit: do not rely on patch's async auto-detection
        ) as mock_clear:
            mock_clear.return_value = None
            await manager.clear_cache()
            mock_clear.assert_awaited_once()

    @pytest.mark.asyncio
    async def test_clear_cache_fails_gracefully(self):
        """When Redis cannot be obtained, clear_cache() swallows the error and does not raise."""
        manager = _make_manager()
        # Patch both places get_redis could be resolved from: a module-level
        # name in the manager module (created if absent) and the redis_client
        # module itself, in case clear_cache imports it locally at call time.
        with patch(
            "src.services.ollama_failover_manager.get_redis",
            side_effect=ImportError("no redis"),
            create=True,
        ), patch(
            "src.core.redis_client.get_redis",
            side_effect=ImportError("no redis"),
        ):
            # Must not raise.
            await manager.clear_cache()

    def test_notify_recovery_does_not_raise(self):
        """notify_recovery() must complete without raising."""
        manager = _make_manager()
        # Must not raise.
        manager.notify_recovery("ollama_111")
# =============================================================================
# OllamaRoutingResult
# =============================================================================
class TestOllamaRoutingResult:
    """Tests for the OllamaRoutingResult helper methods and fields."""

    @staticmethod
    def _endpoint(url, provider_name, model):
        """Build an OllamaEndpoint (imported lazily, as the original tests did)."""
        from src.services.ollama_failover_manager import OllamaEndpoint

        return OllamaEndpoint(url=url, provider_name=provider_name, model=model)

    def test_all_endpoints_in_order(self):
        """all_endpoints_in_order() yields the primary first, then the fallbacks in order."""
        fallbacks = [
            self._endpoint("", "gemini", "gemini-1.5-flash"),
            self._endpoint("", "nemotron", "m3"),
        ]
        routing = OllamaRoutingResult(
            primary=self._endpoint(URL_111, "ollama", "m1"),
            fallback_chain=fallbacks,
            routing_reason="test",
            health_111=_make_health(HealthStatus.HEALTHY),
        )
        ordered = routing.all_endpoints_in_order()
        leading_names = [ordered[position].provider_name for position in range(3)]
        assert leading_names == ["ollama", "gemini", "nemotron"]

    def test_to_dict_structure(self):
        """to_dict() exposes the primary provider, the routing reason and a list fallback chain."""
        routing = OllamaRoutingResult(
            primary=self._endpoint(URL_111, "ollama", "qwen"),
            fallback_chain=[],
            routing_reason="111 HEALTHY",
            health_111=_make_health(HealthStatus.HEALTHY),
        )
        as_dict = routing.to_dict()
        assert as_dict["primary"]["provider"] == "ollama"
        assert as_dict["routing_reason"] == "111 HEALTHY"
        assert isinstance(as_dict["fallback_chain"], list)

    def test_health_188_optional_field_backward_compat(self):
        """health_188 stays an optional field (backward-compat): omitting it yields None."""
        routing = OllamaRoutingResult(
            primary=self._endpoint(URL_111, "ollama", "qwen"),
            fallback_chain=[],
            routing_reason="test",
            health_111=_make_health(HealthStatus.HEALTHY),
            # health_188 is deliberately not passed; it should default to None.
        )
        assert routing.health_188 is None
# =============================================================================
# Singleton
# =============================================================================
def test_singleton_returns_same_instance():
    """Two consecutive lookups return the very same manager object."""
    first, second = get_ollama_failover_manager(), get_ollama_failover_manager()
    assert first is second
def test_reset_singleton_gives_new_instance():
    """After a reset, the next lookup builds a different manager object."""
    before_reset = get_ollama_failover_manager()
    reset_ollama_failover_manager()
    after_reset = get_ollama_failover_manager()
    assert before_reset is not after_reset
# =============================================================================
# B1: _write_failover_audit now uses structlog and no longer writes to the DB
# 2026-04-25 critic-fix Part2 by Claude Engineer-C2
# =============================================================================
class TestWriteFailoverAudit:
    """B1 fix verification: _write_failover_audit runs without the AuditLog model or a DB."""

    @staticmethod
    def _routing(primary_url, provider_name, model, reason, status):
        """Build an OllamaRoutingResult with an empty fallback chain for the audit call."""
        from src.services.ollama_failover_manager import OllamaEndpoint

        return OllamaRoutingResult(
            primary=OllamaEndpoint(url=primary_url, provider_name=provider_name, model=model),
            fallback_chain=[],
            routing_reason=reason,
            health_111=_make_health(status),
        )

    @pytest.mark.asyncio
    async def test_audit_uses_structlog_not_db(self):
        """Auditing a failover-to-Gemini result completes without raising.

        NOTE(review): this is a smoke test only — nothing here asserts that
        structlog is actually called or that no DB call is made.
        """
        manager = _make_manager()
        routing = self._routing(
            "", "gemini", "gemini-1.5-flash", "111 OFFLINE → 切 Gemini", HealthStatus.OFFLINE
        )
        # Success is simply "does not raise".
        await manager._write_failover_audit(routing)

    @pytest.mark.asyncio
    async def test_audit_skipped_when_111_healthy(self):
        """Auditing a result whose primary is ollama (111 HEALTHY) completes without raising."""
        manager = _make_manager()
        routing = self._routing(
            URL_111, "ollama", "qwen", "111 HEALTHY → 主 111", HealthStatus.HEALTHY
        )
        # Per the original note, primary=ollama takes the early-return path; must not raise.
        await manager._write_failover_audit(routing)
# =============================================================================
# B2: AIProviderEnum.OLLAMA_188 存在
# 2026-04-25 critic-fix Part2 by Claude Engineer-C2
# =============================================================================
class TestAIProviderEnumOllama188:
    """B2 fix verification: AIProviderEnum.OLLAMA_188 exists and has a PROVIDER_LATENCY_BUDGET entry."""

    def test_ollama_188_enum_exists(self):
        """The OLLAMA_188 member carries the string value "ollama_188"."""
        from src.services.ai_router import AIProviderEnum

        member = AIProviderEnum.OLLAMA_188
        assert member.value == "ollama_188"

    def test_ollama_188_in_latency_budget(self):
        """OLLAMA_188 is a key of PROVIDER_LATENCY_BUDGET and maps to 120000."""
        from src.services.ai_router import AIProviderEnum, PROVIDER_LATENCY_BUDGET

        provider = AIProviderEnum.OLLAMA_188
        assert provider in PROVIDER_LATENCY_BUDGET
        # Presumably milliseconds — TODO confirm the unit against ai_router.
        assert PROVIDER_LATENCY_BUDGET[provider] == 120000
# =============================================================================
# H4: a health-check exception must not crash select_provider (originally: asyncio.gather return_exceptions=True)
# 2026-04-25 critic-fix Part2 by Claude Engineer-C2
# =============================================================================
class TestGatherReturnExceptions:
    """H4 fix verification: an exception from the 111 check must not blow up select_provider."""

    @staticmethod
    async def _primary_name_for(check_mock):
        """Run select_provider with ``check_mock`` as the monitor's check and return the primary's provider name.

        The audit hook is patched out and the Gemini quota check is patched to report True.
        """
        monitor = AsyncMock()
        monitor.check = check_mock
        settings_stub = MagicMock()
        settings_stub.OLLAMA_URL = URL_111
        settings_stub.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
        settings_stub.GEMINI_DAILY_QUOTA = 1000
        manager = OllamaFailoverManager(health_monitor=monitor)
        manager._settings = settings_stub
        audit_patch = patch.object(manager, "_write_failover_audit", return_value=None)
        quota_patch = patch.object(manager, "_check_gemini_quota", return_value=True)
        with audit_patch, quota_patch:
            routing = await manager.select_provider()
        return routing.primary.provider_name

    @pytest.mark.asyncio
    async def test_gather_exception_in_111_treated_as_offline(self):
        """The 111 check raises → treated as OFFLINE, and select_provider still returns normally."""
        failing_check = AsyncMock(side_effect=RuntimeError("111 network error"))
        primary_name = await self._primary_name_for(failing_check)
        # 111 exception → OFFLINE → primary=gemini (new matrix).
        assert primary_name == "gemini"

    @pytest.mark.asyncio
    async def test_111_healthy_select_provider_primary_ollama(self):
        """111 HEALTHY → primary=ollama (replaces the old 188-exception test)."""
        healthy_check = AsyncMock(
            return_value=_make_health(HealthStatus.HEALTHY, URL_111)
        )
        primary_name = await self._primary_name_for(healthy_check)
        # 111 HEALTHY → primary=ollama.
        assert primary_name == "ollama"
# =============================================================================
# H7: Gemini 帳單熔斷
# 2026-04-25 critic-fix Part2 by Claude Engineer-C2
# =============================================================================
class TestGeminiQuota:
    """H7 fix verification: the Gemini daily-quota circuit breaker."""

    @staticmethod
    def _redis_with_pipeline(execute_result):
        """Return ``(redis_mock, pipe_mock)`` whose pipeline's execute() resolves to ``execute_result``.

        ``set`` and ``incr`` on the pipeline return the pipeline itself, and
        ``redis_mock.pipeline()`` returns that same pipeline mock.
        """
        pipe = MagicMock()
        pipe.set = MagicMock(return_value=pipe)
        pipe.incr = MagicMock(return_value=pipe)
        pipe.execute = AsyncMock(return_value=execute_result)
        redis_mock = MagicMock()
        redis_mock.pipeline = MagicMock(return_value=pipe)
        return redis_mock, pipe

    @staticmethod
    async def _offline_primary_name(quota_ok):
        """Run select_provider with 111 OFFLINE and the quota check patched to ``quota_ok``; return the primary's name."""
        monitor = AsyncMock()
        monitor.check = AsyncMock(
            return_value=_make_health(HealthStatus.OFFLINE, URL_111)
        )
        settings_stub = MagicMock()
        settings_stub.OLLAMA_URL = URL_111
        settings_stub.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
        settings_stub.GEMINI_DAILY_QUOTA = 1000
        manager = OllamaFailoverManager(health_monitor=monitor)
        manager._settings = settings_stub
        audit_patch = patch.object(manager, "_write_failover_audit", return_value=None)
        quota_patch = patch.object(manager, "_check_gemini_quota", return_value=quota_ok)
        with audit_patch, quota_patch:
            routing = await manager.select_provider()
        return routing.primary.provider_name

    @pytest.mark.asyncio
    async def test_gemini_quota_under_limit(self):
        """Counter below quota=1000 → returns True (Gemini is allowed).

        2026-04-26 Wave5 B3-fix by Claude Engineer-A4 — uses a pipeline mock (after the atomic fix).
        The original GET/INCR/EXPIRE sequence became pipeline.set(NX)+incr, so the mock follows suit.
        """
        manager = _make_manager()
        manager._settings.GEMINI_DAILY_QUOTA = 1000
        # Pipeline results: SET NX → True, INCR → 501 (500+1), still under quota=1000.
        redis_mock, pipe = self._redis_with_pipeline([True, 501])
        with patch("src.core.redis_client.get_redis", return_value=redis_mock):
            allowed = await manager._check_gemini_quota()
        assert allowed is True
        pipe.execute.assert_awaited_once()

    @pytest.mark.asyncio
    async def test_gemini_quota_exactly_at_limit(self):
        """count=1001 > quota=1000 → returns False (breaker trips, Gemini is no longer called).

        2026-04-26 Wave5 B3-fix by Claude Engineer-A4 — uses a pipeline mock (after the atomic fix).
        NOTE(review): despite the test name, this exercises count > quota
        (1001 vs 1000), not the count == quota boundary.
        """
        manager = _make_manager()
        manager._settings.GEMINI_DAILY_QUOTA = 1000
        # INCR reports 1001, which is over the quota.
        redis_mock, _ = self._redis_with_pipeline([True, 1001])
        with patch("src.core.redis_client.get_redis", return_value=redis_mock):
            allowed = await manager._check_gemini_quota()
        assert allowed is False

    @pytest.mark.asyncio
    async def test_gemini_quota_redis_unavailable_fail_closed(self):
        """Redis is down → returns False (2026-04-27 Wave8-X2: fail-closed, fixing a cost-rule violation)."""
        manager = _make_manager()
        alerter_stub = MagicMock(alert_gemini_quota_exceeded=AsyncMock())
        redis_patch = patch(
            "src.core.redis_client.get_redis",
            side_effect=RuntimeError("Redis unavailable"),
        )
        alerter_patch = patch(
            "src.services.failover_alerter.get_failover_alerter",
            return_value=alerter_stub,
        )
        with redis_patch, alerter_patch:
            allowed = await manager._check_gemini_quota()
        # Fail-closed: refuse Gemini when Redis misbehaves so cost cannot run
        # away (which would violate feedback_cost_change_approval.md).
        assert allowed is False

    @pytest.mark.asyncio
    async def test_select_provider_quota_exceeded_uses_nemotron(self):
        """select_provider: Gemini quota exceeded → primary becomes Nemotron (iron rule: 188 removed)."""
        primary_name = await self._offline_primary_name(False)
        # Quota exceeded → skip Gemini and go to Nemotron (188 is removed).
        assert primary_name == "nemotron"

    @pytest.mark.asyncio
    async def test_select_provider_quota_exceeded_no_188_uses_nemotron(self):
        """select_provider: Gemini quota exceeded and no 188 available → primary=Nemotron."""
        primary_name = await self._offline_primary_name(False)
        assert primary_name == "nemotron"