Files
awoooi/apps/api/tests/test_ollama_failover_manager.py
Your Name 55c6b4e2d9 feat(p1): Ollama 多層容災系統 — P1.1 健康檢測 + P1.2 ai_router 整合 + P1.5 容災告警
ADR-092 P1 飛輪閉環的 Ollama 失敗轉移子系統,全部 Engineer-A2/C/C2 補上。

新服務 (1581 行):
- ollama_health_monitor.py (356):3 層健康檢測(TCP/HTTP/推理)
- ollama_failover_manager.py (571):111→188 自動切換 + Redis 持久化 + recovery callback
- ollama_auto_recovery.py (436):30s 背景監控 + 連續 3 次 HEALTHY → 切回 + clear_cache
- failover_alerter.py (218):P1.5 Telegram 容災告警

服務整合:
- ai_router.py: AIProviderEnum.OLLAMA_188 + 120s budget + failover fallback chain
- main.py lifespan: 啟動時 wire callback + start recovery,關閉時優雅 stop
- config.py: OLLAMA_FALLBACK_URL / OLLAMA_HEALTH_CHECK_MODEL / GEMINI_DAILY_QUOTA(帳單熔斷)

K8s 配置:
- 04-configmap.yaml.patch-188-fallback:注入 OLLAMA_FALLBACK_URL=http://192.168.0.188:11434

測試 (2082 行):
- test_ollama_health_monitor.py (402)
- test_ollama_failover_manager.py (707)
- test_ollama_auto_recovery.py (580)
- test_ai_router_failover_integration.py (257)
- test_lifespan_failover_wiring.py (136)

依賴鏈:service 三件套 + ai_router + main.py 一起 commit,缺一就 ImportError。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 20:18:33 +08:00

708 lines
27 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# apps/api/tests/test_ollama_failover_manager.py | 2026-04-25 @ Asia/Taipei
# Created 2026-04-25 P1.1c by Claude Engineer-C
# 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復(路由矩陣更新)
"""
OllamaFailoverManager unit tests - P1.1c v2.0
=============================================
Covers the new routing matrix (Gemini first, 188 as the last-resort fallback):
- 111 HEALTHY → primary=ollama(111), fallback=[Gemini, 188, Nemotron]
- 111 SLOW → primary=Gemini, fallback includes 111 + 188
- 111 DEGRADED → primary=Gemini, fallback includes 188 + nemotron + claude
- 111 OFFLINE → primary=Gemini, fallback includes 188 + nemotron + claude
- 111 + 188 both OFFLINE → primary=Gemini, fallback includes nemotron + claude
- Single-node behavior when OLLAMA_FALLBACK_URL is not set
- Parallel gather logic (asyncio.gather mock)
- clear_cache() / notify_recovery() methods
Test category: unit (mock OllamaHealthMonitor, no DB dependency)
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from src.services.ollama_health_monitor import HealthReport, HealthStatus
from src.services.ollama_failover_manager import (
OllamaFailoverManager,
OllamaRoutingResult,
get_ollama_failover_manager,
reset_ollama_failover_manager,
)
# =============================================================================
# Fixtures
# =============================================================================
URL_111 = "http://192.168.0.111:11434"
URL_188 = "http://192.168.0.188:11434"
@pytest.fixture(autouse=True)
def reset_singleton():
    """Reset the failover-manager singleton after every test in this module."""
    # Nothing to set up; the statement after ``yield`` is the teardown step.
    yield
    reset_ollama_failover_manager()
def _make_health(status: HealthStatus, url: str = URL_111) -> HealthReport:
    """Build a ``HealthReport`` for ``url`` with ``status`` and a fixed 500 ms latency.

    Args:
        status: Health status to put in the report.
        url: Host the report refers to; defaults to the 111 node.

    Returns:
        The constructed ``HealthReport``.
    """
    report = HealthReport(host=url, status=status, latency_ms=500.0)
    return report
def _make_manager(url_111: str = URL_111, url_188: str = URL_188) -> OllamaFailoverManager:
    """Create a manager whose settings are mocked to point at the given URLs.

    Args:
        url_111: Value exposed as ``OLLAMA_URL`` on the mocked settings.
        url_188: Value exposed as ``OLLAMA_FALLBACK_URL``; pass ``""`` to
            model "no fallback configured".

    Returns:
        An ``OllamaFailoverManager`` built around a ``MagicMock`` health
        monitor, with ``_settings`` replaced by the mocked settings object.
    """
    # MagicMock keyword arguments become plain attributes on the mock.
    fake_settings = MagicMock(
        OLLAMA_URL=url_111,
        OLLAMA_FALLBACK_URL=url_188,
        OLLAMA_HEALTH_CHECK_MODEL="qwen2.5:7b-instruct",
    )
    failover_manager = OllamaFailoverManager(health_monitor=MagicMock())
    failover_manager._settings = fake_settings
    return failover_manager
# =============================================================================
# _decide_route 決策矩陣
# =============================================================================
class TestDecideRoute:
    """Tests for the ``_decide_route`` routing decision matrix."""

    def _setup(self, url_188: str = URL_188) -> OllamaFailoverManager:
        """Return a manager whose mocked fallback URL is ``url_188``."""
        return _make_manager(url_188=url_188)

    def _route(
        self,
        status_111: HealthStatus,
        status_188: HealthStatus = HealthStatus.HEALTHY,
    ) -> OllamaRoutingResult:
        """Call ``_decide_route`` with both nodes configured and the given statuses."""
        manager = self._setup()
        report_111 = _make_health(status_111, URL_111)
        report_188 = _make_health(status_188, URL_188)
        return manager._decide_route(report_111, report_188, URL_111, URL_188)

    @staticmethod
    def _route_without_188(status_111: HealthStatus) -> OllamaRoutingResult:
        """Call ``_decide_route`` with no 188 node (empty URL, ``None`` health report)."""
        manager = _make_manager(url_188="")  # 188 not configured
        report_111 = _make_health(status_111, URL_111)
        return manager._decide_route(report_111, None, URL_111, "")

    @staticmethod
    def _fallback_names(result: OllamaRoutingResult) -> list:
        """Return the provider names of the fallback chain, in chain order."""
        return [endpoint.provider_name for endpoint in result.fallback_chain]

    # ------------------------------------------------------------------
    # 111 HEALTHY
    # ------------------------------------------------------------------
    def test_111_healthy_primary_is_ollama(self):
        """111 HEALTHY: the primary is the 111 Ollama endpoint."""
        primary = self._route(HealthStatus.HEALTHY).primary
        assert primary.provider_name == "ollama"
        assert primary.url == URL_111

    def test_111_healthy_fallback_includes_188(self):
        """111 HEALTHY: the 188 node appears in the fallback chain."""
        names = self._fallback_names(self._route(HealthStatus.HEALTHY))
        assert "ollama_188" in names

    def test_111_healthy_fallback_includes_nemotron_gemini(self):
        """111 HEALTHY: both nemotron and gemini appear in the fallback chain."""
        names = self._fallback_names(self._route(HealthStatus.HEALTHY))
        assert "nemotron" in names
        assert "gemini" in names

    def test_111_healthy_fallback_order_gemini_first(self):
        """New matrix: Gemini is ranked ahead of 188/nemotron (fast cloud first)."""
        chain = self._route(HealthStatus.HEALTHY).fallback_chain
        assert chain[0].provider_name == "gemini"

    # ------------------------------------------------------------------
    # 111 SLOW
    # ------------------------------------------------------------------
    def test_111_slow_primary_is_gemini(self):
        """New matrix: 111 SLOW → primary=Gemini (111 eval ~0.09 token/s, ~111s; Gemini is faster)."""
        routed = self._route(HealthStatus.SLOW)
        assert routed.primary.provider_name == "gemini"

    def test_111_slow_fallback_includes_111_and_188(self):
        """SLOW: 111 + 188 stay in the fallback chain (degrade path once Gemini quota runs out)."""
        names = self._fallback_names(self._route(HealthStatus.SLOW))
        assert "ollama" in names
        assert "ollama_188" in names

    def test_111_slow_no_188_primary_is_gemini(self):
        """111 SLOW + 188 not configured → primary=Gemini (new matrix does not hold on to 111)."""
        routed = self._route_without_188(HealthStatus.SLOW)
        assert routed.primary.provider_name == "gemini"

    # ------------------------------------------------------------------
    # 111 DEGRADED
    # ------------------------------------------------------------------
    def test_111_degraded_primary_is_gemini(self):
        """New matrix: 111 DEGRADED → primary=Gemini."""
        routed = self._route(HealthStatus.DEGRADED)
        assert routed.primary.provider_name == "gemini"

    def test_111_degraded_fallback_no_111(self):
        """DEGRADED: 111 is left out of the fallback chain (too unhealthy)."""
        names = self._fallback_names(self._route(HealthStatus.DEGRADED))
        assert "ollama" not in names

    def test_111_degraded_fallback_includes_188_nemotron_claude(self):
        """New matrix: DEGRADED fallback = [188, nemotron, claude]."""
        names = self._fallback_names(self._route(HealthStatus.DEGRADED))
        assert "ollama_188" in names
        assert "nemotron" in names
        assert "claude" in names

    # ------------------------------------------------------------------
    # 111 OFFLINE
    # ------------------------------------------------------------------
    def test_111_offline_primary_is_gemini(self):
        """New matrix: 111 OFFLINE → primary=Gemini (188 is demoted to a fallback)."""
        routed = self._route(HealthStatus.OFFLINE)
        assert routed.primary.provider_name == "gemini"

    # ------------------------------------------------------------------
    # Both nodes OFFLINE
    # ------------------------------------------------------------------
    def test_both_offline_primary_is_gemini(self):
        """New matrix: 111 + 188 both OFFLINE → Gemini takes over (fastest cloud)."""
        routed = self._route(HealthStatus.OFFLINE, HealthStatus.OFFLINE)
        assert routed.primary.provider_name == "gemini"

    def test_both_offline_fallback_includes_nemotron_claude(self):
        """Both OFFLINE: fallback=[Nemotron, Claude] (no usable Ollama node)."""
        routed = self._route(HealthStatus.OFFLINE, HealthStatus.OFFLINE)
        names = self._fallback_names(routed)
        assert "nemotron" in names
        assert "claude" in names

    def test_111_offline_no_188_primary_is_gemini(self):
        """New matrix: 111 OFFLINE + 188 not configured → Gemini (not Nemotron)."""
        routed = self._route_without_188(HealthStatus.OFFLINE)
        assert routed.primary.provider_name == "gemini"

    # ------------------------------------------------------------------
    # routing_reason content
    # ------------------------------------------------------------------
    def test_routing_reason_contains_status(self):
        """``routing_reason`` should carry information about the state of 111."""
        reason = self._route(HealthStatus.OFFLINE).routing_reason
        assert "offline" in reason.lower() or "111" in reason
# =============================================================================
# select_provider():並行 gather
# =============================================================================
class TestSelectProvider:
    """Tests for ``select_provider()``: which hosts are health-checked and what is returned."""

    @staticmethod
    def _manager_for(monitor, fallback_url: str = URL_188) -> OllamaFailoverManager:
        """Build a manager around ``monitor`` with mocked settings.

        Args:
            monitor: Mocked health monitor whose ``check`` the manager will call.
            fallback_url: Value for ``OLLAMA_FALLBACK_URL``; ``""`` means no 188 node.

        Returns:
            The manager with ``_settings`` replaced by the mocked settings.
        """
        settings = MagicMock()
        settings.OLLAMA_URL = URL_111
        settings.OLLAMA_FALLBACK_URL = fallback_url
        settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
        manager = OllamaFailoverManager(health_monitor=monitor)
        manager._settings = settings
        return manager

    @pytest.mark.asyncio
    async def test_select_provider_calls_gather(self):
        """With a 188 URL configured, both hosts must be health-checked."""
        mock_monitor = AsyncMock()
        mock_monitor.check = AsyncMock(
            side_effect=[
                _make_health(HealthStatus.HEALTHY, URL_111),
                _make_health(HealthStatus.HEALTHY, URL_188),
            ]
        )
        manager = self._manager_for(mock_monitor)

        with patch.object(manager, "_write_failover_audit", return_value=None):
            await manager.select_provider()

        # Both hosts were checked, each passed as the first positional argument.
        assert mock_monitor.check.call_count == 2
        called_hosts = {call.args[0] for call in mock_monitor.check.call_args_list}
        assert URL_111 in called_hosts
        assert URL_188 in called_hosts

    @pytest.mark.asyncio
    async def test_select_provider_single_node_no_188(self):
        """Empty ``OLLAMA_FALLBACK_URL`` → only 111 is checked."""
        mock_monitor = AsyncMock()
        mock_monitor.check = AsyncMock(
            return_value=_make_health(HealthStatus.HEALTHY, URL_111)
        )
        manager = self._manager_for(mock_monitor, fallback_url="")

        with patch.object(manager, "_write_failover_audit", return_value=None):
            result = await manager.select_provider()

        assert mock_monitor.check.call_count == 1
        assert result.primary.provider_name == "ollama"

    @pytest.mark.asyncio
    async def test_select_provider_returns_routing_result(self):
        """``select_provider`` returns an ``OllamaRoutingResult`` (new matrix: 111 OFFLINE → Gemini)."""
        mock_monitor = AsyncMock()
        mock_monitor.check = AsyncMock(
            side_effect=[
                _make_health(HealthStatus.OFFLINE, URL_111),
                _make_health(HealthStatus.HEALTHY, URL_188),
            ]
        )
        manager = self._manager_for(mock_monitor)

        # The Gemini quota check is stubbed so the expected primary does not
        # depend on (or increment) a real Redis counter: with the same health
        # inputs, a False quota result makes the primary "ollama_188" instead.
        with patch.object(manager, "_write_failover_audit", return_value=None):
            with patch.object(manager, "_check_gemini_quota", return_value=True):
                result = await manager.select_provider()

        assert isinstance(result, OllamaRoutingResult)
        # New matrix: 111 OFFLINE → primary=Gemini, 188 demoted to fallback.
        assert result.primary.provider_name == "gemini"

    @pytest.mark.asyncio
    async def test_audit_not_written_when_111_healthy(self):
        """While 111 is healthy no failover audit event should be produced."""
        mock_monitor = AsyncMock()
        mock_monitor.check = AsyncMock(
            side_effect=[
                _make_health(HealthStatus.HEALTHY, URL_111),
                _make_health(HealthStatus.HEALTHY, URL_188),
            ]
        )
        manager = self._manager_for(mock_monitor)
        observed = {"failover": False}

        async def _spy_audit(result):
            # Record whether the audit hook was handed a non-111 primary,
            # i.e. a routing result that represents a failover.
            observed["failover"] = result.primary.provider_name != "ollama"

        with patch.object(manager, "_write_failover_audit", side_effect=_spy_audit):
            await manager.select_provider()

        # 111 HEALTHY: there must be no failover event.
        assert observed["failover"] is False
# =============================================================================
# clear_cache() / notify_recovery()
# =============================================================================
class TestRecoveryAPI:
    """Tests for the ``clear_cache()`` / ``notify_recovery()`` methods."""

    @pytest.mark.asyncio
    async def test_clear_cache_calls_redis_delete(self):
        """``clear_cache()`` can be awaited and is invoked exactly once.

        NOTE(review): this patches ``clear_cache`` itself, so the real method
        body never runs and ``mock_redis`` is never handed to anything; the
        test cannot detect a regression in the redis-delete behavior its name
        describes.
        """
        manager = _make_manager()
        mock_redis = AsyncMock()
        mock_redis.delete = AsyncMock()
        target = "src.services.ollama_failover_manager.OllamaFailoverManager.clear_cache"

        with patch(target) as mock_clear:
            mock_clear.return_value = None
            await manager.clear_cache()

        mock_clear.assert_called_once()

    @pytest.mark.asyncio
    async def test_clear_cache_fails_gracefully(self):
        """``clear_cache()`` must not raise when ``get_redis`` raises ``ImportError``."""
        manager = _make_manager()
        # Simulate Redis being unavailable. ``create=True`` lets the patch
        # succeed even if the module has no ``get_redis`` attribute.
        redis_unavailable = patch(
            "src.services.ollama_failover_manager.get_redis",
            side_effect=ImportError("no redis"),
            create=True,
        )
        with redis_unavailable:
            # Must complete without raising.
            await manager.clear_cache()

    def test_notify_recovery_does_not_raise(self):
        """``notify_recovery()`` must complete without raising."""
        manager = _make_manager()
        # Must complete without raising.
        manager.notify_recovery("ollama_111")
# =============================================================================
# OllamaRoutingResult
# =============================================================================
class TestOllamaRoutingResult:
    """Tests for the helper methods on ``OllamaRoutingResult``."""

    def test_all_endpoints_in_order(self):
        """``all_endpoints_in_order()`` yields the primary first, then the fallback chain in order."""
        from src.services.ollama_failover_manager import OllamaEndpoint

        primary = OllamaEndpoint(url=URL_111, provider_name="ollama", model="m1")
        fallbacks = [
            OllamaEndpoint(url=URL_188, provider_name="ollama_188", model="m2"),
            OllamaEndpoint(url="", provider_name="nemotron", model="m3"),
        ]
        result = OllamaRoutingResult(
            primary=primary,
            fallback_chain=fallbacks,
            routing_reason="test",
            health_111=_make_health(HealthStatus.HEALTHY),
        )

        ordered = result.all_endpoints_in_order()

        expected_names = ("ollama", "ollama_188", "nemotron")
        for position, expected in enumerate(expected_names):
            assert ordered[position].provider_name == expected

    def test_to_dict_structure(self):
        """``to_dict()`` exposes the primary provider, the routing reason and a list fallback chain."""
        from src.services.ollama_failover_manager import OllamaEndpoint

        result = OllamaRoutingResult(
            primary=OllamaEndpoint(url=URL_111, provider_name="ollama", model="qwen"),
            fallback_chain=[],
            routing_reason="111 HEALTHY",
            health_111=_make_health(HealthStatus.HEALTHY),
        )

        payload = result.to_dict()

        assert payload["primary"]["provider"] == "ollama"
        assert payload["routing_reason"] == "111 HEALTHY"
        assert isinstance(payload["fallback_chain"], list)
# =============================================================================
# Singleton
# =============================================================================
def test_singleton_returns_same_instance():
    """Two consecutive accessor calls must hand back the very same object."""
    first = get_ollama_failover_manager()
    second = get_ollama_failover_manager()
    assert first is second
def test_reset_singleton_gives_new_instance():
    """After a reset the accessor must build a fresh manager object."""
    before_reset = get_ollama_failover_manager()
    reset_ollama_failover_manager()
    after_reset = get_ollama_failover_manager()
    assert before_reset is not after_reset
# =============================================================================
# B1: _write_failover_audit 改用 structlog不再寫 DB
# 2026-04-25 critic-fix Part2 by Claude Engineer-C2
# =============================================================================
class TestWriteFailoverAudit:
    """B1 fix check: ``_write_failover_audit`` runs without needing a database."""

    @pytest.mark.asyncio
    async def test_audit_uses_structlog_not_db(self):
        """Auditing a Gemini failover result must complete without raising.

        NOTE(review): nothing here asserts on a logger or on the absence of DB
        calls; the test only passes or fails on whether the call raises.
        """
        import structlog
        from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult

        manager = _make_manager()
        gemini_primary = OllamaEndpoint(
            url="", provider_name="gemini", model="gemini-1.5-flash"
        )
        failover_result = OllamaRoutingResult(
            primary=gemini_primary,
            fallback_chain=[],
            routing_reason="111 OFFLINE → 切 Gemini",
            health_111=_make_health(HealthStatus.OFFLINE),
        )

        # Success is simply "no exception".
        await manager._write_failover_audit(failover_result)

    @pytest.mark.asyncio
    async def test_audit_skipped_when_111_healthy(self):
        """With 111 as the primary the audit call must complete without raising."""
        from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult

        manager = _make_manager()
        healthy_result = OllamaRoutingResult(
            primary=OllamaEndpoint(url=URL_111, provider_name="ollama", model="qwen"),
            fallback_chain=[],
            routing_reason="111 HEALTHY → 主 111",
            health_111=_make_health(HealthStatus.HEALTHY),
        )

        # primary=ollama: presumably an early-return path — verify against the
        # implementation. Must complete without raising.
        await manager._write_failover_audit(healthy_result)
# =============================================================================
# B2: AIProviderEnum.OLLAMA_188 存在
# 2026-04-25 critic-fix Part2 by Claude Engineer-C2
# =============================================================================
class TestAIProviderEnumOllama188:
    """B2 fix check: ``AIProviderEnum.OLLAMA_188`` exists and has a latency budget entry."""

    def test_ollama_188_enum_exists(self):
        """The enum member's value is the string ``"ollama_188"``."""
        from src.services.ai_router import AIProviderEnum

        member = AIProviderEnum.OLLAMA_188
        assert member.value == "ollama_188"

    def test_ollama_188_in_latency_budget(self):
        """``PROVIDER_LATENCY_BUDGET`` maps OLLAMA_188 to 120000."""
        from src.services.ai_router import AIProviderEnum, PROVIDER_LATENCY_BUDGET

        provider = AIProviderEnum.OLLAMA_188
        assert provider in PROVIDER_LATENCY_BUDGET
        assert PROVIDER_LATENCY_BUDGET[provider] == 120000
# =============================================================================
# H4: asyncio.gather return_exceptions=True
# 2026-04-25 critic-fix Part2 by Claude Engineer-C2
# =============================================================================
class TestGatherReturnExceptions:
    """H4 fix check: an exception from one health check must not break ``select_provider``."""

    @staticmethod
    def _manager_with_checks(outcomes: list) -> OllamaFailoverManager:
        """Build a two-node manager whose ``check`` yields ``outcomes`` in call order.

        An exception instance in ``outcomes`` is raised by the mocked
        ``check`` instead of being returned.
        """
        monitor = AsyncMock()
        monitor.check = AsyncMock(side_effect=outcomes)
        settings = MagicMock()
        settings.OLLAMA_URL = URL_111
        settings.OLLAMA_FALLBACK_URL = URL_188
        settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
        settings.GEMINI_DAILY_QUOTA = 1000
        manager = OllamaFailoverManager(health_monitor=monitor)
        manager._settings = settings
        return manager

    @pytest.mark.asyncio
    async def test_gather_exception_in_111_treated_as_offline(self):
        """The 111 check raises → ``select_provider`` still returns, with Gemini as primary."""
        # Assumes the 111 check is issued first, so it receives the exception.
        manager = self._manager_with_checks(
            [
                RuntimeError("111 network error"),
                _make_health(HealthStatus.HEALTHY, URL_188),
            ]
        )

        with patch.object(manager, "_write_failover_audit", return_value=None):
            with patch.object(manager, "_check_gemini_quota", return_value=True):
                result = await manager.select_provider()

        # 111 exception → treated as OFFLINE → primary=gemini (new matrix).
        assert result.primary.provider_name == "gemini"

    @pytest.mark.asyncio
    async def test_gather_exception_in_188_treated_as_offline(self):
        """The 188 check raises → ``select_provider`` still returns, with 111 as primary."""
        # Assumes the 188 check is issued second, so it receives the exception.
        manager = self._manager_with_checks(
            [
                _make_health(HealthStatus.HEALTHY, URL_111),
                RuntimeError("188 network error"),
            ]
        )

        with patch.object(manager, "_write_failover_audit", return_value=None):
            with patch.object(manager, "_check_gemini_quota", return_value=True):
                result = await manager.select_provider()

        # 111 HEALTHY → primary=ollama (the 188 exception does not affect the main route).
        assert result.primary.provider_name == "ollama"
# =============================================================================
# H7: Gemini 帳單熔斷
# 2026-04-25 critic-fix Part2 by Claude Engineer-C2
# =============================================================================
class TestGeminiQuota:
    """H7 fix check: the Gemini daily-quota circuit breaker."""

    @staticmethod
    def _offline_111_manager(monitor, fallback_url: str) -> OllamaFailoverManager:
        """Build a manager around ``monitor`` with a daily Gemini quota of 1000."""
        settings = MagicMock()
        settings.OLLAMA_URL = URL_111
        settings.OLLAMA_FALLBACK_URL = fallback_url
        settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
        settings.GEMINI_DAILY_QUOTA = 1000
        manager = OllamaFailoverManager(health_monitor=monitor)
        manager._settings = settings
        return manager

    @pytest.mark.asyncio
    async def test_gemini_quota_under_limit(self):
        """count=500 < quota=1000 → returns True (Gemini allowed)."""
        manager = _make_manager()
        manager._settings.GEMINI_DAILY_QUOTA = 1000
        fake_redis = AsyncMock()
        fake_redis.get = AsyncMock(return_value=b"500")
        fake_redis.incr = AsyncMock(return_value=501)
        fake_redis.expire = AsyncMock()

        # Patched at the source module: the test assumes ``_check_gemini_quota``
        # imports ``get_redis`` from ``src.core.redis_client`` lazily.
        with patch("src.core.redis_client.get_redis", return_value=fake_redis):
            allowed = await manager._check_gemini_quota()

        assert allowed is True
        fake_redis.incr.assert_awaited_once()
        fake_redis.expire.assert_awaited_once()

    @pytest.mark.asyncio
    async def test_gemini_quota_exactly_at_limit(self):
        """count=1000 >= quota=1000 → returns False (breaker open, no more Gemini calls)."""
        manager = _make_manager()
        manager._settings.GEMINI_DAILY_QUOTA = 1000
        fake_redis = AsyncMock()
        fake_redis.get = AsyncMock(return_value=b"1000")
        fake_redis.incr = AsyncMock()
        fake_redis.expire = AsyncMock()

        with patch("src.core.redis_client.get_redis", return_value=fake_redis):
            allowed = await manager._check_gemini_quota()

        assert allowed is False
        # Once the quota is reached the counter must not be incremented again.
        fake_redis.incr.assert_not_awaited()

    @pytest.mark.asyncio
    async def test_gemini_quota_redis_unavailable_fail_open(self):
        """Redis down → returns True (fail-open, Gemini still allowed)."""
        manager = _make_manager()
        redis_down = patch(
            "src.core.redis_client.get_redis",
            side_effect=RuntimeError("Redis unavailable"),
        )

        with redis_down:
            allowed = await manager._check_gemini_quota()

        assert allowed is True

    @pytest.mark.asyncio
    async def test_select_provider_quota_exceeded_uses_188(self):
        """``select_provider``: Gemini quota exceeded → primary becomes OLLAMA_188."""
        monitor = AsyncMock()
        monitor.check = AsyncMock(
            side_effect=[
                _make_health(HealthStatus.OFFLINE, URL_111),
                _make_health(HealthStatus.HEALTHY, URL_188),
            ]
        )
        manager = self._offline_111_manager(monitor, URL_188)

        with patch.object(manager, "_write_failover_audit", return_value=None):
            with patch.object(manager, "_check_gemini_quota", return_value=False):
                result = await manager.select_provider()

        # Quota exceeded → Gemini is skipped and 188 is used instead.
        assert result.primary.provider_name == "ollama_188"

    @pytest.mark.asyncio
    async def test_select_provider_quota_exceeded_no_188_uses_nemotron(self):
        """``select_provider``: Gemini quota exceeded + no 188 available → primary=Nemotron."""
        monitor = AsyncMock()
        monitor.check = AsyncMock(
            return_value=_make_health(HealthStatus.OFFLINE, URL_111)
        )
        manager = self._offline_111_manager(monitor, "")  # no 188 configured

        with patch.object(manager, "_write_failover_audit", return_value=None):
            with patch.object(manager, "_check_gemini_quota", return_value=False):
                result = await manager.select_provider()

        assert result.primary.provider_name == "nemotron"