Files
awoooi/apps/api/tests/test_ollama_failover_manager.py
Your Name 4111ea4f9f
All checks were successful
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / tests (push) Successful in 1m13s
CD Pipeline / build-and-deploy (push) Successful in 3m36s
CD Pipeline / post-deploy-checks (push) Successful in 1m20s
fix(ai): remove 188 ollama provider
2026-05-06 14:34:48 +08:00

861 lines
34 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# apps/api/tests/test_ollama_failover_manager.py | 2026-04-27 @ Asia/Taipei
# Created 2026-04-25 P1.1c by Claude Engineer-C
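# 2026-04-25 commander's directive by Claude Engineer-C — automatic switch to Gemini + automatic recovery (routing matrix update)
# 2026-04-27 wave alignment by Claude Sonnet 4.6 — commander's hard rule: the only Ollama host is 111; 188 is fully removed
# 2026-05-03 ogt: ADR-110 GCP three-tier failover architecture; URL constants updated to GCP-A/B/Local; three-tier failover scenarios added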
"""
OllamaFailoverManager unit tests - P1.1c v4.0
=============================================
Covers the new routing matrix (ADR-110 GCP three-tier failover, 2026-05-03):
- GCP-A HEALTHY → primary=ollama_gcp_a
- GCP-A OFFLINE + GCP-B HEALTHY → primary=ollama_gcp_b
- GCP-A OFFLINE + GCP-B OFFLINE + Local HEALTHY → primary=ollama_local
- All OFFLINE → primary=Gemini
- Gemini quota exceeded → primary=Nemotron, fallback=[Claude]
- select_provider health-checks all three hosts (GCP-A / GCP-B / Local)
- clear_cache() / notify_recovery() methods
- OllamaRoutingResult.health_111 backward-compat property (the actual field is health_gcp_a)
Test category: unit (OllamaHealthMonitor is mocked; no DB dependency)
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from src.services.ollama_health_monitor import HealthReport, HealthStatus
from src.services.ollama_failover_manager import (
OllamaFailoverManager,
OllamaRoutingResult,
get_ollama_failover_manager,
reset_ollama_failover_manager,
)
# =============================================================================
# Fixtures
# =============================================================================
URL_GCP_A = "http://34.143.170.20:11434"  # GCP-A primary (SSD)
URL_GCP_B = "http://34.21.145.224:11434"  # GCP-B secondary (SSD)
URL_LOCAL = "http://192.168.0.111:11434"  # Local HDD fallback
# Backward-compatible alias (still usable where older tests reference URL_111)
URL_111 = URL_GCP_A
@pytest.fixture(autouse=True)
def reset_singleton():
    """Autouse fixture: reset the failover-manager singleton after each test."""
    yield
    # Teardown step: executed once the test body has finished.
    reset_ollama_failover_manager()
def _make_health(status: HealthStatus, url: str = URL_111) -> HealthReport:
    """Build a ``HealthReport`` for ``url`` with the given status and a fixed 500 ms latency."""
    report_fields = {"status": status, "host": url, "latency_ms": 500.0}
    return HealthReport(**report_fields)
def _make_manager(
    url_primary: str = URL_GCP_A,
    url_secondary: str = URL_GCP_B,
    url_fallback: str = URL_LOCAL,
) -> OllamaFailoverManager:
    """Create a manager whose settings are mocked with the three-tier URLs (ADR-110).

    The health monitor is a plain ``MagicMock``; the settings mock carries the
    primary / secondary / fallback Ollama URLs and the health-check model name.
    """
    settings_stub = MagicMock(
        OLLAMA_URL=url_primary,
        OLLAMA_SECONDARY_URL=url_secondary,
        OLLAMA_FALLBACK_URL=url_fallback,
        OLLAMA_HEALTH_CHECK_MODEL="qwen2.5:7b-instruct",
    )
    failover_manager = OllamaFailoverManager(health_monitor=MagicMock())
    # Settings are injected after construction by overwriting the private attribute.
    failover_manager._settings = settings_stub
    return failover_manager
# =============================================================================
# _decide_route 決策矩陣
# =============================================================================
def _offline_health(url: str = URL_GCP_A) -> HealthReport:
    """Build an OFFLINE ``HealthReport`` for ``url`` with zero latency."""
    offline_status = HealthStatus.OFFLINE
    return HealthReport(host=url, status=offline_status, latency_ms=0.0)
class TestDecideRoute:
    """Tests of ``_decide_route`` routing logic (ADR-110 chain: GCP-A → GCP-B → Local → Gemini).

    Every test feeds ``_decide_route`` a GCP-A report in a chosen status while
    GCP-B and Local are reported OFFLINE, then inspects the routing result.
    """

    def _setup(self) -> OllamaFailoverManager:
        """Return a manager built with the default three-tier URLs."""
        return _make_manager()

    def _route_with_gcp_a(self, gcp_a_status: HealthStatus):
        """Call ``_decide_route`` with GCP-A in ``gcp_a_status`` and GCP-B / Local OFFLINE."""
        manager = self._setup()
        return manager._decide_route(
            health_gcp_a=_make_health(gcp_a_status, URL_GCP_A),
            health_gcp_b=_offline_health(URL_GCP_B),
            health_local=_offline_health(URL_LOCAL),
            url_gcp_a=URL_GCP_A,
            url_gcp_b=URL_GCP_B,
            url_local=URL_LOCAL,
        )

    @staticmethod
    def _fallback_names(result):
        """Return the provider names of ``result.fallback_chain`` in order."""
        return [endpoint.provider_name for endpoint in result.fallback_chain]

    # ------------------------------------------------------------------
    # GCP-A HEALTHY → primary=GCP-A
    # ------------------------------------------------------------------
    def test_gcp_a_healthy_primary_is_ollama_gcp_a(self):
        """ADR-110: GCP-A HEALTHY → primary is GCP-A (name ``ollama_gcp_a`` or legacy ``ollama``)."""
        route = self._route_with_gcp_a(HealthStatus.HEALTHY)
        assert route.primary.provider_name in ("ollama_gcp_a", "ollama")
        assert route.primary.url == URL_GCP_A

    def test_gcp_a_healthy_fallback_includes_gemini(self):
        """With GCP-A HEALTHY the fallback chain still contains Gemini."""
        route = self._route_with_gcp_a(HealthStatus.HEALTHY)
        assert "gemini" in self._fallback_names(route)

    # ------------------------------------------------------------------
    # GCP-A SLOW → primary=GCP-A (keep Ollama first instead of switching early)
    # ------------------------------------------------------------------
    def test_gcp_a_slow_primary_is_ollama_gcp_a(self):
        """GCP-A SLOW → primary stays GCP-A rather than switching to Gemini."""
        route = self._route_with_gcp_a(HealthStatus.SLOW)
        assert route.primary.provider_name == "ollama_gcp_a"

    # ------------------------------------------------------------------
    # GCP-A DEGRADED → primary=Gemini
    # ------------------------------------------------------------------
    def test_gcp_a_degraded_primary_is_gemini(self):
        """GCP-A DEGRADED (others OFFLINE) → primary is Gemini."""
        route = self._route_with_gcp_a(HealthStatus.DEGRADED)
        assert route.primary.provider_name == "gemini"

    def test_gcp_a_degraded_fallback_includes_nemotron_claude(self):
        """GCP-A DEGRADED → fallback chain contains both Nemotron and Claude."""
        names = self._fallback_names(self._route_with_gcp_a(HealthStatus.DEGRADED))
        assert "nemotron" in names
        assert "claude" in names

    # ------------------------------------------------------------------
    # GCP-A OFFLINE → primary=Gemini
    # ------------------------------------------------------------------
    def test_gcp_a_offline_primary_is_gemini(self):
        """GCP-A OFFLINE (others OFFLINE) → primary is Gemini."""
        route = self._route_with_gcp_a(HealthStatus.OFFLINE)
        assert route.primary.provider_name == "gemini"

    def test_gcp_a_offline_fallback_includes_nemotron_claude(self):
        """GCP-A OFFLINE → fallback chain contains both Nemotron and Claude."""
        names = self._fallback_names(self._route_with_gcp_a(HealthStatus.OFFLINE))
        assert "nemotron" in names
        assert "claude" in names

    # ------------------------------------------------------------------
    # routing_reason content
    # ------------------------------------------------------------------
    def test_routing_reason_contains_status(self):
        """``routing_reason`` mentions at least one of: offline, gcp, gemini (case-insensitive)."""
        reason_lower = self._route_with_gcp_a(HealthStatus.OFFLINE).routing_reason.lower()
        assert any(keyword in reason_lower for keyword in ("offline", "gcp", "gemini"))
# =============================================================================
# select_provider(): health-checks all three hosts (GCP-A / GCP-B / Local)
# =============================================================================
class TestSelectProvider:
    """``select_provider()`` health checks across the three tiers (ADR-110: GCP-A / GCP-B / Local)."""

    def _make_three_layer_mock(
        self,
        gcp_a_status: HealthStatus = HealthStatus.HEALTHY,
        gcp_b_status: HealthStatus = HealthStatus.OFFLINE,
        local_status: HealthStatus = HealthStatus.OFFLINE,
    ):
        """Build ``(manager, monitor)`` where ``monitor.check(url)`` answers per host URL.

        Reports are looked up by URL (not by call order); any URL other than the
        three known hosts yields an OFFLINE report with zero latency.
        """
        status_by_url = {
            URL_GCP_A: gcp_a_status,
            URL_GCP_B: gcp_b_status,
            URL_LOCAL: local_status,
        }
        reports = {url: _make_health(status, url) for url, status in status_by_url.items()}

        async def _fake_check(url):
            if url in reports:
                return reports[url]
            return HealthReport(status=HealthStatus.OFFLINE, host=url, latency_ms=0.0)

        monitor = AsyncMock()
        monitor.check = AsyncMock(side_effect=_fake_check)
        settings_stub = MagicMock(
            OLLAMA_URL=URL_GCP_A,
            OLLAMA_SECONDARY_URL=URL_GCP_B,
            OLLAMA_FALLBACK_URL=URL_LOCAL,
            OLLAMA_HEALTH_CHECK_MODEL="qwen2.5:7b-instruct",
        )
        manager = OllamaFailoverManager(health_monitor=monitor)
        manager._settings = settings_stub
        return manager, monitor

    @pytest.mark.asyncio
    async def test_select_provider_checks_all_three_hosts(self):
        """ADR-110: ``select_provider`` calls ``check`` exactly once for each of the three hosts."""
        manager, monitor = self._make_three_layer_mock(
            gcp_a_status=HealthStatus.HEALTHY,
        )
        with patch.object(manager, "_write_failover_audit", return_value=None):
            await manager.select_provider()
        assert monitor.check.call_count == 3
        checked_urls = {call.args[0] for call in monitor.check.call_args_list}
        for expected_url in (URL_GCP_A, URL_GCP_B, URL_LOCAL):
            assert expected_url in checked_urls

    @pytest.mark.asyncio
    async def test_select_provider_gcp_a_healthy_primary_ollama(self):
        """GCP-A HEALTHY → primary is ``ollama_gcp_a`` (or the legacy name ``ollama``)."""
        manager, _ = self._make_three_layer_mock(gcp_a_status=HealthStatus.HEALTHY)
        with patch.object(manager, "_write_failover_audit", return_value=None):
            routing = await manager.select_provider()
        assert routing.primary.provider_name in ("ollama_gcp_a", "ollama")

    @pytest.mark.asyncio
    async def test_select_provider_returns_routing_result(self):
        """All three tiers OFFLINE with Gemini quota available → ``OllamaRoutingResult`` with primary Gemini."""
        manager, _ = self._make_three_layer_mock(
            gcp_a_status=HealthStatus.OFFLINE,
            gcp_b_status=HealthStatus.OFFLINE,
            local_status=HealthStatus.OFFLINE,
        )
        # The quota check is stubbed to True; per the original author's note, the
        # un-mocked check takes the fail-closed path and would route to Nemotron.
        alerter_stub = MagicMock(
            alert_failover=AsyncMock(),
            alert_gemini_quota_exceeded=AsyncMock(),
        )
        audit_patch = patch.object(manager, "_write_failover_audit", return_value=None)
        quota_patch = patch.object(manager, "_check_gemini_quota", AsyncMock(return_value=True))
        alerter_patch = patch(
            "src.services.failover_alerter.get_failover_alerter",
            return_value=alerter_stub,
        )
        with audit_patch, quota_patch, alerter_patch:
            routing = await manager.select_provider()
        assert isinstance(routing, OllamaRoutingResult)
        assert routing.primary.provider_name == "gemini"

    @pytest.mark.asyncio
    async def test_audit_not_written_when_gcp_a_healthy(self):
        """GCP-A HEALTHY → the audit hook never sees a non-Ollama primary.

        The spy only records whether it was invoked with a primary other than
        ``ollama_gcp_a`` / ``ollama``; it does not assert the hook is never called.
        """
        manager, _ = self._make_three_layer_mock(gcp_a_status=HealthStatus.HEALTHY)
        failover_audited = False

        async def _spy_audit(result):
            nonlocal failover_audited
            failover_audited = result.primary.provider_name not in ("ollama_gcp_a", "ollama")

        with patch.object(manager, "_write_failover_audit", side_effect=_spy_audit):
            await manager.select_provider()
        assert failover_audited is False
# =============================================================================
# clear_cache() / notify_recovery()
# =============================================================================
class TestRecoveryAPI:
    """Tests for the recovery helpers ``clear_cache()`` and ``notify_recovery()``."""

    # ``get_redis`` is patched at both places it could be resolved from: the
    # defining module (used by function-local imports, and the target the
    # Gemini quota tests in this file patch) and the failover-manager module
    # (``create=True`` because that attribute may not exist there).
    _GET_REDIS_TARGETS = (
        "src.core.redis_client.get_redis",
        "src.services.ollama_failover_manager.get_redis",
    )

    @pytest.mark.asyncio
    async def test_clear_cache_calls_redis_delete(self):
        """``clear_cache()`` runs for real and awaits ``redis.delete``.

        The previous version patched ``clear_cache`` itself and asserted the
        mock was called, so the real method was never exercised.
        NOTE(review): assumes ``clear_cache`` obtains Redis via ``get_redis``
        and awaits ``delete`` on it, as the test name states — confirm against
        the implementation.
        """
        manager = _make_manager()
        mock_redis = AsyncMock()
        mock_redis.delete = AsyncMock()
        core_patch = patch(self._GET_REDIS_TARGETS[0], return_value=mock_redis)
        module_patch = patch(self._GET_REDIS_TARGETS[1], return_value=mock_redis, create=True)
        with core_patch, module_patch:
            await manager.clear_cache()
        mock_redis.delete.assert_awaited()

    @pytest.mark.asyncio
    async def test_clear_cache_fails_gracefully(self):
        """``clear_cache()`` must not raise when obtaining Redis fails.

        Patching only the failover-manager module attribute (with
        ``create=True``) does nothing if the method imports ``get_redis``
        locally, so the failure is injected at both lookup locations.
        """
        manager = _make_manager()
        failure = ImportError("no redis")
        core_patch = patch(self._GET_REDIS_TARGETS[0], side_effect=failure)
        module_patch = patch(self._GET_REDIS_TARGETS[1], side_effect=failure, create=True)
        with core_patch, module_patch:
            # Must complete without raising.
            await manager.clear_cache()

    def test_notify_recovery_does_not_raise(self):
        """``notify_recovery()`` accepts both the legacy and the current provider name without raising."""
        manager = _make_manager()
        manager.notify_recovery("ollama_111")
        manager.notify_recovery("ollama_gcp_a")
# =============================================================================
# OllamaRoutingResult
# =============================================================================
class TestOllamaRoutingResult:
    """Helper-method and field tests for ``OllamaRoutingResult``."""

    def test_all_endpoints_in_order(self):
        """``all_endpoints_in_order()`` yields the primary first, then the fallback chain in order."""
        from src.services.ollama_failover_manager import OllamaEndpoint

        fallback_chain = [
            OllamaEndpoint(url="", provider_name="gemini", model="gemini-1.5-flash"),
            OllamaEndpoint(url="", provider_name="nemotron", model="m3"),
        ]
        routing = OllamaRoutingResult(
            primary=OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="m1"),
            fallback_chain=fallback_chain,
            routing_reason="test",
            health_gcp_a=_make_health(HealthStatus.HEALTHY),
        )
        ordered = routing.all_endpoints_in_order()
        first_three = [ordered[index].provider_name for index in range(3)]
        assert first_three == ["ollama_gcp_a", "gemini", "nemotron"]

    def test_to_dict_structure(self):
        """``to_dict()`` exposes the primary provider, the routing reason and a list fallback chain."""
        from src.services.ollama_failover_manager import OllamaEndpoint

        routing = OllamaRoutingResult(
            primary=OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen"),
            fallback_chain=[],
            routing_reason="GCP-A HEALTHY",
            health_gcp_a=_make_health(HealthStatus.HEALTHY),
        )
        as_dict = routing.to_dict()
        assert as_dict["primary"]["provider"] == "ollama_gcp_a"
        assert as_dict["routing_reason"] == "GCP-A HEALTHY"
        assert isinstance(as_dict["fallback_chain"], list)

    def test_health_111_backward_compat_property(self):
        """``health_111`` is the very same object as ``health_gcp_a`` (backward-compat alias)."""
        from src.services.ollama_failover_manager import OllamaEndpoint

        gcp_a_report = _make_health(HealthStatus.HEALTHY)
        routing = OllamaRoutingResult(
            primary=OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen"),
            fallback_chain=[],
            routing_reason="test",
            health_gcp_a=gcp_a_report,
        )
        assert routing.health_111 is routing.health_gcp_a

    def test_health_gcp_b_and_local_optional(self):
        """``health_gcp_b`` and ``health_local`` are ``None`` when not supplied."""
        from src.services.ollama_failover_manager import OllamaEndpoint

        # Only health_gcp_a is passed; the other two health fields are left out on purpose.
        routing = OllamaRoutingResult(
            primary=OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen"),
            fallback_chain=[],
            routing_reason="test",
            health_gcp_a=_make_health(HealthStatus.HEALTHY),
        )
        assert routing.health_gcp_b is None
        assert routing.health_local is None
# =============================================================================
# ADR-110 三層容災場景2026-05-03 ogt 新增)
# GCP-A → GCP-B → Local → Gemini 四段容災路由
# =============================================================================
class TestThreeLayerFailover:
    """ADR-110 end-to-end failover scenarios: GCP-A → GCP-B → Local → Gemini → Nemotron.

    Each scenario drives ``select_provider()`` with a per-URL health mock and
    checks which endpoint ends up as primary. The Ollama scenarios assert on
    the primary *URL*: the earlier ``url == X or provider_name in (...)``
    checks also accepted ``"ollama_gcp_a"`` / ``"ollama"`` by name, so they
    could pass even when routing never left the failed tier.
    """

    def _make_manager_with_health(
        self,
        gcp_a: HealthStatus,
        gcp_b: HealthStatus,
        local: HealthStatus,
    ) -> OllamaFailoverManager:
        """Build a manager whose monitor reports the given status for each host URL."""
        health_map = {
            URL_GCP_A: HealthReport(status=gcp_a, host=URL_GCP_A, latency_ms=500.0),
            URL_GCP_B: HealthReport(status=gcp_b, host=URL_GCP_B, latency_ms=500.0),
            URL_LOCAL: HealthReport(status=local, host=URL_LOCAL, latency_ms=500.0),
        }

        async def _check(url):
            if url in health_map:
                return health_map[url]
            # Unknown host: OFFLINE with an explicit latency, matching every
            # other HealthReport built in this module.
            return HealthReport(status=HealthStatus.OFFLINE, host=url, latency_ms=0.0)

        mock_monitor = AsyncMock()
        mock_monitor.check = AsyncMock(side_effect=_check)
        mock_settings = MagicMock()
        mock_settings.OLLAMA_URL = URL_GCP_A
        mock_settings.OLLAMA_SECONDARY_URL = URL_GCP_B
        mock_settings.OLLAMA_FALLBACK_URL = URL_LOCAL
        mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
        mock_settings.GEMINI_DAILY_QUOTA = 1000
        manager = OllamaFailoverManager(health_monitor=mock_monitor)
        manager._settings = mock_settings
        return manager

    async def _select(self, manager: OllamaFailoverManager, *, gemini_quota_ok: bool):
        """Run ``select_provider()`` with the audit hook stubbed and the quota check forced."""
        audit_patch = patch.object(manager, "_write_failover_audit", return_value=None)
        quota_patch = patch.object(manager, "_check_gemini_quota", return_value=gemini_quota_ok)
        with audit_patch, quota_patch:
            return await manager.select_provider()

    @pytest.mark.asyncio
    async def test_gcp_a_healthy_uses_gcp_a(self):
        """Scenario 1: GCP-A HEALTHY → primary is GCP-A."""
        manager = self._make_manager_with_health(
            gcp_a=HealthStatus.HEALTHY,
            gcp_b=HealthStatus.OFFLINE,
            local=HealthStatus.OFFLINE,
        )
        result = await self._select(manager, gemini_quota_ok=True)
        assert result.primary.url == URL_GCP_A
        assert result.primary.provider_name in ("ollama_gcp_a", "ollama")

    @pytest.mark.asyncio
    async def test_gcp_a_offline_gcp_b_healthy_uses_gcp_b(self):
        """Scenario 2: GCP-A OFFLINE + GCP-B HEALTHY → primary is GCP-B."""
        manager = self._make_manager_with_health(
            gcp_a=HealthStatus.OFFLINE,
            gcp_b=HealthStatus.HEALTHY,
            local=HealthStatus.OFFLINE,
        )
        result = await self._select(manager, gemini_quota_ok=True)
        # GCP-A is down, so traffic must actually land on the GCP-B host.
        assert result.primary.url == URL_GCP_B

    @pytest.mark.asyncio
    async def test_gcp_a_gcp_b_offline_local_healthy_uses_local(self):
        """Scenario 3: GCP-A OFFLINE + GCP-B OFFLINE + Local HEALTHY → primary is Local."""
        manager = self._make_manager_with_health(
            gcp_a=HealthStatus.OFFLINE,
            gcp_b=HealthStatus.OFFLINE,
            local=HealthStatus.HEALTHY,
        )
        result = await self._select(manager, gemini_quota_ok=True)
        # Both GCP hosts are down, so traffic must actually land on the Local host.
        assert result.primary.url == URL_LOCAL

    @pytest.mark.asyncio
    async def test_all_offline_uses_gemini(self):
        """Scenario 4: all three tiers OFFLINE, quota available → primary is Gemini."""
        manager = self._make_manager_with_health(
            gcp_a=HealthStatus.OFFLINE,
            gcp_b=HealthStatus.OFFLINE,
            local=HealthStatus.OFFLINE,
        )
        result = await self._select(manager, gemini_quota_ok=True)
        assert result.primary.provider_name == "gemini"

    @pytest.mark.asyncio
    async def test_all_offline_gemini_quota_exceeded_uses_nemotron(self):
        """Scenario 5: all three tiers OFFLINE and Gemini quota exhausted → primary is Nemotron."""
        manager = self._make_manager_with_health(
            gcp_a=HealthStatus.OFFLINE,
            gcp_b=HealthStatus.OFFLINE,
            local=HealthStatus.OFFLINE,
        )
        result = await self._select(manager, gemini_quota_ok=False)
        assert result.primary.provider_name == "nemotron"
# =============================================================================
# Singleton
# =============================================================================
def test_singleton_returns_same_instance():
    """Two consecutive ``get_ollama_failover_manager()`` calls return the same object."""
    first = get_ollama_failover_manager()
    second = get_ollama_failover_manager()
    same_object = first is second
    assert same_object
def test_reset_singleton_gives_new_instance():
    """After ``reset_ollama_failover_manager()`` the getter returns a different object."""
    before_reset = get_ollama_failover_manager()
    reset_ollama_failover_manager()
    after_reset = get_ollama_failover_manager()
    assert after_reset is not before_reset
# =============================================================================
# B1: _write_failover_audit 改用 structlog不再寫 DB
# 2026-04-25 critic-fix Part2 by Claude Engineer-C2
# =============================================================================
class TestWriteFailoverAudit:
    """B1 fix verification: smoke tests for ``_write_failover_audit``.

    Both tests only check that awaiting the method does not raise; neither
    inspects what was logged.
    """

    @staticmethod
    def _routing(primary, reason: str, gcp_a_status: HealthStatus) -> OllamaRoutingResult:
        """Build a routing result with an empty fallback chain around ``primary``."""
        return OllamaRoutingResult(
            primary=primary,
            fallback_chain=[],
            routing_reason=reason,
            health_gcp_a=_make_health(gcp_a_status),
        )

    @pytest.mark.asyncio
    async def test_audit_uses_structlog_not_db(self):
        """A Gemini-primary result can be audited without raising."""
        from src.services.ollama_failover_manager import OllamaEndpoint

        manager = _make_manager()
        gemini_primary = OllamaEndpoint(url="", provider_name="gemini", model="gemini-1.5-flash")
        routing = self._routing(gemini_primary, "GCP-A OFFLINE → 切 Gemini", HealthStatus.OFFLINE)
        # Passing means "did not raise"; per the original note the DB path was
        # removed and only the structlog path remains.
        await manager._write_failover_audit(routing)

    @pytest.mark.asyncio
    async def test_audit_skipped_when_gcp_a_healthy(self):
        """A GCP-A-primary result can be passed to the audit hook without raising."""
        from src.services.ollama_failover_manager import OllamaEndpoint

        manager = _make_manager()
        gcp_a_primary = OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen")
        routing = self._routing(gcp_a_primary, "GCP-A HEALTHY → 主 GCP-A", HealthStatus.HEALTHY)
        # Per the original note the method returns early when primary is ollama_gcp_a.
        await manager._write_failover_audit(routing)
# =============================================================================
# B2: AIProviderEnum.OLLAMA_LOCAL 存在
# 2026-05-06 Codex — 188 不再作為 Ollama Provider
# =============================================================================
class TestAIProviderEnumOllamaLocal:
    """B2 fix verification: ``AIProviderEnum.OLLAMA_LOCAL`` exists and has a latency budget entry."""

    def test_ollama_local_enum_exists(self):
        """The enum member's value is the string ``"ollama_local"``."""
        from src.services.ai_router import AIProviderEnum

        member = AIProviderEnum.OLLAMA_LOCAL
        assert member.value == "ollama_local"

    def test_ollama_local_in_latency_budget(self):
        """``PROVIDER_LATENCY_BUDGET`` maps ``OLLAMA_LOCAL`` to 90000."""
        from src.services.ai_router import AIProviderEnum, PROVIDER_LATENCY_BUDGET

        budget_key = AIProviderEnum.OLLAMA_LOCAL
        assert budget_key in PROVIDER_LATENCY_BUDGET
        assert PROVIDER_LATENCY_BUDGET[budget_key] == 90000
# =============================================================================
# H4: asyncio.gather return_exceptions=True
# 2026-04-25 critic-fix Part2 by Claude Engineer-C2
# =============================================================================
class TestGatherReturnExceptions:
    """H4 fix verification: a failing host check must not blow up ``select_provider``."""

    @staticmethod
    def _manager_with_check(check_side_effect) -> OllamaFailoverManager:
        """Build a manager whose ``monitor.check`` uses ``check_side_effect``."""
        monitor = AsyncMock()
        monitor.check = AsyncMock(side_effect=check_side_effect)
        settings_stub = MagicMock(
            OLLAMA_URL=URL_GCP_A,
            OLLAMA_SECONDARY_URL=URL_GCP_B,
            OLLAMA_FALLBACK_URL=URL_LOCAL,
            OLLAMA_HEALTH_CHECK_MODEL="qwen2.5:7b-instruct",
            GEMINI_DAILY_QUOTA=1000,
        )
        manager = OllamaFailoverManager(health_monitor=monitor)
        manager._settings = settings_stub
        return manager

    @staticmethod
    async def _select_with_quota_ok(manager: OllamaFailoverManager):
        """Run ``select_provider()`` with the audit hook stubbed and Gemini quota reported OK."""
        audit_patch = patch.object(manager, "_write_failover_audit", return_value=None)
        quota_patch = patch.object(manager, "_check_gemini_quota", return_value=True)
        with audit_patch, quota_patch:
            return await manager.select_provider()

    @pytest.mark.asyncio
    async def test_gather_exception_in_all_hosts_treated_as_offline(self):
        """Every host check raises → ``select_provider`` still returns, with Gemini as primary."""
        manager = self._manager_with_check(RuntimeError("network error"))
        routing = await self._select_with_quota_ok(manager)
        assert routing.primary.provider_name == "gemini"

    @pytest.mark.asyncio
    async def test_gcp_a_healthy_select_provider_primary_ollama(self):
        """GCP-A HEALTHY, other hosts OFFLINE → primary is ``ollama_gcp_a`` (or legacy ``ollama``)."""

        async def _only_gcp_a_healthy(url):
            if url != URL_GCP_A:
                return HealthReport(status=HealthStatus.OFFLINE, host=url, latency_ms=0.0)
            return _make_health(HealthStatus.HEALTHY, URL_GCP_A)

        manager = self._manager_with_check(_only_gcp_a_healthy)
        routing = await self._select_with_quota_ok(manager)
        assert routing.primary.provider_name in ("ollama_gcp_a", "ollama")
# =============================================================================
# H7: Gemini 帳單熔斷
# 2026-04-25 critic-fix Part2 by Claude Engineer-C2
# =============================================================================
class TestGeminiQuota:
    """H7 fix verification: the Gemini daily-quota circuit breaker."""

    @staticmethod
    def _redis_with_pipeline(incr_result: int):
        """Return ``(redis, pipe)`` mocks whose pipeline ``execute()`` yields ``[True, incr_result]``.

        ``set`` and ``incr`` return the pipeline itself; only ``execute`` is awaitable.
        """
        pipe = MagicMock()
        pipe.set = MagicMock(return_value=pipe)
        pipe.incr = MagicMock(return_value=pipe)
        pipe.execute = AsyncMock(return_value=[True, incr_result])
        redis_stub = MagicMock()
        redis_stub.pipeline = MagicMock(return_value=pipe)
        return redis_stub, pipe

    @staticmethod
    def _manager_with_monitor_check(check_mock: AsyncMock) -> OllamaFailoverManager:
        """Build a manager (quota 1000) whose monitor uses ``check_mock`` as ``check``."""
        monitor = AsyncMock()
        monitor.check = check_mock
        settings_stub = MagicMock(
            OLLAMA_URL=URL_GCP_A,
            OLLAMA_SECONDARY_URL=URL_GCP_B,
            OLLAMA_FALLBACK_URL=URL_LOCAL,
            OLLAMA_HEALTH_CHECK_MODEL="qwen2.5:7b-instruct",
            GEMINI_DAILY_QUOTA=1000,
        )
        manager = OllamaFailoverManager(health_monitor=monitor)
        manager._settings = settings_stub
        return manager

    @staticmethod
    async def _select_with_quota_exhausted(manager: OllamaFailoverManager):
        """Run ``select_provider()`` with the audit hook stubbed and the quota check returning False."""
        audit_patch = patch.object(manager, "_write_failover_audit", return_value=None)
        quota_patch = patch.object(manager, "_check_gemini_quota", return_value=False)
        with audit_patch, quota_patch:
            return await manager.select_provider()

    @pytest.mark.asyncio
    async def test_gemini_quota_under_limit(self):
        """Pipeline reports a count of 501 against quota 1000 → ``_check_gemini_quota`` returns True.

        The Redis pipeline is mocked (SET NX result True, INCR result 501), and
        the pipeline's ``execute`` must be awaited exactly once.
        """
        manager = _make_manager()
        manager._settings.GEMINI_DAILY_QUOTA = 1000
        redis_stub, pipe = self._redis_with_pipeline(501)
        with patch("src.core.redis_client.get_redis", return_value=redis_stub):
            allowed = await manager._check_gemini_quota()
        assert allowed is True
        pipe.execute.assert_awaited_once()

    @pytest.mark.asyncio
    async def test_gemini_quota_exactly_at_limit(self):
        """Pipeline reports a count of 1001 against quota 1000 → ``_check_gemini_quota`` returns False.

        Despite the test name, the value exercised is one above the quota, not
        equal to it.
        """
        manager = _make_manager()
        manager._settings.GEMINI_DAILY_QUOTA = 1000
        redis_stub, _ = self._redis_with_pipeline(1001)  # over the quota
        with patch("src.core.redis_client.get_redis", return_value=redis_stub):
            allowed = await manager._check_gemini_quota()
        assert allowed is False

    @pytest.mark.asyncio
    async def test_gemini_quota_redis_unavailable_fail_closed(self):
        """``get_redis`` raising → ``_check_gemini_quota`` returns False (fail-closed)."""
        manager = _make_manager()
        redis_patch = patch(
            "src.core.redis_client.get_redis",
            side_effect=RuntimeError("Redis unavailable"),
        )
        alerter_patch = patch(
            "src.services.failover_alerter.get_failover_alerter",
            return_value=MagicMock(alert_gemini_quota_exceeded=AsyncMock()),
        )
        with redis_patch, alerter_patch:
            allowed = await manager._check_gemini_quota()
        # Fail-closed: a Redis failure denies Gemini so that cost cannot run away.
        assert allowed is False

    @pytest.mark.asyncio
    async def test_select_provider_quota_exceeded_uses_nemotron(self):
        """Quota exhausted while every check returns one OFFLINE report → primary is Nemotron.

        Here the monitor returns the same OFFLINE report (host GCP-A) for every URL.
        """
        check_mock = AsyncMock(
            return_value=_make_health(HealthStatus.OFFLINE, URL_GCP_A)
        )
        manager = self._manager_with_monitor_check(check_mock)
        routing = await self._select_with_quota_exhausted(manager)
        # Gemini is skipped once the quota is exhausted; Nemotron takes over.
        assert routing.primary.provider_name == "nemotron"

    @pytest.mark.asyncio
    async def test_select_provider_quota_exceeded_all_offline_uses_nemotron(self):
        """Quota exhausted with a per-URL OFFLINE report for each host → primary is Nemotron."""

        async def _all_offline(url):
            return HealthReport(status=HealthStatus.OFFLINE, host=url, latency_ms=0.0)

        manager = self._manager_with_monitor_check(AsyncMock(side_effect=_all_offline))
        routing = await self._select_with_quota_exhausted(manager)
        assert routing.primary.provider_name == "nemotron"