# apps/api/tests/test_ollama_failover_manager.py | 2026-04-27 @ Asia/Taipei
# Created 2026-04-25 P1.1c by Claude Engineer-C
# 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復(路由矩陣更新)
# 2026-04-27 波次對齊 by Claude Sonnet 4.6 — 統帥鐵律:唯一 Ollama=111,188 完全移出
# 2026-05-03 ogt: ADR-110 GCP 三層容災架構,URL 常數更新為 GCP-A/B/Local,新增三層容災場景
# 2026-05-19 Codex: GCP-A healthy fast path 不等待 Local 111 health timeout
"""
OllamaFailoverManager 單元測試 - P1.1c v4.0
=============================================
測試覆蓋(新路由矩陣:ADR-110 GCP 三層容災,2026-05-03):
- GCP-A HEALTHY → primary=ollama_gcp_a
- GCP-A OFFLINE + GCP-B HEALTHY → primary=ollama_gcp_b
- GCP-A OFFLINE + GCP-B OFFLINE + Local HEALTHY → primary=ollama_local
- 全部 OFFLINE → primary=Gemini
- Gemini quota exceeded → primary=Nemotron,fallback=[Claude]
- select_provider:GCP-A HEALTHY 時只 check GCP-A(primary URL);GCP-A 不健康時續查 GCP-B / Local
- clear_cache() / notify_recovery() 方法
- OllamaRoutingResult.health_111 backward-compat property(實際欄位 health_gcp_a)

測試分類:unit(mock OllamaHealthMonitor,無 DB 依賴)
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from unittest.mock import AsyncMock, MagicMock, patch
|
||
|
||
import pytest
|
||
|
||
from src.services.ollama_failover_manager import (
|
||
OllamaFailoverManager,
|
||
OllamaRoutingResult,
|
||
get_ollama_failover_manager,
|
||
reset_ollama_failover_manager,
|
||
)
|
||
from src.services.ollama_health_monitor import HealthReport, HealthStatus
|
||
|
||
# =============================================================================
# Fixtures
# =============================================================================
|
||
|
||
# Ollama hosts for the ADR-110 three-tier failover layout used by these tests.
URL_GCP_A = "http://34.143.170.20:11434"  # GCP-A primary (SSD)
URL_GCP_B = "http://34.21.145.224:11434"  # GCP-B secondary (SSD)
URL_LOCAL = "http://192.168.0.111:11434"  # Local HDD fallback (last Ollama tier)

# Backward-compatible alias: older tests that still reference URL_111 keep working.
URL_111 = URL_GCP_A
|
||
|
||
|
||
@pytest.fixture(autouse=True)
def reset_singleton():
    """Reset the module-level failover manager after every test.

    The fixture is autouse, so each test in this module runs first and
    ``reset_ollama_failover_manager()`` is called afterwards as teardown.
    """
    yield
    # Teardown: drop whatever singleton the test may have created.
    reset_ollama_failover_manager()
|
||
|
||
|
||
def _make_health(status: HealthStatus, url: str = URL_111) -> HealthReport:
    """Build a ``HealthReport`` for ``url`` with ``status`` and a fixed 500 ms latency."""
    report = HealthReport(status=status, host=url, latency_ms=500.0)
    return report
|
||
|
||
|
||
def _make_manager(
    url_primary: str = URL_GCP_A,
    url_secondary: str = URL_GCP_B,
    url_fallback: str = URL_LOCAL,
) -> OllamaFailoverManager:
    """Create a manager whose settings are mocked with the three-tier URLs (ADR-110).

    Args:
        url_primary: Value exposed as ``OLLAMA_URL``.
        url_secondary: Value exposed as ``OLLAMA_SECONDARY_URL``.
        url_fallback: Value exposed as ``OLLAMA_FALLBACK_URL``.

    Returns:
        An ``OllamaFailoverManager`` built around a ``MagicMock`` health
        monitor, with ``_settings`` replaced by a ``MagicMock`` stub.
    """
    # Keyword arguments to MagicMock configure plain attributes on the stub.
    settings_stub = MagicMock(
        OLLAMA_URL=url_primary,
        OLLAMA_SECONDARY_URL=url_secondary,
        OLLAMA_FALLBACK_URL=url_fallback,
        OLLAMA_HEALTH_CHECK_MODEL="qwen2.5:7b-instruct",
    )
    failover_manager = OllamaFailoverManager(health_monitor=MagicMock())
    failover_manager._settings = settings_stub
    return failover_manager
|
||
|
||
|
||
# =============================================================================
# _decide_route 決策矩陣
# =============================================================================
|
||
|
||
|
||
def _offline_health(url: str = URL_GCP_A) -> HealthReport:
    """Build an OFFLINE ``HealthReport`` for ``url`` with zero latency."""
    offline_report = HealthReport(
        status=HealthStatus.OFFLINE,
        host=url,
        latency_ms=0.0,
    )
    return offline_report
|
||
|
||
|
||
class TestDecideRoute:
    """Tests of the ``_decide_route`` decision matrix (ADR-110: GCP-A -> GCP-B -> Local -> Gemini).

    Every scenario here varies only the GCP-A status; GCP-B and Local are
    always reported OFFLINE.
    """

    def _setup(self) -> OllamaFailoverManager:
        """Return a fresh manager with mocked settings and monitor."""
        return _make_manager()

    def _route(self, gcp_a_status: HealthStatus):
        """Call ``_decide_route`` with GCP-A at ``gcp_a_status`` and the other tiers OFFLINE."""
        manager = self._setup()
        return manager._decide_route(
            health_gcp_a=_make_health(gcp_a_status, URL_GCP_A),
            health_gcp_b=_offline_health(URL_GCP_B),
            health_local=_offline_health(URL_LOCAL),
            url_gcp_a=URL_GCP_A,
            url_gcp_b=URL_GCP_B,
            url_local=URL_LOCAL,
        )

    @staticmethod
    def _fallback_names(result):
        """Return the provider names of ``result.fallback_chain`` in order."""
        return [endpoint.provider_name for endpoint in result.fallback_chain]

    # ------------------------------------------------------------------
    # GCP-A HEALTHY -> primary=GCP-A
    # ------------------------------------------------------------------

    def test_gcp_a_healthy_primary_is_ollama_gcp_a(self):
        """ADR-110: GCP-A HEALTHY -> primary is ollama_gcp_a (SSD primary)."""
        outcome = self._route(HealthStatus.HEALTHY)

        assert outcome.primary.provider_name in ("ollama_gcp_a", "ollama")
        assert outcome.primary.url == URL_GCP_A

    def test_gcp_a_healthy_fallback_includes_gemini(self):
        """With GCP-A HEALTHY the fallback chain still contains Gemini."""
        outcome = self._route(HealthStatus.HEALTHY)

        assert "gemini" in self._fallback_names(outcome)

    # ------------------------------------------------------------------
    # GCP-A SLOW -> primary=GCP-A (keep Ollama under load instead of
    # switching to Gemini too early)
    # ------------------------------------------------------------------

    def test_gcp_a_slow_primary_is_ollama_gcp_a(self):
        """GCP-A SLOW -> primary stays GCP-A, avoiding a premature switch to Gemini."""
        outcome = self._route(HealthStatus.SLOW)

        assert outcome.primary.provider_name == "ollama_gcp_a"

    # ------------------------------------------------------------------
    # GCP-A DEGRADED -> primary=Gemini
    # ------------------------------------------------------------------

    def test_gcp_a_degraded_primary_is_gemini(self):
        """GCP-A DEGRADED (other tiers OFFLINE) -> primary is Gemini."""
        outcome = self._route(HealthStatus.DEGRADED)

        assert outcome.primary.provider_name == "gemini"

    def test_gcp_a_degraded_fallback_includes_nemotron_claude(self):
        """GCP-A DEGRADED -> fallback chain contains Nemotron and Claude."""
        names = self._fallback_names(self._route(HealthStatus.DEGRADED))

        assert "nemotron" in names
        assert "claude" in names

    # ------------------------------------------------------------------
    # GCP-A OFFLINE -> primary=Gemini
    # ------------------------------------------------------------------

    def test_gcp_a_offline_primary_is_gemini(self):
        """GCP-A OFFLINE (other tiers OFFLINE) -> primary is Gemini."""
        outcome = self._route(HealthStatus.OFFLINE)

        assert outcome.primary.provider_name == "gemini"

    def test_gcp_a_offline_fallback_includes_nemotron_claude(self):
        """GCP-A OFFLINE -> fallback chain contains Nemotron and Claude."""
        names = self._fallback_names(self._route(HealthStatus.OFFLINE))

        assert "nemotron" in names
        assert "claude" in names

    # ------------------------------------------------------------------
    # routing_reason bookkeeping
    # ------------------------------------------------------------------

    def test_routing_reason_contains_status(self):
        """``routing_reason`` mentions the GCP-A state, the tier, or the chosen provider."""
        outcome = self._route(HealthStatus.OFFLINE)

        reason_lower = outcome.routing_reason.lower()
        # Deliberately loose: any one of these keywords is accepted.
        assert any(keyword in reason_lower for keyword in ("offline", "gcp", "gemini"))
|
||
|
||
|
||
# =============================================================================
# select_provider():GCP-A healthy fast path
# =============================================================================
|
||
|
||
|
||
class TestSelectProvider:
    """Health-check behaviour of ``select_provider()`` across the three Ollama tiers."""

    def _make_three_layer_mock(
        self,
        gcp_a_status: HealthStatus = HealthStatus.HEALTHY,
        gcp_b_status: HealthStatus = HealthStatus.OFFLINE,
        local_status: HealthStatus = HealthStatus.OFFLINE,
    ):
        """Build a manager whose monitor answers ``check(url)`` per URL.

        The mocked ``check`` looks the URL up in a fixed map (GCP-A, GCP-B,
        Local); any other URL gets an OFFLINE report.

        Returns:
            ``(manager, mock_monitor)`` so tests can inspect the recorded
            ``check`` calls.
        """
        tier_status = {
            URL_GCP_A: gcp_a_status,
            URL_GCP_B: gcp_b_status,
            URL_LOCAL: local_status,
        }
        reports = {url: _make_health(state, url) for url, state in tier_status.items()}

        async def _fake_check(url):
            unknown = HealthReport(status=HealthStatus.OFFLINE, host=url, latency_ms=0.0)
            return reports.get(url, unknown)

        monitor = AsyncMock()
        monitor.check = AsyncMock(side_effect=_fake_check)

        settings_stub = MagicMock(
            OLLAMA_URL=URL_GCP_A,
            OLLAMA_SECONDARY_URL=URL_GCP_B,
            OLLAMA_FALLBACK_URL=URL_LOCAL,
            OLLAMA_HEALTH_CHECK_MODEL="qwen2.5:7b-instruct",
        )

        manager = OllamaFailoverManager(health_monitor=monitor)
        manager._settings = settings_stub
        return manager, monitor

    @pytest.mark.asyncio
    async def test_select_provider_gcp_a_healthy_checks_primary_only(self):
        """When GCP-A is healthy, GCP-B / Local (111) are not probed, so routing is not slowed by a 111 timeout."""
        manager, mock_monitor = self._make_three_layer_mock(
            gcp_a_status=HealthStatus.HEALTHY,
        )

        with patch.object(manager, "_write_failover_audit", return_value=None):
            result = await manager.select_provider()

        assert result.primary.provider_name == "ollama_gcp_a"
        assert result.health_gcp_b is None
        assert result.health_local is None
        assert mock_monitor.check.call_count == 1
        # Exactly one call was made, so the set of probed URLs must be {GCP-A}.
        probed = {call.args[0] for call in mock_monitor.check.call_args_list}
        assert probed == {URL_GCP_A}

    @pytest.mark.asyncio
    async def test_select_provider_checks_fallback_hosts_when_gcp_a_not_healthy(self):
        """When GCP-A is not healthy, GCP-B and Local are still probed (three-tier failover kept)."""
        manager, mock_monitor = self._make_three_layer_mock(
            gcp_a_status=HealthStatus.OFFLINE,
            gcp_b_status=HealthStatus.HEALTHY,
            local_status=HealthStatus.OFFLINE,
        )

        with patch.object(manager, "_write_failover_audit", return_value=None):
            result = await manager.select_provider()

        assert result.primary.provider_name == "ollama_gcp_b"
        assert mock_monitor.check.call_count == 3
        probed = {call.args[0] for call in mock_monitor.check.call_args_list}
        assert {URL_GCP_A, URL_GCP_B, URL_LOCAL} <= probed

    @pytest.mark.asyncio
    async def test_select_provider_gcp_a_healthy_primary_ollama(self):
        """GCP-A HEALTHY -> primary is ollama_gcp_a (or the backward-compatible ``ollama``)."""
        manager, _ = self._make_three_layer_mock(gcp_a_status=HealthStatus.HEALTHY)

        with patch.object(manager, "_write_failover_audit", return_value=None):
            result = await manager.select_provider()

        assert result.primary.provider_name in ("ollama_gcp_a", "ollama")

    @pytest.mark.asyncio
    async def test_select_provider_returns_routing_result(self):
        """``select_provider`` returns an ``OllamaRoutingResult`` (all tiers OFFLINE -> Gemini)."""
        manager, _ = self._make_three_layer_mock(
            gcp_a_status=HealthStatus.OFFLINE,
            gcp_b_status=HealthStatus.OFFLINE,
            local_status=HealthStatus.OFFLINE,
        )
        alerter_stub = MagicMock(
            alert_failover=AsyncMock(),
            alert_gemini_quota_exceeded=AsyncMock(),
        )

        # The quota check must be mocked: per the original test note, without
        # Redis ``_check_gemini_quota`` takes its fail-closed path and the
        # manager would pick Nemotron instead of Gemini.
        with patch.object(manager, "_write_failover_audit", return_value=None):
            with patch.object(manager, "_check_gemini_quota", AsyncMock(return_value=True)):
                with patch(
                    "src.services.failover_alerter.get_failover_alerter",
                    return_value=alerter_stub,
                ):
                    result = await manager.select_provider()

        assert isinstance(result, OllamaRoutingResult)
        # All three tiers OFFLINE + Gemini quota OK -> primary is Gemini.
        assert result.primary.provider_name == "gemini"

    @pytest.mark.asyncio
    async def test_audit_not_written_when_gcp_a_healthy(self):
        """With GCP-A healthy, no failover event reaches the audit hook."""
        manager, _ = self._make_three_layer_mock(gcp_a_status=HealthStatus.HEALTHY)

        failover_seen = False

        async def _spy_audit(result):
            # Record whether the audited result routed away from GCP-A.
            nonlocal failover_seen
            failover_seen = result.primary.provider_name not in ("ollama_gcp_a", "ollama")

        with patch.object(manager, "_write_failover_audit", side_effect=_spy_audit):
            await manager.select_provider()

        # GCP-A HEALTHY: there must be no failover event.
        assert failover_seen is False
|
||
|
||
|
||
# =============================================================================
# clear_cache() / notify_recovery()
# =============================================================================
|
||
|
||
|
||
class TestRecoveryAPI:
    """Tests for the ``clear_cache()`` / ``notify_recovery()`` recovery API."""

    @pytest.mark.asyncio
    async def test_clear_cache_calls_redis_delete(self):
        """``clear_cache()`` is awaited exactly once through the patched class attribute.

        NOTE(review): this test replaces ``clear_cache`` itself with a mock,
        so the real implementation never runs and no Redis ``delete`` call is
        verified, despite the test name. The unused ``mock_redis`` locals were
        removed. TODO: patch the Redis accessor that ``clear_cache`` really
        uses and assert on ``delete`` once that import path is confirmed.
        """
        manager = _make_manager()

        with patch(
            "src.services.ollama_failover_manager.OllamaFailoverManager.clear_cache"
        ) as mock_clear:
            mock_clear.return_value = None
            await manager.clear_cache()
            mock_clear.assert_called_once()

    @pytest.mark.asyncio
    async def test_clear_cache_fails_gracefully(self):
        """``clear_cache()`` must not raise when the Redis accessor fails.

        ``get_redis`` on the manager module is patched to raise
        ``ImportError`` (``create=True`` lets the patch succeed even if the
        module has no such attribute). The original test note says
        ``clear_cache`` wraps its work in ``try/except Exception``.
        """
        manager = _make_manager()

        with patch(
            "src.services.ollama_failover_manager.get_redis",
            side_effect=ImportError("no redis"),
            create=True,
        ):
            # Passing means no exception escaped.
            await manager.clear_cache()

    def test_notify_recovery_does_not_raise(self):
        """``notify_recovery()`` accepts both the legacy and the current provider name without raising."""
        manager = _make_manager()
        # The legacy "ollama_111" name is still a supported call form.
        manager.notify_recovery("ollama_111")
        manager.notify_recovery("ollama_gcp_a")
|
||
|
||
|
||
# =============================================================================
# OllamaRoutingResult
# =============================================================================
|
||
|
||
|
||
class TestOllamaRoutingResult:
    """Tests for the helper methods and fields of ``OllamaRoutingResult``."""

    def test_all_endpoints_in_order(self):
        """``all_endpoints_in_order()`` lists the primary first, then the fallback chain in order."""
        from src.services.ollama_failover_manager import OllamaEndpoint

        gcp_a = OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="m1")
        gemini = OllamaEndpoint(url="", provider_name="gemini", model="gemini-1.5-flash")
        nemotron = OllamaEndpoint(url="", provider_name="nemotron", model="m3")

        routing = OllamaRoutingResult(
            primary=gcp_a,
            fallback_chain=[gemini, nemotron],
            routing_reason="test",
            health_gcp_a=_make_health(HealthStatus.HEALTHY),
        )

        ordered = routing.all_endpoints_in_order()
        first, second, third = ordered[0], ordered[1], ordered[2]
        assert first.provider_name == "ollama_gcp_a"
        assert second.provider_name == "gemini"
        assert third.provider_name == "nemotron"

    def test_to_dict_structure(self):
        """``to_dict()`` exposes the primary provider, the routing reason and a list fallback chain."""
        from src.services.ollama_failover_manager import OllamaEndpoint

        routing = OllamaRoutingResult(
            primary=OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen"),
            fallback_chain=[],
            routing_reason="GCP-A HEALTHY",
            health_gcp_a=_make_health(HealthStatus.HEALTHY),
        )

        payload = routing.to_dict()
        assert payload["primary"]["provider"] == "ollama_gcp_a"
        assert payload["routing_reason"] == "GCP-A HEALTHY"
        assert isinstance(payload["fallback_chain"], list)

    def test_health_111_backward_compat_property(self):
        """``health_111`` is a backward-compatible alias for ``health_gcp_a``."""
        from src.services.ollama_failover_manager import OllamaEndpoint

        healthy_report = _make_health(HealthStatus.HEALTHY)
        routing = OllamaRoutingResult(
            primary=OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen"),
            fallback_chain=[],
            routing_reason="test",
            health_gcp_a=healthy_report,
        )

        # The legacy attribute must return the very same object.
        assert routing.health_111 is routing.health_gcp_a

    def test_health_gcp_b_and_local_optional(self):
        """``health_gcp_b`` and ``health_local`` default to ``None`` when not supplied."""
        from src.services.ollama_failover_manager import OllamaEndpoint

        # health_gcp_b / health_local are intentionally omitted here.
        routing = OllamaRoutingResult(
            primary=OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen"),
            fallback_chain=[],
            routing_reason="test",
            health_gcp_a=_make_health(HealthStatus.HEALTHY),
        )

        assert routing.health_gcp_b is None
        assert routing.health_local is None
|
||
|
||
|
||
# =============================================================================
# ADR-110 三層容災場景(2026-05-03 ogt 新增)
# GCP-A → GCP-B → Local → Gemini 四段容災路由
# =============================================================================
|
||
|
||
|
||
class TestThreeLayerFailover:
    """ADR-110 three-tier failover scenarios: GCP-A -> GCP-B -> Local -> Gemini."""

    def _make_manager_with_health(
        self,
        gcp_a: HealthStatus,
        gcp_b: HealthStatus,
        local: HealthStatus,
    ) -> OllamaFailoverManager:
        """Build a manager whose monitor reports the given status per tier URL.

        Args:
            gcp_a: Status reported for ``URL_GCP_A``.
            gcp_b: Status reported for ``URL_GCP_B``.
            local: Status reported for ``URL_LOCAL``.

        Returns:
            A manager with mocked settings (including ``GEMINI_DAILY_QUOTA``)
            and an ``AsyncMock`` monitor whose ``check(url)`` looks the URL up
            in a fixed map; unknown URLs get an OFFLINE report.
        """
        health_map = {
            URL_GCP_A: HealthReport(status=gcp_a, host=URL_GCP_A, latency_ms=500.0),
            URL_GCP_B: HealthReport(status=gcp_b, host=URL_GCP_B, latency_ms=500.0),
            URL_LOCAL: HealthReport(status=local, host=URL_LOCAL, latency_ms=500.0),
        }

        async def _check(url):
            if url in health_map:
                return health_map[url]
            # Use the shared helper so the fallback report always carries
            # latency_ms, like every other HealthReport built in this file.
            return _offline_health(url)

        mock_monitor = AsyncMock()
        mock_monitor.check = AsyncMock(side_effect=_check)

        mock_settings = MagicMock()
        mock_settings.OLLAMA_URL = URL_GCP_A
        mock_settings.OLLAMA_SECONDARY_URL = URL_GCP_B
        mock_settings.OLLAMA_FALLBACK_URL = URL_LOCAL
        mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
        mock_settings.GEMINI_DAILY_QUOTA = 1000

        manager = OllamaFailoverManager(health_monitor=mock_monitor)
        manager._settings = mock_settings
        return manager

    async def _select(self, manager: OllamaFailoverManager, quota_ok: bool):
        """Run ``select_provider()`` with the audit hook and the Gemini quota check patched."""
        with patch.object(manager, "_write_failover_audit", return_value=None):
            with patch.object(manager, "_check_gemini_quota", return_value=quota_ok):
                return await manager.select_provider()

    @pytest.mark.asyncio
    async def test_gcp_a_healthy_uses_gcp_a(self):
        """Scenario 1: GCP-A HEALTHY -> primary is GCP-A (SSD primary)."""
        manager = self._make_manager_with_health(
            gcp_a=HealthStatus.HEALTHY,
            gcp_b=HealthStatus.OFFLINE,
            local=HealthStatus.OFFLINE,
        )
        result = await self._select(manager, quota_ok=True)

        assert (
            result.primary.url == URL_GCP_A
            or result.primary.provider_name in ("ollama_gcp_a", "ollama")
        )

    @pytest.mark.asyncio
    async def test_gcp_a_offline_gcp_b_healthy_uses_gcp_b(self):
        """Scenario 2: GCP-A OFFLINE + GCP-B HEALTHY -> primary is GCP-B."""
        manager = self._make_manager_with_health(
            gcp_a=HealthStatus.OFFLINE,
            gcp_b=HealthStatus.HEALTHY,
            local=HealthStatus.OFFLINE,
        )
        result = await self._select(manager, quota_ok=True)

        # GCP-A is down, so routing must have moved to GCP-B. The previous
        # assertion also accepted "ollama_gcp_a", which let the test pass
        # even when no failover happened.
        assert result.primary.provider_name == "ollama_gcp_b"

    @pytest.mark.asyncio
    async def test_gcp_a_gcp_b_offline_local_healthy_uses_local(self):
        """Scenario 3: GCP-A OFFLINE + GCP-B OFFLINE + Local HEALTHY -> primary is Local (111)."""
        manager = self._make_manager_with_health(
            gcp_a=HealthStatus.OFFLINE,
            gcp_b=HealthStatus.OFFLINE,
            local=HealthStatus.HEALTHY,
        )
        result = await self._select(manager, quota_ok=True)

        # Both GCP tiers are down, so routing falls to the Local host.
        assert (
            result.primary.url == URL_LOCAL
            or result.primary.provider_name in ("ollama_local", "ollama")
        )

    @pytest.mark.asyncio
    async def test_all_offline_uses_gemini(self):
        """Scenario 4: all three tiers OFFLINE -> primary is Gemini (final cloud backup)."""
        manager = self._make_manager_with_health(
            gcp_a=HealthStatus.OFFLINE,
            gcp_b=HealthStatus.OFFLINE,
            local=HealthStatus.OFFLINE,
        )
        result = await self._select(manager, quota_ok=True)

        assert result.primary.provider_name == "gemini"

    @pytest.mark.asyncio
    async def test_all_offline_gemini_quota_exceeded_uses_nemotron(self):
        """Scenario 5: all three tiers OFFLINE + Gemini quota exhausted -> primary is Nemotron."""
        manager = self._make_manager_with_health(
            gcp_a=HealthStatus.OFFLINE,
            gcp_b=HealthStatus.OFFLINE,
            local=HealthStatus.OFFLINE,
        )
        result = await self._select(manager, quota_ok=False)

        assert result.primary.provider_name == "nemotron"
|
||
|
||
|
||
# =============================================================================
# Singleton
# =============================================================================
|
||
|
||
|
||
def test_singleton_returns_same_instance():
    """Two consecutive ``get_ollama_failover_manager()`` calls return the same object."""
    first = get_ollama_failover_manager()
    second = get_ollama_failover_manager()
    assert first is second
|
||
|
||
|
||
def test_reset_singleton_gives_new_instance():
    """After ``reset_ollama_failover_manager()`` the accessor hands out a different object."""
    before_reset = get_ollama_failover_manager()
    reset_ollama_failover_manager()
    after_reset = get_ollama_failover_manager()
    assert before_reset is not after_reset
|
||
|
||
|
||
# =============================================================================
# B1: _write_failover_audit 改用 structlog(不再寫 DB)
# 2026-04-25 critic-fix Part2 by Claude Engineer-C2
# =============================================================================
|
||
|
||
|
||
class TestWriteFailoverAudit:
    """B1 regression: ``_write_failover_audit`` must run without an AuditLog model or DB.

    Both tests only check that awaiting the real method does not raise;
    they do not inspect what was logged.
    """

    @staticmethod
    def _routing(endpoint, reason, gcp_a_status):
        """Build an ``OllamaRoutingResult`` with an empty fallback chain."""
        return OllamaRoutingResult(
            primary=endpoint,
            fallback_chain=[],
            routing_reason=reason,
            health_gcp_a=_make_health(gcp_a_status),
        )

    @pytest.mark.asyncio
    async def test_audit_uses_structlog_not_db(self):
        """Auditing a failover to Gemini completes without raising.

        Per the original test note, the DB path was removed and the remaining
        structlog path has no DB dependency, so not raising counts as success.
        """
        from src.services.ollama_failover_manager import OllamaEndpoint

        manager = _make_manager()
        gemini = OllamaEndpoint(url="", provider_name="gemini", model="gemini-1.5-flash")
        result = self._routing(gemini, "GCP-A OFFLINE → 切 Gemini", HealthStatus.OFFLINE)

        await manager._write_failover_audit(result)

    @pytest.mark.asyncio
    async def test_audit_skipped_when_gcp_a_healthy(self):
        """Auditing a result whose primary is GCP-A completes without raising.

        Per the original test note, a primary of ``ollama_gcp_a`` makes the
        method return early without recording a failover.
        """
        from src.services.ollama_failover_manager import OllamaEndpoint

        manager = _make_manager()
        gcp_a = OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen")
        result = self._routing(gcp_a, "GCP-A HEALTHY → 主 GCP-A", HealthStatus.HEALTHY)

        # Must not raise.
        await manager._write_failover_audit(result)
|
||
|
||
|
||
# =============================================================================
# B2: AIProviderEnum.OLLAMA_LOCAL 存在
# 2026-05-06 Codex — 188 不再作為 Ollama Provider
# =============================================================================
|
||
|
||
|
||
class TestAIProviderEnumOllamaLocal:
    """B2 regression: ``AIProviderEnum.OLLAMA_LOCAL`` exists and has a latency budget entry."""

    def test_ollama_local_enum_exists(self):
        """The enum member's value is the string ``"ollama_local"``."""
        from src.services.ai_router import AIProviderEnum

        member = AIProviderEnum.OLLAMA_LOCAL
        assert member.value == "ollama_local"

    def test_ollama_local_in_latency_budget(self):
        """``PROVIDER_LATENCY_BUDGET`` maps ``OLLAMA_LOCAL`` to 90000."""
        from src.services.ai_router import PROVIDER_LATENCY_BUDGET, AIProviderEnum

        key = AIProviderEnum.OLLAMA_LOCAL
        assert key in PROVIDER_LATENCY_BUDGET
        assert PROVIDER_LATENCY_BUDGET[key] == 90000
|
||
|
||
|
||
# =============================================================================
# H4: asyncio.gather return_exceptions=True
# 2026-04-25 critic-fix Part2 by Claude Engineer-C2
# =============================================================================
|
||
|
||
|
||
class TestGatherReturnExceptions:
    """H4 regression: an exception from a tier health check must not break ``select_provider``."""

    @staticmethod
    def _manager_for(monitor) -> OllamaFailoverManager:
        """Wrap ``monitor`` in a manager with the three-tier settings stub."""
        settings_stub = MagicMock(
            OLLAMA_URL=URL_GCP_A,
            OLLAMA_SECONDARY_URL=URL_GCP_B,
            OLLAMA_FALLBACK_URL=URL_LOCAL,
            OLLAMA_HEALTH_CHECK_MODEL="qwen2.5:7b-instruct",
            GEMINI_DAILY_QUOTA=1000,
        )
        manager = OllamaFailoverManager(health_monitor=monitor)
        manager._settings = settings_stub
        return manager

    @staticmethod
    async def _select_with_quota_ok(manager):
        """Run ``select_provider()`` with the audit hook patched and the quota check returning True."""
        with patch.object(manager, "_write_failover_audit", return_value=None):
            with patch.object(manager, "_check_gemini_quota", return_value=True):
                return await manager.select_provider()

    @pytest.mark.asyncio
    async def test_gather_exception_in_all_hosts_treated_as_offline(self):
        """Every ``check`` call raising -> ``select_provider`` still returns, with Gemini as primary."""
        failing_monitor = AsyncMock()
        failing_monitor.check = AsyncMock(side_effect=RuntimeError("network error"))

        result = await self._select_with_quota_ok(self._manager_for(failing_monitor))

        # All tiers raised, which the manager is expected to treat as OFFLINE.
        assert result.primary.provider_name == "gemini"

    @pytest.mark.asyncio
    async def test_gcp_a_healthy_select_provider_primary_ollama(self):
        """GCP-A HEALTHY -> ``select_provider`` returns normally with GCP-A as primary."""

        async def _only_gcp_a_up(url):
            if url != URL_GCP_A:
                return HealthReport(status=HealthStatus.OFFLINE, host=url, latency_ms=0.0)
            return _make_health(HealthStatus.HEALTHY, URL_GCP_A)

        monitor = AsyncMock()
        monitor.check = AsyncMock(side_effect=_only_gcp_a_up)

        result = await self._select_with_quota_ok(self._manager_for(monitor))

        # Either the current name or the backward-compatible "ollama" is accepted.
        assert result.primary.provider_name in ("ollama_gcp_a", "ollama")
|
||
|
||
|
||
# =============================================================================
# H7: Gemini 帳單熔斷
# 2026-04-25 critic-fix Part2 by Claude Engineer-C2
# =============================================================================
|
||
|
||
|
||
class TestGeminiQuota:
    """H7 regression: daily Gemini quota circuit breaker."""

    @staticmethod
    def _redis_with_pipeline(incr_result):
        """Return ``(redis_stub, pipe)`` whose pipeline ``execute()`` yields ``[True, incr_result]``.

        ``set`` and ``incr`` return the pipeline itself so calls can be chained.
        """
        pipe = MagicMock()
        pipe.set = MagicMock(return_value=pipe)
        pipe.incr = MagicMock(return_value=pipe)
        pipe.execute = AsyncMock(return_value=[True, incr_result])

        redis_stub = MagicMock()
        redis_stub.pipeline = MagicMock(return_value=pipe)
        return redis_stub, pipe

    @staticmethod
    def _manager_for(monitor) -> OllamaFailoverManager:
        """Wrap ``monitor`` in a manager with the three-tier settings stub and a quota of 1000."""
        settings_stub = MagicMock(
            OLLAMA_URL=URL_GCP_A,
            OLLAMA_SECONDARY_URL=URL_GCP_B,
            OLLAMA_FALLBACK_URL=URL_LOCAL,
            OLLAMA_HEALTH_CHECK_MODEL="qwen2.5:7b-instruct",
            GEMINI_DAILY_QUOTA=1000,
        )
        manager = OllamaFailoverManager(health_monitor=monitor)
        manager._settings = settings_stub
        return manager

    @staticmethod
    async def _select_with_quota_exhausted(manager):
        """Run ``select_provider()`` with the audit hook patched and the quota check returning False."""
        with patch.object(manager, "_write_failover_audit", return_value=None):
            with patch.object(manager, "_check_gemini_quota", return_value=False):
                return await manager.select_provider()

    @pytest.mark.asyncio
    async def test_gemini_quota_under_limit(self):
        """Counter at 501 with quota 1000 -> ``_check_gemini_quota`` returns True (Gemini allowed).

        2026-04-26 Wave5 B3-fix by Claude Engineer-A4: the mock follows the
        atomic rewrite, where the old GET/INCR/EXPIRE steps became a pipeline
        of ``set`` (NX) plus ``incr``.
        """
        manager = _make_manager()
        manager._settings.GEMINI_DAILY_QUOTA = 1000

        # SET NX reports True (first write); INCR reports 501 (500 + 1), below the quota.
        redis_stub, pipe = self._redis_with_pipeline(501)

        with patch("src.core.redis_client.get_redis", return_value=redis_stub):
            allowed = await manager._check_gemini_quota()

        assert allowed is True
        pipe.execute.assert_awaited_once()

    @pytest.mark.asyncio
    async def test_gemini_quota_exactly_at_limit(self):
        """Counter at 1001 with quota 1000 -> returns False (breaker open, Gemini no longer called).

        2026-04-26 Wave5 B3-fix by Claude Engineer-A4: uses the pipeline mock
        introduced with the atomic rewrite.

        NOTE(review): despite the test name, this covers the over-limit case
        (1001 > 1000); the exact boundary (1000) is not exercised here.
        """
        manager = _make_manager()
        manager._settings.GEMINI_DAILY_QUOTA = 1000

        redis_stub, _ = self._redis_with_pipeline(1001)  # above the quota

        with patch("src.core.redis_client.get_redis", return_value=redis_stub):
            allowed = await manager._check_gemini_quota()

        assert allowed is False

    @pytest.mark.asyncio
    async def test_gemini_quota_redis_unavailable_fail_closed(self):
        """Redis unavailable -> returns False (2026-04-27 Wave8-X2 fail-closed fix for the cost rule)."""
        manager = _make_manager()
        alerter_stub = MagicMock(alert_gemini_quota_exceeded=AsyncMock())

        with patch(
            "src.core.redis_client.get_redis",
            side_effect=RuntimeError("Redis unavailable"),
        ):
            with patch(
                "src.services.failover_alerter.get_failover_alerter",
                return_value=alerter_stub,
            ):
                allowed = await manager._check_gemini_quota()

        # Fail-closed: on a Redis error Gemini is refused so spend cannot run
        # away (see feedback_cost_change_approval.md).
        assert allowed is False

    @pytest.mark.asyncio
    async def test_select_provider_quota_exceeded_uses_nemotron(self):
        """``select_provider``: Gemini quota exceeded -> primary becomes Nemotron (all tiers OFFLINE)."""
        monitor = AsyncMock()
        # Every check() call returns the same OFFLINE report, whatever the URL.
        monitor.check = AsyncMock(
            return_value=_make_health(HealthStatus.OFFLINE, URL_GCP_A)
        )

        result = await self._select_with_quota_exhausted(self._manager_for(monitor))

        # Quota exceeded: Gemini is skipped in favour of Nemotron.
        assert result.primary.provider_name == "nemotron"

    @pytest.mark.asyncio
    async def test_select_provider_quota_exceeded_all_offline_uses_nemotron(self):
        """``select_provider``: quota exceeded + per-URL OFFLINE reports -> primary is Nemotron."""

        async def _all_offline(url):
            return HealthReport(status=HealthStatus.OFFLINE, host=url, latency_ms=0.0)

        monitor = AsyncMock()
        monitor.check = AsyncMock(side_effect=_all_offline)

        result = await self._select_with_quota_exhausted(self._manager_for(monitor))

        assert result.primary.provider_name == "nemotron"
|