fix(failover): 188 完全移出 routing chain,備援只用 Gemini
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled

統帥鐵律 2026-04-26:
- 唯一 Ollama = 111(M1 Pro Metal 加速)
- 188 CPU-only (0.45 tok/s) 禁止即時回應,移出所有 fallback chain
- 111 HEALTHY → fallback=[Gemini]
- 111 非HEALTHY → primary=Gemini, fallback=[Nemotron, Claude]
- Gemini quota exceeded → Nemotron → Claude(不落 188)
- OllamaRoutingResult.to_dict() 不再輸出 health_188,路由結果也不再填入(dataclass 欄位保留為 optional,backward-compat)
- select_provider 只 check 111(不再 asyncio.gather 兩節點)
- 測試全部對齊新規則(1451 passed)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Your Name
2026-04-27 15:47:41 +08:00
parent 1b6a4dc14c
commit b432becd4e
2 changed files with 118 additions and 281 deletions

View File

@@ -26,9 +26,8 @@ Ollama 自動容災管理 - P1.1b
from __future__ import annotations
import asyncio
import datetime
from dataclasses import dataclass, field
from dataclasses import dataclass
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo,保證一定有 +8 時區
# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 回傳主機本地時區的 naive 時間(不保證 +8)
@@ -102,7 +101,6 @@ class OllamaRoutingResult:
],
"routing_reason": self.routing_reason,
"health_111": self.health_111.to_dict(),
"health_188": self.health_188.to_dict() if self.health_188 else None,
}
@@ -167,53 +165,22 @@ class OllamaFailoverManager:
async def select_provider(
self,
task_type: str = "",
context: dict | None = None,
task_type: str = "", # noqa: ARG002
context: dict | None = None, # noqa: ARG002
) -> OllamaRoutingResult:
"""
並行檢查 111 + 188,返回路由結果
檢查 111 健康狀態,返回路由結果
Args:
task_type: 任務類型(預留,目前未影響路由邏輯)
context: 額外上下文(預留)
Returns:
OllamaRoutingResult
2026-04-26 統帥鐵律:唯一 Ollama = 111,188 禁止用於即時回應。
"""
url_111 = self._settings.OLLAMA_URL
url_188 = self._settings.OLLAMA_FALLBACK_URL or ""
# 並行檢查
# 2026-04-25 critic-fix Part2 H4 by Claude Engineer-C2
# return_exceptions=True 防止任一 check 例外導致整個 select_provider 炸
if url_188:
results = await asyncio.gather(
self._monitor.check(url_111),
self._monitor.check(url_188),
return_exceptions=True,
)
# 處理 exception — 任一失敗視為 OFFLINE
health_111_raw, health_188_raw = results
health_111: HealthReport = (
HealthReport(status=HealthStatus.OFFLINE, reason=f"check error: {health_111_raw}")
if isinstance(health_111_raw, Exception)
else health_111_raw
)
health_188: HealthReport | None = (
HealthReport(status=HealthStatus.OFFLINE, reason=f"check error: {health_188_raw}")
if isinstance(health_188_raw, Exception)
else health_188_raw
)
else:
try:
health_111 = await self._monitor.check(url_111)
health_188 = None
except Exception as e:
health_111 = HealthReport(status=HealthStatus.OFFLINE, reason=f"check error: {e}")
result = self._decide_route(
health_111=health_111,
health_188=health_188,
url_111=url_111,
url_188=url_188,
)
result = self._decide_route(health_111=health_111, url_111=url_111)
# Gemini 帳單熔斷(quota gate)
# 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
@@ -226,12 +193,7 @@ class OllamaFailoverManager:
quota=quota,
health_111=health_111.status.value,
)
result = self._build_quota_exceeded_route(
health_111=health_111,
health_188=health_188,
url_111=url_111,
url_188=url_188,
)
result = self._build_quota_exceeded_route(health_111=health_111)
# 2026-04-26 P1.5 整合點 3 by Claude Opus 4.7 — 配額耗盡 Telegram 告警
# alerter 內部 24h dedup(QUOTA_DEDUP_TTL_SEC),即使每次 quota exceeded
# 都呼叫,當日只會發送一次告警。失敗 fail-open,不阻擋 routing
@@ -267,7 +229,6 @@ class OllamaFailoverManager:
reason=result.routing_reason,
fallback_count=len(result.fallback_chain),
health_111=health_111.status.value,
health_188=health_188.status.value if health_188 else "not_configured",
)
# 通知 recovery service 當前 primary(跨重啟持久化)
@@ -290,102 +251,44 @@ class OllamaFailoverManager:
def _decide_route(
self,
health_111: HealthReport,
health_188: HealthReport | None,
url_111: str,
url_188: str,
) -> OllamaRoutingResult:
"""
決策矩陣2026-04-25 統帥指令Gemini 優先188 最後備援
決策矩陣(2026-04-26 統帥鐵律:唯一 Ollama=111,備援只用 Gemini)
111 HEALTHY → primary=111, fallback=[Gemini, 188, Nemotron]
111 SLOW → primary=Gemini, fallback=[111, 188]
111 DEGRADED → primary=Gemini, fallback=[188, Nemotron, Claude]
111 OFFLINE → primary=Gemini, fallback=[188, Nemotron, Claude]
111 OFFLINE + 188 OFFLINE → primary=Gemini, fallback=[Nemotron, Claude]
111 HEALTHY → primary=111, fallback=[Gemini]
111 SLOW → primary=Gemini, fallback=[111, Nemotron, Claude]
111 DEGRADED → primary=Gemini, fallback=[Nemotron, Claude]
111 OFFLINE → primary=Gemini, fallback=[Nemotron, Claude]
關鍵原則:
- 111 非 HEALTHY 時primary 必為 Gemini快速雲端不等 188 慢推理)
- 188 永遠在 fallback chain作為 Gemini 額度耗盡的最後備援
- degradation_reason 記錄切換原因 + 時間戳
2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
188 完全移出(CPU-only 0.45 tok/s),禁止即時回應
"""
model_111 = self._settings.OLLAMA_HEALTH_CHECK_MODEL
model_188 = "qwen2.5:7b-instruct" # 188 CPU-only 備援推薦模型plan 方案 C
ep_111 = OllamaEndpoint(url=url_111, provider_name="ollama", model=model_111)
ep_188 = (
OllamaEndpoint(url=url_188, provider_name="ollama_188", model=model_188)
if url_188
else None
)
# 188 可用性判斷(僅供 fallback 使用)
has_188 = ep_188 is not None and (
health_188 is not None and health_188.status != HealthStatus.OFFLINE
)
# 切換時間戳(台北時區 +8,標準庫保證)
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
now_ts = datetime.datetime.now(TAIPEI_TZ).isoformat()
# ==========================================================
# 111 HEALTHY → 主 111Gemini 作為第一 fallback快速雲端
# ==========================================================
if health_111.status == HealthStatus.HEALTHY:
fallback: list[OllamaEndpoint] = [_GEMINI_ENDPOINT]
if has_188 and ep_188:
fallback.append(ep_188)
fallback.append(_NEMOTRON_ENDPOINT)
return OllamaRoutingResult(
primary=ep_111,
fallback_chain=fallback,
fallback_chain=[_GEMINI_ENDPOINT],
routing_reason="111 HEALTHY → 主 111",
health_111=health_111,
health_188=health_188,
)
# ==========================================================
# 111 SLOW → primary=Geminifallback=[111, 188]
# 111 實測 eval rate 0.09 token/s~111s 推理Gemini 更快
# ==========================================================
if health_111.status == HealthStatus.SLOW:
fallback_slow: list[OllamaEndpoint] = [ep_111]
if has_188 and ep_188:
fallback_slow.append(ep_188)
degradation_reason = (
f"111 SLOWeval ~0.09 token/s, ~111s→ 切 Gemini at {now_ts}"
)
return OllamaRoutingResult(
primary=_GEMINI_ENDPOINT,
fallback_chain=fallback_slow,
routing_reason=degradation_reason,
fallback_chain=[ep_111, _NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
routing_reason=f"111 SLOW → 切 Gemini at {now_ts}",
health_111=health_111,
health_188=health_188,
)
# ==========================================================
# 111 DEGRADED 或 OFFLINE → primary=Gemini188 在 fallback
# ==========================================================
status_label = health_111.status.value # "degraded" / "offline"
degradation_reason = f"111 {status_label} → 切 Gemini at {now_ts}"
if has_188 and ep_188:
return OllamaRoutingResult(
primary=_GEMINI_ENDPOINT,
fallback_chain=[ep_188, _NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
routing_reason=degradation_reason,
health_111=health_111,
health_188=health_188,
)
# 188 也不可用 → Gemini 主力,最後備援 Nemotron / Claude
degradation_reason = f"111 {status_label} + 188 不可用 → 切 Gemini at {now_ts}"
status_label = health_111.status.value
return OllamaRoutingResult(
primary=_GEMINI_ENDPOINT,
fallback_chain=[_NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
routing_reason=degradation_reason,
routing_reason=f"111 {status_label} → 切 Gemini at {now_ts}",
health_111=health_111,
health_188=health_188,
)
# -------------------------------------------------------------------------
@@ -464,40 +367,13 @@ class OllamaFailoverManager:
def _build_quota_exceeded_route(
self,
health_111: HealthReport,
health_188: HealthReport | None,
url_111: str, # noqa: ARG002 — 保留供 OllamaRoutingResult 結構完整性health_111 對應)
url_188: str,
) -> OllamaRoutingResult:
"""
Gemini 配額耗盡時的備援路由primary=OLLAMA_188fallback=[Nemotron, Claude]
若 188 也不可用,則 primary=Nemotron。
"""
model_188 = "qwen2.5:7b-instruct"
ep_188 = (
OllamaEndpoint(url=url_188, provider_name="ollama_188", model=model_188)
if url_188
else None
)
has_188 = ep_188 is not None and (
health_188 is not None and health_188.status != HealthStatus.OFFLINE
)
if has_188 and ep_188:
return OllamaRoutingResult(
primary=ep_188,
fallback_chain=[_NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
routing_reason="Gemini quota exceeded → 188 CPU-only 備援",
health_111=health_111,
health_188=health_188,
)
# 188 也不可用
"""Gemini 配額耗盡 → Nemotron 備援。2026-04-26 統帥鐵律:188 移出。"""
return OllamaRoutingResult(
primary=_NEMOTRON_ENDPOINT,
fallback_chain=[_CLAUDE_ENDPOINT],
routing_reason="Gemini quota exceeded + 188 不可用 → Nemotron 備援",
routing_reason="Gemini quota exceeded → Nemotron 備援",
health_111=health_111,
health_188=health_188,
)
# -------------------------------------------------------------------------

View File

@@ -1,18 +1,19 @@
# apps/api/tests/test_ollama_failover_manager.py | 2026-04-25 @ Asia/Taipei
# apps/api/tests/test_ollama_failover_manager.py | 2026-04-27 @ Asia/Taipei
# Created 2026-04-25 P1.1c by Claude Engineer-C
# 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復(路由矩陣更新)
# 2026-04-27 波次對齊 by Claude Sonnet 4.6 — 統帥鐵律:唯一 Ollama=111,188 完全移出
"""
OllamaFailoverManager 單元測試 - P1.1c v2.0
OllamaFailoverManager 單元測試 - P1.1c v3.0
=============================================
測試覆蓋(新路由矩陣:Gemini 優先188 最後備援
- 111 HEALTHY → primary=ollama(111)fallback=[Gemini, 188, Nemotron]
- 111 SLOW → primary=Geminifallback 包含 111 + 188
- 111 DEGRADED → primary=Geminifallback 包含 188 + nemotron + claude
- 111 OFFLINE → primary=Geminifallback 包含 188 + nemotron + claude
- 111 + 188 都 OFFLINE → primary=Geminifallback 包含 nemotron + claude
- OLLAMA_FALLBACK_URL 未設定時的單節點行為
- 並行 gather 邏輯asyncio.gather mock
測試覆蓋(新路由矩陣:統帥鐵律 2026-04-26,唯一 Ollama=111,備援只用 Gemini):
- 111 HEALTHY → primary=ollama(111),fallback=[Gemini]
- 111 SLOW → primary=Gemini,fallback=[111, Nemotron, Claude]
- 111 DEGRADED → primary=Gemini,fallback=[Nemotron, Claude]
- 111 OFFLINE → primary=Gemini,fallback=[Nemotron, Claude]
- Gemini quota exceeded → primary=Nemotron,fallback=[Claude]
- select_provider 只 check 111,不再並行 check 188
- clear_cache() / notify_recovery() 方法
- OllamaRoutingResult.health_188 保留為 optional(backward-compat)
測試分類unitmock OllamaHealthMonitor無 DB 依賴)
"""
@@ -50,11 +51,10 @@ def _make_health(status: HealthStatus, url: str = URL_111) -> HealthReport:
return HealthReport(status=status, host=url, latency_ms=500.0)
def _make_manager(url_111: str = URL_111, url_188: str = URL_188) -> OllamaFailoverManager:
"""建立 managersettings mock 為指定 URL"""
def _make_manager(url_111: str = URL_111) -> OllamaFailoverManager:
"""建立 manager(settings mock 為指定 URL,188 已移除)"""
mock_settings = MagicMock()
mock_settings.OLLAMA_URL = url_111
mock_settings.OLLAMA_FALLBACK_URL = url_188
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
mock_monitor = MagicMock()
@@ -69,10 +69,10 @@ def _make_manager(url_111: str = URL_111, url_188: str = URL_188) -> OllamaFailo
class TestDecideRoute:
"""_decide_route 路由邏輯純函數測試"""
"""_decide_route 路由邏輯純函數測試(新簽名:只需 health_111, url_111)"""
def _setup(self, url_188: str = URL_188) -> OllamaFailoverManager:
return _make_manager(url_188=url_188)
def _setup(self) -> OllamaFailoverManager:
return _make_manager()
# ------------------------------------------------------------------
# 111 HEALTHY
@@ -81,41 +81,39 @@ class TestDecideRoute:
def test_111_healthy_primary_is_ollama(self):
manager = self._setup()
h111 = _make_health(HealthStatus.HEALTHY, URL_111)
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
result = manager._decide_route(h111, h188, URL_111, URL_188)
result = manager._decide_route(h111, URL_111)
assert result.primary.provider_name == "ollama"
assert result.primary.url == URL_111
def test_111_healthy_fallback_includes_188(self):
def test_111_healthy_fallback_is_gemini_only(self):
"""統帥鐵律:HEALTHY fallback 只有 Gemini,188/Nemotron 移出"""
manager = self._setup()
h111 = _make_health(HealthStatus.HEALTHY, URL_111)
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
result = manager._decide_route(h111, h188, URL_111, URL_188)
result = manager._decide_route(h111, URL_111)
provider_names = [e.provider_name for e in result.fallback_chain]
assert "ollama_188" in provider_names
assert provider_names == ["gemini"]
assert "ollama_188" not in provider_names
assert "nemotron" not in provider_names
def test_111_healthy_fallback_includes_nemotron_gemini(self):
def test_111_healthy_fallback_includes_gemini(self):
manager = self._setup()
h111 = _make_health(HealthStatus.HEALTHY, URL_111)
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
result = manager._decide_route(h111, h188, URL_111, URL_188)
result = manager._decide_route(h111, URL_111)
provider_names = [e.provider_name for e in result.fallback_chain]
assert "nemotron" in provider_names
assert "gemini" in provider_names
def test_111_healthy_fallback_order_gemini_first(self):
"""新矩陣Gemini 應排在 188/nemotron 之前(快速雲端優先)"""
"""統帥鐵律:Gemini 是唯一 fallback,排在 fallback_chain[0]"""
manager = self._setup()
h111 = _make_health(HealthStatus.HEALTHY, URL_111)
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
result = manager._decide_route(h111, h188, URL_111, URL_188)
result = manager._decide_route(h111, URL_111)
assert result.fallback_chain[0].provider_name == "gemini"
@@ -127,30 +125,29 @@ class TestDecideRoute:
"""新矩陣111 SLOW → primary=Gemini111 eval ~0.09 token/s, ~111sGemini 更快)"""
manager = self._setup()
h111 = _make_health(HealthStatus.SLOW, URL_111)
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
result = manager._decide_route(h111, h188, URL_111, URL_188)
result = manager._decide_route(h111, URL_111)
assert result.primary.provider_name == "gemini"
def test_111_slow_fallback_includes_111_and_188(self):
"""SLOW 時 111 + 188 仍在 fallbackGemini 額度耗盡時的降級鏈"""
def test_111_slow_fallback_includes_111_and_nemotron(self):
"""SLOW 時 111 + Nemotron 在 fallback,188 已移出"""
manager = self._setup()
h111 = _make_health(HealthStatus.SLOW, URL_111)
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
result = manager._decide_route(h111, h188, URL_111, URL_188)
result = manager._decide_route(h111, URL_111)
provider_names = [e.provider_name for e in result.fallback_chain]
assert "ollama" in provider_names
assert "ollama_188" in provider_names
assert "nemotron" in provider_names
assert "ollama_188" not in provider_names
def test_111_slow_no_188_primary_is_gemini(self):
"""111 SLOW + 188 未設定 → primary=Gemini新矩陣不強撐 111"""
manager = _make_manager(url_188="") # 188 未設定
def test_111_slow_primary_is_gemini_no_188(self):
"""111 SLOW + 188 不存在 → primary=Gemini(新矩陣:188 完全移出)"""
manager = _make_manager()
h111 = _make_health(HealthStatus.SLOW, URL_111)
result = manager._decide_route(h111, None, URL_111, "")
result = manager._decide_route(h111, URL_111)
assert result.primary.provider_name == "gemini"
@@ -162,9 +159,8 @@ class TestDecideRoute:
"""新矩陣111 DEGRADED → primary=Gemini"""
manager = self._setup()
h111 = _make_health(HealthStatus.DEGRADED, URL_111)
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
result = manager._decide_route(h111, h188, URL_111, URL_188)
result = manager._decide_route(h111, URL_111)
assert result.primary.provider_name == "gemini"
@@ -172,72 +168,55 @@ class TestDecideRoute:
"""DEGRADED 時 111 不在 fallback太差了"""
manager = self._setup()
h111 = _make_health(HealthStatus.DEGRADED, URL_111)
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
result = manager._decide_route(h111, h188, URL_111, URL_188)
result = manager._decide_route(h111, URL_111)
provider_names = [e.provider_name for e in result.fallback_chain]
assert "ollama" not in provider_names
def test_111_degraded_fallback_includes_188_nemotron_claude(self):
"""新矩陣DEGRADED fallback = [188, nemotron, claude]"""
def test_111_degraded_fallback_includes_nemotron_claude(self):
"""統帥鐵律:DEGRADED fallback = [Nemotron, Claude](188 已移出)"""
manager = self._setup()
h111 = _make_health(HealthStatus.DEGRADED, URL_111)
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
result = manager._decide_route(h111, h188, URL_111, URL_188)
result = manager._decide_route(h111, URL_111)
provider_names = [e.provider_name for e in result.fallback_chain]
assert "ollama_188" in provider_names
assert "nemotron" in provider_names
assert "claude" in provider_names
assert "ollama_188" not in provider_names
# ------------------------------------------------------------------
# 111 OFFLINE
# ------------------------------------------------------------------
def test_111_offline_primary_is_gemini(self):
"""新矩陣111 OFFLINE → primary=Gemini188 降為 fallback 備援)"""
"""新矩陣111 OFFLINE → primary=Gemini"""
manager = self._setup()
h111 = _make_health(HealthStatus.OFFLINE, URL_111)
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
result = manager._decide_route(h111, h188, URL_111, URL_188)
result = manager._decide_route(h111, URL_111)
assert result.primary.provider_name == "gemini"
# ------------------------------------------------------------------
# 雙節點都 OFFLINE
# ------------------------------------------------------------------
def test_both_offline_primary_is_gemini(self):
"""新矩陣111 + 188 都 OFFLINE → Gemini 接手(最快雲端)"""
def test_111_offline_fallback_includes_nemotron_claude(self):
"""111 OFFLINE 時,fallback=[Nemotron, Claude](無可用 Ollama)"""
manager = self._setup()
h111 = _make_health(HealthStatus.OFFLINE, URL_111)
h188 = _make_health(HealthStatus.OFFLINE, URL_188)
result = manager._decide_route(h111, h188, URL_111, URL_188)
assert result.primary.provider_name == "gemini"
def test_both_offline_fallback_includes_nemotron_claude(self):
"""雙 OFFLINE 時fallback=[Nemotron, Claude](無可用 Ollama"""
manager = self._setup()
h111 = _make_health(HealthStatus.OFFLINE, URL_111)
h188 = _make_health(HealthStatus.OFFLINE, URL_188)
result = manager._decide_route(h111, h188, URL_111, URL_188)
result = manager._decide_route(h111, URL_111)
provider_names = [e.provider_name for e in result.fallback_chain]
assert "nemotron" in provider_names
assert "claude" in provider_names
assert "ollama_188" not in provider_names
def test_111_offline_no_188_primary_is_gemini(self):
"""新矩陣111 OFFLINE + 188 未設定 → Gemini不是 Nemotron"""
manager = _make_manager(url_188="")
def test_111_offline_primary_is_gemini_no_188(self):
"""新矩陣:111 OFFLINE → Gemini,188 不再列入考慮"""
manager = _make_manager()
h111 = _make_health(HealthStatus.OFFLINE, URL_111)
result = manager._decide_route(h111, None, URL_111, "")
result = manager._decide_route(h111, URL_111)
assert result.primary.provider_name == "gemini"
@@ -249,35 +228,30 @@ class TestDecideRoute:
"""routing_reason 應包含 111 的狀態資訊"""
manager = self._setup()
h111 = _make_health(HealthStatus.OFFLINE, URL_111)
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
result = manager._decide_route(h111, h188, URL_111, URL_188)
result = manager._decide_route(h111, URL_111)
assert "offline" in result.routing_reason.lower() or "111" in result.routing_reason
# =============================================================================
# select_provider()並行 gather
# select_provider():只 check 111
# =============================================================================
class TestSelectProvider:
"""select_provider() 並行邏輯"""
"""select_provider() 只 check 111 邏輯(統帥鐵律:188 完全移出)"""
@pytest.mark.asyncio
async def test_select_provider_calls_gather(self):
"""有 url_188 時應並行 gather 兩個 check"""
async def test_select_provider_checks_111_only(self):
"""統帥鐵律:select_provider check 111,call_count == 1"""
mock_monitor = AsyncMock()
mock_monitor.check = AsyncMock(
side_effect=[
_make_health(HealthStatus.HEALTHY, URL_111),
_make_health(HealthStatus.HEALTHY, URL_188),
]
return_value=_make_health(HealthStatus.HEALTHY, URL_111)
)
mock_settings = MagicMock()
mock_settings.OLLAMA_URL = URL_111
mock_settings.OLLAMA_FALLBACK_URL = URL_188
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
manager = OllamaFailoverManager(health_monitor=mock_monitor)
@@ -286,21 +260,19 @@ class TestSelectProvider:
with patch.object(manager, "_write_failover_audit", return_value=None):
result = await manager.select_provider()
# 兩個 host 都被 check
assert mock_monitor.check.call_count == 2
called_hosts = {call.args[0] for call in mock_monitor.check.call_args_list}
assert URL_111 in called_hosts
assert URL_188 in called_hosts
# 只 check 111,不再並行 check 188
assert mock_monitor.check.call_count == 1
called_url = mock_monitor.check.call_args.args[0]
assert called_url == URL_111
@pytest.mark.asyncio
async def test_select_provider_single_node_no_188(self):
"""OLLAMA_FALLBACK_URL 空字串 → 只 check 111"""
async def test_select_provider_single_node_primary_ollama(self):
"""111 HEALTHY → primary=ollama"""
mock_monitor = AsyncMock()
mock_monitor.check = AsyncMock(return_value=_make_health(HealthStatus.HEALTHY, URL_111))
mock_settings = MagicMock()
mock_settings.OLLAMA_URL = URL_111
mock_settings.OLLAMA_FALLBACK_URL = ""
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
manager = OllamaFailoverManager(health_monitor=mock_monitor)
@@ -317,22 +289,17 @@ class TestSelectProvider:
"""select_provider 返回 OllamaRoutingResult 類型新矩陣111 OFFLINE → Gemini"""
mock_monitor = AsyncMock()
mock_monitor.check = AsyncMock(
side_effect=[
_make_health(HealthStatus.OFFLINE, URL_111),
_make_health(HealthStatus.HEALTHY, URL_188),
]
return_value=_make_health(HealthStatus.OFFLINE, URL_111)
)
mock_settings = MagicMock()
mock_settings.OLLAMA_URL = URL_111
mock_settings.OLLAMA_FALLBACK_URL = URL_188
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
manager = OllamaFailoverManager(health_monitor=mock_monitor)
manager._settings = mock_settings
# 2026-04-27 Wave8-X2: 必須 mock Redis pool_check_gemini_quota 走 fail-closed 路徑會切到 188 而非 Gemini
# 測試本身只要驗 OFFLINE → Gemini 路由邏輯,故繞過 quota check
# 必須 mock Redis pool,否則 _check_gemini_quota 走 fail-closed 路徑會切到 Nemotron 而非 Gemini
with patch.object(manager, "_write_failover_audit", return_value=None), \
patch.object(manager, "_check_gemini_quota", AsyncMock(return_value=True)), \
patch(
@@ -345,7 +312,7 @@ class TestSelectProvider:
result = await manager.select_provider()
assert isinstance(result, OllamaRoutingResult)
# 新矩陣111 OFFLINE + Gemini quota OK → primary=Gemini188 降為 fallback
# 新矩陣111 OFFLINE + Gemini quota OK → primary=Gemini
assert result.primary.provider_name == "gemini"
@pytest.mark.asyncio
@@ -353,26 +320,20 @@ class TestSelectProvider:
"""111 正常時不觸發 failover audit"""
mock_monitor = AsyncMock()
mock_monitor.check = AsyncMock(
side_effect=[
_make_health(HealthStatus.HEALTHY, URL_111),
_make_health(HealthStatus.HEALTHY, URL_188),
]
return_value=_make_health(HealthStatus.HEALTHY, URL_111)
)
mock_settings = MagicMock()
mock_settings.OLLAMA_URL = URL_111
mock_settings.OLLAMA_FALLBACK_URL = URL_188
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
manager = OllamaFailoverManager(health_monitor=mock_monitor)
manager._settings = mock_settings
audit_called = [False]
original_write = manager._write_failover_audit
async def _spy_audit(result):
# _write_failover_audit 在 111 HEALTHY 時 early return不寫 DB
# 追蹤呼叫是否有 side effectDB 寫入)
audit_called[0] = result.primary.provider_name != "ollama"
with patch.object(manager, "_write_failover_audit", side_effect=_spy_audit):
@@ -435,7 +396,7 @@ class TestOllamaRoutingResult:
def test_all_endpoints_in_order(self):
from src.services.ollama_failover_manager import OllamaEndpoint
primary = OllamaEndpoint(url=URL_111, provider_name="ollama", model="m1")
fb1 = OllamaEndpoint(url=URL_188, provider_name="ollama_188", model="m2")
fb1 = OllamaEndpoint(url="", provider_name="gemini", model="gemini-1.5-flash")
fb2 = OllamaEndpoint(url="", provider_name="nemotron", model="m3")
result = OllamaRoutingResult(
@@ -447,7 +408,7 @@ class TestOllamaRoutingResult:
ordered = result.all_endpoints_in_order()
assert ordered[0].provider_name == "ollama"
assert ordered[1].provider_name == "ollama_188"
assert ordered[1].provider_name == "gemini"
assert ordered[2].provider_name == "nemotron"
def test_to_dict_structure(self):
@@ -464,6 +425,19 @@ class TestOllamaRoutingResult:
assert d["routing_reason"] == "111 HEALTHY"
assert isinstance(d["fallback_chain"], list)
def test_health_188_optional_field_backward_compat(self):
"""health_188 保留為 optional(None),backward-compat:不傳也可以"""
from src.services.ollama_failover_manager import OllamaEndpoint
primary = OllamaEndpoint(url=URL_111, provider_name="ollama", model="qwen")
result = OllamaRoutingResult(
primary=primary,
fallback_chain=[],
routing_reason="test",
health_111=_make_health(HealthStatus.HEALTHY),
# health_188 不傳,應為 None
)
assert result.health_188 is None
# =============================================================================
# Singleton
@@ -552,22 +526,18 @@ class TestAIProviderEnumOllama188:
class TestGatherReturnExceptions:
"""H4 修復驗證:任一 check 拋例外時不炸整個 select_provider"""
"""H4 修復驗證:111 check 拋例外時不炸整個 select_provider"""
@pytest.mark.asyncio
async def test_gather_exception_in_111_treated_as_offline(self):
"""111 check 拋例外 → health_111=OFFLINE,select_provider 正常返回"""
mock_monitor = AsyncMock()
mock_monitor.check = AsyncMock(
side_effect=[
RuntimeError("111 network error"),
_make_health(HealthStatus.HEALTHY, URL_188),
]
side_effect=RuntimeError("111 network error")
)
mock_settings = MagicMock()
mock_settings.OLLAMA_URL = URL_111
mock_settings.OLLAMA_FALLBACK_URL = URL_188
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
mock_settings.GEMINI_DAILY_QUOTA = 1000
@@ -582,19 +552,15 @@ class TestGatherReturnExceptions:
assert result.primary.provider_name == "gemini"
@pytest.mark.asyncio
async def test_gather_exception_in_188_treated_as_offline(self):
"""188 check 拋例外 → health_188=OFFLINEselect_provider 正常返回"""
async def test_111_healthy_select_provider_primary_ollama(self):
"""111 HEALTHY → primary=ollama,select_provider 正常返回(取代舊的 188 exception 測試)"""
mock_monitor = AsyncMock()
mock_monitor.check = AsyncMock(
side_effect=[
_make_health(HealthStatus.HEALTHY, URL_111),
RuntimeError("188 network error"),
]
return_value=_make_health(HealthStatus.HEALTHY, URL_111)
)
mock_settings = MagicMock()
mock_settings.OLLAMA_URL = URL_111
mock_settings.OLLAMA_FALLBACK_URL = URL_188
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
mock_settings.GEMINI_DAILY_QUOTA = 1000
@@ -605,7 +571,7 @@ class TestGatherReturnExceptions:
patch.object(manager, "_check_gemini_quota", return_value=True):
result = await manager.select_provider()
# 111 HEALTHY → primary=ollama188 exception 不影響主路由)
# 111 HEALTHY → primary=ollama
assert result.primary.provider_name == "ollama"
@@ -682,19 +648,15 @@ class TestGeminiQuota:
assert ok is False
@pytest.mark.asyncio
async def test_select_provider_quota_exceeded_uses_188(self):
"""select_providerGemini quota 超過 → primary 改為 OLLAMA_188"""
async def test_select_provider_quota_exceeded_uses_nemotron(self):
"""select_provider:Gemini quota 超過 → primary 改為 Nemotron(統帥鐵律:188 移出)"""
mock_monitor = AsyncMock()
mock_monitor.check = AsyncMock(
side_effect=[
_make_health(HealthStatus.OFFLINE, URL_111),
_make_health(HealthStatus.HEALTHY, URL_188),
]
return_value=_make_health(HealthStatus.OFFLINE, URL_111)
)
mock_settings = MagicMock()
mock_settings.OLLAMA_URL = URL_111
mock_settings.OLLAMA_FALLBACK_URL = URL_188
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
mock_settings.GEMINI_DAILY_QUOTA = 1000
@@ -705,8 +667,8 @@ class TestGeminiQuota:
patch.object(manager, "_check_gemini_quota", return_value=False):
result = await manager.select_provider()
# quota 超過 → 不走 Gemini改走 188
assert result.primary.provider_name == "ollama_188"
# quota 超過 → 不走 Gemini,改走 Nemotron(188 已移出)
assert result.primary.provider_name == "nemotron"
@pytest.mark.asyncio
async def test_select_provider_quota_exceeded_no_188_uses_nemotron(self):
@@ -716,7 +678,6 @@ class TestGeminiQuota:
mock_settings = MagicMock()
mock_settings.OLLAMA_URL = URL_111
mock_settings.OLLAMA_FALLBACK_URL = "" # 無 188
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
mock_settings.GEMINI_DAILY_QUOTA = 1000