fix(failover): 188 完全移出 routing chain,備援只用 Gemini
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
統帥鐵律 2026-04-26:
- 唯一 Ollama = 111(M1 Pro Metal 加速)
- 188 CPU-only(0.45 tok/s)禁止即時回應,移出所有 fallback chain
- 111 HEALTHY → primary=111, fallback=[Gemini]
- 111 SLOW → primary=Gemini, fallback=[111, Nemotron, Claude]
- 111 DEGRADED / OFFLINE → primary=Gemini, fallback=[Nemotron, Claude]
- Gemini quota exceeded → primary=Nemotron, fallback=[Claude](不落 188)
- OllamaRoutingResult.to_dict() 不再輸出 health_188;health_188 欄位保留為 optional(預設 None)以維持向後相容
- select_provider 只 check 111(不再 asyncio.gather 兩節點)
- 測試全部對齊新規則(1451 passed)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -26,9 +26,8 @@ Ollama 自動容災管理 - P1.1b
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import datetime
|
||||
from dataclasses import dataclass, field
|
||||
from dataclasses import dataclass
|
||||
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
|
||||
# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo,保證一定有 +8 時區
|
||||
# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC
|
||||
@@ -102,7 +101,6 @@ class OllamaRoutingResult:
|
||||
],
|
||||
"routing_reason": self.routing_reason,
|
||||
"health_111": self.health_111.to_dict(),
|
||||
"health_188": self.health_188.to_dict() if self.health_188 else None,
|
||||
}
|
||||
|
||||
|
||||
@@ -167,53 +165,22 @@ class OllamaFailoverManager:
|
||||
|
||||
async def select_provider(
|
||||
self,
|
||||
task_type: str = "",
|
||||
context: dict | None = None,
|
||||
task_type: str = "", # noqa: ARG002
|
||||
context: dict | None = None, # noqa: ARG002
|
||||
) -> OllamaRoutingResult:
|
||||
"""
|
||||
並行檢查 111 + 188,返回路由結果
|
||||
檢查 111 健康狀態,返回路由結果。
|
||||
|
||||
Args:
|
||||
task_type: 任務類型(預留,目前未影響路由邏輯)
|
||||
context: 額外上下文(預留)
|
||||
|
||||
Returns:
|
||||
OllamaRoutingResult
|
||||
2026-04-26 統帥鐵律:唯一 Ollama = 111,188 禁止用於即時回應。
|
||||
"""
|
||||
url_111 = self._settings.OLLAMA_URL
|
||||
url_188 = self._settings.OLLAMA_FALLBACK_URL or ""
|
||||
|
||||
# 並行檢查
|
||||
# 2026-04-25 critic-fix Part2 H4 by Claude Engineer-C2
|
||||
# return_exceptions=True 防止任一 check 例外導致整個 select_provider 炸
|
||||
if url_188:
|
||||
results = await asyncio.gather(
|
||||
self._monitor.check(url_111),
|
||||
self._monitor.check(url_188),
|
||||
return_exceptions=True,
|
||||
)
|
||||
# 處理 exception — 任一失敗視為 OFFLINE
|
||||
health_111_raw, health_188_raw = results
|
||||
health_111: HealthReport = (
|
||||
HealthReport(status=HealthStatus.OFFLINE, reason=f"check error: {health_111_raw}")
|
||||
if isinstance(health_111_raw, Exception)
|
||||
else health_111_raw
|
||||
)
|
||||
health_188: HealthReport | None = (
|
||||
HealthReport(status=HealthStatus.OFFLINE, reason=f"check error: {health_188_raw}")
|
||||
if isinstance(health_188_raw, Exception)
|
||||
else health_188_raw
|
||||
)
|
||||
else:
|
||||
try:
|
||||
health_111 = await self._monitor.check(url_111)
|
||||
health_188 = None
|
||||
except Exception as e:
|
||||
health_111 = HealthReport(status=HealthStatus.OFFLINE, reason=f"check error: {e}")
|
||||
|
||||
result = self._decide_route(
|
||||
health_111=health_111,
|
||||
health_188=health_188,
|
||||
url_111=url_111,
|
||||
url_188=url_188,
|
||||
)
|
||||
result = self._decide_route(health_111=health_111, url_111=url_111)
|
||||
|
||||
# Gemini 帳單熔斷(quota gate)
|
||||
# 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
|
||||
@@ -226,12 +193,7 @@ class OllamaFailoverManager:
|
||||
quota=quota,
|
||||
health_111=health_111.status.value,
|
||||
)
|
||||
result = self._build_quota_exceeded_route(
|
||||
health_111=health_111,
|
||||
health_188=health_188,
|
||||
url_111=url_111,
|
||||
url_188=url_188,
|
||||
)
|
||||
result = self._build_quota_exceeded_route(health_111=health_111)
|
||||
# 2026-04-26 P1.5 整合點 3 by Claude Opus 4.7 — 配額耗盡 Telegram 告警
|
||||
# alerter 內部 24h dedup(QUOTA_DEDUP_TTL_SEC),即使每次 quota exceeded
|
||||
# 都呼叫,當日只會發送一次告警。失敗 fail-open(不阻擋 routing)。
|
||||
@@ -267,7 +229,6 @@ class OllamaFailoverManager:
|
||||
reason=result.routing_reason,
|
||||
fallback_count=len(result.fallback_chain),
|
||||
health_111=health_111.status.value,
|
||||
health_188=health_188.status.value if health_188 else "not_configured",
|
||||
)
|
||||
|
||||
# 通知 recovery service 當前 primary(跨重啟持久化)
|
||||
@@ -290,102 +251,44 @@ class OllamaFailoverManager:
|
||||
def _decide_route(
|
||||
self,
|
||||
health_111: HealthReport,
|
||||
health_188: HealthReport | None,
|
||||
url_111: str,
|
||||
url_188: str,
|
||||
) -> OllamaRoutingResult:
|
||||
"""
|
||||
決策矩陣(2026-04-25 統帥指令:Gemini 優先,188 最後備援):
|
||||
決策矩陣(2026-04-26 統帥鐵律:唯一 Ollama=111,備援只用 Gemini):
|
||||
|
||||
111 HEALTHY → primary=111, fallback=[Gemini, 188, Nemotron]
|
||||
111 SLOW → primary=Gemini, fallback=[111, 188]
|
||||
111 DEGRADED → primary=Gemini, fallback=[188, Nemotron, Claude]
|
||||
111 OFFLINE → primary=Gemini, fallback=[188, Nemotron, Claude]
|
||||
111 OFFLINE + 188 OFFLINE → primary=Gemini, fallback=[Nemotron, Claude]
|
||||
111 HEALTHY → primary=111, fallback=[Gemini]
|
||||
111 SLOW → primary=Gemini, fallback=[111, Nemotron, Claude]
|
||||
111 DEGRADED → primary=Gemini, fallback=[Nemotron, Claude]
|
||||
111 OFFLINE → primary=Gemini, fallback=[Nemotron, Claude]
|
||||
|
||||
關鍵原則:
|
||||
- 111 非 HEALTHY 時,primary 必為 Gemini(快速雲端,不等 188 慢推理)
|
||||
- 188 永遠在 fallback chain,作為 Gemini 額度耗盡的最後備援
|
||||
- degradation_reason 記錄切換原因 + 時間戳
|
||||
|
||||
2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
|
||||
188 完全移出(CPU-only 0.45 tok/s,禁止即時回應)。
|
||||
"""
|
||||
model_111 = self._settings.OLLAMA_HEALTH_CHECK_MODEL
|
||||
model_188 = "qwen2.5:7b-instruct" # 188 CPU-only 備援推薦模型(plan 方案 C)
|
||||
|
||||
ep_111 = OllamaEndpoint(url=url_111, provider_name="ollama", model=model_111)
|
||||
ep_188 = (
|
||||
OllamaEndpoint(url=url_188, provider_name="ollama_188", model=model_188)
|
||||
if url_188
|
||||
else None
|
||||
)
|
||||
|
||||
# 188 可用性判斷(僅供 fallback 使用)
|
||||
has_188 = ep_188 is not None and (
|
||||
health_188 is not None and health_188.status != HealthStatus.OFFLINE
|
||||
)
|
||||
|
||||
# 切換時間戳(台北時區 +8,標準庫保證)
|
||||
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
|
||||
now_ts = datetime.datetime.now(TAIPEI_TZ).isoformat()
|
||||
|
||||
# ==========================================================
|
||||
# 111 HEALTHY → 主 111,Gemini 作為第一 fallback(快速雲端)
|
||||
# ==========================================================
|
||||
if health_111.status == HealthStatus.HEALTHY:
|
||||
fallback: list[OllamaEndpoint] = [_GEMINI_ENDPOINT]
|
||||
if has_188 and ep_188:
|
||||
fallback.append(ep_188)
|
||||
fallback.append(_NEMOTRON_ENDPOINT)
|
||||
return OllamaRoutingResult(
|
||||
primary=ep_111,
|
||||
fallback_chain=fallback,
|
||||
fallback_chain=[_GEMINI_ENDPOINT],
|
||||
routing_reason="111 HEALTHY → 主 111",
|
||||
health_111=health_111,
|
||||
health_188=health_188,
|
||||
)
|
||||
|
||||
# ==========================================================
|
||||
# 111 SLOW → primary=Gemini,fallback=[111, 188]
|
||||
# 111 實測 eval rate 0.09 token/s,~111s 推理,Gemini 更快
|
||||
# ==========================================================
|
||||
if health_111.status == HealthStatus.SLOW:
|
||||
fallback_slow: list[OllamaEndpoint] = [ep_111]
|
||||
if has_188 and ep_188:
|
||||
fallback_slow.append(ep_188)
|
||||
degradation_reason = (
|
||||
f"111 SLOW(eval ~0.09 token/s, ~111s)→ 切 Gemini at {now_ts}"
|
||||
)
|
||||
return OllamaRoutingResult(
|
||||
primary=_GEMINI_ENDPOINT,
|
||||
fallback_chain=fallback_slow,
|
||||
routing_reason=degradation_reason,
|
||||
fallback_chain=[ep_111, _NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
|
||||
routing_reason=f"111 SLOW → 切 Gemini at {now_ts}",
|
||||
health_111=health_111,
|
||||
health_188=health_188,
|
||||
)
|
||||
|
||||
# ==========================================================
|
||||
# 111 DEGRADED 或 OFFLINE → primary=Gemini,188 在 fallback
|
||||
# ==========================================================
|
||||
status_label = health_111.status.value # "degraded" / "offline"
|
||||
degradation_reason = f"111 {status_label} → 切 Gemini at {now_ts}"
|
||||
if has_188 and ep_188:
|
||||
return OllamaRoutingResult(
|
||||
primary=_GEMINI_ENDPOINT,
|
||||
fallback_chain=[ep_188, _NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
|
||||
routing_reason=degradation_reason,
|
||||
health_111=health_111,
|
||||
health_188=health_188,
|
||||
)
|
||||
|
||||
# 188 也不可用 → Gemini 主力,最後備援 Nemotron / Claude
|
||||
degradation_reason = f"111 {status_label} + 188 不可用 → 切 Gemini at {now_ts}"
|
||||
status_label = health_111.status.value
|
||||
return OllamaRoutingResult(
|
||||
primary=_GEMINI_ENDPOINT,
|
||||
fallback_chain=[_NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
|
||||
routing_reason=degradation_reason,
|
||||
routing_reason=f"111 {status_label} → 切 Gemini at {now_ts}",
|
||||
health_111=health_111,
|
||||
health_188=health_188,
|
||||
)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
@@ -464,40 +367,13 @@ class OllamaFailoverManager:
|
||||
def _build_quota_exceeded_route(
|
||||
self,
|
||||
health_111: HealthReport,
|
||||
health_188: HealthReport | None,
|
||||
url_111: str, # noqa: ARG002 — 保留供 OllamaRoutingResult 結構完整性(health_111 對應)
|
||||
url_188: str,
|
||||
) -> OllamaRoutingResult:
|
||||
"""
|
||||
Gemini 配額耗盡時的備援路由:primary=OLLAMA_188,fallback=[Nemotron, Claude]
|
||||
若 188 也不可用,則 primary=Nemotron。
|
||||
"""
|
||||
model_188 = "qwen2.5:7b-instruct"
|
||||
ep_188 = (
|
||||
OllamaEndpoint(url=url_188, provider_name="ollama_188", model=model_188)
|
||||
if url_188
|
||||
else None
|
||||
)
|
||||
has_188 = ep_188 is not None and (
|
||||
health_188 is not None and health_188.status != HealthStatus.OFFLINE
|
||||
)
|
||||
|
||||
if has_188 and ep_188:
|
||||
return OllamaRoutingResult(
|
||||
primary=ep_188,
|
||||
fallback_chain=[_NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
|
||||
routing_reason="Gemini quota exceeded → 188 CPU-only 備援",
|
||||
health_111=health_111,
|
||||
health_188=health_188,
|
||||
)
|
||||
|
||||
# 188 也不可用
|
||||
"""Gemini 配額耗盡 → Nemotron 備援。2026-04-26 統帥鐵律:188 移出。"""
|
||||
return OllamaRoutingResult(
|
||||
primary=_NEMOTRON_ENDPOINT,
|
||||
fallback_chain=[_CLAUDE_ENDPOINT],
|
||||
routing_reason="Gemini quota exceeded + 188 不可用 → Nemotron 備援",
|
||||
routing_reason="Gemini quota exceeded → Nemotron 備援",
|
||||
health_111=health_111,
|
||||
health_188=health_188,
|
||||
)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
@@ -1,18 +1,19 @@
|
||||
# apps/api/tests/test_ollama_failover_manager.py | 2026-04-25 @ Asia/Taipei
|
||||
# apps/api/tests/test_ollama_failover_manager.py | 2026-04-27 @ Asia/Taipei
|
||||
# Created 2026-04-25 P1.1c by Claude Engineer-C
|
||||
# 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復(路由矩陣更新)
|
||||
# 2026-04-27 波次對齊 by Claude Sonnet 4.6 — 統帥鐵律:唯一 Ollama=111,188 完全移出
|
||||
"""
|
||||
OllamaFailoverManager 單元測試 - P1.1c v2.0
|
||||
OllamaFailoverManager 單元測試 - P1.1c v3.0
|
||||
=============================================
|
||||
測試覆蓋(新路由矩陣:Gemini 優先,188 最後備援):
|
||||
- 111 HEALTHY → primary=ollama(111),fallback=[Gemini, 188, Nemotron]
|
||||
- 111 SLOW → primary=Gemini,fallback 包含 111 + 188
|
||||
- 111 DEGRADED → primary=Gemini,fallback 包含 188 + nemotron + claude
|
||||
- 111 OFFLINE → primary=Gemini,fallback 包含 188 + nemotron + claude
|
||||
- 111 + 188 都 OFFLINE → primary=Gemini,fallback 包含 nemotron + claude
|
||||
- OLLAMA_FALLBACK_URL 未設定時的單節點行為
|
||||
- 並行 gather 邏輯(asyncio.gather mock)
|
||||
測試覆蓋(新路由矩陣:統帥鐵律 2026-04-26,唯一 Ollama=111,備援只用 Gemini):
|
||||
- 111 HEALTHY → primary=ollama(111),fallback=[Gemini]
|
||||
- 111 SLOW → primary=Gemini,fallback=[111, Nemotron, Claude]
|
||||
- 111 DEGRADED → primary=Gemini,fallback=[Nemotron, Claude]
|
||||
- 111 OFFLINE → primary=Gemini,fallback=[Nemotron, Claude]
|
||||
- Gemini quota exceeded → primary=Nemotron,fallback=[Claude]
|
||||
- select_provider 只 check 111(不再並行 check 188)
|
||||
- clear_cache() / notify_recovery() 方法
|
||||
- OllamaRoutingResult.health_188 保留為 optional(backward-compat)
|
||||
|
||||
測試分類:unit(mock OllamaHealthMonitor,無 DB 依賴)
|
||||
"""
|
||||
@@ -50,11 +51,10 @@ def _make_health(status: HealthStatus, url: str = URL_111) -> HealthReport:
|
||||
return HealthReport(status=status, host=url, latency_ms=500.0)
|
||||
|
||||
|
||||
def _make_manager(url_111: str = URL_111, url_188: str = URL_188) -> OllamaFailoverManager:
|
||||
"""建立 manager,settings mock 為指定 URL"""
|
||||
def _make_manager(url_111: str = URL_111) -> OllamaFailoverManager:
|
||||
"""建立 manager,settings mock 為指定 URL(188 已移除)"""
|
||||
mock_settings = MagicMock()
|
||||
mock_settings.OLLAMA_URL = url_111
|
||||
mock_settings.OLLAMA_FALLBACK_URL = url_188
|
||||
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
|
||||
|
||||
mock_monitor = MagicMock()
|
||||
@@ -69,10 +69,10 @@ def _make_manager(url_111: str = URL_111, url_188: str = URL_188) -> OllamaFailo
|
||||
|
||||
|
||||
class TestDecideRoute:
|
||||
"""_decide_route 路由邏輯純函數測試"""
|
||||
"""_decide_route 路由邏輯純函數測試(新簽名:只需 health_111, url_111)"""
|
||||
|
||||
def _setup(self, url_188: str = URL_188) -> OllamaFailoverManager:
|
||||
return _make_manager(url_188=url_188)
|
||||
def _setup(self) -> OllamaFailoverManager:
|
||||
return _make_manager()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 111 HEALTHY
|
||||
@@ -81,41 +81,39 @@ class TestDecideRoute:
|
||||
def test_111_healthy_primary_is_ollama(self):
|
||||
manager = self._setup()
|
||||
h111 = _make_health(HealthStatus.HEALTHY, URL_111)
|
||||
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
|
||||
|
||||
result = manager._decide_route(h111, h188, URL_111, URL_188)
|
||||
result = manager._decide_route(h111, URL_111)
|
||||
|
||||
assert result.primary.provider_name == "ollama"
|
||||
assert result.primary.url == URL_111
|
||||
|
||||
def test_111_healthy_fallback_includes_188(self):
|
||||
def test_111_healthy_fallback_is_gemini_only(self):
|
||||
"""統帥鐵律:HEALTHY fallback 只有 Gemini,188/Nemotron 移出"""
|
||||
manager = self._setup()
|
||||
h111 = _make_health(HealthStatus.HEALTHY, URL_111)
|
||||
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
|
||||
|
||||
result = manager._decide_route(h111, h188, URL_111, URL_188)
|
||||
result = manager._decide_route(h111, URL_111)
|
||||
|
||||
provider_names = [e.provider_name for e in result.fallback_chain]
|
||||
assert "ollama_188" in provider_names
|
||||
assert provider_names == ["gemini"]
|
||||
assert "ollama_188" not in provider_names
|
||||
assert "nemotron" not in provider_names
|
||||
|
||||
def test_111_healthy_fallback_includes_nemotron_gemini(self):
|
||||
def test_111_healthy_fallback_includes_gemini(self):
|
||||
manager = self._setup()
|
||||
h111 = _make_health(HealthStatus.HEALTHY, URL_111)
|
||||
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
|
||||
|
||||
result = manager._decide_route(h111, h188, URL_111, URL_188)
|
||||
result = manager._decide_route(h111, URL_111)
|
||||
|
||||
provider_names = [e.provider_name for e in result.fallback_chain]
|
||||
assert "nemotron" in provider_names
|
||||
assert "gemini" in provider_names
|
||||
|
||||
def test_111_healthy_fallback_order_gemini_first(self):
|
||||
"""新矩陣:Gemini 應排在 188/nemotron 之前(快速雲端優先)"""
|
||||
"""統帥鐵律:Gemini 是唯一 fallback,排在 fallback_chain[0]"""
|
||||
manager = self._setup()
|
||||
h111 = _make_health(HealthStatus.HEALTHY, URL_111)
|
||||
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
|
||||
|
||||
result = manager._decide_route(h111, h188, URL_111, URL_188)
|
||||
result = manager._decide_route(h111, URL_111)
|
||||
|
||||
assert result.fallback_chain[0].provider_name == "gemini"
|
||||
|
||||
@@ -127,30 +125,29 @@ class TestDecideRoute:
|
||||
"""新矩陣:111 SLOW → primary=Gemini(111 eval ~0.09 token/s, ~111s,Gemini 更快)"""
|
||||
manager = self._setup()
|
||||
h111 = _make_health(HealthStatus.SLOW, URL_111)
|
||||
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
|
||||
|
||||
result = manager._decide_route(h111, h188, URL_111, URL_188)
|
||||
result = manager._decide_route(h111, URL_111)
|
||||
|
||||
assert result.primary.provider_name == "gemini"
|
||||
|
||||
def test_111_slow_fallback_includes_111_and_188(self):
|
||||
"""SLOW 時 111 + 188 仍在 fallback(Gemini 額度耗盡時的降級鏈)"""
|
||||
def test_111_slow_fallback_includes_111_and_nemotron(self):
|
||||
"""SLOW 時 111 + Nemotron 在 fallback(188 已移出)"""
|
||||
manager = self._setup()
|
||||
h111 = _make_health(HealthStatus.SLOW, URL_111)
|
||||
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
|
||||
|
||||
result = manager._decide_route(h111, h188, URL_111, URL_188)
|
||||
result = manager._decide_route(h111, URL_111)
|
||||
|
||||
provider_names = [e.provider_name for e in result.fallback_chain]
|
||||
assert "ollama" in provider_names
|
||||
assert "ollama_188" in provider_names
|
||||
assert "nemotron" in provider_names
|
||||
assert "ollama_188" not in provider_names
|
||||
|
||||
def test_111_slow_no_188_primary_is_gemini(self):
|
||||
"""111 SLOW + 188 未設定 → primary=Gemini(新矩陣,不強撐 111)"""
|
||||
manager = _make_manager(url_188="") # 188 未設定
|
||||
def test_111_slow_primary_is_gemini_no_188(self):
|
||||
"""111 SLOW + 188 不存在 → primary=Gemini(新矩陣,188 完全移出)"""
|
||||
manager = _make_manager()
|
||||
h111 = _make_health(HealthStatus.SLOW, URL_111)
|
||||
|
||||
result = manager._decide_route(h111, None, URL_111, "")
|
||||
result = manager._decide_route(h111, URL_111)
|
||||
|
||||
assert result.primary.provider_name == "gemini"
|
||||
|
||||
@@ -162,9 +159,8 @@ class TestDecideRoute:
|
||||
"""新矩陣:111 DEGRADED → primary=Gemini"""
|
||||
manager = self._setup()
|
||||
h111 = _make_health(HealthStatus.DEGRADED, URL_111)
|
||||
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
|
||||
|
||||
result = manager._decide_route(h111, h188, URL_111, URL_188)
|
||||
result = manager._decide_route(h111, URL_111)
|
||||
|
||||
assert result.primary.provider_name == "gemini"
|
||||
|
||||
@@ -172,72 +168,55 @@ class TestDecideRoute:
|
||||
"""DEGRADED 時 111 不在 fallback(太差了)"""
|
||||
manager = self._setup()
|
||||
h111 = _make_health(HealthStatus.DEGRADED, URL_111)
|
||||
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
|
||||
|
||||
result = manager._decide_route(h111, h188, URL_111, URL_188)
|
||||
result = manager._decide_route(h111, URL_111)
|
||||
|
||||
provider_names = [e.provider_name for e in result.fallback_chain]
|
||||
assert "ollama" not in provider_names
|
||||
|
||||
def test_111_degraded_fallback_includes_188_nemotron_claude(self):
|
||||
"""新矩陣:DEGRADED fallback = [188, nemotron, claude]"""
|
||||
def test_111_degraded_fallback_includes_nemotron_claude(self):
|
||||
"""統帥鐵律:DEGRADED fallback = [Nemotron, Claude](188 已移出)"""
|
||||
manager = self._setup()
|
||||
h111 = _make_health(HealthStatus.DEGRADED, URL_111)
|
||||
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
|
||||
|
||||
result = manager._decide_route(h111, h188, URL_111, URL_188)
|
||||
result = manager._decide_route(h111, URL_111)
|
||||
|
||||
provider_names = [e.provider_name for e in result.fallback_chain]
|
||||
assert "ollama_188" in provider_names
|
||||
assert "nemotron" in provider_names
|
||||
assert "claude" in provider_names
|
||||
assert "ollama_188" not in provider_names
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 111 OFFLINE
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def test_111_offline_primary_is_gemini(self):
|
||||
"""新矩陣:111 OFFLINE → primary=Gemini(188 降為 fallback 備援)"""
|
||||
"""新矩陣:111 OFFLINE → primary=Gemini"""
|
||||
manager = self._setup()
|
||||
h111 = _make_health(HealthStatus.OFFLINE, URL_111)
|
||||
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
|
||||
|
||||
result = manager._decide_route(h111, h188, URL_111, URL_188)
|
||||
result = manager._decide_route(h111, URL_111)
|
||||
|
||||
assert result.primary.provider_name == "gemini"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 雙節點都 OFFLINE
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def test_both_offline_primary_is_gemini(self):
|
||||
"""新矩陣:111 + 188 都 OFFLINE → Gemini 接手(最快雲端)"""
|
||||
def test_111_offline_fallback_includes_nemotron_claude(self):
|
||||
"""111 OFFLINE 時,fallback=[Nemotron, Claude](無可用 Ollama)"""
|
||||
manager = self._setup()
|
||||
h111 = _make_health(HealthStatus.OFFLINE, URL_111)
|
||||
h188 = _make_health(HealthStatus.OFFLINE, URL_188)
|
||||
|
||||
result = manager._decide_route(h111, h188, URL_111, URL_188)
|
||||
|
||||
assert result.primary.provider_name == "gemini"
|
||||
|
||||
def test_both_offline_fallback_includes_nemotron_claude(self):
|
||||
"""雙 OFFLINE 時,fallback=[Nemotron, Claude](無可用 Ollama)"""
|
||||
manager = self._setup()
|
||||
h111 = _make_health(HealthStatus.OFFLINE, URL_111)
|
||||
h188 = _make_health(HealthStatus.OFFLINE, URL_188)
|
||||
|
||||
result = manager._decide_route(h111, h188, URL_111, URL_188)
|
||||
result = manager._decide_route(h111, URL_111)
|
||||
|
||||
provider_names = [e.provider_name for e in result.fallback_chain]
|
||||
assert "nemotron" in provider_names
|
||||
assert "claude" in provider_names
|
||||
assert "ollama_188" not in provider_names
|
||||
|
||||
def test_111_offline_no_188_primary_is_gemini(self):
|
||||
"""新矩陣:111 OFFLINE + 188 未設定 → Gemini(不是 Nemotron)"""
|
||||
manager = _make_manager(url_188="")
|
||||
def test_111_offline_primary_is_gemini_no_188(self):
|
||||
"""新矩陣:111 OFFLINE → Gemini(188 不再列入考慮)"""
|
||||
manager = _make_manager()
|
||||
h111 = _make_health(HealthStatus.OFFLINE, URL_111)
|
||||
|
||||
result = manager._decide_route(h111, None, URL_111, "")
|
||||
result = manager._decide_route(h111, URL_111)
|
||||
|
||||
assert result.primary.provider_name == "gemini"
|
||||
|
||||
@@ -249,35 +228,30 @@ class TestDecideRoute:
|
||||
"""routing_reason 應包含 111 的狀態資訊"""
|
||||
manager = self._setup()
|
||||
h111 = _make_health(HealthStatus.OFFLINE, URL_111)
|
||||
h188 = _make_health(HealthStatus.HEALTHY, URL_188)
|
||||
|
||||
result = manager._decide_route(h111, h188, URL_111, URL_188)
|
||||
result = manager._decide_route(h111, URL_111)
|
||||
|
||||
assert "offline" in result.routing_reason.lower() or "111" in result.routing_reason
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# select_provider():並行 gather
|
||||
# select_provider():只 check 111
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestSelectProvider:
|
||||
"""select_provider() 並行邏輯"""
|
||||
"""select_provider() 只 check 111 邏輯(統帥鐵律:188 完全移出)"""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_select_provider_calls_gather(self):
|
||||
"""有 url_188 時應並行 gather 兩個 check"""
|
||||
async def test_select_provider_checks_111_only(self):
|
||||
"""統帥鐵律:select_provider 只 check 111,call_count == 1"""
|
||||
mock_monitor = AsyncMock()
|
||||
mock_monitor.check = AsyncMock(
|
||||
side_effect=[
|
||||
_make_health(HealthStatus.HEALTHY, URL_111),
|
||||
_make_health(HealthStatus.HEALTHY, URL_188),
|
||||
]
|
||||
return_value=_make_health(HealthStatus.HEALTHY, URL_111)
|
||||
)
|
||||
|
||||
mock_settings = MagicMock()
|
||||
mock_settings.OLLAMA_URL = URL_111
|
||||
mock_settings.OLLAMA_FALLBACK_URL = URL_188
|
||||
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
|
||||
|
||||
manager = OllamaFailoverManager(health_monitor=mock_monitor)
|
||||
@@ -286,21 +260,19 @@ class TestSelectProvider:
|
||||
with patch.object(manager, "_write_failover_audit", return_value=None):
|
||||
result = await manager.select_provider()
|
||||
|
||||
# 兩個 host 都被 check
|
||||
assert mock_monitor.check.call_count == 2
|
||||
called_hosts = {call.args[0] for call in mock_monitor.check.call_args_list}
|
||||
assert URL_111 in called_hosts
|
||||
assert URL_188 in called_hosts
|
||||
# 只 check 111,不再並行 check 188
|
||||
assert mock_monitor.check.call_count == 1
|
||||
called_url = mock_monitor.check.call_args.args[0]
|
||||
assert called_url == URL_111
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_select_provider_single_node_no_188(self):
|
||||
"""OLLAMA_FALLBACK_URL 空字串 → 只 check 111"""
|
||||
async def test_select_provider_single_node_primary_ollama(self):
|
||||
"""111 HEALTHY → primary=ollama"""
|
||||
mock_monitor = AsyncMock()
|
||||
mock_monitor.check = AsyncMock(return_value=_make_health(HealthStatus.HEALTHY, URL_111))
|
||||
|
||||
mock_settings = MagicMock()
|
||||
mock_settings.OLLAMA_URL = URL_111
|
||||
mock_settings.OLLAMA_FALLBACK_URL = ""
|
||||
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
|
||||
|
||||
manager = OllamaFailoverManager(health_monitor=mock_monitor)
|
||||
@@ -317,22 +289,17 @@ class TestSelectProvider:
|
||||
"""select_provider 返回 OllamaRoutingResult 類型(新矩陣:111 OFFLINE → Gemini)"""
|
||||
mock_monitor = AsyncMock()
|
||||
mock_monitor.check = AsyncMock(
|
||||
side_effect=[
|
||||
_make_health(HealthStatus.OFFLINE, URL_111),
|
||||
_make_health(HealthStatus.HEALTHY, URL_188),
|
||||
]
|
||||
return_value=_make_health(HealthStatus.OFFLINE, URL_111)
|
||||
)
|
||||
|
||||
mock_settings = MagicMock()
|
||||
mock_settings.OLLAMA_URL = URL_111
|
||||
mock_settings.OLLAMA_FALLBACK_URL = URL_188
|
||||
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
|
||||
|
||||
manager = OllamaFailoverManager(health_monitor=mock_monitor)
|
||||
manager._settings = mock_settings
|
||||
|
||||
# 2026-04-27 Wave8-X2: 必須 mock Redis pool(_check_gemini_quota 走 fail-closed 路徑會切到 188 而非 Gemini)
|
||||
# 測試本身只要驗 OFFLINE → Gemini 路由邏輯,故繞過 quota check
|
||||
# 必須 mock Redis pool(_check_gemini_quota 走 fail-closed 路徑會切到 Nemotron 而非 Gemini)
|
||||
with patch.object(manager, "_write_failover_audit", return_value=None), \
|
||||
patch.object(manager, "_check_gemini_quota", AsyncMock(return_value=True)), \
|
||||
patch(
|
||||
@@ -345,7 +312,7 @@ class TestSelectProvider:
|
||||
result = await manager.select_provider()
|
||||
|
||||
assert isinstance(result, OllamaRoutingResult)
|
||||
# 新矩陣:111 OFFLINE + Gemini quota OK → primary=Gemini(188 降為 fallback)
|
||||
# 新矩陣:111 OFFLINE + Gemini quota OK → primary=Gemini
|
||||
assert result.primary.provider_name == "gemini"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -353,26 +320,20 @@ class TestSelectProvider:
|
||||
"""111 正常時不觸發 failover audit"""
|
||||
mock_monitor = AsyncMock()
|
||||
mock_monitor.check = AsyncMock(
|
||||
side_effect=[
|
||||
_make_health(HealthStatus.HEALTHY, URL_111),
|
||||
_make_health(HealthStatus.HEALTHY, URL_188),
|
||||
]
|
||||
return_value=_make_health(HealthStatus.HEALTHY, URL_111)
|
||||
)
|
||||
|
||||
mock_settings = MagicMock()
|
||||
mock_settings.OLLAMA_URL = URL_111
|
||||
mock_settings.OLLAMA_FALLBACK_URL = URL_188
|
||||
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
|
||||
|
||||
manager = OllamaFailoverManager(health_monitor=mock_monitor)
|
||||
manager._settings = mock_settings
|
||||
|
||||
audit_called = [False]
|
||||
original_write = manager._write_failover_audit
|
||||
|
||||
async def _spy_audit(result):
|
||||
# _write_failover_audit 在 111 HEALTHY 時 early return,不寫 DB
|
||||
# 追蹤呼叫是否有 side effect(DB 寫入)
|
||||
audit_called[0] = result.primary.provider_name != "ollama"
|
||||
|
||||
with patch.object(manager, "_write_failover_audit", side_effect=_spy_audit):
|
||||
@@ -435,7 +396,7 @@ class TestOllamaRoutingResult:
|
||||
def test_all_endpoints_in_order(self):
|
||||
from src.services.ollama_failover_manager import OllamaEndpoint
|
||||
primary = OllamaEndpoint(url=URL_111, provider_name="ollama", model="m1")
|
||||
fb1 = OllamaEndpoint(url=URL_188, provider_name="ollama_188", model="m2")
|
||||
fb1 = OllamaEndpoint(url="", provider_name="gemini", model="gemini-1.5-flash")
|
||||
fb2 = OllamaEndpoint(url="", provider_name="nemotron", model="m3")
|
||||
|
||||
result = OllamaRoutingResult(
|
||||
@@ -447,7 +408,7 @@ class TestOllamaRoutingResult:
|
||||
|
||||
ordered = result.all_endpoints_in_order()
|
||||
assert ordered[0].provider_name == "ollama"
|
||||
assert ordered[1].provider_name == "ollama_188"
|
||||
assert ordered[1].provider_name == "gemini"
|
||||
assert ordered[2].provider_name == "nemotron"
|
||||
|
||||
def test_to_dict_structure(self):
|
||||
@@ -464,6 +425,19 @@ class TestOllamaRoutingResult:
|
||||
assert d["routing_reason"] == "111 HEALTHY"
|
||||
assert isinstance(d["fallback_chain"], list)
|
||||
|
||||
def test_health_188_optional_field_backward_compat(self):
|
||||
"""health_188 保留為 optional None(backward-compat,不傳也可以)"""
|
||||
from src.services.ollama_failover_manager import OllamaEndpoint
|
||||
primary = OllamaEndpoint(url=URL_111, provider_name="ollama", model="qwen")
|
||||
result = OllamaRoutingResult(
|
||||
primary=primary,
|
||||
fallback_chain=[],
|
||||
routing_reason="test",
|
||||
health_111=_make_health(HealthStatus.HEALTHY),
|
||||
# health_188 不傳,應為 None
|
||||
)
|
||||
assert result.health_188 is None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Singleton
|
||||
@@ -552,22 +526,18 @@ class TestAIProviderEnumOllama188:
|
||||
|
||||
|
||||
class TestGatherReturnExceptions:
|
||||
"""H4 修復驗證:任一 check 拋例外時不炸整個 select_provider"""
|
||||
"""H4 修復驗證:111 check 拋例外時不炸整個 select_provider"""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_gather_exception_in_111_treated_as_offline(self):
|
||||
"""111 check 拋例外 → health_111=OFFLINE,select_provider 正常返回"""
|
||||
mock_monitor = AsyncMock()
|
||||
mock_monitor.check = AsyncMock(
|
||||
side_effect=[
|
||||
RuntimeError("111 network error"),
|
||||
_make_health(HealthStatus.HEALTHY, URL_188),
|
||||
]
|
||||
side_effect=RuntimeError("111 network error")
|
||||
)
|
||||
|
||||
mock_settings = MagicMock()
|
||||
mock_settings.OLLAMA_URL = URL_111
|
||||
mock_settings.OLLAMA_FALLBACK_URL = URL_188
|
||||
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
|
||||
mock_settings.GEMINI_DAILY_QUOTA = 1000
|
||||
|
||||
@@ -582,19 +552,15 @@ class TestGatherReturnExceptions:
|
||||
assert result.primary.provider_name == "gemini"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_gather_exception_in_188_treated_as_offline(self):
|
||||
"""188 check 拋例外 → health_188=OFFLINE,select_provider 正常返回"""
|
||||
async def test_111_healthy_select_provider_primary_ollama(self):
|
||||
"""111 HEALTHY → primary=ollama,select_provider 正常返回(取代舊的 188 exception 測試)"""
|
||||
mock_monitor = AsyncMock()
|
||||
mock_monitor.check = AsyncMock(
|
||||
side_effect=[
|
||||
_make_health(HealthStatus.HEALTHY, URL_111),
|
||||
RuntimeError("188 network error"),
|
||||
]
|
||||
return_value=_make_health(HealthStatus.HEALTHY, URL_111)
|
||||
)
|
||||
|
||||
mock_settings = MagicMock()
|
||||
mock_settings.OLLAMA_URL = URL_111
|
||||
mock_settings.OLLAMA_FALLBACK_URL = URL_188
|
||||
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
|
||||
mock_settings.GEMINI_DAILY_QUOTA = 1000
|
||||
|
||||
@@ -605,7 +571,7 @@ class TestGatherReturnExceptions:
|
||||
patch.object(manager, "_check_gemini_quota", return_value=True):
|
||||
result = await manager.select_provider()
|
||||
|
||||
# 111 HEALTHY → primary=ollama(188 exception 不影響主路由)
|
||||
# 111 HEALTHY → primary=ollama
|
||||
assert result.primary.provider_name == "ollama"
|
||||
|
||||
|
||||
@@ -682,19 +648,15 @@ class TestGeminiQuota:
|
||||
assert ok is False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_select_provider_quota_exceeded_uses_188(self):
|
||||
"""select_provider:Gemini quota 超過 → primary 改為 OLLAMA_188"""
|
||||
async def test_select_provider_quota_exceeded_uses_nemotron(self):
|
||||
"""select_provider:Gemini quota 超過 → primary 改為 Nemotron(統帥鐵律:188 移出)"""
|
||||
mock_monitor = AsyncMock()
|
||||
mock_monitor.check = AsyncMock(
|
||||
side_effect=[
|
||||
_make_health(HealthStatus.OFFLINE, URL_111),
|
||||
_make_health(HealthStatus.HEALTHY, URL_188),
|
||||
]
|
||||
return_value=_make_health(HealthStatus.OFFLINE, URL_111)
|
||||
)
|
||||
|
||||
mock_settings = MagicMock()
|
||||
mock_settings.OLLAMA_URL = URL_111
|
||||
mock_settings.OLLAMA_FALLBACK_URL = URL_188
|
||||
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
|
||||
mock_settings.GEMINI_DAILY_QUOTA = 1000
|
||||
|
||||
@@ -705,8 +667,8 @@ class TestGeminiQuota:
|
||||
patch.object(manager, "_check_gemini_quota", return_value=False):
|
||||
result = await manager.select_provider()
|
||||
|
||||
# quota 超過 → 不走 Gemini,改走 188
|
||||
assert result.primary.provider_name == "ollama_188"
|
||||
# quota 超過 → 不走 Gemini,改走 Nemotron(188 已移出)
|
||||
assert result.primary.provider_name == "nemotron"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_select_provider_quota_exceeded_no_188_uses_nemotron(self):
|
||||
@@ -716,7 +678,6 @@ class TestGeminiQuota:
|
||||
|
||||
mock_settings = MagicMock()
|
||||
mock_settings.OLLAMA_URL = URL_111
|
||||
mock_settings.OLLAMA_FALLBACK_URL = "" # 無 188
|
||||
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
|
||||
mock_settings.GEMINI_DAILY_QUOTA = 1000
|
||||
|
||||
|
||||
Reference in New Issue
Block a user