Files
awoooi/apps/api/src/services/ollama_failover_manager.py
Your Name dccdcdbaf5
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 9m45s
fix(flywheel): unblock action safety and Claude fallback
2026-04-29 21:51:18 +08:00

541 lines
22 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Ollama 自動容災管理 - P1.1b
============================
依 OllamaHealthMonitor 健康狀態決定 Ollama 路由方案。
路由邏輯2026-04-26 統帥鐵律111 = 唯一 Ollama備援只用 Gemini
111 HEALTHY → 主 111fallback [Gemini]
111 SLOW/DEGRADED/OFFLINE → 主 Geminifallback [Nemotron, Claude]
Gemini quota 超過 → 主 Nemotronfallback [Claude]
設計說明:
- 188 CPU-only 禁止用於即時回應0.45 tok/s完全移出 routing chain
- 唯一 Ollama 主機192.168.0.111M1 Pro, Metal 加速)
- 不直接依賴 AIProviderEnumP1.2 Engineer-A 整合時再對齊)
- 返回輕量 OllamaRoutingResult含主 endpoint + fallback 清單
- 只檢查 111不再並行檢查 188
- 切換觸發時寫 audit_logs service="ollama_failover"
- clear_cache() 方法供 OllamaAutoRecoveryService 切回後清空路由快取
版本: v2.0
建立: 2026-04-25 (台北時區)
建立者: Claude Engineer-C (P1.1b)
# Created 2026-04-25 P1.1 by Claude Engineer-C
# 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
"""
from __future__ import annotations
import asyncio
import datetime
from dataclasses import dataclass, field
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo保證一定有 +8 時區
# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC
from datetime import timezone, timedelta
import structlog
from src.core.config import get_settings
# 台北時區 +8標準庫保險絲100% 可用)
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
TAIPEI_TZ = timezone(timedelta(hours=8))
from src.services.ollama_health_monitor import (
HealthReport,
HealthStatus,
OllamaHealthMonitor,
get_ollama_health_monitor,
)
logger = structlog.get_logger(__name__)
# =============================================================================
# 路由結果模型輕量P1.2 整合時轉換為 RoutingDecision
# =============================================================================
@dataclass
class OllamaEndpoint:
"""Ollama 端點描述"""
url: str
provider_name: str # 給 AIRouterExecutor 用的 provider 名稱
model: str
def to_dict(self) -> dict:
return {"url": self.url, "provider_name": self.provider_name, "model": self.model}
@dataclass
class OllamaRoutingResult:
"""
Ollama 容災路由結果
P1.2 Engineer-A 整合時,將此結果轉換為 ai_router.RoutingDecision
- selected_provider = AIProviderEnum[result.primary.provider_name.upper()] or 新的 OLLAMA_188
- selected_model = result.primary.model
- fallback_chain = [(AIProviderEnum[p.provider_name.upper()], p.model) for p in result.fallback_chain]
"""
primary: OllamaEndpoint
fallback_chain: list[OllamaEndpoint]
routing_reason: str
health_111: HealthReport
health_188: HealthReport | None = None # optional backward-compat188 不在路由鏈時為 None
def all_endpoints_in_order(self) -> list[OllamaEndpoint]:
"""返回完整的優先序端點列表primary 在前)"""
return [self.primary, *self.fallback_chain]
def to_dict(self) -> dict:
return {
"primary": {
"url": self.primary.url,
"provider": self.primary.provider_name,
"model": self.primary.model,
},
"fallback_chain": [
{"url": e.url, "provider": e.provider_name, "model": e.model} # noqa: E501
for e in self.fallback_chain
],
"routing_reason": self.routing_reason,
"health_111": self.health_111.to_dict(),
"health_188": self.health_188.to_dict() if self.health_188 else None,
}
# =============================================================================
# 已知 Fallback 端點定義Nemotron / Gemini / Claude
# =============================================================================
# 以 provider_name 對應 ai_router.AIProviderEnum 的 value
_NEMOTRON_ENDPOINT = OllamaEndpoint(
url="", # Nemotron 不是 HTTP URL由 AIRouterExecutor 從 Registry 取得
provider_name="nemotron",
model="nvidia/nemotron-mini-4b-instruct",
)
_GEMINI_ENDPOINT = OllamaEndpoint(
url="",
provider_name="gemini",
model="gemini-1.5-flash",
)
_CLAUDE_ENDPOINT = OllamaEndpoint(
url="",
provider_name="claude",
model="claude-haiku-4-5-20251001",
)
# =============================================================================
# OllamaFailoverManager
# =============================================================================
class OllamaFailoverManager:
"""
Ollama 自動容災管理器
並行檢查 111 + 188依健康狀態選擇最佳路由。
使用方式:
manager = OllamaFailoverManager()
result = await manager.select_provider()
# result.primary.url → 使用的 Ollama URL
# result.fallback_chain → 依序 fallback
2026-04-25 Claude Engineer-C (P1.1b)
"""
def __init__(
self,
health_monitor: OllamaHealthMonitor | None = None,
recovery_callback=None,
) -> None:
self._monitor = health_monitor or get_ollama_health_monitor()
self._settings = get_settings()
# 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
# recovery_callback: async callable(provider_name: str) → None
# OllamaAutoRecoveryService.set_current_primary 在 failover 時被通知,
# 避免重啟後 _current_primary 停留在 "ollama" 而永不啟動恢復監控
self._recovery_callback = recovery_callback
# -------------------------------------------------------------------------
# Public API
# -------------------------------------------------------------------------
async def select_provider(
self,
task_type: str = "",
context: dict | None = None,
) -> OllamaRoutingResult:
"""
檢查 111 健康狀態,返回路由結果。
2026-04-26 統帥鐵律:唯一 Ollama = 111188 禁止用於即時回應。
只檢查 111不再並行檢查 188。
Args:
task_type: 任務類型(預留,目前未影響路由邏輯)
context: 額外上下文(預留)
Returns:
OllamaRoutingResult
"""
url_111 = self._settings.OLLAMA_URL
# 只檢查 111188 移出 routing chain
try:
health_111 = await self._monitor.check(url_111)
except Exception as e:
health_111 = HealthReport(status=HealthStatus.OFFLINE, reason=f"check error: {e}")
result = self._decide_route(
health_111=health_111,
url_111=url_111,
)
# Gemini 帳單熔斷quota gate
# 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
if result.primary.provider_name == "gemini":
quota_ok = await self._check_gemini_quota()
if not quota_ok:
quota = getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000)
logger.warning(
"gemini_quota_exceeded_fallback_to_nemotron",
quota=quota,
health_111=health_111.status.value,
)
# 2026-04-26 統帥鐵律188 移出quota 超過 → Nemotron → Claude
result = self._build_quota_exceeded_route(health_111=health_111)
# Quota 耗盡 Telegram 告警24h dedup
try:
from src.services.failover_alerter import get_failover_alerter
from src.core.redis_client import get_redis
_current_count = quota
try:
_redis = get_redis()
if _redis is not None:
_key = f"ollama:gemini_daily_count:{datetime.date.today().isoformat()}"
_raw = await _redis.get(_key)
_current_count = int(_raw or 0)
except Exception:
pass
await get_failover_alerter().alert_gemini_quota_exceeded({
"quota": quota,
"current_count": _current_count,
})
except Exception as _alert_err:
logger.warning(
"gemini_quota_alert_dispatch_failed",
error=str(_alert_err),
)
# 寫入 audit_logbest-effort
await self._write_failover_audit(result)
logger.info(
"ollama_failover_decision",
primary=result.primary.provider_name,
primary_url=result.primary.url,
reason=result.routing_reason,
fallback_count=len(result.fallback_chain),
health_111=health_111.status.value,
)
# 通知 recovery service 當前 primary跨重啟持久化
# 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
if self._recovery_callback is not None:
try:
await self._recovery_callback(result.primary.provider_name)
except Exception as e:
logger.warning(
"ollama_failover_recovery_callback_failed",
error=str(e),
)
return result
# -------------------------------------------------------------------------
# 路由決策邏輯
# -------------------------------------------------------------------------
def _decide_route(
self,
health_111: HealthReport,
url_111: str,
) -> OllamaRoutingResult:
"""
決策矩陣2026-04-26 統帥鐵律:唯一 Ollama=111備援只用 Gemini
111 HEALTHY → primary=111, fallback=[Gemini]
111 SLOW → primary=Gemini, fallback=[111, Nemotron, Claude]
111 DEGRADED → primary=Gemini, fallback=[Nemotron, Claude]
111 OFFLINE → primary=Gemini, fallback=[Nemotron, Claude]
188 完全移出 routing chainCPU-only 0.45 tok/s禁止即時回應
Gemini quota 超過由 _build_quota_exceeded_route() 接管。
"""
model_111 = self._settings.OLLAMA_HEALTH_CHECK_MODEL
ep_111 = OllamaEndpoint(url=url_111, provider_name="ollama", model=model_111)
now_ts = datetime.datetime.now(TAIPEI_TZ).isoformat()
if health_111.status == HealthStatus.HEALTHY:
return OllamaRoutingResult(
primary=ep_111,
fallback_chain=[_GEMINI_ENDPOINT],
routing_reason="111 HEALTHY → 主 111",
health_111=health_111,
)
if health_111.status == HealthStatus.SLOW:
return OllamaRoutingResult(
primary=_GEMINI_ENDPOINT,
fallback_chain=[ep_111, _NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
routing_reason=f"111 SLOW → 切 Gemini at {now_ts}",
health_111=health_111,
)
# DEGRADED / OFFLINE
status_label = health_111.status.value
return OllamaRoutingResult(
primary=_GEMINI_ENDPOINT,
fallback_chain=[_NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
routing_reason=f"111 {status_label} → 切 Gemini at {now_ts}",
health_111=health_111,
)
# -------------------------------------------------------------------------
# Gemini 帳單熔斷quota gate
# 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
# -------------------------------------------------------------------------
async def _check_gemini_quota(self) -> bool:
"""
檢查每日 Gemini call 配額,超過上限則禁用。
Redis key: ollama:gemini_daily_count:{YYYY-MM-DD}TTL 86400s
計數 atomicincr
Returns:
True → 仍在配額內,可使用 Gemini
False → 已超配額,應切到 188+Nemotron
fail-openRedis 不可用時允許走 Gemini不阻擋服務
"""
try:
from src.core.redis_client import get_redis
redis = get_redis()
if redis is None:
return True # fail-open
quota = getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000)
key = f"ollama:gemini_daily_count:{datetime.date.today().isoformat()}"
# 2026-04-26 Wave5 B3-fix by Claude Engineer-A4 — atomic pipeline 修復 TOCTOU
# 原實作GET → 判斷 → INCR → EXPIRE分四步INCR 後 crash 會丟 TTL
# 且並行請求在 GET/INCR 之間競爭導致配額超發)
# 修法pipeline 原子執行 SET NX首次設 TTL + INCR用 INCR 後的新值判斷
pipe = redis.pipeline()
pipe.set(key, 0, ex=86400, nx=True) # 僅首次寫入設 TTL已存在則跳過
pipe.incr(key) # 原子遞增,回傳遞增後的值
results = await pipe.execute()
new_count = int(results[1]) # results[1] = INCR 後新值
# 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert) — 刷新 Gemini Prometheus Gauge
# 每次 quota check 時同步更新,讓 Prometheus 取到最新值
try:
from src.core.metrics import GEMINI_DAILY_CALL_COUNT, GEMINI_DAILY_QUOTA
GEMINI_DAILY_CALL_COUNT.set(new_count)
GEMINI_DAILY_QUOTA.set(quota)
except Exception:
pass # metric 更新失敗不阻斷主路由邏輯
if new_count > quota:
# 已超配額INCR 後 > quota回退不是必要的最多超發 1 次)
# 但要回傳 False 讓 router 切到 188
return False
return True
except Exception as e:
# 2026-04-27 Wave8-X2 by Claude — B14 quota fail-closed
# 原 fail-openRedis 異常 → return True → Gemini 盲開 → 費用鐵律違反
# 修法Redis 異常時 fail-closed拒絕走 Gemini讓 fallback chain 接手 188/Nemotron
# 費用安全 > 服務可用性(統帥鐵律:費用變更必須停下)
logger.exception(
"gemini_quota_check_failed_failing_closed",
error=str(e),
security_note="Redis 異常時為費用安全 fail-closed切到 fallback chain",
)
# 嘗試告警best-effort不阻塞路由
try:
from src.services.failover_alerter import get_failover_alerter
await get_failover_alerter().alert_gemini_quota_exceeded({
"quota": getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000),
"current_count": "unknown (Redis error)",
"reason": "fail_closed_due_to_redis_error",
})
except Exception:
pass
return False # fail-closed拒絕 Gemini讓 fallback chain188/Nemotron接手
def _build_quota_exceeded_route(
self,
health_111: HealthReport,
) -> OllamaRoutingResult:
"""
Gemini 配額耗盡時的備援路由primary=Nemotron, fallback=[Claude]
2026-04-26 統帥鐵律188 移出quota 超過直接走 Nemotron → Claude。
"""
return OllamaRoutingResult(
primary=_NEMOTRON_ENDPOINT,
fallback_chain=[_CLAUDE_ENDPOINT],
routing_reason="Gemini quota exceeded → Nemotron 備援",
health_111=health_111,
)
# -------------------------------------------------------------------------
# Recovery API供 OllamaAutoRecoveryService 呼叫)
# -------------------------------------------------------------------------
def set_recovery_callback(self, callback) -> None:
"""
設定 recovery callback供 lifespan wiring 使用)。
callback signature: async (provider_name: str) -> None
# 2026-04-25 P1.2 by Claude Engineer-A2 — failover 整合到 ai_router + lifespan
"""
self._recovery_callback = callback
async def clear_cache(self) -> None:
"""
清空路由決策快取,讓下次 select_provider 重新評估健康狀態。
OllamaAutoRecoveryService 在偵測 111 恢復後呼叫此方法。
2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
# 2026-04-25 P1.2 by Claude Engineer-A2 — 改用 make_cache_key 動態組 key消除硬編碼 IP
"""
try:
from src.core.redis_client import get_redis
from src.services.ollama_health_monitor import make_cache_key
redis = get_redis()
if redis is None:
return
# 動態由 settings URL 組 cache key避免硬編碼 IP
keys = [
make_cache_key(self._settings.OLLAMA_URL),
make_cache_key(self._settings.OLLAMA_FALLBACK_URL or ""),
]
for k in keys:
if k and k != "ollama_health:": # 空 URL 會產生無意義的 key跳過
await redis.delete(k)
logger.info(
"ollama_failover_cache_cleared",
service="ollama_failover",
reason="recovery_triggered",
)
except Exception as e:
logger.debug("ollama_failover_clear_cache_failed", error=str(e))
def notify_recovery(self, provider: str) -> None:
"""
預留P1.5 Engineer 接入 Telegram alerter 時使用。
目前僅寫 structlog audit。
2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
"""
logger.info(
"ollama_recovery_notified",
service="ollama_failover",
provider=provider,
action="recovery_received",
)
# -------------------------------------------------------------------------
# Audit Log
# -------------------------------------------------------------------------
async def _write_failover_audit(self, result: OllamaRoutingResult) -> None:
"""
切換觸發時寫 structlog auditbest-effort+ Telegram 告警
# 2026-04-25 critic-fix Part2 B1 by Claude Engineer-C2
# 原 AuditLog DB 寫入使用不存在的欄位service/action/target/status/metadata
# → SQLAlchemy crash → except 吃掉 → 零稽核
# 修法:刪除 DB 寫入路徑,改用 structlog onlyaudit 不依賴 DB schema
# 2026-04-25 P1.5 by Claude Engineer-D — 新增 Telegram 告警dedup 10min
service="ollama_failover"per 任務規格)
僅在 primary 非 111 時記錄(真正發生切換)
"""
if result.primary.provider_name == "ollama":
# 111 正常,無切換事件
return
# 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert) — 記錄 failover Prometheus metric
try:
from src.core.metrics import (
OLLAMA_FAILOVER_TRIGGERED_TOTAL,
OLLAMA_CURRENT_PRIMARY_IS_OLLAMA,
)
OLLAMA_FAILOVER_TRIGGERED_TOTAL.labels(
from_provider="ollama",
to_provider=result.primary.provider_name,
).inc()
OLLAMA_CURRENT_PRIMARY_IS_OLLAMA.set(0)
except Exception as _metric_err:
logger.debug("ollama_failover_metric_error", error=str(_metric_err))
logger.info(
"ollama_failover_triggered",
service="ollama_failover",
action="failover_triggered",
from_provider="ollama",
to_provider=result.primary.provider_name,
reason=result.routing_reason,
primary_url=result.primary.url or result.primary.provider_name,
health_111=result.health_111.status.value,
health_188=result.health_188.status.value if result.health_188 else "not_configured",
)
# Telegram 告警首次切換才通知dedup 10min 內建)
# 2026-04-25 P1.5 by Claude Engineer-D — 告警失敗不阻斷主路由邏輯
try:
from src.services.failover_alerter import get_failover_alerter
fallback_chain_str = "".join(
p.provider_name for p in result.fallback_chain
)
alerter = get_failover_alerter()
await alerter.alert_failover({
"to_provider": result.primary.provider_name,
"model": result.primary.model,
"reason": result.routing_reason,
"timestamp": datetime.datetime.now(TAIPEI_TZ).isoformat(),
"fallback_chain_str": fallback_chain_str,
})
except Exception as e:
logger.warning("failover_alert_failed", error=str(e))
# =============================================================================
# Singleton
# =============================================================================
_failover_manager: OllamaFailoverManager | None = None
def get_ollama_failover_manager() -> OllamaFailoverManager:
"""取得 OllamaFailoverManager singleton"""
global _failover_manager
if _failover_manager is None:
_failover_manager = OllamaFailoverManager()
return _failover_manager
def reset_ollama_failover_manager() -> None:
"""重置 singleton測試用"""
global _failover_manager
_failover_manager = None