541 lines
22 KiB
Python
541 lines
22 KiB
Python
"""
|
||
Ollama 自動容災管理 - P1.1b
|
||
============================
|
||
依 OllamaHealthMonitor 健康狀態決定 Ollama 路由方案。
|
||
|
||
路由邏輯(2026-04-26 統帥鐵律:111 = 唯一 Ollama,備援只用 Gemini):
|
||
111 HEALTHY → 主 111,fallback [Gemini]
|
||
111 SLOW/DEGRADED/OFFLINE → 主 Gemini,fallback [Nemotron, Claude]
|
||
Gemini quota 超過 → 主 Nemotron,fallback [Claude]
|
||
|
||
設計說明:
|
||
- 188 CPU-only 禁止用於即時回應(0.45 tok/s),完全移出 routing chain
|
||
- 唯一 Ollama 主機:192.168.0.111(M1 Pro, Metal 加速)
|
||
- 不直接依賴 AIProviderEnum(P1.2 Engineer-A 整合時再對齊)
|
||
- 返回輕量 OllamaRoutingResult,含主 endpoint + fallback 清單
|
||
- 只檢查 111(不再並行檢查 188)
|
||
- 切換觸發時寫 audit_logs service="ollama_failover"
|
||
- clear_cache() 方法供 OllamaAutoRecoveryService 切回後清空路由快取
|
||
|
||
版本: v2.0
|
||
建立: 2026-04-25 (台北時區)
|
||
建立者: Claude Engineer-C (P1.1b)
|
||
# Created 2026-04-25 P1.1 by Claude Engineer-C
|
||
# 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import datetime
|
||
from dataclasses import dataclass, field
|
||
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
|
||
# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo,保證一定有 +8 時區
|
||
# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC
|
||
from datetime import timezone, timedelta
|
||
|
||
import structlog
|
||
|
||
from src.core.config import get_settings
|
||
|
||
# 台北時區 +8(標準庫保險絲,100% 可用)
|
||
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
|
||
TAIPEI_TZ = timezone(timedelta(hours=8))
|
||
from src.services.ollama_health_monitor import (
|
||
HealthReport,
|
||
HealthStatus,
|
||
OllamaHealthMonitor,
|
||
get_ollama_health_monitor,
|
||
)
|
||
|
||
logger = structlog.get_logger(__name__)
|
||
|
||
|
||
# =============================================================================
|
||
# 路由結果模型(輕量,P1.2 整合時轉換為 RoutingDecision)
|
||
# =============================================================================
|
||
|
||
|
||
@dataclass
|
||
class OllamaEndpoint:
|
||
"""Ollama 端點描述"""
|
||
|
||
url: str
|
||
provider_name: str # 給 AIRouterExecutor 用的 provider 名稱
|
||
model: str
|
||
|
||
def to_dict(self) -> dict:
|
||
return {"url": self.url, "provider_name": self.provider_name, "model": self.model}
|
||
|
||
|
||
@dataclass
|
||
class OllamaRoutingResult:
|
||
"""
|
||
Ollama 容災路由結果
|
||
|
||
P1.2 Engineer-A 整合時,將此結果轉換為 ai_router.RoutingDecision:
|
||
- selected_provider = AIProviderEnum[result.primary.provider_name.upper()] or 新的 OLLAMA_188
|
||
- selected_model = result.primary.model
|
||
- fallback_chain = [(AIProviderEnum[p.provider_name.upper()], p.model) for p in result.fallback_chain]
|
||
"""
|
||
|
||
primary: OllamaEndpoint
|
||
fallback_chain: list[OllamaEndpoint]
|
||
routing_reason: str
|
||
health_111: HealthReport
|
||
health_188: HealthReport | None = None # optional backward-compat(188 不在路由鏈時為 None)
|
||
|
||
def all_endpoints_in_order(self) -> list[OllamaEndpoint]:
|
||
"""返回完整的優先序端點列表(primary 在前)"""
|
||
return [self.primary, *self.fallback_chain]
|
||
|
||
def to_dict(self) -> dict:
|
||
return {
|
||
"primary": {
|
||
"url": self.primary.url,
|
||
"provider": self.primary.provider_name,
|
||
"model": self.primary.model,
|
||
},
|
||
"fallback_chain": [
|
||
{"url": e.url, "provider": e.provider_name, "model": e.model} # noqa: E501
|
||
for e in self.fallback_chain
|
||
],
|
||
"routing_reason": self.routing_reason,
|
||
"health_111": self.health_111.to_dict(),
|
||
"health_188": self.health_188.to_dict() if self.health_188 else None,
|
||
}
|
||
|
||
|
||
# =============================================================================
|
||
# 已知 Fallback 端點定義(Nemotron / Gemini / Claude)
|
||
# =============================================================================
|
||
|
||
# 以 provider_name 對應 ai_router.AIProviderEnum 的 value
|
||
_NEMOTRON_ENDPOINT = OllamaEndpoint(
|
||
url="", # Nemotron 不是 HTTP URL,由 AIRouterExecutor 從 Registry 取得
|
||
provider_name="nemotron",
|
||
model="nvidia/nemotron-mini-4b-instruct",
|
||
)
|
||
_GEMINI_ENDPOINT = OllamaEndpoint(
|
||
url="",
|
||
provider_name="gemini",
|
||
model="gemini-1.5-flash",
|
||
)
|
||
_CLAUDE_ENDPOINT = OllamaEndpoint(
|
||
url="",
|
||
provider_name="claude",
|
||
model="claude-haiku-4-5-20251001",
|
||
)
|
||
|
||
|
||
# =============================================================================
|
||
# OllamaFailoverManager
|
||
# =============================================================================
|
||
|
||
|
||
class OllamaFailoverManager:
|
||
"""
|
||
Ollama 自動容災管理器
|
||
|
||
並行檢查 111 + 188,依健康狀態選擇最佳路由。
|
||
|
||
使用方式:
|
||
manager = OllamaFailoverManager()
|
||
result = await manager.select_provider()
|
||
# result.primary.url → 使用的 Ollama URL
|
||
# result.fallback_chain → 依序 fallback
|
||
|
||
2026-04-25 Claude Engineer-C (P1.1b)
|
||
"""
|
||
|
||
def __init__(
|
||
self,
|
||
health_monitor: OllamaHealthMonitor | None = None,
|
||
recovery_callback=None,
|
||
) -> None:
|
||
self._monitor = health_monitor or get_ollama_health_monitor()
|
||
self._settings = get_settings()
|
||
# 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
|
||
# recovery_callback: async callable(provider_name: str) → None
|
||
# OllamaAutoRecoveryService.set_current_primary 在 failover 時被通知,
|
||
# 避免重啟後 _current_primary 停留在 "ollama" 而永不啟動恢復監控
|
||
self._recovery_callback = recovery_callback
|
||
|
||
# -------------------------------------------------------------------------
|
||
# Public API
|
||
# -------------------------------------------------------------------------
|
||
|
||
async def select_provider(
|
||
self,
|
||
task_type: str = "",
|
||
context: dict | None = None,
|
||
) -> OllamaRoutingResult:
|
||
"""
|
||
檢查 111 健康狀態,返回路由結果。
|
||
|
||
2026-04-26 統帥鐵律:唯一 Ollama = 111,188 禁止用於即時回應。
|
||
只檢查 111,不再並行檢查 188。
|
||
|
||
Args:
|
||
task_type: 任務類型(預留,目前未影響路由邏輯)
|
||
context: 額外上下文(預留)
|
||
|
||
Returns:
|
||
OllamaRoutingResult
|
||
"""
|
||
url_111 = self._settings.OLLAMA_URL
|
||
|
||
# 只檢查 111(188 移出 routing chain)
|
||
try:
|
||
health_111 = await self._monitor.check(url_111)
|
||
except Exception as e:
|
||
health_111 = HealthReport(status=HealthStatus.OFFLINE, reason=f"check error: {e}")
|
||
|
||
result = self._decide_route(
|
||
health_111=health_111,
|
||
url_111=url_111,
|
||
)
|
||
|
||
# Gemini 帳單熔斷(quota gate)
|
||
# 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
|
||
if result.primary.provider_name == "gemini":
|
||
quota_ok = await self._check_gemini_quota()
|
||
if not quota_ok:
|
||
quota = getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000)
|
||
logger.warning(
|
||
"gemini_quota_exceeded_fallback_to_nemotron",
|
||
quota=quota,
|
||
health_111=health_111.status.value,
|
||
)
|
||
# 2026-04-26 統帥鐵律:188 移出,quota 超過 → Nemotron → Claude
|
||
result = self._build_quota_exceeded_route(health_111=health_111)
|
||
# Quota 耗盡 Telegram 告警(24h dedup)
|
||
try:
|
||
from src.services.failover_alerter import get_failover_alerter
|
||
from src.core.redis_client import get_redis
|
||
_current_count = quota
|
||
try:
|
||
_redis = get_redis()
|
||
if _redis is not None:
|
||
_key = f"ollama:gemini_daily_count:{datetime.date.today().isoformat()}"
|
||
_raw = await _redis.get(_key)
|
||
_current_count = int(_raw or 0)
|
||
except Exception:
|
||
pass
|
||
await get_failover_alerter().alert_gemini_quota_exceeded({
|
||
"quota": quota,
|
||
"current_count": _current_count,
|
||
})
|
||
except Exception as _alert_err:
|
||
logger.warning(
|
||
"gemini_quota_alert_dispatch_failed",
|
||
error=str(_alert_err),
|
||
)
|
||
|
||
# 寫入 audit_log(best-effort)
|
||
await self._write_failover_audit(result)
|
||
|
||
logger.info(
|
||
"ollama_failover_decision",
|
||
primary=result.primary.provider_name,
|
||
primary_url=result.primary.url,
|
||
reason=result.routing_reason,
|
||
fallback_count=len(result.fallback_chain),
|
||
health_111=health_111.status.value,
|
||
)
|
||
|
||
# 通知 recovery service 當前 primary(跨重啟持久化)
|
||
# 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
|
||
if self._recovery_callback is not None:
|
||
try:
|
||
await self._recovery_callback(result.primary.provider_name)
|
||
except Exception as e:
|
||
logger.warning(
|
||
"ollama_failover_recovery_callback_failed",
|
||
error=str(e),
|
||
)
|
||
|
||
return result
|
||
|
||
# -------------------------------------------------------------------------
|
||
# 路由決策邏輯
|
||
# -------------------------------------------------------------------------
|
||
|
||
def _decide_route(
|
||
self,
|
||
health_111: HealthReport,
|
||
url_111: str,
|
||
) -> OllamaRoutingResult:
|
||
"""
|
||
決策矩陣(2026-04-26 統帥鐵律:唯一 Ollama=111,備援只用 Gemini):
|
||
|
||
111 HEALTHY → primary=111, fallback=[Gemini]
|
||
111 SLOW → primary=Gemini, fallback=[111, Nemotron, Claude]
|
||
111 DEGRADED → primary=Gemini, fallback=[Nemotron, Claude]
|
||
111 OFFLINE → primary=Gemini, fallback=[Nemotron, Claude]
|
||
|
||
188 完全移出 routing chain(CPU-only 0.45 tok/s,禁止即時回應)。
|
||
Gemini quota 超過由 _build_quota_exceeded_route() 接管。
|
||
"""
|
||
model_111 = self._settings.OLLAMA_HEALTH_CHECK_MODEL
|
||
ep_111 = OllamaEndpoint(url=url_111, provider_name="ollama", model=model_111)
|
||
|
||
now_ts = datetime.datetime.now(TAIPEI_TZ).isoformat()
|
||
|
||
if health_111.status == HealthStatus.HEALTHY:
|
||
return OllamaRoutingResult(
|
||
primary=ep_111,
|
||
fallback_chain=[_GEMINI_ENDPOINT],
|
||
routing_reason="111 HEALTHY → 主 111",
|
||
health_111=health_111,
|
||
)
|
||
|
||
if health_111.status == HealthStatus.SLOW:
|
||
return OllamaRoutingResult(
|
||
primary=_GEMINI_ENDPOINT,
|
||
fallback_chain=[ep_111, _NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
|
||
routing_reason=f"111 SLOW → 切 Gemini at {now_ts}",
|
||
health_111=health_111,
|
||
)
|
||
|
||
# DEGRADED / OFFLINE
|
||
status_label = health_111.status.value
|
||
return OllamaRoutingResult(
|
||
primary=_GEMINI_ENDPOINT,
|
||
fallback_chain=[_NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
|
||
routing_reason=f"111 {status_label} → 切 Gemini at {now_ts}",
|
||
health_111=health_111,
|
||
)
|
||
|
||
# -------------------------------------------------------------------------
|
||
# Gemini 帳單熔斷(quota gate)
|
||
# 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
|
||
# -------------------------------------------------------------------------
|
||
|
||
async def _check_gemini_quota(self) -> bool:
|
||
"""
|
||
檢查每日 Gemini call 配額,超過上限則禁用。
|
||
|
||
Redis key: ollama:gemini_daily_count:{YYYY-MM-DD},TTL 86400s
|
||
計數 atomic(incr)。
|
||
|
||
Returns:
|
||
True → 仍在配額內,可使用 Gemini
|
||
False → 已超配額,應切到 188+Nemotron
|
||
|
||
fail-open:Redis 不可用時允許走 Gemini(不阻擋服務)
|
||
"""
|
||
try:
|
||
from src.core.redis_client import get_redis
|
||
redis = get_redis()
|
||
if redis is None:
|
||
return True # fail-open
|
||
quota = getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000)
|
||
key = f"ollama:gemini_daily_count:{datetime.date.today().isoformat()}"
|
||
|
||
# 2026-04-26 Wave5 B3-fix by Claude Engineer-A4 — atomic pipeline 修復 TOCTOU
|
||
# 原實作:GET → 判斷 → INCR → EXPIRE(分四步,INCR 後 crash 會丟 TTL,
|
||
# 且並行請求在 GET/INCR 之間競爭導致配額超發)
|
||
# 修法:pipeline 原子執行 SET NX(首次設 TTL) + INCR,用 INCR 後的新值判斷
|
||
pipe = redis.pipeline()
|
||
pipe.set(key, 0, ex=86400, nx=True) # 僅首次寫入設 TTL;已存在則跳過
|
||
pipe.incr(key) # 原子遞增,回傳遞增後的值
|
||
results = await pipe.execute()
|
||
new_count = int(results[1]) # results[1] = INCR 後新值
|
||
|
||
# 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert) — 刷新 Gemini Prometheus Gauge
|
||
# 每次 quota check 時同步更新,讓 Prometheus 取到最新值
|
||
try:
|
||
from src.core.metrics import GEMINI_DAILY_CALL_COUNT, GEMINI_DAILY_QUOTA
|
||
GEMINI_DAILY_CALL_COUNT.set(new_count)
|
||
GEMINI_DAILY_QUOTA.set(quota)
|
||
except Exception:
|
||
pass # metric 更新失敗不阻斷主路由邏輯
|
||
|
||
if new_count > quota:
|
||
# 已超配額(INCR 後 > quota),回退不是必要的(最多超發 1 次)
|
||
# 但要回傳 False 讓 router 切到 188
|
||
return False
|
||
return True
|
||
except Exception as e:
|
||
# 2026-04-27 Wave8-X2 by Claude — B14 quota fail-closed
|
||
# 原 fail-open:Redis 異常 → return True → Gemini 盲開 → 費用鐵律違反
|
||
# 修法:Redis 異常時 fail-closed,拒絕走 Gemini,讓 fallback chain 接手 188/Nemotron
|
||
# 費用安全 > 服務可用性(統帥鐵律:費用變更必須停下)
|
||
logger.exception(
|
||
"gemini_quota_check_failed_failing_closed",
|
||
error=str(e),
|
||
security_note="Redis 異常時為費用安全 fail-closed,切到 fallback chain",
|
||
)
|
||
# 嘗試告警(best-effort,不阻塞路由)
|
||
try:
|
||
from src.services.failover_alerter import get_failover_alerter
|
||
|
||
await get_failover_alerter().alert_gemini_quota_exceeded({
|
||
"quota": getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000),
|
||
"current_count": "unknown (Redis error)",
|
||
"reason": "fail_closed_due_to_redis_error",
|
||
})
|
||
except Exception:
|
||
pass
|
||
return False # fail-closed:拒絕 Gemini,讓 fallback chain(188/Nemotron)接手
|
||
|
||
def _build_quota_exceeded_route(
|
||
self,
|
||
health_111: HealthReport,
|
||
) -> OllamaRoutingResult:
|
||
"""
|
||
Gemini 配額耗盡時的備援路由:primary=Nemotron, fallback=[Claude]
|
||
2026-04-26 統帥鐵律:188 移出,quota 超過直接走 Nemotron → Claude。
|
||
"""
|
||
return OllamaRoutingResult(
|
||
primary=_NEMOTRON_ENDPOINT,
|
||
fallback_chain=[_CLAUDE_ENDPOINT],
|
||
routing_reason="Gemini quota exceeded → Nemotron 備援",
|
||
health_111=health_111,
|
||
)
|
||
|
||
# -------------------------------------------------------------------------
|
||
# Recovery API(供 OllamaAutoRecoveryService 呼叫)
|
||
# -------------------------------------------------------------------------
|
||
|
||
def set_recovery_callback(self, callback) -> None:
|
||
"""
|
||
設定 recovery callback(供 lifespan wiring 使用)。
|
||
callback signature: async (provider_name: str) -> None
|
||
|
||
# 2026-04-25 P1.2 by Claude Engineer-A2 — failover 整合到 ai_router + lifespan
|
||
"""
|
||
self._recovery_callback = callback
|
||
|
||
async def clear_cache(self) -> None:
|
||
"""
|
||
清空路由決策快取,讓下次 select_provider 重新評估健康狀態。
|
||
OllamaAutoRecoveryService 在偵測 111 恢復後呼叫此方法。
|
||
|
||
2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
|
||
# 2026-04-25 P1.2 by Claude Engineer-A2 — 改用 make_cache_key 動態組 key,消除硬編碼 IP
|
||
"""
|
||
try:
|
||
from src.core.redis_client import get_redis
|
||
from src.services.ollama_health_monitor import make_cache_key
|
||
redis = get_redis()
|
||
if redis is None:
|
||
return
|
||
# 動態由 settings URL 組 cache key,避免硬編碼 IP
|
||
keys = [
|
||
make_cache_key(self._settings.OLLAMA_URL),
|
||
make_cache_key(self._settings.OLLAMA_FALLBACK_URL or ""),
|
||
]
|
||
for k in keys:
|
||
if k and k != "ollama_health:": # 空 URL 會產生無意義的 key,跳過
|
||
await redis.delete(k)
|
||
logger.info(
|
||
"ollama_failover_cache_cleared",
|
||
service="ollama_failover",
|
||
reason="recovery_triggered",
|
||
)
|
||
except Exception as e:
|
||
logger.debug("ollama_failover_clear_cache_failed", error=str(e))
|
||
|
||
def notify_recovery(self, provider: str) -> None:
|
||
"""
|
||
預留:P1.5 Engineer 接入 Telegram alerter 時使用。
|
||
目前僅寫 structlog audit。
|
||
|
||
2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
|
||
"""
|
||
logger.info(
|
||
"ollama_recovery_notified",
|
||
service="ollama_failover",
|
||
provider=provider,
|
||
action="recovery_received",
|
||
)
|
||
|
||
# -------------------------------------------------------------------------
|
||
# Audit Log
|
||
# -------------------------------------------------------------------------
|
||
|
||
async def _write_failover_audit(self, result: OllamaRoutingResult) -> None:
|
||
"""
|
||
切換觸發時寫 structlog audit(best-effort)+ Telegram 告警
|
||
|
||
# 2026-04-25 critic-fix Part2 B1 by Claude Engineer-C2
|
||
# 原 AuditLog DB 寫入使用不存在的欄位(service/action/target/status/metadata)
|
||
# → SQLAlchemy crash → except 吃掉 → 零稽核
|
||
# 修法:刪除 DB 寫入路徑,改用 structlog only(audit 不依賴 DB schema)
|
||
|
||
# 2026-04-25 P1.5 by Claude Engineer-D — 新增 Telegram 告警(dedup 10min)
|
||
|
||
service="ollama_failover"(per 任務規格)
|
||
僅在 primary 非 111 時記錄(真正發生切換)
|
||
"""
|
||
if result.primary.provider_name == "ollama":
|
||
# 111 正常,無切換事件
|
||
return
|
||
|
||
# 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert) — 記錄 failover Prometheus metric
|
||
try:
|
||
from src.core.metrics import (
|
||
OLLAMA_FAILOVER_TRIGGERED_TOTAL,
|
||
OLLAMA_CURRENT_PRIMARY_IS_OLLAMA,
|
||
)
|
||
OLLAMA_FAILOVER_TRIGGERED_TOTAL.labels(
|
||
from_provider="ollama",
|
||
to_provider=result.primary.provider_name,
|
||
).inc()
|
||
OLLAMA_CURRENT_PRIMARY_IS_OLLAMA.set(0)
|
||
except Exception as _metric_err:
|
||
logger.debug("ollama_failover_metric_error", error=str(_metric_err))
|
||
|
||
logger.info(
|
||
"ollama_failover_triggered",
|
||
service="ollama_failover",
|
||
action="failover_triggered",
|
||
from_provider="ollama",
|
||
to_provider=result.primary.provider_name,
|
||
reason=result.routing_reason,
|
||
primary_url=result.primary.url or result.primary.provider_name,
|
||
health_111=result.health_111.status.value,
|
||
health_188=result.health_188.status.value if result.health_188 else "not_configured",
|
||
)
|
||
|
||
# Telegram 告警(首次切換才通知,dedup 10min 內建)
|
||
# 2026-04-25 P1.5 by Claude Engineer-D — 告警失敗不阻斷主路由邏輯
|
||
try:
|
||
from src.services.failover_alerter import get_failover_alerter
|
||
fallback_chain_str = " → ".join(
|
||
p.provider_name for p in result.fallback_chain
|
||
)
|
||
alerter = get_failover_alerter()
|
||
await alerter.alert_failover({
|
||
"to_provider": result.primary.provider_name,
|
||
"model": result.primary.model,
|
||
"reason": result.routing_reason,
|
||
"timestamp": datetime.datetime.now(TAIPEI_TZ).isoformat(),
|
||
"fallback_chain_str": fallback_chain_str,
|
||
})
|
||
except Exception as e:
|
||
logger.warning("failover_alert_failed", error=str(e))
|
||
|
||
|
||
# =============================================================================
|
||
# Singleton
|
||
# =============================================================================
|
||
|
||
_failover_manager: OllamaFailoverManager | None = None
|
||
|
||
|
||
def get_ollama_failover_manager() -> OllamaFailoverManager:
|
||
"""取得 OllamaFailoverManager singleton"""
|
||
global _failover_manager
|
||
if _failover_manager is None:
|
||
_failover_manager = OllamaFailoverManager()
|
||
return _failover_manager
|
||
|
||
|
||
def reset_ollama_failover_manager() -> None:
|
||
"""重置 singleton(測試用)"""
|
||
global _failover_manager
|
||
_failover_manager = None
|