696 lines
30 KiB
Python
696 lines
30 KiB
Python
"""
|
||
Ollama 自動容災管理 - P1.1b
|
||
============================
|
||
依 OllamaHealthMonitor 健康狀態決定 Ollama 路由方案。
|
||
|
||
路由邏輯(2026-05-03 統帥新令:GCP 三層容災,ADR-110):
|
||
GCP-A HEALTHY → primary=GCP-A, fallback=[GCP-B, Local]
|
||
GCP-A 不健康 + GCP-B HEALTHY → primary=GCP-B, fallback=[Local]
|
||
GCP-A + GCP-B 都不健康 + Local HEALTHY → primary=Local, fallback=[Gemini]
|
||
全部 Ollama 不健康 → primary=Gemini, fallback=[Nemotron, Claude]
|
||
Gemini quota 超過 → primary=Nemotron, fallback=[Claude]
|
||
|
||
設計說明:
|
||
- GCP-A 主機:34.143.170.20(SSD,9x 載速 + 2x 推理)
|
||
- GCP-B 備援:34.21.145.224(SSD,9x 載速 + 2x 推理)
|
||
- Local 最後防線:192.168.0.111(M1 Pro, Metal 加速,HDD)
|
||
- 不直接依賴 AIProviderEnum(P1.2 Engineer-A 整合時再對齊)
|
||
- 返回輕量 OllamaRoutingResult,含主 endpoint + fallback 清單
|
||
- 並行檢查三台 Ollama 主機健康狀態
|
||
- 切換觸發時寫 audit_logs service="ollama_failover"
|
||
- clear_cache() 方法供 OllamaAutoRecoveryService 切回後清空路由快取
|
||
|
||
版本: v3.0
|
||
建立: 2026-04-25 (台北時區)
|
||
建立者: Claude Engineer-C (P1.1b)
|
||
更新: 2026-05-03 ogt — GCP 三層容災(ADR-110),GCP-A → GCP-B → Local → Gemini
|
||
# Created 2026-04-25 P1.1 by Claude Engineer-C
|
||
# 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
|
||
# 2026-05-03 ogt: GCP 三層容災(ADR-110),GCP-A → GCP-B → Local → Gemini
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import datetime
|
||
from dataclasses import dataclass
|
||
from datetime import timedelta, timezone
|
||
|
||
import structlog
|
||
|
||
from src.core.config import get_settings
|
||
from src.services.ollama_health_monitor import (
|
||
HealthReport,
|
||
HealthStatus,
|
||
OllamaHealthMonitor,
|
||
get_ollama_health_monitor,
|
||
)
|
||
|
||
# Module-level structured logger shared by everything in this file.
logger = structlog.get_logger(__name__)

# Taipei time zone (UTC+8), built from the standard library only so that it is
# always available.
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2:
# timezone(timedelta(hours=8)) replaces zoneinfo. Per the original author's
# note, a failed zoneinfo.ZoneInfo("Asia/Taipei") lookup left the zone as None,
# and datetime.now(None) then produced timestamps that were not Taipei time.
TAIPEI_TZ = timezone(timedelta(hours=8))
|
||
|
||
|
||
# =============================================================================
|
||
# 路由結果模型(輕量,P1.2 整合時轉換為 RoutingDecision)
|
||
# =============================================================================
|
||
|
||
|
||
@dataclass
class OllamaEndpoint:
    """Describe one routable endpoint (an Ollama host or a cloud provider).

    Attributes are documented inline below.
    """

    # Base URL of the host; the cloud-provider entries in this file use "".
    url: str
    # Provider name handed to AIRouterExecutor.
    provider_name: str
    # Model identifier to request from this provider.
    model: str

    def to_dict(self) -> dict:
        """Return the endpoint as a plain dict keyed by its field names."""
        return {"url": self.url, "provider_name": self.provider_name, "model": self.model}
|
||
|
||
|
||
@dataclass
class OllamaRoutingResult:
    """
    Result of one failover routing decision (2026-05-03 ogt: three-tier GCP
    failover, ADR-110).

    Integration note for P1.2 Engineer-A - convert this into
    ai_router.RoutingDecision as follows:
        - selected_provider = AIProviderEnum[result.primary.provider_name.upper()]
        - selected_model    = result.primary.model
        - fallback_chain    = [(AIProviderEnum[p.provider_name.upper()], p.model)
                               for p in result.fallback_chain]
    """

    # Endpoint to try first.
    primary: OllamaEndpoint
    # Endpoints to try, in order, if the primary fails.
    fallback_chain: list[OllamaEndpoint]
    # Human-readable explanation of why this route was chosen.
    routing_reason: str
    # Health of the first Ollama tier (formerly named health_111).
    health_gcp_a: HealthReport
    # Health of the second Ollama tier; None when it was not probed.
    health_gcp_b: HealthReport | None = None
    # Health of the third Ollama tier; None when it was not probed.
    health_local: HealthReport | None = None

    @property
    def health_111(self) -> HealthReport:
        """Backward-compatible alias of ``health_gcp_a`` (old tests / logs)."""
        return self.health_gcp_a

    def all_endpoints_in_order(self) -> list[OllamaEndpoint]:
        """Return every endpoint in priority order, primary first."""
        return [self.primary, *self.fallback_chain]

    @staticmethod
    def _endpoint_summary(endpoint: OllamaEndpoint) -> dict:
        """Serialise an endpoint using the short ``provider`` key.

        Deliberately not ``OllamaEndpoint.to_dict()``: that method emits the
        key ``provider_name``, while this result's dict uses ``provider``.
        """
        return {
            "url": endpoint.url,
            "provider": endpoint.provider_name,
            "model": endpoint.model,
        }

    def to_dict(self) -> dict:
        """Return the whole decision as a plain dict.

        Health reports that were not probed are emitted as ``None``.
        """
        return {
            "primary": self._endpoint_summary(self.primary),
            "fallback_chain": [self._endpoint_summary(e) for e in self.fallback_chain],
            "routing_reason": self.routing_reason,
            "health_gcp_a": self.health_gcp_a.to_dict(),
            "health_gcp_b": self.health_gcp_b.to_dict() if self.health_gcp_b else None,
            "health_local": self.health_local.to_dict() if self.health_local else None,
        }
|
||
|
||
|
||
# =============================================================================
|
||
# 已知 Fallback 端點定義(Nemotron / Gemini / Claude)
|
||
# =============================================================================
|
||
|
||
# Each provider_name below corresponds to a value of ai_router.AIProviderEnum.
# These are shared module-level instances; treat them as read-only.
_NEMOTRON_ENDPOINT = OllamaEndpoint(
    # Nemotron is not addressed by HTTP URL here; AIRouterExecutor resolves it
    # from the Registry.
    url="",
    provider_name="nemotron",
    model="nvidia/nemotron-mini-4b-instruct",
)
_GEMINI_ENDPOINT = OllamaEndpoint(
    url="",
    provider_name="gemini",
    model="gemini-1.5-flash",
)
_CLAUDE_ENDPOINT = OllamaEndpoint(
    url="",
    provider_name="claude",
    model="claude-haiku-4-5-20251001",
)
|
||
|
||
|
||
# =============================================================================
|
||
# OllamaFailoverManager
|
||
# =============================================================================
|
||
|
||
|
||
class OllamaFailoverManager:
    """
    Automatic failover manager for Ollama.

    Probes the three configured Ollama tiers (settings ``OLLAMA_URL``,
    ``OLLAMA_SECONDARY_URL``, ``OLLAMA_FALLBACK_URL``) and picks the best
    route. The primary tier is probed first; the other two are probed
    concurrently only when the primary is not HEALTHY. When no Ollama tier is
    usable the route falls through to Gemini, and to Nemotron when the Gemini
    daily quota gate refuses.

    Usage:
        manager = OllamaFailoverManager()
        result = await manager.select_provider()
        # result.primary.url    -> endpoint to use
        # result.fallback_chain -> ordered fallbacks

    2026-04-25 Claude Engineer-C (P1.1b)
    """

    # Provider names that mean "still served by Ollama". Routing to any of
    # these is normal operation and produces no failover audit or alert.
    _OLLAMA_PROVIDER_NAMES = frozenset(
        {"ollama", "ollama_gcp_a", "ollama_gcp_b", "ollama_local"}
    )

    def __init__(
        self,
        health_monitor: OllamaHealthMonitor | None = None,
        recovery_callback=None,
    ) -> None:
        """
        Args:
            health_monitor: Monitor used to probe hosts; defaults to the
                shared instance from get_ollama_health_monitor().
            recovery_callback: Optional ``async (provider_name: str) -> None``
                awaited after every routing decision.
        """
        self._monitor = health_monitor or get_ollama_health_monitor()
        self._settings = get_settings()
        # 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
        # The callback (OllamaAutoRecoveryService.set_current_primary) is told
        # the chosen primary on every decision. Per the original author's note,
        # this avoids _current_primary staying at "ollama" after a restart and
        # recovery monitoring never starting.
        self._recovery_callback = recovery_callback

    # -------------------------------------------------------------------------
    # Internal helpers
    # -------------------------------------------------------------------------

    @staticmethod
    def _short_host(url: str) -> str:
        """Return the hostname part of *url* for log labels, or *url* itself
        when no hostname can be parsed."""
        from urllib.parse import urlparse

        return urlparse(url).hostname or url

    @staticmethod
    def _as_health_report(raw, label: str) -> HealthReport:
        """Turn a probe outcome into a HealthReport.

        An exception becomes an OFFLINE report carrying the error text; any
        other value is returned unchanged.
        """
        if isinstance(raw, Exception):
            return HealthReport(
                status=HealthStatus.OFFLINE,
                reason=f"{label} check error: {raw}",
            )
        return raw

    @staticmethod
    def _gemini_quota_key() -> str:
        """Return today's Redis key for the Gemini daily call counter.

        NOTE(review): the date comes from date.today(), i.e. the process's
        local time zone rather than TAIPEI_TZ. Kept as-is because other
        readers of this key may build it the same way - confirm before
        changing.
        """
        return f"ollama:gemini_daily_count:{datetime.date.today().isoformat()}"

    # -------------------------------------------------------------------------
    # Public API
    # -------------------------------------------------------------------------

    async def select_provider(
        self,
        task_type: str = "",
        context: dict | None = None,
    ) -> OllamaRoutingResult:
        """
        Decide the current route (ADR-110, revised 2026-05-04):
            Primary(OLLAMA_URL) -> Secondary(OLLAMA_SECONDARY_URL)
            -> Tertiary(OLLAMA_FALLBACK_URL) -> Gemini -> Nemotron -> Claude

        Which physical host sits behind each tier is decided by configuration
        and is not visible in this module. The variable names gcp_a / gcp_b /
        local are historical; they mean primary / secondary / tertiary.

        Side effects: may write a failover audit log and send alerts, consumes
        one unit of the Gemini daily counter when Gemini would be primary, and
        awaits the recovery callback if one is set.

        Args:
            task_type: Task type (reserved; does not affect routing today).
            context: Extra context (reserved; unused).

        Returns:
            OllamaRoutingResult describing the primary and its fallbacks.
        """
        url_primary = self._settings.OLLAMA_URL
        url_secondary = self._settings.OLLAMA_SECONDARY_URL
        url_tertiary = self._settings.OLLAMA_FALLBACK_URL

        # 2026-05-19 Codex: the alert-fast path must not wait for the slow
        # local lane when the primary is already healthy. The old
        # gather(all three) made the slowest host's health timeout dominate
        # every routing decision.
        try:
            primary_raw = await self._monitor.check(url_primary)
        except Exception as exc:
            primary_raw = exc
        health_gcp_a = self._as_health_report(primary_raw, f"primary({url_primary})")
        health_gcp_b: HealthReport | None = None
        health_local: HealthReport | None = None

        if health_gcp_a.status == HealthStatus.HEALTHY:
            model = self._settings.OLLAMA_HEALTH_CHECK_MODEL
            fallback_chain = [
                OllamaEndpoint(url=url_secondary, provider_name="ollama_gcp_b", model=model),
                OllamaEndpoint(url=url_tertiary, provider_name="ollama_local", model=model),
                _GEMINI_ENDPOINT,
            ]
            result = OllamaRoutingResult(
                primary=OllamaEndpoint(url=url_primary, provider_name="ollama_gcp_a", model=model),
                fallback_chain=fallback_chain,
                routing_reason=f"primary({self._short_host(url_primary)}) HEALTHY",
                health_gcp_a=health_gcp_a,
                health_gcp_b=None,
                health_local=None,
            )
        else:
            # Only when the primary is unhealthy are the two lower tiers
            # probed, concurrently. return_exceptions=True keeps one failing
            # probe from cancelling the other.
            results_raw = await asyncio.gather(
                self._monitor.check(url_secondary),
                self._monitor.check(url_tertiary),
                return_exceptions=True,
            )
            health_gcp_b = self._as_health_report(results_raw[0], f"secondary({url_secondary})")
            health_local = self._as_health_report(results_raw[1], f"tertiary({url_tertiary})")

            result = self._decide_route(
                health_gcp_a=health_gcp_a,
                health_gcp_b=health_gcp_b,
                health_local=health_local,
                url_gcp_a=url_primary,
                url_gcp_b=url_secondary,
                url_local=url_tertiary,
            )

        # Gemini billing circuit breaker (quota gate).
        # 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
        if result.primary.provider_name == "gemini":
            quota_ok = await self._check_gemini_quota()
            if not quota_ok:
                quota = getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000)
                logger.warning(
                    "gemini_quota_exceeded_fallback_to_nemotron",
                    quota=quota,
                    health_gcp_a=health_gcp_a.status.value,
                )
                # Carry all three health reports into the replacement route so
                # the audit log and alert can name every failed tier.
                result = self._build_quota_exceeded_route(
                    health_gcp_a=health_gcp_a,
                    health_gcp_b=health_gcp_b,
                    health_local=health_local,
                )
                await self._alert_gemini_quota_exceeded(quota)

        # Audit log (best-effort).
        await self._write_failover_audit(result)

        def _status(report: HealthReport | None) -> str:
            return report.status.value if report else "not_checked"

        logger.info(
            "ollama_failover_decision",
            primary=result.primary.provider_name,
            primary_url=result.primary.url,
            reason=result.routing_reason,
            fallback_count=len(result.fallback_chain),
            health_gcp_a=health_gcp_a.status.value,
            health_gcp_b=_status(health_gcp_b),
            health_local=_status(health_local),
        )

        # Tell the recovery service which provider is primary now.
        # 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
        if self._recovery_callback is not None:
            try:
                await self._recovery_callback(result.primary.provider_name)
            except Exception as e:
                logger.warning(
                    "ollama_failover_recovery_callback_failed",
                    error=str(e),
                )

        return result

    async def _alert_gemini_quota_exceeded(self, quota) -> None:
        """Send the "Gemini quota exhausted" alert; never raises.

        The current counter is re-read from Redis for the message. If that
        read fails, the configured quota is reported as the count instead.
        (The original author notes that the alerter de-duplicates for 24h;
        that logic is not visible here.)
        """
        try:
            from src.core.redis_client import get_redis
            from src.services.failover_alerter import get_failover_alerter

            current_count = quota
            try:
                redis = get_redis()
                if redis is not None:
                    raw = await redis.get(self._gemini_quota_key())
                    current_count = int(raw or 0)
            except Exception as read_err:
                # Best-effort: the alert still goes out with the fallback count.
                logger.debug("gemini_quota_count_read_failed", error=str(read_err))
            await get_failover_alerter().alert_gemini_quota_exceeded({
                "quota": quota,
                "current_count": current_count,
            })
        except Exception as alert_err:
            logger.warning(
                "gemini_quota_alert_dispatch_failed",
                error=str(alert_err),
            )

    # -------------------------------------------------------------------------
    # Routing decision logic
    # -------------------------------------------------------------------------

    def _decide_route(
        self,
        health_gcp_a: HealthReport,
        health_gcp_b: HealthReport,
        health_local: HealthReport,
        url_gcp_a: str,
        url_gcp_b: str,
        url_local: str,
    ) -> OllamaRoutingResult:
        """
        Three-tier decision matrix (2026-05-03 ogt, ADR-110). First match wins:

            primary HEALTHY    -> primary,   fallback=[secondary, tertiary, Gemini]
            secondary HEALTHY  -> secondary, fallback=[tertiary, Gemini]
            tertiary HEALTHY   -> tertiary,  fallback=[Gemini]
            primary SLOW       -> primary (degraded),   fallback=[secondary, tertiary, Gemini]
            secondary SLOW     -> secondary (degraded), fallback=[tertiary, Gemini]
            tertiary SLOW      -> tertiary (degraded),  fallback=[Gemini]
            otherwise          -> Gemini,    fallback=[Nemotron, Claude]

        A Gemini quota overrun is handled afterwards by
        _build_quota_exceeded_route().

        Args:
            health_gcp_a / health_gcp_b / health_local: Health of the primary,
                secondary and tertiary tiers.
            url_gcp_a / url_gcp_b / url_local: URLs of the same tiers.

        Returns:
            The chosen route, carrying all three health reports.
        """
        model = self._settings.OLLAMA_HEALTH_CHECK_MODEL
        ep_gcp_a = OllamaEndpoint(url=url_gcp_a, provider_name="ollama_gcp_a", model=model)
        ep_gcp_b = OllamaEndpoint(url=url_gcp_b, provider_name="ollama_gcp_b", model=model)
        ep_local = OllamaEndpoint(url=url_local, provider_name="ollama_local", model=model)

        now_ts = datetime.datetime.now(TAIPEI_TZ).isoformat()

        # Labels come from the real URLs (IP or hostname) so logs cannot drift
        # from the configuration.
        lbl_p = self._short_host(url_gcp_a)  # primary label
        lbl_s = self._short_host(url_gcp_b)  # secondary label
        lbl_t = self._short_host(url_local)  # tertiary label

        def _route(primary, fallback_chain, reason) -> OllamaRoutingResult:
            # Every branch attaches the same three health reports.
            return OllamaRoutingResult(
                primary=primary,
                fallback_chain=fallback_chain,
                routing_reason=reason,
                health_gcp_a=health_gcp_a,
                health_gcp_b=health_gcp_b,
                health_local=health_local,
            )

        # Primary HEALTHY -> use primary.
        if health_gcp_a.status == HealthStatus.HEALTHY:
            return _route(
                ep_gcp_a,
                [ep_gcp_b, ep_local, _GEMINI_ENDPOINT],
                f"primary({lbl_p}) HEALTHY",
            )

        st_a = health_gcp_a.status.value
        st_b = health_gcp_b.status.value
        st_t = health_local.status.value

        # Primary unhealthy, secondary HEALTHY -> switch to secondary.
        if health_gcp_b.status == HealthStatus.HEALTHY:
            return _route(
                ep_gcp_b,
                [ep_local, _GEMINI_ENDPOINT],
                f"primary({lbl_p}) {st_a} → secondary({lbl_s}) at {now_ts}",
            )

        # Primary + secondary unhealthy, tertiary HEALTHY -> switch to tertiary.
        if health_local.status == HealthStatus.HEALTHY:
            return _route(
                ep_local,
                [_GEMINI_ENDPOINT],
                (
                    f"primary({lbl_p}) {st_a}"
                    f" + secondary({lbl_s}) {st_b}"
                    f" → tertiary({lbl_t}) at {now_ts}"
                ),
            )

        # 2026-05-04 ogt: SLOW fallback. Per the original author's note, when
        # the external network is jittery a SLOW Ollama is still preferred
        # over burning through the Gemini quota.
        if health_gcp_a.status == HealthStatus.SLOW:
            return _route(
                ep_gcp_a,
                [ep_gcp_b, ep_local, _GEMINI_ENDPOINT],
                f"primary({lbl_p}) SLOW(降級可用)at {now_ts}",
            )
        if health_gcp_b.status == HealthStatus.SLOW:
            return _route(
                ep_gcp_b,
                [ep_local, _GEMINI_ENDPOINT],
                (
                    f"primary({lbl_p}) {st_a}"
                    f" + secondary({lbl_s}) SLOW(降級可用)at {now_ts}"
                ),
            )
        if health_local.status == HealthStatus.SLOW:
            return _route(
                ep_local,
                [_GEMINI_ENDPOINT],
                (
                    f"primary({lbl_p}) {st_a}"
                    f" + secondary({lbl_s}) {st_b}"
                    f" + tertiary({lbl_t}) SLOW(降級可用)at {now_ts}"
                ),
            )

        # No Ollama tier is HEALTHY or SLOW -> Gemini.
        return _route(
            _GEMINI_ENDPOINT,
            [_NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
            (
                f"所有 Ollama 不健康(primary({lbl_p}) {st_a},"
                f"secondary({lbl_s}) {st_b},"
                f"tertiary({lbl_t}) {st_t})→ 切 Gemini at {now_ts}"
            ),
        )

    # -------------------------------------------------------------------------
    # Gemini billing circuit breaker (quota gate)
    # 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
    # -------------------------------------------------------------------------

    async def _check_gemini_quota(self) -> bool:
        """
        Count one Gemini routing decision against the daily quota.

        Redis key: ollama:gemini_daily_count:{YYYY-MM-DD}, TTL 86400s, set
        when the key is first created. The counter is incremented on every
        call, including calls that end up over quota.

        Returns:
            True  -> within quota, Gemini may be used.
            False -> over quota, or the check itself failed.

        Failure policy (asymmetric as written):
            - get_redis() returns None -> True (fail-open).
            - any exception           -> False (fail-closed) and a
              best-effort alert is sent.
        NOTE(review): confirm the fail-open branch is intended given the
        cost-safety rationale of the fail-closed branch.
        """
        try:
            from src.core.redis_client import get_redis

            redis = get_redis()
            if redis is None:
                return True  # fail-open
            quota = getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000)
            key = self._gemini_quota_key()

            # 2026-04-26 Wave5 B3-fix by Claude Engineer-A4 - pipeline fixes a TOCTOU.
            # The old GET -> compare -> INCR -> EXPIRE sequence could lose the
            # TTL on a crash after INCR and let concurrent requests overshoot
            # the quota between GET and INCR. Now SET NX (TTL on first write)
            # and INCR are sent in one pipeline, and the value returned by
            # INCR is the one compared.
            pipe = redis.pipeline()
            pipe.set(key, 0, ex=86400, nx=True)  # creates key + TTL only if absent
            pipe.incr(key)
            results = await pipe.execute()
            new_count = int(results[1])  # results[1] is the INCR reply

            # 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert) - refresh the
            # Gemini Prometheus gauges on every quota check.
            try:
                from src.core.metrics import GEMINI_DAILY_CALL_COUNT, GEMINI_DAILY_QUOTA

                GEMINI_DAILY_CALL_COUNT.set(new_count)
                GEMINI_DAILY_QUOTA.set(quota)
            except Exception as metric_err:
                # A metrics failure must not block routing.
                logger.debug("gemini_quota_metric_error", error=str(metric_err))

            # The counter is not rolled back when over quota; the caller just
            # gets False and routes away from Gemini.
            return new_count <= quota

        except Exception as e:
            # 2026-04-27 Wave8-X2 by Claude - B14 quota fail-closed.
            # Previously a Redis error returned True, opening Gemini with no
            # quota control. Now a Redis error refuses Gemini and lets the
            # caller route to the fallback. Cost safety outranks availability.
            logger.exception(
                "gemini_quota_check_failed_failing_closed",
                error=str(e),
                security_note="Redis 異常時為費用安全 fail-closed,切到 fallback chain",
            )
            # Best-effort alert; must not block routing.
            try:
                from src.services.failover_alerter import get_failover_alerter

                await get_failover_alerter().alert_gemini_quota_exceeded({
                    "quota": getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000),
                    "current_count": "unknown (Redis error)",
                    "reason": "fail_closed_due_to_redis_error",
                })
            except Exception as alert_err:
                logger.debug("gemini_quota_fail_closed_alert_failed", error=str(alert_err))
            return False  # fail-closed

    def _build_quota_exceeded_route(
        self,
        health_gcp_a: HealthReport,
        health_gcp_b: HealthReport | None = None,
        health_local: HealthReport | None = None,
    ) -> OllamaRoutingResult:
        """
        Build the route used when the Gemini quota gate refuses:
        primary=Nemotron, fallback=[Claude].

        2026-05-03 ogt: parameter renamed health_111 -> health_gcp_a (ADR-110).

        Args:
            health_gcp_a: Health of the primary Ollama tier.
            health_gcp_b: Health of the secondary tier, if probed (optional,
                added so the audit log can report it).
            health_local: Health of the tertiary tier, if probed (optional).
        """
        return OllamaRoutingResult(
            primary=_NEMOTRON_ENDPOINT,
            fallback_chain=[_CLAUDE_ENDPOINT],
            routing_reason="Gemini quota exceeded → Nemotron 備援",
            health_gcp_a=health_gcp_a,
            health_gcp_b=health_gcp_b,
            health_local=health_local,
        )

    # -------------------------------------------------------------------------
    # Recovery API (called by OllamaAutoRecoveryService)
    # -------------------------------------------------------------------------

    def set_recovery_callback(self, callback) -> None:
        """
        Install the recovery callback (used by lifespan wiring).

        callback signature: async (provider_name: str) -> None

        # 2026-04-25 P1.2 by Claude Engineer-A2 - failover wired into ai_router + lifespan
        """
        self._recovery_callback = callback

    async def clear_cache(self) -> None:
        """
        Delete the cached health entries of the configured Ollama hosts so the
        next select_provider() re-evaluates them. Called by
        OllamaAutoRecoveryService after it detects recovery.

        Best-effort: does nothing when Redis is unavailable, and any error is
        logged at debug level rather than raised.

        2026-04-25 commander directive by Claude Engineer-C - auto switch to Gemini + auto recovery
        # 2026-04-25 P1.2 by Claude Engineer-A2 - keys built with make_cache_key, no hard-coded IPs
        """
        try:
            from src.core.redis_client import get_redis
            from src.services.ollama_health_monitor import make_cache_key

            redis = get_redis()
            if redis is None:
                return
            # Keys are derived from the settings URLs, never hard-coded.
            # 2026-05-03 ogt: OLLAMA_SECONDARY_URL added (ADR-110).
            urls = (
                self._settings.OLLAMA_URL,
                self._settings.OLLAMA_SECONDARY_URL,
                self._settings.OLLAMA_FALLBACK_URL,
            )
            for url in urls:
                if not url:
                    continue  # tier not configured; nothing cached for it
                key = make_cache_key(url)
                # Guard kept from the original: skip a key with no URL part.
                if key and key != "ollama_health:":
                    await redis.delete(key)
            logger.info(
                "ollama_failover_cache_cleared",
                service="ollama_failover",
                reason="recovery_triggered",
            )
        except Exception as e:
            logger.debug("ollama_failover_clear_cache_failed", error=str(e))

    def notify_recovery(self, provider: str) -> None:
        """
        Record that *provider* has recovered. Currently this only writes a
        structlog entry; reserved for the P1.5 Telegram alerter hook.

        2026-04-25 commander directive by Claude Engineer-C - auto switch to Gemini + auto recovery
        """
        logger.info(
            "ollama_recovery_notified",
            service="ollama_failover",
            provider=provider,
            action="recovery_received",
        )

    # -------------------------------------------------------------------------
    # Audit Log
    # -------------------------------------------------------------------------

    async def _write_failover_audit(self, result: OllamaRoutingResult) -> None:
        """
        Record a failover (structlog audit + Prometheus metric + Telegram
        alert), all best-effort. Does nothing when the chosen primary is one
        of the Ollama tiers.

        # 2026-04-25 critic-fix Part2 B1 by Claude Engineer-C2
        # The earlier AuditLog DB write used columns that do not exist, so it
        # crashed inside a swallowing except and produced no audit at all.
        # The DB path was removed; the audit is structlog-only.

        # 2026-04-25 P1.5 by Claude Engineer-D - Telegram alert added
        # (the author notes a 10-minute de-dup inside the alerter).
        """
        # 2026-05-03 ogt: under three-tier failover every ollama_* provider is
        # a normal state, so no alert is needed.
        if result.primary.provider_name in self._OLLAMA_PROVIDER_NAMES:
            return

        # 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert) - failover metric.
        try:
            from src.core.metrics import (
                OLLAMA_CURRENT_PRIMARY_IS_OLLAMA,
                OLLAMA_FAILOVER_TRIGGERED_TOTAL,
            )

            OLLAMA_FAILOVER_TRIGGERED_TOTAL.labels(
                from_provider="ollama",
                to_provider=result.primary.provider_name,
            ).inc()
            # NOTE(review): nothing in this class sets the gauge back to 1;
            # presumably the recovery service does - confirm.
            OLLAMA_CURRENT_PRIMARY_IS_OLLAMA.set(0)
        except Exception as _metric_err:
            logger.debug("ollama_failover_metric_error", error=str(_metric_err))

        logger.info(
            "ollama_failover_triggered",
            service="ollama_failover",
            action="failover_triggered",
            from_provider="ollama",
            to_provider=result.primary.provider_name,
            reason=result.routing_reason,
            primary_url=result.primary.url or result.primary.provider_name,
            health_gcp_a=result.health_gcp_a.status.value,
            health_gcp_b=result.health_gcp_b.status.value if result.health_gcp_b else "not_configured",
            health_local=result.health_local.status.value if result.health_local else "not_configured",
        )

        # Telegram alert.
        # 2026-04-25 P1.5 by Claude Engineer-D - an alert failure must not
        # break routing.
        # 2026-05-03 ogt: ADR-110 - failed_host is computed from the health
        # reports so the message names the hosts that are down.
        try:
            from src.services.failover_alerter import get_failover_alerter

            fallback_chain_str = " → ".join(
                p.provider_name for p in result.fallback_chain
            )
            # List every tier that is not HEALTHY, by its real URL.
            _failed = []
            if result.health_gcp_a.status != HealthStatus.HEALTHY:
                _failed.append(self._settings.OLLAMA_URL)
            if result.health_gcp_b and result.health_gcp_b.status != HealthStatus.HEALTHY:
                _failed.append(self._settings.OLLAMA_SECONDARY_URL or "secondary")
            if result.health_local and result.health_local.status != HealthStatus.HEALTHY:
                _failed.append(self._settings.OLLAMA_FALLBACK_URL or "tertiary")
            failed_host = " + ".join(_failed) if _failed else "Ollama"
            alerter = get_failover_alerter()
            await alerter.alert_failover({
                "to_provider": result.primary.provider_name,
                "model": result.primary.model,
                "reason": result.routing_reason,
                "timestamp": datetime.datetime.now(TAIPEI_TZ).isoformat(),
                "fallback_chain_str": fallback_chain_str,
                "failed_host": failed_host,
            })
        except Exception as e:
            logger.warning("failover_alert_failed", error=str(e))
|
||
|
||
|
||
# =============================================================================
|
||
# Singleton
|
||
# =============================================================================
|
||
|
||
# Process-wide manager instance, created on first use.
_failover_manager: OllamaFailoverManager | None = None


def get_ollama_failover_manager() -> OllamaFailoverManager:
    """Return the shared OllamaFailoverManager, creating it on first call."""
    global _failover_manager
    if _failover_manager is None:
        _failover_manager = OllamaFailoverManager()
    return _failover_manager
|
||
|
||
|
||
def reset_ollama_failover_manager() -> None:
    """Drop the shared manager so the next getter call builds a new one (for tests)."""
    global _failover_manager
    _failover_manager = None
|