Files
awoooi/apps/api/src/services/ollama_failover_manager.py
Your Name 36aeea80a3
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m27s
CD Pipeline / build-and-deploy (push) Successful in 4m22s
CD Pipeline / post-deploy-checks (push) Successful in 2m0s
fix(api): avoid local ollama health blocking gcp route
2026-05-19 12:22:46 +08:00

696 lines
30 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Ollama 自動容災管理 - P1.1b
============================
依 OllamaHealthMonitor 健康狀態決定 Ollama 路由方案。
路由邏輯 (2026-05-03 統帥新令, GCP 三層容災, ADR-110):
GCP-A HEALTHY → primary=GCP-A, fallback=[GCP-B, Local, Gemini]
GCP-A 不健康 + GCP-B HEALTHY → primary=GCP-B, fallback=[Local, Gemini]
GCP-A + GCP-B 都不健康 + Local HEALTHY → primary=Local, fallback=[Gemini]
三台皆非 HEALTHY 但有 SLOW → 依 GCP-A, GCP-B, Local 順序取第一台 SLOW 降級使用
全部 Ollama 不可用 → primary=Gemini, fallback=[Nemotron, Claude]
Gemini quota 超過 → primary=Nemotron, fallback=[Claude]
設計說明:
- GCP-A 主機 (34.143.170.20, SSD, 9x 載速 + 2x 推理)
- GCP-B 備援 (34.21.145.224, SSD, 9x 載速 + 2x 推理)
- Local 最後防線 (192.168.0.111, M1 Pro, Metal 加速, HDD)
- 不直接依賴 AIProviderEnumP1.2 Engineer-A 整合時再對齊)
- 返回輕量 OllamaRoutingResult含主 endpoint + fallback 清單
- 先檢查 primary; 僅在 primary 非 HEALTHY 時才並行檢查其餘兩台 Ollama 主機
- 切換觸發時寫 audit_logs service="ollama_failover"
- clear_cache() 方法供 OllamaAutoRecoveryService 切回後清空路由快取
版本: v3.0
建立: 2026-04-25 (台北時區)
建立者: Claude Engineer-C (P1.1b)
更新: 2026-05-03 ogt — GCP 三層容災ADR-110GCP-A → GCP-B → Local → Gemini
# Created 2026-04-25 P1.1 by Claude Engineer-C
# 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
# 2026-05-03 ogt: GCP 三層容災ADR-110GCP-A → GCP-B → Local → Gemini
"""
from __future__ import annotations
import asyncio
import datetime
from dataclasses import dataclass
from datetime import timedelta, timezone
import structlog
from src.core.config import get_settings
from src.services.ollama_health_monitor import (
HealthReport,
HealthStatus,
OllamaHealthMonitor,
get_ollama_health_monitor,
)
# Module-level structured logger used by every log call in this file.
logger = structlog.get_logger(__name__)
# Taipei time zone (UTC+8), built from the standard library so it is always available.
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
# A fixed timezone(timedelta(hours=8)) replaces zoneinfo so a +8 zone is guaranteed.
# Per the original note, a failed zoneinfo.ZoneInfo("Asia/Taipei") lookup left the
# zone as None, so datetime.now(None) no longer produced Taipei time
# (the note calls the result UTC — NOTE(review): confirm; it is naive local time).
TAIPEI_TZ = timezone(timedelta(hours=8))
# =============================================================================
# 路由結果模型輕量P1.2 整合時轉換為 RoutingDecision
# =============================================================================
@dataclass
class OllamaEndpoint:
    """Describe one routable endpoint: a URL, a provider name and a model id."""

    url: str
    # Provider name handed to AIRouterExecutor.
    provider_name: str
    model: str

    def to_dict(self) -> dict:
        """Return the three fields as a plain dict keyed by field name."""
        return dict(
            url=self.url,
            provider_name=self.provider_name,
            model=self.model,
        )
@dataclass
class OllamaRoutingResult:
    """Outcome of a failover routing decision (three-tier layout, ADR-110).

    Integration note carried over from the original author (P1.2 Engineer-A):
    this result is meant to be converted into ai_router.RoutingDecision as
    - selected_provider = AIProviderEnum[result.primary.provider_name.upper()]
    - selected_model = result.primary.model
    - fallback_chain = [(AIProviderEnum[p.provider_name.upper()], p.model)
      for p in result.fallback_chain]
    """

    primary: OllamaEndpoint
    fallback_chain: list[OllamaEndpoint]
    routing_reason: str
    # Health of the first tier (this field was previously named health_111).
    health_gcp_a: HealthReport
    # Health of the second tier; None when it was not supplied.
    health_gcp_b: HealthReport | None = None
    # Health of the third tier; None when it was not supplied.
    health_local: HealthReport | None = None

    @property
    def health_111(self) -> HealthReport:
        """Backward-compatible alias for health_gcp_a (older tests and logs)."""
        return self.health_gcp_a

    def all_endpoints_in_order(self) -> list[OllamaEndpoint]:
        """Return every endpoint in priority order, primary first."""
        ordered = [self.primary]
        ordered.extend(self.fallback_chain)
        return ordered

    def to_dict(self) -> dict:
        """Serialise the decision, including health reports, to plain dicts."""

        def _endpoint(ep) -> dict:
            # Note the key is "provider", not "provider_name".
            return {"url": ep.url, "provider": ep.provider_name, "model": ep.model}

        def _optional_health(report):
            # A falsy report serialises as None.
            return report.to_dict() if report else None

        return {
            "primary": _endpoint(self.primary),
            "fallback_chain": [_endpoint(ep) for ep in self.fallback_chain],
            "routing_reason": self.routing_reason,
            "health_gcp_a": self.health_gcp_a.to_dict(),
            "health_gcp_b": _optional_health(self.health_gcp_b),
            "health_local": _optional_health(self.health_local),
        }
# =============================================================================
# 已知 Fallback 端點定義Nemotron / Gemini / Claude
# =============================================================================
# 以 provider_name 對應 ai_router.AIProviderEnum 的 value
# Fixed endpoint descriptors for the non-Ollama providers that appear in
# fallback chains. All three carry an empty url.
# NOTE(review): the model ids are hard-coded here; confirm each is still served
# by its provider.
_NEMOTRON_ENDPOINT = OllamaEndpoint(
url="", # Nemotron is not addressed by an HTTP URL; AIRouterExecutor obtains it from the Registry
provider_name="nemotron",
model="nvidia/nemotron-mini-4b-instruct",
)
# presumably resolved by AIRouterExecutor the same way as Nemotron — TODO confirm
_GEMINI_ENDPOINT = OllamaEndpoint(
url="",
provider_name="gemini",
model="gemini-1.5-flash",
)
# presumably resolved by AIRouterExecutor the same way as Nemotron — TODO confirm
_CLAUDE_ENDPOINT = OllamaEndpoint(
url="",
provider_name="claude",
model="claude-haiku-4-5-20251001",
)
# =============================================================================
# OllamaFailoverManager
# =============================================================================
class OllamaFailoverManager:
"""
Ollama 自動容災管理器
先檢查 primary; primary 非 HEALTHY 時才並行檢查 secondary 與 tertiary, 依健康狀態選擇最佳路由。
使用方式:
manager = OllamaFailoverManager()
result = await manager.select_provider()
# result.primary.url → 使用的 Ollama URL
# result.fallback_chain → 依序 fallback
2026-04-25 Claude Engineer-C (P1.1b)
"""
def __init__(
    self,
    health_monitor: OllamaHealthMonitor | None = None,
    recovery_callback=None,
) -> None:
    """Store the health monitor, the settings object and the recovery hook.

    Args:
        health_monitor: Monitor used for endpoint health checks. When falsy,
            the instance returned by get_ollama_health_monitor() is used.
        recovery_callback: Optional async callable(provider_name: str) -> None.
            select_provider() awaits it with the chosen primary's provider name.
    """
    if health_monitor:
        self._monitor = health_monitor
    else:
        self._monitor = get_ollama_health_monitor()
    self._settings = get_settings()
    # 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
    # Per the original note: OllamaAutoRecoveryService.set_current_primary is
    # notified on failover through this hook, so that after a restart its
    # _current_primary does not stay at "ollama" and recovery monitoring
    # still starts.
    self._recovery_callback = recovery_callback
# -------------------------------------------------------------------------
# Public API
# -------------------------------------------------------------------------
async def select_provider(
    self,
    task_type: str = "",
    context: dict | None = None,
) -> OllamaRoutingResult:
    """Choose the primary endpoint and fallback chain for the next model call.

    Preference order (ADR-110, revised 2026-05-04):
    OLLAMA_URL -> OLLAMA_SECONDARY_URL -> OLLAMA_FALLBACK_URL -> Gemini ->
    Nemotron -> Claude.

    The primary URL is probed first, on its own. Only when it is not HEALTHY
    are the secondary and tertiary URLs probed (concurrently) and the decision
    delegated to _decide_route(). When the outcome is Gemini, the daily quota
    gate runs and may replace the route with Nemotron -> Claude.

    Side effects: calls _write_failover_audit(), emits the
    "ollama_failover_decision" log event, and awaits the recovery callback
    (when one is set) with the chosen primary's provider name. A failing
    callback or alert is logged and does not fail the call.

    Args:
        task_type: Reserved; not read by the routing logic.
        context: Reserved; not read by the routing logic.

    Returns:
        The routing decision. health_gcp_b / health_local are None when the
        primary was HEALTHY, because those tiers were not probed.
    """
    # 2026-05-04 ogt: neutral names (primary/secondary/tertiary) are used so
    # logs do not imply a specific host; which machine each URL points at is
    # decided by deployment configuration.
    url_primary = self._settings.OLLAMA_URL
    url_secondary = self._settings.OLLAMA_SECONDARY_URL
    url_tertiary = self._settings.OLLAMA_FALLBACK_URL

    def _to_health(r, label: str) -> HealthReport:
        # A raised/returned exception is treated as an OFFLINE report.
        if isinstance(r, Exception):
            return HealthReport(status=HealthStatus.OFFLINE, reason=f"{label} check error: {r}")
        return r

    def _short(url: str) -> str:
        from urllib.parse import urlparse

        return urlparse(url).hostname or url

    # 2026-05-19 Codex: alert-fast path must not wait for the slow local lane
    # when the primary is already healthy. The old gather() over all three
    # hosts let the slowest health check dominate every routing decision.
    try:
        primary_raw = await self._monitor.check(url_primary)
    except Exception as exc:
        primary_raw = exc

    health_gcp_a = _to_health(primary_raw, f"primary({url_primary})")
    health_gcp_b: HealthReport | None = None
    health_local: HealthReport | None = None

    if health_gcp_a.status == HealthStatus.HEALTHY:
        model = self._settings.OLLAMA_HEALTH_CHECK_MODEL
        fallback_chain = [
            OllamaEndpoint(url=url_secondary, provider_name="ollama_gcp_b", model=model),
            OllamaEndpoint(url=url_tertiary, provider_name="ollama_local", model=model),
            _GEMINI_ENDPOINT,
        ]
        result = OllamaRoutingResult(
            primary=OllamaEndpoint(url=url_primary, provider_name="ollama_gcp_a", model=model),
            fallback_chain=fallback_chain,
            routing_reason=f"primary({_short(url_primary)}) HEALTHY",
            health_gcp_a=health_gcp_a,
            health_gcp_b=None,
            health_local=None,
        )
    else:
        # Only when the primary is unhealthy are the two remaining tiers
        # probed, concurrently, so failover to them is preserved.
        results_raw = await asyncio.gather(
            self._monitor.check(url_secondary),
            self._monitor.check(url_tertiary),
            return_exceptions=True,
        )
        health_gcp_b = _to_health(results_raw[0], f"secondary({url_secondary})")
        health_local = _to_health(results_raw[1], f"tertiary({url_tertiary})")
        result = self._decide_route(
            health_gcp_a=health_gcp_a,
            health_gcp_b=health_gcp_b,
            health_local=health_local,
            url_gcp_a=url_primary,
            url_gcp_b=url_secondary,
            url_local=url_tertiary,
        )

    # Gemini billing circuit breaker (quota gate).
    # 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
    if result.primary.provider_name == "gemini":
        quota_ok = await self._check_gemini_quota()
        if not quota_ok:
            quota = getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000)
            logger.warning(
                "gemini_quota_exceeded_fallback_to_nemotron",
                quota=quota,
                health_gcp_a=health_gcp_a.status.value,
            )
            result = self._build_quota_exceeded_route(health_gcp_a=health_gcp_a)
            # The replacement route is built from the primary's health only.
            # Carry the secondary/tertiary reports over so the audit log and
            # the alert's failed-host list still reflect all probed tiers.
            result.health_gcp_b = health_gcp_b
            result.health_local = health_local
            # Quota-exhausted alert (the original note says the alerter
            # de-duplicates over 24h).
            try:
                from src.core.redis_client import get_redis
                from src.services.failover_alerter import get_failover_alerter

                # Falls back to the quota value when the counter is unreadable.
                _current_count = quota
                try:
                    _redis = get_redis()
                    if _redis is not None:
                        # Same key format as the one built in _check_gemini_quota().
                        # NOTE(review): date.today() is the host-local date, not
                        # TAIPEI_TZ — confirm this is the intended reset boundary.
                        _key = f"ollama:gemini_daily_count:{datetime.date.today().isoformat()}"
                        _raw = await _redis.get(_key)
                        _current_count = int(_raw or 0)
                except Exception as _count_err:
                    # Best-effort: the alert is still sent with the fallback count.
                    logger.debug(
                        "gemini_quota_count_read_failed",
                        error=str(_count_err),
                    )
                await get_failover_alerter().alert_gemini_quota_exceeded({
                    "quota": quota,
                    "current_count": _current_count,
                })
            except Exception as _alert_err:
                logger.warning(
                    "gemini_quota_alert_dispatch_failed",
                    error=str(_alert_err),
                )

    # Audit record (best-effort).
    await self._write_failover_audit(result)

    def _status(report: HealthReport | None) -> str:
        return report.status.value if report else "not_checked"

    logger.info(
        "ollama_failover_decision",
        primary=result.primary.provider_name,
        primary_url=result.primary.url,
        reason=result.routing_reason,
        fallback_count=len(result.fallback_chain),
        health_gcp_a=health_gcp_a.status.value,
        health_gcp_b=_status(health_gcp_b),
        health_local=_status(health_local),
    )

    # Tell the recovery service which provider is now primary.
    # 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
    if self._recovery_callback is not None:
        try:
            await self._recovery_callback(result.primary.provider_name)
        except Exception as e:
            logger.warning(
                "ollama_failover_recovery_callback_failed",
                error=str(e),
            )
    return result
# -------------------------------------------------------------------------
# 路由決策邏輯
# -------------------------------------------------------------------------
def _decide_route(
    self,
    health_gcp_a: HealthReport,
    health_gcp_b: HealthReport,
    health_local: HealthReport,
    url_gcp_a: str,
    url_gcp_b: str,
    url_local: str,
) -> OllamaRoutingResult:
    """Map the three tier health reports onto a routing decision (ADR-110).

    Decision order, first match wins:
      1. primary HEALTHY    -> primary,   fallback [secondary, tertiary, Gemini]
      2. secondary HEALTHY  -> secondary, fallback [tertiary, Gemini]
      3. tertiary HEALTHY   -> tertiary,  fallback [Gemini]
      4. primary SLOW       -> primary,   fallback [secondary, tertiary, Gemini]
      5. secondary SLOW     -> secondary, fallback [tertiary, Gemini]
      6. tertiary SLOW      -> tertiary,  fallback [Gemini]
      7. otherwise          -> Gemini,    fallback [Nemotron, Claude]

    Rows 4-6 (2026-05-04 ogt) prefer a SLOW Ollama host over Gemini. The
    Gemini quota case is not handled here; select_provider() swaps in
    _build_quota_exceeded_route() for it.

    Args:
        health_gcp_a: Health report of the first tier.
        health_gcp_b: Health report of the second tier.
        health_local: Health report of the third tier.
        url_gcp_a: URL of the first tier.
        url_gcp_b: URL of the second tier.
        url_local: URL of the third tier.

    Returns:
        The routing result; all three health reports are attached to it.
    """
    from urllib.parse import urlparse

    def _host(url: str) -> str:
        # Hostname (or IP) of the URL for log labels; the raw URL if there is none.
        return urlparse(url).hostname or url

    def _route(primary, chain, reason) -> OllamaRoutingResult:
        # Every outcome carries the same three health reports.
        return OllamaRoutingResult(
            primary=primary,
            fallback_chain=chain,
            routing_reason=reason,
            health_gcp_a=health_gcp_a,
            health_gcp_b=health_gcp_b,
            health_local=health_local,
        )

    model = self._settings.OLLAMA_HEALTH_CHECK_MODEL
    first = OllamaEndpoint(url=url_gcp_a, provider_name="ollama_gcp_a", model=model)
    second = OllamaEndpoint(url=url_gcp_b, provider_name="ollama_gcp_b", model=model)
    third = OllamaEndpoint(url=url_local, provider_name="ollama_local", model=model)
    stamp = datetime.datetime.now(TAIPEI_TZ).isoformat()
    p_host = _host(url_gcp_a)
    s_host = _host(url_gcp_b)
    t_host = _host(url_local)

    # --- HEALTHY tiers, in priority order ---
    if health_gcp_a.status == HealthStatus.HEALTHY:
        return _route(
            first,
            [second, third, _GEMINI_ENDPOINT],
            f"primary({p_host}) HEALTHY",
        )
    if health_gcp_b.status == HealthStatus.HEALTHY:
        return _route(
            second,
            [third, _GEMINI_ENDPOINT],
            f"primary({p_host}) {health_gcp_a.status.value} → secondary({s_host}) at {stamp}",
        )
    if health_local.status == HealthStatus.HEALTHY:
        return _route(
            third,
            [_GEMINI_ENDPOINT],
            f"primary({p_host}) {health_gcp_a.status.value}"
            f" + secondary({s_host}) {health_gcp_b.status.value}"
            f" → tertiary({t_host}) at {stamp}",
        )

    # --- SLOW tiers, in priority order (degraded but still preferred to Gemini) ---
    if health_gcp_a.status == HealthStatus.SLOW:
        return _route(
            first,
            [second, third, _GEMINI_ENDPOINT],
            f"primary({p_host}) SLOW降級可用at {stamp}",
        )
    if health_gcp_b.status == HealthStatus.SLOW:
        return _route(
            second,
            [third, _GEMINI_ENDPOINT],
            f"primary({p_host}) {health_gcp_a.status.value}"
            f" + secondary({s_host}) SLOW降級可用at {stamp}",
        )
    if health_local.status == HealthStatus.SLOW:
        return _route(
            third,
            [_GEMINI_ENDPOINT],
            f"primary({p_host}) {health_gcp_a.status.value}"
            f" + secondary({s_host}) {health_gcp_b.status.value}"
            f" + tertiary({t_host}) SLOW降級可用at {stamp}",
        )

    # --- No usable Ollama tier: hand over to Gemini ---
    # NOTE(review): this message has no separators between the three tiers and
    # an unmatched ")"; it is reproduced unchanged because it is runtime text.
    return _route(
        _GEMINI_ENDPOINT,
        [_NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
        f"所有 Ollama 不健康primary({p_host}) {health_gcp_a.status.value}"
        f"secondary({s_host}) {health_gcp_b.status.value}"
        f"tertiary({t_host}) {health_local.status.value})→ 切 Gemini at {stamp}",
    )
# -------------------------------------------------------------------------
# Gemini 帳單熔斷quota gate
# 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
# -------------------------------------------------------------------------
async def _check_gemini_quota(self) -> bool:
    """Count this Gemini routing decision against the daily quota.

    The counter lives in Redis under
    ``ollama:gemini_daily_count:{YYYY-MM-DD}`` and is created with a TTL of
    86400 seconds. Each call increments it, including calls that end up over
    quota.

    Returns:
        True when Gemini may be used: the incremented count is within
        GEMINI_DAILY_QUOTA (default 1000), or get_redis() returned None.
        False when the incremented count exceeds the quota, or when anything
        in the check raises (fail-closed; a best-effort alert is attempted).
    """
    try:
        from src.core.redis_client import get_redis

        client = get_redis()
        if client is None:
            # No Redis client at all: Gemini stays allowed.
            return True

        daily_limit = getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000)
        # NOTE(review): date.today() is the host-local date, not TAIPEI_TZ —
        # confirm this is the intended reset boundary.
        counter_key = f"ollama:gemini_daily_count:{datetime.date.today().isoformat()}"

        # 2026-04-26 Wave5 B3-fix by Claude Engineer-A4 — TOCTOU fix.
        # The earlier GET -> compare -> INCR -> EXPIRE sequence could lose the
        # TTL on a crash after INCR and let concurrent requests over-issue.
        # Now SET NX (creates the key with its TTL only once) and INCR are
        # queued on one pipeline and the post-increment value is what gets
        # compared. (The original note calls this atomic — presumably relying
        # on the client's pipeline defaults; confirm.)
        batch = client.pipeline()
        batch.set(counter_key, 0, ex=86400, nx=True)
        batch.incr(counter_key)
        outcome = await batch.execute()
        used = int(outcome[1])  # second reply is the value after INCR

        # 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert): refresh the
        # Gemini Prometheus gauges on every quota check.
        try:
            from src.core.metrics import GEMINI_DAILY_CALL_COUNT, GEMINI_DAILY_QUOTA

            GEMINI_DAILY_CALL_COUNT.set(used)
            GEMINI_DAILY_QUOTA.set(daily_limit)
        except Exception:
            pass  # a metrics failure must not interrupt routing

        # Over quota -> False so the caller routes away from Gemini. The
        # increment is not rolled back.
        return used <= daily_limit
    except Exception as e:
        # 2026-04-27 Wave8-X2 by Claude — B14 quota fail-closed.
        # Previously a Redis error returned True and Gemini was used blind.
        # Cost safety is ranked above availability here, so any error now
        # refuses Gemini and lets the fallback chain take over.
        logger.exception(
            "gemini_quota_check_failed_failing_closed",
            error=str(e),
            security_note="Redis 異常時為費用安全 fail-closed切到 fallback chain",
        )
        # Best-effort alert; never blocks routing.
        try:
            from src.services.failover_alerter import get_failover_alerter

            await get_failover_alerter().alert_gemini_quota_exceeded({
                "quota": getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000),
                "current_count": "unknown (Redis error)",
                "reason": "fail_closed_due_to_redis_error",
            })
        except Exception:
            pass
        return False
def _build_quota_exceeded_route(
    self,
    health_gcp_a: HealthReport,
) -> OllamaRoutingResult:
    """Build the route used once the Gemini quota is exhausted.

    The route is primary=Nemotron with Claude as the only fallback. Only the
    first tier's health report is attached; the other two are left at None.

    2026-05-03 ogt: parameter renamed health_111 -> health_gcp_a (ADR-110).

    Args:
        health_gcp_a: Health report of the first tier, attached to the result.

    Returns:
        The Nemotron -> Claude routing result.
    """
    nemotron_route = OllamaRoutingResult(
        primary=_NEMOTRON_ENDPOINT,
        fallback_chain=[_CLAUDE_ENDPOINT],
        routing_reason="Gemini quota exceeded → Nemotron 備援",
        health_gcp_a=health_gcp_a,
    )
    return nemotron_route
# -------------------------------------------------------------------------
# Recovery API供 OllamaAutoRecoveryService 呼叫)
# -------------------------------------------------------------------------
def set_recovery_callback(self, callback) -> None:
    """Install or replace the recovery callback (used by lifespan wiring).

    2026-04-25 P1.2 by Claude Engineer-A2 — failover integrated into
    ai_router + lifespan.

    Args:
        callback: async callable taking (provider_name: str) and returning None.
    """
    self._recovery_callback = callback
async def clear_cache(self) -> None:
    """Delete the cached health entries so the next routing call re-probes.

    Per the original note, OllamaAutoRecoveryService calls this after it
    detects that the Ollama host has recovered. Any failure is logged at
    debug level and swallowed; nothing happens when get_redis() returns None.

    2026-04-25 P1.2 by Claude Engineer-A2 — keys are derived with
    make_cache_key from the configured URLs instead of hard-coded IPs.
    2026-05-03 ogt: OLLAMA_SECONDARY_URL added (ADR-110).
    """
    try:
        from src.core.redis_client import get_redis
        from src.services.ollama_health_monitor import make_cache_key

        client = get_redis()
        if client is None:
            return

        cache_keys = [
            make_cache_key(self._settings.OLLAMA_URL),
            make_cache_key(self._settings.OLLAMA_SECONDARY_URL or ""),
            make_cache_key(self._settings.OLLAMA_FALLBACK_URL or ""),
        ]
        for cache_key in cache_keys:
            # An empty URL yields the bare prefix, which is not a real entry.
            if not cache_key or cache_key == "ollama_health:":
                continue
            await client.delete(cache_key)

        logger.info(
            "ollama_failover_cache_cleared",
            service="ollama_failover",
            reason="recovery_triggered",
        )
    except Exception as e:
        logger.debug("ollama_failover_clear_cache_failed", error=str(e))
def notify_recovery(self, provider: str) -> None:
    """Log that a recovery notification was received for ``provider``.

    Placeholder reserved for wiring a Telegram alerter (P1.5); for now it only
    writes a structlog audit event.

    Args:
        provider: Name of the provider reported as recovered.
    """
    event_fields = {
        "service": "ollama_failover",
        "provider": provider,
        "action": "recovery_received",
    }
    logger.info("ollama_recovery_notified", **event_fields)
# -------------------------------------------------------------------------
# Audit Log
# -------------------------------------------------------------------------
async def _write_failover_audit(self, result: OllamaRoutingResult) -> None:
    """Record a failover away from Ollama: metrics, audit log event, alert.

    Nothing is recorded when the chosen primary is any Ollama tier
    ("ollama", "ollama_gcp_a", "ollama_gcp_b", "ollama_local"); under the
    three-tier layout those are normal states. Otherwise the method
    increments the failover counter, emits the "ollama_failover_triggered"
    structlog event and dispatches an alert. Metric and alert failures are
    logged and never raised.

    History:
      2026-04-25 critic-fix Part2 B1 (Claude Engineer-C2): the earlier
      AuditLog DB write used non-existent columns and failed silently, so the
      DB path was removed and the audit is structlog-only.
      2026-04-25 P1.5 (Claude Engineer-D): alert dispatch added.

    Args:
        result: The routing decision that was just made.
    """
    # 2026-05-03 ogt: every ollama_* provider counts as normal operation.
    ollama_tiers = ("ollama", "ollama_gcp_a", "ollama_gcp_b", "ollama_local")
    if result.primary.provider_name in ollama_tiers:
        return

    # 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert): failover metrics.
    try:
        from src.core.metrics import (
            OLLAMA_CURRENT_PRIMARY_IS_OLLAMA,
            OLLAMA_FAILOVER_TRIGGERED_TOTAL,
        )

        OLLAMA_FAILOVER_TRIGGERED_TOTAL.labels(
            from_provider="ollama",
            to_provider=result.primary.provider_name,
        ).inc()
        OLLAMA_CURRENT_PRIMARY_IS_OLLAMA.set(0)
    except Exception as _metric_err:
        logger.debug("ollama_failover_metric_error", error=str(_metric_err))

    logger.info(
        "ollama_failover_triggered",
        service="ollama_failover",
        action="failover_triggered",
        from_provider="ollama",
        to_provider=result.primary.provider_name,
        reason=result.routing_reason,
        # Cloud endpoints have an empty url, so fall back to the provider name.
        primary_url=result.primary.url or result.primary.provider_name,
        health_gcp_a=result.health_gcp_a.status.value,
        health_gcp_b=result.health_gcp_b.status.value if result.health_gcp_b else "not_configured",
        health_local=result.health_local.status.value if result.health_local else "not_configured",
    )

    # Alert dispatch (the original notes say the alerter de-duplicates within
    # 10 minutes). A failing alert must not interrupt routing.
    # 2026-05-03 ogt: ADR-110 — failed_host is computed from the health reports.
    try:
        from src.services.failover_alerter import get_failover_alerter

        # Join with an arrow so the ordered chain stays readable; an empty
        # separator ran the provider names together (e.g. "nemotronclaude").
        fallback_chain_str = " → ".join(
            p.provider_name for p in result.fallback_chain
        )

        # List every tier that is not HEALTHY, using the configured URLs
        # instead of hard-coded host labels.
        _failed = []
        if result.health_gcp_a.status != HealthStatus.HEALTHY:
            _failed.append(self._settings.OLLAMA_URL)
        if result.health_gcp_b and result.health_gcp_b.status != HealthStatus.HEALTHY:
            _failed.append(self._settings.OLLAMA_SECONDARY_URL or "secondary")
        if result.health_local and result.health_local.status != HealthStatus.HEALTHY:
            _failed.append(self._settings.OLLAMA_FALLBACK_URL or "tertiary")
        failed_host = " + ".join(_failed) if _failed else "Ollama"

        alerter = get_failover_alerter()
        await alerter.alert_failover({
            "to_provider": result.primary.provider_name,
            "model": result.primary.model,
            "reason": result.routing_reason,
            "timestamp": datetime.datetime.now(TAIPEI_TZ).isoformat(),
            "fallback_chain_str": fallback_chain_str,
            "failed_host": failed_host,
        })
    except Exception as e:
        logger.warning("failover_alert_failed", error=str(e))
# =============================================================================
# Singleton
# =============================================================================
_failover_manager: OllamaFailoverManager | None = None
def get_ollama_failover_manager() -> OllamaFailoverManager:
    """Return the module-wide OllamaFailoverManager, creating it on first use."""
    global _failover_manager
    manager = _failover_manager
    if manager is None:
        manager = OllamaFailoverManager()
        _failover_manager = manager
    return manager
def reset_ollama_failover_manager() -> None:
    """Drop the cached singleton so the next getter call builds a new one (for tests)."""
    global _failover_manager
    _failover_manager = None