fix(api): avoid local ollama health blocking gcp route
This commit is contained in:
@@ -33,19 +33,12 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import datetime
|
||||
from dataclasses import dataclass, field
|
||||
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
|
||||
# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo,保證一定有 +8 時區
|
||||
# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC
|
||||
from datetime import timezone, timedelta
|
||||
from dataclasses import dataclass
|
||||
from datetime import timedelta, timezone
|
||||
|
||||
import structlog
|
||||
|
||||
from src.core.config import get_settings
|
||||
|
||||
# 台北時區 +8(標準庫保險絲,100% 可用)
|
||||
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
|
||||
TAIPEI_TZ = timezone(timedelta(hours=8))
|
||||
from src.services.ollama_health_monitor import (
|
||||
HealthReport,
|
||||
HealthStatus,
|
||||
@@ -55,6 +48,12 @@ from src.services.ollama_health_monitor import (
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
# 台北時區 +8(標準庫保險絲,100% 可用)
|
||||
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
|
||||
# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo,保證一定有 +8 時區
|
||||
# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC
|
||||
TAIPEI_TZ = timezone(timedelta(hours=8))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 路由結果模型(輕量,P1.2 整合時轉換為 RoutingDecision)
|
||||
@@ -203,31 +202,59 @@ class OllamaFailoverManager:
|
||||
url_secondary = self._settings.OLLAMA_SECONDARY_URL # 110:11436 → GCP-B (nginx proxy)
|
||||
url_tertiary = self._settings.OLLAMA_FALLBACK_URL # 110:11437 → Local 111 (nginx proxy)
|
||||
|
||||
# 並行檢查三台 Ollama 主機(asyncio.gather 提升效率)
|
||||
results_raw = await asyncio.gather(
|
||||
self._monitor.check(url_primary),
|
||||
self._monitor.check(url_secondary),
|
||||
self._monitor.check(url_tertiary),
|
||||
return_exceptions=True,
|
||||
)
|
||||
|
||||
def _to_health(r, label: str) -> HealthReport:
    """Normalize one health-check outcome into a ``HealthReport``.

    Args:
        r: Either the value produced by a health check, or the exception
            captured in its place (``asyncio.gather(...,
            return_exceptions=True)`` or an ``except`` clause).
        label: Endpoint label embedded in the failure reason.

    Returns:
        ``r`` unchanged when the check completed; otherwise an OFFLINE
        report whose reason records the failure.
    """
    # gather(return_exceptions=True) also returns CancelledError for a
    # cancelled child check. Since Python 3.8 it derives from BaseException,
    # so testing for Exception alone would let it through and the later
    # ``.status`` access on the result would fail.
    if isinstance(r, (Exception, asyncio.CancelledError)):
        return HealthReport(status=HealthStatus.OFFLINE, reason=f"{label} check error: {r}")
    return r
|
||||
|
||||
health_gcp_a = _to_health(results_raw[0], f"primary({url_primary})")
|
||||
health_gcp_b = _to_health(results_raw[1], f"secondary({url_secondary})")
|
||||
health_local = _to_health(results_raw[2], f"tertiary({url_tertiary})")
|
||||
def _short(url: str) -> str:
    """Return the hostname of ``url`` for compact routing-reason text.

    Falls back to the raw ``url`` when no hostname can be extracted
    (no scheme/netloc) or when the URL is malformed.
    """
    from urllib.parse import urlparse

    try:
        host = urlparse(url).hostname
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. an unbalanced IPv6
        # bracket). This value is only used for display, so a bad URL must
        # not abort the routing decision.
        return url
    return host or url
|
||||
|
||||
result = self._decide_route(
|
||||
health_gcp_a=health_gcp_a,
|
||||
health_gcp_b=health_gcp_b,
|
||||
health_local=health_local,
|
||||
url_gcp_a=url_primary,
|
||||
url_gcp_b=url_secondary,
|
||||
url_local=url_tertiary,
|
||||
)
|
||||
# 2026-05-19 Codex: alert-fast path must not wait for the slow local lane
|
||||
# when GCP-A is already healthy. The old gather(GCP-A/GCP-B/111) path made
|
||||
# 111's 45s health timeout dominate every routing decision.
|
||||
try:
|
||||
primary_raw = await self._monitor.check(url_primary)
|
||||
except Exception as exc:
|
||||
primary_raw = exc
|
||||
health_gcp_a = _to_health(primary_raw, f"primary({url_primary})")
|
||||
health_gcp_b: HealthReport | None = None
|
||||
health_local: HealthReport | None = None
|
||||
|
||||
if health_gcp_a.status == HealthStatus.HEALTHY:
|
||||
model = self._settings.OLLAMA_HEALTH_CHECK_MODEL
|
||||
fallback_chain = [
|
||||
OllamaEndpoint(url=url_secondary, provider_name="ollama_gcp_b", model=model),
|
||||
OllamaEndpoint(url=url_tertiary, provider_name="ollama_local", model=model),
|
||||
_GEMINI_ENDPOINT,
|
||||
]
|
||||
result = OllamaRoutingResult(
|
||||
primary=OllamaEndpoint(url=url_primary, provider_name="ollama_gcp_a", model=model),
|
||||
fallback_chain=fallback_chain,
|
||||
routing_reason=f"primary({_short(url_primary)}) HEALTHY",
|
||||
health_gcp_a=health_gcp_a,
|
||||
health_gcp_b=None,
|
||||
health_local=None,
|
||||
)
|
||||
else:
|
||||
# Primary 不健康時才並行檢查後兩層,保留 GCP-B/Local 容災。
|
||||
results_raw = await asyncio.gather(
|
||||
self._monitor.check(url_secondary),
|
||||
self._monitor.check(url_tertiary),
|
||||
return_exceptions=True,
|
||||
)
|
||||
health_gcp_b = _to_health(results_raw[0], f"secondary({url_secondary})")
|
||||
health_local = _to_health(results_raw[1], f"tertiary({url_tertiary})")
|
||||
|
||||
result = self._decide_route(
|
||||
health_gcp_a=health_gcp_a,
|
||||
health_gcp_b=health_gcp_b,
|
||||
health_local=health_local,
|
||||
url_gcp_a=url_primary,
|
||||
url_gcp_b=url_secondary,
|
||||
url_local=url_tertiary,
|
||||
)
|
||||
|
||||
# Gemini 帳單熔斷(quota gate)
|
||||
# 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
|
||||
@@ -243,8 +270,8 @@ class OllamaFailoverManager:
|
||||
result = self._build_quota_exceeded_route(health_gcp_a=health_gcp_a)
|
||||
# Quota 耗盡 Telegram 告警(24h dedup)
|
||||
try:
|
||||
from src.services.failover_alerter import get_failover_alerter
|
||||
from src.core.redis_client import get_redis
|
||||
from src.services.failover_alerter import get_failover_alerter
|
||||
_current_count = quota
|
||||
try:
|
||||
_redis = get_redis()
|
||||
@@ -267,6 +294,9 @@ class OllamaFailoverManager:
|
||||
# 寫入 audit_log(best-effort)
|
||||
await self._write_failover_audit(result)
|
||||
|
||||
def _status(report: HealthReport | None) -> str:
    """Return the report's status value, or ``"not_checked"`` when absent.

    ``None`` means the endpoint was not probed on this routing pass.
    """
    # Compare against None explicitly: a report object must never be
    # mistaken for "not checked" just because it happens to be falsy.
    return report.status.value if report is not None else "not_checked"
|
||||
|
||||
logger.info(
|
||||
"ollama_failover_decision",
|
||||
primary=result.primary.provider_name,
|
||||
@@ -274,8 +304,8 @@ class OllamaFailoverManager:
|
||||
reason=result.routing_reason,
|
||||
fallback_count=len(result.fallback_chain),
|
||||
health_gcp_a=health_gcp_a.status.value,
|
||||
health_gcp_b=health_gcp_b.status.value,
|
||||
health_local=health_local.status.value,
|
||||
health_gcp_b=_status(health_gcp_b),
|
||||
health_local=_status(health_local),
|
||||
)
|
||||
|
||||
# 通知 recovery service 當前 primary(跨重啟持久化)
|
||||
@@ -589,8 +619,8 @@ class OllamaFailoverManager:
|
||||
# 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert) — 記錄 failover Prometheus metric
|
||||
try:
|
||||
from src.core.metrics import (
|
||||
OLLAMA_FAILOVER_TRIGGERED_TOTAL,
|
||||
OLLAMA_CURRENT_PRIMARY_IS_OLLAMA,
|
||||
OLLAMA_FAILOVER_TRIGGERED_TOTAL,
|
||||
)
|
||||
OLLAMA_FAILOVER_TRIGGERED_TOTAL.labels(
|
||||
from_provider="ollama",
|
||||
|
||||
Reference in New Issue
Block a user