fix(api): avoid local ollama health blocking gcp route
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m27s
CD Pipeline / build-and-deploy (push) Successful in 4m22s
CD Pipeline / post-deploy-checks (push) Successful in 2m0s

This commit is contained in:
Your Name
2026-05-19 12:22:46 +08:00
parent 1d285dd9d4
commit 36aeea80a3
2 changed files with 100 additions and 43 deletions

View File

@@ -33,19 +33,12 @@ from __future__ import annotations
import asyncio
import datetime
from dataclasses import dataclass, field
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo，保證一定有 +8 時區
# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC
from datetime import timezone, timedelta
from dataclasses import dataclass
from datetime import timedelta, timezone
import structlog
from src.core.config import get_settings
# 台北時區 +8（標準庫保險絲，100% 可用）
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
TAIPEI_TZ = timezone(timedelta(hours=8))
from src.services.ollama_health_monitor import (
HealthReport,
HealthStatus,
@@ -55,6 +48,12 @@ from src.services.ollama_health_monitor import (
logger = structlog.get_logger(__name__)
# 台北時區 +8（標準庫保險絲，100% 可用）
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo，保證一定有 +8 時區
# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC
TAIPEI_TZ = timezone(timedelta(hours=8))
# =============================================================================
# 路由結果模型（輕量，P1.2 整合時轉換為 RoutingDecision）
@@ -203,31 +202,59 @@ class OllamaFailoverManager:
url_secondary = self._settings.OLLAMA_SECONDARY_URL # 110:11436 → GCP-B (nginx proxy)
url_tertiary = self._settings.OLLAMA_FALLBACK_URL # 110:11437 → Local 111 (nginx proxy)
# 並行檢查三台 Ollama 主機（asyncio.gather 提升效率）
results_raw = await asyncio.gather(
self._monitor.check(url_primary),
self._monitor.check(url_secondary),
self._monitor.check(url_tertiary),
return_exceptions=True,
)
# Normalize one health-check outcome into a HealthReport.
#
# r     -- either the value produced by self._monitor.check(...) or the
#          exception captured in its place (gather(return_exceptions=True)
#          results and the explicit try/except around the primary check both
#          feed this helper).
# label -- human-readable endpoint tag, e.g. "primary(<url>)"; it is embedded
#          in the OFFLINE reason text.
#
# Returns an OFFLINE HealthReport when r is an Exception instance; otherwise
# returns r unchanged.
# NOTE(review): only Exception subclasses are converted. A BaseException that
# is not an Exception (e.g. asyncio.CancelledError on Python 3.8+) would be
# returned as-is -- confirm callers never hand one in.
def _to_health(r, label: str) -> HealthReport:
if isinstance(r, Exception):
return HealthReport(status=HealthStatus.OFFLINE, reason=f"{label} check error: {r}")
return r
health_gcp_a = _to_health(results_raw[0], f"primary({url_primary})")
health_gcp_b = _to_health(results_raw[1], f"secondary({url_secondary})")
health_local = _to_health(results_raw[2], f"tertiary({url_tertiary})")
# Return the hostname part of url for compact display (it is used when
# building the routing_reason string). Falls back to the full url when
# urlparse yields no hostname (None or empty).
# urlparse is imported locally, inside the helper, rather than at module top.
def _short(url: str) -> str:
from urllib.parse import urlparse
return urlparse(url).hostname or url
result = self._decide_route(
health_gcp_a=health_gcp_a,
health_gcp_b=health_gcp_b,
health_local=health_local,
url_gcp_a=url_primary,
url_gcp_b=url_secondary,
url_local=url_tertiary,
)
# 2026-05-19 Codex: alert-fast path must not wait for the slow local lane
# when GCP-A is already healthy. The old gather(GCP-A/GCP-B/111) path made
# 111's 45s health timeout dominate every routing decision.
try:
primary_raw = await self._monitor.check(url_primary)
except Exception as exc:
primary_raw = exc
health_gcp_a = _to_health(primary_raw, f"primary({url_primary})")
health_gcp_b: HealthReport | None = None
health_local: HealthReport | None = None
if health_gcp_a.status == HealthStatus.HEALTHY:
model = self._settings.OLLAMA_HEALTH_CHECK_MODEL
fallback_chain = [
OllamaEndpoint(url=url_secondary, provider_name="ollama_gcp_b", model=model),
OllamaEndpoint(url=url_tertiary, provider_name="ollama_local", model=model),
_GEMINI_ENDPOINT,
]
result = OllamaRoutingResult(
primary=OllamaEndpoint(url=url_primary, provider_name="ollama_gcp_a", model=model),
fallback_chain=fallback_chain,
routing_reason=f"primary({_short(url_primary)}) HEALTHY",
health_gcp_a=health_gcp_a,
health_gcp_b=None,
health_local=None,
)
else:
# Primary 不健康時才並行檢查後兩層,保留 GCP-B/Local 容災。
results_raw = await asyncio.gather(
self._monitor.check(url_secondary),
self._monitor.check(url_tertiary),
return_exceptions=True,
)
health_gcp_b = _to_health(results_raw[0], f"secondary({url_secondary})")
health_local = _to_health(results_raw[1], f"tertiary({url_tertiary})")
result = self._decide_route(
health_gcp_a=health_gcp_a,
health_gcp_b=health_gcp_b,
health_local=health_local,
url_gcp_a=url_primary,
url_gcp_b=url_secondary,
url_local=url_tertiary,
)
# Gemini 帳單熔斷（quota gate）
# 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
@@ -243,8 +270,8 @@ class OllamaFailoverManager:
result = self._build_quota_exceeded_route(health_gcp_a=health_gcp_a)
# Quota 耗盡 Telegram 告警（24h dedup）
try:
from src.services.failover_alerter import get_failover_alerter
from src.core.redis_client import get_redis
from src.services.failover_alerter import get_failover_alerter
_current_count = quota
try:
_redis = get_redis()
@@ -267,6 +294,9 @@ class OllamaFailoverManager:
# 寫入 audit_log（best-effort）
await self._write_failover_audit(result)
# Render a health report for the structured decision log.
# Returns report.status.value when a report is present, or the literal
# "not_checked" when report is falsy -- the secondary/local reports stay None
# on the fast path where the primary is HEALTHY and those endpoints are never
# probed.
def _status(report: HealthReport | None) -> str:
return report.status.value if report else "not_checked"
logger.info(
"ollama_failover_decision",
primary=result.primary.provider_name,
@@ -274,8 +304,8 @@ class OllamaFailoverManager:
reason=result.routing_reason,
fallback_count=len(result.fallback_chain),
health_gcp_a=health_gcp_a.status.value,
health_gcp_b=health_gcp_b.status.value,
health_local=health_local.status.value,
health_gcp_b=_status(health_gcp_b),
health_local=_status(health_local),
)
# 通知 recovery service 當前 primary（跨重啟持久化）
@@ -589,8 +619,8 @@ class OllamaFailoverManager:
# 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert) — 記錄 failover Prometheus metric
try:
from src.core.metrics import (
OLLAMA_FAILOVER_TRIGGERED_TOTAL,
OLLAMA_CURRENT_PRIMARY_IS_OLLAMA,
OLLAMA_FAILOVER_TRIGGERED_TOTAL,
)
OLLAMA_FAILOVER_TRIGGERED_TOTAL.labels(
from_provider="ollama",