fix(api): avoid local ollama health blocking gcp route
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m27s
CD Pipeline / build-and-deploy (push) Successful in 4m22s
CD Pipeline / post-deploy-checks (push) Successful in 2m0s

This commit is contained in:
Your Name
2026-05-19 12:22:46 +08:00
parent 1d285dd9d4
commit 36aeea80a3
2 changed files with 100 additions and 43 deletions

View File

@@ -33,19 +33,12 @@ from __future__ import annotations
import asyncio
import datetime
from dataclasses import dataclass, field
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo保證一定有 +8 時區
# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC
from datetime import timezone, timedelta
from dataclasses import dataclass
from datetime import timedelta, timezone
import structlog
from src.core.config import get_settings
# 台北時區 +8(標準庫保險絲,100% 可用)
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
TAIPEI_TZ = timezone(timedelta(hours=8))
from src.services.ollama_health_monitor import (
HealthReport,
HealthStatus,
@@ -55,6 +48,12 @@ from src.services.ollama_health_monitor import (
logger = structlog.get_logger(__name__)
# 台北時區 +8(標準庫保險絲,100% 可用)
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo,保證一定有 +8 時區
# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC
TAIPEI_TZ = timezone(timedelta(hours=8))
# =============================================================================
# 路由結果模型輕量P1.2 整合時轉換為 RoutingDecision
@@ -203,31 +202,59 @@ class OllamaFailoverManager:
url_secondary = self._settings.OLLAMA_SECONDARY_URL # 110:11436 → GCP-B (nginx proxy)
url_tertiary = self._settings.OLLAMA_FALLBACK_URL # 110:11437 → Local 111 (nginx proxy)
# 並行檢查三台 Ollama 主機(asyncio.gather 提升效率)
results_raw = await asyncio.gather(
self._monitor.check(url_primary),
self._monitor.check(url_secondary),
self._monitor.check(url_tertiary),
return_exceptions=True,
)
def _to_health(r, label: str) -> HealthReport:
if isinstance(r, Exception):
return HealthReport(status=HealthStatus.OFFLINE, reason=f"{label} check error: {r}")
return r
health_gcp_a = _to_health(results_raw[0], f"primary({url_primary})")
health_gcp_b = _to_health(results_raw[1], f"secondary({url_secondary})")
health_local = _to_health(results_raw[2], f"tertiary({url_tertiary})")
def _short(url: str) -> str:
from urllib.parse import urlparse
return urlparse(url).hostname or url
result = self._decide_route(
health_gcp_a=health_gcp_a,
health_gcp_b=health_gcp_b,
health_local=health_local,
url_gcp_a=url_primary,
url_gcp_b=url_secondary,
url_local=url_tertiary,
)
# 2026-05-19 Codex: alert-fast path must not wait for the slow local lane
# when GCP-A is already healthy. The old gather(GCP-A/GCP-B/111) path made
# 111's 45s health timeout dominate every routing decision.
try:
primary_raw = await self._monitor.check(url_primary)
except Exception as exc:
primary_raw = exc
health_gcp_a = _to_health(primary_raw, f"primary({url_primary})")
health_gcp_b: HealthReport | None = None
health_local: HealthReport | None = None
if health_gcp_a.status == HealthStatus.HEALTHY:
model = self._settings.OLLAMA_HEALTH_CHECK_MODEL
fallback_chain = [
OllamaEndpoint(url=url_secondary, provider_name="ollama_gcp_b", model=model),
OllamaEndpoint(url=url_tertiary, provider_name="ollama_local", model=model),
_GEMINI_ENDPOINT,
]
result = OllamaRoutingResult(
primary=OllamaEndpoint(url=url_primary, provider_name="ollama_gcp_a", model=model),
fallback_chain=fallback_chain,
routing_reason=f"primary({_short(url_primary)}) HEALTHY",
health_gcp_a=health_gcp_a,
health_gcp_b=None,
health_local=None,
)
else:
# Primary 不健康時才並行檢查後兩層,保留 GCP-B/Local 容災。
results_raw = await asyncio.gather(
self._monitor.check(url_secondary),
self._monitor.check(url_tertiary),
return_exceptions=True,
)
health_gcp_b = _to_health(results_raw[0], f"secondary({url_secondary})")
health_local = _to_health(results_raw[1], f"tertiary({url_tertiary})")
result = self._decide_route(
health_gcp_a=health_gcp_a,
health_gcp_b=health_gcp_b,
health_local=health_local,
url_gcp_a=url_primary,
url_gcp_b=url_secondary,
url_local=url_tertiary,
)
# Gemini 帳單熔斷quota gate
# 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
@@ -243,8 +270,8 @@ class OllamaFailoverManager:
result = self._build_quota_exceeded_route(health_gcp_a=health_gcp_a)
# Quota 耗盡 Telegram 告警24h dedup
try:
from src.services.failover_alerter import get_failover_alerter
from src.core.redis_client import get_redis
from src.services.failover_alerter import get_failover_alerter
_current_count = quota
try:
_redis = get_redis()
@@ -267,6 +294,9 @@ class OllamaFailoverManager:
# 寫入 audit_logbest-effort
await self._write_failover_audit(result)
def _status(report: HealthReport | None) -> str:
return report.status.value if report else "not_checked"
logger.info(
"ollama_failover_decision",
primary=result.primary.provider_name,
@@ -274,8 +304,8 @@ class OllamaFailoverManager:
reason=result.routing_reason,
fallback_count=len(result.fallback_chain),
health_gcp_a=health_gcp_a.status.value,
health_gcp_b=health_gcp_b.status.value,
health_local=health_local.status.value,
health_gcp_b=_status(health_gcp_b),
health_local=_status(health_local),
)
# 通知 recovery service 當前 primary跨重啟持久化
@@ -589,8 +619,8 @@ class OllamaFailoverManager:
# 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert) — 記錄 failover Prometheus metric
try:
from src.core.metrics import (
OLLAMA_FAILOVER_TRIGGERED_TOTAL,
OLLAMA_CURRENT_PRIMARY_IS_OLLAMA,
OLLAMA_FAILOVER_TRIGGERED_TOTAL,
)
OLLAMA_FAILOVER_TRIGGERED_TOTAL.labels(
from_provider="ollama",

View File

@@ -3,6 +3,7 @@
# 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復(路由矩陣更新)
# 2026-04-27 波次對齊 by Claude Sonnet 4.6 — 統帥鐵律:唯一 Ollama=111188 完全移出
# 2026-05-03 ogt: ADR-110 GCP 三層容災架構URL 常數更新為 GCP-A/B/Local新增三層容災場景
# 2026-05-19 Codex: GCP-A healthy fast path 不等待 Local 111 health timeout
"""
OllamaFailoverManager 單元測試 - P1.1c v4.0
=============================================
@@ -25,14 +26,13 @@ from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from src.services.ollama_health_monitor import HealthReport, HealthStatus
from src.services.ollama_failover_manager import (
OllamaFailoverManager,
OllamaRoutingResult,
get_ollama_failover_manager,
reset_ollama_failover_manager,
)
from src.services.ollama_health_monitor import HealthReport, HealthStatus
# =============================================================================
# Fixtures
@@ -266,12 +266,12 @@ class TestDecideRoute:
# =============================================================================
# select_provider()只 check 111
# select_provider()GCP-A healthy fast path
# =============================================================================
class TestSelectProvider:
"""select_provider() 三層容災健康檢查ADR-110並行 check GCP-A / GCP-B / Local"""
"""select_provider() 三層容災健康檢查"""
def _make_three_layer_mock(
self,
@@ -303,16 +303,37 @@ class TestSelectProvider:
return manager, mock_monitor
@pytest.mark.asyncio
async def test_select_provider_checks_all_three_hosts(self):
"""ADR-110select_provider 並行 check 三台 Ollama 主機"""
async def test_select_provider_gcp_a_healthy_checks_primary_only(self):
"""GCP-A healthy 時不等待 GCP-B / Local 111避免 routing 被 111 timeout 拖慢。"""
manager, mock_monitor = self._make_three_layer_mock(
gcp_a_status=HealthStatus.HEALTHY,
)
with patch.object(manager, "_write_failover_audit", return_value=None):
await manager.select_provider()
result = await manager.select_provider()
# 並行 check 三台主機GCP-A / GCP-B / Local
assert result.primary.provider_name == "ollama_gcp_a"
assert result.health_gcp_b is None
assert result.health_local is None
assert mock_monitor.check.call_count == 1
called_urls = {call.args[0] for call in mock_monitor.check.call_args_list}
assert URL_GCP_A in called_urls
assert URL_GCP_B not in called_urls
assert URL_LOCAL not in called_urls
@pytest.mark.asyncio
async def test_select_provider_checks_fallback_hosts_when_gcp_a_not_healthy(self):
"""GCP-A 不健康時仍檢查 GCP-B / Local保留三層容災。"""
manager, mock_monitor = self._make_three_layer_mock(
gcp_a_status=HealthStatus.OFFLINE,
gcp_b_status=HealthStatus.HEALTHY,
local_status=HealthStatus.OFFLINE,
)
with patch.object(manager, "_write_failover_audit", return_value=None):
result = await manager.select_provider()
assert result.primary.provider_name == "ollama_gcp_b"
assert mock_monitor.check.call_count == 3
called_urls = {call.args[0] for call in mock_monitor.check.call_args_list}
assert URL_GCP_A in called_urls
@@ -626,7 +647,10 @@ class TestWriteFailoverAudit:
async def test_audit_uses_structlog_not_db(self):
"""_write_failover_audit 應呼叫 structlog不呼叫 DB"""
manager = _make_manager()
from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult
from src.services.ollama_failover_manager import (
OllamaEndpoint,
OllamaRoutingResult,
)
result = OllamaRoutingResult(
primary=OllamaEndpoint(url="", provider_name="gemini", model="gemini-1.5-flash"),
@@ -642,7 +666,10 @@ class TestWriteFailoverAudit:
async def test_audit_skipped_when_gcp_a_healthy(self):
"""GCP-A HEALTHY 時 early return不記錄 failover"""
manager = _make_manager()
from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult
from src.services.ollama_failover_manager import (
OllamaEndpoint,
OllamaRoutingResult,
)
result = OllamaRoutingResult(
primary=OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen"),
@@ -669,7 +696,7 @@ class TestAIProviderEnumOllamaLocal:
assert AIProviderEnum.OLLAMA_LOCAL.value == "ollama_local"
def test_ollama_local_in_latency_budget(self):
from src.services.ai_router import AIProviderEnum, PROVIDER_LATENCY_BUDGET
from src.services.ai_router import PROVIDER_LATENCY_BUDGET, AIProviderEnum
assert AIProviderEnum.OLLAMA_LOCAL in PROVIDER_LATENCY_BUDGET
assert PROVIDER_LATENCY_BUDGET[AIProviderEnum.OLLAMA_LOCAL] == 90000