From 36aeea80a36f01e58e3cd9c2089b6befa8dcd1df Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 19 May 2026 12:22:46 +0800 Subject: [PATCH] fix(api): avoid local ollama health blocking gcp route --- .../src/services/ollama_failover_manager.py | 94 ++++++++++++------- .../api/tests/test_ollama_failover_manager.py | 49 +++++++--- 2 files changed, 100 insertions(+), 43 deletions(-) diff --git a/apps/api/src/services/ollama_failover_manager.py b/apps/api/src/services/ollama_failover_manager.py index 20f71e69..148e6e92 100644 --- a/apps/api/src/services/ollama_failover_manager.py +++ b/apps/api/src/services/ollama_failover_manager.py @@ -33,19 +33,12 @@ from __future__ import annotations import asyncio import datetime -from dataclasses import dataclass, field -# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2 -# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo,保證一定有 +8 時區 -# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC -from datetime import timezone, timedelta +from dataclasses import dataclass +from datetime import timedelta, timezone import structlog from src.core.config import get_settings - -# 台北時區 +8(標準庫保險絲,100% 可用) -# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2 -TAIPEI_TZ = timezone(timedelta(hours=8)) from src.services.ollama_health_monitor import ( HealthReport, HealthStatus, @@ -55,6 +48,12 @@ from src.services.ollama_health_monitor import ( logger = structlog.get_logger(__name__) +# 台北時區 +8(標準庫保險絲,100% 可用) +# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2 +# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo,保證一定有 +8 時區 +# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC +TAIPEI_TZ = timezone(timedelta(hours=8)) + # ============================================================================= # 路由結果模型(輕量,P1.2 整合時轉換為 RoutingDecision) @@ -203,31 +202,59 @@ class OllamaFailoverManager: url_secondary = self._settings.OLLAMA_SECONDARY_URL # 110:11436 → GCP-B (nginx proxy) url_tertiary = self._settings.OLLAMA_FALLBACK_URL # 110:11437 → Local 111 (nginx proxy) - # 並行檢查三台 Ollama 主機(asyncio.gather 提升效率) - results_raw = await asyncio.gather( - self._monitor.check(url_primary), - self._monitor.check(url_secondary), - self._monitor.check(url_tertiary), - return_exceptions=True, - ) - def _to_health(r, label: str) -> HealthReport: if isinstance(r, Exception): return HealthReport(status=HealthStatus.OFFLINE, reason=f"{label} check error: {r}") return r - health_gcp_a = _to_health(results_raw[0], f"primary({url_primary})") - health_gcp_b = _to_health(results_raw[1], f"secondary({url_secondary})") - health_local = _to_health(results_raw[2], f"tertiary({url_tertiary})") + def _short(url: str) -> str: + from urllib.parse import urlparse + return urlparse(url).hostname or url - result = self._decide_route( - health_gcp_a=health_gcp_a, - health_gcp_b=health_gcp_b, - health_local=health_local, - url_gcp_a=url_primary, - url_gcp_b=url_secondary, - url_local=url_tertiary, - ) + # 2026-05-19 Codex: alert-fast path must not wait for the slow local lane + # when GCP-A is already healthy. The old gather(GCP-A/GCP-B/111) path made + # 111's 45s health timeout dominate every routing decision. + try: + primary_raw = await self._monitor.check(url_primary) + except Exception as exc: + primary_raw = exc + health_gcp_a = _to_health(primary_raw, f"primary({url_primary})") + health_gcp_b: HealthReport | None = None + health_local: HealthReport | None = None + + if health_gcp_a.status == HealthStatus.HEALTHY: + model = self._settings.OLLAMA_HEALTH_CHECK_MODEL + fallback_chain = [ + OllamaEndpoint(url=url_secondary, provider_name="ollama_gcp_b", model=model), + OllamaEndpoint(url=url_tertiary, provider_name="ollama_local", model=model), + _GEMINI_ENDPOINT, + ] + result = OllamaRoutingResult( + primary=OllamaEndpoint(url=url_primary, provider_name="ollama_gcp_a", model=model), + fallback_chain=fallback_chain, + routing_reason=f"primary({_short(url_primary)}) HEALTHY", + health_gcp_a=health_gcp_a, + health_gcp_b=None, + health_local=None, + ) + else: + # Primary 不健康時才並行檢查後兩層,保留 GCP-B/Local 容災。 + results_raw = await asyncio.gather( + self._monitor.check(url_secondary), + self._monitor.check(url_tertiary), + return_exceptions=True, + ) + health_gcp_b = _to_health(results_raw[0], f"secondary({url_secondary})") + health_local = _to_health(results_raw[1], f"tertiary({url_tertiary})") + + result = self._decide_route( + health_gcp_a=health_gcp_a, + health_gcp_b=health_gcp_b, + health_local=health_local, + url_gcp_a=url_primary, + url_gcp_b=url_secondary, + url_local=url_tertiary, + ) # Gemini 帳單熔斷(quota gate) # 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2 @@ -243,8 +270,8 @@ class OllamaFailoverManager: result = self._build_quota_exceeded_route(health_gcp_a=health_gcp_a) # Quota 耗盡 Telegram 告警(24h dedup) try: - from src.services.failover_alerter import get_failover_alerter from src.core.redis_client import get_redis + from src.services.failover_alerter import get_failover_alerter _current_count = quota try: _redis = get_redis() @@ -267,6 +294,9 @@ class OllamaFailoverManager: # 寫入 audit_log(best-effort) await self._write_failover_audit(result) + def _status(report: HealthReport | None) -> str: + return report.status.value if report else "not_checked" + logger.info( "ollama_failover_decision", primary=result.primary.provider_name, @@ -274,8 +304,8 @@ class OllamaFailoverManager: reason=result.routing_reason, fallback_count=len(result.fallback_chain), health_gcp_a=health_gcp_a.status.value, - health_gcp_b=health_gcp_b.status.value, - health_local=health_local.status.value, + health_gcp_b=_status(health_gcp_b), + health_local=_status(health_local), ) # 通知 recovery service 當前 primary(跨重啟持久化) @@ -589,8 +619,8 @@ class OllamaFailoverManager: # 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert) — 記錄 failover Prometheus metric try: from src.core.metrics import ( - OLLAMA_FAILOVER_TRIGGERED_TOTAL, OLLAMA_CURRENT_PRIMARY_IS_OLLAMA, + OLLAMA_FAILOVER_TRIGGERED_TOTAL, ) OLLAMA_FAILOVER_TRIGGERED_TOTAL.labels( from_provider="ollama", diff --git a/apps/api/tests/test_ollama_failover_manager.py b/apps/api/tests/test_ollama_failover_manager.py index fe8a8aaf..5c52c248 100644 --- a/apps/api/tests/test_ollama_failover_manager.py +++ b/apps/api/tests/test_ollama_failover_manager.py @@ -3,6 +3,7 @@ # 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復(路由矩陣更新) # 2026-04-27 波次對齊 by Claude Sonnet 4.6 — 統帥鐵律:唯一 Ollama=111,188 完全移出 # 2026-05-03 ogt: ADR-110 GCP 三層容災架構,URL 常數更新為 GCP-A/B/Local,新增三層容災場景 +# 2026-05-19 Codex: GCP-A healthy fast path 不等待 Local 111 health timeout """ OllamaFailoverManager 單元測試 - P1.1c v4.0 ============================================= @@ -25,14 +26,13 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from src.services.ollama_health_monitor import HealthReport, HealthStatus from src.services.ollama_failover_manager import ( OllamaFailoverManager, OllamaRoutingResult, get_ollama_failover_manager, reset_ollama_failover_manager, ) - +from src.services.ollama_health_monitor import HealthReport, HealthStatus # ============================================================================= # Fixtures @@ -266,12 +266,12 @@ class TestDecideRoute: # ============================================================================= -# select_provider():只 check 111 +# select_provider():GCP-A healthy fast path # ============================================================================= class TestSelectProvider: - """select_provider() 三層容災健康檢查(ADR-110:並行 check GCP-A / GCP-B / Local)""" + """select_provider() 三層容災健康檢查。""" def _make_three_layer_mock( self, @@ -303,16 +303,37 @@ class TestSelectProvider: return manager, mock_monitor @pytest.mark.asyncio - async def test_select_provider_checks_all_three_hosts(self): - """ADR-110:select_provider 並行 check 三台 Ollama 主機""" + async def test_select_provider_gcp_a_healthy_checks_primary_only(self): + """GCP-A healthy 時不等待 GCP-B / Local 111,避免 routing 被 111 timeout 拖慢。""" manager, mock_monitor = self._make_three_layer_mock( gcp_a_status=HealthStatus.HEALTHY, ) with patch.object(manager, "_write_failover_audit", return_value=None): - await manager.select_provider() + result = await manager.select_provider() - # 並行 check 三台主機(GCP-A / GCP-B / Local) + assert result.primary.provider_name == "ollama_gcp_a" + assert result.health_gcp_b is None + assert result.health_local is None + assert mock_monitor.check.call_count == 1 + called_urls = {call.args[0] for call in mock_monitor.check.call_args_list} + assert URL_GCP_A in called_urls + assert URL_GCP_B not in called_urls + assert URL_LOCAL not in called_urls + + @pytest.mark.asyncio + async def test_select_provider_checks_fallback_hosts_when_gcp_a_not_healthy(self): + """GCP-A 不健康時仍檢查 GCP-B / Local,保留三層容災。""" + manager, mock_monitor = self._make_three_layer_mock( + gcp_a_status=HealthStatus.OFFLINE, + gcp_b_status=HealthStatus.HEALTHY, + local_status=HealthStatus.OFFLINE, + ) + + with patch.object(manager, "_write_failover_audit", return_value=None): + result = await manager.select_provider() + + assert result.primary.provider_name == "ollama_gcp_b" assert mock_monitor.check.call_count == 3 called_urls = {call.args[0] for call in mock_monitor.check.call_args_list} assert URL_GCP_A in called_urls @@ -626,7 +647,10 @@ class TestWriteFailoverAudit: async def test_audit_uses_structlog_not_db(self): """_write_failover_audit 應呼叫 structlog,不呼叫 DB""" manager = _make_manager() - from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult + from src.services.ollama_failover_manager import ( + OllamaEndpoint, + OllamaRoutingResult, + ) result = OllamaRoutingResult( primary=OllamaEndpoint(url="", provider_name="gemini", model="gemini-1.5-flash"), @@ -642,7 +666,10 @@ class TestWriteFailoverAudit: async def test_audit_skipped_when_gcp_a_healthy(self): """GCP-A HEALTHY 時 early return,不記錄 failover""" manager = _make_manager() - from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult + from src.services.ollama_failover_manager import ( + OllamaEndpoint, + OllamaRoutingResult, + ) result = OllamaRoutingResult( primary=OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen"), @@ -669,7 +696,7 @@ class TestAIProviderEnumOllamaLocal: assert AIProviderEnum.OLLAMA_LOCAL.value == "ollama_local" def test_ollama_local_in_latency_budget(self): - from src.services.ai_router import AIProviderEnum, PROVIDER_LATENCY_BUDGET + from src.services.ai_router import PROVIDER_LATENCY_BUDGET, AIProviderEnum assert AIProviderEnum.OLLAMA_LOCAL in PROVIDER_LATENCY_BUDGET assert PROVIDER_LATENCY_BUDGET[AIProviderEnum.OLLAMA_LOCAL] == 90000