fix(api): avoid local ollama health blocking gcp route
This commit is contained in:
@@ -33,19 +33,12 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import datetime
|
||||
from dataclasses import dataclass, field
|
||||
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
|
||||
# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo,保證一定有 +8 時區
|
||||
# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC
|
||||
from datetime import timezone, timedelta
|
||||
from dataclasses import dataclass
|
||||
from datetime import timedelta, timezone
|
||||
|
||||
import structlog
|
||||
|
||||
from src.core.config import get_settings
|
||||
|
||||
# 台北時區 +8(標準庫保險絲,100% 可用)
|
||||
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
|
||||
TAIPEI_TZ = timezone(timedelta(hours=8))
|
||||
from src.services.ollama_health_monitor import (
|
||||
HealthReport,
|
||||
HealthStatus,
|
||||
@@ -55,6 +48,12 @@ from src.services.ollama_health_monitor import (
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
# 台北時區 +8(標準庫保險絲,100% 可用)
|
||||
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
|
||||
# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo,保證一定有 +8 時區
|
||||
# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC
|
||||
TAIPEI_TZ = timezone(timedelta(hours=8))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 路由結果模型(輕量,P1.2 整合時轉換為 RoutingDecision)
|
||||
@@ -203,31 +202,59 @@ class OllamaFailoverManager:
|
||||
url_secondary = self._settings.OLLAMA_SECONDARY_URL # 110:11436 → GCP-B (nginx proxy)
|
||||
url_tertiary = self._settings.OLLAMA_FALLBACK_URL # 110:11437 → Local 111 (nginx proxy)
|
||||
|
||||
# 並行檢查三台 Ollama 主機(asyncio.gather 提升效率)
|
||||
results_raw = await asyncio.gather(
|
||||
self._monitor.check(url_primary),
|
||||
self._monitor.check(url_secondary),
|
||||
self._monitor.check(url_tertiary),
|
||||
return_exceptions=True,
|
||||
)
|
||||
|
||||
def _to_health(r, label: str) -> HealthReport:
    """Normalize one health-check outcome into a ``HealthReport``.

    Outcomes come either from ``asyncio.gather(..., return_exceptions=True)``
    or from a caught exception, so ``r`` is either the report returned by the
    monitor or the exception object the check raised.

    Args:
        r: The report produced by the check, or the exception it raised.
        label: Human-readable endpoint label embedded in the failure reason.

    Returns:
        ``r`` unchanged when it is a report; otherwise an OFFLINE report
        whose reason carries ``label`` and the exception text.
    """
    # BaseException (not Exception): with return_exceptions=True a cancelled
    # child check is delivered as asyncio.CancelledError, which has derived
    # from BaseException since Python 3.8. Testing only for Exception would
    # hand that object back as if it were a HealthReport.
    if isinstance(r, BaseException):
        return HealthReport(status=HealthStatus.OFFLINE, reason=f"{label} check error: {r}")
    return r
|
||||
|
||||
health_gcp_a = _to_health(results_raw[0], f"primary({url_primary})")
|
||||
health_gcp_b = _to_health(results_raw[1], f"secondary({url_secondary})")
|
||||
health_local = _to_health(results_raw[2], f"tertiary({url_tertiary})")
|
||||
def _short(url: str) -> str:
    """Return the host part of ``url`` for use in short log/reason strings.

    Falls back to the original ``url`` when no hostname can be extracted,
    either because the string has no network location or because it cannot
    be parsed at all.
    """
    from urllib.parse import urlparse

    try:
        host = urlparse(url).hostname
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. an unbalanced IPv6
        # bracket). This helper only builds a display label, so a bad URL
        # must not abort the routing decision.
        return url
    return host or url
|
||||
|
||||
result = self._decide_route(
|
||||
health_gcp_a=health_gcp_a,
|
||||
health_gcp_b=health_gcp_b,
|
||||
health_local=health_local,
|
||||
url_gcp_a=url_primary,
|
||||
url_gcp_b=url_secondary,
|
||||
url_local=url_tertiary,
|
||||
)
|
||||
# 2026-05-19 Codex: alert-fast path must not wait for the slow local lane
|
||||
# when GCP-A is already healthy. The old gather(GCP-A/GCP-B/111) path made
|
||||
# 111's 45s health timeout dominate every routing decision.
|
||||
try:
|
||||
primary_raw = await self._monitor.check(url_primary)
|
||||
except Exception as exc:
|
||||
primary_raw = exc
|
||||
health_gcp_a = _to_health(primary_raw, f"primary({url_primary})")
|
||||
health_gcp_b: HealthReport | None = None
|
||||
health_local: HealthReport | None = None
|
||||
|
||||
if health_gcp_a.status == HealthStatus.HEALTHY:
|
||||
model = self._settings.OLLAMA_HEALTH_CHECK_MODEL
|
||||
fallback_chain = [
|
||||
OllamaEndpoint(url=url_secondary, provider_name="ollama_gcp_b", model=model),
|
||||
OllamaEndpoint(url=url_tertiary, provider_name="ollama_local", model=model),
|
||||
_GEMINI_ENDPOINT,
|
||||
]
|
||||
result = OllamaRoutingResult(
|
||||
primary=OllamaEndpoint(url=url_primary, provider_name="ollama_gcp_a", model=model),
|
||||
fallback_chain=fallback_chain,
|
||||
routing_reason=f"primary({_short(url_primary)}) HEALTHY",
|
||||
health_gcp_a=health_gcp_a,
|
||||
health_gcp_b=None,
|
||||
health_local=None,
|
||||
)
|
||||
else:
|
||||
# Primary 不健康時才並行檢查後兩層,保留 GCP-B/Local 容災。
|
||||
results_raw = await asyncio.gather(
|
||||
self._monitor.check(url_secondary),
|
||||
self._monitor.check(url_tertiary),
|
||||
return_exceptions=True,
|
||||
)
|
||||
health_gcp_b = _to_health(results_raw[0], f"secondary({url_secondary})")
|
||||
health_local = _to_health(results_raw[1], f"tertiary({url_tertiary})")
|
||||
|
||||
result = self._decide_route(
|
||||
health_gcp_a=health_gcp_a,
|
||||
health_gcp_b=health_gcp_b,
|
||||
health_local=health_local,
|
||||
url_gcp_a=url_primary,
|
||||
url_gcp_b=url_secondary,
|
||||
url_local=url_tertiary,
|
||||
)
|
||||
|
||||
# Gemini 帳單熔斷(quota gate)
|
||||
# 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
|
||||
@@ -243,8 +270,8 @@ class OllamaFailoverManager:
|
||||
result = self._build_quota_exceeded_route(health_gcp_a=health_gcp_a)
|
||||
# Quota 耗盡 Telegram 告警(24h dedup)
|
||||
try:
|
||||
from src.services.failover_alerter import get_failover_alerter
|
||||
from src.core.redis_client import get_redis
|
||||
from src.services.failover_alerter import get_failover_alerter
|
||||
_current_count = quota
|
||||
try:
|
||||
_redis = get_redis()
|
||||
@@ -267,6 +294,9 @@ class OllamaFailoverManager:
|
||||
# 寫入 audit_log(best-effort)
|
||||
await self._write_failover_audit(result)
|
||||
|
||||
def _status(report: HealthReport | None) -> str:
    """Return the status value of ``report`` for logging.

    ``None`` means the endpoint was not checked, which is reported as the
    literal ``"not_checked"``.
    """
    # Compare against None explicitly: "not checked" is signalled by None,
    # not by the truthiness of the report object.
    if report is None:
        return "not_checked"
    return report.status.value
|
||||
|
||||
logger.info(
|
||||
"ollama_failover_decision",
|
||||
primary=result.primary.provider_name,
|
||||
@@ -274,8 +304,8 @@ class OllamaFailoverManager:
|
||||
reason=result.routing_reason,
|
||||
fallback_count=len(result.fallback_chain),
|
||||
health_gcp_a=health_gcp_a.status.value,
|
||||
health_gcp_b=health_gcp_b.status.value,
|
||||
health_local=health_local.status.value,
|
||||
health_gcp_b=_status(health_gcp_b),
|
||||
health_local=_status(health_local),
|
||||
)
|
||||
|
||||
# 通知 recovery service 當前 primary(跨重啟持久化)
|
||||
@@ -589,8 +619,8 @@ class OllamaFailoverManager:
|
||||
# 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert) — 記錄 failover Prometheus metric
|
||||
try:
|
||||
from src.core.metrics import (
|
||||
OLLAMA_FAILOVER_TRIGGERED_TOTAL,
|
||||
OLLAMA_CURRENT_PRIMARY_IS_OLLAMA,
|
||||
OLLAMA_FAILOVER_TRIGGERED_TOTAL,
|
||||
)
|
||||
OLLAMA_FAILOVER_TRIGGERED_TOTAL.labels(
|
||||
from_provider="ollama",
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
# 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復(路由矩陣更新)
|
||||
# 2026-04-27 波次對齊 by Claude Sonnet 4.6 — 統帥鐵律:唯一 Ollama=111,188 完全移出
|
||||
# 2026-05-03 ogt: ADR-110 GCP 三層容災架構,URL 常數更新為 GCP-A/B/Local,新增三層容災場景
|
||||
# 2026-05-19 Codex: GCP-A healthy fast path 不等待 Local 111 health timeout
|
||||
"""
|
||||
OllamaFailoverManager 單元測試 - P1.1c v4.0
|
||||
=============================================
|
||||
@@ -25,14 +26,13 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from src.services.ollama_health_monitor import HealthReport, HealthStatus
|
||||
from src.services.ollama_failover_manager import (
|
||||
OllamaFailoverManager,
|
||||
OllamaRoutingResult,
|
||||
get_ollama_failover_manager,
|
||||
reset_ollama_failover_manager,
|
||||
)
|
||||
|
||||
from src.services.ollama_health_monitor import HealthReport, HealthStatus
|
||||
|
||||
# =============================================================================
|
||||
# Fixtures
|
||||
@@ -266,12 +266,12 @@ class TestDecideRoute:
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# select_provider():只 check 111
|
||||
# select_provider():GCP-A healthy fast path
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestSelectProvider:
|
||||
"""select_provider() 三層容災健康檢查(ADR-110:並行 check GCP-A / GCP-B / Local)"""
|
||||
"""select_provider() 三層容災健康檢查。"""
|
||||
|
||||
def _make_three_layer_mock(
|
||||
self,
|
||||
@@ -303,16 +303,37 @@ class TestSelectProvider:
|
||||
return manager, mock_monitor
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_select_provider_checks_all_three_hosts(self):
|
||||
"""ADR-110:select_provider 並行 check 三台 Ollama 主機"""
|
||||
async def test_select_provider_gcp_a_healthy_checks_primary_only(self):
|
||||
"""GCP-A healthy 時不等待 GCP-B / Local 111,避免 routing 被 111 timeout 拖慢。"""
|
||||
manager, mock_monitor = self._make_three_layer_mock(
|
||||
gcp_a_status=HealthStatus.HEALTHY,
|
||||
)
|
||||
|
||||
with patch.object(manager, "_write_failover_audit", return_value=None):
|
||||
await manager.select_provider()
|
||||
result = await manager.select_provider()
|
||||
|
||||
# 並行 check 三台主機(GCP-A / GCP-B / Local)
|
||||
assert result.primary.provider_name == "ollama_gcp_a"
|
||||
assert result.health_gcp_b is None
|
||||
assert result.health_local is None
|
||||
assert mock_monitor.check.call_count == 1
|
||||
called_urls = {call.args[0] for call in mock_monitor.check.call_args_list}
|
||||
assert URL_GCP_A in called_urls
|
||||
assert URL_GCP_B not in called_urls
|
||||
assert URL_LOCAL not in called_urls
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_select_provider_checks_fallback_hosts_when_gcp_a_not_healthy(self):
|
||||
"""GCP-A 不健康時仍檢查 GCP-B / Local,保留三層容災。"""
|
||||
manager, mock_monitor = self._make_three_layer_mock(
|
||||
gcp_a_status=HealthStatus.OFFLINE,
|
||||
gcp_b_status=HealthStatus.HEALTHY,
|
||||
local_status=HealthStatus.OFFLINE,
|
||||
)
|
||||
|
||||
with patch.object(manager, "_write_failover_audit", return_value=None):
|
||||
result = await manager.select_provider()
|
||||
|
||||
assert result.primary.provider_name == "ollama_gcp_b"
|
||||
assert mock_monitor.check.call_count == 3
|
||||
called_urls = {call.args[0] for call in mock_monitor.check.call_args_list}
|
||||
assert URL_GCP_A in called_urls
|
||||
@@ -626,7 +647,10 @@ class TestWriteFailoverAudit:
|
||||
async def test_audit_uses_structlog_not_db(self):
|
||||
"""_write_failover_audit 應呼叫 structlog,不呼叫 DB"""
|
||||
manager = _make_manager()
|
||||
from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult
|
||||
from src.services.ollama_failover_manager import (
|
||||
OllamaEndpoint,
|
||||
OllamaRoutingResult,
|
||||
)
|
||||
|
||||
result = OllamaRoutingResult(
|
||||
primary=OllamaEndpoint(url="", provider_name="gemini", model="gemini-1.5-flash"),
|
||||
@@ -642,7 +666,10 @@ class TestWriteFailoverAudit:
|
||||
async def test_audit_skipped_when_gcp_a_healthy(self):
|
||||
"""GCP-A HEALTHY 時 early return,不記錄 failover"""
|
||||
manager = _make_manager()
|
||||
from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult
|
||||
from src.services.ollama_failover_manager import (
|
||||
OllamaEndpoint,
|
||||
OllamaRoutingResult,
|
||||
)
|
||||
|
||||
result = OllamaRoutingResult(
|
||||
primary=OllamaEndpoint(url=URL_GCP_A, provider_name="ollama_gcp_a", model="qwen"),
|
||||
@@ -669,7 +696,7 @@ class TestAIProviderEnumOllamaLocal:
|
||||
assert AIProviderEnum.OLLAMA_LOCAL.value == "ollama_local"
|
||||
|
||||
def test_ollama_local_in_latency_budget(self):
    """OLLAMA_LOCAL must have a PROVIDER_LATENCY_BUDGET entry equal to 90000."""
    # Imported once; the names were previously imported twice in a row.
    from src.services.ai_router import PROVIDER_LATENCY_BUDGET, AIProviderEnum

    assert AIProviderEnum.OLLAMA_LOCAL in PROVIDER_LATENCY_BUDGET
    assert PROVIDER_LATENCY_BUDGET[AIProviderEnum.OLLAMA_LOCAL] == 90000
|
||||
|
||||
|
||||
Reference in New Issue
Block a user