# awoooi/apps/api/src/services/ollama_failover_manager.py

"""
Ollama 自動容災管理 - P1.1b
============================
依 OllamaHealthMonitor 健康狀態決定 Ollama 路由方案。

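Routing logic (2026-05-03 commander directive: three-tier GCP failover, ADR-110):
  GCP-A HEALTHY  → primary=GCP-A, fallback=[GCP-B, Local, Gemini]
  GCP-A unhealthy + GCP-B HEALTHY → primary=GCP-B, fallback=[Local, Gemini]
  GCP-A + GCP-B unhealthy + Local HEALTHY → primary=Local, fallback=[Gemini]
  No tier HEALTHY but at least one SLOW → first SLOW tier (GCP-A, GCP-B, Local) as degraded primary
  No Ollama tier usable → primary=Gemini, fallback=[Nemotron, Claude]
  Gemini quota exceeded → primary=Nemotron, fallback=[Claude]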

設計說明：
- GCP-A 主機：34.143.170.20（SSD，9x 載速 + 2x 推理）
- GCP-B 備援：34.21.145.224（SSD，9x 載速 + 2x 推理）
- Local 最後防線：192.168.0.111（M1 Pro, Metal 加速，HDD）
- 不直接依賴 AIProviderEnum（P1.2 Engineer-A 整合時再對齊）
- 返回輕量 OllamaRoutingResult，含主 endpoint + fallback 清單
- 並行檢查三台 Ollama 主機健康狀態
- 切換觸發時寫 audit_logs service="ollama_failover"
- clear_cache() 方法供 OllamaAutoRecoveryService 切回後清空路由快取

版本: v3.0
建立: 2026-04-25 (台北時區)
建立者: Claude Engineer-C (P1.1b)
更新: 2026-05-03 ogt — GCP 三層容災（ADR-110），GCP-A → GCP-B → Local → Gemini
# Created 2026-04-25 P1.1 by Claude Engineer-C
# 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
# 2026-05-03 ogt: GCP 三層容災（ADR-110），GCP-A → GCP-B → Local → Gemini
"""

from __future__ import annotations

import asyncio
import datetime
from dataclasses import dataclass
from datetime import timedelta, timezone

import structlog

from src.core.config import get_settings
from src.services.ollama_health_monitor import (
    HealthReport,
    HealthStatus,
    OllamaHealthMonitor,
    get_ollama_health_monitor,
)

logger = structlog.get_logger(__name__)

# Taipei time zone, fixed at UTC+8 and built from the standard library only.
# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
# timezone(timedelta(hours=8)) replaces zoneinfo so that a +8 zone always exists.
# Per the original note, a failed zoneinfo.ZoneInfo("Asia/Taipei") lookup left the
# zone as None, and datetime.now(None) then produced non-Taipei (naive) timestamps.
TAIPEI_TZ = timezone(timedelta(hours=8))


# =============================================================================
# 路由結果模型（輕量，P1.2 整合時轉換為 RoutingDecision）
# =============================================================================


@dataclass
class OllamaEndpoint:
    """One routing target: where a request goes and which model it asks for."""

    url: str  # base URL of the endpoint; may be empty for non-Ollama providers
    provider_name: str  # provider identifier handed to AIRouterExecutor
    model: str  # model name to request from this provider

    def to_dict(self) -> dict:
        """Return the three fields as a plain dict keyed by field name."""
        return dict(url=self.url, provider_name=self.provider_name, model=self.model)


@dataclass
class OllamaRoutingResult:
    """
    Outcome of one failover routing decision (three-tier layout, ADR-110).

    Integration note for P1.2 (Engineer-A): convert this into
    ai_router.RoutingDecision as follows:
      - selected_provider = AIProviderEnum[result.primary.provider_name.upper()]
      - selected_model = result.primary.model
      - fallback_chain = [(AIProviderEnum[p.provider_name.upper()], p.model) for p in result.fallback_chain]
    """

    primary: OllamaEndpoint  # endpoint to try first
    fallback_chain: list[OllamaEndpoint]  # endpoints to try next, in order
    routing_reason: str  # human-readable explanation of the decision
    health_gcp_a: HealthReport  # health of the first tier (formerly health_111)
    health_gcp_b: HealthReport | None = None  # health of the second tier, if collected
    health_local: HealthReport | None = None  # health of the third tier, if collected

    @property
    def health_111(self) -> HealthReport:
        """Backward-compatible alias of health_gcp_a (older tests / logs)."""
        return self.health_gcp_a

    def all_endpoints_in_order(self) -> list[OllamaEndpoint]:
        """Return a new list with the primary first, followed by the fallbacks."""
        ordered = [self.primary]
        ordered.extend(self.fallback_chain)
        return ordered

    def to_dict(self) -> dict:
        """
        Serialize the decision to plain dicts.

        Endpoints are emitted with the key "provider" (not "provider_name");
        optional health reports that are falsy serialize as None.
        """

        def _endpoint_view(endpoint: OllamaEndpoint) -> dict:
            return {
                "url": endpoint.url,
                "provider": endpoint.provider_name,
                "model": endpoint.model,
            }

        def _optional_health(report: HealthReport | None) -> dict | None:
            if report:
                return report.to_dict()
            return None

        return {
            "primary": _endpoint_view(self.primary),
            "fallback_chain": [_endpoint_view(item) for item in self.fallback_chain],
            "routing_reason": self.routing_reason,
            "health_gcp_a": self.health_gcp_a.to_dict(),
            "health_gcp_b": _optional_health(self.health_gcp_b),
            "health_local": _optional_health(self.health_local),
        }


# =============================================================================
# 已知 Fallback 端點定義（Nemotron / Gemini / Claude）
# =============================================================================

# provider_name values correspond to the values of ai_router.AIProviderEnum
_NEMOTRON_ENDPOINT = OllamaEndpoint(
    url="",  # Nemotron is not addressed by HTTP URL; AIRouterExecutor obtains it from the Registry
    provider_name="nemotron",
    model="nvidia/nemotron-mini-4b-instruct",
)
_GEMINI_ENDPOINT = OllamaEndpoint(
    url="",  # no URL is configured for this provider here
    provider_name="gemini",
    model="gemini-1.5-flash",
)
_CLAUDE_ENDPOINT = OllamaEndpoint(
    url="",  # no URL is configured for this provider here
    provider_name="claude",
    model="claude-haiku-4-5-20251001",
)


# =============================================================================
# OllamaFailoverManager
# =============================================================================


class OllamaFailoverManager:
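    """
    Automatic failover manager for Ollama.

    Checks the primary Ollama host first; only when it is not HEALTHY are the
    secondary and tertiary hosts checked (in parallel). The best route is then
    chosen from the collected health states.

    Usage:
        manager = OllamaFailoverManager()
        result = await manager.select_provider()
        # result.primary.url → the Ollama URL to use
        # result.fallback_chain → fallbacks, in order

    2026-04-25 Claude Engineer-C (P1.1b)
    """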

    def __init__(
        self,
        health_monitor: OllamaHealthMonitor | None = None,
        recovery_callback=None,
    ) -> None:
        self._monitor = health_monitor or get_ollama_health_monitor()
        self._settings = get_settings()
        # 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
        # recovery_callback: async callable(provider_name: str) → None
        # OllamaAutoRecoveryService.set_current_primary 在 failover 時被通知，
        # 避免重啟後 _current_primary 停留在 "ollama" 而永不啟動恢復監控
        self._recovery_callback = recovery_callback

    # -------------------------------------------------------------------------
    # Public API
    # -------------------------------------------------------------------------

    async def select_provider(
        self,
        task_type: str = "",
        context: dict | None = None,
    ) -> OllamaRoutingResult:
        """
        三層 Ollama 容災路由（ADR-110 修正版 2026-05-04）：
        Primary(OLLAMA_URL) → Secondary(OLLAMA_SECONDARY_URL) → Tertiary(OLLAMA_FALLBACK_URL)
        → Gemini → Nemotron → Claude

        2026-05-04 ogt: URL 優先序已更新（ConfigMap），primary = 111（K8s 內網可達）。
        GCP-A/B 為 secondary/tertiary，待 nginx proxy 架設後再升回 primary。

        Args:
            task_type: 任務類型（預留，目前未影響路由邏輯）
            context: 額外上下文（預留）

        Returns:
            OllamaRoutingResult
        """
        # 2026-05-04 ogt: 改用語意中性名稱 primary/secondary/tertiary，
        # 避免 gcp_a/gcp_b/local 與實際 URL 脫鉤造成 log 誤導
        url_primary   = self._settings.OLLAMA_URL            # 110:11435 → GCP-A (nginx proxy)
        url_secondary = self._settings.OLLAMA_SECONDARY_URL  # 110:11436 → GCP-B (nginx proxy)
        url_tertiary  = self._settings.OLLAMA_FALLBACK_URL   # 110:11437 → Local 111 (nginx proxy)

        def _to_health(r, label: str) -> HealthReport:
            if isinstance(r, Exception):
                return HealthReport(status=HealthStatus.OFFLINE, reason=f"{label} check error: {r}")
            return r

        def _short(url: str) -> str:
            from urllib.parse import urlparse
            return urlparse(url).hostname or url

        # 2026-05-19 Codex: alert-fast path must not wait for the slow local lane
        # when GCP-A is already healthy. The old gather(GCP-A/GCP-B/111) path made
        # 111's 45s health timeout dominate every routing decision.
        try:
            primary_raw = await self._monitor.check(url_primary)
        except Exception as exc:
            primary_raw = exc
        health_gcp_a = _to_health(primary_raw, f"primary({url_primary})")
        health_gcp_b: HealthReport | None = None
        health_local: HealthReport | None = None

        if health_gcp_a.status == HealthStatus.HEALTHY:
            model = self._settings.OLLAMA_HEALTH_CHECK_MODEL
            fallback_chain = [
                OllamaEndpoint(url=url_secondary, provider_name="ollama_gcp_b", model=model),
                OllamaEndpoint(url=url_tertiary, provider_name="ollama_local", model=model),
                _GEMINI_ENDPOINT,
            ]
            result = OllamaRoutingResult(
                primary=OllamaEndpoint(url=url_primary, provider_name="ollama_gcp_a", model=model),
                fallback_chain=fallback_chain,
                routing_reason=f"primary({_short(url_primary)}) HEALTHY",
                health_gcp_a=health_gcp_a,
                health_gcp_b=None,
                health_local=None,
            )
        else:
            # Primary 不健康時才並行檢查後兩層，保留 GCP-B/Local 容災。
            results_raw = await asyncio.gather(
                self._monitor.check(url_secondary),
                self._monitor.check(url_tertiary),
                return_exceptions=True,
            )
            health_gcp_b = _to_health(results_raw[0], f"secondary({url_secondary})")
            health_local = _to_health(results_raw[1], f"tertiary({url_tertiary})")

            result = self._decide_route(
                health_gcp_a=health_gcp_a,
                health_gcp_b=health_gcp_b,
                health_local=health_local,
                url_gcp_a=url_primary,
                url_gcp_b=url_secondary,
                url_local=url_tertiary,
            )

        # Gemini 帳單熔斷（quota gate）
        # 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
        if result.primary.provider_name == "gemini":
            quota_ok = await self._check_gemini_quota()
            if not quota_ok:
                quota = getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000)
                logger.warning(
                    "gemini_quota_exceeded_fallback_to_nemotron",
                    quota=quota,
                    health_gcp_a=health_gcp_a.status.value,
                )
                result = self._build_quota_exceeded_route(health_gcp_a=health_gcp_a)
                # Quota 耗盡 Telegram 告警（24h dedup）
                try:
                    from src.core.redis_client import get_redis
                    from src.services.failover_alerter import get_failover_alerter
                    _current_count = quota
                    try:
                        _redis = get_redis()
                        if _redis is not None:
                            _key = f"ollama:gemini_daily_count:{datetime.date.today().isoformat()}"
                            _raw = await _redis.get(_key)
                            _current_count = int(_raw or 0)
                    except Exception:
                        pass
                    await get_failover_alerter().alert_gemini_quota_exceeded({
                        "quota": quota,
                        "current_count": _current_count,
                    })
                except Exception as _alert_err:
                    logger.warning(
                        "gemini_quota_alert_dispatch_failed",
                        error=str(_alert_err),
                    )

        # 寫入 audit_log（best-effort）
        await self._write_failover_audit(result)

        def _status(report: HealthReport | None) -> str:
            return report.status.value if report else "not_checked"

        logger.info(
            "ollama_failover_decision",
            primary=result.primary.provider_name,
            primary_url=result.primary.url,
            reason=result.routing_reason,
            fallback_count=len(result.fallback_chain),
            health_gcp_a=health_gcp_a.status.value,
            health_gcp_b=_status(health_gcp_b),
            health_local=_status(health_local),
        )

        # 通知 recovery service 當前 primary（跨重啟持久化）
        # 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
        if self._recovery_callback is not None:
            try:
                await self._recovery_callback(result.primary.provider_name)
            except Exception as e:
                logger.warning(
                    "ollama_failover_recovery_callback_failed",
                    error=str(e),
                )

        return result

    # -------------------------------------------------------------------------
    # 路由決策邏輯
    # -------------------------------------------------------------------------

    def _decide_route(
        self,
        health_gcp_a: HealthReport,
        health_gcp_b: HealthReport,
        health_local: HealthReport,
        url_gcp_a: str,
        url_gcp_b: str,
        url_local: str,
    ) -> OllamaRoutingResult:
        """
        三層 Ollama 決策矩陣（2026-05-03 ogt，ADR-110）：

          GCP-A HEALTHY → primary=GCP-A, fallback=[GCP-B, Local]
          GCP-A 不健康 + GCP-B HEALTHY → primary=GCP-B, fallback=[Local]
          GCP-A + GCP-B 不健康 + Local HEALTHY → primary=Local, fallback=[Gemini]
          全部 Ollama 不健康 → primary=Gemini, fallback=[Nemotron, Claude]

        Gemini quota 超過由 _build_quota_exceeded_route() 接管。
        """
        model = self._settings.OLLAMA_HEALTH_CHECK_MODEL
        ep_gcp_a = OllamaEndpoint(url=url_gcp_a, provider_name="ollama_gcp_a", model=model)
        ep_gcp_b = OllamaEndpoint(url=url_gcp_b, provider_name="ollama_gcp_b", model=model)
        ep_local = OllamaEndpoint(url=url_local, provider_name="ollama_local", model=model)

        now_ts = datetime.datetime.now(TAIPEI_TZ).isoformat()

        # 用實際 URL 取最後一段作為 log 標識（IP 或 hostname）
        def _short(url: str) -> str:
            from urllib.parse import urlparse
            return urlparse(url).hostname or url

        lbl_p = _short(url_gcp_a)   # primary label
        lbl_s = _short(url_gcp_b)   # secondary label
        lbl_t = _short(url_local)   # tertiary label

        # Primary HEALTHY → 使用 primary
        if health_gcp_a.status == HealthStatus.HEALTHY:
            return OllamaRoutingResult(
                primary=ep_gcp_a,
                fallback_chain=[ep_gcp_b, ep_local, _GEMINI_ENDPOINT],
                routing_reason=f"primary({lbl_p}) HEALTHY",
                health_gcp_a=health_gcp_a,
                health_gcp_b=health_gcp_b,
                health_local=health_local,
            )

        # Primary 不健康，Secondary HEALTHY → 切 secondary
        if health_gcp_b.status == HealthStatus.HEALTHY:
            return OllamaRoutingResult(
                primary=ep_gcp_b,
                fallback_chain=[ep_local, _GEMINI_ENDPOINT],
                routing_reason=f"primary({lbl_p}) {health_gcp_a.status.value} → secondary({lbl_s}) at {now_ts}",
                health_gcp_a=health_gcp_a,
                health_gcp_b=health_gcp_b,
                health_local=health_local,
            )

        # Primary + Secondary 不健康，Tertiary HEALTHY → 切 tertiary
        if health_local.status == HealthStatus.HEALTHY:
            return OllamaRoutingResult(
                primary=ep_local,
                fallback_chain=[_GEMINI_ENDPOINT],
                routing_reason=(
                    f"primary({lbl_p}) {health_gcp_a.status.value}"
                    f" + secondary({lbl_s}) {health_gcp_b.status.value}"
                    f" → tertiary({lbl_t}) at {now_ts}"
                ),
                health_gcp_a=health_gcp_a,
                health_gcp_b=health_gcp_b,
                health_local=health_local,
            )

        # 2026-05-04 ogt: SLOW 容災備援（外網同時抖動時，SLOW Ollama 仍優於 Gemini quota 耗盡）
        if health_gcp_a.status == HealthStatus.SLOW:
            return OllamaRoutingResult(
                primary=ep_gcp_a,
                fallback_chain=[ep_gcp_b, ep_local, _GEMINI_ENDPOINT],
                routing_reason=f"primary({lbl_p}) SLOW（降級可用）at {now_ts}",
                health_gcp_a=health_gcp_a,
                health_gcp_b=health_gcp_b,
                health_local=health_local,
            )
        if health_gcp_b.status == HealthStatus.SLOW:
            return OllamaRoutingResult(
                primary=ep_gcp_b,
                fallback_chain=[ep_local, _GEMINI_ENDPOINT],
                routing_reason=(
                    f"primary({lbl_p}) {health_gcp_a.status.value}"
                    f" + secondary({lbl_s}) SLOW（降級可用）at {now_ts}"
                ),
                health_gcp_a=health_gcp_a,
                health_gcp_b=health_gcp_b,
                health_local=health_local,
            )
        if health_local.status == HealthStatus.SLOW:
            return OllamaRoutingResult(
                primary=ep_local,
                fallback_chain=[_GEMINI_ENDPOINT],
                routing_reason=(
                    f"primary({lbl_p}) {health_gcp_a.status.value}"
                    f" + secondary({lbl_s}) {health_gcp_b.status.value}"
                    f" + tertiary({lbl_t}) SLOW（降級可用）at {now_ts}"
                ),
                health_gcp_a=health_gcp_a,
                health_gcp_b=health_gcp_b,
                health_local=health_local,
            )

        # 全部 Ollama 不可用（DEGRADED/OFFLINE）→ Gemini
        return OllamaRoutingResult(
            primary=_GEMINI_ENDPOINT,
            fallback_chain=[_NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
            routing_reason=(
                f"所有 Ollama 不健康（primary({lbl_p}) {health_gcp_a.status.value}，"
                f"secondary({lbl_s}) {health_gcp_b.status.value}，"
                f"tertiary({lbl_t}) {health_local.status.value}）→ 切 Gemini at {now_ts}"
            ),
            health_gcp_a=health_gcp_a,
            health_gcp_b=health_gcp_b,
            health_local=health_local,
        )

    # -------------------------------------------------------------------------
    # Gemini 帳單熔斷（quota gate）
    # 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
    # -------------------------------------------------------------------------

    async def _check_gemini_quota(self) -> bool:
        """
        檢查每日 Gemini call 配額，超過上限則禁用。

        Redis key: ollama:gemini_daily_count:{YYYY-MM-DD}，TTL 86400s
        計數 atomic（incr）。

        Returns:
            True  → 仍在配額內，可使用 Gemini
            False → 已超配額，應切到 188+Nemotron

        fail-open：Redis 不可用時允許走 Gemini（不阻擋服務）
        """
        try:
            from src.core.redis_client import get_redis
            redis = get_redis()
            if redis is None:
                return True  # fail-open
            quota = getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000)
            key = f"ollama:gemini_daily_count:{datetime.date.today().isoformat()}"

            # 2026-04-26 Wave5 B3-fix by Claude Engineer-A4 — atomic pipeline 修復 TOCTOU
            # 原實作：GET → 判斷 → INCR → EXPIRE（分四步，INCR 後 crash 會丟 TTL，
            # 且並行請求在 GET/INCR 之間競爭導致配額超發）
            # 修法：pipeline 原子執行 SET NX（首次設 TTL） + INCR，用 INCR 後的新值判斷
            pipe = redis.pipeline()
            pipe.set(key, 0, ex=86400, nx=True)  # 僅首次寫入設 TTL；已存在則跳過
            pipe.incr(key)                        # 原子遞增，回傳遞增後的值
            results = await pipe.execute()
            new_count = int(results[1])           # results[1] = INCR 後新值

            # 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert) — 刷新 Gemini Prometheus Gauge
            # 每次 quota check 時同步更新，讓 Prometheus 取到最新值
            try:
                from src.core.metrics import GEMINI_DAILY_CALL_COUNT, GEMINI_DAILY_QUOTA
                GEMINI_DAILY_CALL_COUNT.set(new_count)
                GEMINI_DAILY_QUOTA.set(quota)
            except Exception:
                pass  # metric 更新失敗不阻斷主路由邏輯

            if new_count > quota:
                # 已超配額（INCR 後 > quota），回退不是必要的（最多超發 1 次）
                # 但要回傳 False 讓 router 切到 188
                return False
            return True
        except Exception as e:
            # 2026-04-27 Wave8-X2 by Claude — B14 quota fail-closed
            # 原 fail-open：Redis 異常 → return True → Gemini 盲開 → 費用鐵律違反
            # 修法：Redis 異常時 fail-closed，拒絕走 Gemini，讓 fallback chain 接手 188/Nemotron
            # 費用安全 > 服務可用性（統帥鐵律：費用變更必須停下）
            logger.exception(
                "gemini_quota_check_failed_failing_closed",
                error=str(e),
                security_note="Redis 異常時為費用安全 fail-closed，切到 fallback chain",
            )
            # 嘗試告警（best-effort，不阻塞路由）
            try:
                from src.services.failover_alerter import get_failover_alerter

                await get_failover_alerter().alert_gemini_quota_exceeded({
                    "quota": getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000),
                    "current_count": "unknown (Redis error)",
                    "reason": "fail_closed_due_to_redis_error",
                })
            except Exception:
                pass
            return False  # fail-closed：拒絕 Gemini，讓 fallback chain（188/Nemotron）接手

    def _build_quota_exceeded_route(
        self,
        health_gcp_a: HealthReport,
    ) -> OllamaRoutingResult:
        """
        Gemini 配額耗盡時的備援路由：primary=Nemotron, fallback=[Claude]
        2026-05-03 ogt: 更新參數名 health_111 → health_gcp_a（ADR-110）
        """
        return OllamaRoutingResult(
            primary=_NEMOTRON_ENDPOINT,
            fallback_chain=[_CLAUDE_ENDPOINT],
            routing_reason="Gemini quota exceeded → Nemotron 備援",
            health_gcp_a=health_gcp_a,
        )

    # -------------------------------------------------------------------------
    # Recovery API（供 OllamaAutoRecoveryService 呼叫）
    # -------------------------------------------------------------------------

    def set_recovery_callback(self, callback) -> None:
        """
        設定 recovery callback（供 lifespan wiring 使用）。
        callback signature: async (provider_name: str) -> None

        # 2026-04-25 P1.2 by Claude Engineer-A2 — failover 整合到 ai_router + lifespan
        """
        self._recovery_callback = callback

    async def clear_cache(self) -> None:
        """
        清空路由決策快取，讓下次 select_provider 重新評估健康狀態。
        OllamaAutoRecoveryService 在偵測 111 恢復後呼叫此方法。

        2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
        # 2026-04-25 P1.2 by Claude Engineer-A2 — 改用 make_cache_key 動態組 key，消除硬編碼 IP
        """
        try:
            from src.core.redis_client import get_redis
            from src.services.ollama_health_monitor import make_cache_key
            redis = get_redis()
            if redis is None:
                return
            # 動態由 settings URL 組 cache key，避免硬編碼 IP
            # 2026-05-03 ogt: 新增 OLLAMA_SECONDARY_URL（ADR-110 GCP-B）
            keys = [
                make_cache_key(self._settings.OLLAMA_URL),
                make_cache_key(self._settings.OLLAMA_SECONDARY_URL or ""),
                make_cache_key(self._settings.OLLAMA_FALLBACK_URL or ""),
            ]
            for k in keys:
                if k and k != "ollama_health:":  # 空 URL 會產生無意義的 key，跳過
                    await redis.delete(k)
            logger.info(
                "ollama_failover_cache_cleared",
                service="ollama_failover",
                reason="recovery_triggered",
            )
        except Exception as e:
            logger.debug("ollama_failover_clear_cache_failed", error=str(e))

    def notify_recovery(self, provider: str) -> None:
        """
        預留：P1.5 Engineer 接入 Telegram alerter 時使用。
        目前僅寫 structlog audit。

        2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
        """
        logger.info(
            "ollama_recovery_notified",
            service="ollama_failover",
            provider=provider,
            action="recovery_received",
        )

    # -------------------------------------------------------------------------
    # Audit Log
    # -------------------------------------------------------------------------

    async def _write_failover_audit(self, result: OllamaRoutingResult) -> None:
        """
        切換觸發時寫 structlog audit（best-effort）+ Telegram 告警

        # 2026-04-25 critic-fix Part2 B1 by Claude Engineer-C2
        # 原 AuditLog DB 寫入使用不存在的欄位（service/action/target/status/metadata）
        # → SQLAlchemy crash → except 吃掉 → 零稽核
        # 修法：刪除 DB 寫入路徑，改用 structlog only（audit 不依賴 DB schema）

        # 2026-04-25 P1.5 by Claude Engineer-D — 新增 Telegram 告警（dedup 10min）

        service="ollama_failover"（per 任務規格）
        僅在 primary 非 111 時記錄（真正發生切換）
        """
        # 2026-05-03 ogt: GCP 三層容災下，三個 ollama_* provider 都是正常狀態，無需告警
        if result.primary.provider_name in ("ollama", "ollama_gcp_a", "ollama_gcp_b", "ollama_local"):
            return

        # 2026-04-26 P2.3 by Claude Sonnet 4.6 (tool-expert) — 記錄 failover Prometheus metric
        try:
            from src.core.metrics import (
                OLLAMA_CURRENT_PRIMARY_IS_OLLAMA,
                OLLAMA_FAILOVER_TRIGGERED_TOTAL,
            )
            OLLAMA_FAILOVER_TRIGGERED_TOTAL.labels(
                from_provider="ollama",
                to_provider=result.primary.provider_name,
            ).inc()
            OLLAMA_CURRENT_PRIMARY_IS_OLLAMA.set(0)
        except Exception as _metric_err:
            logger.debug("ollama_failover_metric_error", error=str(_metric_err))

        logger.info(
            "ollama_failover_triggered",
            service="ollama_failover",
            action="failover_triggered",
            from_provider="ollama",
            to_provider=result.primary.provider_name,
            reason=result.routing_reason,
            primary_url=result.primary.url or result.primary.provider_name,
            health_gcp_a=result.health_gcp_a.status.value,
            health_gcp_b=result.health_gcp_b.status.value if result.health_gcp_b else "not_configured",
            health_local=result.health_local.status.value if result.health_local else "not_configured",
        )

        # Telegram 告警（首次切換才通知，dedup 10min 內建）
        # 2026-04-25 P1.5 by Claude Engineer-D — 告警失敗不阻斷主路由邏輯
        # 2026-05-03 ogt: ADR-110 — failed_host 動態計算，顯示哪台 GCP/Local 故障
        try:
            from src.services.failover_alerter import get_failover_alerter
            from src.services.ollama_health_monitor import HealthStatus
            fallback_chain_str = " → ".join(
                p.provider_name for p in result.fallback_chain
            )
            # 計算故障主機描述（哪層 Ollama 不健康，用實際 URL 不用硬編碼標籤）
            _failed = []
            if result.health_gcp_a.status != HealthStatus.HEALTHY:
                _failed.append(self._settings.OLLAMA_URL)
            if result.health_gcp_b and result.health_gcp_b.status != HealthStatus.HEALTHY:
                _failed.append(self._settings.OLLAMA_SECONDARY_URL or "secondary")
            if result.health_local and result.health_local.status != HealthStatus.HEALTHY:
                _failed.append(self._settings.OLLAMA_FALLBACK_URL or "tertiary")
            failed_host = " + ".join(_failed) if _failed else "Ollama"
            alerter = get_failover_alerter()
            await alerter.alert_failover({
                "to_provider": result.primary.provider_name,
                "model": result.primary.model,
                "reason": result.routing_reason,
                "timestamp": datetime.datetime.now(TAIPEI_TZ).isoformat(),
                "fallback_chain_str": fallback_chain_str,
                "failed_host": failed_host,
            })
        except Exception as e:
            logger.warning("failover_alert_failed", error=str(e))


# =============================================================================
# Singleton
# =============================================================================

_failover_manager: OllamaFailoverManager | None = None


def get_ollama_failover_manager() -> OllamaFailoverManager:
    """Return the module-level OllamaFailoverManager, creating it on first use."""
    global _failover_manager
    manager = _failover_manager
    if manager is None:
        manager = OllamaFailoverManager()
        _failover_manager = manager
    return manager


def reset_ollama_failover_manager() -> None:
    """Drop the cached singleton so the next getter call builds a new one (for tests)."""
    global _failover_manager
    _failover_manager = None