feat(p1): Ollama 多層容災系統 — P1.1 健康檢測 + P1.2 ai_router 整合 + P1.5 容災告警

ADR-092 P1 飛輪閉環的 Ollama 失敗轉移子系統，全部 Engineer-A2/C/C2 補上。新服務 (1581 行)： - ollama_health_monitor.py (356)：3 層健康檢測（TCP/HTTP/推理） - ollama_failover_manager.py (571)：111→188 自動切換 + Redis 持久化 + recovery callback - ollama_auto_recovery.py (436)：30s 背景監控 + 連續 3 次 HEALTHY → 切回 + clear_cache - failover_alerter.py (218)：P1.5 Telegram 容災告警服務整合： - ai_router.py: AIProviderEnum.OLLAMA_188 + 120s budget + failover fallback chain - main.py lifespan: 啟動時 wire callback + start recovery，關閉時優雅 stop - config.py: OLLAMA_FALLBACK_URL / OLLAMA_HEALTH_CHECK_MODEL / GEMINI_DAILY_QUOTA（帳單熔斷） K8s 配置： - 04-configmap.yaml.patch-188-fallback：注入 OLLAMA_FALLBACK_URL=http://192.168.0.188:11434 測試 (2082 行)： - test_ollama_health_monitor.py (402) - test_ollama_failover_manager.py (707) - test_ollama_auto_recovery.py (580) - test_ai_router_failover_integration.py (257) - test_lifespan_failover_wiring.py (136) 依賴鏈：service 三件套 + ai_router + main.py 一起 commit，缺一就 ImportError。 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 20:18:33 +08:00
parent d3a4fb4d15
commit 55c6b4e2d9
13 changed files with 3798 additions and 2 deletions
--- a/apps/api/src/core/config.py
+++ b/apps/api/src/core/config.py
@@ -189,11 +189,29 @@ class Settings(BaseSettings):
        default="http://192.168.0.111:11434",  # 2026-04-08 ogt: 切換至 M1 Pro (40+ tok/s vs 0.45 tok/s)
        description="Ollama LLM service URL",
    )
+    # 2026-04-25 Claude Engineer-C (P1.1): Ollama 188 CPU-only 備援 (方案 C)
+    # 若空字串則 OllamaFailoverManager 僅使用 OLLAMA_URL（單節點模式）
+    OLLAMA_FALLBACK_URL: str = Field(
+        default="",
+        description="Ollama CPU-only fallback URL (188 備援，P1.1)，空字串=停用",
+    )
+    # 2026-04-25 Claude Engineer-C (P1.1): Ollama 健康檢測推理測試模型
+    OLLAMA_HEALTH_CHECK_MODEL: str = Field(
+        default="qwen2.5:7b-instruct",
+        description="OllamaHealthMonitor 推理測試使用模型（P1.1）",
+    )
    # 2026-04-12 ogt: 心跳必須確認載入的 Ollama 模型清單
    OLLAMA_REQUIRED_MODELS: list[str] = Field(
        default=["nomic-embed-text", "qwen2.5:7b-instruct", "deepseek-r1:14b"],
        description="HeartbeatReportService 探測必要模型是否載入",
    )
+    # 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
+    # Gemini 帳單熔斷：每日呼叫上限，超過改走 188+Nemotron
+    # 超過上限後寫 Redis key ollama:gemini_daily_count:{date}，TTL 86400s
+    GEMINI_DAILY_QUOTA: int = Field(
+        default=1000,
+        description="每日 Gemini 呼叫上限，超過切到 188+Nemotron（P1.1 帳單熔斷）",
+    )
    # Deprecated: use OPENCLAW_URL instead
    CLAWBOT_URL: str = Field(
        default="http://192.168.0.188:8088",  # 🔧 修正: OpenClaw 實際 port 是 8088
--- a/apps/api/src/main.py
+++ b/apps/api/src/main.py
@@ -546,9 +546,39 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
    except Exception as e:
        logger.warning("ai_slo_watchdog_schedule_failed", error=str(e))

+    # 2026-04-25 P1.2 by Claude Engineer-A2 — failover 整合到 ai_router + lifespan
+    # OllamaFailoverManager + OllamaAutoRecoveryService 飛輪接線：
+    #   failover 切換時 → recovery_callback → set_current_primary → Redis 持久化
+    #   recovery service 每 30s 檢查 → 111 連續 3 次 HEALTHY → 自動切回 → clear_cache
+    # 順序：先取 singleton → wire callback → 啟動 recovery service（才能接收 callback）
+    try:
+        from src.services.ollama_failover_manager import get_ollama_failover_manager
+        from src.services.ollama_auto_recovery import get_ollama_auto_recovery_service
+
+        _failover_mgr = get_ollama_failover_manager()
+        _recovery_svc = get_ollama_auto_recovery_service()
+
+        # wire callback：failover 切換時通知 recovery service 更新 current_primary
+        _failover_mgr.set_recovery_callback(_recovery_svc.set_current_primary)
+
+        # 啟動 recovery service（從 Redis bootstrap current_primary，並啟動背景監控）
+        await _recovery_svc.start()
+        logger.info("ollama_failover_system_started")
+    except Exception as e:
+        logger.warning("ollama_failover_system_start_failed", error=str(e))
+
    yield

    # Shutdown
+    # 2026-04-25 P1.2 by Claude Engineer-A2 — 優雅關閉 Ollama failover 背景監控
+    # 必須在 Redis pool 關閉之前停止（recovery service 可能仍在寫 Redis）
+    try:
+        from src.services.ollama_auto_recovery import get_ollama_auto_recovery_service
+        await get_ollama_auto_recovery_service().stop()
+        logger.info("ollama_failover_system_stopped")
+    except Exception as e:
+        logger.warning("ollama_failover_system_stop_failed", error=str(e))
+
    # Phase 6.1: 關閉 Signal Worker (先關閉 Consumer)
    await close_signal_worker()
    await publisher.stop()
--- a/apps/api/src/services/ai_router.py
+++ b/apps/api/src/services/ai_router.py
@@ -68,10 +68,14 @@ logger = structlog.get_logger(__name__)
 # =============================================================================


-class AIProviderEnum(Enum):
+class AIProviderEnum(str, Enum):
    """AI 提供者"""

    OLLAMA = "ollama"
+    # 2026-04-25 critic-fix Part2 B2 by Claude Engineer-C2
+    # P1.1b OllamaFailoverManager 使用 provider_name="ollama_188"，
+    # 但 AIProviderEnum 沒有此值 → P1.2 整合時 lookup 失敗
+    OLLAMA_188 = "ollama_188"  # 188 CPU-only 備援節點（P1.1b）
    GEMINI = "gemini"
    CLAUDE = "claude"
    # 2026-04-02 ogt: C1 修復 — 對齊 Registry 實際名稱
@@ -85,6 +89,8 @@ class AIProviderEnum(Enum):
 # Provider 對應延遲預算 (ms)
 PROVIDER_LATENCY_BUDGET: dict[AIProviderEnum, int] = {
    AIProviderEnum.OLLAMA: 60000,  # 本地，允許較長處理時間
+    # 2026-04-25 critic-fix Part2 B2 by Claude Engineer-C2 — 188 CPU-only 推理較慢
+    AIProviderEnum.OLLAMA_188: 120000,  # 120s budget for CPU inference
    AIProviderEnum.GEMINI: 30000,  # 雲端，較低延遲
    AIProviderEnum.CLAUDE: 30000,  # 雲端，較低延遲
    # 2026-04-02 ogt: C1 修復 — 對齊 Registry 名稱
@@ -212,6 +218,10 @@ class AIRouter:
        self._intent_classifier = get_intent_classifier()
        self._complexity_scorer = get_complexity_scorer()
        self._model_registry = get_model_registry()
+        # 2026-04-25 P1.2 by Claude Engineer-A2 — failover 整合到 ai_router + lifespan
+        # 延遲 import 避免循環依賴（ollama_failover_manager 不 import ai_router）
+        from src.services.ollama_failover_manager import get_ollama_failover_manager
+        self._failover_manager = get_ollama_failover_manager()

        # 從 ModelRegistry 取得模型配置
        self._ollama_default = self._model_registry.get_model("ollama", "default")
@@ -372,10 +382,51 @@ class AIRouter:
            intent, intent_result, complexity
        )

+        # Step 3b: 若 initial decision 選到 OLLAMA，交由 FailoverManager 重評
+        # 2026-04-25 P1.2 by Claude Engineer-A2 — failover 整合到 ai_router + lifespan
+        # 只在 OLLAMA 時觸發，不干擾 NEMOTRON / OPENCLAW_NEMO / CLAUDE / GEMINI 路由
+        failover_fallback: list[tuple[AIProviderEnum, str]] | None = None
+        if provider == AIProviderEnum.OLLAMA:
+            try:
+                failover_result = await self._failover_manager.select_provider(
+                    task_type=intent.value if intent else "general"
+                )
+                primary_str = failover_result.primary.provider_name
+                try:
+                    provider = AIProviderEnum(primary_str)
+                    model = failover_result.primary.model
+                    reason = f"{reason} [failover→{primary_str}]"
+                except ValueError:
+                    # provider_name 無法對應已知 enum（理論上不應發生，OLLAMA_188 已加）
+                    logger.warning(
+                        "ai_router_unknown_failover_provider",
+                        provider=primary_str,
+                    )
+                # 重建 fallback chain：從 failover_result 轉換
+                fb_list: list[tuple[AIProviderEnum, str]] = []
+                for ep in failover_result.fallback_chain:
+                    try:
+                        fb_provider = AIProviderEnum(ep.provider_name)
+                        fb_list.append((fb_provider, ep.model))
+                    except ValueError:
+                        logger.warning(
+                            "ai_router_unknown_failover_fallback_provider",
+                            provider=ep.provider_name,
+                        )
+                failover_fallback = fb_list
+            except Exception as e:
+                # failover_manager 異常 → 保留原始 provider（fail-open）
+                logger.warning("ai_router_failover_manager_error", error=str(e))
+
        # Step 4: 建立 Fallback 鏈
        # 2026-04-05 Claude Code: v4.3 — DIAGNOSE 改回 _full_fallback_chain
        # NIM 從 Phase 22 起就是主力，無隱私問題；Ollama CPU-only 不可用（實測 238s）
-        fallback_chain = self._build_fallback_chain(provider)
+        # 2026-04-25 P1.2: 若 failover_manager 回傳了 fallback chain，優先使用
+        fallback_chain = (
+            failover_fallback
+            if failover_fallback is not None
+            else self._build_fallback_chain(provider)
+        )

        # Step 5: 計算延遲預算
        latency_budget = PROVIDER_LATENCY_BUDGET.get(provider, 30000)
--- a/apps/api/src/services/failover_alerter.py
+++ b/apps/api/src/services/failover_alerter.py
@@ -0,0 +1,218 @@
+"""Ollama 容災 / 自動恢復 / Gemini 帳單 Telegram 告警
+
+設計原則：
+- 每次 failover 觸發都通知（用戶明確指示）
+- 但用 10min Redis dedup TTL 防同樣狀態重複告警
+- 三種告警類型：failover_triggered / recovery_succeeded / gemini_quota_exceeded
+
+2026-04-25 P1.5 by Claude Engineer-D — Telegram Alerter for Ollama Failover
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone, timedelta
+from typing import Any
+
+import structlog
+
+TAIPEI_TZ = timezone(timedelta(hours=8))
+DEDUP_TTL_SEC = 600  # 10 min
+QUOTA_DEDUP_TTL_SEC = 86400  # 24h（每日 quota 告警只發一次）
+
+logger = structlog.get_logger(__name__)
+
+
+class FailoverAlerter:
+    """Ollama 容災 Telegram 告警
+
+    2026-04-25 P1.5 by Claude Engineer-D — Telegram Alerter for Ollama Failover
+    """
+
+    def __init__(self, redis_client=None) -> None:
+        # telegram_gateway 從 singleton 取，不注入（lifespan 已確保初始化）
+        self._redis = redis_client
+
+    async def alert_failover(self, event: dict[str, Any]) -> None:
+        """111 故障切換到 Gemini/188 — 10min dedup"""
+        to_provider = event.get("to_provider", "unknown")
+        dedup_key = f"alert:failover:{to_provider}"
+        if not await self._check_dedup(dedup_key, ttl=DEDUP_TTL_SEC):
+            logger.debug("failover_alert_dedup_skipped", to_provider=to_provider)
+            return
+
+        reason = event.get("reason", "unknown")
+        model = event.get("model", "?")
+        timestamp = event.get("timestamp", datetime.now(TAIPEI_TZ).isoformat())
+        fallback_chain_str = event.get("fallback_chain_str", "?")
+
+        msg = (
+            f"*Ollama 容災激活*\n\n"
+            f"故障主機：Ollama 111 \\(GPU\\)\n"
+            f"故障狀態：{_escape_md(reason)}\n"
+            f"切換目標：{_escape_md(to_provider)} \\(model: {_escape_md(model)}\\)\n"
+            f"切換時間：{_escape_md(timestamp)}\n\n"
+            f"Fallback 鏈：{_escape_md(fallback_chain_str)}\n\n"
+            f"自動恢復服務持續監控，3 次 HEALTHY 後自動切回"
+        )
+        await self._send(msg)
+        logger.info("failover_alert_sent", to_provider=to_provider)
+
+    async def alert_recovery(self, event: dict[str, Any]) -> None:
+        """111 恢復後自動切回 — 10min dedup
+
+        2026-04-25 P1.5 by Claude Engineer-D — 用戶明確要求要通知
+        """
+        dedup_key = "alert:recovery"
+        if not await self._check_dedup(dedup_key, ttl=DEDUP_TTL_SEC):
+            logger.debug("recovery_alert_dedup_skipped")
+            return
+
+        stable_count = event.get("stable_count", event.get("to", "?"))
+        # 相容 auto_recovery 傳入的 {"from": ..., "to": ...} 格式
+        from_provider = event.get("from_provider", event.get("from", "?"))
+        to_provider = event.get("to_provider", event.get("to", "ollama_111"))
+        recovery_time = event.get("recovery_time", datetime.now(TAIPEI_TZ).isoformat())
+
+        msg = (
+            f"*Ollama 自動恢復*\n\n"
+            f"恢復主機：Ollama 111 \\(GPU\\)\n"
+            f"穩定計數：連續 {stable_count} 次 HEALTHY\n"
+            f"切回時間：{_escape_md(str(recovery_time))}\n"
+            f"切換路徑：{_escape_md(str(from_provider))} → {_escape_md(str(to_provider))}\n\n"
+            f"自動化飛輪已恢復至 GPU 推理模式"
+        )
+        await self._send(msg)
+        logger.info("recovery_alert_sent", from_provider=from_provider)
+
+    async def alert_gemini_quota_exceeded(self, event: dict[str, Any]) -> None:
+        """Gemini 每日上限觸發，降級到 188 CPU 備援 — 24h dedup"""
+        dedup_key = "alert:gemini_quota_exceeded"
+        if not await self._check_dedup(dedup_key, ttl=QUOTA_DEDUP_TTL_SEC):
+            logger.debug("quota_alert_dedup_skipped")
+            return
+
+        quota = event.get("quota", "?")
+        current_count = event.get("current_count", "?")
+        date_str = datetime.now(TAIPEI_TZ).date().isoformat()
+
+        msg = (
+            f"*Gemini 每日配額耗盡*\n\n"
+            f"日期：{date_str}\n"
+            f"上限：{quota} calls/day\n"
+            f"當前用量：{current_count}\n"
+            f"降級目標：OLLAMA\\_188 \\(CPU，推理較慢\\)\n\n"
+            f"進入慢速模式至明日 0:00\n"
+            f"建議檢查是否有異常流量，評估是否升級 Gemini 配額"
+        )
+        await self._send(msg)
+        logger.info("quota_alert_sent", quota=quota, current_count=current_count)
+
+    # -------------------------------------------------------------------------
+    # Dedup（Redis SET NX EX）
+    # -------------------------------------------------------------------------
+
+    async def _check_dedup(self, key: str, ttl: int) -> bool:
+        """
+        Redis SET NX EX 防止重複告警。
+        True = 第一次（應送出），False = 已送過（跳過）。
+
+        fail-open：Redis 不可用時允許送出（不阻擋通知）
+
+        2026-04-25 P1.5 by Claude Engineer-D — Telegram dedup 鐵律 10min/24h TTL
+        """
+        if self._redis is None:
+            return True  # fail-open
+        try:
+            ok = await self._redis.set(f"{key}:dedup", "1", ex=ttl, nx=True)
+            return bool(ok)
+        except Exception as e:
+            logger.warning("dedup_check_failed", error=str(e))
+            return True  # fail-open
+
+    # -------------------------------------------------------------------------
+    # 發送（透過 TelegramGateway singleton）
+    # -------------------------------------------------------------------------
+
+    async def _send(self, message: str) -> None:
+        """發送至 Telegram OPENCLAW_TG_CHAT_ID
+
+        使用現有 TelegramGateway singleton（不另建 HTTP client），
+        parse_mode=MarkdownV2 對應 MarkdownV2 escape 規則。
+
+        alerter 失敗不阻斷主路由邏輯（exception 吞掉只 log）
+
+        2026-04-25 P1.5 by Claude Engineer-D — 告警失敗不能阻斷主流程
+        """
+        try:
+            from src.services.telegram_gateway import get_telegram_gateway
+            from src.core.config import get_settings
+
+            settings = get_settings()
+            chat_id = getattr(settings, "OPENCLAW_TG_CHAT_ID", None)
+            if not chat_id:
+                logger.warning("telegram_chat_id_missing_failover_alert")
+                return
+
+            gateway = get_telegram_gateway()
+            await gateway.send_notification(text=message, parse_mode="MarkdownV2")
+            logger.info("telegram_failover_alert_sent", message_len=len(message))
+        except Exception as e:
+            # 不 raise — 告警失敗不該阻斷主流程（鐵律）
+            logger.exception("telegram_failover_send_failed", error=str(e))
+
+
+# -------------------------------------------------------------------------
+# MarkdownV2 escape 工具
+# -------------------------------------------------------------------------
+
+_MD2_SPECIAL = r"\_*[]()~`>#+-=|{}.!"
+
+
+def _escape_md(text: str) -> str:
+    """Escape MarkdownV2 特殊字元，防止 Telegram parse error。
+
+    2026-04-25 P1.5 by Claude Engineer-D — MarkdownV2 安全逸出
+    """
+    for ch in _MD2_SPECIAL:
+        text = text.replace(ch, f"\\{ch}")
+    return text
+
+
+# =============================================================================
+# Singleton
+# =============================================================================
+
+_alerter_instance: FailoverAlerter | None = None
+
+
+def get_failover_alerter() -> FailoverAlerter:
+    """取得 FailoverAlerter singleton（lifespan 中注入依賴前也可安全呼叫，僅 fail-open）
+
+    2026-04-25 P1.5 by Claude Engineer-D — Singleton 取得 alerter
+    """
+    global _alerter_instance
+    if _alerter_instance is None:
+        _alerter_instance = FailoverAlerter()
+    return _alerter_instance
+
+
+def configure_alerter(redis_client) -> None:
+    """Lifespan 注入：redis_client 就緒後呼叫，讓 dedup 功能生效。
+
+    telegram 不另外注入，直接從 get_telegram_gateway() singleton 取得
+    （lifespan startup 已保證 TelegramGateway 初始化完成）。
+
+    2026-04-25 P1.5 by Claude Engineer-D — Lifespan 注入
+    """
+    global _alerter_instance
+    _alerter_instance = FailoverAlerter(redis_client=redis_client)
+    logger.info("failover_alerter_configured")
+
+
+def reset_failover_alerter() -> None:
+    """重置 singleton（測試用）
+
+    2026-04-25 P1.5 by Claude Engineer-D
+    """
+    global _alerter_instance
+    _alerter_instance = None
--- a/apps/api/src/services/ollama_auto_recovery.py
+++ b/apps/api/src/services/ollama_auto_recovery.py
@@ -0,0 +1,436 @@
+"""
+Ollama 自動恢復服務 - P1.1d
+============================
+背景監控：偵測 111 從 OFFLINE/SLOW/DEGRADED 恢復為 HEALTHY 後，立刻切回 Ollama。
+
+核心設計：
+- 30s 輪詢 111 健康狀態
+- 防抖：連續 3 次 HEALTHY 才觸發切回（30s × 3 = 90s 穩定視窗）
+- 中途若又 OFFLINE，counter 歸零，重新計數
+- 切回後呼叫 failover_manager.clear_cache()，讓下次路由重新評估
+- 預留 telegram_alerter 介面（P1.5 Engineer 接入）
+- structlog audit service="ollama_auto_recovery"
+
+整合方式（FastAPI lifespan）：
+    from src.services.ollama_auto_recovery import OllamaAutoRecoveryService
+
+    @asynccontextmanager
+    async def lifespan(app: FastAPI):
+        recovery_svc = OllamaAutoRecoveryService()
+        await recovery_svc.start()
+        yield
+        await recovery_svc.stop()
+
+版本: v1.0
+建立: 2026-04-25 (台北時區)
+建立者: Claude Engineer-C (P1.1d)
+# 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
+"""
+
+from __future__ import annotations
+
+import asyncio
+import datetime
+from datetime import timezone, timedelta
+from typing import Any, Protocol, runtime_checkable
+
+import structlog
+
+# 台北時區 +8（標準庫保險絲，100% 可用）
+# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
+# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC
+TAIPEI_TZ = timezone(timedelta(hours=8))
+
+from src.core.config import get_settings
+from src.services.ollama_health_monitor import (
+    HealthStatus,
+    OllamaHealthMonitor,
+    get_ollama_health_monitor,
+)
+from src.services.ollama_failover_manager import (
+    OllamaFailoverManager,
+    get_ollama_failover_manager,
+)
+
+logger = structlog.get_logger(__name__)
+
+# =============================================================================
+# 常數
+# =============================================================================
+
+RECOVERY_CHECK_INTERVAL_SEC = 30    # 每 30s 檢查一次
+STABLE_COUNT_REQUIRED = 3           # 連續 3 次 HEALTHY 才切回（防抖）
+
+# =============================================================================
+# Telegram Alerter Protocol（預留介面，P1.5 Engineer 實作）
+# =============================================================================
+
+
+@runtime_checkable
+class TelegramAlerter(Protocol):
+    """Telegram 通知介面（預留，P1.5 Engineer 接入）"""
+
+    async def alert_recovery(self, payload: dict[str, Any]) -> None:
+        """111 恢復通知"""
+        ...
+
+
+# =============================================================================
+# OllamaAutoRecoveryService
+# =============================================================================
+
+
+class OllamaAutoRecoveryService:
+    """
+    Ollama 111 自動恢復服務
+
+    偵測 111 從 OFFLINE/SLOW/DEGRADED 恢復為 HEALTHY 後，立刻切回 Ollama 路由。
+
+    防抖邏輯：
+    - 連續 3 次 HEALTHY（90s 穩定視窗）才觸發切回
+    - 中途任一次非 HEALTHY → counter 歸零
+    - 已是 ollama primary 時不重複觸發
+
+    2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
+    """
+
+    def __init__(
+        self,
+        health_monitor: OllamaHealthMonitor | None = None,
+        failover_manager: OllamaFailoverManager | None = None,
+        telegram_alerter: TelegramAlerter | None = None,
+        recovery_check_interval_sec: int = RECOVERY_CHECK_INTERVAL_SEC,
+        stable_count_required: int = STABLE_COUNT_REQUIRED,
+    ) -> None:
+        self._monitor = health_monitor or get_ollama_health_monitor()
+        self._failover_manager = failover_manager or get_ollama_failover_manager()
+        self._alerter = telegram_alerter
+        self._recovery_check_interval_sec = recovery_check_interval_sec
+        self._stable_count_required = stable_count_required
+        self._settings = get_settings()
+
+        # 狀態追蹤
+        self._current_primary: str = "ollama"   # "ollama" / "gemini" / "fallback"
+        self._consecutive_healthy: int = 0
+        self._task: asyncio.Task | None = None
+
+    # -------------------------------------------------------------------------
+    # Lifecycle
+    # -------------------------------------------------------------------------
+
+    async def start(self) -> None:
+        """
+        啟動背景監控任務。
+
+        在 FastAPI lifespan 的 startup 階段呼叫：
+            recovery_svc = OllamaAutoRecoveryService()
+            await recovery_svc.start()
+
+        # 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
+        # 啟動時從 Redis 載入狀態（跨重啟持久化），
+        # 若 primary != "ollama" 立刻執行一次 check（不等 30s）。
+        """
+        if self._task is not None and not self._task.done():
+            logger.warning(
+                "ollama_auto_recovery_already_running",
+                service="ollama_auto_recovery",
+            )
+            return
+
+        # 從 Redis 載入持久化狀態（跨重啟恢復）
+        # 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
+        self._current_primary = await self._load_primary()
+        logger.info(
+            "ollama_auto_recovery_bootstrap",
+            service="ollama_auto_recovery",
+            current_primary=self._current_primary,
+        )
+
+        # 若 primary 不是 ollama，立刻評估一次（不等 30s interval）
+        if self._current_primary != "ollama":
+            try:
+                await self._check_and_recover()
+            except Exception:
+                logger.exception(
+                    "ollama_auto_recovery_immediate_check_error",
+                    service="ollama_auto_recovery",
+                )
+
+        self._task = asyncio.create_task(
+            self._monitor_loop(),
+            name="ollama_auto_recovery_loop",
+        )
+        logger.info(
+            "ollama_auto_recovery_started",
+            service="ollama_auto_recovery",
+            check_interval_sec=self._recovery_check_interval_sec,
+            stable_count_required=self._stable_count_required,
+        )
+
+    async def stop(self) -> None:
+        """
+        優雅關閉背景任務。
+
+        在 FastAPI lifespan 的 shutdown 階段呼叫：
+            await recovery_svc.stop()
+        """
+        if self._task is None or self._task.done():
+            return
+
+        self._task.cancel()
+        try:
+            await self._task
+        except asyncio.CancelledError:
+            pass
+        finally:
+            self._task = None
+
+        logger.info(
+            "ollama_auto_recovery_stopped",
+            service="ollama_auto_recovery",
+        )
+
+    # -------------------------------------------------------------------------
+    # 狀態控制（供整合層設定當前 primary，例如 failover 觸發時更新）
+    # -------------------------------------------------------------------------
+
+    async def set_current_primary(self, provider: str) -> None:
+        """
+        通知 recovery service 當前路由 primary 是哪個 provider。
+        OllamaFailoverManager.select_provider() 觸發 failover 後呼叫此方法更新狀態。
+
+        # 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
+        # 改為 async，切換時同步寫 Redis 持久化（跨重啟恢復）
+        """
+        if self._current_primary != provider:
+            logger.info(
+                "ollama_auto_recovery_primary_changed",
+                service="ollama_auto_recovery",
+                from_primary=self._current_primary,
+                to_primary=provider,
+            )
+            self._current_primary = provider
+            # Redis 持久化（跨重啟恢復）
+            await self._persist_primary(provider)
+
+        if provider != "ollama":
+            # 切換到非 Ollama → 重置 counter，開始監控恢復
+            self._consecutive_healthy = 0
+            logger.info(
+                "ollama_auto_recovery_tracking_started",
+                service="ollama_auto_recovery",
+                current_primary=provider,
+            )
+
+    # -------------------------------------------------------------------------
+    # Redis 持久化（跨重啟恢復）
+    # 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
+    # -------------------------------------------------------------------------
+
+    _REDIS_PRIMARY_KEY = "ollama:current_primary"
+
+    async def _persist_primary(self, primary: str) -> None:
+        """持久化 current_primary 到 Redis（無 TTL，跨重啟恢復）"""
+        try:
+            from src.core.redis_client import get_redis
+            redis = get_redis()
+            await redis.set(self._REDIS_PRIMARY_KEY, primary)
+        except Exception as e:
+            logger.warning(
+                "ollama_auto_recovery_persist_failed",
+                service="ollama_auto_recovery",
+                error=str(e),
+            )
+
+    async def _load_primary(self) -> str:
+        """從 Redis 載入 current_primary（找不到時預設 "ollama"）"""
+        try:
+            from src.core.redis_client import get_redis
+            redis = get_redis()
+            val = await redis.get(self._REDIS_PRIMARY_KEY)
+            if val:
+                decoded = val.decode() if isinstance(val, bytes) else val
+                logger.info(
+                    "ollama_auto_recovery_loaded_from_redis",
+                    service="ollama_auto_recovery",
+                    current_primary=decoded,
+                )
+                return decoded
+        except Exception as e:
+            logger.warning(
+                "ollama_auto_recovery_load_failed",
+                service="ollama_auto_recovery",
+                error=str(e),
+            )
+        return "ollama"  # 預設：正常路由
+
+    @property
+    def current_primary(self) -> str:
+        return self._current_primary
+
+    @property
+    def consecutive_healthy(self) -> int:
+        return self._consecutive_healthy
+
+    @property
+    def is_running(self) -> bool:
+        return self._task is not None and not self._task.done()
+
+    # -------------------------------------------------------------------------
+    # 背景監控迴圈
+    # -------------------------------------------------------------------------
+
+    async def _monitor_loop(self) -> None:
+        """
+        每 30s 檢查 111 健康狀態。
+        連續 3 次 HEALTHY 且 current_primary != "ollama" → 觸發切回。
+        """
+        while True:
+            try:
+                await asyncio.sleep(self._recovery_check_interval_sec)
+                await self._check_and_recover()
+            except asyncio.CancelledError:
+                raise  # 必須重新 raise，讓 stop() 的 await 正常結束
+            except Exception:
+                logger.exception(
+                    "ollama_auto_recovery_loop_error",
+                    service="ollama_auto_recovery",
+                )
+                # 不 break，繼續監控（異常不應停止監控迴圈）
+
+    async def _check_and_recover(self) -> None:
+        """
+        單次健康狀態檢查 + 切回邏輯。
+        抽取為獨立方法，方便單元測試。
+        """
+        host = self._settings.OLLAMA_URL
+
+        try:
+            health = await self._monitor.check(host)
+        except Exception:
+            logger.exception(
+                "ollama_auto_recovery_check_failed",
+                service="ollama_auto_recovery",
+                host=host,
+            )
+            self._consecutive_healthy = 0
+            return
+
+        if health.status == HealthStatus.HEALTHY:
+            self._consecutive_healthy += 1
+            logger.debug(
+                "ollama_auto_recovery_healthy_tick",
+                service="ollama_auto_recovery",
+                consecutive=self._consecutive_healthy,
+                required=self._stable_count_required,
+                current_primary=self._current_primary,
+            )
+
+            if (
+                self._consecutive_healthy >= self._stable_count_required
+                and self._current_primary != "ollama"
+            ):
+                await self._switch_back_to_ollama()
+        else:
+            # 非 HEALTHY → counter 歸零，繼續等
+            if self._consecutive_healthy > 0:
+                logger.debug(
+                    "ollama_auto_recovery_counter_reset",
+                    service="ollama_auto_recovery",
+                    previous_count=self._consecutive_healthy,
+                    status=health.status.value,
+                )
+            self._consecutive_healthy = 0
+
+    # -------------------------------------------------------------------------
+    # 切回 Ollama 111
+    # -------------------------------------------------------------------------
+
+    async def _switch_back_to_ollama(self) -> None:
+        """
+        切回 Ollama 111：
+        1. 更新 _current_primary
+        2. 呼叫 failover_manager.clear_cache() 清空路由快取
+        3. 通知 failover_manager（notify_recovery）
+        4. Telegram 通知（若 alerter 已設定）
+        5. structlog audit
+        """
+        from_provider = self._current_primary
+        self._current_primary = "ollama"
+        stable_count = self._consecutive_healthy
+        # counter 不立即清零：保留供 audit，下次 loop 開始後自然計數
+        self._consecutive_healthy = 0
+
+        # 取台北時間（標準庫保證 +8，禁 UTC）
+        # 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
+        recovery_time = datetime.datetime.now(TAIPEI_TZ)
+
+        # 清空 failover manager 快取（讓下次 select_provider 重新評估）
+        try:
+            await self._failover_manager.clear_cache()
+        except Exception:
+            logger.exception(
+                "ollama_auto_recovery_clear_cache_error",
+                service="ollama_auto_recovery",
+            )
+
+        # 通知 failover_manager
+        try:
+            self._failover_manager.notify_recovery("ollama_111")
+        except Exception:
+            logger.exception(
+                "ollama_auto_recovery_notify_error",
+                service="ollama_auto_recovery",
+            )
+
+        # Telegram 通知（P1.5 Engineer-D 接入）
+        # 2026-04-25 P1.5 by Claude Engineer-D — 接 FailoverAlerter，self._alerter 優先，
+        # 無則 fallback 到 get_failover_alerter() singleton
+        try:
+            alerter = self._alerter
+            if alerter is None:
+                from src.services.failover_alerter import get_failover_alerter
+                alerter = get_failover_alerter()
+            await alerter.alert_recovery({
+                "from": from_provider,
+                "to": "ollama_111",
+                "stable_count": stable_count,
+                "recovery_time": recovery_time.isoformat(),
+            })
+        except Exception:
+            logger.exception(
+                "ollama_auto_recovery_telegram_alert_error",
+                service="ollama_auto_recovery",
+            )
+
+        # structlog audit（必須記錄）
+        logger.info(
+            "ollama_auto_recovery_switched_back",
+            service="ollama_auto_recovery",
+            action="switched_back",
+            from_provider=from_provider,
+            to_provider="ollama_111",
+            stable_count=stable_count,
+            recovery_time=recovery_time.isoformat(),
+        )
+
+
+# =============================================================================
+# Singleton
+# =============================================================================
+
+_recovery_service: OllamaAutoRecoveryService | None = None
+
+
+def get_ollama_auto_recovery_service() -> OllamaAutoRecoveryService:
+    """取得 OllamaAutoRecoveryService singleton"""
+    global _recovery_service
+    if _recovery_service is None:
+        _recovery_service = OllamaAutoRecoveryService()
+    return _recovery_service
+
+
+def reset_ollama_auto_recovery_service() -> None:
+    """重置 singleton（測試用）"""
+    global _recovery_service
+    _recovery_service = None
--- a/apps/api/src/services/ollama_failover_manager.py
+++ b/apps/api/src/services/ollama_failover_manager.py
@@ -0,0 +1,571 @@
+"""
+Ollama 自動容災管理 - P1.1b
+============================
+依 OllamaHealthMonitor 健康狀態決定 Ollama 路由方案。
+
+路由邏輯（2026-04-25 統帥指令：Gemini 優先，188 最後備援）：
+  111 HEALTHY  → 主 111，fallback [Gemini, 188, Nemotron]
+  111 SLOW     → 主 Gemini，fallback [111, 188]
+  111 DEGRADED → 主 Gemini，fallback [188, Nemotron, Claude]
+  111 OFFLINE  → 主 Gemini，fallback [188, Nemotron, Claude]
+  111 OFFLINE + 188 OFFLINE → 主 Gemini，fallback [Nemotron, Claude]
+
+設計說明：
+- 不直接依賴 AIProviderEnum（P1.2 Engineer-A 整合時再對齊）
+- 返回輕量 OllamaRoutingResult，含主 endpoint + fallback 清單
+- 並行檢查 111 + 188（asyncio.gather）
+- 切換觸發時寫 audit_logs service="ollama_failover"
+- clear_cache() 方法供 OllamaAutoRecoveryService 切回後清空路由快取
+
+版本: v2.0
+建立: 2026-04-25 (台北時區)
+建立者: Claude Engineer-C (P1.1b)
+# Created 2026-04-25 P1.1 by Claude Engineer-C
+# 2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
+"""
+
+from __future__ import annotations
+
+import asyncio
+import datetime
+from dataclasses import dataclass, field
+# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
+# 用標準庫 timezone(timedelta(hours=8)) 取代 zoneinfo，保證一定有 +8 時區
+# 原 zoneinfo.ZoneInfo("Asia/Taipei") 失敗時 = None → datetime.now(None) 為 UTC
+from datetime import timezone, timedelta
+
+import structlog
+
+from src.core.config import get_settings
+
+# 台北時區 +8（標準庫保險絲，100% 可用）
+# 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
+TAIPEI_TZ = timezone(timedelta(hours=8))
+from src.services.ollama_health_monitor import (
+    HealthReport,
+    HealthStatus,
+    OllamaHealthMonitor,
+    get_ollama_health_monitor,
+)
+
+logger = structlog.get_logger(__name__)
+
+
+# =============================================================================
+# 路由結果模型（輕量，P1.2 整合時轉換為 RoutingDecision）
+# =============================================================================
+
+
+@dataclass
+class OllamaEndpoint:
+    """Ollama 端點描述"""
+
+    url: str
+    provider_name: str  # 給 AIRouterExecutor 用的 provider 名稱
+    model: str
+
+    def to_dict(self) -> dict:
+        return {"url": self.url, "provider_name": self.provider_name, "model": self.model}
+
+
+@dataclass
+class OllamaRoutingResult:
+    """
+    Ollama 容災路由結果
+
+    P1.2 Engineer-A 整合時，將此結果轉換為 ai_router.RoutingDecision：
+      - selected_provider = AIProviderEnum[result.primary.provider_name.upper()] or 新的 OLLAMA_188
+      - selected_model = result.primary.model
+      - fallback_chain = [(AIProviderEnum[p.provider_name.upper()], p.model) for p in result.fallback_chain]
+    """
+
+    primary: OllamaEndpoint
+    fallback_chain: list[OllamaEndpoint]
+    routing_reason: str
+    health_111: HealthReport
+    health_188: HealthReport | None = None
+
+    def all_endpoints_in_order(self) -> list[OllamaEndpoint]:
+        """返回完整的優先序端點列表（primary 在前）"""
+        return [self.primary, *self.fallback_chain]
+
+    def to_dict(self) -> dict:
+        return {
+            "primary": {
+                "url": self.primary.url,
+                "provider": self.primary.provider_name,
+                "model": self.primary.model,
+            },
+            "fallback_chain": [
+                {"url": e.url, "provider": e.provider_name, "model": e.model}
+                for e in self.fallback_chain
+            ],
+            "routing_reason": self.routing_reason,
+            "health_111": self.health_111.to_dict(),
+            "health_188": self.health_188.to_dict() if self.health_188 else None,
+        }
+
+
+# =============================================================================
+# 已知 Fallback 端點定義（Nemotron / Gemini / Claude）
+# =============================================================================
+
+# 以 provider_name 對應 ai_router.AIProviderEnum 的 value
+_NEMOTRON_ENDPOINT = OllamaEndpoint(
+    url="",  # Nemotron 不是 HTTP URL，由 AIRouterExecutor 從 Registry 取得
+    provider_name="nemotron",
+    model="nvidia/nemotron-mini-4b-instruct",
+)
+_GEMINI_ENDPOINT = OllamaEndpoint(
+    url="",
+    provider_name="gemini",
+    model="gemini-1.5-flash",
+)
+_CLAUDE_ENDPOINT = OllamaEndpoint(
+    url="",
+    provider_name="claude",
+    model="claude-3-5-haiku-20241022",
+)
+
+
+# =============================================================================
+# OllamaFailoverManager
+# =============================================================================
+
+
+class OllamaFailoverManager:
+    """
+    Ollama 自動容災管理器
+
+    並行檢查 111 + 188，依健康狀態選擇最佳路由。
+
+    使用方式：
+        manager = OllamaFailoverManager()
+        result = await manager.select_provider()
+        # result.primary.url → 使用的 Ollama URL
+        # result.fallback_chain → 依序 fallback
+
+    2026-04-25 Claude Engineer-C (P1.1b)
+    """
+
+    def __init__(
+        self,
+        health_monitor: OllamaHealthMonitor | None = None,
+        recovery_callback=None,
+    ) -> None:
+        self._monitor = health_monitor or get_ollama_health_monitor()
+        self._settings = get_settings()
+        # 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
+        # recovery_callback: async callable(provider_name: str) → None
+        # OllamaAutoRecoveryService.set_current_primary 在 failover 時被通知，
+        # 避免重啟後 _current_primary 停留在 "ollama" 而永不啟動恢復監控
+        self._recovery_callback = recovery_callback
+
+    # -------------------------------------------------------------------------
+    # Public API
+    # -------------------------------------------------------------------------
+
+    async def select_provider(
+        self,
+        task_type: str = "",
+        context: dict | None = None,
+    ) -> OllamaRoutingResult:
+        """
+        並行檢查 111 + 188，返回路由結果
+
+        Args:
+            task_type: 任務類型（預留，目前未影響路由邏輯）
+            context: 額外上下文（預留）
+
+        Returns:
+            OllamaRoutingResult
+        """
+        url_111 = self._settings.OLLAMA_URL
+        url_188 = self._settings.OLLAMA_FALLBACK_URL or ""
+
+        # 並行檢查
+        # 2026-04-25 critic-fix Part2 H4 by Claude Engineer-C2
+        # return_exceptions=True 防止任一 check 例外導致整個 select_provider 炸
+        if url_188:
+            results = await asyncio.gather(
+                self._monitor.check(url_111),
+                self._monitor.check(url_188),
+                return_exceptions=True,
+            )
+            # 處理 exception — 任一失敗視為 OFFLINE
+            health_111_raw, health_188_raw = results
+            health_111: HealthReport = (
+                HealthReport(status=HealthStatus.OFFLINE, reason=f"check error: {health_111_raw}")
+                if isinstance(health_111_raw, Exception)
+                else health_111_raw
+            )
+            health_188: HealthReport | None = (
+                HealthReport(status=HealthStatus.OFFLINE, reason=f"check error: {health_188_raw}")
+                if isinstance(health_188_raw, Exception)
+                else health_188_raw
+            )
+        else:
+            health_111 = await self._monitor.check(url_111)
+            health_188 = None
+
+        result = self._decide_route(
+            health_111=health_111,
+            health_188=health_188,
+            url_111=url_111,
+            url_188=url_188,
+        )
+
+        # Gemini 帳單熔斷（quota gate）
+        # 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
+        if result.primary.provider_name == "gemini":
+            quota_ok = await self._check_gemini_quota()
+            if not quota_ok:
+                quota = getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000)
+                logger.warning(
+                    "gemini_quota_exceeded_falling_to_188",
+                    quota=quota,
+                    health_111=health_111.status.value,
+                )
+                result = self._build_quota_exceeded_route(
+                    health_111=health_111,
+                    health_188=health_188,
+                    url_111=url_111,
+                    url_188=url_188,
+                )
+
+        # 寫入 audit_log（best-effort）
+        await self._write_failover_audit(result)
+
+        logger.info(
+            "ollama_failover_decision",
+            primary=result.primary.provider_name,
+            primary_url=result.primary.url,
+            reason=result.routing_reason,
+            fallback_count=len(result.fallback_chain),
+            health_111=health_111.status.value,
+            health_188=health_188.status.value if health_188 else "not_configured",
+        )
+
+        # 通知 recovery service 當前 primary（跨重啟持久化）
+        # 2026-04-25 critic-fix Part2 H5+H6 by Claude Engineer-C2
+        if self._recovery_callback is not None:
+            try:
+                await self._recovery_callback(result.primary.provider_name)
+            except Exception as e:
+                logger.warning(
+                    "ollama_failover_recovery_callback_failed",
+                    error=str(e),
+                )
+
+        return result
+
+    # -------------------------------------------------------------------------
+    # 路由決策邏輯
+    # -------------------------------------------------------------------------
+
+    def _decide_route(
+        self,
+        health_111: HealthReport,
+        health_188: HealthReport | None,
+        url_111: str,
+        url_188: str,
+    ) -> OllamaRoutingResult:
+        """
+        決策矩陣（2026-04-25 統帥指令：Gemini 優先，188 最後備援）：
+
+          111 HEALTHY  → primary=111, fallback=[Gemini, 188, Nemotron]
+          111 SLOW     → primary=Gemini, fallback=[111, 188]
+          111 DEGRADED → primary=Gemini, fallback=[188, Nemotron, Claude]
+          111 OFFLINE  → primary=Gemini, fallback=[188, Nemotron, Claude]
+          111 OFFLINE + 188 OFFLINE → primary=Gemini, fallback=[Nemotron, Claude]
+
+        關鍵原則：
+        - 111 非 HEALTHY 時，primary 必為 Gemini（快速雲端，不等 188 慢推理）
+        - 188 永遠在 fallback chain，作為 Gemini 額度耗盡的最後備援
+        - degradation_reason 記錄切換原因 + 時間戳
+
+        2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
+        """
+        model_111 = self._settings.OLLAMA_HEALTH_CHECK_MODEL
+        model_188 = "qwen2.5:7b-instruct"  # 188 CPU-only 備援推薦模型（plan 方案 C）
+
+        ep_111 = OllamaEndpoint(url=url_111, provider_name="ollama", model=model_111)
+        ep_188 = (
+            OllamaEndpoint(url=url_188, provider_name="ollama_188", model=model_188)
+            if url_188
+            else None
+        )
+
+        # 188 可用性判斷（僅供 fallback 使用）
+        has_188 = ep_188 is not None and (
+            health_188 is not None and health_188.status != HealthStatus.OFFLINE
+        )
+
+        # 切換時間戳（台北時區 +8，標準庫保證）
+        # 2026-04-25 critic-fix Part2 B4 by Claude Engineer-C2
+        now_ts = datetime.datetime.now(TAIPEI_TZ).isoformat()
+
+        # ==========================================================
+        # 111 HEALTHY → 主 111，Gemini 作為第一 fallback（快速雲端）
+        # ==========================================================
+        if health_111.status == HealthStatus.HEALTHY:
+            fallback: list[OllamaEndpoint] = [_GEMINI_ENDPOINT]
+            if has_188 and ep_188:
+                fallback.append(ep_188)
+            fallback.append(_NEMOTRON_ENDPOINT)
+            return OllamaRoutingResult(
+                primary=ep_111,
+                fallback_chain=fallback,
+                routing_reason="111 HEALTHY → 主 111",
+                health_111=health_111,
+                health_188=health_188,
+            )
+
+        # ==========================================================
+        # 111 SLOW → primary=Gemini，fallback=[111, 188]
+        # 111 實測 eval rate 0.09 token/s，~111s 推理，Gemini 更快
+        # ==========================================================
+        if health_111.status == HealthStatus.SLOW:
+            fallback_slow: list[OllamaEndpoint] = [ep_111]
+            if has_188 and ep_188:
+                fallback_slow.append(ep_188)
+            degradation_reason = (
+                f"111 SLOW（eval ~0.09 token/s, ~111s）→ 切 Gemini at {now_ts}"
+            )
+            return OllamaRoutingResult(
+                primary=_GEMINI_ENDPOINT,
+                fallback_chain=fallback_slow,
+                routing_reason=degradation_reason,
+                health_111=health_111,
+                health_188=health_188,
+            )
+
+        # ==========================================================
+        # 111 DEGRADED 或 OFFLINE → primary=Gemini，188 在 fallback
+        # ==========================================================
+        status_label = health_111.status.value  # "degraded" / "offline"
+        degradation_reason = f"111 {status_label} → 切 Gemini at {now_ts}"
+        if has_188 and ep_188:
+            return OllamaRoutingResult(
+                primary=_GEMINI_ENDPOINT,
+                fallback_chain=[ep_188, _NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
+                routing_reason=degradation_reason,
+                health_111=health_111,
+                health_188=health_188,
+            )
+
+        # 188 也不可用 → Gemini 主力，最後備援 Nemotron / Claude
+        degradation_reason = f"111 {status_label} + 188 不可用 → 切 Gemini at {now_ts}"
+        return OllamaRoutingResult(
+            primary=_GEMINI_ENDPOINT,
+            fallback_chain=[_NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
+            routing_reason=degradation_reason,
+            health_111=health_111,
+            health_188=health_188,
+        )
+
+    # -------------------------------------------------------------------------
+    # Gemini 帳單熔斷（quota gate）
+    # 2026-04-25 critic-fix Part2 H7 by Claude Engineer-C2
+    # -------------------------------------------------------------------------
+
+    async def _check_gemini_quota(self) -> bool:
+        """
+        檢查每日 Gemini call 配額，超過上限則禁用。
+
+        Redis key: ollama:gemini_daily_count:{YYYY-MM-DD}，TTL 86400s
+        計數 atomic（incr）。
+
+        Returns:
+            True  → 仍在配額內，可使用 Gemini
+            False → 已超配額，應切到 188+Nemotron
+
+        fail-open：Redis 不可用時允許走 Gemini（不阻擋服務）
+        """
+        try:
+            from src.core.redis_client import get_redis
+            redis = get_redis()
+            if redis is None:
+                return True  # fail-open
+            quota = getattr(self._settings, "GEMINI_DAILY_QUOTA", 1000)
+            key = f"ollama:gemini_daily_count:{datetime.date.today().isoformat()}"
+            count_raw = await redis.get(key)
+            count = int(count_raw or 0)
+            if count >= quota:
+                return False
+            # atomic incr + 設定 TTL（確保跨日自動重置）
+            await redis.incr(key)
+            await redis.expire(key, 86400)
+            return True
+        except Exception as e:
+            logger.warning("gemini_quota_check_failed", error=str(e))
+            return True  # fail-open
+
+    def _build_quota_exceeded_route(
+        self,
+        health_111: HealthReport,
+        health_188: HealthReport | None,
+        url_111: str,  # noqa: ARG002 — 保留供 OllamaRoutingResult 結構完整性（health_111 對應）
+        url_188: str,
+    ) -> OllamaRoutingResult:
+        """
+        Gemini 配額耗盡時的備援路由：primary=OLLAMA_188，fallback=[Nemotron, Claude]
+        若 188 也不可用，則 primary=Nemotron。
+        """
+        model_188 = "qwen2.5:7b-instruct"
+        ep_188 = (
+            OllamaEndpoint(url=url_188, provider_name="ollama_188", model=model_188)
+            if url_188
+            else None
+        )
+        has_188 = ep_188 is not None and (
+            health_188 is not None and health_188.status != HealthStatus.OFFLINE
+        )
+
+        if has_188 and ep_188:
+            return OllamaRoutingResult(
+                primary=ep_188,
+                fallback_chain=[_NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT],
+                routing_reason="Gemini quota exceeded → 188 CPU-only 備援",
+                health_111=health_111,
+                health_188=health_188,
+            )
+
+        # 188 也不可用
+        return OllamaRoutingResult(
+            primary=_NEMOTRON_ENDPOINT,
+            fallback_chain=[_CLAUDE_ENDPOINT],
+            routing_reason="Gemini quota exceeded + 188 不可用 → Nemotron 備援",
+            health_111=health_111,
+            health_188=health_188,
+        )
+
+    # -------------------------------------------------------------------------
+    # Recovery API（供 OllamaAutoRecoveryService 呼叫）
+    # -------------------------------------------------------------------------
+
+    def set_recovery_callback(self, callback) -> None:
+        """
+        設定 recovery callback（供 lifespan wiring 使用）。
+        callback signature: async (provider_name: str) -> None
+
+        # 2026-04-25 P1.2 by Claude Engineer-A2 — failover 整合到 ai_router + lifespan
+        """
+        self._recovery_callback = callback
+
+    async def clear_cache(self) -> None:
+        """
+        清空路由決策快取，讓下次 select_provider 重新評估健康狀態。
+        OllamaAutoRecoveryService 在偵測 111 恢復後呼叫此方法。
+
+        2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
+        # 2026-04-25 P1.2 by Claude Engineer-A2 — 改用 make_cache_key 動態組 key，消除硬編碼 IP
+        """
+        try:
+            from src.core.redis_client import get_redis
+            from src.services.ollama_health_monitor import make_cache_key
+            redis = get_redis()
+            if redis is None:
+                return
+            # 動態由 settings URL 組 cache key，避免硬編碼 IP
+            keys = [
+                make_cache_key(self._settings.OLLAMA_URL),
+                make_cache_key(self._settings.OLLAMA_FALLBACK_URL or ""),
+            ]
+            for k in keys:
+                if k and k != "ollama_health:":  # 空 URL 會產生無意義的 key，跳過
+                    await redis.delete(k)
+            logger.info(
+                "ollama_failover_cache_cleared",
+                service="ollama_failover",
+                reason="recovery_triggered",
+            )
+        except Exception as e:
+            logger.debug("ollama_failover_clear_cache_failed", error=str(e))
+
+    def notify_recovery(self, provider: str) -> None:
+        """
+        預留：P1.5 Engineer 接入 Telegram alerter 時使用。
+        目前僅寫 structlog audit。
+
+        2026-04-25 統帥指令 by Claude Engineer-C — 自動切 Gemini + 自動恢復
+        """
+        logger.info(
+            "ollama_recovery_notified",
+            service="ollama_failover",
+            provider=provider,
+            action="recovery_received",
+        )
+
+    # -------------------------------------------------------------------------
+    # Audit Log
+    # -------------------------------------------------------------------------
+
+    async def _write_failover_audit(self, result: OllamaRoutingResult) -> None:
+        """
+        切換觸發時寫 structlog audit（best-effort）+ Telegram 告警
+
+        # 2026-04-25 critic-fix Part2 B1 by Claude Engineer-C2
+        # 原 AuditLog DB 寫入使用不存在的欄位（service/action/target/status/metadata）
+        # → SQLAlchemy crash → except 吃掉 → 零稽核
+        # 修法：刪除 DB 寫入路徑，改用 structlog only（audit 不依賴 DB schema）
+
+        # 2026-04-25 P1.5 by Claude Engineer-D — 新增 Telegram 告警（dedup 10min）
+
+        service="ollama_failover"（per 任務規格）
+        僅在 primary 非 111 時記錄（真正發生切換）
+        """
+        if result.primary.provider_name == "ollama":
+            # 111 正常，無切換事件
+            return
+
+        logger.info(
+            "ollama_failover_triggered",
+            service="ollama_failover",
+            action="failover_triggered",
+            from_provider="ollama",
+            to_provider=result.primary.provider_name,
+            reason=result.routing_reason,
+            primary_url=result.primary.url or result.primary.provider_name,
+            health_111=result.health_111.status.value,
+            health_188=result.health_188.status.value if result.health_188 else "not_configured",
+        )
+
+        # Telegram 告警（首次切換才通知，dedup 10min 內建）
+        # 2026-04-25 P1.5 by Claude Engineer-D — 告警失敗不阻斷主路由邏輯
+        try:
+            from src.services.failover_alerter import get_failover_alerter
+            fallback_chain_str = " → ".join(
+                p.provider_name for p in result.fallback_chain
+            )
+            alerter = get_failover_alerter()
+            await alerter.alert_failover({
+                "to_provider": result.primary.provider_name,
+                "model": result.primary.model,
+                "reason": result.routing_reason,
+                "timestamp": datetime.datetime.now(TAIPEI_TZ).isoformat(),
+                "fallback_chain_str": fallback_chain_str,
+            })
+        except Exception as e:
+            logger.warning("failover_alert_failed", error=str(e))
+
+
+# =============================================================================
+# Singleton
+# =============================================================================
+
+_failover_manager: OllamaFailoverManager | None = None
+
+
+def get_ollama_failover_manager() -> OllamaFailoverManager:
+    """取得 OllamaFailoverManager singleton"""
+    global _failover_manager
+    if _failover_manager is None:
+        _failover_manager = OllamaFailoverManager()
+    return _failover_manager
+
+
+def reset_ollama_failover_manager() -> None:
+    """重置 singleton（測試用）"""
+    global _failover_manager
+    _failover_manager = None
--- a/apps/api/src/services/ollama_health_monitor.py
+++ b/apps/api/src/services/ollama_health_monitor.py
@@ -0,0 +1,356 @@
+"""
+Ollama 健康檢測模組 - P1.1a
+============================
+三層健康檢查：連通性 → 推理測試 → (optional) GPU 記憶體
+
+設計原則：
+- Redis 30s 快取防 health check storm
+- 降級安全：快取失敗不影響功能，直接執行檢查
+- 用 settings 取 host 配置（禁硬編碼 IP）
+- 用 httpx.AsyncClient（非 requests）
+
+版本: v1.1
+建立: 2026-04-25 (台北時區)
+建立者: Claude Engineer-C (P1.1a)
+# Created 2026-04-25 P1.1 by Claude Engineer-C
+# 2026-04-25 critic-fix by Claude Engineer-C — 修 BLOCKER B3(make_cache_key 公開) + H3(timeout 45s)
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import time
+from dataclasses import dataclass, field
+from enum import Enum
+from urllib.parse import urlparse
+
+import httpx
+import structlog
+
+from src.core.config import get_settings
+
+logger = structlog.get_logger(__name__)
+
+# =============================================================================
+# 常數
+# =============================================================================
+
+REDIS_CACHE_KEY_PREFIX = "ollama_health:"
+REDIS_CACHE_TTL_SECONDS = 30  # 防 health check storm
+
+CONNECTIVITY_TIMEOUT_SECONDS = 5.0
+# 2026-04-25 critic-fix H3 by Claude Engineer-C — 45s 讓 SLOW 門檻(30s)真的能觀察到
+INFERENCE_TIMEOUT_SECONDS = 45.0
+
+# 推理延遲分級門檻（毫秒）
+LATENCY_HEALTHY_THRESHOLD_MS = 10_000   # <10s → HEALTHY
+LATENCY_SLOW_THRESHOLD_MS = 30_000      # 10-30s → SLOW
+# >30s → DEGRADED
+
+
+# =============================================================================
+# 公開工具函數
+# =============================================================================
+
+def make_cache_key(url: str) -> str:
+    """
+    由 URL 產生 Redis cache key。
+
+    取 host:port 部分（netloc）作為 key，避免 scheme 差異干擾。
+    例：http://192.168.0.111:11434 → "ollama_health:192.168.0.111:11434"
+
+    公開函數供 OllamaFailoverManager.clear_cache() 動態組 key，
+    確保與內部 _cache_key() 使用相同邏輯。
+
+    # 2026-04-25 critic-fix B3 by Claude Engineer-C — 避免 clear_cache 硬編碼 IP
+    """
+    parsed = urlparse(url)
+    netloc = parsed.netloc or url
+    return f"{REDIS_CACHE_KEY_PREFIX}{netloc}"
+
+
+# =============================================================================
+# 資料模型
+# =============================================================================
+
+
+class HealthStatus(Enum):
+    """Ollama 健康狀態分級"""
+
+    HEALTHY = "healthy"    # <10s 推理，正常使用
+    SLOW = "slow"          # 10-30s，降級使用
+    DEGRADED = "degraded"  # >30s 或推理超時，優先切換
+    OFFLINE = "offline"    # 連通性失敗，必須切換
+
+
+@dataclass
+class HealthReport:
+    """Ollama 健康檢測報告"""
+
+    status: HealthStatus
+    host: str = ""
+    latency_ms: float = 0.0
+    reason: str = ""
+    checked_at: float = field(default_factory=time.time)
+    from_cache: bool = False
+    mcp_health: dict[str, bool] = field(default_factory=dict)
+
+    def is_usable(self) -> bool:
+        """是否可用（不含 OFFLINE）"""
+        return self.status != HealthStatus.OFFLINE
+
+    def to_dict(self) -> dict:
+        return {
+            "status": self.status.value,
+            "host": self.host,
+            "latency_ms": round(self.latency_ms, 1),
+            "reason": self.reason,
+            "checked_at": self.checked_at,
+            "from_cache": self.from_cache,
+        }
+
+
+# =============================================================================
+# OllamaHealthMonitor
+# =============================================================================
+
+
+class OllamaHealthMonitor:
+    """
+    Ollama 三層健康檢測
+
+    層 1：連通性（/api/tags，5s timeout）
+    層 2：推理測試（/api/generate，35s timeout）
+    層 3：GPU 記憶體（optional，via SSH MCP）
+
+    結果 Redis 快取 30s，防 health check storm。
+
+    2026-04-25 Claude Engineer-C (P1.1a)
+    """
+
+    def __init__(self) -> None:
+        self._settings = get_settings()
+
+    # -------------------------------------------------------------------------
+    # Public API
+    # -------------------------------------------------------------------------
+
+    async def check(self, host: str) -> HealthReport:
+        """
+        執行健康檢測（含 Redis 快取）
+
+        Args:
+            host: Ollama 主機 URL，e.g. "http://192.168.0.111:11434"
+
+        Returns:
+            HealthReport
+        """
+        # 嘗試從快取取得
+        cached = await self._get_cached(host)
+        if cached is not None:
+            cached.from_cache = True
+            logger.debug("ollama_health_cache_hit", host=host, status=cached.status.value)
+            return cached
+
+        # 執行實際健康檢測
+        report = await self._run_checks(host)
+        report.host = host
+
+        # 寫入快取（失敗不影響功能，外部 exception 也靜默）
+        try:
+            await self._set_cached(host, report)
+        except Exception as e:
+            logger.debug("ollama_health_set_cached_outer_failed", host=host, error=str(e))
+
+        logger.info(
+            "ollama_health_checked",
+            host=host,
+            status=report.status.value,
+            latency_ms=round(report.latency_ms, 1),
+            reason=report.reason,
+        )
+
+        # 寫入 audit_log（best-effort）
+        await self._write_audit_log(host, report)
+
+        return report
+
+    # -------------------------------------------------------------------------
+    # 三層檢查
+    # -------------------------------------------------------------------------
+
+    async def _run_checks(self, host: str) -> HealthReport:
+        """依序執行三層檢查"""
+        # 層 1：連通性
+        connectivity_ok = await self._check_connectivity(host)
+        if not connectivity_ok:
+            return HealthReport(
+                status=HealthStatus.OFFLINE,
+                host=host,
+                reason="連通性失敗：/api/tags 無回應",
+            )
+
+        # 層 2：推理測試
+        report = await self._check_inference(host)
+        return report
+
+    async def _check_connectivity(self, host: str) -> bool:
+        """
+        層 1：連通性檢查
+        GET /api/tags，5s timeout，回傳 200 即通過
+        """
+        try:
+            async with httpx.AsyncClient(
+                timeout=httpx.Timeout(CONNECTIVITY_TIMEOUT_SECONDS)
+            ) as client:
+                resp = await client.get(f"{host}/api/tags")
+                return resp.status_code == 200
+        except (httpx.TimeoutException, httpx.ConnectError, httpx.NetworkError):
+            return False
+        except Exception as e:
+            logger.warning("ollama_connectivity_check_error", host=host, error=str(e))
+            return False
+
+    async def _check_inference(self, host: str) -> HealthReport:
+        """
+        層 2：推理測試
+        POST /api/generate，35s timeout，依延遲分級
+        """
+        model = self._settings.OLLAMA_HEALTH_CHECK_MODEL
+        start = time.perf_counter()
+
+        try:
+            async with httpx.AsyncClient(
+                timeout=httpx.Timeout(INFERENCE_TIMEOUT_SECONDS)
+            ) as client:
+                resp = await client.post(
+                    f"{host}/api/generate",
+                    json={
+                        "model": model,
+                        "prompt": "hi",
+                        "stream": False,
+                        "options": {"num_predict": 1},
+                    },
+                )
+                latency_ms = (time.perf_counter() - start) * 1000
+
+                if resp.status_code != 200:
+                    return HealthReport(
+                        status=HealthStatus.DEGRADED,
+                        latency_ms=latency_ms,
+                        reason=f"推理回傳 HTTP {resp.status_code}",
+                    )
+
+                # 分級判斷
+                if latency_ms < LATENCY_HEALTHY_THRESHOLD_MS:
+                    return HealthReport(
+                        status=HealthStatus.HEALTHY,
+                        latency_ms=latency_ms,
+                    )
+                elif latency_ms < LATENCY_SLOW_THRESHOLD_MS:
+                    return HealthReport(
+                        status=HealthStatus.SLOW,
+                        latency_ms=latency_ms,
+                        reason=f"推理延遲 {latency_ms:.0f}ms（slow zone）",
+                    )
+                else:
+                    return HealthReport(
+                        status=HealthStatus.DEGRADED,
+                        latency_ms=latency_ms,
+                        reason=f"推理延遲 {latency_ms:.0f}ms（>30s）",
+                    )
+
+        except (httpx.TimeoutException, asyncio.TimeoutError):
+            latency_ms = (time.perf_counter() - start) * 1000
+            return HealthReport(
+                status=HealthStatus.DEGRADED,
+                latency_ms=latency_ms,
+                reason="推理超時 >35s",
+            )
+        except (httpx.ConnectError, httpx.NetworkError) as e:
+            return HealthReport(
+                status=HealthStatus.OFFLINE,
+                reason=f"推理連接失敗：{e}",
+            )
+        except Exception as e:
+            logger.warning("ollama_inference_check_error", host=host, error=str(e))
+            return HealthReport(
+                status=HealthStatus.DEGRADED,
+                reason=f"推理測試例外：{e}",
+            )
+
+    # -------------------------------------------------------------------------
+    # Redis 快取
+    # -------------------------------------------------------------------------
+
+    def _cache_key(self, host: str) -> str:
+        """生成 Redis cache key（內部使用，委派至 make_cache_key）"""
+        return make_cache_key(host)
+
+    async def _get_cached(self, host: str) -> HealthReport | None:
+        """從 Redis 取得快取的 HealthReport，失敗返回 None"""
+        try:
+            from src.core.redis_client import get_redis
+            redis = get_redis()
+            raw = await redis.get(self._cache_key(host))
+            if not raw:
+                return None
+            data = json.loads(raw)
+            return HealthReport(
+                status=HealthStatus(data["status"]),
+                host=data.get("host", host),
+                latency_ms=data.get("latency_ms", 0.0),
+                reason=data.get("reason", ""),
+                checked_at=data.get("checked_at", time.time()),
+            )
+        except Exception as e:
+            logger.debug("ollama_health_cache_get_failed", host=host, error=str(e))
+            return None
+
+    async def _set_cached(self, host: str, report: HealthReport) -> None:
+        """寫入 Redis 快取，失敗靜默（不影響功能）"""
+        try:
+            from src.core.redis_client import get_redis
+            redis = get_redis()
+            data = json.dumps(report.to_dict())
+            await redis.set(self._cache_key(host), data, ex=REDIS_CACHE_TTL_SECONDS)
+        except Exception as e:
+            logger.debug("ollama_health_cache_set_failed", host=host, error=str(e))
+
+    # -------------------------------------------------------------------------
+    # Audit Log (structured log，AuditLog 表設計用於 K8s 操作審計，不適合此場景)
+    # -------------------------------------------------------------------------
+
+    async def _write_audit_log(self, host: str, report: HealthReport) -> None:
+        """記錄健康檢測結果（structlog，service=ollama_health_monitor）"""
+        logger.info(
+            "ollama_health_monitor_audit",
+            service="ollama_health_monitor",
+            host=host,
+            status=report.status.value,
+            latency_ms=round(report.latency_ms, 1),
+            reason=report.reason,
+            is_usable=report.is_usable(),
+        )
+
+
+# =============================================================================
+# Singleton
+# =============================================================================
+
+_monitor: OllamaHealthMonitor | None = None
+
+
+def get_ollama_health_monitor() -> OllamaHealthMonitor:
+    """取得 OllamaHealthMonitor singleton"""
+    global _monitor
+    if _monitor is None:
+        _monitor = OllamaHealthMonitor()
+    return _monitor
+
+
+def reset_ollama_health_monitor() -> None:
+    """重置 singleton（測試用）"""
+    global _monitor
+    _monitor = None