Files
awoooi/apps/api/src/services/ai_router.py
Your Name ff30c61c4c
All checks were successful
Code Review / ai-code-review (push) Successful in 21s
CD Pipeline / tests (push) Successful in 1m20s
CD Pipeline / build-and-deploy (push) Successful in 4m15s
CD Pipeline / post-deploy-checks (push) Successful in 1m58s
fix(rls): 收斂 API DB access context
2026-05-12 19:55:13 +08:00

1435 lines
61 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
AI Router - Phase 13.3 #87
==========================
智能 AI 路由器,根據意圖和複雜度動態選擇 AI Provider
目標: 根據請求特性自動選擇最適模型
策略: Intent Classifier + Complexity Scorer → Routing Decision
延遲目標: < 50ms (規則引擎優先)
路由決策矩陣 (ADR-023):
┌─────────────────┬───────────────┬──────────────────────────────┐
│ 複雜度 + 風險 │ Provider │ 備註 │
├─────────────────┼───────────────┼──────────────────────────────┤
│ 1-2 + LOW │ Ollama │ 快速本地處理 │
│ 3 + MEDIUM │ Ollama │ fallback → Gemini │
│ 4-5 或 HIGH │ OpenClaw Nemo │ fallback → Gemini │
│ DELETE/CRITICAL │ Claude │ 強制使用最強模型 │
└─────────────────┴───────────────┴──────────────────────────────┘
版本: v4.4
建立: 2026-03-26 (台北時區)
建立者: Claude Code
最後修改: 2026-04-27 (台北時區)
修改者: Claude Sonnet 4.6 (A2 — DIAGNOSE 移除 Ollama, INC-20260425)
變更紀錄:
| 版本 | 日期 | 執行者 | 變更內容 |
|------|------|--------|----------|
| v1.0 | 2026-03-26 | Claude Code | 初始實作 |
| v2.0 | 2026-03-26 | Claude Code | 支援 IntentResult + 新意圖類型 |
| v3.0 | 2026-03-26 | Claude Code | Phase 13.3 #87 完整路由決策矩陣 |
| v4.0 | 2026-04-02 | ogt (首席架構師) | Phase 24 AIProvider Registry + Executor; C1 Langfuse Trace; C2 AIRouter.route(); C3 型別 typo; I4 Protocol close |
| v4.1 | 2026-04-04 | ogt (首席架構師) | Phase 25 P0: DIAGNOSE Privacy-First — _local_fallback_chain; DIAGNOSE→NEMOTRON; REJECT+Telegram |
| v4.2 | 2026-04-04 | Claude Code | Phase 25 P0 實測修正: _local_fallback_chain 移除 Nemotron(雲端),僅留 Ollama(本地); timeout 依實測調整(NIM 60s/Ollama 200s) |
| v4.3 | 2026-04-05 | Claude Code | Phase 25 P0 架構修正: 實測 Ollama CPU ~238s(不可用); NIM 實測 2-27s avg 10.6s; DIAGNOSE 改走 _full_fallback_chain(NIM 主力); _local_fallback_chain 廢棄 |
| v4.4 | 2026-04-27 | Claude Sonnet 4.6 | A2 INC-20260425: DIAGNOSE fallback chain 移除 Ollama (CPU 238s 二次 timeout); 新增 _diagnose_fallback_chain (NEMO→GEMINI→CLAUDE); 新增 aiops_diagnose_fallback_total metric |
"""
from __future__ import annotations
import time
from dataclasses import dataclass, field
from enum import Enum
from typing import TYPE_CHECKING, Protocol
import structlog
if TYPE_CHECKING:
from src.services.intent_classifier import IntentResult
from src.services.complexity_scorer import (
ComplexityScore,
get_complexity_scorer,
)
from src.services.intent_classifier import (
IntentResult,
IntentType,
RiskLevel,
get_intent_classifier,
normalize_intent,
)
from src.services.model_registry import get_model_registry
# Module-level structlog logger used by the classes defined in this file.
logger = structlog.get_logger(__name__)
# =============================================================================
# Provider 定義
# =============================================================================
class AIProviderEnum(str, Enum):
    """Identifiers of the AI providers the router can select.

    Every member's value is the provider-name string. ``AIRouter.route``
    converts names reported by the Ollama failover manager back into members
    with ``AIProviderEnum(name)``, which raises ``ValueError`` for a name that
    has no member here.
    """

    OLLAMA = "ollama"

    # 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 three-tier GCP failover.
    # The failover manager reports provider_name "ollama_gcp_a",
    # "ollama_gcp_b" or "ollama_local"; these members exist so that lookup by
    # value succeeds for those names.
    OLLAMA_GCP_A = "ollama_gcp_a"  # GCP-A 34.143.170.20, primary
    OLLAMA_GCP_B = "ollama_gcp_b"  # GCP-B 34.21.145.224, secondary
    OLLAMA_LOCAL = "ollama_local"  # 192.168.0.111, local fallback

    GEMINI = "gemini"
    CLAUDE = "claude"

    # 2026-04-02 ogt, C1 fix: values aligned with the registry's actual names.
    #   OpenClawNemoProvider.name = "openclaw_nemo" (general inference, via .188)
    #   NemotronProvider.name     = "nemotron"      (tool calling, direct NVIDIA NIM)
    # The former NVIDIA = "nvidia" member was removed: no such provider is
    # registered.
    OPENCLAW_NEMO = "openclaw_nemo"
    NEMOTRON = "nemotron"
# Latency budget per provider, in milliseconds. route() and route_sync() look
# the selected provider up here and fall back to 30000 when it is missing.
PROVIDER_LATENCY_BUDGET: dict[AIProviderEnum, int] = {
    AIProviderEnum.OLLAMA: 60000, # local; a longer processing time is allowed
    # 2026-05-04 ogt: ADR-110 three-tier GCP failover — GCP NVMe SSD inference
    # is fast, so 60s is considered sufficient.
    AIProviderEnum.OLLAMA_GCP_A: 60000,
    AIProviderEnum.OLLAMA_GCP_B: 60000,
    AIProviderEnum.OLLAMA_LOCAL: 90000, # host 111, local HDD, slightly slower
    AIProviderEnum.GEMINI: 30000, # cloud, lower latency
    AIProviderEnum.CLAUDE: 30000, # cloud, lower latency
    # 2026-04-02 ogt: C1 fix — names aligned with the registry.
    AIProviderEnum.OPENCLAW_NEMO: 60000, # via .188 -> NVIDIA NIM; longer time allowed
    AIProviderEnum.NEMOTRON: 60000, # tool-calling only; longer time allowed
}
# =============================================================================
# Interface 定義 (P1 修復 - 2026-04-01 首席架構師審查)
# =============================================================================
class IAIRouter(Protocol):
    """Structural interface of the AI router, so tests can inject a substitute.

    Added 2026-04-01 (ogt, chief-architect review, P1 fix) to support
    dependency injection, in the same spirit as IModelRegistry and
    IComplexityScorer.
    """

    async def route(
        self,
        text: str,
        context: dict | None = None,
    ) -> "RoutingDecision":
        """Route a request to the most suitable AI provider."""
        ...

    def route_sync(
        self,
        text: str,
        context: dict | None = None,
    ) -> "RoutingDecision":
        """Synchronous variant of :meth:`route`."""
        ...

    def route_tool_calling(
        self,
    ) -> tuple[AIProviderEnum, str, list[tuple[AIProviderEnum, str]]]:
        """Routing dedicated to tool-calling tasks."""
        ...
@dataclass
class RoutingDecision:
    """Outcome of one routing decision (Phase 13.3 #87).

    Carries the chosen provider/model, the fallback chain, the classification
    results that led to the choice, and a few deprecated mirror fields kept
    for backward compatibility.
    """

    # --- core decision ---
    selected_provider: AIProviderEnum  # provider that was chosen
    selected_model: str  # model name that was chosen
    fallback_chain: list[tuple[AIProviderEnum, str]]  # [(provider, model), ...]
    routing_reason: str  # human-readable reason for the decision
    latency_budget_ms: int  # latency budget in milliseconds

    # --- classification results ---
    intent: IntentType  # normalized intent
    intent_result: IntentResult  # full intent-classification result
    complexity: ComplexityScore  # complexity score
    # Always overwritten from intent_result in __post_init__.
    risk_level: RiskLevel = field(default=RiskLevel.MEDIUM)

    # --- routing metadata ---
    routing_latency_ms: float = 0.0  # time spent deciding, in ms

    # --- deprecated mirrors, always recomputed in __post_init__ ---
    model: str = ""  # -> selected_model
    reason: str = ""  # -> routing_reason
    fallback_models: list[str] = field(default_factory=list)  # -> fallback_chain

    def __post_init__(self):
        """Derive the risk level and the deprecated mirror fields."""
        self.risk_level = self.intent_result.risk_level

        # Backward compatibility: expose the old attribute names.
        self.model = self.selected_model
        self.reason = self.routing_reason
        legacy_models = []
        for _, fallback_model in self.fallback_chain:
            # The selected model is not its own fallback.
            if fallback_model != self.selected_model:
                legacy_models.append(fallback_model)
        self.fallback_models = legacy_models

    def to_dict(self) -> dict:
        """Return a plain-dict view of the decision for API responses."""
        chain_payload = [
            {"provider": chain_provider.value, "model": chain_model}
            for chain_provider, chain_model in self.fallback_chain
        ]
        return {
            "selected_provider": self.selected_provider.value,
            "selected_model": self.selected_model,
            "fallback_chain": chain_payload,
            "routing_reason": self.routing_reason,
            "latency_budget_ms": self.latency_budget_ms,
            "intent": self.intent.value,
            "risk_level": self.risk_level.value,
            "complexity_score": self.complexity.score,
            "routing_latency_ms": round(self.routing_latency_ms, 2),
        }
class AIRouter:
"""
AI 路由器 (Phase 13.3 #87)
整合 IntentClassifier 和 ComplexityScorer
動態選擇最適合的 AI Provider 和模型。
路由決策矩陣:
┌─────────────────┬───────────────┬──────────────────────────────┐
│ 複雜度 + 風險 │ Provider │ 備註 │
├─────────────────┼───────────────┼──────────────────────────────┤
│ 1-2 + LOW │ Ollama │ 快速本地處理 │
│ 3 + MEDIUM │ Ollama │ fallback → Gemini │
│ 4-5 或 HIGH │ OpenClaw Nemo │ fallback → Gemini │
│ DELETE/CRITICAL │ Claude │ 強制使用最強模型 │
└─────────────────┴───────────────┴──────────────────────────────┘
路由策略 (按優先級):
1. CRITICAL 風險強制使用 Claude
2. DELETE 意圖強制使用 Claude
3. 意圖強制覆寫 (DIAGNOSE / ALERT_TRIAGE / QUERY → Ollama)
4. HIGH 風險或複雜度 4-5 → OpenClaw Nemo (fallback Gemini)
5. 其他情況 → Ollama (成本優先)
"""
def __init__(self):
    """Resolve collaborators and pre-compute model names and routing tables.

    Collaborators come from the module-level factories. Model names are read
    from the model registry once and reused by every fallback chain below.
    """
    self._intent_classifier = get_intent_classifier()
    self._complexity_scorer = get_complexity_scorer()
    self._model_registry = get_model_registry()

    # 2026-04-25 P1.2 (Claude Engineer-A2): failover integrated into ai_router
    # + lifespan. Imported lazily to avoid a circular import; per the original
    # note, ollama_failover_manager does not import ai_router.
    from src.services.ollama_failover_manager import get_ollama_failover_manager

    self._failover_manager = get_ollama_failover_manager()

    # Model names from the ModelRegistry.
    model_for = self._model_registry.get_model
    self._ollama_default = model_for("ollama", "default")
    self._ollama_summary = model_for("ollama", "summary")
    self._gemini_default = model_for("gemini", "default")
    self._claude_default = model_for("claude", "default")
    # 2026-04-02 ogt, C1 fix: openclaw_nemo (general inference) and nemotron
    # (tool calling) both read the registry's ("nvidia", "default") entry.
    self._openclaw_nemo_default = model_for("nvidia", "default")
    self._nemotron_default = model_for("nvidia", "default")
    # Backward-compatible alias.
    self._nvidia_default = self._openclaw_nemo_default

    # (provider, model) pairs reused by the tables below.
    ollama_pair = (AIProviderEnum.OLLAMA, self._ollama_default)
    gemini_pair = (AIProviderEnum.GEMINI, self._gemini_default)
    claude_pair = (AIProviderEnum.CLAUDE, self._claude_default)
    nemo_pair = (AIProviderEnum.OPENCLAW_NEMO, self._openclaw_nemo_default)
    nemotron_pair = (AIProviderEnum.NEMOTRON, self._nemotron_default)

    # Provider -> default model.
    self._provider_models: dict[AIProviderEnum, str] = dict(
        [ollama_pair, gemini_pair, claude_pair, nemo_pair, nemotron_pair]
    )

    # General fallback chain. 2026-04-02 ogt, C1 fix: OPENCLAW_NEMO comes first.
    self._full_fallback_chain: list[tuple[AIProviderEnum, str]] = [
        nemo_pair,
        gemini_pair,
        claude_pair,
        ollama_pair,
    ]

    # DIAGNOSE fallback chain.
    # 2026-04-29 ogt + Claude Code: reverses the earlier A2 rule (INC-20260425),
    # whose factual basis is recorded as outdated. Recorded rationale:
    #   - directive of 2026-04-29: prefer the Ollama on host 111; local free
    #     LLM first; host 111 is the only Ollama host
    #   - old fact (2026-04-27): Ollama was CPU-only deepseek-r1:14b at ~238s
    #   - new fact (2026-04-29): prod Ollama 111 is an M1 Pro GPU running
    #     qwen2.5:7b-instruct (8.2GB VRAM, 32k ctx, measured 0.54s for "hi")
    #   - the cloud providers were all failing at the time (OpenClaw 188 -> 500,
    #     Gemini -> 429 quota, Claude -> 404), so without the reversal every
    #     incident ended in llm_failed
    # Companion changes noted by the authors: DIAGNOSE override now OLLAMA;
    # openclaw.py injects task_type="diagnose" for a 200s Ollama timeout;
    # the diagnose routing tests were updated.
    self._diagnose_fallback_chain: list[tuple[AIProviderEnum, str]] = [
        ollama_pair,  # primary: local and free
        nemo_pair,  # fallback 1
        gemini_pair,  # fallback 2
        claude_pair,  # fallback 3
    ]

    # Tool-calling fallback chain (ADR-036).
    self._tool_calling_fallback_chain: list[tuple[AIProviderEnum, str]] = [
        nemotron_pair,
        gemini_pair,
        claude_pair,
    ]

    # 2026-04-05 Claude Code, Phase 25 P0 v4.3: _local_fallback_chain is
    # deprecated and intentionally empty; the attribute is kept only so that
    # existing references do not raise AttributeError.
    self._local_fallback_chain: list[tuple[AIProviderEnum, str]] = []

    # Forced provider per intent; None means "decide by complexity".
    self._intent_provider_overrides: dict[IntentType, AIProviderEnum | None] = {
        # Four core intents.
        IntentType.RESTART: None,
        IntentType.SCALE: None,
        IntentType.CONFIG: None,  # by complexity (HIGH risk still escalates)
        # 2026-04-29 ogt + Claude Code: DIAGNOSE -> OLLAMA (local first); the
        # 2026-04-16 DIAGNOSE -> OPENCLAW_NEMO mapping is superseded.
        IntentType.DIAGNOSE: AIProviderEnum.OLLAMA,
        # Auxiliary intents.
        IntentType.DELETE: AIProviderEnum.CLAUDE,  # CRITICAL -> force Claude
        IntentType.ROLLBACK: None,
        IntentType.UNKNOWN: None,
        # Legacy intents.
        IntentType.CODE_REVIEW: None,
        IntentType.DEPLOYMENT: None,
        IntentType.ALERT_TRIAGE: AIProviderEnum.OLLAMA,
        IntentType.QUERY: AIProviderEnum.OLLAMA,
        IntentType.MAINTENANCE: None,
    }

    # Backward-compatible attributes.
    self._default_model = self._ollama_default
    self._summary_model = self._ollama_summary
    self._fallback_order = [
        self._ollama_default,
        self._ollama_summary,
        "gemini",
        "claude",
    ]
def _resolve_intent_from_context(
self,
context: dict | None,
) -> IntentResult | None:
"""
從 context 解析集中治理的 intent hint。
僅作為 AI Router 的內部快路徑,避免呼叫端自行繞過 Router 規則。
例如 Phase 2 agent 已知屬於診斷分析,就不必再多跑一次 intent LLM。
"""
if not context:
return None
raw_hint = str(context.get("intent_hint", "")).strip().lower()
if not raw_hint:
return None
alias_map = {
"restart": IntentType.RESTART,
"scale": IntentType.SCALE,
"config": IntentType.CONFIG,
"diagnose": IntentType.DIAGNOSE,
"delete": IntentType.DELETE,
"rollback": IntentType.ROLLBACK,
"unknown": IntentType.UNKNOWN,
# legacy aliases
"alert_triage": IntentType.ALERT_TRIAGE,
"deployment": IntentType.DEPLOYMENT,
"query": IntentType.QUERY,
"maintenance": IntentType.MAINTENANCE,
"code_review": IntentType.CODE_REVIEW,
}
intent = alias_map.get(raw_hint)
if intent is None:
logger.warning("ai_router_invalid_intent_hint", intent_hint=raw_hint)
return None
return IntentResult(
intent=intent,
confidence=1.0,
method="context_hint",
matched_keywords=[f"context:{raw_hint}"],
detected_resources=[],
reasoning=f"context intent_hint={raw_hint}",
)
async def route(
    self,
    text: str,
    context: dict | None = None,
) -> RoutingDecision:
    """Route a request to the most suitable AI provider and model.

    Latency target: < 50ms (rule engine first; may take longer when the
    intent classifier falls back to an LLM).

    Args:
        text: User input or alert content.
        context: Extra context (service, metrics, optional ``intent_hint``).

    Returns:
        RoutingDecision: The complete routing decision.
    """
    started = time.perf_counter()
    context = context or {}

    # Step 1: intent — prefer the context hint, otherwise run the classifier.
    intent_result = self._resolve_intent_from_context(context)
    if intent_result is None:
        intent_result = await self._intent_classifier.classify(text)
    intent = normalize_intent(intent_result.intent)

    # Step 2: complexity score.
    complexity = self._complexity_scorer.score(context)

    # Step 3: provider + model according to the rule chain.
    provider, model, reason = self._select_provider_and_model(
        intent, intent_result, complexity
    )

    # Step 3b (2026-04-25 P1.2, Claude Engineer-A2): when the initial choice is
    # OLLAMA, let the failover manager re-evaluate it. Other providers are
    # left untouched.
    failover_fallback: list[tuple[AIProviderEnum, str]] | None = None
    if provider == AIProviderEnum.OLLAMA:
        try:
            failover_result = await self._failover_manager.select_provider(
                task_type=intent.value if intent else "general"
            )
            primary_str = failover_result.primary.provider_name
            try:
                provider = AIProviderEnum(primary_str)
                model = failover_result.primary.model
                reason = f"{reason} [failover→{primary_str}]"
            except ValueError:
                # The reported name has no enum member: keep the current
                # selection so an unknown provider never silently reaches
                # the execution layer.
                logger.warning(
                    "ai_router_unknown_failover_provider",
                    provider=primary_str,
                )

            # Rebuild the fallback chain from the failover result, dropping
            # entries whose provider name is unknown.
            rebuilt_chain: list[tuple[AIProviderEnum, str]] = []
            for endpoint in failover_result.fallback_chain:
                try:
                    rebuilt_chain.append(
                        (AIProviderEnum(endpoint.provider_name), endpoint.model)
                    )
                except ValueError:
                    logger.warning(
                        "ai_router_unknown_failover_fallback_provider",
                        provider=endpoint.provider_name,
                    )
            failover_fallback = rebuilt_chain
        except Exception as exc:
            # Failover manager failure is fail-open: routing continues.
            logger.warning("ai_router_failover_manager_error", error=str(exc))

    # Step 4: fallback chain. A chain returned by the failover manager wins;
    # otherwise the intent-aware chain is used. Because DIAGNOSE is overridden
    # to OLLAMA, DIAGNOSE requests do pass through Step 3b as well.
    if failover_fallback is not None:
        fallback_chain = failover_fallback
    else:
        fallback_chain = self._build_fallback_chain_for_intent(provider, intent)

    # Step 5: latency budget for the final provider.
    latency_budget = PROVIDER_LATENCY_BUDGET.get(provider, 30000)

    # Time spent on the routing decision itself, in milliseconds.
    routing_latency = (time.perf_counter() - started) * 1000

    decision = RoutingDecision(
        selected_provider=provider,
        selected_model=model,
        fallback_chain=fallback_chain,
        routing_reason=reason,
        latency_budget_ms=latency_budget,
        intent=intent,
        intent_result=intent_result,
        complexity=complexity,
        routing_latency_ms=routing_latency,
    )

    logger.info(
        "ai_routing_decision",
        provider=provider.value,
        model=model,
        intent=intent.value,
        intent_confidence=intent_result.confidence,
        risk_level=intent_result.risk_level.value,
        complexity_score=complexity.score,
        reason=reason,
        latency_budget_ms=latency_budget,
        routing_latency_ms=round(routing_latency, 2),
        fallback_count=len(fallback_chain),
    )
    return decision
def _select_provider_and_model(
    self,
    intent: IntentType,
    intent_result: IntentResult,
    complexity: ComplexityScore,
) -> tuple[AIProviderEnum, str, str]:
    """Pick the provider and model (core logic of Phase 13.3 #87).

    Rules, first match wins:
        1. CRITICAL risk                -> Claude
        2. DELETE intent                -> Claude
        3. intent override configured   -> that provider
        4. complexity >= 4 or HIGH risk -> OpenClaw Nemo
        5. complexity == 3              -> Ollama (default model)
        6. anything else                -> Ollama (summary model if score <= 1)

    Args:
        intent: Normalized intent.
        intent_result: Full classification result (supplies the risk level).
        complexity: Complexity score.

    Returns:
        (provider, model, reason)
    """
    risk = intent_result.risk_level
    score = complexity.score

    # Rule 1: CRITICAL risk forces Claude (highest priority).
    if risk == RiskLevel.CRITICAL:
        return (
            AIProviderEnum.CLAUDE,
            self._claude_default,
            f"CRITICAL 風險 ({intent.value}) 強制使用 Claude",
        )

    # Rule 2: DELETE is irreversible, so it also forces Claude.
    if intent == IntentType.DELETE:
        return (
            AIProviderEnum.CLAUDE,
            self._claude_default,
            "DELETE 意圖 (不可逆) 強制使用 Claude",
        )

    # Rule 3: per-intent forced provider.
    forced = self._intent_provider_overrides.get(intent)
    if forced is not None:
        # 2026-04-03 ogt: ALERT_TRIAGE / QUERY on Ollama use the summary model,
        # to avoid the default model's 90s timeout failing the whole chain
        # (Phase 24 option A). Every other override uses the provider default.
        wants_summary = forced == AIProviderEnum.OLLAMA and intent in (
            IntentType.ALERT_TRIAGE,
            IntentType.QUERY,
        )
        chosen_model = (
            self._ollama_summary if wants_summary else self._provider_models[forced]
        )
        return forced, chosen_model, f"意圖 {intent.value} 指定使用 {forced.value}"

    # Rule 4: complexity 4-5 or HIGH risk -> OpenClaw Nemo (via .188 -> NVIDIA NIM).
    # 2026-04-02 ogt, C1 fix: NVIDIA renamed to OPENCLAW_NEMO to match the registry.
    if score >= 4 or risk == RiskLevel.HIGH:
        return (
            AIProviderEnum.OPENCLAW_NEMO,
            self._openclaw_nemo_default,
            f"複雜度={score}/5, 風險={risk.value} → OpenClaw Nemo (fallback Gemini)",
        )

    # Rule 5: complexity 3 -> Ollama with the default model.
    if score == 3:
        return (
            AIProviderEnum.OLLAMA,
            self._ollama_default,
            f"複雜度={score}/5, 風險={risk.value} → Ollama (fallback Gemini)",
        )

    # Rule 6: low complexity -> Ollama; the lightest requests get the
    # summary model for a faster answer.
    light_model = self._ollama_summary if score <= 1 else self._ollama_default
    return (
        AIProviderEnum.OLLAMA,
        light_model,
        f"複雜度={score}/5, 風險={risk.value} → Ollama (成本優先)",
    )
def _select_model(
self,
intent: IntentType,
intent_result: IntentResult,
complexity: ComplexityScore,
) -> tuple[str, str]:
"""
選擇模型 (向後相容方法)
Deprecated: 請使用 _select_provider_and_model
Args:
intent: 正規化後的意圖
intent_result: 完整分類結果
complexity: 複雜度評分
Returns:
(model_name, reason)
"""
_, model, reason = self._select_provider_and_model(
intent, intent_result, complexity
)
return model, reason
def _build_fallback_chain(
self, selected_provider: AIProviderEnum
) -> list[tuple[AIProviderEnum, str]]:
"""
# DEPRECATED 2026-04-28 — 已由 _build_fallback_chain_for_intent 取代,無呼叫方
建立 Fallback 鏈 (排除已選 Provider)
Fallback 順序: Ollama → Gemini → Claude
Args:
selected_provider: 已選擇的 Provider
Returns:
Fallback 鏈 [(provider, model), ...]
"""
fallback_chain: list[tuple[AIProviderEnum, str]] = []
for provider, model in self._full_fallback_chain:
if provider != selected_provider:
fallback_chain.append((provider, model))
return fallback_chain
def _build_fallback_list(self, selected_model: str) -> list[str]:
"""建立 Fallback 列表 (向後相容)"""
fallbacks = [m for m in self._fallback_order if m != selected_model]
return fallbacks
def _build_fallback_chain_for_intent(
    self,
    selected_provider: AIProviderEnum,
    intent: IntentType,
) -> list[tuple[AIProviderEnum, str]]:
    """Build the fallback chain for an intent, minus the selected provider.

    DIAGNOSE reads from ``_diagnose_fallback_chain``; every other intent reads
    from ``_full_fallback_chain``. Both lists are defined in ``__init__``.

    History: the dedicated DIAGNOSE chain was introduced on 2026-04-27
    (A2, INC-20260425) to keep DIAGNOSE away from Ollama. The chain defined in
    ``__init__`` has since been changed to start with Ollama, so this method
    no longer excludes it.

    Args:
        selected_provider: The provider already selected as primary.
        intent: Normalized intent.

    Returns:
        Fallback chain [(provider, model), ...] without ``selected_provider``.
    """
    source_chain = (
        self._diagnose_fallback_chain
        if intent == IntentType.DIAGNOSE
        else self._full_fallback_chain
    )
    remaining: list[tuple[AIProviderEnum, str]] = []
    for candidate, candidate_model in source_chain:
        if candidate != selected_provider:
            remaining.append((candidate, candidate_model))
    return remaining
def route_sync(
    self,
    text: str,
    context: dict | None = None,
) -> RoutingDecision:
    """Synchronous routing using only the classifier's synchronous path.

    Intended for callers that need a fast decision (documented target
    < 50ms) and do not need LLM-based classification. Unlike ``route``, this
    method does not consult the failover manager and does not log the
    decision.

    Args:
        text: User input or alert content.
        context: Extra context.

    Returns:
        RoutingDecision: The routing decision.
    """
    started = time.perf_counter()
    context = context or {}

    # Intent: context hint first, otherwise the synchronous classifier.
    intent_result = self._resolve_intent_from_context(context)
    if intent_result is None:
        intent_result = self._intent_classifier.classify_sync(text)
    intent = normalize_intent(intent_result.intent)

    complexity = self._complexity_scorer.score(context)

    provider, model, reason = self._select_provider_and_model(
        intent, intent_result, complexity
    )

    # 2026-04-27 (A2, INC-20260425): same intent-aware chain as route().
    fallback_chain = self._build_fallback_chain_for_intent(provider, intent)
    latency_budget = PROVIDER_LATENCY_BUDGET.get(provider, 30000)

    # Time spent on the routing decision itself, in milliseconds.
    elapsed_ms = (time.perf_counter() - started) * 1000

    return RoutingDecision(
        selected_provider=provider,
        selected_model=model,
        fallback_chain=fallback_chain,
        routing_reason=reason,
        latency_budget_ms=latency_budget,
        intent=intent,
        intent_result=intent_result,
        complexity=complexity,
        routing_latency_ms=elapsed_ms,
    )
# =========================================================================
# Tool Calling 路由 (ADR-036)
# =========================================================================
def route_tool_calling(self) -> tuple[AIProviderEnum, str, list[tuple[AIProviderEnum, str]]]:
    """Routing dedicated to tool-calling tasks (ADR-036).

    Nemotron is always the primary (the original note cites 83.3% accuracy);
    the fallbacks are the remaining entries of
    ``_tool_calling_fallback_chain``.

    Returns:
        (provider, model, fallback_chain)
    """
    # 2026-04-02 ogt, C1 fix: tool calling goes to NEMOTRON (direct NIM).
    primary = AIProviderEnum.NEMOTRON
    primary_model = self._nemotron_default

    fallbacks = []
    for candidate, candidate_model in self._tool_calling_fallback_chain:
        if candidate != primary:
            fallbacks.append((candidate, candidate_model))

    logger.info(
        "tool_calling_routing",
        provider=primary.value,
        model=primary_model,
        fallback_count=len(fallbacks),
    )
    return primary, primary_model, fallbacks
def get_tool_calling_fallback_chain(self) -> list[tuple[AIProviderEnum, str]]:
    """Return a shallow copy of the tool-calling fallback chain."""
    return list(self._tool_calling_fallback_chain)
# =========================================================================
# 便捷方法
# =========================================================================
def get_provider_for_intent(self, intent: IntentType) -> AIProviderEnum:
    """Return the provider forced for ``intent``, ignoring complexity.

    Falls back to Ollama when the intent has no (truthy) override.
    """
    forced = self._intent_provider_overrides.get(intent)
    if forced:
        return forced
    return AIProviderEnum.OLLAMA
def get_model_for_provider(self, provider: AIProviderEnum) -> str:
    """Return the default model for ``provider``.

    Providers without an entry in ``_provider_models`` get the default Ollama
    model.
    """
    fallback_model = self._ollama_default
    return self._provider_models.get(provider, fallback_model)
def get_routing_matrix(self) -> list[dict]:
    """Return the routing rules as data (for API docs or debugging).

    Returns:
        One dict per rule, with keys ``rule``, ``condition``, ``provider``
        and ``reason``, in rule order.
    """
    columns = ("rule", "condition", "provider", "reason")
    rows = (
        (1, "CRITICAL risk", "claude", "不可逆/高風險操作強制最強模型"),
        (2, "DELETE intent", "claude", "刪除操作強制最強模型"),
        (3, "Intent override", "depends", "特定意圖有預設 Provider"),
        (
            4,
            "complexity >= 4 OR HIGH risk",
            "openclaw_nemo",
            "高複雜度需要 Nvidia Nemotron 強大推理能力 (via .188)",
        ),
        (5, "complexity == 3", "ollama", "中等複雜度本地處理"),
        (6, "complexity 1-2", "ollama", "低複雜度快速處理"),
    )
    return [dict(zip(columns, row)) for row in rows]
async def feedback_from_aider_events(
    self,
    repo: str | None = None,
    days: int = 7,
) -> dict[str, float]:
    """Aggregate the per-model success rate of aider events over recent days.

    Phase 24 ADR-052 extension (AI autonomy feedback loop). Per the original
    note this is read-only for now: it is not wired into ``route()`` and is
    meant to influence provider weights only after a USE_AIDER_FEEDBACK flag
    and a 7-day canary validation.

    Args:
        repo: When given, only rows of that repo are aggregated; otherwise
            rows of every repo are used. The filter is applied after the
            query, on the returned rows.
        days: Size of the time window in days (default 7).

    Returns:
        ``{model_name: success_rate}``, e.g. ``{"elephant-alpha": 0.85}``.
        An empty dict means no data, missing DB modules, or a failed query;
        callers should degrade by ignoring the feedback.
    """
    try:
        from src.db.base import get_db_context
        from src.repositories.aider_event_repository import AiderEventRepository
    except ImportError:
        # The DB layer is optional here; without it there is no feedback.
        return {}

    try:
        async with get_db_context() as sess:
            repo_obj = AiderEventRepository(sess)
            stats = await repo_obj.model_stats_since(days=days)
    except Exception as exc:
        # Best effort: keep returning {} but record why the query failed,
        # like the other handlers in this module do.
        logger.debug("ai_router_feedback_aggregation_failed", error=str(exc))
        return {}

    return self._aggregate_feedback_stats(stats, repo=repo)
@staticmethod
def _aggregate_feedback_stats(
stats: list[dict], repo: str | None = None
) -> dict[str, float]:
"""純函數:過濾 repo 並將 stats 轉換為 {model: success_rate}(可獨立單元測試)。"""
out: dict[str, float] = {}
for row in stats:
if repo and row.get("repo") != repo:
continue
model = row.get("model")
if not model:
continue
out[model] = float(row.get("success_rate") or 0)
return out
# =============================================================================
# Phase 24 ADR-052: AI Provider Registry + Execution Layer
# =============================================================================
# 2026-04-02 ogt: 在現有 AIRouter (路由決策) 之上,加入 Provider 執行層
# 整合: ProviderRegistry + 閘門 (CB/RL/Sem) + Cache + Langfuse Trace
#
# 呼叫關係:
# openclaw.py → AIRouterExecutor.execute() → AIRouter.route() → Provider.analyze()
# =============================================================================
import asyncio
import hashlib
import json as _json
from src.core.config import get_settings
from src.services.ai_providers.interfaces import AIProvider as AIProviderProtocol, AIResult
# Settings are resolved once, when this module is imported; the executor
# below reads _settings.MOCK_MODE from this object.
_settings = get_settings()
class _SimpleCircuitBreaker:
"""
輕量 per-provider Circuit Breaker (Phase 24 C2 修復)
不共用 OpenClawGuard — 避免 Gemini 掛掉時 Ollama 也被擋
"""
def __init__(self, name: str, failure_threshold: int = 5, recovery_timeout: float = 60.0) -> None:
self.name = name
self._failure_threshold = failure_threshold
self._recovery_timeout = recovery_timeout
self._failure_count = 0
self._last_failure_time: float = 0.0
def is_open(self) -> bool:
if self._failure_count < self._failure_threshold:
return False
# 超過 recovery timeout → half-open (允許一次嘗試)
if time.time() - self._last_failure_time > self._recovery_timeout:
return False
return True
def record_success(self) -> None:
self._failure_count = 0
def record_failure(self) -> None:
self._failure_count += 1
self._last_failure_time = time.time()
class AIProviderRegistry:
    """Registry of AI providers, analogous to the MCP ProviderRegistry (ADR-015).

    Keeps providers by name and exposes only the enabled ones to callers.
    """

    def __init__(self) -> None:
        """Start with no providers registered."""
        self._providers: dict[str, AIProviderProtocol] = {}

    def register(self, provider: AIProviderProtocol) -> None:
        """Register ``provider`` under its name (called at startup).

        A provider registered later under the same name replaces the earlier
        one.
        """
        self._providers[provider.name] = provider
        if provider.is_enabled:
            status = "enabled"
        else:
            status = "disabled"
        logger.info("ai_provider_registered", name=provider.name, status=status, privacy=provider.privacy_level)

    def get(self, name: str) -> AIProviderProtocol | None:
        """Return the provider called ``name`` if it is registered and enabled."""
        candidate = self._providers.get(name)
        return candidate if (candidate and candidate.is_enabled) else None

    def all_enabled(self) -> list[AIProviderProtocol]:
        """Return every enabled provider, in registration order."""
        enabled: list[AIProviderProtocol] = []
        for candidate in self._providers.values():
            if candidate.is_enabled:
                enabled.append(candidate)
        return enabled

    def names(self) -> list[str]:
        """Return the names of all registered providers, enabled or not."""
        return list(self._providers)

    async def health_check_all(self) -> dict[str, bool]:
        """Run every provider's health check, one after another.

        A health check that raises is reported as unhealthy.
        """
        report = {}
        for provider_name, candidate in self._providers.items():
            try:
                report[provider_name] = await candidate.health_check()
            except Exception:
                report[provider_name] = False
        return report

    async def close_all(self) -> None:
        """Close every provider that has a ``close`` method (I5 fix: shutdown hook).

        A failure to close one provider is logged and does not stop the others.
        """
        for provider_name, candidate in self._providers.items():
            try:
                if hasattr(candidate, "close"):
                    await candidate.close()
                    logger.info("ai_provider_closed", name=provider_name)
            except Exception as exc:
                logger.warning("ai_provider_close_failed", name=provider_name, error=str(exc))
class AIRouterExecutor:
"""
AI Router 執行層 (Phase 24 ADR-052)
職責:
1. Cache 檢查 (Redis, 跨 Provider 共享) — D4
2. 閘門控制 (Circuit Breaker → Rate Limiter → Semaphore) — D3
3. 呼叫 Provider.analyze() — 實際執行
4. 記錄 Langfuse Trace — D5
5. Mock Mode 攔截 — D13
設計原則:
- 只依賴 AIProviderProtocol禁止 import 具體 Provider 類別
- 閘門在 RouterProvider 保持純粹 (Stateless Compute Units)
"""
def __init__(self, registry: AIProviderRegistry) -> None:
self._registry = registry
self._semaphores: dict[str, asyncio.Semaphore] = {}
# C2 修復: per-provider Circuit Breaker (不共用,避免一個掛全部擋)
self._circuit_breakers: dict[str, "_SimpleCircuitBreaker"] = {}
def _get_semaphore(self, name: str, limit: int = 3) -> asyncio.Semaphore:
"""取得 Provider 的並發 Semaphore (lazy init)"""
if name not in self._semaphores:
self._semaphores[name] = asyncio.Semaphore(limit)
return self._semaphores[name]
def _get_circuit_breaker(self, name: str) -> "_SimpleCircuitBreaker":
    """Return the circuit breaker for ``name``, creating it on first use."""
    breaker = self._circuit_breakers.get(name)
    if breaker is not None:
        return breaker

    if name == "nemotron":
        # 2026-04-05 Claude Code, v4.3: NIM gets more lenient parameters —
        # 10 failures before opening and a 30s recovery window so NIM is
        # retried quickly. NOTE(review): the original note says timeouts are
        # not counted as failures; that would be enforced by the caller and
        # is not visible in this method — confirm.
        breaker = _SimpleCircuitBreaker(
            name, failure_threshold=10, recovery_timeout=30.0
        )
    else:
        breaker = _SimpleCircuitBreaker(name)

    self._circuit_breakers[name] = breaker
    return breaker
@staticmethod
def _cache_key(prompt: str, context: dict | None) -> str:
"""生成 Cache Key (與 openclaw.py 相容)"""
ctx_hash = ""
if context:
ctx_hash = f":{context.get('alert_type', '')}:{context.get('target_resource', '')}"
content = f"{prompt}{ctx_hash}"
return f"llm_cache:{hashlib.sha256(content.encode()).hexdigest()[:16]}"
async def execute(
    self,
    prompt: str,
    provider_order: list[str],
    context: dict | None = None,
    cache_ttl: int = 3600,
    require_local: bool = False,
) -> AIResult:
    """
    Run ``prompt`` against the providers in ``provider_order`` until one succeeds.

    Flow: mock-mode short circuit -> Redis cache lookup -> for each provider:
    registration check, privacy filter, circuit breaker, rate limiter,
    semaphore, then ``provider.analyze``. The first successful result is
    written to the cache and returned.

    Args:
        prompt: LLM prompt.
        provider_order: Provider names in the order they should be tried
            (decided by the router).
        context: Optional extra context. Keys read here: ``alert_type``,
            ``target_resource`` (cache key), the alert-identifying keys and
            ``enforce_ollama_first`` (cache provider check),
            ``alert_requires_ollama_before_cloud`` and ``intent_hint``.
        cache_ttl: Cache TTL in seconds for a successful result.
        require_local: When true, providers whose ``privacy_level`` is not
            ``"local"`` are skipped (privacy boundary).

    Returns:
        AIResult: the first successful provider result, a cached result
        (``from_cache=True``), or a ``success=False`` result with
        ``provider="none"`` when every provider was skipped or failed.
    """
    # (1) Mock mode short circuit (D13).
    if _settings.MOCK_MODE:
        logger.info("ai_router_mock_mode")
        return AIResult(
            raw_response=_json.dumps({
                "action_title": "Mock Analysis",
                "description": "Mock mode enabled",
                "risk_level": "low",
                "reasoning": "MOCK_MODE=true",
                "confidence": 0.0,
            }),
            success=True,
            provider="mock",
        )

    # (2) Cache lookup (D4). The key is computed outside the try block so it
    # is always bound for the cache write further down (C3 fix).
    cache_key = self._cache_key(prompt, context)
    try:
        from src.core.redis_client import get_redis

        redis = get_redis()
        cached = await redis.get(cache_key)
        if cached:
            data = _json.loads(cached)
            cached_provider = data.get("provider", "cache")
            provider_allowed = cached_provider in provider_order
            # An Ollama-first policy applies when the primary provider is an
            # Ollama variant and the context either looks like an alert or
            # explicitly sets ``enforce_ollama_first``.
            ollama_first_required = (
                bool(context)
                and (
                    any(
                        key in context
                        for key in (
                            "alert_type",
                            "alertname",
                            "alert_name",
                            "fingerprint",
                            "incident_id",
                            "severity",
                            "target_resource",
                        )
                    )
                    or bool(context.get("enforce_ollama_first"))
                )
                and bool(provider_order)
                and provider_order[0].startswith("ollama")
            )
            # A result cached under the generic "ollama" name is acceptable
            # for any Ollama variant in the current order.
            if (
                cached_provider == "ollama"
                and any(provider.startswith("ollama") for provider in provider_order)
            ):
                provider_allowed = True
            if ollama_first_required and not cached_provider.startswith("ollama"):
                provider_allowed = False
            if provider_allowed:
                logger.info("ai_router_cache_hit", cache_key=cache_key[:30])
                return AIResult(
                    raw_response=data.get("response", ""),
                    success=True,
                    provider=cached_provider,
                    from_cache=True,
                )
            # Not an error: the cached entry came from a provider the current
            # routing does not allow, so fall through to a live call.
            logger.info(
                "ai_router_cache_provider_mismatch_skip",
                cache_key=cache_key[:30],
                cached_provider=cached_provider,
                provider_order=provider_order,
                ollama_first_required=ollama_first_required,
            )
    except Exception as e:
        # Cache is best-effort; any read/parse problem falls through to a live call.
        logger.debug("ai_router_cache_read_failed", error=str(e))

    # (3) Iterate providers behind the gates (D3).
    # C1 fix (2026-04-02): open a Langfuse trace (D5) around the whole chain.
    # NOTE(review): the value returned by __enter__() is not used; generation()
    # is called on the context object itself — confirm against langfuse_trace.
    try:
        from src.services.langfuse_client import langfuse_trace

        _lf_trace_ctx = langfuse_trace(
            "ai_router_execute",
            metadata={
                "provider_order": provider_order,
                "prompt_length": len(prompt),
                "require_local": require_local,
                "alert_type": (context or {}).get("alert_type", ""),
            },
        )
        _lf_trace_ctx.__enter__()
    except Exception:
        _lf_trace_ctx = None

    errors: list[str] = []
    attempted_providers: set[str] = set()
    alert_requires_ollama_before_cloud = bool(
        (context or {}).get("alert_requires_ollama_before_cloud")
    )
    # A2 / INC-20260425 (2026-04-27): DIAGNOSE fallback metric tracking. The
    # intent is read from context["intent_hint"] to avoid changing the signature.
    _is_diagnose_intent = str((context or {}).get("intent_hint", "")).strip().lower() == "diagnose"
    # Set only after provider.analyze() actually ran and failed (F6), so
    # skipped providers never appear as the "from" side of a fallback.
    _last_attempted_provider: str | None = None

    try:
        for provider_name in provider_order:
            # A previous provider really failed and we are moving on: record
            # the from->to fallback (DIAGNOSE intent only).
            # NOTE(review): this fires before the gates below, so ``to_provider``
            # may be a provider that ends up skipped — confirm this is intended.
            if _is_diagnose_intent and _last_attempted_provider is not None:
                try:
                    from src.core.metrics import record_diagnose_fallback

                    record_diagnose_fallback(
                        from_provider=_last_attempted_provider,
                        to_provider=provider_name,
                    )
                    logger.info(
                        "diagnose_fallback_recorded",
                        from_provider=_last_attempted_provider,
                        to_provider=provider_name,
                    )
                except Exception as _metric_e:
                    # F6: warn (not debug) and bump an error counter so metric
                    # pipeline problems are detectable.
                    logger.warning("diagnose_fallback_metric_failed", error=str(_metric_e))
                    try:
                        from src.core.metrics import AIOPS_DIAGNOSE_FALLBACK_METRIC_ERROR_TOTAL

                        AIOPS_DIAGNOSE_FALLBACK_METRIC_ERROR_TOTAL.inc()
                    except Exception:
                        # Deliberate best-effort: the failure is already logged above.
                        pass

            provider = self._registry.get(provider_name)
            if not provider:
                # Accumulate instead of silently skipping (observability).
                errors.append(f"{provider_name}: not_registered")
                continue

            # Privacy filter (D7).
            if require_local and provider.privacy_level != "local":
                errors.append(f"{provider_name}: privacy_skip(non_local)")
                continue

            # Alert policy: cloud providers stay blocked until "ollama_local"
            # has actually been attempted in this call.
            if alert_requires_ollama_before_cloud and provider.privacy_level == "cloud":
                if "ollama_local" not in attempted_providers:
                    errors.append(f"{provider_name}: blocked_until_ollama_local_attempted")
                    logger.warning(
                        "ai_router_cloud_blocked_until_ollama_local_attempted",
                        provider=provider_name,
                        provider_order=provider_order,
                        attempted_providers=sorted(attempted_providers),
                    )
                    continue

            # Gate 1: per-provider circuit breaker (C2 fix).
            cb = self._get_circuit_breaker(provider_name)
            if cb.is_open():
                if alert_requires_ollama_before_cloud and provider_name.startswith("ollama"):
                    # Alerts must try Ollama first, so an open breaker is bypassed.
                    logger.warning(
                        "ai_router_alert_ollama_circuit_bypassed",
                        provider=provider_name,
                        reason="alert_requires_ollama_before_cloud",
                    )
                else:
                    errors.append(f"{provider_name}: circuit_open")
                    logger.warning("ai_router_circuit_open", provider=provider_name)
                    continue

            # Gate 2: rate limiter (only for these providers).
            if provider_name in ("openclaw_nemo", "nemotron", "gemini", "claude"):
                try:
                    from src.services.ai_rate_limiter import get_ai_rate_limiter

                    rate_limiter = get_ai_rate_limiter()
                    allowed, reason = await rate_limiter.check_and_increment(provider_name)
                    if not allowed:
                        errors.append(f"{provider_name}: rate_limit({reason})")
                        logger.info("ai_router_rate_limited", provider=provider_name, reason=reason)
                        continue
                except Exception as e:
                    # A broken rate limiter must not block the call.
                    logger.debug("ai_router_rate_limiter_error", error=str(e))

            # Gate 3: semaphore (concurrency control).
            sem = self._get_semaphore(provider_name)
            async with sem:
                try:
                    attempted_providers.add(provider_name)
                    result = await provider.analyze(prompt, context)
                    if result.success:
                        cb.record_success()

                        # Record cost. The limiter is imported here so this
                        # also works for providers that skipped gate 2.
                        if result.cost_usd > 0:
                            try:
                                from src.services.ai_rate_limiter import get_ai_rate_limiter

                                rate_limiter = get_ai_rate_limiter()
                                await rate_limiter.record_cost(provider_name, result.cost_usd)
                            except Exception as cost_err:
                                logger.debug(
                                    "ai_router_cost_record_failed",
                                    provider=provider_name,
                                    error=str(cost_err),
                                )

                        # Write cache (D4), best-effort.
                        try:
                            from datetime import datetime, timedelta, timezone

                            from src.core.redis_client import get_redis

                            redis = get_redis()
                            # Use an explicit UTC+8 clock so the "+08:00" suffix
                            # is correct regardless of the host's local timezone.
                            taipei_now = datetime.now(timezone(timedelta(hours=8)))
                            cache_data = _json.dumps({
                                "response": result.raw_response,
                                "provider": result.provider,
                                "cached_at": taipei_now.strftime("%Y-%m-%dT%H:%M:%S+08:00"),
                            })
                            await redis.set(cache_key, cache_data, ex=cache_ttl)
                        except Exception as cache_err:
                            logger.debug("ai_router_cache_write_failed", error=str(cache_err))

                        logger.info(
                            "ai_router_execute_success",
                            provider=provider_name,
                            latency_ms=round(result.latency_ms, 1),
                            tokens=result.tokens,
                            from_cache=False,
                        )

                        # D5: record the Langfuse generation. The trace itself
                        # is closed in the ``finally`` below.
                        if _lf_trace_ctx:
                            try:
                                _lf_trace_ctx.generation(
                                    name=f"{provider_name}_call",
                                    model=provider_name,
                                    input=prompt[:500],
                                    output=result.raw_response[:500],
                                    usage={"total": result.tokens} if result.tokens else None,
                                    metadata={"cost_usd": result.cost_usd, "latency_ms": round(result.latency_ms, 1)},
                                )
                            except Exception as lf_err:
                                logger.debug("ai_router_langfuse_generation_failed", error=str(lf_err))
                        return result

                    # Provider returned success=False.
                    errors.append(f"{provider_name}: {result.error}")
                    logger.warning("ai_router_provider_failed", provider=provider_name, error=result.error)
                    # A2: remember the failed provider for the next iteration's fallback metric.
                    _last_attempted_provider = provider_name
                except Exception as e:
                    errors.append(f"{provider_name}: {e}")
                    logger.warning("ai_router_provider_exception", provider=provider_name, error=str(e))
                    # v4.3 (2026-04-05): httpx timeouts do not count as circuit
                    # breaker failures; only other errors accumulate.
                    import httpx as _httpx

                    if not isinstance(e, _httpx.TimeoutException):
                        cb.record_failure()
                    # A2: remember the failed provider for the next iteration's fallback metric.
                    _last_attempted_provider = provider_name

        # Every provider was skipped or failed.
        logger.error("ai_router_all_providers_failed", tried=provider_order, errors=errors)

        # Phase 25 P0: when require_local is set and everything failed, notify
        # via Telegram (privacy boundary), using the ADR-075 TYPE-1 format.
        if require_local:
            try:
                from src.services.telegram_gateway import get_telegram_gateway

                tg = get_telegram_gateway()
                import asyncio as _asyncio

                tried_str = ", ".join(provider_order)
                formatted = (
                    "⚠️ <b>TYPE-1 | AI Provider 不可用</b>\n"
                    "──────────────────────\n"
                    f"├─ 已嘗試: <code>{tried_str}</code>\n"
                    "└─ 原因: require_local=True無可用本地 Provider\n"
                    "\n"
                    "需要人工介入"
                )
                notify_task = _asyncio.create_task(
                    tg.send_alert_notification(formatted, parse_mode="HTML")
                )
                # The event loop only keeps weak references to tasks; hold a
                # strong one until the notification finishes so it cannot be
                # garbage-collected mid-flight.
                pending_tasks = getattr(self, "_background_tasks", None)
                if pending_tasks is None:
                    pending_tasks = set()
                    self._background_tasks = pending_tasks
                pending_tasks.add(notify_task)
                notify_task.add_done_callback(pending_tasks.discard)
            except Exception as _tg_e:
                logger.warning("diagnose_reject_telegram_failed", error=str(_tg_e))
            return AIResult(
                raw_response="",
                success=False,
                provider="none",
                error="local_providers_unavailable",
            )

        return AIResult(
            raw_response="",
            success=False,
            provider="none",
            error=f"All providers failed: {'; '.join(errors)}",
        )
    finally:
        # Close the Langfuse trace exactly once on every exit path (success,
        # total failure, or an exception/cancellation propagating out).
        if _lf_trace_ctx:
            try:
                _lf_trace_ctx.__exit__(None, None, None)
            except Exception as lf_err:
                logger.debug("ai_router_langfuse_close_failed", error=str(lf_err))
# =============================================================================
# Singleton management
# =============================================================================
# Module-level singletons. Each starts as None, is created lazily by the
# matching get_ai_* accessor, and is cleared again by reset_ai_router().
_router: AIRouter | None = None
_registry: AIProviderRegistry | None = None
_executor: AIRouterExecutor | None = None
def _init_registry() -> AIProviderRegistry:
    """Create a provider registry and register every known provider in it."""
    from src.services.ai_providers.ollama import (
        OllamaProvider,
        OllamaLocalProvider,
        OllamaGcpBProvider,  # 2026-05-04 ADR-110 GCP-B
    )
    from src.services.ai_providers.gemini import GeminiProvider
    from src.services.ai_providers.claude import ClaudeProvider
    from src.services.ai_providers.openclaw_nemo import OpenClawNemoProvider

    registry = AIProviderRegistry()

    # GCP-A primary (per the original notes: name="ollama", uses OLLAMA_URL).
    gcp_a_provider = OllamaProvider()
    registry.register(gcp_a_provider)
    for provider_factory in (GeminiProvider, ClaudeProvider, OpenClawNemoProvider):
        registry.register(provider_factory())

    # Phase 24 B3 (2026-04-02): NemotronProvider (tool_calling first).
    from src.services.ai_providers.nemotron import NemotronProvider

    registry.register(NemotronProvider())

    # 2026-05-06: host 188 is no longer an Ollama provider. The local fallback
    # is uniformly named "ollama_local"; per the original notes its endpoint
    # comes from OLLAMA_FALLBACK_URL (111/110 proxy).
    local_provider = OllamaLocalProvider()
    registry.register(local_provider)

    # ADR-110 (2026-05-04) three-tier GCP failover fix.
    # Root cause: the failover manager returns "ollama_gcp_a" / "ollama_gcp_b" /
    # "ollama_local", but the registry lacked those names, so the whole Ollama
    # chain was reported as not_registered and routing jumped to Gemini.
    # Fix:
    #   "ollama_gcp_a" -> alias of the primary OllamaProvider instance
    #   "ollama_gcp_b" -> the new OllamaGcpBProvider
    #   "ollama_local" -> the OllamaLocalProvider instance
    registry._providers["ollama_gcp_a"] = gcp_a_provider
    registry.register(OllamaGcpBProvider())
    registry._providers["ollama_local"] = local_provider
    return registry
def get_ai_router() -> AIRouter:
    """Return the shared AIRouter (routing decisions), creating it on first use."""
    global _router
    if _router is not None:
        return _router
    _router = AIRouter()
    return _router
def get_ai_registry() -> AIProviderRegistry:
    """Return the shared AIProviderRegistry, building it on first use."""
    global _registry
    if _registry is not None:
        return _registry
    _registry = _init_registry()
    return _registry
def get_ai_executor() -> AIRouterExecutor:
    """Return the shared AIRouterExecutor (routing + execution), creating it on first use.

    The executor is constructed with the shared registry from ``get_ai_registry()``.
    """
    global _executor
    if _executor is not None:
        return _executor
    _executor = AIRouterExecutor(get_ai_registry())
    return _executor
def reset_ai_router() -> None:
    """Drop all module-level singletons (intended for tests).

    After this call the next ``get_ai_*`` accessor rebuilds its object.
    """
    global _router, _registry, _executor
    _router = _registry = _executor = None