Files
awoooi/apps/api/src/services/ai_router.py
OG T 73577f7c5d
Some checks failed
CD Pipeline / Deploy Prometheus Alert Rules (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled
chore(ai-router): v4.3 版本號同步 (trigger CD push event)
2026-04-05 12:03:15 +08:00

1077 lines
43 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
AI Router - Phase 13.3 #87
==========================
智能 AI 路由器,根據意圖和複雜度動態選擇 AI Provider
目標: 根據請求特性自動選擇最適模型
策略: Intent Classifier + Complexity Scorer → Routing Decision
延遲目標: < 50ms (規則引擎優先)
路由決策矩陣 (ADR-023):
┌─────────────────┬───────────────┬──────────────────────────────┐
│ 複雜度 + 風險 │ Provider │ 備註 │
├─────────────────┼───────────────┼──────────────────────────────┤
│ 1-2 + LOW │ Ollama │ 快速本地處理 │
│ 3 + MEDIUM │ Ollama │ fallback → Gemini │
│ 4-5 + HIGH │ Gemini │ fallback → Claude │
│ DELETE/CRITICAL │ Claude │ 強制使用最強模型 │
└─────────────────┴───────────────┴──────────────────────────────┘
版本: v4.3
建立: 2026-03-26 (台北時區)
建立者: Claude Code
最後修改: 2026-04-02 (台北時區)
修改者: ogt (首席架構師 Review C1/C2/C3 修復)
變更紀錄:
| 版本 | 日期 | 執行者 | 變更內容 |
|------|------|--------|----------|
| v1.0 | 2026-03-26 | Claude Code | 初始實作 |
| v2.0 | 2026-03-26 | Claude Code | 支援 IntentResult + 新意圖類型 |
| v3.0 | 2026-03-26 | Claude Code | Phase 13.3 #87 完整路由決策矩陣 |
| v4.0 | 2026-04-02 | ogt (首席架構師) | Phase 24 AIProvider Registry + Executor; C1 Langfuse Trace; C2 AIRouter.route(); C3 型別 typo; I4 Protocol close |
| v4.1 | 2026-04-04 | ogt (首席架構師) | Phase 25 P0: DIAGNOSE Privacy-First — _local_fallback_chain; DIAGNOSE→NEMOTRON; REJECT+Telegram |
| v4.2 | 2026-04-04 | Claude Code | Phase 25 P0 實測修正: _local_fallback_chain 移除 Nemotron(雲端),僅留 Ollama(本地); timeout 依實測調整(NIM 60s/Ollama 200s) |
| v4.3 | 2026-04-05 | Claude Code | Phase 25 P0 架構修正: 實測 Ollama CPU ~238s(不可用); NIM 實測 2-27s avg 10.6s; DIAGNOSE 改走 _full_fallback_chain(NIM 主力); _local_fallback_chain 廢棄 |
"""
from __future__ import annotations
import time
from dataclasses import dataclass, field
from enum import Enum
from typing import TYPE_CHECKING, Protocol
import structlog
if TYPE_CHECKING:
from src.services.intent_classifier import IntentResult
from src.services.complexity_scorer import (
ComplexityScore,
get_complexity_scorer,
)
from src.services.intent_classifier import (
IntentResult,
IntentType,
RiskLevel,
get_intent_classifier,
normalize_intent,
)
from src.services.model_registry import get_model_registry
logger = structlog.get_logger(__name__)
# =============================================================================
# Provider 定義
# =============================================================================
class AIProviderEnum(Enum):
    """Identifiers of the AI providers the router can select."""

    OLLAMA = "ollama"
    GEMINI = "gemini"
    CLAUDE = "claude"

    # 2026-04-02 ogt (C1 fix): the values below match the names the provider
    # classes register under.
    #   OpenClawNemoProvider.name = "openclaw_nemo"  (general inference, via .188)
    #   NemotronProvider.name     = "nemotron"       (tool calling, direct NVIDIA NIM)
    # The legacy member NVIDIA = "nvidia" was removed because no provider is
    # registered under that name.
    OPENCLAW_NEMO = "openclaw_nemo"
    NEMOTRON = "nemotron"
# Latency budget per provider, in milliseconds. Callers look a provider up
# here and fall back to 30000 when it is missing.
PROVIDER_LATENCY_BUDGET: dict[AIProviderEnum, int] = {
    # Local provider: a longer processing time is tolerated.
    AIProviderEnum.OLLAMA: 60000,
    # Cloud providers: tighter budget.
    AIProviderEnum.GEMINI: 30000,
    AIProviderEnum.CLAUDE: 30000,
    # 2026-04-02 ogt (C1 fix): keys aligned with the registered provider names.
    # Reached via .188 -> NVIDIA NIM; a longer time is tolerated.
    AIProviderEnum.OPENCLAW_NEMO: 60000,
    # Dedicated to tool calling; a longer time is tolerated.
    AIProviderEnum.NEMOTRON: 60000,
}
# =============================================================================
# Interface 定義 (P1 修復 - 2026-04-01 首席架構師審查)
# =============================================================================
class IAIRouter(Protocol):
    """
    Structural interface of the AI router, so tests can inject a substitute.

    2026-04-01 ogt (chief-architect review, P1 fix): Protocol added to support
    dependency injection, following IModelRegistry / IComplexityScorer.
    """

    async def route(
        self,
        text: str,
        context: dict | None = None,
    ) -> "RoutingDecision":
        """Asynchronously route a request to the most suitable AI provider."""
        ...

    def route_sync(
        self,
        text: str,
        context: dict | None = None,
    ) -> "RoutingDecision":
        """Synchronous counterpart of ``route``."""
        ...

    def route_tool_calling(
        self,
    ) -> tuple[AIProviderEnum, str, list[tuple[AIProviderEnum, str]]]:
        """Routing dedicated to tool-calling tasks: (provider, model, fallback chain)."""
        ...
@dataclass
class RoutingDecision:
    """
    Outcome of one routing decision (Phase 13.3 #87).

    Carries the chosen provider/model, the fallback chain and the
    classification results that led to the choice.
    """

    # --- core decision ---
    selected_provider: AIProviderEnum                  # chosen AI provider
    selected_model: str                                # chosen model name
    fallback_chain: list[tuple[AIProviderEnum, str]]   # [(provider, model), ...]
    routing_reason: str                                # why this route was chosen
    latency_budget_ms: int                             # latency budget in milliseconds

    # --- classification results ---
    intent: IntentType                                 # normalized intent
    intent_result: IntentResult                        # full intent classification result
    complexity: ComplexityScore                        # complexity score
    # Always overwritten in __post_init__ from intent_result.risk_level.
    risk_level: RiskLevel = field(default=RiskLevel.MEDIUM)

    # --- routing metadata ---
    routing_latency_ms: float = 0.0                    # time spent deciding (ms)

    # --- backward compatibility (deprecated); overwritten in __post_init__ ---
    model: str = ""                                           # -> selected_model
    reason: str = ""                                          # -> routing_reason
    fallback_models: list[str] = field(default_factory=list)  # -> fallback_chain

    def __post_init__(self):
        """Fill the derived and deprecated fields from the primary ones."""
        self.risk_level = self.intent_result.risk_level

        # Deprecated mirrors of the primary fields.
        self.model, self.reason = self.selected_model, self.routing_reason
        alternates = []
        for _, candidate in self.fallback_chain:
            # Entries using the selected model itself are left out.
            if candidate != self.selected_model:
                alternates.append(candidate)
        self.fallback_models = alternates

    def to_dict(self) -> dict:
        """Return a plain-dict view of the decision (used for API responses)."""
        chain = [
            {"provider": prov.value, "model": name}
            for prov, name in self.fallback_chain
        ]
        payload = {
            "selected_provider": self.selected_provider.value,
            "selected_model": self.selected_model,
            "fallback_chain": chain,
            "routing_reason": self.routing_reason,
            "latency_budget_ms": self.latency_budget_ms,
            "intent": self.intent.value,
            "risk_level": self.risk_level.value,
            "complexity_score": self.complexity.score,
            "routing_latency_ms": round(self.routing_latency_ms, 2),
        }
        return payload
class AIRouter:
    """
    AI router (Phase 13.3 #87).

    Combines the intent classifier and the complexity scorer to choose the
    AI provider and model for a request.

    Routing rules, in priority order (implemented by
    ``_select_provider_and_model``):

        1. CRITICAL risk                 -> Claude
        2. DELETE intent                 -> Claude
        3. Per-intent override           -> DIAGNOSE: Nemotron;
                                            ALERT_TRIAGE / QUERY: Ollama (summary model)
        4. complexity >= 4 or HIGH risk  -> OpenClaw Nemo
        5. complexity == 3               -> Ollama (default model)
        6. anything else                 -> Ollama (summary model when complexity <= 1)
    """

    def __init__(self):
        self._intent_classifier = get_intent_classifier()
        self._complexity_scorer = get_complexity_scorer()
        self._model_registry = get_model_registry()

        # Model names are read from the ModelRegistry.
        self._ollama_default = self._model_registry.get_model("ollama", "default")
        self._ollama_summary = self._model_registry.get_model("ollama", "summary")
        self._gemini_default = self._model_registry.get_model("gemini", "default")
        self._claude_default = self._model_registry.get_model("claude", "default")
        # 2026-04-02 ogt (C1 fix): openclaw_nemo (general inference) and
        # nemotron (tool calling) both read the "nvidia" registry entry.
        self._openclaw_nemo_default = self._model_registry.get_model("nvidia", "default")
        self._nemotron_default = self._model_registry.get_model("nvidia", "default")
        # Backward-compatible alias.
        self._nvidia_default = self._openclaw_nemo_default

        # Default model per provider.
        self._provider_models: dict[AIProviderEnum, str] = {
            AIProviderEnum.OLLAMA: self._ollama_default,
            AIProviderEnum.GEMINI: self._gemini_default,
            AIProviderEnum.CLAUDE: self._claude_default,
            AIProviderEnum.OPENCLAW_NEMO: self._openclaw_nemo_default,
            AIProviderEnum.NEMOTRON: self._nemotron_default,
        }

        # Full fallback order (provider, model).
        # 2026-04-02 ogt (C1 fix): OPENCLAW_NEMO is tried first.
        self._full_fallback_chain: list[tuple[AIProviderEnum, str]] = [
            (AIProviderEnum.OPENCLAW_NEMO, self._openclaw_nemo_default),
            (AIProviderEnum.GEMINI, self._gemini_default),
            (AIProviderEnum.CLAUDE, self._claude_default),
            (AIProviderEnum.OLLAMA, self._ollama_default),
        ]

        # Fallback order dedicated to tool calling (ADR-036).
        self._tool_calling_fallback_chain: list[tuple[AIProviderEnum, str]] = [
            (AIProviderEnum.NEMOTRON, self._nemotron_default),
            (AIProviderEnum.GEMINI, self._gemini_default),
            (AIProviderEnum.CLAUDE, self._claude_default),
        ]

        # 2026-04-05 Claude Code (Phase 25 P0, v4.3): _local_fallback_chain is
        # deprecated. Per the measurements recorded by the original author on
        # 2026-04-05: Ollama llama3.2:3b on CPU took ~238s (unusable in
        # production) while Nemotron NIM took 2.2s-27s (10.6s on average), so
        # DIAGNOSE goes through _full_fallback_chain. The attribute is kept,
        # empty, only so existing readers do not hit an AttributeError.
        self._local_fallback_chain: list[tuple[AIProviderEnum, str]] = []

        # Forced provider per intent (None = decide by complexity / risk).
        self._intent_provider_overrides: dict[IntentType, AIProviderEnum | None] = {
            # Four core intents
            IntentType.RESTART: None,
            IntentType.SCALE: None,
            IntentType.CONFIG: None,  # by complexity (HIGH risk still upgrades via rule 4)
            # P0 2026-04-04 Claude Code: DIAGNOSE upgraded to Nemotron.
            # Per the original note, force-local requests are protected by the
            # executor's require_local privacy filter, which skips Nemotron.
            IntentType.DIAGNOSE: AIProviderEnum.NEMOTRON,
            # Auxiliary intents
            IntentType.DELETE: AIProviderEnum.CLAUDE,  # also forced by rule 2
            IntentType.ROLLBACK: None,
            IntentType.UNKNOWN: None,
            # Legacy intents
            IntentType.CODE_REVIEW: None,
            IntentType.DEPLOYMENT: None,
            IntentType.ALERT_TRIAGE: AIProviderEnum.OLLAMA,
            IntentType.QUERY: AIProviderEnum.OLLAMA,
            IntentType.MAINTENANCE: None,
        }

        # Backward compatibility.
        self._default_model = self._ollama_default
        self._summary_model = self._ollama_summary
        self._fallback_order = [
            self._ollama_default,
            self._ollama_summary,
            "gemini",
            "claude",
        ]

    async def route(
        self,
        text: str,
        context: dict | None = None,
    ) -> RoutingDecision:
        """
        Route a request to the most suitable AI provider and model.

        Uses the classifier's asynchronous ``classify`` and logs the decision
        as ``ai_routing_decision``.

        Args:
            text: User input or alert content.
            context: Extra context passed to the complexity scorer
                (an empty dict is used when omitted or empty).

        Returns:
            RoutingDecision: The complete routing decision.
        """
        start_time = time.perf_counter()
        intent_result = await self._intent_classifier.classify(text)
        decision = self._assemble_decision(intent_result, context, start_time)

        logger.info(
            "ai_routing_decision",
            provider=decision.selected_provider.value,
            model=decision.selected_model,
            intent=decision.intent.value,
            intent_confidence=intent_result.confidence,
            risk_level=intent_result.risk_level.value,
            complexity_score=decision.complexity.score,
            reason=decision.routing_reason,
            latency_budget_ms=decision.latency_budget_ms,
            routing_latency_ms=round(decision.routing_latency_ms, 2),
            fallback_count=len(decision.fallback_chain),
        )
        return decision

    def _assemble_decision(
        self,
        intent_result: IntentResult,
        context: dict | None,
        start_time: float,
    ) -> RoutingDecision:
        """Build the RoutingDecision shared by ``route`` and ``route_sync``."""
        intent = normalize_intent(intent_result.intent)
        complexity = self._complexity_scorer.score(context or {})
        provider, model, reason = self._select_provider_and_model(
            intent, intent_result, complexity
        )
        # v4.3: every intent, DIAGNOSE included, uses the full fallback chain.
        fallback_chain = self._build_fallback_chain(provider)
        latency_budget = PROVIDER_LATENCY_BUDGET.get(provider, 30000)
        # Measured last so it covers classification, scoring and selection.
        routing_latency = (time.perf_counter() - start_time) * 1000
        return RoutingDecision(
            selected_provider=provider,
            selected_model=model,
            fallback_chain=fallback_chain,
            routing_reason=reason,
            latency_budget_ms=latency_budget,
            intent=intent,
            intent_result=intent_result,
            complexity=complexity,
            routing_latency_ms=routing_latency,
        )

    def _select_provider_and_model(
        self,
        intent: IntentType,
        intent_result: IntentResult,
        complexity: ComplexityScore,
    ) -> tuple[AIProviderEnum, str, str]:
        """
        Pick the provider and model (core logic of Phase 13.3 #87).

        The rules are evaluated top to bottom and the first match wins; see
        the inline comments for each rule.

        Args:
            intent: Normalized intent.
            intent_result: Full classification result (supplies the risk level).
            complexity: Complexity score.

        Returns:
            (provider, model, reason)
        """
        risk = intent_result.risk_level
        score = complexity.score

        # Rule 1: CRITICAL risk always goes to Claude (highest priority).
        if risk == RiskLevel.CRITICAL:
            return (
                AIProviderEnum.CLAUDE,
                self._claude_default,
                f"CRITICAL 風險 ({intent.value}) 強制使用 Claude",
            )

        # Rule 2: DELETE intent (irreversible) always goes to Claude.
        if intent == IntentType.DELETE:
            return (
                AIProviderEnum.CLAUDE,
                self._claude_default,
                "DELETE 意圖 (不可逆) 強制使用 Claude",
            )

        # Rule 3: per-intent forced provider.
        provider_override = self._intent_provider_overrides.get(intent)
        if provider_override is not None:
            provider = provider_override
            # 2026-04-03 ogt: ALERT_TRIAGE / QUERY use the Ollama summary model;
            # per the original note this avoids the default model's timeout
            # failing the whole chain (Phase 24, option A).
            uses_summary_model = provider == AIProviderEnum.OLLAMA and intent in (
                IntentType.ALERT_TRIAGE,
                IntentType.QUERY,
            )
            if uses_summary_model:
                model = self._ollama_summary
            else:
                model = self._provider_models[provider]
            reason = f"意圖 {intent.value} 指定使用 {provider.value}"
            return provider, model, reason

        # Rule 4: complexity 4-5 or HIGH risk -> OpenClaw Nemo.
        # 2026-04-02 ogt (C1 fix): NVIDIA renamed to OPENCLAW_NEMO.
        if score >= 4 or risk == RiskLevel.HIGH:
            return (
                AIProviderEnum.OPENCLAW_NEMO,
                self._openclaw_nemo_default,
                f"複雜度={score}/5, 風險={risk.value} → OpenClaw Nemo (fallback Gemini)",
            )

        # Rule 5: complexity 3 -> Ollama default model.
        if score == 3:
            return (
                AIProviderEnum.OLLAMA,
                self._ollama_default,
                f"複雜度={score}/5, 風險={risk.value} → Ollama (fallback Gemini)",
            )

        # Rule 6: everything else -> Ollama; the lightweight summary model is
        # used only when the score is <= 1.
        model = self._ollama_summary if score <= 1 else self._ollama_default
        reason = f"複雜度={score}/5, 風險={risk.value} → Ollama (成本優先)"
        return AIProviderEnum.OLLAMA, model, reason

    def _select_model(
        self,
        intent: IntentType,
        intent_result: IntentResult,
        complexity: ComplexityScore,
    ) -> tuple[str, str]:
        """
        Pick a model (backward-compatible wrapper).

        Deprecated: use ``_select_provider_and_model``.

        Returns:
            (model_name, reason)
        """
        _, model, reason = self._select_provider_and_model(
            intent, intent_result, complexity
        )
        return model, reason

    def _build_fallback_chain(
        self, selected_provider: AIProviderEnum
    ) -> list[tuple[AIProviderEnum, str]]:
        """
        Return ``_full_fallback_chain`` without the already-selected provider.

        The relative order of ``_full_fallback_chain`` is preserved
        (OpenClaw Nemo -> Gemini -> Claude -> Ollama as configured in
        ``__init__``).

        Args:
            selected_provider: Provider that was already chosen.

        Returns:
            Fallback chain [(provider, model), ...]
        """
        return [
            (provider, model)
            for provider, model in self._full_fallback_chain
            if provider != selected_provider
        ]

    def _build_fallback_list(self, selected_model: str) -> list[str]:
        """Return the legacy fallback model list without ``selected_model``."""
        return [m for m in self._fallback_order if m != selected_model]

    def route_sync(
        self,
        text: str,
        context: dict | None = None,
    ) -> RoutingDecision:
        """
        Synchronous routing using the classifier's ``classify_sync``.

        Intended for callers that need a quick decision without awaiting the
        asynchronous classifier. Unlike ``route`` it does not log.

        Args:
            text: User input or alert content.
            context: Extra context passed to the complexity scorer.

        Returns:
            RoutingDecision: The routing decision.
        """
        start_time = time.perf_counter()
        intent_result = self._intent_classifier.classify_sync(text)
        return self._assemble_decision(intent_result, context, start_time)

    # =========================================================================
    # Tool-calling routing (ADR-036)
    # =========================================================================

    def route_tool_calling(self) -> tuple[AIProviderEnum, str, list[tuple[AIProviderEnum, str]]]:
        """
        Routing dedicated to tool calling (ADR-036).

        Nemotron is always selected; the remaining entries of the
        tool-calling chain (Gemini, Claude) are returned as fallbacks.

        Returns:
            (provider, model, fallback_chain)
        """
        # 2026-04-02 ogt (C1 fix): tool calling uses NEMOTRON (direct NIM).
        provider = AIProviderEnum.NEMOTRON
        model = self._nemotron_default
        fallback_chain = [
            (p, m) for p, m in self._tool_calling_fallback_chain if p != provider
        ]
        logger.info(
            "tool_calling_routing",
            provider=provider.value,
            model=model,
            fallback_count=len(fallback_chain),
        )
        return provider, model, fallback_chain

    def get_tool_calling_fallback_chain(self) -> list[tuple[AIProviderEnum, str]]:
        """Return a shallow copy of the tool-calling fallback chain."""
        return self._tool_calling_fallback_chain.copy()

    # =========================================================================
    # Convenience helpers
    # =========================================================================

    def get_provider_for_intent(self, intent: IntentType) -> AIProviderEnum:
        """Return the forced provider for ``intent``, or Ollama when none is set.

        Complexity and risk are not considered.
        """
        override = self._intent_provider_overrides.get(intent)
        return override if override is not None else AIProviderEnum.OLLAMA

    def get_model_for_provider(self, provider: AIProviderEnum) -> str:
        """Return the default model of ``provider`` (Ollama default if unknown)."""
        return self._provider_models.get(provider, self._ollama_default)

    def get_routing_matrix(self) -> list[dict]:
        """
        Return a static description of the routing rules (API docs / debugging).

        Returns:
            One dict per rule with keys rule, condition, provider, reason.
        """
        return [
            {
                "rule": 1,
                "condition": "CRITICAL risk",
                "provider": "claude",
                "reason": "不可逆/高風險操作強制最強模型",
            },
            {
                "rule": 2,
                "condition": "DELETE intent",
                "provider": "claude",
                "reason": "刪除操作強制最強模型",
            },
            {
                "rule": 3,
                "condition": "Intent override",
                "provider": "depends",
                "reason": "特定意圖有預設 Provider",
            },
            {
                "rule": 4,
                "condition": "complexity >= 4 OR HIGH risk",
                "provider": "openclaw_nemo",
                "reason": "高複雜度需要 Nvidia Nemotron 強大推理能力 (via .188)",
            },
            {
                "rule": 5,
                "condition": "complexity == 3",
                "provider": "ollama",
                "reason": "中等複雜度本地處理",
            },
            {
                "rule": 6,
                "condition": "complexity 1-2",
                "provider": "ollama",
                "reason": "低複雜度快速處理",
            },
        ]
# =============================================================================
# Phase 24 ADR-052: AI Provider Registry + Execution Layer
# =============================================================================
# 2026-04-02 ogt: 在現有 AIRouter (路由決策) 之上,加入 Provider 執行層
# 整合: ProviderRegistry + 閘門 (CB/RL/Sem) + Cache + Langfuse Trace
#
# 呼叫關係:
# openclaw.py → AIRouterExecutor.execute() → AIRouter.route() → Provider.analyze()
# =============================================================================
import asyncio
import hashlib
import json as _json
from src.core.config import get_settings
from src.services.ai_providers.interfaces import AIProvider as AIProviderProtocol, AIResult
_settings = get_settings()
class _SimpleCircuitBreaker:
"""
輕量 per-provider Circuit Breaker (Phase 24 C2 修復)
不共用 OpenClawGuard — 避免 Gemini 掛掉時 Ollama 也被擋
"""
def __init__(self, name: str, failure_threshold: int = 5, recovery_timeout: float = 60.0) -> None:
self.name = name
self._failure_threshold = failure_threshold
self._recovery_timeout = recovery_timeout
self._failure_count = 0
self._last_failure_time: float = 0.0
def is_open(self) -> bool:
if self._failure_count < self._failure_threshold:
return False
# 超過 recovery timeout → half-open (允許一次嘗試)
if time.time() - self._last_failure_time > self._recovery_timeout:
return False
return True
def record_success(self) -> None:
self._failure_count = 0
def record_failure(self) -> None:
self._failure_count += 1
self._last_failure_time = time.time()
class AIProviderRegistry:
    """
    Registry of AI providers, analogous to the MCP ProviderRegistry (ADR-015).

    Stores providers by their ``name`` and exposes only enabled ones through
    ``get`` / ``all_enabled``.
    """

    def __init__(self) -> None:
        self._providers: dict[str, AIProviderProtocol] = {}

    def register(self, provider: AIProviderProtocol) -> None:
        """Register ``provider`` under ``provider.name`` (called at startup).

        A provider registered later under the same name replaces the earlier one.
        """
        self._providers[provider.name] = provider
        status = "enabled" if provider.is_enabled else "disabled"
        logger.info(
            "ai_provider_registered",
            name=provider.name,
            status=status,
            privacy=provider.privacy_level,
        )

    def get(self, name: str) -> AIProviderProtocol | None:
        """Return the provider called ``name`` if it is registered and enabled, else None."""
        p = self._providers.get(name)
        if p and p.is_enabled:
            return p
        return None

    def all_enabled(self) -> list[AIProviderProtocol]:
        """Return every enabled provider, in registration order."""
        return [p for p in self._providers.values() if p.is_enabled]

    def names(self) -> list[str]:
        """Return the names of all registered providers, enabled or not."""
        return list(self._providers)

    async def health_check_all(self) -> dict[str, bool]:
        """Run ``health_check()`` on every registered provider, one at a time.

        Returns:
            Mapping of provider name to the health-check result. A provider
            whose check raises is reported as False and the error is logged.
        """
        results = {}
        for name, p in self._providers.items():
            try:
                results[name] = await p.health_check()
            except Exception as e:
                # Keep the best-effort contract, but leave a trace of the cause.
                logger.warning("ai_provider_health_check_failed", name=name, error=str(e))
                results[name] = False
        return results

    async def close_all(self) -> None:
        """Close every provider that exposes ``close()`` (I5 fix: shutdown hook).

        A failure while closing one provider is logged and does not stop the
        remaining providers from being closed.
        """
        for name, p in self._providers.items():
            try:
                if hasattr(p, "close"):
                    await p.close()
                    logger.info("ai_provider_closed", name=name)
            except Exception as e:
                logger.warning("ai_provider_close_failed", name=name, error=str(e))
class AIRouterExecutor:
"""
AI Router 執行層 (Phase 24 ADR-052)
職責:
1. Cache 檢查 (Redis, 跨 Provider 共享) — D4
2. 閘門控制 (Circuit Breaker → Rate Limiter → Semaphore) — D3
3. 呼叫 Provider.analyze() — 實際執行
4. 記錄 Langfuse Trace — D5
5. Mock Mode 攔截 — D13
設計原則:
- 只依賴 AIProviderProtocol禁止 import 具體 Provider 類別
- 閘門在 RouterProvider 保持純粹 (Stateless Compute Units)
"""
def __init__(self, registry: AIProviderRegistry) -> None:
self._registry = registry
self._semaphores: dict[str, asyncio.Semaphore] = {}
# C2 修復: per-provider Circuit Breaker (不共用,避免一個掛全部擋)
self._circuit_breakers: dict[str, "_SimpleCircuitBreaker"] = {}
def _get_semaphore(self, name: str, limit: int = 3) -> asyncio.Semaphore:
"""取得 Provider 的並發 Semaphore (lazy init)"""
if name not in self._semaphores:
self._semaphores[name] = asyncio.Semaphore(limit)
return self._semaphores[name]
def _get_circuit_breaker(self, name: str) -> "_SimpleCircuitBreaker":
"""取得 Provider 的 Circuit Breaker (per-provider, lazy init)"""
if name not in self._circuit_breakers:
# 2026-04-05 Claude Code: v4.3 — NIM 使用更寬鬆的 CB 參數
# 每次都先跑 NIM只有真正連線錯誤非 timeout才累積失敗
# failure_threshold=10: 需要 10 次真實錯誤才 OPENtimeout 不計)
# recovery_timeout=30: 30s 後進入 half-open立即重試 NIM
if name == "nemotron":
self._circuit_breakers[name] = _SimpleCircuitBreaker(
name, failure_threshold=10, recovery_timeout=30.0
)
else:
self._circuit_breakers[name] = _SimpleCircuitBreaker(name)
return self._circuit_breakers[name]
@staticmethod
def _cache_key(prompt: str, context: dict | None) -> str:
"""生成 Cache Key (與 openclaw.py 相容)"""
ctx_hash = ""
if context:
ctx_hash = f":{context.get('alert_type', '')}:{context.get('target_resource', '')}"
content = f"{prompt}{ctx_hash}"
return f"llm_cache:{hashlib.sha256(content.encode()).hexdigest()[:16]}"
async def execute(
self,
prompt: str,
provider_order: list[str],
context: dict | None = None,
cache_ttl: int = 3600,
require_local: bool = False,
) -> AIResult:
"""
核心執行方法 — 依序嘗試 Provider含閘門 + Cache
Args:
prompt: LLM prompt
provider_order: Provider 名稱順序 (由 AIRouter.route 決定)
context: 額外上下文
cache_ttl: Cache TTL (秒)
require_local: 強制 local Provider (隱私)
Returns:
AIResult: 標準化結果
"""
# ① Mock Mode 攔截 (D13)
if _settings.MOCK_MODE:
logger.info("ai_router_mock_mode")
return AIResult(
raw_response=_json.dumps({
"action_title": "Mock Analysis",
"description": "Mock mode enabled",
"risk_level": "low",
"reasoning": "MOCK_MODE=true",
"confidence": 0.0,
}),
success=True,
provider="mock",
)
# ② Cache 檢查 (D4)
cache_key = self._cache_key(prompt, context) # C3 修復: 移到 try 外避免 UnboundLocalError
try:
from src.core.redis_client import get_redis
redis = get_redis()
cached = await redis.get(cache_key)
if cached:
data = _json.loads(cached)
logger.info("ai_router_cache_hit", cache_key=cache_key[:30])
return AIResult(
raw_response=data.get("response", ""),
success=True,
provider=data.get("provider", "cache"),
from_cache=True,
)
except Exception as e:
logger.debug("ai_router_cache_read_failed", error=str(e))
# ③ 遍歷 Provider + 閘門 (D3)
# 2026-04-02 ogt: C1 修復 — 建立 Langfuse Trace (D5)
# 包住整個執行鏈,記錄每個 Provider 的 generation
try:
from src.services.langfuse_client import langfuse_trace
_lf_trace_ctx = langfuse_trace(
"ai_router_execute",
metadata={
"provider_order": provider_order,
"prompt_length": len(prompt),
"require_local": require_local,
"alert_type": (context or {}).get("alert_type", ""),
},
)
_lf_trace_ctx.__enter__()
except Exception:
_lf_trace_ctx = None
errors: list[str] = []
for provider_name in provider_order:
provider = self._registry.get(provider_name)
if not provider:
continue
# 隱私過濾 (D7)
if require_local and provider.privacy_level != "local":
continue
# 閘門 1: Circuit Breaker (per-provider, C2 修復)
cb = self._get_circuit_breaker(provider_name)
if cb.is_open():
logger.debug("ai_router_circuit_open", provider=provider_name)
continue
# 閘門 2: Rate Limiter
# 2026-04-02 Claude Code: Phase 24 B3 + C1 修復 — Rate Limiter (含 openclaw_nemo)
if provider_name in ("openclaw_nemo", "nemotron", "gemini", "claude"):
try:
from src.services.ai_rate_limiter import get_ai_rate_limiter
rate_limiter = get_ai_rate_limiter()
allowed, reason = await rate_limiter.check_and_increment(provider_name)
if not allowed:
logger.info("ai_router_rate_limited", provider=provider_name, reason=reason)
continue
except Exception as e:
logger.debug("ai_router_rate_limiter_error", error=str(e))
# 閘門 3: Semaphore (並發控制)
sem = self._get_semaphore(provider_name)
async with sem:
try:
result = await provider.analyze(prompt, context)
if result.success:
# 記錄成功 (per-provider CB)
cb.record_success()
# 記錄費用
if result.cost_usd > 0:
try:
rate_limiter = get_ai_rate_limiter()
await rate_limiter.record_cost(provider_name, result.cost_usd)
except Exception:
pass
# 寫入 Cache (D4)
try:
redis = get_redis()
cache_data = _json.dumps({
"response": result.raw_response,
"provider": result.provider,
"cached_at": time.strftime("%Y-%m-%dT%H:%M:%S+08:00"),
})
await redis.set(cache_key, cache_data, ex=cache_ttl)
except Exception:
pass
logger.info(
"ai_router_execute_success",
provider=provider_name,
latency_ms=round(result.latency_ms, 1),
tokens=result.tokens,
from_cache=False,
)
# D5: 記錄 Langfuse generation
if _lf_trace_ctx:
try:
_lf_trace_ctx.generation(
name=f"{provider_name}_call",
model=provider_name,
input=prompt[:500],
output=result.raw_response[:500],
usage={"total": result.tokens} if result.tokens else None,
metadata={"cost_usd": result.cost_usd, "latency_ms": round(result.latency_ms, 1)},
)
_lf_trace_ctx.__exit__(None, None, None)
except Exception:
pass
return result
# Provider 回傳 success=False
errors.append(f"{provider_name}: {result.error}")
logger.warning("ai_router_provider_failed", provider=provider_name, error=result.error)
except Exception as e:
errors.append(f"{provider_name}: {e}")
logger.warning("ai_router_provider_exception", provider=provider_name, error=str(e))
# 2026-04-05 Claude Code: v4.3 — Timeout 不計 CB 失敗
# NIM 偶爾 GPU 忙碌導致 27stimeout 不代表 NIM 故障
# 只有明確連線錯誤(非 timeout才累積 CB 失敗次數
import httpx as _httpx
if not isinstance(e, _httpx.TimeoutException):
cb.record_failure()
# 全部失敗
logger.error("ai_router_all_providers_failed", tried=provider_order, errors=errors)
if _lf_trace_ctx:
try:
_lf_trace_ctx.__exit__(None, None, None)
except Exception:
pass
# 2026-04-04 ogt: Phase 25 P0 — require_local 全部失敗時 Telegram 通知(隱私邊界)
if require_local:
try:
from src.services.telegram_gateway import get_telegram_gateway
tg = get_telegram_gateway()
import asyncio as _asyncio
_asyncio.create_task(
tg.send_text(
"⚠️ <b>DIAGNOSE 本地 Provider 不可用</b>\n"
f"已嘗試: {', '.join(provider_order)}\n"
"需要人工介入,雲端 Provider 不會被呼叫(隱私邊界)。"
)
)
except Exception as _tg_e:
logger.warning("diagnose_reject_telegram_failed", error=str(_tg_e))
return AIResult(
raw_response="",
success=False,
provider="none",
error="local_providers_unavailable",
)
return AIResult(
raw_response="",
success=False,
provider="none",
error=f"All providers failed: {'; '.join(errors)}",
)
# =============================================================================
# 單例管理
# =============================================================================
# Lazily created module-level singletons; each stays None until its getter
# is first called, and reset_ai_router() sets all three back to None.
_router: AIRouter | None = None              # routing decisions
_registry: AIProviderRegistry | None = None  # provider registry
_executor: AIRouterExecutor | None = None    # execution layer
def _init_registry() -> AIProviderRegistry:
    """Create a registry and register every provider in it.

    Provider classes are imported inside the function, and the registration
    order is Ollama, Gemini, Claude, OpenClaw Nemo, Nemotron.
    """
    from src.services.ai_providers.ollama import OllamaProvider
    from src.services.ai_providers.gemini import GeminiProvider
    from src.services.ai_providers.claude import ClaudeProvider
    from src.services.ai_providers.openclaw_nemo import OpenClawNemoProvider

    new_registry = AIProviderRegistry()
    for provider_cls in (
        OllamaProvider,
        GeminiProvider,
        ClaudeProvider,
        OpenClawNemoProvider,
    ):
        new_registry.register(provider_cls())

    # 2026-04-02 Claude Code (Phase 24 B3): NemotronProvider added
    # (preferred for tool calling). It is imported only after the first four
    # providers have been registered.
    from src.services.ai_providers.nemotron import NemotronProvider

    new_registry.register(NemotronProvider())
    return new_registry
def get_ai_router() -> AIRouter:
    """Return the shared AIRouter (routing decisions), creating it on first use."""
    global _router
    if _router is not None:
        return _router
    _router = AIRouter()
    return _router
def get_ai_registry() -> AIProviderRegistry:
    """Return the shared AIProviderRegistry, building it on first use."""
    global _registry
    if _registry is not None:
        return _registry
    _registry = _init_registry()
    return _registry
def get_ai_executor() -> AIRouterExecutor:
    """Return the shared AIRouterExecutor, creating it on first use.

    The executor is built on top of the shared registry from get_ai_registry().
    """
    global _executor
    if _executor is not None:
        return _executor
    _executor = AIRouterExecutor(get_ai_registry())
    return _executor
def reset_ai_router() -> None:
    """Drop the router, registry and executor singletons (intended for tests)."""
    global _router, _registry, _executor
    _router = _registry = _executor = None