1077 lines
43 KiB
Python
1077 lines
43 KiB
Python
"""
|
||
AI Router - Phase 13.3 #87
|
||
==========================
|
||
智能 AI 路由器,根據意圖和複雜度動態選擇 AI Provider
|
||
|
||
目標: 根據請求特性自動選擇最適模型
|
||
策略: Intent Classifier + Complexity Scorer → Routing Decision
|
||
延遲目標: < 50ms (規則引擎優先)
|
||
|
||
路由決策矩陣 (ADR-023):
|
||
┌─────────────────┬───────────────┬──────────────────────────────┐
|
||
│ 複雜度 + 風險 │ Provider │ 備註 │
|
||
├─────────────────┼───────────────┼──────────────────────────────┤
|
||
│ 1-2 + LOW │ Ollama │ 快速本地處理 │
|
||
│ 3 + MEDIUM │ Ollama │ fallback → Gemini │
|
||
│ 4-5 + HIGH │ OpenClaw Nemo │ fallback → Gemini → Claude │
|
||
│ DELETE/CRITICAL │ Claude │ 強制使用最強模型 │
|
||
└─────────────────┴───────────────┴──────────────────────────────┘
|
||
|
||
版本: v4.3
|
||
建立: 2026-03-26 (台北時區)
|
||
建立者: Claude Code
|
||
最後修改: 2026-04-02 (台北時區)
|
||
修改者: ogt (首席架構師 Review C1/C2/C3 修復)
|
||
|
||
變更紀錄:
|
||
| 版本 | 日期 | 執行者 | 變更內容 |
|
||
|------|------|--------|----------|
|
||
| v1.0 | 2026-03-26 | Claude Code | 初始實作 |
|
||
| v2.0 | 2026-03-26 | Claude Code | 支援 IntentResult + 新意圖類型 |
|
||
| v3.0 | 2026-03-26 | Claude Code | Phase 13.3 #87 完整路由決策矩陣 |
|
||
| v4.0 | 2026-04-02 | ogt (首席架構師) | Phase 24 AIProvider Registry + Executor; C1 Langfuse Trace; C2 AIRouter.route(); C3 型別 typo; I4 Protocol close |
|
||
| v4.1 | 2026-04-04 | ogt (首席架構師) | Phase 25 P0: DIAGNOSE Privacy-First — _local_fallback_chain; DIAGNOSE→NEMOTRON; REJECT+Telegram |
|
||
| v4.2 | 2026-04-04 | Claude Code | Phase 25 P0 實測修正: _local_fallback_chain 移除 Nemotron(雲端),僅留 Ollama(本地); timeout 依實測調整(NIM 60s/Ollama 200s) |
|
||
| v4.3 | 2026-04-05 | Claude Code | Phase 25 P0 架構修正: 實測 Ollama CPU ~238s(不可用); NIM 實測 2-27s avg 10.6s; DIAGNOSE 改走 _full_fallback_chain(NIM 主力); _local_fallback_chain 廢棄 |
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import time
|
||
from dataclasses import dataclass, field
|
||
from enum import Enum
|
||
from typing import TYPE_CHECKING, Protocol
|
||
|
||
import structlog
|
||
|
||
if TYPE_CHECKING:
|
||
from src.services.intent_classifier import IntentResult
|
||
|
||
from src.services.complexity_scorer import (
|
||
ComplexityScore,
|
||
get_complexity_scorer,
|
||
)
|
||
from src.services.intent_classifier import (
|
||
IntentResult,
|
||
IntentType,
|
||
RiskLevel,
|
||
get_intent_classifier,
|
||
normalize_intent,
|
||
)
|
||
from src.services.model_registry import get_model_registry
|
||
|
||
# Module-level structlog logger, named after this module; used by every class below.
logger = structlog.get_logger(__name__)
|
||
|
||
|
||
# =============================================================================
|
||
# Provider 定義
|
||
# =============================================================================
|
||
|
||
|
||
class AIProviderEnum(Enum):
    """Identifiers of the AI providers the router can choose from.

    Each member's value is the provider's name string. Per the C1 fix note
    (2026-04-02, ogt) the values are aligned with the names the provider
    registry actually uses.
    """

    OLLAMA = "ollama"
    GEMINI = "gemini"
    CLAUDE = "claude"

    # OpenClawNemoProvider.name == "openclaw_nemo": general inference (via .188).
    # NemotronProvider.name == "nemotron": tool calling (direct NVIDIA NIM).
    # The legacy member NVIDIA = "nvidia" was removed because the registry
    # has no provider with that name.
    OPENCLAW_NEMO = "openclaw_nemo"
    NEMOTRON = "nemotron"
|
||
|
||
|
||
# Latency budget per provider, in milliseconds. AIRouter copies the selected
# provider's entry into RoutingDecision.latency_budget_ms (30000 is used there
# as the default for a provider missing from this table).
PROVIDER_LATENCY_BUDGET: dict[AIProviderEnum, int] = {
    # Local model: a longer processing time is tolerated.
    AIProviderEnum.OLLAMA: 60_000,
    # Cloud providers: tighter budget.
    AIProviderEnum.GEMINI: 30_000,
    AIProviderEnum.CLAUDE: 30_000,
    # 2026-04-02 ogt (C1 fix): keys aligned with the registry provider names.
    # Routed via .188 to NVIDIA NIM, so a longer budget is allowed.
    AIProviderEnum.OPENCLAW_NEMO: 60_000,
    # Dedicated to tool calling; a longer budget is allowed.
    AIProviderEnum.NEMOTRON: 60_000,
}
|
||
|
||
|
||
# =============================================================================
|
||
# Interface 定義 (P1 修復 - 2026-04-01 首席架構師審查)
|
||
# =============================================================================
|
||
|
||
|
||
class IAIRouter(Protocol):
    """Structural interface of an AI router.

    Introduced in the 2026-04-01 architecture review (P1 fix) so that a router
    can be swapped for a test double through dependency injection; the original
    note cites IModelRegistry and IComplexityScorer as the pattern followed.
    """

    async def route(self, text: str, context: dict | None = None) -> "RoutingDecision":
        """Route a request to the best-suited AI provider."""
        ...

    def route_sync(self, text: str, context: dict | None = None) -> "RoutingDecision":
        """Synchronous variant of :meth:`route`."""
        ...

    def route_tool_calling(
        self,
    ) -> tuple[AIProviderEnum, str, list[tuple[AIProviderEnum, str]]]:
        """Return ``(provider, model, fallback_chain)`` for tool-calling tasks."""
        ...
|
||
|
||
|
||
@dataclass
class RoutingDecision:
    """Outcome of one routing decision (Phase 13.3 #87).

    Carries the chosen provider/model, the ordered fallbacks, the classifier
    outputs the decision was based on, and timing metadata.
    """

    # --- core decision ---------------------------------------------------
    selected_provider: AIProviderEnum  # provider chosen for the request
    selected_model: str  # model name chosen for that provider
    fallback_chain: list[tuple[AIProviderEnum, str]]  # ordered (provider, model) fallbacks
    routing_reason: str  # human-readable explanation of the choice
    latency_budget_ms: int  # latency budget in milliseconds

    # --- classification results ------------------------------------------
    intent: IntentType  # normalized intent
    intent_result: IntentResult  # full intent-classification result
    complexity: ComplexityScore  # complexity score
    # Always overwritten in __post_init__ with intent_result.risk_level.
    risk_level: RiskLevel = field(default=RiskLevel.MEDIUM)

    # --- routing metadata ------------------------------------------------
    routing_latency_ms: float = 0.0  # time spent making the decision (ms)

    # --- deprecated aliases, filled in by __post_init__ -------------------
    model: str = ""  # mirror of selected_model
    reason: str = ""  # mirror of routing_reason
    fallback_models: list[str] = field(default_factory=list)  # model names from fallback_chain

    def __post_init__(self) -> None:
        """Populate the derived and backward-compatible fields."""
        self.risk_level = self.intent_result.risk_level

        # Backward-compatible mirrors of the new field names.
        self.model = self.selected_model
        self.reason = self.routing_reason

        legacy_models: list[str] = []
        for _provider, model_name in self.fallback_chain:
            # The selected model itself is never listed as its own fallback.
            if model_name != self.selected_model:
                legacy_models.append(model_name)
        self.fallback_models = legacy_models

    def to_dict(self) -> dict:
        """Return a plain-dict view of the decision (used for API responses)."""
        chain = [
            {"provider": provider.value, "model": model_name}
            for provider, model_name in self.fallback_chain
        ]
        payload = {
            "selected_provider": self.selected_provider.value,
            "selected_model": self.selected_model,
            "fallback_chain": chain,
            "routing_reason": self.routing_reason,
            "latency_budget_ms": self.latency_budget_ms,
            "intent": self.intent.value,
            "risk_level": self.risk_level.value,
            "complexity_score": self.complexity.score,
            "routing_latency_ms": round(self.routing_latency_ms, 2),
        }
        return payload
|
||
|
||
|
||
class AIRouter:
    """Select an AI provider and model from intent, risk level and complexity.

    Combines the intent classifier and the complexity scorer and applies the
    following rules, in priority order (see ``_select_provider_and_model``):

    1. CRITICAL risk                  -> Claude
    2. DELETE intent                  -> Claude
    3. intent with a forced provider  -> that provider
       (DIAGNOSE -> Nemotron; ALERT_TRIAGE / QUERY -> Ollama summary model)
    4. complexity >= 4 or HIGH risk   -> OpenClaw Nemo
    5. complexity == 3                -> Ollama (default model)
    6. everything else                -> Ollama (summary model if complexity <= 1)

    The fallback chain of every decision is ``_full_fallback_chain``
    (OpenClaw Nemo, Gemini, Claude, Ollama) with the selected provider removed.
    """

    def __init__(self):
        """Resolve collaborators and model names, then build the routing tables."""
        self._intent_classifier = get_intent_classifier()
        self._complexity_scorer = get_complexity_scorer()
        self._model_registry = get_model_registry()

        # Model names come from the model registry.
        self._ollama_default = self._model_registry.get_model("ollama", "default")
        self._ollama_summary = self._model_registry.get_model("ollama", "summary")
        self._gemini_default = self._model_registry.get_model("gemini", "default")
        self._claude_default = self._model_registry.get_model("claude", "default")
        # 2026-04-02 ogt (C1 fix): openclaw_nemo (general inference) and
        # nemotron (tool calling).
        # NOTE(review): both are looked up under the model-registry key
        # "nvidia" although the provider names are "openclaw_nemo"/"nemotron";
        # presumably the model registry still uses "nvidia" - confirm.
        self._openclaw_nemo_default = self._model_registry.get_model("nvidia", "default")
        self._nemotron_default = self._model_registry.get_model("nvidia", "default")
        # Backward-compatible alias.
        self._nvidia_default = self._openclaw_nemo_default

        # Default model for each provider.
        self._provider_models: dict[AIProviderEnum, str] = {
            AIProviderEnum.OLLAMA: self._ollama_default,
            AIProviderEnum.GEMINI: self._gemini_default,
            AIProviderEnum.CLAUDE: self._claude_default,
            AIProviderEnum.OPENCLAW_NEMO: self._openclaw_nemo_default,
            AIProviderEnum.NEMOTRON: self._nemotron_default,
        }

        # Full fallback chain of (provider, model) pairs.
        # 2026-04-02 ogt (C1 fix): OPENCLAW_NEMO is tried first.
        self._full_fallback_chain: list[tuple[AIProviderEnum, str]] = [
            (AIProviderEnum.OPENCLAW_NEMO, self._openclaw_nemo_default),
            (AIProviderEnum.GEMINI, self._gemini_default),
            (AIProviderEnum.CLAUDE, self._claude_default),
            (AIProviderEnum.OLLAMA, self._ollama_default),
        ]

        # Fallback chain dedicated to tool calling (ADR-036).
        self._tool_calling_fallback_chain: list[tuple[AIProviderEnum, str]] = [
            (AIProviderEnum.NEMOTRON, self._nemotron_default),
            (AIProviderEnum.GEMINI, self._gemini_default),
            (AIProviderEnum.CLAUDE, self._claude_default),
        ]

        # 2026-04-05 Claude Code (Phase 25 P0 v4.3): _local_fallback_chain is
        # deprecated. Measurements recorded by the author on 2026-04-05:
        #   - Ollama llama3.2:3b, CPU only: 238 s for a trivial reply (unusable)
        #   - Nemotron NIM: 2.2 s - 27 s, 10.6 s on average (cloud GPU)
        #   - NIM has received incident data since Phase 22
        # Hence DIAGNOSE uses _full_fallback_chain. The attribute is kept,
        # empty, only so existing attribute access does not fail.
        self._local_fallback_chain: list[tuple[AIProviderEnum, str]] = []

        # Forced provider per intent; None means "decide by complexity/risk".
        self._intent_provider_overrides: dict[IntentType, AIProviderEnum | None] = {
            # Four core intents.
            IntentType.RESTART: None,
            IntentType.SCALE: None,
            IntentType.CONFIG: None,  # by complexity (HIGH risk still upgrades)
            # P0 2026-04-04 Claude Code: DIAGNOSE upgraded to Nemotron.
            # Original note: force-local scenarios are protected by
            # require_local=True plus the privacy filter in the executor.
            IntentType.DIAGNOSE: AIProviderEnum.NEMOTRON,
            # Auxiliary intents.
            IntentType.DELETE: AIProviderEnum.CLAUDE,
            IntentType.ROLLBACK: None,
            IntentType.UNKNOWN: None,
            # Legacy intents.
            IntentType.CODE_REVIEW: None,
            IntentType.DEPLOYMENT: None,
            IntentType.ALERT_TRIAGE: AIProviderEnum.OLLAMA,
            IntentType.QUERY: AIProviderEnum.OLLAMA,
            IntentType.MAINTENANCE: None,
        }

        # Backward-compatible attributes.
        self._default_model = self._ollama_default
        self._summary_model = self._ollama_summary
        self._fallback_order = [
            self._ollama_default,
            self._ollama_summary,
            "gemini",
            "claude",
        ]

    def _decide(
        self,
        intent_result: IntentResult,
        context: dict,
        started_at: float,
    ) -> RoutingDecision:
        """Turn a classification result into a complete RoutingDecision.

        Shared by ``route`` and ``route_sync`` so both apply exactly the same
        scoring, selection, fallback and latency-budget steps.

        Args:
            intent_result: Result returned by the intent classifier.
            context: Context dict passed to the complexity scorer.
            started_at: ``time.perf_counter()`` value taken when routing began.
        """
        intent = normalize_intent(intent_result.intent)
        complexity = self._complexity_scorer.score(context)
        provider, model, reason = self._select_provider_and_model(
            intent, intent_result, complexity
        )
        # v4.3: every intent, DIAGNOSE included, uses the full fallback chain.
        fallback_chain = self._build_fallback_chain(provider)
        latency_budget = PROVIDER_LATENCY_BUDGET.get(provider, 30000)
        routing_latency = (time.perf_counter() - started_at) * 1000

        return RoutingDecision(
            selected_provider=provider,
            selected_model=model,
            fallback_chain=fallback_chain,
            routing_reason=reason,
            latency_budget_ms=latency_budget,
            intent=intent,
            intent_result=intent_result,
            complexity=complexity,
            routing_latency_ms=routing_latency,
        )

    async def route(
        self,
        text: str,
        context: dict | None = None,
    ) -> RoutingDecision:
        """Route a request to the best-suited provider and model.

        The stated latency target is < 50 ms when the rule engine classifies
        the text; classification through an LLM may take longer.

        Args:
            text: User input or alert content.
            context: Extra context (service, metrics, ...) for complexity scoring.

        Returns:
            The complete routing decision; it is also logged as
            ``ai_routing_decision``.
        """
        started_at = time.perf_counter()
        context = context or {}

        intent_result = await self._intent_classifier.classify(text)
        decision = self._decide(intent_result, context, started_at)

        logger.info(
            "ai_routing_decision",
            provider=decision.selected_provider.value,
            model=decision.selected_model,
            intent=decision.intent.value,
            intent_confidence=intent_result.confidence,
            risk_level=intent_result.risk_level.value,
            complexity_score=decision.complexity.score,
            reason=decision.routing_reason,
            latency_budget_ms=decision.latency_budget_ms,
            routing_latency_ms=round(decision.routing_latency_ms, 2),
            fallback_count=len(decision.fallback_chain),
        )

        return decision

    def _select_provider_and_model(
        self,
        intent: IntentType,
        intent_result: IntentResult,
        complexity: ComplexityScore,
    ) -> tuple[AIProviderEnum, str, str]:
        """Apply the routing rules (Phase 13.3 #87 core logic).

        Args:
            intent: Normalized intent.
            intent_result: Full classification result (supplies the risk level).
            complexity: Complexity score (``.score`` is compared against 1-5).

        Returns:
            ``(provider, model, reason)``.
        """
        risk = intent_result.risk_level
        score = complexity.score

        # Rule 1: CRITICAL risk always goes to Claude (highest priority).
        if risk == RiskLevel.CRITICAL:
            reason = f"CRITICAL 風險 ({intent.value}) 強制使用 Claude"
            return AIProviderEnum.CLAUDE, self._claude_default, reason

        # Rule 2: DELETE is irreversible, so it always goes to Claude.
        if intent == IntentType.DELETE:
            reason = "DELETE 意圖 (不可逆) 強制使用 Claude"
            return AIProviderEnum.CLAUDE, self._claude_default, reason

        # Rule 3: intents with a forced provider.
        provider_override = self._intent_provider_overrides.get(intent)
        if provider_override is not None:
            provider = provider_override
            # 2026-04-03 ogt: ALERT_TRIAGE / QUERY use the Ollama summary model
            # to avoid the default model's long timeout failing the whole
            # chain (Phase 24, option A).
            uses_summary_model = provider == AIProviderEnum.OLLAMA and intent in (
                IntentType.ALERT_TRIAGE,
                IntentType.QUERY,
            )
            if uses_summary_model:
                model = self._ollama_summary
            else:
                model = self._provider_models[provider]
            reason = f"意圖 {intent.value} 指定使用 {provider.value}"
            return provider, model, reason

        # Rule 4: complexity 4-5 or HIGH risk -> OpenClaw Nemo.
        # 2026-04-02 ogt (C1 fix): NVIDIA renamed to OPENCLAW_NEMO.
        if score >= 4 or risk == RiskLevel.HIGH:
            reason = f"複雜度={score}/5, 風險={risk.value} → OpenClaw Nemo (fallback Gemini)"
            return AIProviderEnum.OPENCLAW_NEMO, self._openclaw_nemo_default, reason

        # Rule 5: complexity 3 -> Ollama default model.
        if score == 3:
            reason = f"複雜度={score}/5, 風險={risk.value} → Ollama (fallback Gemini)"
            return AIProviderEnum.OLLAMA, self._ollama_default, reason

        # Rule 6: remaining (low-complexity) requests -> Ollama; the lighter
        # summary model is used when the score is at most 1.
        model = self._ollama_summary if score <= 1 else self._ollama_default
        reason = f"複雜度={score}/5, 風險={risk.value} → Ollama (成本優先)"
        return AIProviderEnum.OLLAMA, model, reason

    def _select_model(
        self,
        intent: IntentType,
        intent_result: IntentResult,
        complexity: ComplexityScore,
    ) -> tuple[str, str]:
        """Deprecated: use ``_select_provider_and_model``.

        Returns:
            ``(model_name, reason)`` - the same decision without the provider.
        """
        _, model, reason = self._select_provider_and_model(
            intent, intent_result, complexity
        )
        return model, reason

    def _build_fallback_chain(
        self, selected_provider: AIProviderEnum
    ) -> list[tuple[AIProviderEnum, str]]:
        """Return ``_full_fallback_chain`` without the selected provider.

        The order of ``_full_fallback_chain`` is preserved.

        Args:
            selected_provider: Provider already chosen as the primary.

        Returns:
            Fallback ``(provider, model)`` pairs.
        """
        return [
            (provider, model)
            for provider, model in self._full_fallback_chain
            if provider != selected_provider
        ]

    def _build_fallback_list(self, selected_model: str) -> list[str]:
        """Return the legacy fallback model names minus ``selected_model``."""
        return [m for m in self._fallback_order if m != selected_model]

    def route_sync(
        self,
        text: str,
        context: dict | None = None,
    ) -> RoutingDecision:
        """Synchronous routing that uses only the classifier's sync path.

        Intended for callers that need a quick decision without LLM
        classification. Unlike ``route`` it does not log the decision.

        Args:
            text: User input or alert content.
            context: Extra context for complexity scoring.

        Returns:
            The routing decision.
        """
        started_at = time.perf_counter()
        context = context or {}

        intent_result = self._intent_classifier.classify_sync(text)
        return self._decide(intent_result, context, started_at)

    # =====================================================================
    # Tool-calling routing (ADR-036)
    # =====================================================================

    def route_tool_calling(self) -> tuple[AIProviderEnum, str, list[tuple[AIProviderEnum, str]]]:
        """Route a tool-calling task (ADR-036).

        Nemotron is always the primary (2026-04-02 ogt, C1 fix: direct NIM);
        the fallbacks are the rest of ``_tool_calling_fallback_chain``.

        Returns:
            ``(provider, model, fallback_chain)``.
        """
        provider = AIProviderEnum.NEMOTRON
        model = self._nemotron_default
        fallback_chain = [
            (p, m) for p, m in self._tool_calling_fallback_chain if p != provider
        ]

        logger.info(
            "tool_calling_routing",
            provider=provider.value,
            model=model,
            fallback_count=len(fallback_chain),
        )

        return provider, model, fallback_chain

    def get_tool_calling_fallback_chain(self) -> list[tuple[AIProviderEnum, str]]:
        """Return a shallow copy of the tool-calling fallback chain."""
        return self._tool_calling_fallback_chain.copy()

    # =====================================================================
    # Convenience helpers
    # =====================================================================

    def get_provider_for_intent(self, intent: IntentType) -> AIProviderEnum:
        """Return the provider forced for ``intent``, or Ollama if there is none.

        Complexity and risk are not considered.
        """
        override = self._intent_provider_overrides.get(intent)
        return override if override is not None else AIProviderEnum.OLLAMA

    def get_model_for_provider(self, provider: AIProviderEnum) -> str:
        """Return the default model of ``provider`` (Ollama default if unknown)."""
        return self._provider_models.get(provider, self._ollama_default)

    def get_routing_matrix(self) -> list[dict]:
        """Return a static description of the routing rules.

        Meant for API documentation or debugging.

        Returns:
            One dict per rule with ``rule``, ``condition``, ``provider`` and
            ``reason`` keys.
        """
        return [
            {
                "rule": 1,
                "condition": "CRITICAL risk",
                "provider": "claude",
                "reason": "不可逆/高風險操作強制最強模型",
            },
            {
                "rule": 2,
                "condition": "DELETE intent",
                "provider": "claude",
                "reason": "刪除操作強制最強模型",
            },
            {
                "rule": 3,
                "condition": "Intent override",
                "provider": "depends",
                "reason": "特定意圖有預設 Provider",
            },
            {
                "rule": 4,
                "condition": "complexity >= 4 OR HIGH risk",
                "provider": "openclaw_nemo",
                "reason": "高複雜度需要 Nvidia Nemotron 強大推理能力 (via .188)",
            },
            {
                "rule": 5,
                "condition": "complexity == 3",
                "provider": "ollama",
                "reason": "中等複雜度本地處理",
            },
            {
                "rule": 6,
                "condition": "complexity 1-2",
                "provider": "ollama",
                "reason": "低複雜度快速處理",
            },
        ]
|
||
|
||
|
||
# =============================================================================
|
||
# Phase 24 ADR-052: AI Provider Registry + Execution Layer
|
||
# =============================================================================
|
||
# 2026-04-02 ogt: 在現有 AIRouter (路由決策) 之上,加入 Provider 執行層
|
||
# 整合: ProviderRegistry + 閘門 (CB/RL/Sem) + Cache + Langfuse Trace
|
||
#
|
||
# 呼叫關係:
|
||
# openclaw.py → AIRouterExecutor.execute() → AIRouter.route() → Provider.analyze()
|
||
# =============================================================================
|
||
|
||
import asyncio
|
||
import hashlib
|
||
import json as _json
|
||
|
||
from src.core.config import get_settings
|
||
from src.services.ai_providers.interfaces import AIProvider as AIProviderProtocol, AIResult
|
||
|
||
# Settings object fetched once at import time; AIRouterExecutor.execute reads
# _settings.MOCK_MODE from it on every call.
_settings = get_settings()
|
||
|
||
|
||
class _SimpleCircuitBreaker:
    """Lightweight per-provider circuit breaker (Phase 24, C2 fix).

    One instance exists per provider so that one failing provider does not
    block the others. The breaker opens once ``failure_threshold`` failures
    have accumulated since the last success, and lets calls through again
    ("half-open") once ``recovery_timeout`` seconds have passed since the most
    recent failure.

    Elapsed time is measured with ``time.monotonic()`` so that wall-clock
    adjustments can neither keep the breaker open nor close it early.
    """

    def __init__(self, name: str, failure_threshold: int = 5, recovery_timeout: float = 60.0) -> None:
        """
        Args:
            name: Provider name this breaker guards.
            failure_threshold: Failures needed before the breaker opens.
            recovery_timeout: Seconds after the last failure before calls are
                allowed again.
        """
        self.name = name
        self._failure_threshold = failure_threshold
        self._recovery_timeout = recovery_timeout
        self._failure_count = 0
        # Monotonic timestamp of the latest failure; -inf means "never failed",
        # which makes the elapsed time infinite and keeps the breaker closed.
        self._last_failure_time: float = float("-inf")

    def is_open(self) -> bool:
        """Return True while calls to the provider should be skipped."""
        if self._failure_count < self._failure_threshold:
            return False
        # Past the recovery timeout the breaker is half-open: attempts are
        # allowed until a new failure refreshes the timestamp or a success
        # resets the counter.
        elapsed = time.monotonic() - self._last_failure_time
        return elapsed <= self._recovery_timeout

    def record_success(self) -> None:
        """Reset the failure counter after a successful call."""
        self._failure_count = 0

    def record_failure(self) -> None:
        """Count one failure and remember when it happened."""
        self._failure_count += 1
        self._last_failure_time = time.monotonic()
|
||
|
||
|
||
class AIProviderRegistry:
    """Registry of AI providers, keyed by provider name.

    Described by the original author as the AI-side analogue of the MCP
    ProviderRegistry (ADR-015). Lookups only return providers whose
    ``is_enabled`` attribute is truthy.
    """

    def __init__(self) -> None:
        self._providers: dict[str, AIProviderProtocol] = {}

    def register(self, provider: AIProviderProtocol) -> None:
        """Add (or replace) a provider under ``provider.name`` and log it."""
        self._providers[provider.name] = provider
        logger.info(
            "ai_provider_registered",
            name=provider.name,
            status="enabled" if provider.is_enabled else "disabled",
            privacy=provider.privacy_level,
        )

    def get(self, name: str) -> AIProviderProtocol | None:
        """Return the provider called ``name`` if registered and enabled, else None."""
        candidate = self._providers.get(name)
        if not candidate or not candidate.is_enabled:
            return None
        return candidate

    def all_enabled(self) -> list[AIProviderProtocol]:
        """Return every enabled provider, in registration order."""
        enabled: list[AIProviderProtocol] = []
        for provider in self._providers.values():
            if provider.is_enabled:
                enabled.append(provider)
        return enabled

    def names(self) -> list[str]:
        """Return the names of all registered providers, enabled or not."""
        return [*self._providers]

    async def health_check_all(self) -> dict[str, bool]:
        """Run ``health_check()`` on every provider, one after another.

        Returns:
            Mapping of provider name to health flag; a provider whose check
            raises is reported as ``False``.
        """
        status: dict[str, bool] = {}
        for provider_name, provider in self._providers.items():
            try:
                healthy = await provider.health_check()
            except Exception:
                healthy = False
            status[provider_name] = healthy
        return status

    async def close_all(self) -> None:
        """Close every provider that exposes ``close()`` (I5 fix: shutdown hook).

        A failure to close one provider is logged and does not stop the rest.
        """
        for provider_name, provider in self._providers.items():
            try:
                if hasattr(provider, "close"):
                    await provider.close()
                    logger.info("ai_provider_closed", name=provider_name)
            except Exception as e:
                logger.warning("ai_provider_close_failed", name=provider_name, error=str(e))
|
||
|
||
|
||
class AIRouterExecutor:
    """Execution layer on top of the router (Phase 24, ADR-052).

    Responsibilities:
        1. Cache lookup/write (Redis, shared across providers) - D4
        2. Gates: circuit breaker -> rate limiter -> semaphore - D3
        3. Calling ``provider.analyze()``
        4. Recording a Langfuse trace - D5
        5. Mock-mode short circuit - D13

    Design rules stated by the original author: depend only on the provider
    protocol (never on concrete provider classes) and keep all gating here so
    providers stay stateless.
    """

    # Providers whose calls go through the shared AI rate limiter.
    _RATE_LIMITED_PROVIDERS = ("openclaw_nemo", "nemotron", "gemini", "claude")

    def __init__(self, registry: AIProviderRegistry) -> None:
        self._registry = registry
        self._semaphores: dict[str, asyncio.Semaphore] = {}
        # C2 fix: one circuit breaker per provider, never shared.
        self._circuit_breakers: dict[str, "_SimpleCircuitBreaker"] = {}
        # Strong references to fire-and-forget tasks. The event loop only
        # keeps weak references, so an unreferenced task may be garbage
        # collected before it finishes.
        self._background_tasks: set[asyncio.Task] = set()

    def _get_semaphore(self, name: str, limit: int = 3) -> asyncio.Semaphore:
        """Return the provider's concurrency semaphore, creating it on first use.

        ``limit`` only takes effect when the semaphore is first created.
        """
        if name not in self._semaphores:
            self._semaphores[name] = asyncio.Semaphore(limit)
        return self._semaphores[name]

    def _get_circuit_breaker(self, name: str) -> "_SimpleCircuitBreaker":
        """Return the provider's circuit breaker, creating it on first use."""
        if name not in self._circuit_breakers:
            # 2026-04-05 Claude Code (v4.3): more lenient parameters for NIM -
            # 10 real (non-timeout) errors before opening, half-open after 30 s.
            if name == "nemotron":
                self._circuit_breakers[name] = _SimpleCircuitBreaker(
                    name, failure_threshold=10, recovery_timeout=30.0
                )
            else:
                self._circuit_breakers[name] = _SimpleCircuitBreaker(name)
        return self._circuit_breakers[name]

    @staticmethod
    def _cache_key(prompt: str, context: dict | None) -> str:
        """Build the cache key for a prompt (noted as compatible with openclaw.py).

        The key is ``llm_cache:`` plus the first 16 hex digits of the SHA-256
        of the prompt, suffixed with ``alert_type`` and ``target_resource``
        when a non-empty context is given.
        """
        ctx_hash = ""
        if context:
            ctx_hash = f":{context.get('alert_type', '')}:{context.get('target_resource', '')}"
        content = f"{prompt}{ctx_hash}"
        return f"llm_cache:{hashlib.sha256(content.encode()).hexdigest()[:16]}"

    async def _read_cache(self, cache_key: str) -> AIResult | None:
        """Return the cached result for ``cache_key``, or None on miss/failure."""
        try:
            from src.core.redis_client import get_redis

            cached = await get_redis().get(cache_key)
            if not cached:
                return None
            data = _json.loads(cached)
            logger.info("ai_router_cache_hit", cache_key=cache_key[:30])
            return AIResult(
                raw_response=data.get("response", ""),
                success=True,
                provider=data.get("provider", "cache"),
                from_cache=True,
            )
        except Exception as e:
            # The cache is best-effort: a read failure must not block the call.
            logger.debug("ai_router_cache_read_failed", error=str(e))
            return None

    async def _write_cache(self, cache_key: str, result: AIResult, cache_ttl: int) -> None:
        """Store a successful result in the cache (best-effort)."""
        try:
            from datetime import datetime, timedelta, timezone

            from src.core.redis_client import get_redis

            # Compute the timestamp in UTC+8 explicitly so the "+08:00" suffix
            # is correct whatever the host's local timezone is.
            cached_at = datetime.now(timezone(timedelta(hours=8))).isoformat(timespec="seconds")
            payload = _json.dumps({
                "response": result.raw_response,
                "provider": result.provider,
                "cached_at": cached_at,
            })
            await get_redis().set(cache_key, payload, ex=cache_ttl)
        except Exception as e:
            logger.debug("ai_router_cache_write_failed", error=str(e))

    async def _passes_rate_limit(self, provider_name: str) -> bool:
        """Return False only when the rate limiter explicitly rejects the call.

        Providers outside ``_RATE_LIMITED_PROVIDERS`` are never limited, and a
        failing rate limiter lets the call through.
        """
        if provider_name not in self._RATE_LIMITED_PROVIDERS:
            return True
        try:
            from src.services.ai_rate_limiter import get_ai_rate_limiter

            allowed, reason = await get_ai_rate_limiter().check_and_increment(provider_name)
        except Exception as e:
            logger.debug("ai_router_rate_limiter_error", error=str(e))
            return True
        if not allowed:
            logger.info("ai_router_rate_limited", provider=provider_name, reason=reason)
            return False
        return True

    async def _record_cost(self, provider_name: str, cost_usd: float) -> None:
        """Report the cost of a call to the rate limiter (best-effort)."""
        try:
            # Imported here so the call works for every provider, including
            # ones that never went through the rate-limit gate.
            from src.services.ai_rate_limiter import get_ai_rate_limiter

            await get_ai_rate_limiter().record_cost(provider_name, cost_usd)
        except Exception as e:
            logger.debug("ai_router_record_cost_failed", provider=provider_name, error=str(e))

    @staticmethod
    def _open_trace(prompt: str, provider_order: list[str], context: dict | None, require_local: bool):
        """Enter a Langfuse trace for the whole execution chain (D5).

        Returns the trace context object, or None when tracing is unavailable.
        """
        try:
            from src.services.langfuse_client import langfuse_trace

            trace = langfuse_trace(
                "ai_router_execute",
                metadata={
                    "provider_order": provider_order,
                    "prompt_length": len(prompt),
                    "require_local": require_local,
                    "alert_type": (context or {}).get("alert_type", ""),
                },
            )
            trace.__enter__()
            return trace
        except Exception:
            return None

    @staticmethod
    def _record_generation(trace, provider_name: str, prompt: str, result: AIResult) -> None:
        """Attach the successful provider call to the trace (best-effort)."""
        if not trace:
            return
        try:
            # NOTE(review): generation() is called on the context object
            # itself, not on the value returned by __enter__ - confirm that
            # this matches langfuse_trace's API.
            trace.generation(
                name=f"{provider_name}_call",
                model=provider_name,
                input=prompt[:500],
                output=result.raw_response[:500],
                usage={"total": result.tokens} if result.tokens else None,
                metadata={"cost_usd": result.cost_usd, "latency_ms": round(result.latency_ms, 1)},
            )
        except Exception as e:
            logger.debug("ai_router_trace_generation_failed", error=str(e))

    @staticmethod
    def _close_trace(trace) -> None:
        """Exit the trace context, ignoring tracing errors."""
        if not trace:
            return
        try:
            trace.__exit__(None, None, None)
        except Exception as e:
            logger.debug("ai_router_trace_close_failed", error=str(e))

    @staticmethod
    def _is_timeout(exc: Exception) -> bool:
        """Return True when ``exc`` is a timeout rather than a hard failure."""
        if isinstance(exc, (asyncio.TimeoutError, TimeoutError)):
            return True
        try:
            import httpx as _httpx
        except ImportError:
            return False
        return isinstance(exc, _httpx.TimeoutException)

    def _on_background_task_done(self, task: asyncio.Task) -> None:
        """Drop a finished background task and log its failure, if any."""
        self._background_tasks.discard(task)
        if task.cancelled():
            return
        exc = task.exception()
        if exc is not None:
            logger.warning("diagnose_reject_telegram_failed", error=str(exc))

    def _notify_local_unavailable(self, provider_order: list[str]) -> None:
        """Send the "no local provider available" Telegram alert in the background."""
        try:
            from src.services.telegram_gateway import get_telegram_gateway

            tg = get_telegram_gateway()
            task = asyncio.create_task(
                tg.send_text(
                    "⚠️ <b>DIAGNOSE 本地 Provider 不可用</b>\n"
                    f"已嘗試: {', '.join(provider_order)}\n"
                    "需要人工介入,雲端 Provider 不會被呼叫(隱私邊界)。"
                )
            )
            self._background_tasks.add(task)
            task.add_done_callback(self._on_background_task_done)
        except Exception as _tg_e:
            logger.warning("diagnose_reject_telegram_failed", error=str(_tg_e))

    async def execute(
        self,
        prompt: str,
        provider_order: list[str],
        context: dict | None = None,
        cache_ttl: int = 3600,
        require_local: bool = False,
    ) -> AIResult:
        """Try the providers in order, applying cache and gates.

        Args:
            prompt: LLM prompt.
            provider_order: Provider names in the order to try them.
            context: Extra context passed to the provider; ``alert_type`` and
                ``target_resource`` also contribute to the cache key.
            cache_ttl: Cache time-to-live in seconds.
            require_local: Skip every provider whose ``privacy_level`` is not
                ``"local"`` (privacy boundary).

        Returns:
            The first successful result (possibly from cache), or a failed
            ``AIResult`` with ``provider="none"`` when every provider was
            skipped or failed.
        """
        # 1. Mock mode short circuit (D13).
        if _settings.MOCK_MODE:
            logger.info("ai_router_mock_mode")
            return AIResult(
                raw_response=_json.dumps({
                    "action_title": "Mock Analysis",
                    "description": "Mock mode enabled",
                    "risk_level": "low",
                    "reasoning": "MOCK_MODE=true",
                    "confidence": 0.0,
                }),
                success=True,
                provider="mock",
            )

        # 2. Cache lookup (D4).
        cache_key = self._cache_key(prompt, context)
        cached_result = await self._read_cache(cache_key)
        if cached_result is not None:
            return cached_result

        # 3. Walk the providers behind their gates (D3), inside one trace (D5).
        trace = self._open_trace(prompt, provider_order, context, require_local)
        errors: list[str] = []
        try:
            for provider_name in provider_order:
                provider = self._registry.get(provider_name)
                if not provider:
                    continue

                # Privacy filter (D7).
                if require_local and provider.privacy_level != "local":
                    continue

                # Gate 1: per-provider circuit breaker (C2 fix).
                cb = self._get_circuit_breaker(provider_name)
                if cb.is_open():
                    logger.debug("ai_router_circuit_open", provider=provider_name)
                    continue

                # Gate 2: rate limiter.
                if not await self._passes_rate_limit(provider_name):
                    continue

                # Gate 3: concurrency semaphore.
                async with self._get_semaphore(provider_name):
                    try:
                        result = await provider.analyze(prompt, context)

                        if result.success:
                            cb.record_success()
                            if result.cost_usd > 0:
                                await self._record_cost(provider_name, result.cost_usd)
                            await self._write_cache(cache_key, result, cache_ttl)
                            logger.info(
                                "ai_router_execute_success",
                                provider=provider_name,
                                latency_ms=round(result.latency_ms, 1),
                                tokens=result.tokens,
                                from_cache=False,
                            )
                            self._record_generation(trace, provider_name, prompt, result)
                            return result

                        # The provider answered with success=False.
                        # NOTE(review): this path does not count as a circuit
                        # breaker failure - confirm that this is intended.
                        errors.append(f"{provider_name}: {result.error}")
                        logger.warning("ai_router_provider_failed", provider=provider_name, error=result.error)

                    except Exception as e:
                        errors.append(f"{provider_name}: {e}")
                        logger.warning("ai_router_provider_exception", provider=provider_name, error=str(e))
                        # v4.3: timeouts do not count toward the breaker; an
                        # occasional slow NIM response is not an outage.
                        if not self._is_timeout(e):
                            cb.record_failure()

            # Every provider was skipped or failed.
            logger.error("ai_router_all_providers_failed", tried=provider_order, errors=errors)
        finally:
            # Always exit the trace, including on success or cancellation.
            self._close_trace(trace)

        # 2026-04-04 ogt (Phase 25 P0): when local-only execution fails,
        # alert via Telegram instead of falling back to a cloud provider.
        if require_local:
            self._notify_local_unavailable(provider_order)
            return AIResult(
                raw_response="",
                success=False,
                provider="none",
                error="local_providers_unavailable",
            )

        return AIResult(
            raw_response="",
            success=False,
            provider="none",
            error=f"All providers failed: {'; '.join(errors)}",
        )
|
||
|
||
|
||
# =============================================================================
|
||
# 單例管理
|
||
# =============================================================================
|
||
|
||
# Lazily created module-level singletons; None until the matching get_ai_*()
# accessor is first called, and reset to None by reset_ai_router().
_router: AIRouter | None = None  # routing decisions
_registry: AIProviderRegistry | None = None  # registered providers
_executor: AIRouterExecutor | None = None  # execution layer
|
||
|
||
|
||
def _init_registry() -> AIProviderRegistry:
    """Build a provider registry with every known provider registered.

    Provider classes are imported inside the function rather than at module
    level. Registration order: Ollama, Gemini, Claude, OpenClaw Nemo, Nemotron.
    """
    from src.services.ai_providers.ollama import OllamaProvider
    from src.services.ai_providers.gemini import GeminiProvider
    from src.services.ai_providers.claude import ClaudeProvider
    from src.services.ai_providers.openclaw_nemo import OpenClawNemoProvider

    registry = AIProviderRegistry()
    for provider_cls in (OllamaProvider, GeminiProvider, ClaudeProvider, OpenClawNemoProvider):
        registry.register(provider_cls())

    # 2026-04-02 Claude Code (Phase 24 B3): NemotronProvider added
    # (preferred for tool calling).
    from src.services.ai_providers.nemotron import NemotronProvider

    registry.register(NemotronProvider())
    return registry
|
||
|
||
|
||
def get_ai_router() -> AIRouter:
    """Return the shared AIRouter (routing decisions), creating it on first use."""
    global _router
    if _router is not None:
        return _router
    _router = AIRouter()
    return _router
|
||
|
||
|
||
def get_ai_registry() -> AIProviderRegistry:
    """Return the shared AIProviderRegistry, building it on first use."""
    global _registry
    if _registry is not None:
        return _registry
    _registry = _init_registry()
    return _registry
|
||
|
||
|
||
def get_ai_executor() -> AIRouterExecutor:
    """Return the shared AIRouterExecutor, creating it on first use.

    The executor is built on top of the shared registry from get_ai_registry().
    """
    global _executor
    if _executor is not None:
        return _executor
    _executor = AIRouterExecutor(get_ai_registry())
    return _executor
|
||
|
||
|
||
def reset_ai_router() -> None:
    """Drop the router, registry and executor singletons (for tests).

    The next get_ai_*() call creates fresh instances.
    """
    global _router, _registry, _executor
    _router = _registry = _executor = None
|