1435 lines
61 KiB
Python
1435 lines
61 KiB
Python
"""
|
||
AI Router - Phase 13.3 #87
|
||
==========================
|
||
智能 AI 路由器,根據意圖和複雜度動態選擇 AI Provider
|
||
|
||
目標: 根據請求特性自動選擇最適模型
|
||
策略: Intent Classifier + Complexity Scorer → Routing Decision
|
||
延遲目標: < 50ms (規則引擎優先)
|
||
|
||
路由決策矩陣 (ADR-023):
|
||
┌─────────────────┬───────────────┬──────────────────────────────┐
|
||
│ 複雜度 + 風險 │ Provider │ 備註 │
|
||
├─────────────────┼───────────────┼──────────────────────────────┤
|
||
│ 1-2 + LOW │ Ollama │ 快速本地處理 │
|
||
│ 3 + MEDIUM │ Ollama │ fallback → Gemini │
|
||
│ 4-5 + HIGH │ Gemini │ fallback → Claude │
|
||
│ DELETE/CRITICAL │ Claude │ 強制使用最強模型 │
|
||
└─────────────────┴───────────────┴──────────────────────────────┘
|
||
|
||
版本: v4.4
|
||
建立: 2026-03-26 (台北時區)
|
||
建立者: Claude Code
|
||
最後修改: 2026-04-27 (台北時區)
|
||
修改者: Claude Sonnet 4.6 (A2 — DIAGNOSE 移除 Ollama, INC-20260425)
|
||
|
||
變更紀錄:
|
||
| 版本 | 日期 | 執行者 | 變更內容 |
|
||
|------|------|--------|----------|
|
||
| v1.0 | 2026-03-26 | Claude Code | 初始實作 |
|
||
| v2.0 | 2026-03-26 | Claude Code | 支援 IntentResult + 新意圖類型 |
|
||
| v3.0 | 2026-03-26 | Claude Code | Phase 13.3 #87 完整路由決策矩陣 |
|
||
| v4.0 | 2026-04-02 | ogt (首席架構師) | Phase 24 AIProvider Registry + Executor; C1 Langfuse Trace; C2 AIRouter.route(); C3 型別 typo; I4 Protocol close |
|
||
| v4.1 | 2026-04-04 | ogt (首席架構師) | Phase 25 P0: DIAGNOSE Privacy-First — _local_fallback_chain; DIAGNOSE→NEMOTRON; REJECT+Telegram |
|
||
| v4.2 | 2026-04-04 | Claude Code | Phase 25 P0 實測修正: _local_fallback_chain 移除 Nemotron(雲端),僅留 Ollama(本地); timeout 依實測調整(NIM 60s/Ollama 200s) |
|
||
| v4.3 | 2026-04-05 | Claude Code | Phase 25 P0 架構修正: 實測 Ollama CPU ~238s(不可用); NIM 實測 2-27s avg 10.6s; DIAGNOSE 改走 _full_fallback_chain(NIM 主力); _local_fallback_chain 廢棄 |
|
||
| v4.4 | 2026-04-27 | Claude Sonnet 4.6 | A2 INC-20260425: DIAGNOSE fallback chain 移除 Ollama (CPU 238s 二次 timeout); 新增 _diagnose_fallback_chain (NEMO→GEMINI→CLAUDE); 新增 aiops_diagnose_fallback_total metric |
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import time
|
||
from dataclasses import dataclass, field
|
||
from enum import Enum
|
||
from typing import TYPE_CHECKING, Protocol
|
||
|
||
import structlog
|
||
|
||
if TYPE_CHECKING:
|
||
from src.services.intent_classifier import IntentResult
|
||
|
||
from src.services.complexity_scorer import (
|
||
ComplexityScore,
|
||
get_complexity_scorer,
|
||
)
|
||
from src.services.intent_classifier import (
|
||
IntentResult,
|
||
IntentType,
|
||
RiskLevel,
|
||
get_intent_classifier,
|
||
normalize_intent,
|
||
)
|
||
from src.services.model_registry import get_model_registry
|
||
|
||
# Module-level structlog logger, named after this module via __name__.
logger = structlog.get_logger(__name__)
|
||
|
||
|
||
# =============================================================================
|
||
# Provider 定義
|
||
# =============================================================================
|
||
|
||
|
||
class AIProviderEnum(str, Enum):
    """Identifiers of the AI providers the router can select.

    Members are ``str`` subclasses; a provider name string is converted back
    into a member with ``AIProviderEnum(name)``, which raises ``ValueError``
    for names that have no member here.
    """

    OLLAMA = "ollama"

    # ADR-110 (2026-05-04, ogt + Claude Sonnet 4.6): three-tier GCP failover.
    # OllamaFailoverManager reports provider_name as "ollama_gcp_a",
    # "ollama_gcp_b" or "ollama_local". While these members were missing,
    # AIProviderEnum(primary_str) raised ValueError, the fallback chain ended
    # up empty and routing jumped straight to Gemini.
    OLLAMA_GCP_A = "ollama_gcp_a"  # GCP-A 34.143.170.20, primary
    OLLAMA_GCP_B = "ollama_gcp_b"  # GCP-B 34.21.145.224, secondary
    OLLAMA_LOCAL = "ollama_local"  # 192.168.0.111, local fallback

    GEMINI = "gemini"
    CLAUDE = "claude"

    # C1 fix (2026-04-02, ogt): names aligned with the provider registry.
    #   OpenClawNemoProvider.name = "openclaw_nemo" (general reasoning, via .188)
    #   NemotronProvider.name     = "nemotron"      (tool calling, direct NVIDIA NIM)
    # The former NVIDIA = "nvidia" member was removed: the registry has no
    # provider with that name.
    OPENCLAW_NEMO = "openclaw_nemo"
    NEMOTRON = "nemotron"
|
||
|
||
|
||
# Latency budget per provider, in milliseconds. Providers missing from this
# table get 30000 ms from the callers' ``.get(provider, 30000)`` lookups.
PROVIDER_LATENCY_BUDGET: dict[AIProviderEnum, int] = {
    AIProviderEnum.OLLAMA: 60_000,  # local; longer processing time is acceptable
    # ADR-110 (2026-05-04, ogt): three-tier GCP failover. Inference on the
    # GCP NVMe SSD hosts is fast, so 60 s is enough.
    AIProviderEnum.OLLAMA_GCP_A: 60_000,
    AIProviderEnum.OLLAMA_GCP_B: 60_000,
    AIProviderEnum.OLLAMA_LOCAL: 90_000,  # host 111, local HDD, somewhat slower
    AIProviderEnum.GEMINI: 30_000,  # cloud, lower latency
    AIProviderEnum.CLAUDE: 30_000,  # cloud, lower latency
    # C1 fix (2026-04-02, ogt): names aligned with the provider registry.
    AIProviderEnum.OPENCLAW_NEMO: 60_000,  # via .188 -> NVIDIA NIM; longer time allowed
    AIProviderEnum.NEMOTRON: 60_000,  # tool calling only; longer time allowed
}
|
||
|
||
|
||
# =============================================================================
|
||
# Interface 定義 (P1 修復 - 2026-04-01 首席架構師審查)
|
||
# =============================================================================
|
||
|
||
|
||
class IAIRouter(Protocol):
    """Structural interface of the AI router, used for dependency injection.

    Added by the 2026-04-01 chief-architect review (P1 fix, ogt) so that
    tests can substitute their own router. Related protocols named by that
    review: IModelRegistry, IComplexityScorer.
    """

    async def route(self, text: str, context: dict | None = None) -> "RoutingDecision":
        """Route a request to the most suitable AI provider."""
        ...

    def route_sync(self, text: str, context: dict | None = None) -> "RoutingDecision":
        """Synchronous variant of routing."""
        ...

    def route_tool_calling(
        self,
    ) -> tuple[AIProviderEnum, str, list[tuple[AIProviderEnum, str]]]:
        """Routing dedicated to tool-calling tasks."""
        ...
|
||
|
||
|
||
@dataclass
class RoutingDecision:
    """Outcome of one routing decision (Phase 13.3 #87).

    Bundles everything the OpenClaw main flow needs: the chosen provider and
    model, the ordered fallback chain, the classification inputs that led to
    the choice, and timing metadata.
    """

    # --- core decision -----------------------------------------------------
    selected_provider: AIProviderEnum  # provider that was chosen
    selected_model: str  # model name that was chosen
    fallback_chain: list[tuple[AIProviderEnum, str]]  # [(provider, model), ...]
    routing_reason: str  # human-readable reason for the choice
    latency_budget_ms: int  # latency budget in milliseconds

    # --- classification inputs ---------------------------------------------
    intent: IntentType  # intent after normalisation
    intent_result: IntentResult  # full intent classification result
    complexity: ComplexityScore  # complexity score
    # Placeholder default only: __post_init__ always overwrites this with
    # intent_result.risk_level.
    risk_level: RiskLevel = field(default=RiskLevel.MEDIUM)

    # --- routing metadata --------------------------------------------------
    routing_latency_ms: float = 0.0  # time spent making the decision (ms)

    # --- deprecated mirrors, kept for backward compatibility ---------------
    # All three are recomputed in __post_init__, so constructor values for
    # them are discarded.
    model: str = ""  # mirrors selected_model
    reason: str = ""  # mirrors routing_reason
    fallback_models: list[str] = field(default_factory=list)  # derived from fallback_chain

    def __post_init__(self):
        """Fill in the derived fields after dataclass initialisation."""
        self.risk_level = self.intent_result.risk_level

        # Backward-compatible aliases.
        primary_model = self.selected_model
        self.model = primary_model
        self.reason = self.routing_reason
        # Model names of the fallback chain, minus the selected model.
        self.fallback_models = [
            name for _provider, name in self.fallback_chain if name != primary_model
        ]

    def to_dict(self) -> dict:
        """Return a plain-dict view of the decision (used for API responses)."""
        chain_payload = [
            {"provider": entry_provider.value, "model": entry_model}
            for entry_provider, entry_model in self.fallback_chain
        ]
        payload = {
            "selected_provider": self.selected_provider.value,
            "selected_model": self.selected_model,
            "fallback_chain": chain_payload,
            "routing_reason": self.routing_reason,
            "latency_budget_ms": self.latency_budget_ms,
            "intent": self.intent.value,
            "risk_level": self.risk_level.value,
            "complexity_score": self.complexity.score,
            "routing_latency_ms": round(self.routing_latency_ms, 2),
        }
        return payload
|
||
|
||
|
||
class AIRouter:
|
||
"""
|
||
AI 路由器 (Phase 13.3 #87)
|
||
|
||
整合 IntentClassifier 和 ComplexityScorer,
|
||
動態選擇最適合的 AI Provider 和模型。
|
||
|
||
路由決策矩陣:
|
||
┌─────────────────┬───────────────┬──────────────────────────────┐
|
||
│ 複雜度 + 風險 │ Provider │ 備註 │
|
||
├─────────────────┼───────────────┼──────────────────────────────┤
|
||
│ 1-2 + LOW │ Ollama │ 快速本地處理 │
|
||
│ 3 + MEDIUM │ Ollama │ fallback → Gemini │
|
||
│ 4-5 + HIGH      │ OpenClaw Nemo │ fallback → Gemini            │
|
||
│ DELETE/CRITICAL │ Claude │ 強制使用最強模型 │
|
||
└─────────────────┴───────────────┴──────────────────────────────┘
|
||
|
||
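Routing strategy (in priority order):
1. CRITICAL risk forces Claude
2. DELETE intent forces Claude
3. Per-intent provider override, when one is configured for the intent
4. HIGH risk or complexity 4-5 → OpenClaw Nemo (fallback Gemini)
5. Everything else → Ollama (cost first)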
|
||
"""
|
||
|
||
def __init__(self):
    """Resolve collaborators and build the static routing tables.

    Obtains the intent classifier, complexity scorer, model registry and
    Ollama failover manager from their module-level getters, reads each
    provider's model names from the registry, and derives the fallback
    chains and per-intent provider overrides from them.
    """
    self._intent_classifier = get_intent_classifier()
    self._complexity_scorer = get_complexity_scorer()
    self._model_registry = get_model_registry()

    # 2026-04-25 P1.2 (Claude Engineer-A2): failover integrated into
    # ai_router + lifespan. Imported here rather than at module top to avoid
    # a circular dependency (ollama_failover_manager does not import
    # ai_router).
    from src.services.ollama_failover_manager import get_ollama_failover_manager

    self._failover_manager = get_ollama_failover_manager()

    # Model names come from the ModelRegistry.
    lookup_model = self._model_registry.get_model
    self._ollama_default = lookup_model("ollama", "default")
    self._ollama_summary = lookup_model("ollama", "summary")
    self._gemini_default = lookup_model("gemini", "default")
    self._claude_default = lookup_model("claude", "default")
    # C1 fix (2026-04-02, ogt): openclaw_nemo (general reasoning) and
    # nemotron (tool calling) both read the registry's "nvidia" default.
    self._openclaw_nemo_default = lookup_model("nvidia", "default")
    self._nemotron_default = lookup_model("nvidia", "default")
    # Backward-compatible alias.
    self._nvidia_default = self._openclaw_nemo_default

    # Reusable (provider, model) pairs for the chains below.
    ollama_entry = (AIProviderEnum.OLLAMA, self._ollama_default)
    nemo_entry = (AIProviderEnum.OPENCLAW_NEMO, self._openclaw_nemo_default)
    nemotron_entry = (AIProviderEnum.NEMOTRON, self._nemotron_default)
    gemini_entry = (AIProviderEnum.GEMINI, self._gemini_default)
    claude_entry = (AIProviderEnum.CLAUDE, self._claude_default)

    # Provider -> default model.
    self._provider_models: dict[AIProviderEnum, str] = {
        AIProviderEnum.OLLAMA: self._ollama_default,
        AIProviderEnum.GEMINI: self._gemini_default,
        AIProviderEnum.CLAUDE: self._claude_default,
        AIProviderEnum.OPENCLAW_NEMO: self._openclaw_nemo_default,
        AIProviderEnum.NEMOTRON: self._nemotron_default,
    }

    # Full fallback chain of (provider, model) pairs.
    # C1 fix (2026-04-02, ogt): OPENCLAW_NEMO is the preferred arbiter.
    self._full_fallback_chain: list[tuple[AIProviderEnum, str]] = [
        nemo_entry,
        gemini_entry,
        claude_entry,
        ollama_entry,
    ]

    # DIAGNOSE-specific chain.
    # 2026-04-29 (ogt + Claude Code): overturns the A2 rule because the
    # facts behind INC-20260425 no longer hold.
    # Standing directives (2026-04-29): prefer Ollama on host 111; local
    # free LLMs first (feedback_ai_autonomous_direction.md); host 111 is the
    # only Ollama host (feedback_ollama_111_only.md).
    # Rationale:
    #   old fact (2026-04-27, A2): Ollama = CPU-only deepseek-r1:14b at 238 s
    #     (unusable)
    #   new fact (2026-04-29): prod Ollama on 111 = M1 Pro Apple Silicon GPU
    #     + qwen2.5:7b-instruct, 8.2 GB VRAM fully loaded, ctx 32k, "hi"
    #     measured at 0.54 s
    #   cloud providers all failing: OpenClaw 188 -> 500, Gemini -> 429
    #     (quota exhausted), Claude -> 404 (stale endpoint)
    #   without this change 100% of incidents end in llm_failed and AI
    #     auto-remediation never starts
    # Companion changes:
    #   - IntentType.DIAGNOSE override: OPENCLAW_NEMO -> OLLAMA
    #   - openclaw.py injects task_type="diagnose" so Ollama uses a 200 s
    #     timeout
    #   - test_p0_diagnose_routing.py / test_ai_router_diagnose_fallback.py
    #     updated accordingly
    self._diagnose_fallback_chain: list[tuple[AIProviderEnum, str]] = [
        ollama_entry,  # primary: local and free, per the standing directive
        nemo_entry,  # fallback 1
        gemini_entry,  # fallback 2
        claude_entry,  # fallback 3
    ]

    # Tool-calling fallback chain (ADR-036).
    self._tool_calling_fallback_chain: list[tuple[AIProviderEnum, str]] = [
        nemotron_entry,
        gemini_entry,
        claude_entry,
    ]

    # 2026-04-05 (Claude Code): Phase 25 P0 v4.3 deprecated
    # _local_fallback_chain. Measurements of 2026-04-05 (historical):
    #   Ollama llama3.2:3b CPU-only needed 238 s to return {"ok":true}
    #   Nemotron NIM measured 2.2 s - 27 s, average 10.6 s (cloud GPU)
    #   NIM has received incident data since Phase 22 (no new privacy
    #   decision)
    # The attribute is kept, empty, only so existing readers do not hit an
    # AttributeError.
    self._local_fallback_chain: list[tuple[AIProviderEnum, str]] = []

    # Forced provider per intent; None means "decide by complexity".
    self._intent_provider_overrides: dict[IntentType, AIProviderEnum | None] = {
        # Four core intents.
        IntentType.RESTART: None,  # by complexity
        IntentType.SCALE: None,  # by complexity
        IntentType.CONFIG: None,  # by complexity (HIGH risk still escalates)
        # 2026-04-16 (ogt): DIAGNOSE -> OPENCLAW_NEMO restored (superseded by
        #   the 2026-04-29 change).
        # 2026-04-29 (ogt + Claude Code): DIAGNOSE -> OLLAMA (local first).
        #   Old measurements are obsolete: M1 Pro GPU + qwen2.5:7b answers in
        #   0.54 s (no longer CPU deepseek at 238 s); OPENCLAW_NEMO at
        #   188:8088 currently returns 500; with the cloud providers down,
        #   local Ollama must be the primary.
        IntentType.DIAGNOSE: AIProviderEnum.OLLAMA,
        # Auxiliary intents.
        IntentType.DELETE: AIProviderEnum.CLAUDE,  # CRITICAL -> always Claude
        IntentType.ROLLBACK: None,  # by complexity
        IntentType.UNKNOWN: None,
        # Legacy intents.
        IntentType.CODE_REVIEW: None,
        IntentType.DEPLOYMENT: None,
        IntentType.ALERT_TRIAGE: AIProviderEnum.OLLAMA,
        IntentType.QUERY: AIProviderEnum.OLLAMA,
        IntentType.MAINTENANCE: None,
    }

    # Backward-compatible attributes.
    self._default_model = self._ollama_default
    self._summary_model = self._ollama_summary
    self._fallback_order = [
        self._ollama_default,
        self._ollama_summary,
        "gemini",
        "claude",
    ]
|
||
|
||
def _resolve_intent_from_context(
    self,
    context: dict | None,
) -> IntentResult | None:
    """Resolve a centrally governed intent hint from ``context``.

    Internal fast path of the router: when the caller already knows the
    intent (for example a Phase 2 agent that knows it is doing diagnostic
    analysis) it can pass ``context["intent_hint"]`` and skip one intent
    classification round, without bypassing the router's own rules.

    Args:
        context: Request context. Only the ``"intent_hint"`` key is read.
            The hint is matched case-insensitively after stripping
            whitespace; an ``Enum`` member is matched by its ``value``.

    Returns:
        An ``IntentResult`` with confidence 1.0 and method
        ``"context_hint"`` when the hint names a known intent; ``None``
        when there is no context, no hint (missing, ``None`` or blank), or
        the hint is unknown (a warning is logged in that last case only).
    """
    if not context:
        return None

    hint = context.get("intent_hint")
    if hint is None:
        # An explicit None means "no hint". Stringifying it would produce
        # "none", which is not a known alias and used to be reported as an
        # invalid hint.
        return None
    if isinstance(hint, Enum):
        # str() of an Enum member yields "ClassName.MEMBER", which can never
        # match an alias; compare its value instead.
        # NOTE(review): assumes IntentType values equal the alias keys below.
        hint = hint.value

    raw_hint = str(hint).strip().lower()
    if not raw_hint:
        return None

    alias_map = {
        "restart": IntentType.RESTART,
        "scale": IntentType.SCALE,
        "config": IntentType.CONFIG,
        "diagnose": IntentType.DIAGNOSE,
        "delete": IntentType.DELETE,
        "rollback": IntentType.ROLLBACK,
        "unknown": IntentType.UNKNOWN,
        # legacy aliases
        "alert_triage": IntentType.ALERT_TRIAGE,
        "deployment": IntentType.DEPLOYMENT,
        "query": IntentType.QUERY,
        "maintenance": IntentType.MAINTENANCE,
        "code_review": IntentType.CODE_REVIEW,
    }
    intent = alias_map.get(raw_hint)
    if intent is None:
        logger.warning("ai_router_invalid_intent_hint", intent_hint=raw_hint)
        return None

    return IntentResult(
        intent=intent,
        confidence=1.0,
        method="context_hint",
        matched_keywords=[f"context:{raw_hint}"],
        detected_resources=[],
        reasoning=f"context intent_hint={raw_hint}",
    )
|
||
|
||
async def route(
    self,
    text: str,
    context: dict | None = None,
) -> RoutingDecision:
    """Route a request to the most suitable AI provider and model.

    Latency target: < 50 ms (rule engine first; may take longer when the
    intent classifier has to fall back to an LLM).

    Args:
        text: User input or alert content.
        context: Extra context (service, metrics, ...). An ``intent_hint``
            key short-circuits intent classification.

    Returns:
        RoutingDecision: The complete routing decision.
    """
    start_time = time.perf_counter()
    context = context or {}

    # Step 1: intent classification (context hint first, then classifier).
    intent_result = self._resolve_intent_from_context(context)
    if intent_result is None:
        intent_result = await self._intent_classifier.classify(text)
    intent = normalize_intent(intent_result.intent)

    # Step 2: complexity scoring.
    complexity = self._complexity_scorer.score(context)

    # Step 3: provider + model selection.
    provider, model, reason = self._select_provider_and_model(
        intent, intent_result, complexity
    )

    # Step 3b: when the initial decision is OLLAMA, let the failover manager
    # re-evaluate which Ollama endpoint to use.
    # 2026-04-25 P1.2 (Claude Engineer-A2): failover integrated into
    # ai_router + lifespan. Only triggered for OLLAMA so NEMOTRON /
    # OPENCLAW_NEMO / CLAUDE / GEMINI routing is untouched.
    failover_fallback: list[tuple[AIProviderEnum, str]] | None = None
    if provider == AIProviderEnum.OLLAMA:
        try:
            failover_result = await self._failover_manager.select_provider(
                task_type=intent.value if intent else "general"
            )
            primary = failover_result.primary
            primary_str = primary.provider_name

            # Stage the outcome in locals and commit it only after the whole
            # failover result was processed, so an exception part-way
            # through cannot leave a half-applied failover behind.
            staged_provider, staged_model, staged_reason = provider, model, reason
            try:
                staged_provider = AIProviderEnum(primary_str)
            except ValueError:
                # provider_name has no enum member: keep the original
                # provider so an unknown provider never silently reaches the
                # execution layer.
                logger.warning(
                    "ai_router_unknown_failover_provider",
                    provider=primary_str,
                )
            else:
                staged_model = primary.model
                staged_reason = f"{reason} [failover→{primary_str}]"

            # Rebuild the fallback chain from the failover result, skipping
            # endpoints whose provider name has no enum member.
            staged_chain: list[tuple[AIProviderEnum, str]] = []
            for ep in failover_result.fallback_chain:
                try:
                    staged_chain.append((AIProviderEnum(ep.provider_name), ep.model))
                except ValueError:
                    logger.warning(
                        "ai_router_unknown_failover_fallback_provider",
                        provider=ep.provider_name,
                    )
        except Exception as e:
            # Failover manager failed: keep the original provider, model and
            # reason untouched (fail-open).
            logger.warning("ai_router_failover_manager_error", error=str(e))
        else:
            provider, model, reason = staged_provider, staged_model, staged_reason
            failover_fallback = staged_chain

    # Step 4: fallback chain.
    # 2026-04-25 P1.2: a chain returned by the failover manager wins, even
    # when it is empty. failover_fallback stays None when the initial
    # provider was not OLLAMA or the failover manager failed; in that case
    # the intent-aware chain is used.
    fallback_chain = (
        failover_fallback
        if failover_fallback is not None
        else self._build_fallback_chain_for_intent(provider, intent)
    )

    # Step 5: latency budget (30000 ms for providers without an entry).
    latency_budget = PROVIDER_LATENCY_BUDGET.get(provider, 30000)

    # Time spent on the routing decision itself.
    routing_latency = (time.perf_counter() - start_time) * 1000

    decision = RoutingDecision(
        selected_provider=provider,
        selected_model=model,
        fallback_chain=fallback_chain,
        routing_reason=reason,
        latency_budget_ms=latency_budget,
        intent=intent,
        intent_result=intent_result,
        complexity=complexity,
        routing_latency_ms=routing_latency,
    )

    logger.info(
        "ai_routing_decision",
        provider=provider.value,
        model=model,
        intent=intent.value,
        intent_confidence=intent_result.confidence,
        risk_level=intent_result.risk_level.value,
        complexity_score=complexity.score,
        reason=reason,
        latency_budget_ms=latency_budget,
        routing_latency_ms=round(routing_latency, 2),
        fallback_count=len(fallback_chain),
    )

    return decision
|
||
|
||
def _select_provider_and_model(
    self,
    intent: IntentType,
    intent_result: IntentResult,
    complexity: ComplexityScore,
) -> tuple[AIProviderEnum, str, str]:
    """Pick provider and model (core logic of Phase 13.3 #87).

    Rules are evaluated top to bottom; the first match wins:

    1. CRITICAL risk              -> Claude
    2. DELETE intent              -> Claude
    3. per-intent override set    -> the override provider
    4. score >= 4 or HIGH risk    -> OpenClaw Nemo
    5. score == 3                 -> Ollama (default model)
    6. anything else              -> Ollama (summary model when score <= 1)

    Args:
        intent: Normalised intent.
        intent_result: Full classification result; only ``risk_level`` is
            read here.
        complexity: Complexity score; only ``score`` is read here.

    Returns:
        ``(provider, model, reason)``.
    """
    risk = intent_result.risk_level
    score = complexity.score

    # Rule 1 (highest priority): CRITICAL risk always goes to Claude.
    if risk == RiskLevel.CRITICAL:
        return (
            AIProviderEnum.CLAUDE,
            self._claude_default,
            f"CRITICAL 風險 ({intent.value}) 強制使用 Claude",
        )

    # Rule 2: DELETE is irreversible, so it always goes to Claude.
    if intent == IntentType.DELETE:
        return (
            AIProviderEnum.CLAUDE,
            self._claude_default,
            "DELETE 意圖 (不可逆) 強制使用 Claude",
        )

    # Rule 3: per-intent forced provider.
    forced = self._intent_provider_overrides.get(intent)
    if forced is not None:
        # 2026-04-03 (ogt): ALERT_TRIAGE / QUERY use the Ollama summary model
        # (llama3.2:3b) to avoid the 90 s timeout of qwen2.5:7b-instruct
        # taking down the whole chain (Phase 24, option A). Every other
        # overridden intent uses the provider's default model.
        wants_summary = forced == AIProviderEnum.OLLAMA and intent in (
            IntentType.ALERT_TRIAGE,
            IntentType.QUERY,
        )
        chosen_model = self._ollama_summary if wants_summary else self._provider_models[forced]
        return forced, chosen_model, f"意圖 {intent.value} 指定使用 {forced.value}"

    # Rule 4: complexity 4-5 or HIGH risk -> OpenClaw Nemo (via .188 ->
    # NVIDIA NIM). C1 fix (2026-04-02, ogt): NVIDIA renamed to OPENCLAW_NEMO
    # to match the registry.
    if score >= 4 or risk == RiskLevel.HIGH:
        return (
            AIProviderEnum.OPENCLAW_NEMO,
            self._openclaw_nemo_default,
            f"複雜度={score}/5, 風險={risk.value} → OpenClaw Nemo (fallback Gemini)",
        )

    # Rule 5: complexity 3 -> Ollama.
    if score == 3:
        return (
            AIProviderEnum.OLLAMA,
            self._ollama_default,
            f"複雜度={score}/5, 風險={risk.value} → Ollama (fallback Gemini)",
        )

    # Rule 6: everything left -> Ollama; the lightweight summary model
    # handles the lowest scores for faster answers.
    light_model = self._ollama_summary if score <= 1 else self._ollama_default
    return (
        AIProviderEnum.OLLAMA,
        light_model,
        f"複雜度={score}/5, 風險={risk.value} → Ollama (成本優先)",
    )
|
||
|
||
def _select_model(
    self,
    intent: IntentType,
    intent_result: IntentResult,
    complexity: ComplexityScore,
) -> tuple[str, str]:
    """Pick a model (backward-compatible wrapper).

    Deprecated: use ``_select_provider_and_model``; this wrapper delegates
    to it and drops the provider from the result.

    Args:
        intent: Normalised intent.
        intent_result: Full classification result.
        complexity: Complexity score.

    Returns:
        ``(model_name, reason)``.
    """
    _provider, chosen_model, why = self._select_provider_and_model(
        intent, intent_result, complexity
    )
    return chosen_model, why
|
||
|
||
def _build_fallback_chain(
    self, selected_provider: AIProviderEnum
) -> list[tuple[AIProviderEnum, str]]:
    """Return the full fallback chain without the selected provider.

    DEPRECATED 2026-04-28: superseded by
    ``_build_fallback_chain_for_intent``; no callers remain.

    The order is that of ``self._full_fallback_chain``.

    Args:
        selected_provider: Provider that was already chosen as primary.

    Returns:
        ``[(provider, model), ...]`` for every other provider in the chain.
    """
    return [
        (candidate, model_name)
        for candidate, model_name in self._full_fallback_chain
        if candidate != selected_provider
    ]
|
||
|
||
def _build_fallback_list(self, selected_model: str) -> list[str]:
    """Return ``self._fallback_order`` without ``selected_model`` (backward compatible)."""
    return [name for name in self._fallback_order if name != selected_model]
|
||
|
||
def _build_fallback_chain_for_intent(
    self,
    selected_provider: AIProviderEnum,
    intent: IntentType,
) -> list[tuple[AIProviderEnum, str]]:
    """Build the fallback chain for a given intent.

    Introduced by the A2 fix for INC-20260425 (2026-04-27, Claude Sonnet
    4.6) to give DIAGNOSE its own chain. DIAGNOSE reads
    ``self._diagnose_fallback_chain``; every other intent reads
    ``self._full_fallback_chain``.

    NOTE(review): the A2 change described the DIAGNOSE chain as cloud-only.
    That no longer matches the code: the chain's contents are whatever
    ``__init__`` assigns to ``_diagnose_fallback_chain``.

    Args:
        selected_provider: Provider already chosen as primary.
        intent: Normalised intent.

    Returns:
        ``[(provider, model), ...]`` from the applicable chain, with
        ``selected_provider`` removed.
    """
    source_chain = (
        self._diagnose_fallback_chain
        if intent == IntentType.DIAGNOSE
        else self._full_fallback_chain
    )
    return [
        (candidate, model_name)
        for candidate, model_name in source_chain
        if candidate != selected_provider
    ]
|
||
|
||
def route_sync(
    self,
    text: str,
    context: dict | None = None,
) -> RoutingDecision:
    """Synchronous routing using the classifier's synchronous path only.

    Intended for callers that need a fast decision (target < 50 ms) and do
    not need LLM-based intent classification. Unlike ``route()``, this
    variant does not consult the Ollama failover manager and does not log
    the decision.

    Args:
        text: User input or alert content.
        context: Extra context; an ``intent_hint`` key short-circuits
            classification.

    Returns:
        RoutingDecision: The routing decision.
    """
    started = time.perf_counter()
    ctx = context or {}

    # Intent: context hint first, otherwise the synchronous classifier.
    hinted = self._resolve_intent_from_context(ctx)
    intent_result = (
        hinted if hinted is not None else self._intent_classifier.classify_sync(text)
    )
    intent = normalize_intent(intent_result.intent)

    # Complexity, then provider/model.
    complexity = self._complexity_scorer.score(ctx)
    provider, model, reason = self._select_provider_and_model(
        intent, intent_result, complexity
    )

    # Intent-aware fallback chain, same helper as route()
    # (2026-04-27, A2 INC-20260425).
    chain = self._build_fallback_chain_for_intent(provider, intent)

    # Latency budget; 30000 ms for providers without an entry.
    budget_ms = PROVIDER_LATENCY_BUDGET.get(provider, 30000)

    # Time spent on the routing decision itself.
    elapsed_ms = (time.perf_counter() - started) * 1000

    return RoutingDecision(
        selected_provider=provider,
        selected_model=model,
        fallback_chain=chain,
        routing_reason=reason,
        latency_budget_ms=budget_ms,
        intent=intent,
        intent_result=intent_result,
        complexity=complexity,
        routing_latency_ms=elapsed_ms,
    )
|
||
|
||
# =========================================================================
|
||
# Tool Calling 路由 (ADR-036)
|
||
# =========================================================================
|
||
|
||
def route_tool_calling(self) -> tuple[AIProviderEnum, str, list[tuple[AIProviderEnum, str]]]:
    """Routing dedicated to tool-calling tasks (ADR-036).

    Tool calling always starts with Nemotron (the original note cites 83.3%
    accuracy) and falls back to the remaining entries of
    ``self._tool_calling_fallback_chain``.

    Returns:
        ``(provider, model, fallback_chain)`` where ``fallback_chain`` omits
        the primary provider.
    """
    # C1 fix (2026-04-02, ogt): tool calling uses NEMOTRON (direct NIM).
    primary = AIProviderEnum.NEMOTRON
    primary_model = self._nemotron_default
    backups = [
        (candidate, model_name)
        for candidate, model_name in self._tool_calling_fallback_chain
        if candidate != primary
    ]

    logger.info(
        "tool_calling_routing",
        provider=primary.value,
        model=primary_model,
        fallback_count=len(backups),
    )

    return primary, primary_model, backups
|
||
|
||
def get_tool_calling_fallback_chain(self) -> list[tuple[AIProviderEnum, str]]:
    """Return a shallow copy of the tool-calling fallback chain."""
    return list(self._tool_calling_fallback_chain)
|
||
|
||
# =========================================================================
|
||
# 便捷方法
|
||
# =========================================================================
|
||
|
||
def get_provider_for_intent(self, intent: IntentType) -> AIProviderEnum:
    """Return the provider configured for ``intent``, ignoring complexity.

    Args:
        intent: Intent to look up in ``self._intent_provider_overrides``.

    Returns:
        The override provider when one is configured; ``AIProviderEnum.OLLAMA``
        when the intent has no entry or its entry is ``None``.
    """
    override = self._intent_provider_overrides.get(intent)
    # None is the "no override" marker in the table, so test for None
    # explicitly (as _select_provider_and_model does) instead of relying on
    # the truthiness of the override value.
    return override if override is not None else AIProviderEnum.OLLAMA
|
||
|
||
def get_model_for_provider(self, provider: AIProviderEnum) -> str:
    """Return the model mapped to ``provider``, or the Ollama default model if unmapped."""
    fallback_model = self._ollama_default
    return self._provider_models.get(provider, fallback_model)
|
||
|
||
def get_routing_matrix(self) -> list[dict]:
    """Return the routing decision matrix (for API docs or debugging).

    Returns:
        One dict per rule, in rule order, with the keys ``rule``,
        ``condition``, ``provider`` and ``reason``.
    """
    # (condition, provider, reason) per rule; rule numbers are 1-based
    # positions in this table.
    rows = (
        ("CRITICAL risk", "claude", "不可逆/高風險操作強制最強模型"),
        ("DELETE intent", "claude", "刪除操作強制最強模型"),
        ("Intent override", "depends", "特定意圖有預設 Provider"),
        (
            "complexity >= 4 OR HIGH risk",
            "openclaw_nemo",
            "高複雜度需要 Nvidia Nemotron 強大推理能力 (via .188)",
        ),
        ("complexity == 3", "ollama", "中等複雜度本地處理"),
        ("complexity 1-2", "ollama", "低複雜度快速處理"),
    )
    return [
        {
            "rule": number,
            "condition": condition,
            "provider": provider_name,
            "reason": why,
        }
        for number, (condition, provider_name, why) in enumerate(rows, start=1)
    ]
|
||
|
||
async def feedback_from_aider_events(
    self,
    repo: str | None = None,
    days: int = 7,
) -> dict[str, float]:
    """Aggregate the per-model success rate from aider_events over ``days`` days.

    Phase 24 ADR-052 extension (AI autonomy feedback loop). Currently
    read-only: per the original note it is not wired into ``route()`` and
    provider weights will only be adjusted after the USE_AIDER_FEEDBACK flag
    and a 7-day canary validation.

    Args:
        repo: When given, only rows of that repo are kept; otherwise all
            repos.
        days: Size of the time window in days (default 7).

    Returns:
        ``{model_name: success_rate}``, e.g.
        ``{"elephant-alpha": 0.85, "gemini-pro": 0.92}``. An empty dict means
        no data, the persistence modules could not be imported, or the query
        failed; callers should degrade to ignoring the feedback.
    """
    try:
        from src.db.base import get_db_context
        from src.repositories.aider_event_repository import AiderEventRepository
    except ImportError:
        # Persistence layer not importable in this environment: there is
        # nothing to aggregate.
        return {}

    try:
        async with get_db_context() as sess:
            stats = await AiderEventRepository(sess).model_stats_since(days=days)
    except Exception as exc:
        # Best effort by design, but record what went wrong so a failing
        # query can be diagnosed from the logs.
        logger.debug(
            "ai_router_feedback_aggregation_failed",
            error=str(exc),
            error_type=type(exc).__name__,
        )
        return {}

    return self._aggregate_feedback_stats(stats, repo=repo)
|
||
|
||
@staticmethod
|
||
def _aggregate_feedback_stats(
|
||
stats: list[dict], repo: str | None = None
|
||
) -> dict[str, float]:
|
||
"""純函數:過濾 repo 並將 stats 轉換為 {model: success_rate}(可獨立單元測試)。"""
|
||
out: dict[str, float] = {}
|
||
for row in stats:
|
||
if repo and row.get("repo") != repo:
|
||
continue
|
||
model = row.get("model")
|
||
if not model:
|
||
continue
|
||
out[model] = float(row.get("success_rate") or 0)
|
||
return out
|
||
|
||
|
||
# =============================================================================
|
||
# Phase 24 ADR-052: AI Provider Registry + Execution Layer
|
||
# =============================================================================
|
||
# 2026-04-02 ogt: 在現有 AIRouter (路由決策) 之上,加入 Provider 執行層
|
||
# 整合: ProviderRegistry + 閘門 (CB/RL/Sem) + Cache + Langfuse Trace
|
||
#
|
||
# 呼叫關係:
|
||
# openclaw.py → AIRouterExecutor.execute() → AIRouter.route() → Provider.analyze()
|
||
# =============================================================================
|
||
|
||
import asyncio
|
||
import hashlib
|
||
import json as _json
|
||
|
||
from src.core.config import get_settings
|
||
from src.services.ai_providers.interfaces import AIProvider as AIProviderProtocol, AIResult
|
||
|
||
# Settings object obtained once, at import time, from get_settings().
_settings = get_settings()
|
||
|
||
|
||
class _SimpleCircuitBreaker:
    """Lightweight per-provider circuit breaker (Phase 24 C2 fix).

    Deliberately not shared with OpenClawGuard, so that one failing provider
    (e.g. Gemini) does not block another (e.g. Ollama).

    The breaker is open once ``failure_threshold`` failures were recorded
    without an intervening success, and stays open until ``recovery_timeout``
    seconds have passed since the most recent failure. After that it lets
    calls through again (half-open) until the next failure or success is
    recorded.

    Args:
        name: Label of the protected provider.
        failure_threshold: Recorded failures needed to open the breaker.
        recovery_timeout: Seconds after the last failure during which the
            breaker stays open.
    """

    def __init__(self, name: str, failure_threshold: int = 5, recovery_timeout: float = 60.0) -> None:
        self.name = name
        self._failure_threshold = failure_threshold
        self._recovery_timeout = recovery_timeout
        self._failure_count = 0
        # Monotonic timestamp of the most recent failure. -inf means "never
        # failed", so the elapsed time is infinite and the breaker cannot
        # report open before a failure was recorded.
        self._last_failure_time: float = float("-inf")

    def is_open(self) -> bool:
        """Return True while calls to the provider should be blocked."""
        if self._failure_count < self._failure_threshold:
            return False
        # Measure the interval on the monotonic clock: wall-clock time can
        # jump (NTP, manual changes) and would distort the recovery window.
        elapsed = time.monotonic() - self._last_failure_time
        # Past the recovery timeout the breaker is half-open: allow a try.
        return elapsed <= self._recovery_timeout

    def record_success(self) -> None:
        """Reset the failure counter after a successful call."""
        self._failure_count = 0

    def record_failure(self) -> None:
        """Count one failure and remember when it happened."""
        self._failure_count += 1
        self._last_failure_time = time.monotonic()
|
||
|
||
|
||
class AIProviderRegistry:
    """Registry of AI providers, analogous to the MCP ProviderRegistry (ADR-015).

    Stores providers by name and hands out only the enabled ones. The same
    provider instance may be stored under more than one name (aliases), so
    shutdown closes each distinct instance exactly once.
    """

    def __init__(self) -> None:
        self._providers: dict[str, "AIProviderProtocol"] = {}

    def register(self, provider: "AIProviderProtocol") -> None:
        """Register a provider under ``provider.name`` (called at startup).

        A provider registered later under the same name replaces the earlier
        one.
        """
        self._providers[provider.name] = provider
        status = "enabled" if provider.is_enabled else "disabled"
        logger.info("ai_provider_registered", name=provider.name, status=status, privacy=provider.privacy_level)

    def get(self, name: str) -> "AIProviderProtocol | None":
        """Return the provider registered as ``name`` if it is enabled, else None."""
        provider = self._providers.get(name)
        if provider and provider.is_enabled:
            return provider
        return None

    def all_enabled(self) -> "list[AIProviderProtocol]":
        """Return every enabled provider, in registration order."""
        return [p for p in self._providers.values() if p.is_enabled]

    def names(self) -> list[str]:
        """Return every registered name, enabled or not."""
        return list(self._providers.keys())

    async def health_check_all(self) -> dict[str, bool]:
        """Run ``health_check()`` on every registered name.

        Returns:
            Mapping of registered name to health status. A provider whose
            health check raises is reported as unhealthy and the error is
            logged instead of being dropped silently.
        """
        results: dict[str, bool] = {}
        for name, provider in self._providers.items():
            try:
                results[name] = await provider.health_check()
            except Exception as exc:
                logger.warning("ai_provider_health_check_failed", name=name, error=str(exc))
                results[name] = False
        return results

    async def close_all(self) -> None:
        """Close the HTTP connections of all providers (I5 fix: shutdown hook).

        Providers without a ``close`` attribute are skipped. Failures are
        logged and do not stop the remaining providers from being closed.
        """
        closed_ids: set[int] = set()
        for name, provider in self._providers.items():
            # An instance registered under several names must only be closed once.
            if id(provider) in closed_ids:
                continue
            closed_ids.add(id(provider))
            try:
                if hasattr(provider, "close"):
                    await provider.close()
                    logger.info("ai_provider_closed", name=name)
            except Exception as e:
                logger.warning("ai_provider_close_failed", name=name, error=str(e))
|
||
|
||
|
||
class AIRouterExecutor:
|
||
"""
|
||
AI Router 執行層 (Phase 24 ADR-052)
|
||
|
||
職責:
|
||
1. Cache 檢查 (Redis, 跨 Provider 共享) — D4
|
||
2. 閘門控制 (Circuit Breaker → Rate Limiter → Semaphore) — D3
|
||
3. 呼叫 Provider.analyze() — 實際執行
|
||
4. 記錄 Langfuse Trace — D5
|
||
5. Mock Mode 攔截 — D13
|
||
|
||
設計原則:
|
||
- 只依賴 AIProviderProtocol,禁止 import 具體 Provider 類別
|
||
- 閘門在 Router,Provider 保持純粹 (Stateless Compute Units)
|
||
"""
|
||
|
||
def __init__(self, registry: AIProviderRegistry) -> None:
|
||
self._registry = registry
|
||
self._semaphores: dict[str, asyncio.Semaphore] = {}
|
||
# C2 修復: per-provider Circuit Breaker (不共用,避免一個掛全部擋)
|
||
self._circuit_breakers: dict[str, "_SimpleCircuitBreaker"] = {}
|
||
|
||
def _get_semaphore(self, name: str, limit: int = 3) -> asyncio.Semaphore:
|
||
"""取得 Provider 的並發 Semaphore (lazy init)"""
|
||
if name not in self._semaphores:
|
||
self._semaphores[name] = asyncio.Semaphore(limit)
|
||
return self._semaphores[name]
|
||
|
||
def _get_circuit_breaker(self, name: str) -> "_SimpleCircuitBreaker":
|
||
"""取得 Provider 的 Circuit Breaker (per-provider, lazy init)"""
|
||
if name not in self._circuit_breakers:
|
||
# 2026-04-05 Claude Code: v4.3 — NIM 使用更寬鬆的 CB 參數
|
||
# 每次都先跑 NIM,只有真正連線錯誤(非 timeout)才累積失敗
|
||
# failure_threshold=10: 需要 10 次真實錯誤才 OPEN(timeout 不計)
|
||
# recovery_timeout=30: 30s 後進入 half-open,立即重試 NIM
|
||
if name == "nemotron":
|
||
self._circuit_breakers[name] = _SimpleCircuitBreaker(
|
||
name, failure_threshold=10, recovery_timeout=30.0
|
||
)
|
||
else:
|
||
self._circuit_breakers[name] = _SimpleCircuitBreaker(name)
|
||
return self._circuit_breakers[name]
|
||
|
||
@staticmethod
|
||
def _cache_key(prompt: str, context: dict | None) -> str:
|
||
"""生成 Cache Key (與 openclaw.py 相容)"""
|
||
ctx_hash = ""
|
||
if context:
|
||
ctx_hash = f":{context.get('alert_type', '')}:{context.get('target_resource', '')}"
|
||
content = f"{prompt}{ctx_hash}"
|
||
return f"llm_cache:{hashlib.sha256(content.encode()).hexdigest()[:16]}"
|
||
|
||
async def execute(
|
||
self,
|
||
prompt: str,
|
||
provider_order: list[str],
|
||
context: dict | None = None,
|
||
cache_ttl: int = 3600,
|
||
require_local: bool = False,
|
||
) -> AIResult:
|
||
"""
|
||
核心執行方法 — 依序嘗試 Provider,含閘門 + Cache
|
||
|
||
Args:
|
||
prompt: LLM prompt
|
||
provider_order: Provider 名稱順序 (由 AIRouter.route 決定)
|
||
context: 額外上下文
|
||
cache_ttl: Cache TTL (秒)
|
||
require_local: 強制 local Provider (隱私)
|
||
|
||
Returns:
|
||
AIResult: 標準化結果
|
||
"""
|
||
# ① Mock Mode 攔截 (D13)
|
||
if _settings.MOCK_MODE:
|
||
logger.info("ai_router_mock_mode")
|
||
return AIResult(
|
||
raw_response=_json.dumps({
|
||
"action_title": "Mock Analysis",
|
||
"description": "Mock mode enabled",
|
||
"risk_level": "low",
|
||
"reasoning": "MOCK_MODE=true",
|
||
"confidence": 0.0,
|
||
}),
|
||
success=True,
|
||
provider="mock",
|
||
)
|
||
|
||
# ② Cache 檢查 (D4)
|
||
cache_key = self._cache_key(prompt, context) # C3 修復: 移到 try 外避免 UnboundLocalError
|
||
try:
|
||
from src.core.redis_client import get_redis
|
||
redis = get_redis()
|
||
cached = await redis.get(cache_key)
|
||
if cached:
|
||
data = _json.loads(cached)
|
||
cached_provider = data.get("provider", "cache")
|
||
provider_allowed = cached_provider in provider_order
|
||
ollama_first_required = (
|
||
bool(context)
|
||
and any(
|
||
key in context
|
||
for key in (
|
||
"alert_type",
|
||
"alertname",
|
||
"alert_name",
|
||
"fingerprint",
|
||
"incident_id",
|
||
"severity",
|
||
"target_resource",
|
||
)
|
||
)
|
||
and bool(provider_order)
|
||
and provider_order[0].startswith("ollama")
|
||
) or (
|
||
bool(context)
|
||
and bool(context.get("enforce_ollama_first"))
|
||
and bool(provider_order)
|
||
and provider_order[0].startswith("ollama")
|
||
)
|
||
if (
|
||
cached_provider == "ollama"
|
||
and any(provider.startswith("ollama") for provider in provider_order)
|
||
):
|
||
provider_allowed = True
|
||
if ollama_first_required and not cached_provider.startswith("ollama"):
|
||
provider_allowed = False
|
||
if not provider_allowed:
|
||
logger.info(
|
||
"ai_router_cache_provider_mismatch_skip",
|
||
cache_key=cache_key[:30],
|
||
cached_provider=cached_provider,
|
||
provider_order=provider_order,
|
||
ollama_first_required=ollama_first_required,
|
||
)
|
||
raise ValueError("cached provider not allowed by current provider_order")
|
||
logger.info("ai_router_cache_hit", cache_key=cache_key[:30])
|
||
return AIResult(
|
||
raw_response=data.get("response", ""),
|
||
success=True,
|
||
provider=cached_provider,
|
||
from_cache=True,
|
||
)
|
||
except Exception as e:
|
||
logger.debug("ai_router_cache_read_failed", error=str(e))
|
||
|
||
# ③ 遍歷 Provider + 閘門 (D3)
|
||
# 2026-04-02 ogt: C1 修復 — 建立 Langfuse Trace (D5)
|
||
# 包住整個執行鏈,記錄每個 Provider 的 generation
|
||
try:
|
||
from src.services.langfuse_client import langfuse_trace
|
||
_lf_trace_ctx = langfuse_trace(
|
||
"ai_router_execute",
|
||
metadata={
|
||
"provider_order": provider_order,
|
||
"prompt_length": len(prompt),
|
||
"require_local": require_local,
|
||
"alert_type": (context or {}).get("alert_type", ""),
|
||
},
|
||
)
|
||
_lf_trace_ctx.__enter__()
|
||
except Exception:
|
||
_lf_trace_ctx = None
|
||
|
||
errors: list[str] = []
|
||
attempted_providers: set[str] = set()
|
||
alert_requires_ollama_before_cloud = bool(
|
||
(context or {}).get("alert_requires_ollama_before_cloud")
|
||
)
|
||
|
||
# 2026-04-27 Claude Sonnet 4.6: A2 INC-20260425 — DIAGNOSE fallback metric 追蹤
|
||
# 透過 context.get("intent_hint") 判斷是否為 DIAGNOSE,避免改動 execute() 簽名
|
||
# _last_attempted_provider 記錄上一輪嘗試的 provider,用於計算 from→to 關係
|
||
_is_diagnose_intent = str((context or {}).get("intent_hint", "")).strip().lower() == "diagnose"
|
||
_last_attempted_provider: str | None = None
|
||
|
||
for provider_name in provider_order:
|
||
# 2026-04-27 Claude Sonnet 4.6: A2 — 若上一輪失敗且本輪開始,表示發生 fallback
|
||
# 記錄 metric(DIAGNOSE intent 專屬;非 DIAGNOSE 不記,不影響其他路徑)
|
||
# 2026-04-27 Claude Sonnet 4.6: F6 — fallback metric 只在真實 analyze() 失敗時觸發
|
||
# _last_attempted_provider 僅在 provider.analyze() 執行後失敗才賦值(見下方兩處);
|
||
# not_registered / privacy_skip / circuit_open / rate_limit 分支均不賦值,
|
||
# 避免這些「被跳過的 provider」誤計入 from→to fallback 鏈,metric 不可信問題(F6)。
|
||
if _is_diagnose_intent and _last_attempted_provider is not None:
|
||
try:
|
||
from src.core.metrics import record_diagnose_fallback
|
||
record_diagnose_fallback(
|
||
from_provider=_last_attempted_provider,
|
||
to_provider=provider_name,
|
||
)
|
||
logger.info(
|
||
"diagnose_fallback_recorded",
|
||
from_provider=_last_attempted_provider,
|
||
to_provider=provider_name,
|
||
)
|
||
except Exception as _metric_e:
|
||
# 2026-04-27 Claude Sonnet 4.6: F6 — 升 warning(原 debug 會 silent swallow)
|
||
# + 計入 error counter 讓 metric 管道問題可被 Prometheus 偵測
|
||
logger.warning("diagnose_fallback_metric_failed", error=str(_metric_e))
|
||
try:
|
||
from src.core.metrics import AIOPS_DIAGNOSE_FALLBACK_METRIC_ERROR_TOTAL
|
||
AIOPS_DIAGNOSE_FALLBACK_METRIC_ERROR_TOTAL.inc()
|
||
except Exception:
|
||
pass
|
||
|
||
provider = self._registry.get(provider_name)
|
||
if not provider:
|
||
# 2026-04-14 Claude Sonnet 4.6: silent skip 改 errors 累積(觀測性)
|
||
# 2026-04-27 Claude Sonnet 4.6: F6 — 不設 _last_attempted_provider(未真實執行 analyze)
|
||
errors.append(f"{provider_name}: not_registered")
|
||
continue
|
||
|
||
# 隱私過濾 (D7)
|
||
# 2026-04-27 Claude Sonnet 4.6: F6 — privacy_skip 不設 _last_attempted_provider(未嘗試)
|
||
if require_local and provider.privacy_level != "local":
|
||
errors.append(f"{provider_name}: privacy_skip(non_local)")
|
||
continue
|
||
|
||
if alert_requires_ollama_before_cloud and provider.privacy_level == "cloud":
|
||
if "ollama_local" not in attempted_providers:
|
||
errors.append(f"{provider_name}: blocked_until_ollama_local_attempted")
|
||
logger.warning(
|
||
"ai_router_cloud_blocked_until_ollama_local_attempted",
|
||
provider=provider_name,
|
||
provider_order=provider_order,
|
||
attempted_providers=sorted(attempted_providers),
|
||
)
|
||
continue
|
||
|
||
# 閘門 1: Circuit Breaker (per-provider, C2 修復)
|
||
cb = self._get_circuit_breaker(provider_name)
|
||
if cb.is_open():
|
||
if alert_requires_ollama_before_cloud and provider_name.startswith("ollama"):
|
||
logger.warning(
|
||
"ai_router_alert_ollama_circuit_bypassed",
|
||
provider=provider_name,
|
||
reason="alert_requires_ollama_before_cloud",
|
||
)
|
||
else:
|
||
errors.append(f"{provider_name}: circuit_open")
|
||
logger.warning("ai_router_circuit_open", provider=provider_name)
|
||
# 2026-04-27 Claude Sonnet 4.6: F6 — circuit_open 不設 _last_attempted_provider(未嘗試)
|
||
continue
|
||
|
||
# 閘門 2: Rate Limiter
|
||
# 2026-04-02 Claude Code: Phase 24 B3 + C1 修復 — Rate Limiter (含 openclaw_nemo)
|
||
if provider_name in ("openclaw_nemo", "nemotron", "gemini", "claude"):
|
||
try:
|
||
from src.services.ai_rate_limiter import get_ai_rate_limiter
|
||
rate_limiter = get_ai_rate_limiter()
|
||
allowed, reason = await rate_limiter.check_and_increment(provider_name)
|
||
if not allowed:
|
||
errors.append(f"{provider_name}: rate_limit({reason})")
|
||
logger.info("ai_router_rate_limited", provider=provider_name, reason=reason)
|
||
continue
|
||
except Exception as e:
|
||
logger.debug("ai_router_rate_limiter_error", error=str(e))
|
||
|
||
# 閘門 3: Semaphore (並發控制)
|
||
sem = self._get_semaphore(provider_name)
|
||
async with sem:
|
||
try:
|
||
attempted_providers.add(provider_name)
|
||
result = await provider.analyze(prompt, context)
|
||
|
||
if result.success:
|
||
# 記錄成功 (per-provider CB)
|
||
cb.record_success()
|
||
|
||
# 記錄費用
|
||
if result.cost_usd > 0:
|
||
try:
|
||
rate_limiter = get_ai_rate_limiter()
|
||
await rate_limiter.record_cost(provider_name, result.cost_usd)
|
||
except Exception:
|
||
pass
|
||
|
||
# 寫入 Cache (D4)
|
||
try:
|
||
redis = get_redis()
|
||
cache_data = _json.dumps({
|
||
"response": result.raw_response,
|
||
"provider": result.provider,
|
||
"cached_at": time.strftime("%Y-%m-%dT%H:%M:%S+08:00"),
|
||
})
|
||
await redis.set(cache_key, cache_data, ex=cache_ttl)
|
||
except Exception:
|
||
pass
|
||
|
||
logger.info(
|
||
"ai_router_execute_success",
|
||
provider=provider_name,
|
||
latency_ms=round(result.latency_ms, 1),
|
||
tokens=result.tokens,
|
||
from_cache=False,
|
||
)
|
||
# D5: 記錄 Langfuse generation
|
||
if _lf_trace_ctx:
|
||
try:
|
||
_lf_trace_ctx.generation(
|
||
name=f"{provider_name}_call",
|
||
model=provider_name,
|
||
input=prompt[:500],
|
||
output=result.raw_response[:500],
|
||
usage={"total": result.tokens} if result.tokens else None,
|
||
metadata={"cost_usd": result.cost_usd, "latency_ms": round(result.latency_ms, 1)},
|
||
)
|
||
_lf_trace_ctx.__exit__(None, None, None)
|
||
except Exception:
|
||
pass
|
||
return result
|
||
|
||
# Provider 回傳 success=False
|
||
errors.append(f"{provider_name}: {result.error}")
|
||
logger.warning("ai_router_provider_failed", provider=provider_name, error=result.error)
|
||
# 2026-04-27 A2: 記錄失敗的 provider,供下輪迭代計算 fallback metric
|
||
_last_attempted_provider = provider_name
|
||
|
||
except Exception as e:
|
||
errors.append(f"{provider_name}: {e}")
|
||
logger.warning("ai_router_provider_exception", provider=provider_name, error=str(e))
|
||
# 2026-04-05 Claude Code: v4.3 — Timeout 不計 CB 失敗
|
||
# NIM 偶爾 GPU 忙碌導致 27s,timeout 不代表 NIM 故障
|
||
# 只有明確連線錯誤(非 timeout)才累積 CB 失敗次數
|
||
import httpx as _httpx
|
||
if not isinstance(e, _httpx.TimeoutException):
|
||
cb.record_failure()
|
||
# 2026-04-27 A2: 記錄失敗的 provider,供下輪迭代計算 fallback metric
|
||
_last_attempted_provider = provider_name
|
||
|
||
# 全部失敗
|
||
logger.error("ai_router_all_providers_failed", tried=provider_order, errors=errors)
|
||
if _lf_trace_ctx:
|
||
try:
|
||
_lf_trace_ctx.__exit__(None, None, None)
|
||
except Exception:
|
||
pass
|
||
|
||
# 2026-04-04 ogt: Phase 25 P0 — require_local 全部失敗時 Telegram 通知(隱私邊界)
|
||
# 2026-04-15 ogt: 改用 ADR-075 TYPE-1 格式,禁止純文字 raw notification
|
||
if require_local:
|
||
try:
|
||
from src.services.telegram_gateway import get_telegram_gateway
|
||
tg = get_telegram_gateway()
|
||
import asyncio as _asyncio
|
||
tried_str = ", ".join(provider_order)
|
||
formatted = (
|
||
"⚠️ <b>TYPE-1 | AI Provider 不可用</b>\n"
|
||
"──────────────────────\n"
|
||
f"├─ 已嘗試: <code>{tried_str}</code>\n"
|
||
"└─ 原因: require_local=True,無可用本地 Provider\n"
|
||
"\n"
|
||
"需要人工介入"
|
||
)
|
||
_asyncio.create_task(
|
||
tg.send_alert_notification(formatted, parse_mode="HTML")
|
||
)
|
||
except Exception as _tg_e:
|
||
logger.warning("diagnose_reject_telegram_failed", error=str(_tg_e))
|
||
return AIResult(
|
||
raw_response="",
|
||
success=False,
|
||
provider="none",
|
||
error="local_providers_unavailable",
|
||
)
|
||
|
||
return AIResult(
|
||
raw_response="",
|
||
success=False,
|
||
provider="none",
|
||
error=f"All providers failed: {'; '.join(errors)}",
|
||
)
|
||
|
||
|
||
# =============================================================================
|
||
# 單例管理
|
||
# =============================================================================
|
||
|
||
# Lazily-created module-level singletons. Each stays None until its
# get_ai_*() accessor is first called; reset_ai_router() sets all three back
# to None.
_router: AIRouter | None = None
_registry: AIProviderRegistry | None = None
_executor: AIRouterExecutor | None = None
|
||
|
||
|
||
def _init_registry() -> AIProviderRegistry:
    """Build a fresh provider registry with every known provider registered.

    Provider classes are imported here rather than at module level. The
    registration order is: Ollama GCP-A, Gemini, Claude, OpenClaw Nemo,
    Nemotron, Ollama local, Ollama GCP-B.
    """
    from src.services.ai_providers.claude import ClaudeProvider
    from src.services.ai_providers.gemini import GeminiProvider
    from src.services.ai_providers.nemotron import NemotronProvider
    from src.services.ai_providers.ollama import (
        OllamaGcpBProvider,  # 2026-05-04 ADR-110 GCP-B
        OllamaLocalProvider,
        OllamaProvider,
    )
    from src.services.ai_providers.openclaw_nemo import OpenClawNemoProvider

    registry = AIProviderRegistry()

    # GCP-A primary (name="ollama", OLLAMA_URL).
    gcp_a_provider = OllamaProvider()
    registry.register(gcp_a_provider)

    # Cloud providers; NemotronProvider was added in Phase 24 B3
    # (tool_calling first).
    for provider_cls in (
        GeminiProvider,
        ClaudeProvider,
        OpenClawNemoProvider,
        NemotronProvider,
    ):
        registry.register(provider_cls())

    # 2026-05-06 Codex: 188 is no longer an Ollama provider. The local
    # fallback is uniformly named ollama_local; its endpoint comes from
    # OLLAMA_FALLBACK_URL (111 / 110 proxy).
    local_provider = OllamaLocalProvider()
    registry.register(local_provider)

    # 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP three-tier failover fix.
    # Root cause: OllamaFailoverManager returns "ollama_gcp_a" /
    # "ollama_gcp_b" / "ollama_local", but the registry had none of those
    # names → not_registered → the whole Ollama chain was skipped and
    # requests jumped straight to Gemini.
    # Fix:
    #   "ollama_gcp_a" → alias of the OllamaProvider above (OLLAMA_URL = GCP-A)
    #   "ollama_gcp_b" → OllamaGcpBProvider (OLLAMA_SECONDARY_URL = GCP-B)
    #   "ollama_local" → OllamaLocalProvider (OLLAMA_FALLBACK_URL = 111 / 110:11437)
    registry._providers["ollama_gcp_a"] = gcp_a_provider
    registry.register(OllamaGcpBProvider())
    registry._providers["ollama_local"] = local_provider

    return registry
|
||
|
||
|
||
def get_ai_router() -> AIRouter:
    """Return the shared AIRouter (routing decisions), creating it on first use."""
    global _router
    if _router is not None:
        return _router
    _router = AIRouter()
    return _router
|
||
|
||
|
||
def get_ai_registry() -> AIProviderRegistry:
    """Return the shared AIProviderRegistry, building it on first use."""
    global _registry
    if _registry is not None:
        return _registry
    _registry = _init_registry()
    return _registry
|
||
|
||
|
||
def get_ai_executor() -> AIRouterExecutor:
    """Return the shared AIRouterExecutor (routing + execution).

    The executor is created on first use, on top of the shared registry.
    """
    global _executor
    if _executor is not None:
        return _executor
    _executor = AIRouterExecutor(get_ai_registry())
    return _executor
|
||
|
||
|
||
def reset_ai_router() -> None:
    """Drop the router, registry and executor singletons (used by tests).

    The next get_ai_*() call creates new instances.
    """
    global _router, _registry, _executor
    _router = _registry = _executor = None
|