|
|
|
|
@@ -64,7 +64,7 @@ logger = structlog.get_logger(__name__)
|
|
|
|
|
# =============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AIProvider(Enum):
|
|
|
|
|
class AIProviderEnum(Enum):
|
|
|
|
|
"""AI 提供者"""
|
|
|
|
|
|
|
|
|
|
OLLAMA = "ollama"
|
|
|
|
|
@@ -75,12 +75,12 @@ class AIProvider(Enum):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Per-provider latency budget, in milliseconds.
# NOTE: the key type is AIProviderEnum; the previous annotation named
# "AIProviderEnumEnum", which is not defined anywhere and would raise
# NameError when the module-level annotation is evaluated.
PROVIDER_LATENCY_BUDGET: dict[AIProviderEnum, int] = {
    AIProviderEnum.OLLAMA: 60000,  # local; a longer processing time is allowed
    AIProviderEnum.GEMINI: 30000,  # cloud; lower latency
    AIProviderEnum.CLAUDE: 30000,  # cloud; lower latency
    # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (latency 11-45s)
    AIProviderEnum.NVIDIA: 60000,  # dedicated to tool calling; a longer time is allowed
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -116,7 +116,7 @@ class IAIRouter(Protocol):
|
|
|
|
|
|
|
|
|
|
def route_tool_calling(
|
|
|
|
|
self,
|
|
|
|
|
) -> tuple[AIProvider, str, list[tuple[AIProvider, str]]]:
|
|
|
|
|
) -> tuple[AIProviderEnum, str, list[tuple[AIProviderEnum, str]]]:
|
|
|
|
|
"""Tool Calling 專用路由"""
|
|
|
|
|
...
|
|
|
|
|
|
|
|
|
|
@@ -130,9 +130,9 @@ class RoutingDecision:
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
# 核心決策
|
|
|
|
|
selected_provider: AIProvider # 選擇的 AI Provider
|
|
|
|
|
selected_provider: AIProviderEnum # 選擇的 AI Provider
|
|
|
|
|
selected_model: str # 選擇的模型名稱
|
|
|
|
|
fallback_chain: list[tuple[AIProvider, str]] # 備援鏈 [(provider, model), ...]
|
|
|
|
|
fallback_chain: list[tuple[AIProviderEnum, str]] # 備援鏈 [(provider, model), ...]
|
|
|
|
|
routing_reason: str # 路由決策原因
|
|
|
|
|
latency_budget_ms: int # 延遲預算 (毫秒)
|
|
|
|
|
|
|
|
|
|
@@ -213,45 +213,45 @@ class AIRouter:
|
|
|
|
|
self._nvidia_default = self._model_registry.get_model("nvidia", "default")
|
|
|
|
|
|
|
|
|
|
# Provider 對應模型映射
|
|
|
|
|
self._provider_models: dict[AIProvider, str] = {
|
|
|
|
|
AIProvider.OLLAMA: self._ollama_default,
|
|
|
|
|
AIProvider.GEMINI: self._gemini_default,
|
|
|
|
|
AIProvider.CLAUDE: self._claude_default,
|
|
|
|
|
AIProvider.NVIDIA: self._nvidia_default, # ADR-036
|
|
|
|
|
self._provider_models: dict[AIProviderEnum, str] = {
|
|
|
|
|
AIProviderEnum.OLLAMA: self._ollama_default,
|
|
|
|
|
AIProviderEnum.GEMINI: self._gemini_default,
|
|
|
|
|
AIProviderEnum.CLAUDE: self._claude_default,
|
|
|
|
|
AIProviderEnum.NVIDIA: self._nvidia_default, # ADR-036
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# 完整 Fallback 鏈 (Provider, Model)
|
|
|
|
|
# 2026-03-30 ogt: NVIDIA 成為首選仲裁,加入 Fallback 鏈首位
|
|
|
|
|
self._full_fallback_chain: list[tuple[AIProvider, str]] = [
|
|
|
|
|
(AIProvider.NVIDIA, self._nvidia_default),
|
|
|
|
|
(AIProvider.GEMINI, self._gemini_default),
|
|
|
|
|
(AIProvider.CLAUDE, self._claude_default),
|
|
|
|
|
(AIProvider.OLLAMA, self._ollama_default),
|
|
|
|
|
self._full_fallback_chain: list[tuple[AIProviderEnum, str]] = [
|
|
|
|
|
(AIProviderEnum.NVIDIA, self._nvidia_default),
|
|
|
|
|
(AIProviderEnum.GEMINI, self._gemini_default),
|
|
|
|
|
(AIProviderEnum.CLAUDE, self._claude_default),
|
|
|
|
|
(AIProviderEnum.OLLAMA, self._ollama_default),
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
# Tool Calling 專用 Fallback 鏈 (ADR-036)
|
|
|
|
|
self._tool_calling_fallback_chain: list[tuple[AIProvider, str]] = [
|
|
|
|
|
(AIProvider.NVIDIA, self._nvidia_default),
|
|
|
|
|
(AIProvider.GEMINI, self._gemini_default),
|
|
|
|
|
(AIProvider.CLAUDE, self._claude_default),
|
|
|
|
|
self._tool_calling_fallback_chain: list[tuple[AIProviderEnum, str]] = [
|
|
|
|
|
(AIProviderEnum.NVIDIA, self._nvidia_default),
|
|
|
|
|
(AIProviderEnum.GEMINI, self._gemini_default),
|
|
|
|
|
(AIProviderEnum.CLAUDE, self._claude_default),
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
# 意圖對應 Provider 強制覆寫 (None = 依複雜度決定)
|
|
|
|
|
self._intent_provider_overrides: dict[IntentType, AIProvider | None] = {
|
|
|
|
|
self._intent_provider_overrides: dict[IntentType, AIProviderEnum | None] = {
|
|
|
|
|
# 四大核心意圖
|
|
|
|
|
IntentType.RESTART: None, # 依複雜度
|
|
|
|
|
IntentType.SCALE: None, # 依複雜度
|
|
|
|
|
IntentType.CONFIG: None, # 依複雜度 (但 HIGH 會升級)
|
|
|
|
|
IntentType.DIAGNOSE: AIProvider.OLLAMA, # 診斷優先本地 (隱私)
|
|
|
|
|
IntentType.DIAGNOSE: AIProviderEnum.OLLAMA, # 診斷優先本地 (隱私)
|
|
|
|
|
# 輔助意圖
|
|
|
|
|
IntentType.DELETE: AIProvider.CLAUDE, # CRITICAL → 強制 Claude
|
|
|
|
|
IntentType.DELETE: AIProviderEnum.CLAUDE, # CRITICAL → 強制 Claude
|
|
|
|
|
IntentType.ROLLBACK: None, # 依複雜度
|
|
|
|
|
IntentType.UNKNOWN: None,
|
|
|
|
|
# 舊版兼容
|
|
|
|
|
IntentType.CODE_REVIEW: None,
|
|
|
|
|
IntentType.DEPLOYMENT: None,
|
|
|
|
|
IntentType.ALERT_TRIAGE: AIProvider.OLLAMA,
|
|
|
|
|
IntentType.QUERY: AIProvider.OLLAMA,
|
|
|
|
|
IntentType.ALERT_TRIAGE: AIProviderEnum.OLLAMA,
|
|
|
|
|
IntentType.QUERY: AIProviderEnum.OLLAMA,
|
|
|
|
|
IntentType.MAINTENANCE: None,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -339,7 +339,7 @@ class AIRouter:
|
|
|
|
|
intent: IntentType,
|
|
|
|
|
intent_result: IntentResult,
|
|
|
|
|
complexity: ComplexityScore,
|
|
|
|
|
) -> tuple[AIProvider, str, str]:
|
|
|
|
|
) -> tuple[AIProviderEnum, str, str]:
|
|
|
|
|
"""
|
|
|
|
|
選擇 Provider 和模型 (Phase 13.3 #87 核心邏輯)
|
|
|
|
|
|
|
|
|
|
@@ -368,7 +368,7 @@ class AIRouter:
|
|
|
|
|
# 規則 1: CRITICAL 風險強制 Claude (最高優先級)
|
|
|
|
|
# =======================================================================
|
|
|
|
|
if risk == RiskLevel.CRITICAL:
|
|
|
|
|
provider = AIProvider.CLAUDE
|
|
|
|
|
provider = AIProviderEnum.CLAUDE
|
|
|
|
|
model = self._claude_default
|
|
|
|
|
reason = f"CRITICAL 風險 ({intent.value}) 強制使用 Claude"
|
|
|
|
|
return provider, model, reason
|
|
|
|
|
@@ -377,7 +377,7 @@ class AIRouter:
|
|
|
|
|
# 規則 2: DELETE 意圖強制 Claude (不可逆操作)
|
|
|
|
|
# =======================================================================
|
|
|
|
|
if intent == IntentType.DELETE:
|
|
|
|
|
provider = AIProvider.CLAUDE
|
|
|
|
|
provider = AIProviderEnum.CLAUDE
|
|
|
|
|
model = self._claude_default
|
|
|
|
|
reason = "DELETE 意圖 (不可逆) 強制使用 Claude"
|
|
|
|
|
return provider, model, reason
|
|
|
|
|
@@ -396,7 +396,7 @@ class AIRouter:
|
|
|
|
|
# 規則 4: 複雜度 4-5 或 HIGH 風險 → Nvidia Nemotron
|
|
|
|
|
# =======================================================================
|
|
|
|
|
if score >= 4 or risk == RiskLevel.HIGH:
|
|
|
|
|
provider = AIProvider.NVIDIA
|
|
|
|
|
provider = AIProviderEnum.NVIDIA
|
|
|
|
|
model = self._nvidia_default
|
|
|
|
|
reason = f"複雜度={score}/5, 風險={risk.value} → Nvidia (fallback Gemini)"
|
|
|
|
|
return provider, model, reason
|
|
|
|
|
@@ -405,7 +405,7 @@ class AIRouter:
|
|
|
|
|
# 規則 5: 複雜度 3 + MEDIUM → Ollama (fallback Gemini)
|
|
|
|
|
# =======================================================================
|
|
|
|
|
if score == 3:
|
|
|
|
|
provider = AIProvider.OLLAMA
|
|
|
|
|
provider = AIProviderEnum.OLLAMA
|
|
|
|
|
model = self._ollama_default
|
|
|
|
|
reason = f"複雜度={score}/5, 風險={risk.value} → Ollama (fallback Gemini)"
|
|
|
|
|
return provider, model, reason
|
|
|
|
|
@@ -413,7 +413,7 @@ class AIRouter:
|
|
|
|
|
# =======================================================================
|
|
|
|
|
# 規則 6: 複雜度 1-2 + LOW/MEDIUM → Ollama (快速本地處理)
|
|
|
|
|
# =======================================================================
|
|
|
|
|
provider = AIProvider.OLLAMA
|
|
|
|
|
provider = AIProviderEnum.OLLAMA
|
|
|
|
|
# 低複雜度使用輕量模型 (更快回應)
|
|
|
|
|
model = self._ollama_summary if score <= 1 else self._ollama_default
|
|
|
|
|
reason = f"複雜度={score}/5, 風險={risk.value} → Ollama (成本優先)"
|
|
|
|
|
@@ -444,8 +444,8 @@ class AIRouter:
|
|
|
|
|
return model, reason
|
|
|
|
|
|
|
|
|
|
def _build_fallback_chain(
|
|
|
|
|
self, selected_provider: AIProvider
|
|
|
|
|
) -> list[tuple[AIProvider, str]]:
|
|
|
|
|
self, selected_provider: AIProviderEnum
|
|
|
|
|
) -> list[tuple[AIProviderEnum, str]]:
|
|
|
|
|
"""
|
|
|
|
|
建立 Fallback 鏈 (排除已選 Provider)
|
|
|
|
|
|
|
|
|
|
@@ -457,7 +457,7 @@ class AIRouter:
|
|
|
|
|
Returns:
|
|
|
|
|
Fallback 鏈 [(provider, model), ...]
|
|
|
|
|
"""
|
|
|
|
|
fallback_chain: list[tuple[AIProvider, str]] = []
|
|
|
|
|
fallback_chain: list[tuple[AIProviderEnum, str]] = []
|
|
|
|
|
|
|
|
|
|
for provider, model in self._full_fallback_chain:
|
|
|
|
|
if provider != selected_provider:
|
|
|
|
|
@@ -527,7 +527,7 @@ class AIRouter:
|
|
|
|
|
# Tool Calling 路由 (ADR-036)
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
|
|
def route_tool_calling(self) -> tuple[AIProvider, str, list[tuple[AIProvider, str]]]:
|
|
|
|
|
def route_tool_calling(self) -> tuple[AIProviderEnum, str, list[tuple[AIProviderEnum, str]]]:
|
|
|
|
|
"""
|
|
|
|
|
Tool Calling 專用路由 (ADR-036)
|
|
|
|
|
|
|
|
|
|
@@ -537,7 +537,7 @@ class AIRouter:
|
|
|
|
|
Returns:
|
|
|
|
|
(provider, model, fallback_chain)
|
|
|
|
|
"""
|
|
|
|
|
provider = AIProvider.NVIDIA
|
|
|
|
|
provider = AIProviderEnum.NVIDIA
|
|
|
|
|
model = self._nvidia_default
|
|
|
|
|
fallback_chain = [
|
|
|
|
|
(p, m) for p, m in self._tool_calling_fallback_chain if p != provider
|
|
|
|
|
@@ -552,7 +552,7 @@ class AIRouter:
|
|
|
|
|
|
|
|
|
|
return provider, model, fallback_chain
|
|
|
|
|
|
|
|
|
|
def get_tool_calling_fallback_chain(self) -> list[tuple[AIProvider, str]]:
|
|
|
|
|
def get_tool_calling_fallback_chain(self) -> list[tuple[AIProviderEnum, str]]:
|
|
|
|
|
"""取得 Tool Calling Fallback 鏈"""
|
|
|
|
|
return self._tool_calling_fallback_chain.copy()
|
|
|
|
|
|
|
|
|
|
@@ -560,12 +560,12 @@ class AIRouter:
|
|
|
|
|
# 便捷方法
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
|
|
def get_provider_for_intent(self, intent: IntentType) -> AIProvider:
|
|
|
|
|
def get_provider_for_intent(self, intent: IntentType) -> AIProviderEnum:
|
|
|
|
|
"""取得意圖對應的 Provider (不考慮複雜度)"""
|
|
|
|
|
override = self._intent_provider_overrides.get(intent)
|
|
|
|
|
return override if override else AIProvider.OLLAMA
|
|
|
|
|
return override if override else AIProviderEnum.OLLAMA
|
|
|
|
|
|
|
|
|
|
def get_model_for_provider(self, provider: AIProvider) -> str:
|
|
|
|
|
def get_model_for_provider(self, provider: AIProviderEnum) -> str:
|
|
|
|
|
"""取得 Provider 對應的模型"""
|
|
|
|
|
return self._provider_models.get(provider, self._ollama_default)
|
|
|
|
|
|
|
|
|
|
@@ -636,6 +636,36 @@ from src.services.ai_providers.interfaces import AIProvider as AIProviderProtoco
|
|
|
|
|
_settings = get_settings()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class _SimpleCircuitBreaker:
    """Lightweight per-provider circuit breaker (Phase 24 C2 fix).

    Deliberately not shared with OpenClawGuard, so that a Gemini outage does
    not block Ollama as well.

    The breaker opens once ``failure_threshold`` failures have been recorded
    without an intervening success. When more than ``recovery_timeout``
    seconds have passed since the last recorded failure it lets calls through
    again (half-open): the next recorded failure re-opens it, and a recorded
    success resets the failure count.

    Elapsed time is measured with ``time.monotonic()`` so that wall-clock
    adjustments cannot close the breaker early or keep it open.
    """

    def __init__(self, name: str, failure_threshold: int = 5, recovery_timeout: float = 60.0) -> None:
        """Create a closed breaker.

        Args:
            name: Identifier stored on the breaker (the provider name at the call site).
            failure_threshold: Number of recorded failures at which the breaker opens.
            recovery_timeout: Seconds after the last failure before calls are allowed again.
        """
        self.name = name
        self._failure_threshold = failure_threshold
        self._recovery_timeout = recovery_timeout
        self._failure_count = 0
        # -inf means "no failure recorded yet": the elapsed time is then
        # infinite, so a breaker that never failed is never reported open.
        self._last_failure_time: float = float("-inf")

    def is_open(self) -> bool:
        """Return True when calls to the provider should currently be skipped."""
        if self._failure_count < self._failure_threshold:
            return False
        # Past the recovery timeout -> half-open: let calls through until the
        # next recorded failure refreshes the timestamp.
        elapsed = time.monotonic() - self._last_failure_time
        return elapsed <= self._recovery_timeout

    def record_success(self) -> None:
        """Reset the failure count, closing the breaker."""
        self._failure_count = 0

    def record_failure(self) -> None:
        """Count one failure and remember when it happened."""
        self._failure_count += 1
        self._last_failure_time = time.monotonic()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AIProviderRegistry:
|
|
|
|
|
"""
|
|
|
|
|
AI Provider 註冊中心 — 類比 MCP ProviderRegistry (ADR-015)
|
|
|
|
|
@@ -646,7 +676,7 @@ class AIProviderRegistry:
|
|
|
|
|
def __init__(self) -> None:
|
|
|
|
|
self._providers: dict[str, AIProviderProtocol] = {}
|
|
|
|
|
|
|
|
|
|
def register(self, provider: AIProviderProtocol) -> None:
|
|
|
|
|
def register(self, provider: AIProviderEnumProtocol) -> None:
|
|
|
|
|
"""註冊 Provider (啟動時呼叫)"""
|
|
|
|
|
self._providers[provider.name] = provider
|
|
|
|
|
status = "enabled" if provider.is_enabled else "disabled"
|
|
|
|
|
@@ -677,6 +707,16 @@ class AIProviderRegistry:
|
|
|
|
|
results[name] = False
|
|
|
|
|
return results
|
|
|
|
|
|
|
|
|
|
async def close_all(self) -> None:
    """Close the HTTP connections of all providers (I5 fix: shutdown hook).

    Providers without a ``close`` attribute are skipped. An exception raised
    while closing one provider is logged as a warning and does not prevent
    the remaining providers from being closed.
    """
    for provider_name, provider in self._providers.items():
        try:
            if not hasattr(provider, "close"):
                continue
            await provider.close()
            logger.info("ai_provider_closed", name=provider_name)
        except Exception as exc:
            logger.warning("ai_provider_close_failed", name=provider_name, error=str(exc))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AIRouterExecutor:
|
|
|
|
|
"""
|
|
|
|
|
@@ -697,6 +737,8 @@ class AIRouterExecutor:
|
|
|
|
|
def __init__(self, registry: AIProviderRegistry) -> None:
|
|
|
|
|
self._registry = registry
|
|
|
|
|
self._semaphores: dict[str, asyncio.Semaphore] = {}
|
|
|
|
|
# C2 修復: per-provider Circuit Breaker (不共用,避免一個掛全部擋)
|
|
|
|
|
self._circuit_breakers: dict[str, "_SimpleCircuitBreaker"] = {}
|
|
|
|
|
|
|
|
|
|
def _get_semaphore(self, name: str, limit: int = 3) -> asyncio.Semaphore:
|
|
|
|
|
"""取得 Provider 的並發 Semaphore (lazy init)"""
|
|
|
|
|
@@ -704,6 +746,12 @@ class AIRouterExecutor:
|
|
|
|
|
self._semaphores[name] = asyncio.Semaphore(limit)
|
|
|
|
|
return self._semaphores[name]
|
|
|
|
|
|
|
|
|
|
def _get_circuit_breaker(self, name: str) -> "_SimpleCircuitBreaker":
    """Return the circuit breaker for ``name``, creating it on first use.

    One breaker is kept per provider name in ``self._circuit_breakers``.
    """
    try:
        return self._circuit_breakers[name]
    except KeyError:
        # First request for this provider: create and remember its breaker.
        breaker = _SimpleCircuitBreaker(name)
        self._circuit_breakers[name] = breaker
        return breaker
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def _cache_key(prompt: str, context: dict | None) -> str:
|
|
|
|
|
"""生成 Cache Key (與 openclaw.py 相容)"""
|
|
|
|
|
@@ -750,10 +798,10 @@ class AIRouterExecutor:
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# ② Cache 檢查 (D4)
|
|
|
|
|
cache_key = self._cache_key(prompt, context) # C3 修復: 移到 try 外避免 UnboundLocalError
|
|
|
|
|
try:
|
|
|
|
|
from src.core.redis_client import get_redis
|
|
|
|
|
redis = get_redis()
|
|
|
|
|
cache_key = self._cache_key(prompt, context)
|
|
|
|
|
cached = await redis.get(cache_key)
|
|
|
|
|
if cached:
|
|
|
|
|
data = _json.loads(cached)
|
|
|
|
|
@@ -779,15 +827,11 @@ class AIRouterExecutor:
|
|
|
|
|
if require_local and provider.privacy_level != "local":
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
# 閘門 1: Circuit Breaker
|
|
|
|
|
try:
|
|
|
|
|
from src.core.circuit_breaker import get_openclaw_guard
|
|
|
|
|
guard = get_openclaw_guard()
|
|
|
|
|
if guard.is_circuit_open():
|
|
|
|
|
logger.debug("ai_router_circuit_open", provider=provider_name)
|
|
|
|
|
continue
|
|
|
|
|
except Exception:
|
|
|
|
|
pass # Circuit Breaker 不阻塞主流程
|
|
|
|
|
# 閘門 1: Circuit Breaker (per-provider, C2 修復)
|
|
|
|
|
cb = self._get_circuit_breaker(provider_name)
|
|
|
|
|
if cb.is_open():
|
|
|
|
|
logger.debug("ai_router_circuit_open", provider=provider_name)
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
# 閘門 2: Rate Limiter
|
|
|
|
|
if provider_name in ("nvidia", "gemini", "claude"):
|
|
|
|
|
@@ -808,12 +852,8 @@ class AIRouterExecutor:
|
|
|
|
|
result = await provider.analyze(prompt, context)
|
|
|
|
|
|
|
|
|
|
if result.success:
|
|
|
|
|
# 記錄成功
|
|
|
|
|
try:
|
|
|
|
|
guard = get_openclaw_guard()
|
|
|
|
|
guard.record_success()
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
# 記錄成功 (per-provider CB)
|
|
|
|
|
cb.record_success()
|
|
|
|
|
|
|
|
|
|
# 記錄費用
|
|
|
|
|
if result.cost_usd > 0:
|
|
|
|
|
@@ -851,11 +891,7 @@ class AIRouterExecutor:
|
|
|
|
|
except Exception as e:
|
|
|
|
|
errors.append(f"{provider_name}: {e}")
|
|
|
|
|
logger.warning("ai_router_provider_exception", provider=provider_name, error=str(e))
|
|
|
|
|
try:
|
|
|
|
|
guard = get_openclaw_guard()
|
|
|
|
|
guard.record_failure()
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
cb.record_failure()
|
|
|
|
|
|
|
|
|
|
# 全部失敗
|
|
|
|
|
logger.error("ai_router_all_providers_failed", tried=provider_order, errors=errors)
|
|
|
|
|
|