diff --git a/apps/api/src/services/ai_providers/claude.py b/apps/api/src/services/ai_providers/claude.py index 7bd2360e..6efe8d6a 100644 --- a/apps/api/src/services/ai_providers/claude.py +++ b/apps/api/src/services/ai_providers/claude.py @@ -19,6 +19,7 @@ import structlog from src.core.config import get_settings from src.services.ai_providers.interfaces import AIProvider, AIResult, is_provider_enabled_by_env +from src.services.model_registry import get_model_registry logger = structlog.get_logger(__name__) settings = get_settings() @@ -78,7 +79,7 @@ class ClaudeProvider: "content-type": "application/json", }, json={ - "model": "claude-3-haiku-20240307", + "model": get_model_registry().get_model("claude", "rca"), "max_tokens": 2048, "messages": [{"role": "user", "content": prompt}], "tools": [{ @@ -119,16 +120,26 @@ class ClaudeProvider: response.raise_for_status() data = response.json() + # I2 修復: 追蹤 tokens/cost + usage = data.get("usage", {}) + input_tokens = usage.get("input_tokens", 0) + output_tokens = usage.get("output_tokens", 0) + total_tokens = input_tokens + output_tokens + # Claude Haiku: Input $0.25/1M, Output $1.25/1M + cost_usd = (input_tokens * 0.00000025) + (output_tokens * 0.00000125) + # 從 Tool Use 回應中提取 JSON for block in data.get("content", []): if block.get("type") == "tool_use" and block.get("name") == "submit_analysis": tool_input = block.get("input", {}) latency = (time.perf_counter() - start) * 1000 - logger.info("claude_provider_success", keys=list(tool_input.keys()), latency_ms=round(latency, 1)) + logger.info("claude_provider_success", keys=list(tool_input.keys()), tokens=total_tokens, latency_ms=round(latency, 1)) return AIResult( raw_response=json.dumps(tool_input), success=True, provider=self.name, + tokens=total_tokens, + cost_usd=cost_usd, latency_ms=latency, ) @@ -140,6 +151,8 @@ class ClaudeProvider: raw_response=block.get("text", ""), success=True, provider=self.name, + tokens=total_tokens, + cost_usd=cost_usd, latency_ms=latency, 
) diff --git a/apps/api/src/services/ai_providers/gemini.py b/apps/api/src/services/ai_providers/gemini.py index 4821fc8f..a439e8cf 100644 --- a/apps/api/src/services/ai_providers/gemini.py +++ b/apps/api/src/services/ai_providers/gemini.py @@ -77,7 +77,7 @@ class GeminiProvider: json={ "contents": [{"parts": [{"text": prompt}]}], "generationConfig": { - "temperature": 0.1, + "temperature": registry.get_provider_options("gemini").get("temperature", 0.1), "maxOutputTokens": 2048, "responseMimeType": "application/json", }, diff --git a/apps/api/src/services/ai_providers/ollama.py b/apps/api/src/services/ai_providers/ollama.py index a8622f7a..0b65cfb5 100644 --- a/apps/api/src/services/ai_providers/ollama.py +++ b/apps/api/src/services/ai_providers/ollama.py @@ -89,13 +89,16 @@ class OllamaProvider: response.raise_for_status() data = response.json() result = data.get("response", "") + # I3 修復: 追蹤 tokens + tokens = data.get("eval_count", 0) + data.get("prompt_eval_count", 0) latency = (time.perf_counter() - start) * 1000 - logger.info("ollama_provider_success", response_length=len(result), latency_ms=round(latency, 1)) + logger.info("ollama_provider_success", response_length=len(result), tokens=tokens, latency_ms=round(latency, 1)) return AIResult( raw_response=result, success=True, provider=self.name, + tokens=tokens, latency_ms=latency, ) diff --git a/apps/api/src/services/ai_router.py b/apps/api/src/services/ai_router.py index 47017152..faef105f 100644 --- a/apps/api/src/services/ai_router.py +++ b/apps/api/src/services/ai_router.py @@ -64,7 +64,7 @@ logger = structlog.get_logger(__name__) # ============================================================================= -class AIProvider(Enum): +class AIProviderEnum(Enum): """AI 提供者""" OLLAMA = "ollama" @@ -75,12 +75,12 @@ class AIProvider(Enum): # Provider 對應延遲預算 (ms) -PROVIDER_LATENCY_BUDGET: dict[AIProvider, int] = { - AIProvider.OLLAMA: 60000, # 本地,允許較長處理時間 - AIProvider.GEMINI: 30000, # 雲端,較低延遲 - 
AIProvider.CLAUDE: 30000, # 雲端,較低延遲 +PROVIDER_LATENCY_BUDGET: dict[AIProviderEnum, int] = { + AIProviderEnum.OLLAMA: 60000, # 本地,允許較長處理時間 + AIProviderEnum.GEMINI: 30000, # 雲端,較低延遲 + AIProviderEnum.CLAUDE: 30000, # 雲端,較低延遲 # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (延遲 11-45s) - AIProvider.NVIDIA: 60000, # Tool Calling 專用,允許較長時間 + AIProviderEnum.NVIDIA: 60000, # Tool Calling 專用,允許較長時間 } @@ -116,7 +116,7 @@ class IAIRouter(Protocol): def route_tool_calling( self, - ) -> tuple[AIProvider, str, list[tuple[AIProvider, str]]]: + ) -> tuple[AIProviderEnum, str, list[tuple[AIProviderEnum, str]]]: """Tool Calling 專用路由""" ... @@ -130,9 +130,9 @@ class RoutingDecision: """ # 核心決策 - selected_provider: AIProvider # 選擇的 AI Provider + selected_provider: AIProviderEnum # 選擇的 AI Provider selected_model: str # 選擇的模型名稱 - fallback_chain: list[tuple[AIProvider, str]] # 備援鏈 [(provider, model), ...] + fallback_chain: list[tuple[AIProviderEnum, str]] # 備援鏈 [(provider, model), ...] routing_reason: str # 路由決策原因 latency_budget_ms: int # 延遲預算 (毫秒) @@ -213,45 +213,45 @@ class AIRouter: self._nvidia_default = self._model_registry.get_model("nvidia", "default") # Provider 對應模型映射 - self._provider_models: dict[AIProvider, str] = { - AIProvider.OLLAMA: self._ollama_default, - AIProvider.GEMINI: self._gemini_default, - AIProvider.CLAUDE: self._claude_default, - AIProvider.NVIDIA: self._nvidia_default, # ADR-036 + self._provider_models: dict[AIProviderEnum, str] = { + AIProviderEnum.OLLAMA: self._ollama_default, + AIProviderEnum.GEMINI: self._gemini_default, + AIProviderEnum.CLAUDE: self._claude_default, + AIProviderEnum.NVIDIA: self._nvidia_default, # ADR-036 } # 完整 Fallback 鏈 (Provider, Model) # 2026-03-30 ogt: NVIDIA 成為首選仲裁,加入 Fallback 鏈首位 - self._full_fallback_chain: list[tuple[AIProvider, str]] = [ - (AIProvider.NVIDIA, self._nvidia_default), - (AIProvider.GEMINI, self._gemini_default), - (AIProvider.CLAUDE, self._claude_default), - (AIProvider.OLLAMA, self._ollama_default), + 
self._full_fallback_chain: list[tuple[AIProviderEnum, str]] = [ + (AIProviderEnum.NVIDIA, self._nvidia_default), + (AIProviderEnum.GEMINI, self._gemini_default), + (AIProviderEnum.CLAUDE, self._claude_default), + (AIProviderEnum.OLLAMA, self._ollama_default), ] # Tool Calling 專用 Fallback 鏈 (ADR-036) - self._tool_calling_fallback_chain: list[tuple[AIProvider, str]] = [ - (AIProvider.NVIDIA, self._nvidia_default), - (AIProvider.GEMINI, self._gemini_default), - (AIProvider.CLAUDE, self._claude_default), + self._tool_calling_fallback_chain: list[tuple[AIProviderEnum, str]] = [ + (AIProviderEnum.NVIDIA, self._nvidia_default), + (AIProviderEnum.GEMINI, self._gemini_default), + (AIProviderEnum.CLAUDE, self._claude_default), ] # 意圖對應 Provider 強制覆寫 (None = 依複雜度決定) - self._intent_provider_overrides: dict[IntentType, AIProvider | None] = { + self._intent_provider_overrides: dict[IntentType, AIProviderEnum | None] = { # 四大核心意圖 IntentType.RESTART: None, # 依複雜度 IntentType.SCALE: None, # 依複雜度 IntentType.CONFIG: None, # 依複雜度 (但 HIGH 會升級) - IntentType.DIAGNOSE: AIProvider.OLLAMA, # 診斷優先本地 (隱私) + IntentType.DIAGNOSE: AIProviderEnum.OLLAMA, # 診斷優先本地 (隱私) # 輔助意圖 - IntentType.DELETE: AIProvider.CLAUDE, # CRITICAL → 強制 Claude + IntentType.DELETE: AIProviderEnum.CLAUDE, # CRITICAL → 強制 Claude IntentType.ROLLBACK: None, # 依複雜度 IntentType.UNKNOWN: None, # 舊版兼容 IntentType.CODE_REVIEW: None, IntentType.DEPLOYMENT: None, - IntentType.ALERT_TRIAGE: AIProvider.OLLAMA, - IntentType.QUERY: AIProvider.OLLAMA, + IntentType.ALERT_TRIAGE: AIProviderEnum.OLLAMA, + IntentType.QUERY: AIProviderEnum.OLLAMA, IntentType.MAINTENANCE: None, } @@ -339,7 +339,7 @@ class AIRouter: intent: IntentType, intent_result: IntentResult, complexity: ComplexityScore, - ) -> tuple[AIProvider, str, str]: + ) -> tuple[AIProviderEnum, str, str]: """ 選擇 Provider 和模型 (Phase 13.3 #87 核心邏輯) @@ -368,7 +368,7 @@ class AIRouter: # 規則 1: CRITICAL 風險強制 Claude (最高優先級) # 
======================================================================= if risk == RiskLevel.CRITICAL: - provider = AIProvider.CLAUDE + provider = AIProviderEnum.CLAUDE model = self._claude_default reason = f"CRITICAL 風險 ({intent.value}) 強制使用 Claude" return provider, model, reason @@ -377,7 +377,7 @@ class AIRouter: # 規則 2: DELETE 意圖強制 Claude (不可逆操作) # ======================================================================= if intent == IntentType.DELETE: - provider = AIProvider.CLAUDE + provider = AIProviderEnum.CLAUDE model = self._claude_default reason = "DELETE 意圖 (不可逆) 強制使用 Claude" return provider, model, reason @@ -396,7 +396,7 @@ class AIRouter: # 規則 4: 複雜度 4-5 或 HIGH 風險 → Nvidia Nemotron # ======================================================================= if score >= 4 or risk == RiskLevel.HIGH: - provider = AIProvider.NVIDIA + provider = AIProviderEnum.NVIDIA model = self._nvidia_default reason = f"複雜度={score}/5, 風險={risk.value} → Nvidia (fallback Gemini)" return provider, model, reason @@ -405,7 +405,7 @@ class AIRouter: # 規則 5: 複雜度 3 + MEDIUM → Ollama (fallback Gemini) # ======================================================================= if score == 3: - provider = AIProvider.OLLAMA + provider = AIProviderEnum.OLLAMA model = self._ollama_default reason = f"複雜度={score}/5, 風險={risk.value} → Ollama (fallback Gemini)" return provider, model, reason @@ -413,7 +413,7 @@ class AIRouter: # ======================================================================= # 規則 6: 複雜度 1-2 + LOW/MEDIUM → Ollama (快速本地處理) # ======================================================================= - provider = AIProvider.OLLAMA + provider = AIProviderEnum.OLLAMA # 低複雜度使用輕量模型 (更快回應) model = self._ollama_summary if score <= 1 else self._ollama_default reason = f"複雜度={score}/5, 風險={risk.value} → Ollama (成本優先)" @@ -444,8 +444,8 @@ class AIRouter: return model, reason def _build_fallback_chain( - self, selected_provider: AIProvider - ) -> list[tuple[AIProvider, str]]: + self, 
selected_provider: AIProviderEnum + ) -> list[tuple[AIProviderEnum, str]]: """ 建立 Fallback 鏈 (排除已選 Provider) @@ -457,7 +457,7 @@ class AIRouter: Returns: Fallback 鏈 [(provider, model), ...] """ - fallback_chain: list[tuple[AIProvider, str]] = [] + fallback_chain: list[tuple[AIProviderEnum, str]] = [] for provider, model in self._full_fallback_chain: if provider != selected_provider: @@ -527,7 +527,7 @@ class AIRouter: # Tool Calling 路由 (ADR-036) # ========================================================================= - def route_tool_calling(self) -> tuple[AIProvider, str, list[tuple[AIProvider, str]]]: + def route_tool_calling(self) -> tuple[AIProviderEnum, str, list[tuple[AIProviderEnum, str]]]: """ Tool Calling 專用路由 (ADR-036) @@ -537,7 +537,7 @@ class AIRouter: Returns: (provider, model, fallback_chain) """ - provider = AIProvider.NVIDIA + provider = AIProviderEnum.NVIDIA model = self._nvidia_default fallback_chain = [ (p, m) for p, m in self._tool_calling_fallback_chain if p != provider @@ -552,7 +552,7 @@ class AIRouter: return provider, model, fallback_chain - def get_tool_calling_fallback_chain(self) -> list[tuple[AIProvider, str]]: + def get_tool_calling_fallback_chain(self) -> list[tuple[AIProviderEnum, str]]: """取得 Tool Calling Fallback 鏈""" return self._tool_calling_fallback_chain.copy() @@ -560,12 +560,12 @@ class AIRouter: # 便捷方法 # ========================================================================= - def get_provider_for_intent(self, intent: IntentType) -> AIProvider: + def get_provider_for_intent(self, intent: IntentType) -> AIProviderEnum: """取得意圖對應的 Provider (不考慮複雜度)""" override = self._intent_provider_overrides.get(intent) - return override if override else AIProvider.OLLAMA + return override if override else AIProviderEnum.OLLAMA - def get_model_for_provider(self, provider: AIProvider) -> str: + def get_model_for_provider(self, provider: AIProviderEnum) -> str: """取得 Provider 對應的模型""" return self._provider_models.get(provider, 
self._ollama_default) @@ -636,6 +636,36 @@ from src.services.ai_providers.interfaces import AIProvider as AIProviderProtoco _settings = get_settings() +class _SimpleCircuitBreaker: + """ + 輕量 per-provider Circuit Breaker (Phase 24 C2 修復) + + 不共用 OpenClawGuard — 避免 Gemini 掛掉時 Ollama 也被擋 + """ + + def __init__(self, name: str, failure_threshold: int = 5, recovery_timeout: float = 60.0) -> None: + self.name = name + self._failure_threshold = failure_threshold + self._recovery_timeout = recovery_timeout + self._failure_count = 0 + self._last_failure_time: float = 0.0 + + def is_open(self) -> bool: + if self._failure_count < self._failure_threshold: + return False + # 超過 recovery timeout → half-open (允許一次嘗試) + if time.time() - self._last_failure_time > self._recovery_timeout: + return False + return True + + def record_success(self) -> None: + self._failure_count = 0 + + def record_failure(self) -> None: + self._failure_count += 1 + self._last_failure_time = time.time() + + class AIProviderRegistry: """ AI Provider 註冊中心 — 類比 MCP ProviderRegistry (ADR-015) @@ -646,7 +676,7 @@ class AIProviderRegistry: def __init__(self) -> None: self._providers: dict[str, AIProviderProtocol] = {} - def register(self, provider: AIProviderProtocol) -> None: + def register(self, provider: AIProviderProtocol) -> None: """註冊 Provider (啟動時呼叫)""" self._providers[provider.name] = provider status = "enabled" if provider.is_enabled else "disabled" @@ -677,6 +707,16 @@ class AIProviderRegistry: results[name] = False return results + async def close_all(self) -> None: + """關閉所有 Provider 的 HTTP 連線 (I5 修復: shutdown hook)""" + for name, p in self._providers.items(): + try: + if hasattr(p, "close"): + await p.close() + logger.info("ai_provider_closed", name=name) + except Exception as e: + logger.warning("ai_provider_close_failed", name=name, error=str(e)) + class AIRouterExecutor: """ @@ -697,6 +737,8 @@ class AIRouterExecutor: def __init__(self, registry: AIProviderRegistry) -> None: 
self._registry = registry self._semaphores: dict[str, asyncio.Semaphore] = {} + # C2 修復: per-provider Circuit Breaker (不共用,避免一個掛全部擋) + self._circuit_breakers: dict[str, "_SimpleCircuitBreaker"] = {} def _get_semaphore(self, name: str, limit: int = 3) -> asyncio.Semaphore: """取得 Provider 的並發 Semaphore (lazy init)""" @@ -704,6 +746,12 @@ class AIRouterExecutor: self._semaphores[name] = asyncio.Semaphore(limit) return self._semaphores[name] + def _get_circuit_breaker(self, name: str) -> "_SimpleCircuitBreaker": + """取得 Provider 的 Circuit Breaker (per-provider, lazy init)""" + if name not in self._circuit_breakers: + self._circuit_breakers[name] = _SimpleCircuitBreaker(name) + return self._circuit_breakers[name] + @staticmethod def _cache_key(prompt: str, context: dict | None) -> str: """生成 Cache Key (與 openclaw.py 相容)""" @@ -750,10 +798,10 @@ class AIRouterExecutor: ) # ② Cache 檢查 (D4) + cache_key = self._cache_key(prompt, context) # C3 修復: 移到 try 外避免 UnboundLocalError try: from src.core.redis_client import get_redis redis = get_redis() - cache_key = self._cache_key(prompt, context) cached = await redis.get(cache_key) if cached: data = _json.loads(cached) @@ -779,15 +827,11 @@ class AIRouterExecutor: if require_local and provider.privacy_level != "local": continue - # 閘門 1: Circuit Breaker - try: - from src.core.circuit_breaker import get_openclaw_guard - guard = get_openclaw_guard() - if guard.is_circuit_open(): - logger.debug("ai_router_circuit_open", provider=provider_name) - continue - except Exception: - pass # Circuit Breaker 不阻塞主流程 + # 閘門 1: Circuit Breaker (per-provider, C2 修復) + cb = self._get_circuit_breaker(provider_name) + if cb.is_open(): + logger.debug("ai_router_circuit_open", provider=provider_name) + continue # 閘門 2: Rate Limiter if provider_name in ("nvidia", "gemini", "claude"): @@ -808,12 +852,8 @@ class AIRouterExecutor: result = await provider.analyze(prompt, context) if result.success: - # 記錄成功 - try: - guard = get_openclaw_guard() - 
guard.record_success() - except Exception: - pass + # 記錄成功 (per-provider CB) + cb.record_success() # 記錄費用 if result.cost_usd > 0: @@ -851,11 +891,7 @@ class AIRouterExecutor: except Exception as e: errors.append(f"{provider_name}: {e}") logger.warning("ai_router_provider_exception", provider=provider_name, error=str(e)) - try: - guard = get_openclaw_guard() - guard.record_failure() - except Exception: - pass + cb.record_failure() # 全部失敗 logger.error("ai_router_all_providers_failed", tried=provider_order, errors=errors)