diff --git a/apps/api/src/services/ai_router.py b/apps/api/src/services/ai_router.py index 2035006e..b112799f 100644 --- a/apps/api/src/services/ai_router.py +++ b/apps/api/src/services/ai_router.py @@ -71,8 +71,12 @@ class AIProviderEnum(Enum): OLLAMA = "ollama" GEMINI = "gemini" CLAUDE = "claude" - # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (83.3% 精準度) - NVIDIA = "nvidia" + # 2026-04-02 ogt: C1 修復 — 對齊 Registry 實際名稱 + # OpenClawNemoProvider.name = "openclaw_nemo" (一般推理, via .188) + # NemotronProvider.name = "nemotron" (Tool Calling, direct NVIDIA NIM) + # 舊版 NVIDIA = "nvidia" 已移除: Registry 無此 Provider + OPENCLAW_NEMO = "openclaw_nemo" + NEMOTRON = "nemotron" # Provider 對應延遲預算 (ms) @@ -80,8 +84,9 @@ PROVIDER_LATENCY_BUDGET: dict[AIProviderEnum, int] = { AIProviderEnum.OLLAMA: 60000, # 本地,允許較長處理時間 AIProviderEnum.GEMINI: 30000, # 雲端,較低延遲 AIProviderEnum.CLAUDE: 30000, # 雲端,較低延遲 - # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (延遲 11-45s) - AIProviderEnum.NVIDIA: 60000, # Tool Calling 專用,允許較長時間 + # 2026-04-02 ogt: C1 修復 — 對齊 Registry 名稱 + AIProviderEnum.OPENCLAW_NEMO: 60000, # via .188 → NVIDIA NIM,允許較長時間 + AIProviderEnum.NEMOTRON: 60000, # Tool Calling 專用,允許較長時間 } @@ -210,21 +215,25 @@ class AIRouter: self._ollama_summary = self._model_registry.get_model("ollama", "summary") self._gemini_default = self._model_registry.get_model("gemini", "default") self._claude_default = self._model_registry.get_model("claude", "default") - # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling - self._nvidia_default = self._model_registry.get_model("nvidia", "default") + # 2026-04-02 ogt: C1 修復 — openclaw_nemo (一般推理) + nemotron (Tool Calling) + self._openclaw_nemo_default = self._model_registry.get_model("nvidia", "default") + self._nemotron_default = self._model_registry.get_model("nvidia", "default") + # 向後相容別名 + self._nvidia_default = self._openclaw_nemo_default # Provider 對應模型映射 self._provider_models: dict[AIProviderEnum, str] = { AIProviderEnum.OLLAMA: self._ollama_default, AIProviderEnum.GEMINI: self._gemini_default, AIProviderEnum.CLAUDE: self._claude_default, - AIProviderEnum.NVIDIA: self._nvidia_default, # ADR-036 + AIProviderEnum.OPENCLAW_NEMO: self._openclaw_nemo_default, + AIProviderEnum.NEMOTRON: self._nemotron_default, } # 完整 Fallback 鏈 (Provider, Model) - # 2026-03-30 ogt: NVIDIA 成為首選仲裁,加入 Fallback 鏈首位 + # 2026-04-02 ogt: C1 修復 — OPENCLAW_NEMO 首選仲裁 self._full_fallback_chain: list[tuple[AIProviderEnum, str]] = [ - (AIProviderEnum.NVIDIA, self._nvidia_default), + (AIProviderEnum.OPENCLAW_NEMO, self._openclaw_nemo_default), (AIProviderEnum.GEMINI, self._gemini_default), (AIProviderEnum.CLAUDE, self._claude_default), (AIProviderEnum.OLLAMA, self._ollama_default), @@ -232,7 +241,7 @@ class AIRouter: # Tool Calling 專用 Fallback 鏈 (ADR-036) self._tool_calling_fallback_chain: list[tuple[AIProviderEnum, str]] = [ - (AIProviderEnum.NVIDIA, self._nvidia_default), + (AIProviderEnum.NEMOTRON, self._nemotron_default), (AIProviderEnum.GEMINI, self._gemini_default), (AIProviderEnum.CLAUDE, self._claude_default), ] @@ -394,12 +403,13 @@ class AIRouter: return provider, model, reason # ======================================================================= - # 規則 4: 複雜度 4-5 或 HIGH 風險 → Nvidia Nemotron + # 規則 4: 複雜度 4-5 或 HIGH 風險 → OpenClaw Nemo (via .188 → NVIDIA NIM) + # 2026-04-02 ogt: C1 修復 — NVIDIA→OPENCLAW_NEMO 對齊 Registry 名稱 # ======================================================================= if score >= 4 or risk == RiskLevel.HIGH: - provider = AIProviderEnum.NVIDIA - model = self._nvidia_default - reason = f"複雜度={score}/5, 風險={risk.value} → Nvidia (fallback Gemini)" + provider = AIProviderEnum.OPENCLAW_NEMO + model = self._openclaw_nemo_default + reason = f"複雜度={score}/5, 風險={risk.value} → OpenClaw Nemo (fallback Gemini)" return provider, model, reason # ======================================================================= @@ -538,8 +548,9 @@ class AIRouter: Returns: (provider, model, fallback_chain) """ - provider = AIProviderEnum.NVIDIA - model = self._nvidia_default + # 2026-04-02 ogt: C1 修復 — Tool Calling 使用 NEMOTRON (direct NIM) + provider = AIProviderEnum.NEMOTRON + model = self._nemotron_default fallback_chain = [ (p, m) for p, m in self._tool_calling_fallback_chain if p != provider ] @@ -599,8 +610,8 @@ class AIRouter: { "rule": 4, "condition": "complexity >= 4 OR HIGH risk", - "provider": "nvidia", - "reason": "高複雜度需要 Nvidia Nemotron 強大推理能力", + "provider": "openclaw_nemo", + "reason": "高複雜度需要 Nvidia Nemotron 強大推理能力 (via .188)", }, { "rule": 5, @@ -852,8 +863,8 @@ class AIRouterExecutor: continue # 閘門 2: Rate Limiter - # 2026-04-02 Claude Code: Phase 24 B3 — 加入 nemotron Rate Limiter - if provider_name in ("nvidia", "gemini", "claude", "nemotron"): + # 2026-04-02 Claude Code: Phase 24 B3 + C1 修復 — Rate Limiter (含 openclaw_nemo) + if provider_name in ("openclaw_nemo", "nemotron", "gemini", "claude"): try: from src.services.ai_rate_limiter import get_ai_rate_limiter rate_limiter = get_ai_rate_limiter()