From 97d86861ed9f42f638eba4076437f57d6f9949cf Mon Sep 17 00:00:00 2001 From: OG T Date: Thu, 2 Apr 2026 23:31:31 +0800 Subject: [PATCH] =?UTF-8?q?fix(ai=5Frouter):=20C1=20=E4=BF=AE=E5=BE=A9=20?= =?UTF-8?q?=E2=80=94=20AIProviderEnum=20=E5=B0=8D=E9=BD=8A=20Registry=20?= =?UTF-8?q?=E5=AF=A6=E9=9A=9B=20Provider=20=E5=90=8D=E7=A8=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 問題: AIProviderEnum.NVIDIA = "nvidia" 在 Registry 無對應 Provider OpenClawNemoProvider.name = "openclaw_nemo" NemotronProvider.name = "nemotron" → 高複雜度/Tool Calling 路由永遠 skip,靜默 fallback 到 Gemini/Ollama 修復: - 新增 OPENCLAW_NEMO = "openclaw_nemo" (一般推理, via .188 → NVIDIA NIM) - 新增 NEMOTRON = "nemotron" (Tool Calling, direct NVIDIA NIM) - 移除 NVIDIA = "nvidia" (Registry 無對應) - 規則 4 (複雜度>=4/HIGH風險): NVIDIA → OPENCLAW_NEMO - route_tool_calling: NVIDIA → NEMOTRON - Rate Limiter check: "nvidia" → "openclaw_nemo" - _full_fallback_chain: OPENCLAW_NEMO 首位 - _tool_calling_fallback_chain: NEMOTRON 首位 Co-Authored-By: Claude Sonnet 4.6 --- apps/api/src/services/ai_router.py | 51 ++++++++++++++++++------------ 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/apps/api/src/services/ai_router.py b/apps/api/src/services/ai_router.py index 2035006e..b112799f 100644 --- a/apps/api/src/services/ai_router.py +++ b/apps/api/src/services/ai_router.py @@ -71,8 +71,12 @@ class AIProviderEnum(Enum): OLLAMA = "ollama" GEMINI = "gemini" CLAUDE = "claude" - # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (83.3% 精準度) - NVIDIA = "nvidia" + # 2026-04-02 ogt: C1 修復 — 對齊 Registry 實際名稱 + # OpenClawNemoProvider.name = "openclaw_nemo" (一般推理, via .188) + # NemotronProvider.name = "nemotron" (Tool Calling, direct NVIDIA NIM) + # 舊版 NVIDIA = "nvidia" 已移除: Registry 無此 Provider + OPENCLAW_NEMO = "openclaw_nemo" + NEMOTRON = "nemotron" # Provider 對應延遲預算 (ms) @@ -80,8 +84,9 @@ PROVIDER_LATENCY_BUDGET: dict[AIProviderEnum, int] = { AIProviderEnum.OLLAMA: 60000, # 本地,允許較長處理時間 AIProviderEnum.GEMINI: 30000, # 雲端,較低延遲 AIProviderEnum.CLAUDE: 30000, # 雲端,較低延遲 - # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (延遲 11-45s) - AIProviderEnum.NVIDIA: 60000, # Tool Calling 專用,允許較長時間 + # 2026-04-02 ogt: C1 修復 — 對齊 Registry 名稱 + AIProviderEnum.OPENCLAW_NEMO: 60000, # via .188 → NVIDIA NIM,允許較長時間 + AIProviderEnum.NEMOTRON: 60000, # Tool Calling 專用,允許較長時間 } @@ -210,21 +215,25 @@ class AIRouter: self._ollama_summary = self._model_registry.get_model("ollama", "summary") self._gemini_default = self._model_registry.get_model("gemini", "default") self._claude_default = self._model_registry.get_model("claude", "default") - # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling - self._nvidia_default = self._model_registry.get_model("nvidia", "default") + # 2026-04-02 ogt: C1 修復 — openclaw_nemo (一般推理) + nemotron (Tool Calling) + self._openclaw_nemo_default = self._model_registry.get_model("nvidia", "default") + self._nemotron_default = self._model_registry.get_model("nvidia", "default") + # 向後相容別名 + self._nvidia_default = self._openclaw_nemo_default # Provider 對應模型映射 self._provider_models: dict[AIProviderEnum, str] = { AIProviderEnum.OLLAMA: self._ollama_default, AIProviderEnum.GEMINI: self._gemini_default, AIProviderEnum.CLAUDE: self._claude_default, - AIProviderEnum.NVIDIA: self._nvidia_default, # ADR-036 + AIProviderEnum.OPENCLAW_NEMO: self._openclaw_nemo_default, + AIProviderEnum.NEMOTRON: self._nemotron_default, } # 完整 Fallback 鏈 (Provider, Model) - # 2026-03-30 ogt: NVIDIA 成為首選仲裁,加入 Fallback 鏈首位 + # 2026-04-02 ogt: C1 修復 — OPENCLAW_NEMO 首選仲裁 self._full_fallback_chain: list[tuple[AIProviderEnum, str]] = [ - (AIProviderEnum.NVIDIA, self._nvidia_default), + (AIProviderEnum.OPENCLAW_NEMO, self._openclaw_nemo_default), (AIProviderEnum.GEMINI, self._gemini_default), (AIProviderEnum.CLAUDE, self._claude_default), (AIProviderEnum.OLLAMA, self._ollama_default), @@ -232,7 +241,7 @@ class AIRouter: # Tool Calling 專用 Fallback 鏈 (ADR-036) self._tool_calling_fallback_chain: list[tuple[AIProviderEnum, str]] = [ - (AIProviderEnum.NVIDIA, self._nvidia_default), + (AIProviderEnum.NEMOTRON, self._nemotron_default), (AIProviderEnum.GEMINI, self._gemini_default), (AIProviderEnum.CLAUDE, self._claude_default), ] @@ -394,12 +403,13 @@ class AIRouter: return provider, model, reason # ======================================================================= - # 規則 4: 複雜度 4-5 或 HIGH 風險 → Nvidia Nemotron + # 規則 4: 複雜度 4-5 或 HIGH 風險 → OpenClaw Nemo (via .188 → NVIDIA NIM) + # 2026-04-02 ogt: C1 修復 — NVIDIA→OPENCLAW_NEMO 對齊 Registry 名稱 # ======================================================================= if score >= 4 or risk == RiskLevel.HIGH: - provider = AIProviderEnum.NVIDIA - model = self._nvidia_default - reason = f"複雜度={score}/5, 風險={risk.value} → Nvidia (fallback Gemini)" + provider = AIProviderEnum.OPENCLAW_NEMO + model = self._openclaw_nemo_default + reason = f"複雜度={score}/5, 風險={risk.value} → OpenClaw Nemo (fallback Gemini)" return provider, model, reason # ======================================================================= @@ -538,8 +548,9 @@ class AIRouter: Returns: (provider, model, fallback_chain) """ - provider = AIProviderEnum.NVIDIA - model = self._nvidia_default + # 2026-04-02 ogt: C1 修復 — Tool Calling 使用 NEMOTRON (direct NIM) + provider = AIProviderEnum.NEMOTRON + model = self._nemotron_default fallback_chain = [ (p, m) for p, m in self._tool_calling_fallback_chain if p != provider ] @@ -599,8 +610,8 @@ class AIRouter: { "rule": 4, "condition": "complexity >= 4 OR HIGH risk", - "provider": "nvidia", - "reason": "高複雜度需要 Nvidia Nemotron 強大推理能力", + "provider": "openclaw_nemo", + "reason": "高複雜度需要 Nvidia Nemotron 強大推理能力 (via .188)", }, { "rule": 5, @@ -852,8 +863,8 @@ class AIRouterExecutor: continue # 閘門 2: Rate Limiter - # 2026-04-02 Claude Code: Phase 24 B3 — 加入 nemotron Rate Limiter - if provider_name in ("nvidia", "gemini", "claude", "nemotron"): + # 2026-04-02 Claude Code: Phase 24 B3 + C1 修復 — Rate Limiter (含 openclaw_nemo) + if provider_name in ("openclaw_nemo", "nemotron", "gemini", "claude"): try: from src.services.ai_rate_limiter import get_ai_rate_limiter rate_limiter = get_ai_rate_limiter()