fix(p0): 實測修正 — timeout 依 benchmark 調整，_local_fallback_chain 移除雲端 Nemotron

- config.py: NEMOTRON_DIAGNOSE_TIMEOUT_SECONDS=60s (NIM 實測 11-45s + 15s buffer)
- config.py: OLLAMA_DIAGNOSE_TIMEOUT_SECONDS=200s (Ollama 實測 ~173s + 27s buffer)
- ollama.py: 新增 per-task timeout (diagnose/force_local 用 200s)
- ai_router.py: _local_fallback_chain 移除 Nemotron (NIM=雲端，不可進 local chain)
- ai_router.py: v4.2 — Option C 分情境路由正式確立

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 00:29:09 +08:00
parent ddb75b69c5
commit 96d5e18924
3 changed files with 27 additions and 9 deletions
--- a/apps/api/src/core/config.py
+++ b/apps/api/src/core/config.py
@@ -85,13 +85,16 @@ class Settings(BaseSettings):
        description="Phase 22: True=異步更新 (先推 OpenClaw), False=同步等待",
    )
    # 2026-04-04 ogt: Phase 25 P0 — DIAGNOSE Privacy-First 專用 timeout
+    # 實測依據 (2026-04-04):
+    #   NIM 雲端: 11-45s (免費 tier，偶有排隊)  → 60s timeout (45s + 15s buffer)
+    #   Ollama 本地 llama3.2:3b: ~173s 首次推理 → 200s timeout (173s + 27s buffer)
    NEMOTRON_DIAGNOSE_TIMEOUT_SECONDS: int = Field(
-        default=30,
-        description="Phase 25 P0: DIAGNOSE 任務 Nemotron timeout (秒)，比 Tool Calling 短",
+        default=60,
+        description="Phase 25 P0: 一般 DIAGNOSE 任務 NIM timeout (秒)，實測 11-45s，60s 含 buffer",
    )
    OLLAMA_DIAGNOSE_TIMEOUT_SECONDS: int = Field(
-        default=60,
-        description="Phase 25 P0: DIAGNOSE 任務 Ollama backup timeout (秒)，Ollama 較慢",
+        default=200,
+        description="Phase 25 P0: FORCE_LOCAL DIAGNOSE Ollama timeout (秒)，實測 ~173s，200s 含 buffer",
    )

    # ==========================================================================
--- a/apps/api/src/services/ai_providers/ollama.py
+++ b/apps/api/src/services/ai_providers/ollama.py
@@ -71,6 +71,15 @@ class OllamaProvider:
            model_name = registry.get_model("ollama", "rca")
            options = registry.get_provider_options("ollama")

+            # P0 2026-04-04 Claude Code: per-task timeout（Option C 分情境）
+            # FORCE_LOCAL/diagnose → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS (200s，實測 ~173s)
+            # 其他 → OPENCLAW_TIMEOUT（既有設定）
+            task_type = (context or {}).get("task_type", "")
+            if task_type in ("diagnose", "force_local"):
+                read_timeout = float(getattr(settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", 200))
+            else:
+                read_timeout = float(settings.OPENCLAW_TIMEOUT)
+
            response = await client.post(
                f"{settings.OLLAMA_URL}/api/generate",
                json={
@@ -84,7 +93,7 @@ class OllamaProvider:
                        "top_p": options.get("top_p", 0.9),
                    },
                },
-                timeout=httpx.Timeout(float(settings.OPENCLAW_TIMEOUT), connect=10.0),
+                timeout=httpx.Timeout(read_timeout, connect=10.0),
            )
            response.raise_for_status()
            data = response.json()
--- a/apps/api/src/services/ai_router.py
+++ b/apps/api/src/services/ai_router.py
@@ -31,6 +31,7 @@ AI Router - Phase 13.3 #87
 | v3.0 | 2026-03-26 | Claude Code | Phase 13.3 #87 完整路由決策矩陣 |
 | v4.0 | 2026-04-02 | ogt (首席架構師) | Phase 24 AIProvider Registry + Executor; C1 Langfuse Trace; C2 AIRouter.route(); C3 型別 typo; I4 Protocol close |
 | v4.1 | 2026-04-04 | ogt (首席架構師) | Phase 25 P0: DIAGNOSE Privacy-First — _local_fallback_chain; DIAGNOSE→NEMOTRON; REJECT+Telegram |
+| v4.2 | 2026-04-04 | Claude Code | Phase 25 P0 實測修正: _local_fallback_chain 移除 Nemotron(雲端)，僅留 Ollama(本地); timeout 依實測調整(NIM 60s/Ollama 200s) |
 """

 from __future__ import annotations
@@ -247,11 +248,16 @@ class AIRouter:
            (AIProviderEnum.CLAUDE, self._claude_default),
        ]

-        # 2026-04-04 ogt: Phase 25 P0 — DIAGNOSE/FORCE_LOCAL 專用鏈
-        # 隱私邊界：絕不包含任何雲端 Provider，到 OLLAMA 為止
+        # 2026-04-04 ogt: Phase 25 P0 — FORCE_LOCAL 專用鏈（機密資料，絕不出網）
+        # 實測依據 (2026-04-04):
+        #   Nemotron NIM = integrate.api.nvidia.com（雲端 API）→ 不可放入 local chain
+        #   Ollama = 192.168.0.188:11434（真正本地）→ 唯一合法的 local provider
+        #   Ollama 實測 ~173s，timeout 設 200s（含 buffer）
+        # Option C 分情境：
+        #   FORCE_LOCAL → 此 chain [OLLAMA only]（機密，寧可慢不上雲）
+        #   一般 DIAGNOSE → _full_fallback_chain（NEMOTRON first，走雲端高能力）
        self._local_fallback_chain: list[tuple[AIProviderEnum, str]] = [
-            (AIProviderEnum.NEMOTRON, self._nemotron_default),  # NIM 188，主力（零費用，高能力）
-            (AIProviderEnum.OLLAMA, self._ollama_summary),      # Ollama 188，備援（慢但可靠）
+            (AIProviderEnum.OLLAMA, self._ollama_summary),  # 唯一本地 Provider，~173s 實測
        ]

        # 意圖對應 Provider 強制覆寫 (None = 依複雜度決定)