diff --git a/apps/api/src/core/config.py b/apps/api/src/core/config.py index 1346d185..318766b2 100644 --- a/apps/api/src/core/config.py +++ b/apps/api/src/core/config.py @@ -85,13 +85,16 @@ class Settings(BaseSettings): description="Phase 22: True=異步更新 (先推 OpenClaw), False=同步等待", ) # 2026-04-04 ogt: Phase 25 P0 — DIAGNOSE Privacy-First 專用 timeout + # 實測依據 (2026-04-04): + # NIM 雲端: 11-45s (免費 tier,偶有排隊) → 60s timeout (45s + 15s buffer) + # Ollama 本地 llama3.2:3b: ~173s 首次推理 → 200s timeout (173s + 27s buffer) NEMOTRON_DIAGNOSE_TIMEOUT_SECONDS: int = Field( - default=30, - description="Phase 25 P0: DIAGNOSE 任務 Nemotron timeout (秒),比 Tool Calling 短", + default=60, + description="Phase 25 P0: 一般 DIAGNOSE 任務 NIM timeout (秒),實測 11-45s,60s 含 buffer", ) OLLAMA_DIAGNOSE_TIMEOUT_SECONDS: int = Field( - default=60, - description="Phase 25 P0: DIAGNOSE 任務 Ollama backup timeout (秒),Ollama 較慢", + default=200, + description="Phase 25 P0: FORCE_LOCAL DIAGNOSE Ollama timeout (秒),實測 ~173s,200s 含 buffer", ) # ========================================================================== diff --git a/apps/api/src/services/ai_providers/ollama.py b/apps/api/src/services/ai_providers/ollama.py index 0b65cfb5..eb5dce07 100644 --- a/apps/api/src/services/ai_providers/ollama.py +++ b/apps/api/src/services/ai_providers/ollama.py @@ -71,6 +71,15 @@ class OllamaProvider: model_name = registry.get_model("ollama", "rca") options = registry.get_provider_options("ollama") + # P0 2026-04-04 Claude Code: per-task timeout(Option C 分情境) + # FORCE_LOCAL/diagnose → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS (200s,實測 ~173s) + # 其他 → OPENCLAW_TIMEOUT(既有設定) + task_type = (context or {}).get("task_type", "") + if task_type in ("diagnose", "force_local"): + read_timeout = float(getattr(settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", 200)) + else: + read_timeout = float(settings.OPENCLAW_TIMEOUT) + response = await client.post( f"{settings.OLLAMA_URL}/api/generate", json={ @@ -84,7 +93,7 @@ class OllamaProvider: "top_p": options.get("top_p", 0.9), }, }, - timeout=httpx.Timeout(float(settings.OPENCLAW_TIMEOUT), connect=10.0), + timeout=httpx.Timeout(read_timeout, connect=10.0), ) response.raise_for_status() data = response.json() diff --git a/apps/api/src/services/ai_router.py b/apps/api/src/services/ai_router.py index fda1ffd8..31ffac0a 100644 --- a/apps/api/src/services/ai_router.py +++ b/apps/api/src/services/ai_router.py @@ -31,6 +31,7 @@ AI Router - Phase 13.3 #87 | v3.0 | 2026-03-26 | Claude Code | Phase 13.3 #87 完整路由決策矩陣 | | v4.0 | 2026-04-02 | ogt (首席架構師) | Phase 24 AIProvider Registry + Executor; C1 Langfuse Trace; C2 AIRouter.route(); C3 型別 typo; I4 Protocol close | | v4.1 | 2026-04-04 | ogt (首席架構師) | Phase 25 P0: DIAGNOSE Privacy-First — _local_fallback_chain; DIAGNOSE→NEMOTRON; REJECT+Telegram | +| v4.2 | 2026-04-04 | Claude Code | Phase 25 P0 實測修正: _local_fallback_chain 移除 Nemotron(雲端),僅留 Ollama(本地); timeout 依實測調整(NIM 60s/Ollama 200s) | """ from __future__ import annotations @@ -247,11 +248,16 @@ class AIRouter: (AIProviderEnum.CLAUDE, self._claude_default), ] - # 2026-04-04 ogt: Phase 25 P0 — DIAGNOSE/FORCE_LOCAL 專用鏈 - # 隱私邊界:絕不包含任何雲端 Provider,到 OLLAMA 為止 + # 2026-04-04 ogt: Phase 25 P0 — FORCE_LOCAL 專用鏈(機密資料,絕不出網) + # 實測依據 (2026-04-04): + # Nemotron NIM = integrate.api.nvidia.com(雲端 API)→ 不可放入 local chain + # Ollama = 192.168.0.188:11434(真正本地)→ 唯一合法的 local provider + # Ollama 實測 ~173s,timeout 設 200s(含 buffer) + # Option C 分情境: + # FORCE_LOCAL → 此 chain [OLLAMA only](機密,寧可慢不上雲) + # 一般 DIAGNOSE → _full_fallback_chain(NEMOTRON first,走雲端高能力) self._local_fallback_chain: list[tuple[AIProviderEnum, str]] = [ - (AIProviderEnum.NEMOTRON, self._nemotron_default), # NIM 188,主力(零費用,高能力) - (AIProviderEnum.OLLAMA, self._ollama_summary), # Ollama 188,備援(慢但可靠) + (AIProviderEnum.OLLAMA, self._ollama_summary), # 唯一本地 Provider,~173s 實測 ] # 意圖對應 Provider 強制覆寫 (None = 依複雜度決定)