fix(p0): 實測修正 — timeout 依 benchmark 調整,_local_fallback_chain 移除雲端 Nemotron

- config.py: NEMOTRON_DIAGNOSE_TIMEOUT_SECONDS=60s (NIM 實測 11-45s + 15s buffer)
- config.py: OLLAMA_DIAGNOSE_TIMEOUT_SECONDS=200s (Ollama 實測 ~173s + 27s buffer)
- ollama.py: 新增 per-task timeout (diagnose/force_local 用 200s)
- ai_router.py: _local_fallback_chain 移除 Nemotron (NIM=雲端,不可進 local chain)
- ai_router.py: v4.2 — Option C 分情境路由正式確立

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-04-05 00:29:09 +08:00
parent ddb75b69c5
commit 96d5e18924
3 changed files with 27 additions and 9 deletions

View File

@@ -85,13 +85,16 @@ class Settings(BaseSettings):
description="Phase 22: True=異步更新 (先推 OpenClaw), False=同步等待",
)
# 2026-04-04 ogt: Phase 25 P0 — DIAGNOSE Privacy-First 專用 timeout
# 實測依據 (2026-04-04):
# NIM 雲端: 11-45s (免費 tier,偶有排隊) → 60s timeout (45s + 15s buffer)
# Ollama 本地 llama3.2:3b: ~173s 首次推理 → 200s timeout (173s + 27s buffer)
NEMOTRON_DIAGNOSE_TIMEOUT_SECONDS: int = Field(
default=30,
description="Phase 25 P0: DIAGNOSE 任務 Nemotron timeout (秒)比 Tool Calling 短",
default=60,
description="Phase 25 P0: 一般 DIAGNOSE 任務 NIM timeout (秒)實測 11-45s60s 含 buffer",
)
OLLAMA_DIAGNOSE_TIMEOUT_SECONDS: int = Field(
default=60,
description="Phase 25 P0: DIAGNOSE 任務 Ollama backup timeout (秒)Ollama 較慢",
default=200,
description="Phase 25 P0: FORCE_LOCAL DIAGNOSE Ollama timeout (秒)實測 ~173s200s 含 buffer",
)
# ==========================================================================

View File

@@ -71,6 +71,15 @@ class OllamaProvider:
model_name = registry.get_model("ollama", "rca")
options = registry.get_provider_options("ollama")
# P0 2026-04-04 Claude Code: per-task timeout(Option C 分情境)
# FORCE_LOCAL/diagnose → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS (200s,實測 ~173s)
# 其他 → OPENCLAW_TIMEOUT(既有設定)
task_type = (context or {}).get("task_type", "")
if task_type in ("diagnose", "force_local"):
read_timeout = float(getattr(settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", 200))
else:
read_timeout = float(settings.OPENCLAW_TIMEOUT)
response = await client.post(
f"{settings.OLLAMA_URL}/api/generate",
json={
@@ -84,7 +93,7 @@ class OllamaProvider:
"top_p": options.get("top_p", 0.9),
},
},
timeout=httpx.Timeout(float(settings.OPENCLAW_TIMEOUT), connect=10.0),
timeout=httpx.Timeout(read_timeout, connect=10.0),
)
response.raise_for_status()
data = response.json()

View File

@@ -31,6 +31,7 @@ AI Router - Phase 13.3 #87
| v3.0 | 2026-03-26 | Claude Code | Phase 13.3 #87 完整路由決策矩陣 |
| v4.0 | 2026-04-02 | ogt (首席架構師) | Phase 24 AIProvider Registry + Executor; C1 Langfuse Trace; C2 AIRouter.route(); C3 型別 typo; I4 Protocol close |
| v4.1 | 2026-04-04 | ogt (首席架構師) | Phase 25 P0: DIAGNOSE Privacy-First — _local_fallback_chain; DIAGNOSE→NEMOTRON; REJECT+Telegram |
| v4.2 | 2026-04-04 | Claude Code | Phase 25 P0 實測修正: _local_fallback_chain 移除 Nemotron(雲端),僅留 Ollama(本地); timeout 依實測調整(NIM 60s/Ollama 200s) |
"""
from __future__ import annotations
@@ -247,11 +248,16 @@ class AIRouter:
(AIProviderEnum.CLAUDE, self._claude_default),
]
# 2026-04-04 ogt: Phase 25 P0 — DIAGNOSE/FORCE_LOCAL 專用鏈
# 隱私邊界:絕不包含任何雲端 Provider,到 OLLAMA 為止
# 2026-04-04 ogt: Phase 25 P0 — FORCE_LOCAL 專用鏈(機密資料,絕不出網)
# 實測依據 (2026-04-04):
# Nemotron NIM = integrate.api.nvidia.com(雲端 API)→ 不可放入 local chain
# Ollama = 192.168.0.188:11434(真正本地)→ 唯一合法的 local provider
# Ollama 實測 ~173s,timeout 設 200s(含 buffer)
# Option C 分情境:
# FORCE_LOCAL → 此 chain [OLLAMA only](機密,寧可慢不上雲)
# 一般 DIAGNOSE → _full_fallback_chain(NEMOTRON first,走雲端高能力)
self._local_fallback_chain: list[tuple[AIProviderEnum, str]] = [
(AIProviderEnum.NEMOTRON, self._nemotron_default), # NIM 188主力零費用高能力
(AIProviderEnum.OLLAMA, self._ollama_summary), # Ollama 188備援慢但可靠
(AIProviderEnum.OLLAMA, self._ollama_summary), # 唯一本地 Provider~173s 實測
]
# 意圖對應 Provider 強制覆寫 (None = 依複雜度決定)