fix(p0): 實測修正 — timeout 依 benchmark 調整,_local_fallback_chain 移除雲端 Nemotron
- config.py: NEMOTRON_DIAGNOSE_TIMEOUT_SECONDS=60s (NIM 實測 11-45s + 15s buffer)
- config.py: OLLAMA_DIAGNOSE_TIMEOUT_SECONDS=200s (Ollama 實測 ~173s + 27s buffer)
- ollama.py: 新增 per-task timeout (diagnose/force_local 用 200s)
- ai_router.py: _local_fallback_chain 移除 Nemotron (NIM=雲端,不可進 local chain)
- ai_router.py: v4.2 — Option C 分情境路由正式確立

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -85,13 +85,16 @@ class Settings(BaseSettings):
|
||||
description="Phase 22: True=異步更新 (先推 OpenClaw), False=同步等待",
|
||||
)
|
||||
# 2026-04-04 ogt: Phase 25 P0 — DIAGNOSE Privacy-First 專用 timeout
|
||||
# 實測依據 (2026-04-04):
|
||||
# NIM 雲端: 11-45s (免費 tier,偶有排隊) → 60s timeout (45s + 15s buffer)
|
||||
# Ollama 本地 llama3.2:3b: ~173s 首次推理 → 200s timeout (173s + 27s buffer)
|
||||
NEMOTRON_DIAGNOSE_TIMEOUT_SECONDS: int = Field(
|
||||
default=30,
|
||||
description="Phase 25 P0: DIAGNOSE 任務 Nemotron timeout (秒),比 Tool Calling 短",
|
||||
default=60,
|
||||
description="Phase 25 P0: 一般 DIAGNOSE 任務 NIM timeout (秒),實測 11-45s,60s 含 buffer",
|
||||
)
|
||||
OLLAMA_DIAGNOSE_TIMEOUT_SECONDS: int = Field(
|
||||
default=60,
|
||||
description="Phase 25 P0: DIAGNOSE 任務 Ollama backup timeout (秒),Ollama 較慢",
|
||||
default=200,
|
||||
description="Phase 25 P0: FORCE_LOCAL DIAGNOSE Ollama timeout (秒),實測 ~173s,200s 含 buffer",
|
||||
)
|
||||
|
||||
# ==========================================================================
|
||||
|
||||
@@ -71,6 +71,15 @@ class OllamaProvider:
|
||||
model_name = registry.get_model("ollama", "rca")
|
||||
options = registry.get_provider_options("ollama")
|
||||
|
||||
# P0 2026-04-04 Claude Code: per-task timeout(Option C 分情境)
|
||||
# FORCE_LOCAL/diagnose → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS (200s,實測 ~173s)
|
||||
# 其他 → OPENCLAW_TIMEOUT(既有設定)
|
||||
task_type = (context or {}).get("task_type", "")
|
||||
if task_type in ("diagnose", "force_local"):
|
||||
read_timeout = float(getattr(settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", 200))
|
||||
else:
|
||||
read_timeout = float(settings.OPENCLAW_TIMEOUT)
|
||||
|
||||
response = await client.post(
|
||||
f"{settings.OLLAMA_URL}/api/generate",
|
||||
json={
|
||||
@@ -84,7 +93,7 @@ class OllamaProvider:
|
||||
"top_p": options.get("top_p", 0.9),
|
||||
},
|
||||
},
|
||||
timeout=httpx.Timeout(float(settings.OPENCLAW_TIMEOUT), connect=10.0),
|
||||
timeout=httpx.Timeout(read_timeout, connect=10.0),
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
@@ -31,6 +31,7 @@ AI Router - Phase 13.3 #87
|
||||
| v3.0 | 2026-03-26 | Claude Code | Phase 13.3 #87 完整路由決策矩陣 |
|
||||
| v4.0 | 2026-04-02 | ogt (首席架構師) | Phase 24 AIProvider Registry + Executor; C1 Langfuse Trace; C2 AIRouter.route(); C3 型別 typo; I4 Protocol close |
|
||||
| v4.1 | 2026-04-04 | ogt (首席架構師) | Phase 25 P0: DIAGNOSE Privacy-First — _local_fallback_chain; DIAGNOSE→NEMOTRON; REJECT+Telegram |
|
||||
| v4.2 | 2026-04-04 | Claude Code | Phase 25 P0 實測修正: _local_fallback_chain 移除 Nemotron(雲端),僅留 Ollama(本地); timeout 依實測調整(NIM 60s/Ollama 200s) |
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -247,11 +248,16 @@ class AIRouter:
|
||||
(AIProviderEnum.CLAUDE, self._claude_default),
|
||||
]
|
||||
|
||||
# 2026-04-04 ogt: Phase 25 P0 — DIAGNOSE/FORCE_LOCAL 專用鏈
|
||||
# 隱私邊界:絕不包含任何雲端 Provider,到 OLLAMA 為止
|
||||
# 2026-04-04 ogt: Phase 25 P0 — FORCE_LOCAL 專用鏈(機密資料,絕不出網)
|
||||
# 實測依據 (2026-04-04):
|
||||
# Nemotron NIM = integrate.api.nvidia.com(雲端 API)→ 不可放入 local chain
|
||||
# Ollama = 192.168.0.188:11434(真正本地)→ 唯一合法的 local provider
|
||||
# Ollama 實測 ~173s,timeout 設 200s(含 buffer)
|
||||
# Option C 分情境:
|
||||
# FORCE_LOCAL → 此 chain [OLLAMA only](機密,寧可慢不上雲)
|
||||
# 一般 DIAGNOSE → _full_fallback_chain(NEMOTRON first,走雲端高能力)
|
||||
self._local_fallback_chain: list[tuple[AIProviderEnum, str]] = [
|
||||
(AIProviderEnum.NEMOTRON, self._nemotron_default), # NIM 188,主力(零費用,高能力)
|
||||
(AIProviderEnum.OLLAMA, self._ollama_summary), # Ollama 188,備援(慢但可靠)
|
||||
(AIProviderEnum.OLLAMA, self._ollama_summary), # 唯一本地 Provider,~173s 實測
|
||||
]
|
||||
|
||||
# 意圖對應 Provider 強制覆寫 (None = 依複雜度決定)
|
||||
|
||||
Reference in New Issue
Block a user