From 7fbeaaf213de6748fd86038c1fd0f8e4270dbc18 Mon Sep 17 00:00:00 2001 From: ogt Date: Sun, 19 Apr 2026 14:25:28 +0800 Subject: [PATCH] =?UTF-8?q?fix(ai-ops):=20Hermes=20L1=20=E7=A7=BB=E9=99=A4?= =?UTF-8?q?=E9=81=8E=E7=B7=8A=20timeout=20+=20keep=5Falive=20=E5=B8=B8?= =?UTF-8?q?=E9=A7=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 問題盤點(2026-04-19 實地 SSH 111:11434): - 我原本設 HERMES_TIMEOUT=30 是人為限制,AI 推理不該被綁 - 111 Ollama 實況:9 個模型共享,deepseek-r1:14b 會佔 VRAM - hermes3 冷啟動 30+s(切換)/ warm 後 <1s(40x 差距) - 30s timeout → 冷啟動必中 → 誤判 AI 掛 → 人為降級 修正: - HERMES_TIMEOUT default 30 → 180(HERMES_TIMEOUT=0 代表無限制) - 新增 keep_alive=24h payload,讓 hermes3 常駐 VRAM 避免被其他客戶端(deepseek-r1 等)切換觸發冷啟動 - Memory reference_env_map.md 更新 111 實況(9 模型清單、切換陷阱、 ADR-012 呼叫設定) Co-Authored-By: Claude Sonnet 4.6 --- services/event_router.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/services/event_router.py b/services/event_router.py index ef11102..d963f8d 100644 --- a/services/event_router.py +++ b/services/event_router.py @@ -231,7 +231,12 @@ def _render_l2_with_fallback(event: dict) -> str: # ===================================================================== _HERMES_URL = os.getenv("HERMES_URL", "http://192.168.0.111:11434") _HERMES_MODEL = os.getenv("HERMES_MODEL", "hermes3:latest") -_HERMES_TIMEOUT = int(os.getenv("HERMES_TIMEOUT", "30")) +# 放寬 timeout:hermes3 warm 後 <1s,冷啟動 30-60s(deepseek-r1 佔 VRAM 時會切換) +# 設 HERMES_TIMEOUT=0(或負值)表示無限制(timeout=None;OS TCP 層的 ~120s 保底僅適用於建立連線,讀取回應階段不會自動逾時,Ollama 卡住時呼叫會一直阻塞) +_HERMES_TIMEOUT_RAW = int(os.getenv("HERMES_TIMEOUT", "180")) +_HERMES_TIMEOUT: float | None = None if _HERMES_TIMEOUT_RAW <= 0 else _HERMES_TIMEOUT_RAW +# keep_alive=24h 讓 hermes3 常駐記憶體,避免被其他客戶端切換掉造成冷啟動 +_HERMES_KEEP_ALIVE = os.getenv("HERMES_KEEP_ALIVE", "24h") _HERMES_OBSERVE_PROMPT = """你是一個 SRE 助手,任務是把技術錯誤翻譯成人類可理解的摘要。 @@ -267,6 +272,7 @@ def _hermes_observe_parsed(event: dict) -> dict | None: "system": _HERMES_OBSERVE_PROMPT, "prompt": user_prompt, "stream": False, + 
"keep_alive": _HERMES_KEEP_ALIVE, # 讓模型常駐 VRAM,避免冷切換 "options": {"temperature": 0.1, "num_predict": 300}, }, timeout=_HERMES_TIMEOUT,