From b225c23ad8d8ca7962e05e8450e35b4051011316 Mon Sep 17 00:00:00 2001 From: OG T Date: Fri, 3 Apr 2026 12:32:01 +0800 Subject: [PATCH] =?UTF-8?q?fix(ai=5Frouter):=20DIAGNOSE/ALERT=5FTRIAGE=20?= =?UTF-8?q?=E6=94=B9=E7=94=A8=20llama3.2:3b=20=E9=81=BF=E5=85=8D=2090?= =?UTF-8?q?=E7=A7=92=20timeout?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qwen2.5:7b-instruct 在 prod 需要 >90s,導致 DIAGNOSE intent 全鏈路失敗。 llama3.2:3b (summary model) 實測 4s 回應,適合 triage 類快速判斷。 規則 3 新增特判: DIAGNOSE/ALERT_TRIAGE/QUERY → ollama summary model 不影響其他 intent 的 model 選擇邏輯。 Co-Authored-By: Claude Sonnet 4.6 --- apps/api/src/services/ai_router.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/apps/api/src/services/ai_router.py b/apps/api/src/services/ai_router.py index b112799f..f4d14206 100644 --- a/apps/api/src/services/ai_router.py +++ b/apps/api/src/services/ai_router.py @@ -398,7 +398,14 @@ class AIRouter: provider_override = self._intent_provider_overrides.get(intent) if provider_override is not None: provider = provider_override - model = self._provider_models[provider] + # 2026-04-03 ogt: DIAGNOSE/ALERT_TRIAGE/QUERY 用 summary model (llama3.2:3b) + # 避免 qwen2.5:7b-instruct 90秒 timeout 導致全鏈路失敗 (Phase 24 A選項) + if provider == AIProviderEnum.OLLAMA and intent in ( + IntentType.DIAGNOSE, IntentType.ALERT_TRIAGE, IntentType.QUERY + ): + model = self._ollama_summary + else: + model = self._provider_models[provider] reason = f"意圖 {intent.value} 指定使用 {provider.value}" return provider, model, reason