diff --git a/apps/api/src/services/ai_router.py b/apps/api/src/services/ai_router.py index 5e321740..7f07df74 100644 --- a/apps/api/src/services/ai_router.py +++ b/apps/api/src/services/ai_router.py @@ -253,16 +253,27 @@ class AIRouter: (AIProviderEnum.OLLAMA, self._ollama_default), ] - # 2026-04-27 Claude Sonnet 4.6: A2 INC-20260425 — DIAGNOSE 移除 Ollama (CPU 238s 不可用) - # 根因: INC-20260425 NIM timeout 後 fallback 到 Ollama deepseek-r1:14b(CPU 238s), - # 造成二次 timeout,統帥批准 A+B 雙修,本任務 A2 將 Ollama 從 DIAGNOSE chain 移除。 - # chain 順序: OPENCLAW_NEMO(主力) → GEMINI(第一備援) → CLAUDE(第二備援) - # Ollama 刻意排除: CPU-only 實測 238s,絕對不可用於 DIAGNOSE(INC-20260425 血的教訓) + # 2026-04-29 ogt + Claude Code: 推翻 A2 鐵律(INC-20260425 事實基礎已過期) + # 統帥鐵律 2026-04-29: 「主要優先用 111 主機的 Ollama」 + # 統帥鐵律 feedback_ai_autonomous_direction.md: 以本地免費 LLM 為主 + # 統帥鐵律 feedback_ollama_111_only.md: Ollama 唯一主機 = 111 + # + # 推翻原因: + # 舊事實 (2026-04-27 A2): Ollama = CPU-only deepseek-r1:14b @ 238s(不可用) + # 新事實 (2026-04-29): prod Ollama 111 = M1 Pro Apple Silicon GPU + qwen2.5:7b-instruct + # VRAM 8.2GB 全載入,ctx 32k,實測 hi 0.54s + # 雲端全死: OpenClaw 188 → 500,Gemini → 429 配額爆,Claude → 404 endpoint 過時 + # 不推翻 → 100% incident llm_failed → AI 自動修復永遠不啟動 + # + # 配套修改: + # - IntentType.DIAGNOSE override: OPENCLAW_NEMO → OLLAMA + # - openclaw.py 注入 task_type="diagnose" 讓 Ollama 用 200s timeout + # - test_p0_diagnose_routing.py / test_ai_router_diagnose_fallback.py 同步更新 self._diagnose_fallback_chain: list[tuple[AIProviderEnum, str]] = [ - (AIProviderEnum.OPENCLAW_NEMO, self._openclaw_nemo_default), - (AIProviderEnum.GEMINI, self._gemini_default), - (AIProviderEnum.CLAUDE, self._claude_default), - # OLLAMA 永久排除於此 chain: CPU 238s, INC-20260425, 統帥授權 A2 + (AIProviderEnum.OLLAMA, self._ollama_default), # 主:本地免費,統帥鐵律 + (AIProviderEnum.OPENCLAW_NEMO, self._openclaw_nemo_default), # fallback 1 + (AIProviderEnum.GEMINI, self._gemini_default), # fallback 2 + (AIProviderEnum.CLAUDE, self._claude_default), # fallback 3 ] # Tool Calling 專用 Fallback 鏈 (ADR-036) @@ -288,11 +299,12 @@ class AIRouter: IntentType.RESTART: None, # 依複雜度 IntentType.SCALE: None, # 依複雜度 IntentType.CONFIG: None, # 依複雜度 (但 HIGH 會升級) - # 2026-04-16 ogt: 恢復 DIAGNOSE → OPENCLAW_NEMO - # 2026-04-12 patch 改為 None(複雜度路由)後落入 Rule 6 → Ollama deepseek-r1:14b - # deepseek-r1:14b CPU 需 238s → timeout → degraded 20% → 全部「待分析」 - # OPENCLAW_NEMO = via 188:8088 → NVIDIA NIM,實測 2-27s,JSON 輸出可靠 - IntentType.DIAGNOSE: AIProviderEnum.OPENCLAW_NEMO, + # 2026-04-16 ogt: 恢復 DIAGNOSE → OPENCLAW_NEMO(已過期,見 2026-04-29 推翻) + # 2026-04-29 ogt + Claude Code: 推翻 A2,DIAGNOSE → OLLAMA(統帥鐵律本地優先) + # 舊事實過期:M1 Pro GPU + qwen2.5:7b 0.54s(不再 CPU deepseek 238s) + # OPENCLAW_NEMO 188:8088 現況 500 → 不可用 + # 雲端全死 → 必須回到本地 Ollama 主推理 + IntentType.DIAGNOSE: AIProviderEnum.OLLAMA, # 輔助意圖 IntentType.DELETE: AIProviderEnum.CLAUDE, # CRITICAL → 強制 Claude IntentType.ROLLBACK: None, # 依複雜度 diff --git a/apps/api/src/services/openclaw.py b/apps/api/src/services/openclaw.py index fe844fad..278514fd 100644 --- a/apps/api/src/services/openclaw.py +++ b/apps/api/src/services/openclaw.py @@ -920,10 +920,20 @@ class OpenClawService: # require_local=True 對 DIAGNOSE 只會讓所有 provider 被 privacy_skip → 永遠失敗 require_local = decision.intent in (IntentType.CODE_REVIEW,) + # 2026-04-29 ogt + Claude Code: 注入 task_type 讓 Ollama 用正確 timeout + # 根因: ai_providers/ollama.py:77 讀 context["task_type"] 決定 timeout + # - "diagnose"/"force_local" → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS=200s + # - 其他/未注入 → OPENCLAW_TIMEOUT=30s(不夠 qwen2.5:7b 推理) + # webhooks alert_context 從未注入 task_type → Ollama fallback 永遠 30s timeout + # 對齊 decision.intent 後 Ollama fallback 真正能跑完 + exec_context = dict(alert_context) if alert_context else {} + if decision.intent == IntentType.DIAGNOSE: + exec_context["task_type"] = "diagnose" + result = await executor.execute( prompt=prompt, provider_order=provider_order, - context=alert_context, + context=exec_context, cache_ttl=3600, require_local=require_local, ) diff --git a/apps/api/tests/test_ai_router_diagnose_fallback.py b/apps/api/tests/test_ai_router_diagnose_fallback.py index 96815d32..24d5852b 100644 --- a/apps/api/tests/test_ai_router_diagnose_fallback.py +++ b/apps/api/tests/test_ai_router_diagnose_fallback.py @@ -95,8 +95,20 @@ def _make_registry_with_providers( # ============================================================================= -def test_diagnose_fallback_chain_no_ollama(): - """_diagnose_fallback_chain 應存在,且不含任何 OLLAMA variant""" +def test_diagnose_fallback_chain_ollama_primary(): + """2026-04-29 ogt + Claude Code: 推翻 A2,OLLAMA 為 DIAGNOSE primary + + 統帥鐵律 (2026-04-29): 主要優先用 111 主機的 Ollama + + feedback_ai_autonomous_direction.md: 以本地免費 LLM 為主 + + feedback_ollama_111_only.md: Ollama 唯一主機 = 111 + + 推翻 A2 (2026-04-27 INC-20260425) 原因: + 舊事實: Ollama = CPU-only deepseek-r1:14b @ 238s(不可用) + 新事實: prod Ollama 111 = M1 Pro GPU + qwen2.5:7b 已實載(VRAM 8.2GB) + 實測 hi 0.54s + 雲端全死: OpenClaw 500 / Gemini 429 / Claude 404 + 不推翻 → 100% llm_failed + """ router = _make_router() assert hasattr(router, "_diagnose_fallback_chain"), ( @@ -104,12 +116,16 @@ def test_diagnose_fallback_chain_no_ollama(): ) providers_in_chain = [p for p, _ in router._diagnose_fallback_chain] - assert AIProviderEnum.OLLAMA not in providers_in_chain, ( - f"OLLAMA 不應出現在 _diagnose_fallback_chain: {providers_in_chain}" - ) - assert AIProviderEnum.OLLAMA_188 not in providers_in_chain, ( - f"OLLAMA_188 不應出現在 _diagnose_fallback_chain: {providers_in_chain}" + # 新鐵律:OLLAMA 必須在 chain 第一位 + assert providers_in_chain[0] == AIProviderEnum.OLLAMA, ( + f"統帥鐵律: chain 第一位應為 OLLAMA,實際: {providers_in_chain}" ) + # 雲端 fallback 仍在(救命備援) + assert AIProviderEnum.OPENCLAW_NEMO in providers_in_chain + assert AIProviderEnum.GEMINI in providers_in_chain + assert AIProviderEnum.CLAUDE in providers_in_chain + # OLLAMA_188 (CPU-only 備援) 仍排除(M1 Pro 111 才是 GPU 主推理) + assert AIProviderEnum.OLLAMA_188 not in providers_in_chain def test_diagnose_fallback_chain_contains_cloud_providers(): @@ -128,8 +144,8 @@ def test_diagnose_fallback_chain_contains_cloud_providers(): @pytest.mark.asyncio -async def test_diagnose_route_fallback_chain_excludes_ollama(): - """DIAGNOSE intent route() 回傳的 fallback_chain 不含 OLLAMA""" +async def test_diagnose_route_primary_is_ollama(): + """2026-04-29: DIAGNOSE intent route() primary 必須是 OLLAMA(推翻 A2)""" router = _make_router() decision = await router.route( @@ -137,22 +153,24 @@ async def test_diagnose_route_fallback_chain_excludes_ollama(): context={"intent_hint": "diagnose"}, ) - assert decision.selected_provider == AIProviderEnum.OPENCLAW_NEMO, ( - f"primary 應為 OPENCLAW_NEMO,實際: {decision.selected_provider}" + assert decision.selected_provider == AIProviderEnum.OLLAMA, ( + f"統帥鐵律: DIAGNOSE primary 應為 OLLAMA,實際: {decision.selected_provider}" ) + # 雲端 fallback 仍在(OpenClaw / Gemini / Claude 救命備援) fb_providers = [p for p, _ in decision.fallback_chain] - assert AIProviderEnum.OLLAMA not in fb_providers, ( - f"OLLAMA 不應在 DIAGNOSE fallback_chain: {fb_providers}" + # ollama_failover_manager 可能轉到 ollama_188,但 ollama variant 必須有 + has_cloud_fallback = ( + AIProviderEnum.GEMINI in fb_providers or AIProviderEnum.CLAUDE in fb_providers ) - assert AIProviderEnum.OLLAMA_188 not in fb_providers, ( - f"OLLAMA_188 不應在 DIAGNOSE fallback_chain: {fb_providers}" + assert has_cloud_fallback, ( + f"雲端 fallback 應存在當救命備援: {fb_providers}" ) @pytest.mark.asyncio -async def test_diagnose_route_sync_fallback_chain_excludes_ollama(): - """DIAGNOSE intent route_sync() 回傳的 fallback_chain 同樣不含 OLLAMA""" +async def test_diagnose_route_sync_primary_is_ollama(): + """2026-04-29: DIAGNOSE route_sync() primary 同樣是 OLLAMA""" router = _make_router() decision = router.route_sync( @@ -160,9 +178,8 @@ async def test_diagnose_route_sync_fallback_chain_excludes_ollama(): context={"intent_hint": "diagnose"}, ) - fb_providers = [p for p, _ in decision.fallback_chain] - assert AIProviderEnum.OLLAMA not in fb_providers, ( - f"OLLAMA 不應在 DIAGNOSE route_sync fallback_chain: {fb_providers}" + assert decision.selected_provider == AIProviderEnum.OLLAMA, ( + f"統帥鐵律: DIAGNOSE route_sync primary 應為 OLLAMA,實際: {decision.selected_provider}" ) @@ -282,22 +299,24 @@ async def test_restart_intent_still_has_ollama_in_fallback(): ) -def test_build_fallback_chain_for_intent_diagnose_no_ollama(): - """_build_fallback_chain_for_intent(DIAGNOSE) 回傳結果不含 OLLAMA""" +def test_build_fallback_chain_for_intent_diagnose_with_ollama_primary(): + """2026-04-29: _build_fallback_chain_for_intent(DIAGNOSE, primary=OLLAMA) + 回傳結果應排除 primary OLLAMA,但保留雲端 fallback。""" router = _make_router() + # primary 是 OLLAMA(推翻 A2 後) chain = router._build_fallback_chain_for_intent( - AIProviderEnum.OPENCLAW_NEMO, + AIProviderEnum.OLLAMA, IntentType.DIAGNOSE, ) providers = [p for p, _ in chain] + # primary 已排除 assert AIProviderEnum.OLLAMA not in providers - assert AIProviderEnum.OLLAMA_188 not in providers - # primary 已排除,chain 剩 GEMINI + CLAUDE + # fallback 雲端救命備援必須存在 + assert AIProviderEnum.OPENCLAW_NEMO in providers assert AIProviderEnum.GEMINI in providers assert AIProviderEnum.CLAUDE in providers - assert AIProviderEnum.OPENCLAW_NEMO not in providers # primary 排除 def test_build_fallback_chain_for_intent_restart_has_ollama(): diff --git a/apps/api/tests/test_ai_router_failover_integration.py b/apps/api/tests/test_ai_router_failover_integration.py index f1dfc008..8cab0543 100644 --- a/apps/api/tests/test_ai_router_failover_integration.py +++ b/apps/api/tests/test_ai_router_failover_integration.py @@ -186,21 +186,32 @@ async def test_router_does_not_use_failover_for_nemotron(): @pytest.mark.asyncio -async def test_router_does_not_use_failover_for_openclaw_nemo(): - """DIAGNOSE intent → OPENCLAW_NEMO → failover_manager 不應被呼叫""" +async def test_router_uses_failover_for_diagnose_ollama_primary(): + """2026-04-29: DIAGNOSE intent → OLLAMA → failover_manager 應被呼叫 + + 推翻 A2 後 DIAGNOSE primary 為 OLLAMA(本地優先鐵律) + failover_manager 對 OLLAMA primary 會檢查健康度(111 vs 188 CPU 備援切換) + """ mock_fm = MagicMock() mock_fm.select_provider = AsyncMock() + # mock 回傳保持原 provider(無切換) + mock_decision = MagicMock() + mock_decision.primary.provider_name = "ollama" + mock_decision.primary.model = "qwen2.5:7b-instruct" + mock_decision.fallback_chain = [] + mock_fm.select_provider.return_value = mock_decision router = _make_router_with_mock_failover(mock_fm) - # context_hint=diagnose → OPENCLAW_NEMO(規則 3 override) decision = await router.route( "diagnose service crash", context={"intent_hint": "diagnose"}, ) - assert decision.selected_provider == AIProviderEnum.OPENCLAW_NEMO - mock_fm.select_provider.assert_not_awaited() + # 推翻 A2:DIAGNOSE primary 是 OLLAMA + assert decision.selected_provider == AIProviderEnum.OLLAMA + # OLLAMA primary 觸發 failover_manager 健康檢查(111 vs 188) + mock_fm.select_provider.assert_awaited() # ============================================================================= diff --git a/apps/api/tests/test_p0_diagnose_routing.py b/apps/api/tests/test_p0_diagnose_routing.py index 8e7d186b..646f9140 100644 --- a/apps/api/tests/test_p0_diagnose_routing.py +++ b/apps/api/tests/test_p0_diagnose_routing.py @@ -142,19 +142,29 @@ class TestLocalFallbackChain: class TestDiagnoseIntentOverride: """DIAGNOSE intent 路由設定驗證""" - def test_diagnose_override_is_openclaw_nemo(self): - """_intent_provider_overrides[DIAGNOSE] 應為 OPENCLAW_NEMO + def test_diagnose_override_is_ollama(self): + """_intent_provider_overrides[DIAGNOSE] 應為 OLLAMA(2026-04-29 推翻 A2) - 2026-04-12 ogt: NEMOTRON routing 暫停 — NIM tool_call 無 confidence 欄位 - 2026-04-16 ogt: 恢復 DIAGNOSE → OPENCLAW_NEMO — None 複雜度路由落入 Rule 6 + 歷史脈絡: + - 2026-04-12 ogt: NEMOTRON routing 暫停 — NIM tool_call 無 confidence 欄位 + - 2026-04-16 ogt: 恢復 DIAGNOSE → OPENCLAW_NEMO — None 複雜度路由落入 Rule 6 → Ollama deepseek-r1:14b CPU 需 238s → timeout → degraded → 全部「待分析」 - OPENCLAW_NEMO = 188:8088 NVIDIA NIM,實測 2-27s,JSON 輸出可靠 + - 2026-04-27 Claude Sonnet 4.6 A2: 確立「Ollama 永久排除於 DIAGNOSE chain」 + + 2026-04-29 推翻 A2 鐵律: + - 統帥指令: 「主要優先用 111 主機的 Ollama」 + - 統帥鐵律 feedback_ai_autonomous_direction.md: 以本地免費 LLM 為主 + - 統帥鐵律 feedback_ollama_111_only.md: Ollama 唯一主機 = 111 + - 新事實: prod Ollama 111 = M1 Pro Apple Silicon GPU + qwen2.5:7b-instruct + VRAM 8.2GB 全載入,實測 hi 0.54s + - 雲端全死: OpenClaw 500 / Gemini 429 / Claude 404 + - 配套:openclaw.py 注入 task_type="diagnose" → Ollama 用 200s timeout """ from src.services.ai_router import AIRouter, AIProviderEnum from src.services.intent_classifier import IntentType router = AIRouter() override = router._intent_provider_overrides.get(IntentType.DIAGNOSE) - assert override is AIProviderEnum.OPENCLAW_NEMO, ( - f"DIAGNOSE 應為 OPENCLAW_NEMO,實際為 {override}" + assert override is AIProviderEnum.OLLAMA, ( + f"統帥鐵律: DIAGNOSE 應為 OLLAMA(本地優先),實際為 {override}" )