fix(ai): keep openclaw before gemini in alert fallback

2026-05-06 20:20:44 +08:00
parent bcb9397c38
commit d0e98192de
3 changed files with 33 additions and 0 deletions
--- a/apps/api/src/services/openclaw.py
+++ b/apps/api/src/services/openclaw.py
@@ -262,6 +262,14 @@ class OpenClawService:
            cloud_aliases.get(provider_name, provider_name)
            for provider_name in (cloud_provider_order or [])
        }
+        try:
+            from src.services.ai_control import is_provider_disabled
+            if await is_provider_disabled("openclaw_nemo"):
+                cloud_candidates.discard("openclaw_nemo")
+            else:
+                cloud_candidates.add("openclaw_nemo")
+        except Exception as control_error:
+            logger.warning("alert_openclaw_nemo_control_check_failed", error=str(control_error))
        # Gemini remains the final paid backup, but alert traffic should use
        # OpenClaw/Nemo first whenever the router control plane has not disabled it.
        cloud_candidates.add("gemini")
--- a/apps/api/tests/test_openclaw_alert_cloud_fallback_gate.py
+++ b/apps/api/tests/test_openclaw_alert_cloud_fallback_gate.py
@@ -266,6 +266,7 @@ async def test_alert_context_sorts_ollama_lane_before_openclaw_nemo_backup(
 ) -> None:
    monkeypatch.setattr(openclaw_module.settings, "ALERT_AI_ALLOW_CLOUD_FALLBACK", True)
    monkeypatch.setattr(openclaw_module.settings, "ALERT_AI_ENFORCE_OLLAMA_FIRST", True)
+    monkeypatch.setattr(ai_control_module, "is_provider_disabled", AsyncMock(return_value=False))
    monkeypatch.setattr(openclaw_module, "get_ollama_failover_manager", lambda: _UnorderedFailoverManager())

    service = object.__new__(OpenClawService)
@@ -276,3 +277,22 @@ async def test_alert_context_sorts_ollama_lane_before_openclaw_nemo_backup(
    )

    assert provider_order == ["ollama_gcp_a", "ollama_gcp_b", "ollama_local", "openclaw_nemo", "gemini"]
+
+
+@pytest.mark.asyncio
+async def test_alert_context_respects_disabled_openclaw_nemo_backup(  # a disabled openclaw_nemo must be dropped from the alert provider order
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(openclaw_module.settings, "ALERT_AI_ALLOW_CLOUD_FALLBACK", True)
+    monkeypatch.setattr(openclaw_module.settings, "ALERT_AI_ENFORCE_OLLAMA_FIRST", True)
+    monkeypatch.setattr(ai_control_module, "is_provider_disabled", AsyncMock(return_value=True))  # control check reports the provider as disabled
+    monkeypatch.setattr(openclaw_module, "get_ollama_failover_manager", lambda: _UnorderedFailoverManager())
+
+    service = object.__new__(OpenClawService)  # bare instance; __init__ is not run
+    provider_order = await service._resolve_alert_provider_order(
+        task_type="diagnose",
+        alert_context={"incident_id": "INC-1", "alertname": "HostHighCpuLoad"},
+        cloud_provider_order=["openclaw_nemo", "gemini"],  # openclaw_nemo is explicitly requested by the caller
+    )
+
+    assert provider_order == ["ollama_gcp_a", "ollama_gcp_b", "ollama_local", "gemini"]  # openclaw_nemo removed, gemini still last
--- a/docs/LOGBOOK.md
+++ b/docs/LOGBOOK.md
@@ -4129,3 +4129,8 @@ DATABASE_URL='postgresql+asyncpg://test:test@localhost:5432/test' \

 - 這次不是要禁 Gemini；而是恢復正確順序：GCP-A/GCP-B/111 優先，OpenClaw/Nemo 作為 cloud 仲裁備援，Gemini 只保留最後備援。
 - `openclaw_nemo` 在修補前仍被 Redis control disabled；需等 AWOOOI 新 image 部署後，再依現場測試結果解除 disabled，避免舊 image 仍直接跳 Gemini。
+
+### 20:18 追加
+
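+- 第一版部署後 live 檢查發現 Ollama failover chain 的 cloud candidates 仍可能只有 `gemini`；已再補強 resolver：只要 AI control 未停用 `openclaw_nemo`，告警路徑會主動把它插入 Gemini 前面；若 Redis control 顯示 disabled，則會明確移除，不繞過人工控制。注意：若 control 檢查本身拋出例外，resolver 只會記錄 `alert_openclaw_nemo_control_check_failed` warning，不會新增也不會移除 `openclaw_nemo`，沿用原本的 cloud candidates。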
+- 新增測試覆蓋 disabled 狀態：`openclaw_nemo` disabled 時 provider order 仍維持 `ollama_gcp_a → ollama_gcp_b → ollama_local → gemini`。