fix(ai): isolate heavy Ollama workloads from GCP alert lane

2026-05-05 23:05:59 +08:00
parent 1dcc6d61dc
commit c4854bb355
17 changed files with 146 additions and 84 deletions
--- a/apps/api/tests/test_local_code_review_cloud_fallback.py
+++ b/apps/api/tests/test_local_code_review_cloud_fallback.py
@@ -33,7 +33,7 @@ async def _noop_save(*args: Any, **kwargs: Any) -> None:


@pytest.mark.asyncio
-async def test_large_pr_uses_gcp_b_ollama_when_gemini_fallback_disabled(
+async def test_large_pr_uses_local_ollama_when_gemini_fallback_disabled(
    monkeypatch: pytest.MonkeyPatch,
 ) -> None:
    monkeypatch.setattr(
@@ -44,7 +44,7 @@ async def test_large_pr_uses_gcp_b_ollama_when_gemini_fallback_disabled(
    monkeypatch.setattr(
        review_module,
        "resolve_ollama_endpoint",
-        lambda workload_type: "http://gcp-b:11436",
+        lambda workload_type: "http://local-111:11434",
    )

    client = _FakeClient()
@@ -69,7 +69,7 @@ async def test_large_pr_uses_gcp_b_ollama_when_gemini_fallback_disabled(

    assert result is not None
    assert result["provider"] == "ollama"
-    assert client.posted_urls == ["http://gcp-b:11436/api/generate"]
+    assert client.posted_urls == ["http://local-111:11434/api/generate"]


@pytest.mark.asyncio
@@ -84,7 +84,7 @@ async def test_ollama_failure_does_not_fall_back_to_gemini_by_default(
    monkeypatch.setattr(
        review_module,
        "resolve_ollama_endpoint",
-        lambda workload_type: "http://gcp-b:11436",
+        lambda workload_type: "http://local-111:11434",
    )

    client = _FakeClient(fail=True)
@@ -110,7 +110,7 @@ async def test_ollama_failure_does_not_fall_back_to_gemini_by_default(
    assert result is not None
    assert result["provider"] == "ollama_unavailable"
    assert result["cloud_fallback_skipped"] is True
-    assert client.posted_urls == ["http://gcp-b:11436/api/generate"]
+    assert client.posted_urls == ["http://local-111:11434/api/generate"]


@pytest.mark.asyncio