fix(code-review): keep Gemini fallback opt-in

2026-05-05 14:38:44 +08:00
parent 3b73cc7f94
commit fc1a6196df
5 changed files with 355 additions and 9 deletions
--- a/apps/api/src/core/config.py
+++ b/apps/api/src/core/config.py
@@ -362,7 +362,7 @@ class Settings(BaseSettings):
            raise ValueError(
                f"OLLAMA URL host 不允許的外部域名：{host!r}（完整 URL：{v!r}）"
                "，必須使用私網 IP 或已知 K8s Service hostname"
-            )
+            ) from None
        if not (ip.is_private or ip.is_loopback):
            raise ValueError(
                f"OLLAMA URL 必須是私網/loopback IP、已知 K8s SVC 或 GCP 白名單 IP，"
@@ -496,6 +496,14 @@ class Settings(BaseSettings):
    )
    GEMINI_API_KEY: str = Field(default="", description="Google Gemini API key")
    CLAUDE_API_KEY: str = Field(default="", description="Anthropic Claude API key")
+    LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK: bool = Field(
+        default=False,
+        description=(
+            "Allow LocalCodeReviewService to fall back to Gemini when the "
+            "GCP-B/Ollama code-review lane fails. Default false to avoid "
+            "unexpected cloud spend from Gitea push/PR alerts."
+        ),
+    )
    # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling 整合
    NVIDIA_API_KEY: str = Field(
        default="",
--- a/apps/api/src/services/local_code_review_service.py
+++ b/apps/api/src/services/local_code_review_service.py
@@ -20,6 +20,7 @@ import structlog

 from src.core.config import get_settings
 from src.services.model_registry import get_model
+from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint

 logger = structlog.get_logger(__name__)
 settings = get_settings()
@@ -75,18 +76,24 @@ class LocalCodeReviewService:
                redis = None

            diff_size = len(diff.encode())
-            use_gemini = diff_size > _MAX_DIFF_BYTES
+            allow_cloud_fallback = settings.LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK

-            if use_gemini:
+            if diff_size > _MAX_DIFF_BYTES and allow_cloud_fallback:
                result = await self._review_with_gemini(pr_id, repo, title, diff)
            else:
+                if diff_size > _MAX_DIFF_BYTES:
+                    logger.info(
+                        "pr_review_large_diff_using_ollama_truncated",
+                        pr_id=pr_id,
+                        diff_size_bytes=diff_size,
+                        cloud_fallback_enabled=allow_cloud_fallback,
+                    )
                result = await self._review_with_ollama(pr_id, repo, title, diff)
-                if result is None:
-                    # Ollama 失敗 → fallback Gemini
+                if result is None and allow_cloud_fallback:
                    result = await self._review_with_gemini(pr_id, repo, title, diff)

            if result is None:
-                return None
+                result = self._cloud_fallback_disabled_result(pr_id, repo, title)

            result["diff_size_bytes"] = diff_size

@@ -116,7 +123,7 @@ class LocalCodeReviewService:
        try:
            http = await self._get_http()
            resp = await http.post(
-                f"{settings.OLLAMA_URL}/api/generate",
+                f"{resolve_ollama_endpoint('code_review')}/api/generate",
                json={
                    "model": _MODEL_OLLAMA,
                    "prompt": prompt,
@@ -138,6 +145,14 @@ class LocalCodeReviewService:
    async def _review_with_gemini(
        self, pr_id: str, repo: str, title: str, diff: str
    ) -> dict[str, Any] | None:
+        if not settings.LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK:
+            logger.warning(
+                "pr_review_gemini_fallback_disabled",
+                pr_id=pr_id,
+                repo=repo,
+            )
+            return None
+
        try:
            from src.services.openclaw import get_openclaw
            openclaw = get_openclaw()
@@ -156,12 +171,37 @@ class LocalCodeReviewService:
            logger.error("pr_review_gemini_failed", pr_id=pr_id, error=str(e))
        return None

+    def _cloud_fallback_disabled_result(
+        self,
+        pr_id: str,
+        repo: str,
+        title: str,
+    ) -> dict[str, Any]:
+        logger.warning(
+            "pr_review_cloud_fallback_skipped",
+            pr_id=pr_id,
+            repo=repo,
+            title=title,
+            reason="LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK=false",
+        )
+        return {
+            "review_text": (
+                "⚠️ Code Review：GCP-B/Ollama 審查未完成，"
+                "已依成本策略跳過 Gemini fallback。"
+            ),
+            "issues_count": 1,
+            "model": _MODEL_OLLAMA,
+            "provider": "ollama_unavailable",
+            "cloud_fallback_skipped": True,
+        }
+
    async def _save_to_db(
        self, pr_id: str, repo: str, title: str, diff_size: int, result: dict
    ) -> None:
        try:
-            from src.db.base import get_db_context
            from sqlalchemy import text
+
+            from src.db.base import get_db_context
            async with get_db_context() as db:
                await db.execute(
                    text("""
@@ -206,7 +246,7 @@ class LocalCodeReviewService:
            try:
                http = await self._get_http()
                resp = await http.post(
-                    f"{settings.OLLAMA_URL}/api/generate",
+                    f"{resolve_ollama_endpoint('code_review')}/api/generate",
                    json={
                        "model": _MODEL_OLLAMA,
                        "prompt": prompt,
--- a/apps/api/src/services/ollama_endpoint_resolver.py
+++ b/apps/api/src/services/ollama_endpoint_resolver.py
@@ -0,0 +1,101 @@
+"""
+Ollama endpoint resolver for non-critical workload placement.
+
+ADR-110 gives AWOOOI three Ollama endpoints. This resolver is intentionally
+small: it chooses the preferred endpoint by workload class, while health-aware
+failover remains owned by ollama_failover_manager.py.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Literal, Protocol
+
+from src.core.config import settings
+
+OllamaWorkloadType = Literal[
+    "interactive",
+    "healthcheck",
+    "batch",
+    "embedding",
+    "rag",
+    "code_review",
+    "shadow",
+    "canary",
+    "local_required",
+    "privacy_sensitive",
+    "dr",
+]
+
+_GCP_B_PREFERRED_WORKLOADS = {
+    "batch",
+    "embedding",
+    "rag",
+    "code_review",
+    "shadow",
+    "canary",
+}
+
+_LOCAL_PREFERRED_WORKLOADS = {
+    "local_required",
+    "privacy_sensitive",
+    "dr",
+}
+
+
+class _OllamaSettings(Protocol):
+    OLLAMA_URL: str
+    OLLAMA_SECONDARY_URL: str
+    OLLAMA_FALLBACK_URL: str
+
+
+@dataclass(frozen=True)
+class OllamaEndpointSelection:
+    url: str
+    provider_name: str
+    workload_type: OllamaWorkloadType
+    reason: str
+
+
+def resolve_ollama_selection(
+    workload_type: OllamaWorkloadType = "interactive",
+    *,
+    config: _OllamaSettings | None = None,
+) -> OllamaEndpointSelection:
+    """Return the preferred Ollama endpoint for a workload class."""
+    cfg = config or settings
+    primary = cfg.OLLAMA_URL
+    secondary = cfg.OLLAMA_SECONDARY_URL
+    fallback = cfg.OLLAMA_FALLBACK_URL
+
+    if workload_type in _GCP_B_PREFERRED_WORKLOADS and secondary:
+        return OllamaEndpointSelection(
+            url=secondary,
+            provider_name="ollama_gcp_b",
+            workload_type=workload_type,
+            reason="gcp_b_batch_lane",
+        )
+
+    if workload_type in _LOCAL_PREFERRED_WORKLOADS and fallback:
+        return OllamaEndpointSelection(
+            url=fallback,
+            provider_name="ollama_local",
+            workload_type=workload_type,
+            reason="local_privacy_or_dr_lane",
+        )
+
+    return OllamaEndpointSelection(
+        url=primary,
+        provider_name="ollama_gcp_a",
+        workload_type=workload_type,
+        reason="primary_interactive_lane",
+    )
+
+
+def resolve_ollama_endpoint(
+    workload_type: OllamaWorkloadType = "interactive",
+    *,
+    config: _OllamaSettings | None = None,
+) -> str:
+    """Return only the preferred Ollama base URL."""
+    return resolve_ollama_selection(workload_type, config=config).url
--- a/apps/api/tests/test_local_code_review_cloud_fallback.py
+++ b/apps/api/tests/test_local_code_review_cloud_fallback.py
@@ -0,0 +1,142 @@
+from __future__ import annotations
+
+from typing import Any
+
+import httpx
+import pytest
+
+from src.services import local_code_review_service as review_module
+from src.services.local_code_review_service import LocalCodeReviewService
+
+
+class _FakeResponse:
+    status_code = 200
+
+    def json(self) -> dict[str, str]:
+        return {"response": "✅ Push 品質正常"}
+
+
+class _FakeClient:
+    def __init__(self, *, fail: bool = False) -> None:
+        self.fail = fail
+        self.posted_urls: list[str] = []
+
+    async def post(self, url: str, **kwargs: Any) -> _FakeResponse:
+        self.posted_urls.append(url)
+        if self.fail:
+            raise httpx.TimeoutException("timeout")
+        return _FakeResponse()
+
+
+async def _noop_save(*args: Any, **kwargs: Any) -> None:
+    return None
+
+
+@pytest.mark.asyncio
+async def test_large_pr_uses_gcp_b_ollama_when_gemini_fallback_disabled(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(
+        review_module.settings,
+        "LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK",
+        False,
+    )
+    monkeypatch.setattr(
+        review_module,
+        "resolve_ollama_endpoint",
+        lambda workload_type: "http://gcp-b:11436",
+    )
+
+    client = _FakeClient()
+    service = LocalCodeReviewService()
+
+    async def _get_http() -> _FakeClient:
+        return client
+
+    async def _fail_gemini(*args: Any, **kwargs: Any) -> None:
+        raise AssertionError("Gemini fallback should stay disabled")
+
+    monkeypatch.setattr(service, "_get_http", _get_http)
+    monkeypatch.setattr(service, "_save_to_db", _noop_save)
+    monkeypatch.setattr(service, "_review_with_gemini", _fail_gemini)
+
+    result = await service.review_pr(
+        pr_id="pr-1",
+        repo="wooo/awoooi",
+        title="large diff",
+        diff="x" * (60 * 1024),
+    )
+
+    assert result is not None
+    assert result["provider"] == "ollama"
+    assert client.posted_urls == ["http://gcp-b:11436/api/generate"]
+
+
+@pytest.mark.asyncio
+async def test_ollama_failure_does_not_fall_back_to_gemini_by_default(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(
+        review_module.settings,
+        "LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK",
+        False,
+    )
+    monkeypatch.setattr(
+        review_module,
+        "resolve_ollama_endpoint",
+        lambda workload_type: "http://gcp-b:11436",
+    )
+
+    client = _FakeClient(fail=True)
+    service = LocalCodeReviewService()
+
+    async def _get_http() -> _FakeClient:
+        return client
+
+    async def _fail_gemini(*args: Any, **kwargs: Any) -> None:
+        raise AssertionError("Gemini fallback should stay disabled")
+
+    monkeypatch.setattr(service, "_get_http", _get_http)
+    monkeypatch.setattr(service, "_save_to_db", _noop_save)
+    monkeypatch.setattr(service, "_review_with_gemini", _fail_gemini)
+
+    result = await service.review_pr(
+        pr_id="pr-2",
+        repo="wooo/awoooi",
+        title="ollama unavailable",
+        diff="small diff",
+    )
+
+    assert result is not None
+    assert result["provider"] == "ollama_unavailable"
+    assert result["cloud_fallback_skipped"] is True
+    assert client.posted_urls == ["http://gcp-b:11436/api/generate"]
+
+
+@pytest.mark.asyncio
+async def test_gemini_fallback_requires_explicit_flag(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(
+        review_module.settings,
+        "LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK",
+        True,
+    )
+
+    service = LocalCodeReviewService()
+
+    async def _gemini_result(*args: Any, **kwargs: Any) -> dict[str, Any]:
+        return {"review_text": "ok", "issues_count": 0, "model": "gemini", "provider": "gemini"}
+
+    monkeypatch.setattr(service, "_save_to_db", _noop_save)
+    monkeypatch.setattr(service, "_review_with_gemini", _gemini_result)
+
+    result = await service.review_pr(
+        pr_id="pr-3",
+        repo="wooo/awoooi",
+        title="large diff explicit cloud",
+        diff="x" * (60 * 1024),
+    )
+
+    assert result is not None
+    assert result["provider"] == "gemini"
--- a/apps/api/tests/test_ollama_endpoint_resolver.py
+++ b/apps/api/tests/test_ollama_endpoint_resolver.py
@@ -0,0 +1,55 @@
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+from src.services.ollama_endpoint_resolver import (
+    resolve_ollama_endpoint,
+    resolve_ollama_selection,
+)
+
+
+def _settings(
+    *,
+    primary: str = "http://192.168.0.110:11435",
+    secondary: str = "http://192.168.0.110:11436",
+    fallback: str = "http://192.168.0.110:11437",
+) -> SimpleNamespace:
+    return SimpleNamespace(
+        OLLAMA_URL=primary,
+        OLLAMA_SECONDARY_URL=secondary,
+        OLLAMA_FALLBACK_URL=fallback,
+    )
+
+
+def test_batch_workloads_prefer_gcp_b() -> None:
+    cfg = _settings()
+
+    for workload in ("batch", "embedding", "rag", "code_review", "shadow", "canary"):
+        selection = resolve_ollama_selection(workload, config=cfg)
+        assert selection.url == "http://192.168.0.110:11436"
+        assert selection.provider_name == "ollama_gcp_b"
+        assert selection.reason == "gcp_b_batch_lane"
+
+
+def test_interactive_workloads_stay_on_gcp_a() -> None:
+    cfg = _settings()
+
+    for workload in ("interactive", "healthcheck"):
+        selection = resolve_ollama_selection(workload, config=cfg)
+        assert selection.url == "http://192.168.0.110:11435"
+        assert selection.provider_name == "ollama_gcp_a"
+
+
+def test_local_required_workloads_use_local_lane() -> None:
+    cfg = _settings()
+
+    for workload in ("local_required", "privacy_sensitive", "dr"):
+        selection = resolve_ollama_selection(workload, config=cfg)
+        assert selection.url == "http://192.168.0.110:11437"
+        assert selection.provider_name == "ollama_local"
+
+
+def test_batch_workloads_fall_back_to_primary_when_secondary_missing() -> None:
+    cfg = _settings(secondary="")
+
+    assert resolve_ollama_endpoint("embedding", config=cfg) == "http://192.168.0.110:11435"