fix(code-review): keep Gemini fallback opt-in
Some checks failed
CD Pipeline / tests (push) Successful in 2m2s
Code Review / ai-code-review (push) Successful in 27s
CD Pipeline / build-and-deploy (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled

This commit is contained in:
Your Name
2026-05-05 14:38:44 +08:00
parent 3b73cc7f94
commit fc1a6196df
5 changed files with 355 additions and 9 deletions

View File

@@ -362,7 +362,7 @@ class Settings(BaseSettings):
raise ValueError(
f"OLLAMA URL host 不允許的外部域名:{host!r}(完整 URL{v!r}"
",必須使用私網 IP 或已知 K8s Service hostname"
)
) from None
if not (ip.is_private or ip.is_loopback):
raise ValueError(
f"OLLAMA URL 必須是私網/loopback IP、已知 K8s SVC 或 GCP 白名單 IP"
@@ -496,6 +496,14 @@ class Settings(BaseSettings):
)
GEMINI_API_KEY: str = Field(default="", description="Google Gemini API key")
CLAUDE_API_KEY: str = Field(default="", description="Anthropic Claude API key")
LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK: bool = Field(
default=False,
description=(
"Allow LocalCodeReviewService to fall back to Gemini when the "
"GCP-B/Ollama code-review lane fails. Default false to avoid "
"unexpected cloud spend from Gitea push/PR alerts."
),
)
# 2026-03-29 ogt: ADR-036 Nemotron Tool Calling 整合
NVIDIA_API_KEY: str = Field(
default="",

View File

@@ -20,6 +20,7 @@ import structlog
from src.core.config import get_settings
from src.services.model_registry import get_model
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
logger = structlog.get_logger(__name__)
settings = get_settings()
@@ -75,18 +76,24 @@ class LocalCodeReviewService:
redis = None
diff_size = len(diff.encode())
use_gemini = diff_size > _MAX_DIFF_BYTES
allow_cloud_fallback = settings.LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK
if use_gemini:
if diff_size > _MAX_DIFF_BYTES and allow_cloud_fallback:
result = await self._review_with_gemini(pr_id, repo, title, diff)
else:
if diff_size > _MAX_DIFF_BYTES:
logger.info(
"pr_review_large_diff_using_ollama_truncated",
pr_id=pr_id,
diff_size_bytes=diff_size,
cloud_fallback_enabled=allow_cloud_fallback,
)
result = await self._review_with_ollama(pr_id, repo, title, diff)
if result is None:
# Ollama 失敗 → fallback Gemini
if result is None and allow_cloud_fallback:
result = await self._review_with_gemini(pr_id, repo, title, diff)
if result is None:
return None
result = self._cloud_fallback_disabled_result(pr_id, repo, title)
result["diff_size_bytes"] = diff_size
@@ -116,7 +123,7 @@ class LocalCodeReviewService:
try:
http = await self._get_http()
resp = await http.post(
f"{settings.OLLAMA_URL}/api/generate",
f"{resolve_ollama_endpoint('code_review')}/api/generate",
json={
"model": _MODEL_OLLAMA,
"prompt": prompt,
@@ -138,6 +145,14 @@ class LocalCodeReviewService:
async def _review_with_gemini(
self, pr_id: str, repo: str, title: str, diff: str
) -> dict[str, Any] | None:
if not settings.LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK:
logger.warning(
"pr_review_gemini_fallback_disabled",
pr_id=pr_id,
repo=repo,
)
return None
try:
from src.services.openclaw import get_openclaw
openclaw = get_openclaw()
@@ -156,12 +171,37 @@ class LocalCodeReviewService:
logger.error("pr_review_gemini_failed", pr_id=pr_id, error=str(e))
return None
def _cloud_fallback_disabled_result(
self,
pr_id: str,
repo: str,
title: str,
) -> dict[str, Any]:
logger.warning(
"pr_review_cloud_fallback_skipped",
pr_id=pr_id,
repo=repo,
title=title,
reason="LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK=false",
)
return {
"review_text": (
"⚠️ Code ReviewGCP-B/Ollama 審查未完成,"
"已依成本策略跳過 Gemini fallback。"
),
"issues_count": 1,
"model": _MODEL_OLLAMA,
"provider": "ollama_unavailable",
"cloud_fallback_skipped": True,
}
async def _save_to_db(
self, pr_id: str, repo: str, title: str, diff_size: int, result: dict
) -> None:
try:
from src.db.base import get_db_context
from sqlalchemy import text
from src.db.base import get_db_context
async with get_db_context() as db:
await db.execute(
text("""
@@ -206,7 +246,7 @@ class LocalCodeReviewService:
try:
http = await self._get_http()
resp = await http.post(
f"{settings.OLLAMA_URL}/api/generate",
f"{resolve_ollama_endpoint('code_review')}/api/generate",
json={
"model": _MODEL_OLLAMA,
"prompt": prompt,

View File

@@ -0,0 +1,101 @@
"""
Ollama endpoint resolver for non-critical workload placement.
ADR-110 gives AWOOOI three Ollama endpoints. This resolver is intentionally
small: it chooses the preferred endpoint by workload class, while health-aware
failover remains owned by ollama_failover_manager.py.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Literal, Protocol
from src.core.config import settings
OllamaWorkloadType = Literal[
"interactive",
"healthcheck",
"batch",
"embedding",
"rag",
"code_review",
"shadow",
"canary",
"local_required",
"privacy_sensitive",
"dr",
]
_GCP_B_PREFERRED_WORKLOADS = {
"batch",
"embedding",
"rag",
"code_review",
"shadow",
"canary",
}
_LOCAL_PREFERRED_WORKLOADS = {
"local_required",
"privacy_sensitive",
"dr",
}
class _OllamaSettings(Protocol):
OLLAMA_URL: str
OLLAMA_SECONDARY_URL: str
OLLAMA_FALLBACK_URL: str
@dataclass(frozen=True)
class OllamaEndpointSelection:
url: str
provider_name: str
workload_type: OllamaWorkloadType
reason: str
def resolve_ollama_selection(
workload_type: OllamaWorkloadType = "interactive",
*,
config: _OllamaSettings | None = None,
) -> OllamaEndpointSelection:
"""Return the preferred Ollama endpoint for a workload class."""
cfg = config or settings
primary = cfg.OLLAMA_URL
secondary = cfg.OLLAMA_SECONDARY_URL
fallback = cfg.OLLAMA_FALLBACK_URL
if workload_type in _GCP_B_PREFERRED_WORKLOADS and secondary:
return OllamaEndpointSelection(
url=secondary,
provider_name="ollama_gcp_b",
workload_type=workload_type,
reason="gcp_b_batch_lane",
)
if workload_type in _LOCAL_PREFERRED_WORKLOADS and fallback:
return OllamaEndpointSelection(
url=fallback,
provider_name="ollama_local",
workload_type=workload_type,
reason="local_privacy_or_dr_lane",
)
return OllamaEndpointSelection(
url=primary,
provider_name="ollama_gcp_a",
workload_type=workload_type,
reason="primary_interactive_lane",
)
def resolve_ollama_endpoint(
workload_type: OllamaWorkloadType = "interactive",
*,
config: _OllamaSettings | None = None,
) -> str:
"""Return only the preferred Ollama base URL."""
return resolve_ollama_selection(workload_type, config=config).url

View File

@@ -0,0 +1,142 @@
from __future__ import annotations
from typing import Any
import httpx
import pytest
from src.services import local_code_review_service as review_module
from src.services.local_code_review_service import LocalCodeReviewService
class _FakeResponse:
status_code = 200
def json(self) -> dict[str, str]:
return {"response": "✅ Push 品質正常"}
class _FakeClient:
def __init__(self, *, fail: bool = False) -> None:
self.fail = fail
self.posted_urls: list[str] = []
async def post(self, url: str, **kwargs: Any) -> _FakeResponse:
self.posted_urls.append(url)
if self.fail:
raise httpx.TimeoutException("timeout")
return _FakeResponse()
async def _noop_save(*args: Any, **kwargs: Any) -> None:
return None
@pytest.mark.asyncio
async def test_large_pr_uses_gcp_b_ollama_when_gemini_fallback_disabled(
monkeypatch: pytest.MonkeyPatch,
) -> None:
monkeypatch.setattr(
review_module.settings,
"LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK",
False,
)
monkeypatch.setattr(
review_module,
"resolve_ollama_endpoint",
lambda workload_type: "http://gcp-b:11436",
)
client = _FakeClient()
service = LocalCodeReviewService()
async def _get_http() -> _FakeClient:
return client
async def _fail_gemini(*args: Any, **kwargs: Any) -> None:
raise AssertionError("Gemini fallback should stay disabled")
monkeypatch.setattr(service, "_get_http", _get_http)
monkeypatch.setattr(service, "_save_to_db", _noop_save)
monkeypatch.setattr(service, "_review_with_gemini", _fail_gemini)
result = await service.review_pr(
pr_id="pr-1",
repo="wooo/awoooi",
title="large diff",
diff="x" * (60 * 1024),
)
assert result is not None
assert result["provider"] == "ollama"
assert client.posted_urls == ["http://gcp-b:11436/api/generate"]
@pytest.mark.asyncio
async def test_ollama_failure_does_not_fall_back_to_gemini_by_default(
monkeypatch: pytest.MonkeyPatch,
) -> None:
monkeypatch.setattr(
review_module.settings,
"LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK",
False,
)
monkeypatch.setattr(
review_module,
"resolve_ollama_endpoint",
lambda workload_type: "http://gcp-b:11436",
)
client = _FakeClient(fail=True)
service = LocalCodeReviewService()
async def _get_http() -> _FakeClient:
return client
async def _fail_gemini(*args: Any, **kwargs: Any) -> None:
raise AssertionError("Gemini fallback should stay disabled")
monkeypatch.setattr(service, "_get_http", _get_http)
monkeypatch.setattr(service, "_save_to_db", _noop_save)
monkeypatch.setattr(service, "_review_with_gemini", _fail_gemini)
result = await service.review_pr(
pr_id="pr-2",
repo="wooo/awoooi",
title="ollama unavailable",
diff="small diff",
)
assert result is not None
assert result["provider"] == "ollama_unavailable"
assert result["cloud_fallback_skipped"] is True
assert client.posted_urls == ["http://gcp-b:11436/api/generate"]
@pytest.mark.asyncio
async def test_gemini_fallback_requires_explicit_flag(
monkeypatch: pytest.MonkeyPatch,
) -> None:
monkeypatch.setattr(
review_module.settings,
"LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK",
True,
)
service = LocalCodeReviewService()
async def _gemini_result(*args: Any, **kwargs: Any) -> dict[str, Any]:
return {"review_text": "ok", "issues_count": 0, "model": "gemini", "provider": "gemini"}
monkeypatch.setattr(service, "_save_to_db", _noop_save)
monkeypatch.setattr(service, "_review_with_gemini", _gemini_result)
result = await service.review_pr(
pr_id="pr-3",
repo="wooo/awoooi",
title="large diff explicit cloud",
diff="x" * (60 * 1024),
)
assert result is not None
assert result["provider"] == "gemini"

View File

@@ -0,0 +1,55 @@
from __future__ import annotations
from types import SimpleNamespace
from src.services.ollama_endpoint_resolver import (
resolve_ollama_endpoint,
resolve_ollama_selection,
)
def _settings(
*,
primary: str = "http://192.168.0.110:11435",
secondary: str = "http://192.168.0.110:11436",
fallback: str = "http://192.168.0.110:11437",
) -> SimpleNamespace:
return SimpleNamespace(
OLLAMA_URL=primary,
OLLAMA_SECONDARY_URL=secondary,
OLLAMA_FALLBACK_URL=fallback,
)
def test_batch_workloads_prefer_gcp_b() -> None:
cfg = _settings()
for workload in ("batch", "embedding", "rag", "code_review", "shadow", "canary"):
selection = resolve_ollama_selection(workload, config=cfg)
assert selection.url == "http://192.168.0.110:11436"
assert selection.provider_name == "ollama_gcp_b"
assert selection.reason == "gcp_b_batch_lane"
def test_interactive_workloads_stay_on_gcp_a() -> None:
cfg = _settings()
for workload in ("interactive", "healthcheck"):
selection = resolve_ollama_selection(workload, config=cfg)
assert selection.url == "http://192.168.0.110:11435"
assert selection.provider_name == "ollama_gcp_a"
def test_local_required_workloads_use_local_lane() -> None:
cfg = _settings()
for workload in ("local_required", "privacy_sensitive", "dr"):
selection = resolve_ollama_selection(workload, config=cfg)
assert selection.url == "http://192.168.0.110:11437"
assert selection.provider_name == "ollama_local"
def test_batch_workloads_fall_back_to_primary_when_secondary_missing() -> None:
cfg = _settings(secondary="")
assert resolve_ollama_endpoint("embedding", config=cfg) == "http://192.168.0.110:11435"