fix(code-review): keep Gemini fallback opt-in
This commit is contained in:
@@ -362,7 +362,7 @@ class Settings(BaseSettings):
|
||||
raise ValueError(
|
||||
f"OLLAMA URL host 不允許的外部域名:{host!r}(完整 URL:{v!r})"
|
||||
",必須使用私網 IP 或已知 K8s Service hostname"
|
||||
)
|
||||
) from None
|
||||
if not (ip.is_private or ip.is_loopback):
|
||||
raise ValueError(
|
||||
f"OLLAMA URL 必須是私網/loopback IP、已知 K8s SVC 或 GCP 白名單 IP,"
|
||||
@@ -496,6 +496,14 @@ class Settings(BaseSettings):
|
||||
)
|
||||
GEMINI_API_KEY: str = Field(default="", description="Google Gemini API key")
|
||||
CLAUDE_API_KEY: str = Field(default="", description="Anthropic Claude API key")
|
||||
LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK: bool = Field(
|
||||
default=False,
|
||||
description=(
|
||||
"Allow LocalCodeReviewService to fall back to Gemini when the "
|
||||
"GCP-B/Ollama code-review lane fails. Default false to avoid "
|
||||
"unexpected cloud spend from Gitea push/PR alerts."
|
||||
),
|
||||
)
|
||||
# 2026-03-29 ogt: ADR-036 Nemotron Tool Calling 整合
|
||||
NVIDIA_API_KEY: str = Field(
|
||||
default="",
|
||||
|
||||
@@ -20,6 +20,7 @@ import structlog
|
||||
|
||||
from src.core.config import get_settings
|
||||
from src.services.model_registry import get_model
|
||||
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
settings = get_settings()
|
||||
@@ -75,18 +76,24 @@ class LocalCodeReviewService:
|
||||
redis = None
|
||||
|
||||
diff_size = len(diff.encode())
|
||||
use_gemini = diff_size > _MAX_DIFF_BYTES
|
||||
allow_cloud_fallback = settings.LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK
|
||||
|
||||
if use_gemini:
|
||||
if diff_size > _MAX_DIFF_BYTES and allow_cloud_fallback:
|
||||
result = await self._review_with_gemini(pr_id, repo, title, diff)
|
||||
else:
|
||||
if diff_size > _MAX_DIFF_BYTES:
|
||||
logger.info(
|
||||
"pr_review_large_diff_using_ollama_truncated",
|
||||
pr_id=pr_id,
|
||||
diff_size_bytes=diff_size,
|
||||
cloud_fallback_enabled=allow_cloud_fallback,
|
||||
)
|
||||
result = await self._review_with_ollama(pr_id, repo, title, diff)
|
||||
if result is None:
|
||||
# Ollama 失敗 → fallback Gemini
|
||||
if result is None and allow_cloud_fallback:
|
||||
result = await self._review_with_gemini(pr_id, repo, title, diff)
|
||||
|
||||
if result is None:
|
||||
return None
|
||||
result = self._cloud_fallback_disabled_result(pr_id, repo, title)
|
||||
|
||||
result["diff_size_bytes"] = diff_size
|
||||
|
||||
@@ -116,7 +123,7 @@ class LocalCodeReviewService:
|
||||
try:
|
||||
http = await self._get_http()
|
||||
resp = await http.post(
|
||||
f"{settings.OLLAMA_URL}/api/generate",
|
||||
f"{resolve_ollama_endpoint('code_review')}/api/generate",
|
||||
json={
|
||||
"model": _MODEL_OLLAMA,
|
||||
"prompt": prompt,
|
||||
@@ -138,6 +145,14 @@ class LocalCodeReviewService:
|
||||
async def _review_with_gemini(
|
||||
self, pr_id: str, repo: str, title: str, diff: str
|
||||
) -> dict[str, Any] | None:
|
||||
if not settings.LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK:
|
||||
logger.warning(
|
||||
"pr_review_gemini_fallback_disabled",
|
||||
pr_id=pr_id,
|
||||
repo=repo,
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
from src.services.openclaw import get_openclaw
|
||||
openclaw = get_openclaw()
|
||||
@@ -156,12 +171,37 @@ class LocalCodeReviewService:
|
||||
logger.error("pr_review_gemini_failed", pr_id=pr_id, error=str(e))
|
||||
return None
|
||||
|
||||
def _cloud_fallback_disabled_result(
|
||||
self,
|
||||
pr_id: str,
|
||||
repo: str,
|
||||
title: str,
|
||||
) -> dict[str, Any]:
|
||||
logger.warning(
|
||||
"pr_review_cloud_fallback_skipped",
|
||||
pr_id=pr_id,
|
||||
repo=repo,
|
||||
title=title,
|
||||
reason="LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK=false",
|
||||
)
|
||||
return {
|
||||
"review_text": (
|
||||
"⚠️ Code Review:GCP-B/Ollama 審查未完成,"
|
||||
"已依成本策略跳過 Gemini fallback。"
|
||||
),
|
||||
"issues_count": 1,
|
||||
"model": _MODEL_OLLAMA,
|
||||
"provider": "ollama_unavailable",
|
||||
"cloud_fallback_skipped": True,
|
||||
}
|
||||
|
||||
async def _save_to_db(
|
||||
self, pr_id: str, repo: str, title: str, diff_size: int, result: dict
|
||||
) -> None:
|
||||
try:
|
||||
from src.db.base import get_db_context
|
||||
from sqlalchemy import text
|
||||
|
||||
from src.db.base import get_db_context
|
||||
async with get_db_context() as db:
|
||||
await db.execute(
|
||||
text("""
|
||||
@@ -206,7 +246,7 @@ class LocalCodeReviewService:
|
||||
try:
|
||||
http = await self._get_http()
|
||||
resp = await http.post(
|
||||
f"{settings.OLLAMA_URL}/api/generate",
|
||||
f"{resolve_ollama_endpoint('code_review')}/api/generate",
|
||||
json={
|
||||
"model": _MODEL_OLLAMA,
|
||||
"prompt": prompt,
|
||||
|
||||
101
apps/api/src/services/ollama_endpoint_resolver.py
Normal file
101
apps/api/src/services/ollama_endpoint_resolver.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""
|
||||
Ollama endpoint resolver for non-critical workload placement.
|
||||
|
||||
ADR-110 gives AWOOOI three Ollama endpoints. This resolver is intentionally
|
||||
small: it chooses the preferred endpoint by workload class, while health-aware
|
||||
failover remains owned by ollama_failover_manager.py.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Literal, Protocol
|
||||
|
||||
from src.core.config import settings
|
||||
|
||||
OllamaWorkloadType = Literal[
|
||||
"interactive",
|
||||
"healthcheck",
|
||||
"batch",
|
||||
"embedding",
|
||||
"rag",
|
||||
"code_review",
|
||||
"shadow",
|
||||
"canary",
|
||||
"local_required",
|
||||
"privacy_sensitive",
|
||||
"dr",
|
||||
]
|
||||
|
||||
_GCP_B_PREFERRED_WORKLOADS = {
|
||||
"batch",
|
||||
"embedding",
|
||||
"rag",
|
||||
"code_review",
|
||||
"shadow",
|
||||
"canary",
|
||||
}
|
||||
|
||||
_LOCAL_PREFERRED_WORKLOADS = {
|
||||
"local_required",
|
||||
"privacy_sensitive",
|
||||
"dr",
|
||||
}
|
||||
|
||||
|
||||
class _OllamaSettings(Protocol):
|
||||
OLLAMA_URL: str
|
||||
OLLAMA_SECONDARY_URL: str
|
||||
OLLAMA_FALLBACK_URL: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class OllamaEndpointSelection:
|
||||
url: str
|
||||
provider_name: str
|
||||
workload_type: OllamaWorkloadType
|
||||
reason: str
|
||||
|
||||
|
||||
def resolve_ollama_selection(
|
||||
workload_type: OllamaWorkloadType = "interactive",
|
||||
*,
|
||||
config: _OllamaSettings | None = None,
|
||||
) -> OllamaEndpointSelection:
|
||||
"""Return the preferred Ollama endpoint for a workload class."""
|
||||
cfg = config or settings
|
||||
primary = cfg.OLLAMA_URL
|
||||
secondary = cfg.OLLAMA_SECONDARY_URL
|
||||
fallback = cfg.OLLAMA_FALLBACK_URL
|
||||
|
||||
if workload_type in _GCP_B_PREFERRED_WORKLOADS and secondary:
|
||||
return OllamaEndpointSelection(
|
||||
url=secondary,
|
||||
provider_name="ollama_gcp_b",
|
||||
workload_type=workload_type,
|
||||
reason="gcp_b_batch_lane",
|
||||
)
|
||||
|
||||
if workload_type in _LOCAL_PREFERRED_WORKLOADS and fallback:
|
||||
return OllamaEndpointSelection(
|
||||
url=fallback,
|
||||
provider_name="ollama_local",
|
||||
workload_type=workload_type,
|
||||
reason="local_privacy_or_dr_lane",
|
||||
)
|
||||
|
||||
return OllamaEndpointSelection(
|
||||
url=primary,
|
||||
provider_name="ollama_gcp_a",
|
||||
workload_type=workload_type,
|
||||
reason="primary_interactive_lane",
|
||||
)
|
||||
|
||||
|
||||
def resolve_ollama_endpoint(
|
||||
workload_type: OllamaWorkloadType = "interactive",
|
||||
*,
|
||||
config: _OllamaSettings | None = None,
|
||||
) -> str:
|
||||
"""Return only the preferred Ollama base URL."""
|
||||
return resolve_ollama_selection(workload_type, config=config).url
|
||||
142
apps/api/tests/test_local_code_review_cloud_fallback.py
Normal file
142
apps/api/tests/test_local_code_review_cloud_fallback.py
Normal file
@@ -0,0 +1,142 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from src.services import local_code_review_service as review_module
|
||||
from src.services.local_code_review_service import LocalCodeReviewService
|
||||
|
||||
|
||||
class _FakeResponse:
|
||||
status_code = 200
|
||||
|
||||
def json(self) -> dict[str, str]:
|
||||
return {"response": "✅ Push 品質正常"}
|
||||
|
||||
|
||||
class _FakeClient:
|
||||
def __init__(self, *, fail: bool = False) -> None:
|
||||
self.fail = fail
|
||||
self.posted_urls: list[str] = []
|
||||
|
||||
async def post(self, url: str, **kwargs: Any) -> _FakeResponse:
|
||||
self.posted_urls.append(url)
|
||||
if self.fail:
|
||||
raise httpx.TimeoutException("timeout")
|
||||
return _FakeResponse()
|
||||
|
||||
|
||||
async def _noop_save(*args: Any, **kwargs: Any) -> None:
|
||||
return None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_large_pr_uses_gcp_b_ollama_when_gemini_fallback_disabled(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
monkeypatch.setattr(
|
||||
review_module.settings,
|
||||
"LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK",
|
||||
False,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
review_module,
|
||||
"resolve_ollama_endpoint",
|
||||
lambda workload_type: "http://gcp-b:11436",
|
||||
)
|
||||
|
||||
client = _FakeClient()
|
||||
service = LocalCodeReviewService()
|
||||
|
||||
async def _get_http() -> _FakeClient:
|
||||
return client
|
||||
|
||||
async def _fail_gemini(*args: Any, **kwargs: Any) -> None:
|
||||
raise AssertionError("Gemini fallback should stay disabled")
|
||||
|
||||
monkeypatch.setattr(service, "_get_http", _get_http)
|
||||
monkeypatch.setattr(service, "_save_to_db", _noop_save)
|
||||
monkeypatch.setattr(service, "_review_with_gemini", _fail_gemini)
|
||||
|
||||
result = await service.review_pr(
|
||||
pr_id="pr-1",
|
||||
repo="wooo/awoooi",
|
||||
title="large diff",
|
||||
diff="x" * (60 * 1024),
|
||||
)
|
||||
|
||||
assert result is not None
|
||||
assert result["provider"] == "ollama"
|
||||
assert client.posted_urls == ["http://gcp-b:11436/api/generate"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ollama_failure_does_not_fall_back_to_gemini_by_default(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
monkeypatch.setattr(
|
||||
review_module.settings,
|
||||
"LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK",
|
||||
False,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
review_module,
|
||||
"resolve_ollama_endpoint",
|
||||
lambda workload_type: "http://gcp-b:11436",
|
||||
)
|
||||
|
||||
client = _FakeClient(fail=True)
|
||||
service = LocalCodeReviewService()
|
||||
|
||||
async def _get_http() -> _FakeClient:
|
||||
return client
|
||||
|
||||
async def _fail_gemini(*args: Any, **kwargs: Any) -> None:
|
||||
raise AssertionError("Gemini fallback should stay disabled")
|
||||
|
||||
monkeypatch.setattr(service, "_get_http", _get_http)
|
||||
monkeypatch.setattr(service, "_save_to_db", _noop_save)
|
||||
monkeypatch.setattr(service, "_review_with_gemini", _fail_gemini)
|
||||
|
||||
result = await service.review_pr(
|
||||
pr_id="pr-2",
|
||||
repo="wooo/awoooi",
|
||||
title="ollama unavailable",
|
||||
diff="small diff",
|
||||
)
|
||||
|
||||
assert result is not None
|
||||
assert result["provider"] == "ollama_unavailable"
|
||||
assert result["cloud_fallback_skipped"] is True
|
||||
assert client.posted_urls == ["http://gcp-b:11436/api/generate"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_gemini_fallback_requires_explicit_flag(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
monkeypatch.setattr(
|
||||
review_module.settings,
|
||||
"LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK",
|
||||
True,
|
||||
)
|
||||
|
||||
service = LocalCodeReviewService()
|
||||
|
||||
async def _gemini_result(*args: Any, **kwargs: Any) -> dict[str, Any]:
|
||||
return {"review_text": "ok", "issues_count": 0, "model": "gemini", "provider": "gemini"}
|
||||
|
||||
monkeypatch.setattr(service, "_save_to_db", _noop_save)
|
||||
monkeypatch.setattr(service, "_review_with_gemini", _gemini_result)
|
||||
|
||||
result = await service.review_pr(
|
||||
pr_id="pr-3",
|
||||
repo="wooo/awoooi",
|
||||
title="large diff explicit cloud",
|
||||
diff="x" * (60 * 1024),
|
||||
)
|
||||
|
||||
assert result is not None
|
||||
assert result["provider"] == "gemini"
|
||||
55
apps/api/tests/test_ollama_endpoint_resolver.py
Normal file
55
apps/api/tests/test_ollama_endpoint_resolver.py
Normal file
@@ -0,0 +1,55 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
|
||||
from src.services.ollama_endpoint_resolver import (
|
||||
resolve_ollama_endpoint,
|
||||
resolve_ollama_selection,
|
||||
)
|
||||
|
||||
|
||||
def _settings(
|
||||
*,
|
||||
primary: str = "http://192.168.0.110:11435",
|
||||
secondary: str = "http://192.168.0.110:11436",
|
||||
fallback: str = "http://192.168.0.110:11437",
|
||||
) -> SimpleNamespace:
|
||||
return SimpleNamespace(
|
||||
OLLAMA_URL=primary,
|
||||
OLLAMA_SECONDARY_URL=secondary,
|
||||
OLLAMA_FALLBACK_URL=fallback,
|
||||
)
|
||||
|
||||
|
||||
def test_batch_workloads_prefer_gcp_b() -> None:
|
||||
cfg = _settings()
|
||||
|
||||
for workload in ("batch", "embedding", "rag", "code_review", "shadow", "canary"):
|
||||
selection = resolve_ollama_selection(workload, config=cfg)
|
||||
assert selection.url == "http://192.168.0.110:11436"
|
||||
assert selection.provider_name == "ollama_gcp_b"
|
||||
assert selection.reason == "gcp_b_batch_lane"
|
||||
|
||||
|
||||
def test_interactive_workloads_stay_on_gcp_a() -> None:
|
||||
cfg = _settings()
|
||||
|
||||
for workload in ("interactive", "healthcheck"):
|
||||
selection = resolve_ollama_selection(workload, config=cfg)
|
||||
assert selection.url == "http://192.168.0.110:11435"
|
||||
assert selection.provider_name == "ollama_gcp_a"
|
||||
|
||||
|
||||
def test_local_required_workloads_use_local_lane() -> None:
|
||||
cfg = _settings()
|
||||
|
||||
for workload in ("local_required", "privacy_sensitive", "dr"):
|
||||
selection = resolve_ollama_selection(workload, config=cfg)
|
||||
assert selection.url == "http://192.168.0.110:11437"
|
||||
assert selection.provider_name == "ollama_local"
|
||||
|
||||
|
||||
def test_batch_workloads_fall_back_to_primary_when_secondary_missing() -> None:
|
||||
cfg = _settings(secondary="")
|
||||
|
||||
assert resolve_ollama_endpoint("embedding", config=cfg) == "http://192.168.0.110:11435"
|
||||
Reference in New Issue
Block a user