diff --git a/apps/api/src/core/config.py b/apps/api/src/core/config.py
index 40bd83bc..41421313 100644
--- a/apps/api/src/core/config.py
+++ b/apps/api/src/core/config.py
@@ -362,7 +362,7 @@ class Settings(BaseSettings):
             raise ValueError(
                 f"OLLAMA URL host 不允許的外部域名:{host!r}(完整 URL:{v!r})"
                 ",必須使用私網 IP 或已知 K8s Service hostname"
-            )
+            ) from None
         if not (ip.is_private or ip.is_loopback):
             raise ValueError(
                 f"OLLAMA URL 必須是私網/loopback IP、已知 K8s SVC 或 GCP 白名單 IP,"
@@ -496,6 +496,14 @@ class Settings(BaseSettings):
     )
     GEMINI_API_KEY: str = Field(default="", description="Google Gemini API key")
     CLAUDE_API_KEY: str = Field(default="", description="Anthropic Claude API key")
+    LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK: bool = Field(
+        default=False,
+        description=(
+            "Allow LocalCodeReviewService to fall back to Gemini when the "
+            "GCP-B/Ollama code-review lane fails. Default false to avoid "
+            "unexpected cloud spend from Gitea push/PR alerts."
+        ),
+    )
     # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling 整合
     NVIDIA_API_KEY: str = Field(
         default="",
diff --git a/apps/api/src/services/local_code_review_service.py b/apps/api/src/services/local_code_review_service.py
index 60028e52..9e7bdcd0 100644
--- a/apps/api/src/services/local_code_review_service.py
+++ b/apps/api/src/services/local_code_review_service.py
@@ -20,6 +20,7 @@ import structlog
 
 from src.core.config import get_settings
 from src.services.model_registry import get_model
+from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
 
 logger = structlog.get_logger(__name__)
 settings = get_settings()
@@ -75,18 +76,28 @@ class LocalCodeReviewService:
             redis = None
 
         diff_size = len(diff.encode())
-        use_gemini = diff_size > _MAX_DIFF_BYTES
+        allow_cloud_fallback = settings.LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK
 
-        if use_gemini:
+        if diff_size > _MAX_DIFF_BYTES and allow_cloud_fallback:
             result = await self._review_with_gemini(pr_id, repo, title, diff)
         else:
+            if diff_size > _MAX_DIFF_BYTES:
+                logger.info(
+                    "pr_review_large_diff_using_ollama_truncated",
+                    pr_id=pr_id,
+                    diff_size_bytes=diff_size,
+                    cloud_fallback_enabled=allow_cloud_fallback,
+                )
             result = await self._review_with_ollama(pr_id, repo, title, diff)
-            if result is None:
-                # Ollama 失敗 → fallback Gemini
+            if result is None and allow_cloud_fallback:
                 result = await self._review_with_gemini(pr_id, repo, title, diff)
 
         if result is None:
-            return None
+            if allow_cloud_fallback:
+                # Gemini was attempted and failed as well, so nothing was
+                # "skipped"; keep the original contract of returning None.
+                return None
+            result = self._cloud_fallback_disabled_result(pr_id, repo, title)
 
         result["diff_size_bytes"] = diff_size
 
@@ -116,7 +127,7 @@ class LocalCodeReviewService:
         try:
             http = await self._get_http()
             resp = await http.post(
-                f"{settings.OLLAMA_URL}/api/generate",
+                f"{resolve_ollama_endpoint('code_review')}/api/generate",
                 json={
                     "model": _MODEL_OLLAMA,
                     "prompt": prompt,
@@ -138,6 +149,14 @@ class LocalCodeReviewService:
     async def _review_with_gemini(
         self, pr_id: str, repo: str, title: str, diff: str
     ) -> dict[str, Any] | None:
+        if not settings.LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK:
+            logger.warning(
+                "pr_review_gemini_fallback_disabled",
+                pr_id=pr_id,
+                repo=repo,
+            )
+            return None
+
         try:
             from src.services.openclaw import get_openclaw
             openclaw = get_openclaw()
@@ -156,12 +175,38 @@ class LocalCodeReviewService:
             logger.error("pr_review_gemini_failed", pr_id=pr_id, error=str(e))
             return None
 
+    def _cloud_fallback_disabled_result(
+        self,
+        pr_id: str,
+        repo: str,
+        title: str,
+    ) -> dict[str, Any]:
+        """Return the placeholder review used when the cloud fallback is disabled."""
+        logger.warning(
+            "pr_review_cloud_fallback_skipped",
+            pr_id=pr_id,
+            repo=repo,
+            title=title,
+            reason="LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK=false",
+        )
+        return {
+            "review_text": (
+                "⚠️ Code Review:GCP-B/Ollama 審查未完成,"
+                "已依成本策略跳過 Gemini fallback。"
+            ),
+            "issues_count": 1,
+            "model": _MODEL_OLLAMA,
+            "provider": "ollama_unavailable",
+            "cloud_fallback_skipped": True,
+        }
+
     async def _save_to_db(
         self, pr_id: str, repo: str, title: str, diff_size: int, result: dict
     ) -> None:
         try:
-            from src.db.base import get_db_context
             from sqlalchemy import text
+
+            from src.db.base import get_db_context
             async with get_db_context() as db:
                 await db.execute(
                     text("""
@@ -206,7 +251,7 @@ class LocalCodeReviewService:
         try:
             http = await self._get_http()
             resp = await http.post(
-                f"{settings.OLLAMA_URL}/api/generate",
+                f"{resolve_ollama_endpoint('code_review')}/api/generate",
                 json={
                     "model": _MODEL_OLLAMA,
                     "prompt": prompt,
diff --git a/apps/api/src/services/ollama_endpoint_resolver.py b/apps/api/src/services/ollama_endpoint_resolver.py
new file mode 100644
index 00000000..bbdd8ade
--- /dev/null
+++ b/apps/api/src/services/ollama_endpoint_resolver.py
@@ -0,0 +1,105 @@
+"""
+Ollama endpoint resolver for non-critical workload placement.
+
+ADR-110 gives AWOOOI three Ollama endpoints. This resolver is intentionally
+small: it chooses the preferred endpoint by workload class, while health-aware
+failover remains owned by ollama_failover_manager.py.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Literal, Protocol
+
+from src.core.config import settings
+
+OllamaWorkloadType = Literal[
+    "interactive",
+    "healthcheck",
+    "batch",
+    "embedding",
+    "rag",
+    "code_review",
+    "shadow",
+    "canary",
+    "local_required",
+    "privacy_sensitive",
+    "dr",
+]
+
+_GCP_B_PREFERRED_WORKLOADS = {
+    "batch",
+    "embedding",
+    "rag",
+    "code_review",
+    "shadow",
+    "canary",
+}
+
+_LOCAL_PREFERRED_WORKLOADS = {
+    "local_required",
+    "privacy_sensitive",
+    "dr",
+}
+
+
+class _OllamaSettings(Protocol):
+    OLLAMA_URL: str
+    OLLAMA_SECONDARY_URL: str
+    OLLAMA_FALLBACK_URL: str
+
+
+@dataclass(frozen=True)
+class OllamaEndpointSelection:
+    url: str
+    provider_name: str
+    workload_type: OllamaWorkloadType
+    reason: str
+
+
+def resolve_ollama_selection(
+    workload_type: OllamaWorkloadType = "interactive",
+    *,
+    config: _OllamaSettings | None = None,
+) -> OllamaEndpointSelection:
+    """Return the preferred Ollama endpoint for a workload class.
+
+    When the preferred lane's URL is empty the primary endpoint is returned,
+    which also applies to the local-preferred workloads.
+    """
+    cfg = settings if config is None else config
+    primary = cfg.OLLAMA_URL
+    secondary = cfg.OLLAMA_SECONDARY_URL
+    fallback = cfg.OLLAMA_FALLBACK_URL
+
+    if workload_type in _GCP_B_PREFERRED_WORKLOADS and secondary:
+        return OllamaEndpointSelection(
+            url=secondary,
+            provider_name="ollama_gcp_b",
+            workload_type=workload_type,
+            reason="gcp_b_batch_lane",
+        )
+
+    if workload_type in _LOCAL_PREFERRED_WORKLOADS and fallback:
+        return OllamaEndpointSelection(
+            url=fallback,
+            provider_name="ollama_local",
+            workload_type=workload_type,
+            reason="local_privacy_or_dr_lane",
+        )
+
+    return OllamaEndpointSelection(
+        url=primary,
+        provider_name="ollama_gcp_a",
+        workload_type=workload_type,
+        reason="primary_interactive_lane",
+    )
+
+
+def resolve_ollama_endpoint(
+    workload_type: OllamaWorkloadType = "interactive",
+    *,
+    config: _OllamaSettings | None = None,
+) -> str:
+    """Return only the preferred Ollama base URL."""
+    return resolve_ollama_selection(workload_type, config=config).url
diff --git a/apps/api/tests/test_local_code_review_cloud_fallback.py b/apps/api/tests/test_local_code_review_cloud_fallback.py
new file mode 100644
index 00000000..77c0c227
--- /dev/null
+++ b/apps/api/tests/test_local_code_review_cloud_fallback.py
@@ -0,0 +1,181 @@
+from __future__ import annotations
+
+from typing import Any
+
+import httpx
+import pytest
+
+from src.services import local_code_review_service as review_module
+from src.services.local_code_review_service import LocalCodeReviewService
+
+
+class _FakeResponse:
+    status_code = 200
+
+    def json(self) -> dict[str, str]:
+        return {"response": "✅ Push 品質正常"}
+
+
+class _FakeClient:
+    def __init__(self, *, fail: bool = False) -> None:
+        self.fail = fail
+        self.posted_urls: list[str] = []
+
+    async def post(self, url: str, **kwargs: Any) -> _FakeResponse:
+        self.posted_urls.append(url)
+        if self.fail:
+            raise httpx.TimeoutException("timeout")
+        return _FakeResponse()
+
+
+async def _noop_save(*args: Any, **kwargs: Any) -> None:
+    return None
+
+
+@pytest.mark.asyncio
+async def test_large_pr_uses_gcp_b_ollama_when_gemini_fallback_disabled(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(
+        review_module.settings,
+        "LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK",
+        False,
+    )
+    monkeypatch.setattr(
+        review_module,
+        "resolve_ollama_endpoint",
+        lambda workload_type: "http://gcp-b:11436",
+    )
+
+    client = _FakeClient()
+    service = LocalCodeReviewService()
+
+    async def _get_http() -> _FakeClient:
+        return client
+
+    async def _fail_gemini(*args: Any, **kwargs: Any) -> None:
+        raise AssertionError("Gemini fallback should stay disabled")
+
+    monkeypatch.setattr(service, "_get_http", _get_http)
+    monkeypatch.setattr(service, "_save_to_db", _noop_save)
+    monkeypatch.setattr(service, "_review_with_gemini", _fail_gemini)
+
+    result = await service.review_pr(
+        pr_id="pr-1",
+        repo="wooo/awoooi",
+        title="large diff",
+        diff="x" * (60 * 1024),
+    )
+
+    assert result is not None
+    assert result["provider"] == "ollama"
+    assert client.posted_urls == ["http://gcp-b:11436/api/generate"]
+
+
+@pytest.mark.asyncio
+async def test_ollama_failure_does_not_fall_back_to_gemini_by_default(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(
+        review_module.settings,
+        "LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK",
+        False,
+    )
+    monkeypatch.setattr(
+        review_module,
+        "resolve_ollama_endpoint",
+        lambda workload_type: "http://gcp-b:11436",
+    )
+
+    client = _FakeClient(fail=True)
+    service = LocalCodeReviewService()
+
+    async def _get_http() -> _FakeClient:
+        return client
+
+    async def _fail_gemini(*args: Any, **kwargs: Any) -> None:
+        raise AssertionError("Gemini fallback should stay disabled")
+
+    monkeypatch.setattr(service, "_get_http", _get_http)
+    monkeypatch.setattr(service, "_save_to_db", _noop_save)
+    monkeypatch.setattr(service, "_review_with_gemini", _fail_gemini)
+
+    result = await service.review_pr(
+        pr_id="pr-2",
+        repo="wooo/awoooi",
+        title="ollama unavailable",
+        diff="small diff",
+    )
+
+    assert result is not None
+    assert result["provider"] == "ollama_unavailable"
+    assert result["cloud_fallback_skipped"] is True
+    assert client.posted_urls == ["http://gcp-b:11436/api/generate"]
+
+
+@pytest.mark.asyncio
+async def test_gemini_fallback_requires_explicit_flag(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(
+        review_module.settings,
+        "LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK",
+        True,
+    )
+
+    service = LocalCodeReviewService()
+
+    async def _gemini_result(*args: Any, **kwargs: Any) -> dict[str, Any]:
+        return {"review_text": "ok", "issues_count": 0, "model": "gemini", "provider": "gemini"}
+
+    monkeypatch.setattr(service, "_save_to_db", _noop_save)
+    monkeypatch.setattr(service, "_review_with_gemini", _gemini_result)
+
+    result = await service.review_pr(
+        pr_id="pr-3",
+        repo="wooo/awoooi",
+        title="large diff explicit cloud",
+        diff="x" * (60 * 1024),
+    )
+
+    assert result is not None
+    assert result["provider"] == "gemini"
+
+
+@pytest.mark.asyncio
+async def test_enabled_fallback_failure_is_not_reported_as_skipped(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(
+        review_module.settings,
+        "LOCAL_CODE_REVIEW_ALLOW_GEMINI_FALLBACK",
+        True,
+    )
+    monkeypatch.setattr(
+        review_module,
+        "resolve_ollama_endpoint",
+        lambda workload_type: "http://gcp-b:11436",
+    )
+
+    client = _FakeClient(fail=True)
+    service = LocalCodeReviewService()
+
+    async def _get_http() -> _FakeClient:
+        return client
+
+    async def _gemini_unavailable(*args: Any, **kwargs: Any) -> None:
+        return None
+
+    monkeypatch.setattr(service, "_get_http", _get_http)
+    monkeypatch.setattr(service, "_save_to_db", _noop_save)
+    monkeypatch.setattr(service, "_review_with_gemini", _gemini_unavailable)
+
+    result = await service.review_pr(
+        pr_id="pr-4",
+        repo="wooo/awoooi",
+        title="both lanes down",
+        diff="small diff",
+    )
+
+    assert result is None
+    assert client.posted_urls == ["http://gcp-b:11436/api/generate"]
diff --git a/apps/api/tests/test_ollama_endpoint_resolver.py b/apps/api/tests/test_ollama_endpoint_resolver.py
new file mode 100644
index 00000000..5519ec16
--- /dev/null
+++ b/apps/api/tests/test_ollama_endpoint_resolver.py
@@ -0,0 +1,55 @@
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+from src.services.ollama_endpoint_resolver import (
+    resolve_ollama_endpoint,
+    resolve_ollama_selection,
+)
+
+
+def _settings(
+    *,
+    primary: str = "http://192.168.0.110:11435",
+    secondary: str = "http://192.168.0.110:11436",
+    fallback: str = "http://192.168.0.110:11437",
+) -> SimpleNamespace:
+    return SimpleNamespace(
+        OLLAMA_URL=primary,
+        OLLAMA_SECONDARY_URL=secondary,
+        OLLAMA_FALLBACK_URL=fallback,
+    )
+
+
+def test_batch_workloads_prefer_gcp_b() -> None:
+    cfg = _settings()
+
+    for workload in ("batch", "embedding", "rag", "code_review", "shadow", "canary"):
+        selection = resolve_ollama_selection(workload, config=cfg)
+        assert selection.url == "http://192.168.0.110:11436"
+        assert selection.provider_name == "ollama_gcp_b"
+        assert selection.reason == "gcp_b_batch_lane"
+
+
+def test_interactive_workloads_stay_on_gcp_a() -> None:
+    cfg = _settings()
+
+    for workload in ("interactive", "healthcheck"):
+        selection = resolve_ollama_selection(workload, config=cfg)
+        assert selection.url == "http://192.168.0.110:11435"
+        assert selection.provider_name == "ollama_gcp_a"
+
+
+def test_local_required_workloads_use_local_lane() -> None:
+    cfg = _settings()
+
+    for workload in ("local_required", "privacy_sensitive", "dr"):
+        selection = resolve_ollama_selection(workload, config=cfg)
+        assert selection.url == "http://192.168.0.110:11437"
+        assert selection.provider_name == "ollama_local"
+
+
+def test_batch_workloads_fall_back_to_primary_when_secondary_missing() -> None:
+    cfg = _settings(secondary="")
+
+    assert resolve_ollama_endpoint("embedding", config=cfg) == "http://192.168.0.110:11435"