fix(api): enforce global ollama endpoint order
This commit is contained in:
@@ -22,7 +22,7 @@ import httpx
|
|||||||
import structlog
|
import structlog
|
||||||
|
|
||||||
from src.services.model_registry import get_model as _get_model
|
from src.services.model_registry import get_model as _get_model
|
||||||
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
|
from src.services.ollama_endpoint_resolver import resolve_ollama_order
|
||||||
|
|
||||||
logger = structlog.get_logger(__name__)
|
logger = structlog.get_logger(__name__)
|
||||||
|
|
||||||
@@ -98,7 +98,15 @@ class OllamaEmbeddingService:
|
|||||||
P1 修復 (2026-03-29): 維度配置化,支援更多模型
|
P1 修復 (2026-03-29): 維度配置化,支援更多模型
|
||||||
"""
|
"""
|
||||||
self._model = model
|
self._model = model
|
||||||
self._ollama_url = ollama_url or resolve_ollama_endpoint("embedding")
|
if ollama_url:
|
||||||
|
self._ollama_endpoints = ((ollama_url, "custom"),)
|
||||||
|
else:
|
||||||
|
self._ollama_endpoints = tuple(
|
||||||
|
(endpoint.url, endpoint.provider_name)
|
||||||
|
for endpoint in resolve_ollama_order("embedding")
|
||||||
|
if endpoint.url
|
||||||
|
)
|
||||||
|
self._ollama_url = self._ollama_endpoints[0][0] if self._ollama_endpoints else ""
|
||||||
self._timeout = timeout
|
self._timeout = timeout
|
||||||
self._default_dimension = default_dimension or self.MODEL_DIMENSIONS.get(
|
self._default_dimension = default_dimension or self.MODEL_DIMENSIONS.get(
|
||||||
model, self.DEFAULT_DIMENSION
|
model, self.DEFAULT_DIMENSION
|
||||||
@@ -142,43 +150,65 @@ class OllamaEmbeddingService:
|
|||||||
"""
|
"""
|
||||||
client = await self._get_client()
|
client = await self._get_client()
|
||||||
|
|
||||||
try:
|
last_error: Exception | None = None
|
||||||
response = await client.post(
|
for endpoint_url, provider_name in self._ollama_endpoints:
|
||||||
f"{self._ollama_url}/api/embeddings",
|
try:
|
||||||
json={
|
response = await client.post(
|
||||||
"model": self._model,
|
f"{endpoint_url}/api/embeddings",
|
||||||
"prompt": text,
|
json={
|
||||||
},
|
"model": self._model,
|
||||||
)
|
"prompt": text,
|
||||||
response.raise_for_status()
|
},
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
|
||||||
data = response.json()
|
data = response.json()
|
||||||
embedding = data.get("embedding", [])
|
embedding = data.get("embedding", [])
|
||||||
|
|
||||||
# 更新維度快取
|
# 更新維度快取
|
||||||
if self._dimension is None and embedding:
|
if self._dimension is None and embedding:
|
||||||
self._dimension = len(embedding)
|
self._dimension = len(embedding)
|
||||||
logger.info(
|
logger.info(
|
||||||
"embedding_dimension_detected",
|
"embedding_dimension_detected",
|
||||||
|
model=self._model,
|
||||||
|
dimension=self._dimension,
|
||||||
|
provider=provider_name,
|
||||||
|
)
|
||||||
|
|
||||||
|
return embedding
|
||||||
|
|
||||||
|
except httpx.TimeoutException as e:
|
||||||
|
last_error = e
|
||||||
|
logger.error(
|
||||||
|
"embedding_timeout",
|
||||||
model=self._model,
|
model=self._model,
|
||||||
dimension=self._dimension,
|
text_len=len(text),
|
||||||
|
provider=provider_name,
|
||||||
|
)
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
last_error = e
|
||||||
|
logger.error(
|
||||||
|
"embedding_http_error",
|
||||||
|
status=e.response.status_code,
|
||||||
|
model=self._model,
|
||||||
|
provider=provider_name,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
last_error = e
|
||||||
|
logger.error(
|
||||||
|
"embedding_error",
|
||||||
|
error=str(e),
|
||||||
|
model=self._model,
|
||||||
|
provider=provider_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
return embedding
|
if isinstance(last_error, httpx.TimeoutException):
|
||||||
|
raise EmbeddingError(f"Embedding timeout after {self._timeout}s") from last_error
|
||||||
except httpx.TimeoutException as e:
|
if isinstance(last_error, httpx.HTTPStatusError):
|
||||||
logger.error("embedding_timeout", model=self._model, text_len=len(text))
|
raise EmbeddingError(
|
||||||
raise EmbeddingError(f"Embedding timeout after {self._timeout}s") from e
|
f"Ollama API error: {last_error.response.status_code}"
|
||||||
except httpx.HTTPStatusError as e:
|
) from last_error
|
||||||
logger.error(
|
raise EmbeddingError("Embedding failed on all Ollama endpoints") from last_error
|
||||||
"embedding_http_error",
|
|
||||||
status=e.response.status_code,
|
|
||||||
model=self._model,
|
|
||||||
)
|
|
||||||
raise EmbeddingError(f"Ollama API error: {e.response.status_code}") from e
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("embedding_error", error=str(e), model=self._model)
|
|
||||||
raise EmbeddingError(f"Embedding failed: {e}") from e
|
|
||||||
|
|
||||||
async def embed_batch(
|
async def embed_batch(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ Knowledge Extractor Service — KB Phase 2-A
|
|||||||
Incident resolved 後自動萃取 KB 草稿。
|
Incident resolved 後自動萃取 KB 草稿。
|
||||||
|
|
||||||
設計原則:
|
設計原則:
|
||||||
- 強制使用 Ollama llama3.2:3b(本地推理,符合 Phase 24 D7 隱私規則)
|
- 使用 Ollama llama3.2:3b,依全域順序 GCP-A → GCP-B → 111 嘗試
|
||||||
- fire-and-forget:失敗不影響 resolve 主流程
|
- fire-and-forget:失敗不影響 resolve 主流程
|
||||||
- logger.exception 保留完整 Stack Trace 供 Prompt 調優
|
- logger.exception 保留完整 Stack Trace 供 Prompt 調優
|
||||||
|
|
||||||
@@ -15,11 +15,11 @@ import structlog
|
|||||||
|
|
||||||
logger = structlog.get_logger(__name__)
|
logger = structlog.get_logger(__name__)
|
||||||
|
|
||||||
# 2026-05-05 Codex: KB 萃取走 111 lane,避免污染 GCP alert-fast lane
|
# 2026-05-19 Codex: 統帥校正,全 Ollama workload 固定 GCP-A → GCP-B → 111。
|
||||||
def _get_ollama_base() -> str:
|
def _get_ollama_endpoints():
|
||||||
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
|
from src.services.ollama_endpoint_resolver import resolve_ollama_order
|
||||||
|
|
||||||
return resolve_ollama_endpoint("deep_rca")
|
return resolve_ollama_order("deep_rca")
|
||||||
_EXTRACT_MODEL = "llama3.2:3b"
|
_EXTRACT_MODEL = "llama3.2:3b"
|
||||||
_EXTRACT_TIMEOUT = 30.0 # 秒,容忍慢速
|
_EXTRACT_TIMEOUT = 30.0 # 秒,容忍慢速
|
||||||
|
|
||||||
@@ -160,36 +160,54 @@ class KnowledgeExtractorService:
|
|||||||
|
|
||||||
不走 AIRouter 是刻意設計:
|
不走 AIRouter 是刻意設計:
|
||||||
- KB 萃取是背景工作,不需要完整的路由/閘門/Cache 邏輯
|
- KB 萃取是背景工作,不需要完整的路由/閘門/Cache 邏輯
|
||||||
- 強制本地,不允許 fallback 到 cloud provider
|
- Ollama endpoint 固定依 GCP-A → GCP-B → 111 嘗試
|
||||||
"""
|
"""
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
try:
|
endpoints = _get_ollama_endpoints()
|
||||||
async with httpx.AsyncClient(timeout=_EXTRACT_TIMEOUT) as client:
|
async with httpx.AsyncClient(timeout=_EXTRACT_TIMEOUT) as client:
|
||||||
r = await client.post(
|
for endpoint in endpoints:
|
||||||
f"{_get_ollama_base()}/api/generate",
|
if not endpoint.url:
|
||||||
json={
|
continue
|
||||||
"model": _EXTRACT_MODEL,
|
try:
|
||||||
"prompt": prompt,
|
r = await client.post(
|
||||||
"stream": False,
|
f"{endpoint.url}/api/generate",
|
||||||
"options": {
|
json={
|
||||||
"temperature": 0.3, # 低溫:減少幻覺
|
"model": _EXTRACT_MODEL,
|
||||||
"num_predict": 800, # 控制長度
|
"prompt": prompt,
|
||||||
"stop": ["\n\n\n"], # 防止無限生成
|
"stream": False,
|
||||||
|
"options": {
|
||||||
|
"temperature": 0.3, # 低溫:減少幻覺
|
||||||
|
"num_predict": 800, # 控制長度
|
||||||
|
"stop": ["\n\n\n"], # 防止無限生成
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
)
|
||||||
)
|
r.raise_for_status()
|
||||||
r.raise_for_status()
|
text = r.json().get("response", "").strip()
|
||||||
text = r.json().get("response", "").strip()
|
if text:
|
||||||
return text or None
|
logger.info(
|
||||||
|
"kb_ollama_call_success",
|
||||||
|
model=_EXTRACT_MODEL,
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
base=endpoint.url,
|
||||||
|
)
|
||||||
|
return text
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
"kb_ollama_call_failed",
|
||||||
|
model=_EXTRACT_MODEL,
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
base=endpoint.url,
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
|
|
||||||
except Exception:
|
logger.error(
|
||||||
logger.exception(
|
"kb_ollama_all_endpoints_failed",
|
||||||
"kb_ollama_call_failed",
|
model=_EXTRACT_MODEL,
|
||||||
model=_EXTRACT_MODEL,
|
attempted=[endpoint.provider_name for endpoint in endpoints],
|
||||||
base=_get_ollama_base(),
|
)
|
||||||
)
|
return None
|
||||||
return None
|
|
||||||
|
|
||||||
def _extract_title(self, markdown: str, incident) -> str:
|
def _extract_title(self, markdown: str, incident) -> str:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ AWOOOI — Knowledge RAG Service (Phase 33, ADR-067)
|
|||||||
- 超過 100 筆: 執行 CREATE INDEX ivfflat (手動觸發)
|
- 超過 100 筆: 執行 CREATE INDEX ivfflat (手動觸發)
|
||||||
|
|
||||||
向量模型: bge-m3 (GCP-A/GCP-B/111 Ollama lane, 1024維)
|
向量模型: bge-m3 (GCP-A/GCP-B/111 Ollama lane, 1024維)
|
||||||
生成模型: qwen2.5:7b-instruct (Ollama 111)
|
生成模型: qwen2.5:7b-instruct (Ollama GCP-A/GCP-B/111)
|
||||||
|
|
||||||
leWOOOgo: Service 層只處理業務邏輯,DB 存取委派 rag_chunk_repository
|
leWOOOgo: Service 層只處理業務邏輯,DB 存取委派 rag_chunk_repository
|
||||||
架構審查 C1 修正: 2026-04-10 Claude Sonnet 4.6 Asia/Taipei
|
架構審查 C1 修正: 2026-04-10 Claude Sonnet 4.6 Asia/Taipei
|
||||||
@@ -22,7 +22,7 @@ import structlog
|
|||||||
|
|
||||||
import src.repositories.rag_chunk_repository as rag_repo
|
import src.repositories.rag_chunk_repository as rag_repo
|
||||||
from src.core.config import settings
|
from src.core.config import settings
|
||||||
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
|
from src.services.ollama_endpoint_resolver import resolve_ollama_order
|
||||||
|
|
||||||
logger = structlog.get_logger(__name__)
|
logger = structlog.get_logger(__name__)
|
||||||
|
|
||||||
@@ -128,19 +128,35 @@ class KnowledgeRAGService:
|
|||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
async def _embed(self, text: str) -> list[float] | None:
|
async def _embed(self, text: str) -> list[float] | None:
|
||||||
try:
|
http = await self._get_http()
|
||||||
http = await self._get_http()
|
for endpoint in resolve_ollama_order("embedding"):
|
||||||
resp = await http.post(
|
if not endpoint.url:
|
||||||
f"{resolve_ollama_endpoint('embedding')}/api/embeddings",
|
continue
|
||||||
json={
|
try:
|
||||||
"model": getattr(settings, "OLLAMA_EMBEDDING_MODEL", _EMBED_MODEL),
|
resp = await http.post(
|
||||||
"prompt": text,
|
f"{endpoint.url}/api/embeddings",
|
||||||
},
|
json={
|
||||||
)
|
"model": getattr(settings, "OLLAMA_EMBEDDING_MODEL", _EMBED_MODEL),
|
||||||
if resp.status_code == 200:
|
"prompt": text,
|
||||||
return resp.json().get("embedding")
|
},
|
||||||
except Exception as e:
|
)
|
||||||
logger.warning("rag_embed_failed", error=str(e))
|
if resp.status_code == 200:
|
||||||
|
logger.debug(
|
||||||
|
"rag_embed_success",
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
)
|
||||||
|
return resp.json().get("embedding")
|
||||||
|
logger.warning(
|
||||||
|
"rag_embed_http_error",
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
status=resp.status_code,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
"rag_embed_failed",
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def _generate_answer(self, question: str, context: str) -> str:
|
async def _generate_answer(self, question: str, context: str) -> str:
|
||||||
@@ -150,22 +166,38 @@ class KnowledgeRAGService:
|
|||||||
f"=== 相關資料 ===\n{context[:6000]}\n\n"
|
f"=== 相關資料 ===\n{context[:6000]}\n\n"
|
||||||
f"=== 問題 ===\n{question}"
|
f"=== 問題 ===\n{question}"
|
||||||
)
|
)
|
||||||
try:
|
http = await self._get_http()
|
||||||
http = await self._get_http()
|
for endpoint in resolve_ollama_order("rag"):
|
||||||
resp = await http.post(
|
if not endpoint.url:
|
||||||
f"{resolve_ollama_endpoint('rag')}/api/generate",
|
continue
|
||||||
json={
|
try:
|
||||||
"model": _GEN_MODEL,
|
resp = await http.post(
|
||||||
"prompt": prompt,
|
f"{endpoint.url}/api/generate",
|
||||||
"stream": False,
|
json={
|
||||||
"options": {"num_predict": 512, "temperature": 0.2},
|
"model": _GEN_MODEL,
|
||||||
},
|
"prompt": prompt,
|
||||||
timeout=httpx.Timeout(90.0, connect=10.0),
|
"stream": False,
|
||||||
)
|
"options": {"num_predict": 512, "temperature": 0.2},
|
||||||
if resp.status_code == 200:
|
},
|
||||||
return resp.json().get("response", "").strip()
|
timeout=httpx.Timeout(90.0, connect=10.0),
|
||||||
except Exception as e:
|
)
|
||||||
logger.error("rag_generate_failed", error=str(e))
|
if resp.status_code == 200:
|
||||||
|
logger.debug(
|
||||||
|
"rag_generate_success",
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
)
|
||||||
|
return resp.json().get("response", "").strip()
|
||||||
|
logger.warning(
|
||||||
|
"rag_generate_http_error",
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
status=resp.status_code,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
"rag_generate_failed",
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
return "⚠️ RAG 生成失敗,請稍後再試"
|
return "⚠️ RAG 生成失敗,請稍後再試"
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ import structlog
|
|||||||
|
|
||||||
from src.core.config import get_settings
|
from src.core.config import get_settings
|
||||||
from src.services.model_registry import get_model
|
from src.services.model_registry import get_model
|
||||||
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
|
from src.services.ollama_endpoint_resolver import resolve_ollama_order
|
||||||
|
|
||||||
logger = structlog.get_logger(__name__)
|
logger = structlog.get_logger(__name__)
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
@@ -120,26 +120,54 @@ class LocalCodeReviewService:
|
|||||||
"格式:每個問題獨立一行,以「⚠️」開頭。如果沒有問題,說「✅ 程式碼品質良好」\n\n"
|
"格式:每個問題獨立一行,以「⚠️」開頭。如果沒有問題,說「✅ 程式碼品質良好」\n\n"
|
||||||
f"=== Diff ===\n{diff[:40000]}\n=== 結束 ==="
|
f"=== Diff ===\n{diff[:40000]}\n=== 結束 ==="
|
||||||
)
|
)
|
||||||
try:
|
http = await self._get_http()
|
||||||
http = await self._get_http()
|
for endpoint in resolve_ollama_order("code_review"):
|
||||||
resp = await http.post(
|
if not endpoint.url:
|
||||||
f"{resolve_ollama_endpoint('code_review')}/api/generate",
|
continue
|
||||||
json={
|
try:
|
||||||
"model": _MODEL_OLLAMA,
|
resp = await http.post(
|
||||||
"prompt": prompt,
|
f"{endpoint.url}/api/generate",
|
||||||
"stream": False,
|
json={
|
||||||
"options": {"num_predict": 1024, "temperature": 0.1},
|
"model": _MODEL_OLLAMA,
|
||||||
},
|
"prompt": prompt,
|
||||||
)
|
"stream": False,
|
||||||
if resp.status_code == 200:
|
"options": {"num_predict": 1024, "temperature": 0.1},
|
||||||
text = resp.json().get("response", "").strip()
|
},
|
||||||
issues = text.count("⚠️")
|
)
|
||||||
logger.info("pr_review_ollama_done", pr_id=pr_id, issues=issues)
|
if resp.status_code == 200:
|
||||||
return {"review_text": text, "issues_count": issues, "model": _MODEL_OLLAMA, "provider": "ollama"}
|
text = resp.json().get("response", "").strip()
|
||||||
except httpx.TimeoutException:
|
issues = text.count("⚠️")
|
||||||
logger.warning("pr_review_ollama_timeout", pr_id=pr_id)
|
logger.info(
|
||||||
except Exception as e:
|
"pr_review_ollama_done",
|
||||||
logger.error("pr_review_ollama_failed", pr_id=pr_id, error=str(e))
|
pr_id=pr_id,
|
||||||
|
issues=issues,
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"review_text": text,
|
||||||
|
"issues_count": issues,
|
||||||
|
"model": _MODEL_OLLAMA,
|
||||||
|
"provider": endpoint.provider_name,
|
||||||
|
}
|
||||||
|
logger.warning(
|
||||||
|
"pr_review_ollama_http_error",
|
||||||
|
pr_id=pr_id,
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
status=resp.status_code,
|
||||||
|
)
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
logger.warning(
|
||||||
|
"pr_review_ollama_timeout",
|
||||||
|
pr_id=pr_id,
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
"pr_review_ollama_failed",
|
||||||
|
pr_id=pr_id,
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def _review_with_gemini(
|
async def _review_with_gemini(
|
||||||
@@ -243,26 +271,55 @@ class LocalCodeReviewService:
|
|||||||
"格式:每個問題以「⚠️」開頭,如無問題說「✅ Push 品質正常」\n"
|
"格式:每個問題以「⚠️」開頭,如無問題說「✅ Push 品質正常」\n"
|
||||||
"5 行以內,繁體中文。"
|
"5 行以內,繁體中文。"
|
||||||
)
|
)
|
||||||
try:
|
http = await self._get_http()
|
||||||
http = await self._get_http()
|
for endpoint in resolve_ollama_order("code_review"):
|
||||||
resp = await http.post(
|
if not endpoint.url:
|
||||||
f"{resolve_ollama_endpoint('code_review')}/api/generate",
|
continue
|
||||||
json={
|
try:
|
||||||
"model": _MODEL_OLLAMA,
|
resp = await http.post(
|
||||||
"prompt": prompt,
|
f"{endpoint.url}/api/generate",
|
||||||
"stream": False,
|
json={
|
||||||
"options": {"num_predict": 512, "temperature": 0.1},
|
"model": _MODEL_OLLAMA,
|
||||||
},
|
"prompt": prompt,
|
||||||
)
|
"stream": False,
|
||||||
if resp.status_code == 200:
|
"options": {"num_predict": 512, "temperature": 0.1},
|
||||||
text = resp.json().get("response", "").strip()
|
},
|
||||||
issues = text.count("⚠️")
|
)
|
||||||
logger.info("push_review_ollama_done", repo=repo_name, branch=branch, issues=issues)
|
if resp.status_code == 200:
|
||||||
return {"review_text": text, "issues_count": issues, "model": _MODEL_OLLAMA, "provider": "ollama"}
|
text = resp.json().get("response", "").strip()
|
||||||
except httpx.TimeoutException:
|
issues = text.count("⚠️")
|
||||||
logger.warning("push_review_ollama_timeout", repo=repo_name)
|
logger.info(
|
||||||
except Exception as e:
|
"push_review_ollama_done",
|
||||||
logger.error("push_review_ollama_failed", repo=repo_name, error=str(e))
|
repo=repo_name,
|
||||||
|
branch=branch,
|
||||||
|
issues=issues,
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"review_text": text,
|
||||||
|
"issues_count": issues,
|
||||||
|
"model": _MODEL_OLLAMA,
|
||||||
|
"provider": endpoint.provider_name,
|
||||||
|
}
|
||||||
|
logger.warning(
|
||||||
|
"push_review_ollama_http_error",
|
||||||
|
repo=repo_name,
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
status=resp.status_code,
|
||||||
|
)
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
logger.warning(
|
||||||
|
"push_review_ollama_timeout",
|
||||||
|
repo=repo_name,
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
"push_review_ollama_failed",
|
||||||
|
repo=repo_name,
|
||||||
|
provider=endpoint.provider_name,
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def close(self) -> None:
|
async def close(self) -> None:
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
"""
|
"""
|
||||||
Ollama endpoint resolver for non-critical workload placement.
|
Ollama endpoint resolver for AWOOOI workload placement.
|
||||||
|
|
||||||
ADR-110 gives AWOOOI three Ollama endpoints. This resolver is intentionally
|
ADR-110 gives AWOOOI three Ollama endpoints. The global order is always
|
||||||
small: it chooses the preferred endpoint by workload class, while health-aware
|
GCP-A -> GCP-B -> 111 local; Gemini is owned by the caller/AI Router as the
|
||||||
failover remains owned by ollama_failover_manager.py.
|
final non-Ollama fallback.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -31,28 +31,6 @@ OllamaWorkloadType = Literal[
|
|||||||
"dr",
|
"dr",
|
||||||
]
|
]
|
||||||
|
|
||||||
_GCP_A_PREFERRED_WORKLOADS = {
|
|
||||||
"interactive",
|
|
||||||
"healthcheck",
|
|
||||||
"alert_fast",
|
|
||||||
"batch",
|
|
||||||
"embedding",
|
|
||||||
"rag",
|
|
||||||
"code_review",
|
|
||||||
"shadow",
|
|
||||||
"canary",
|
|
||||||
"deep_rca",
|
|
||||||
"image_analysis",
|
|
||||||
"hermes",
|
|
||||||
}
|
|
||||||
|
|
||||||
_LOCAL_PREFERRED_WORKLOADS = {
|
|
||||||
"local_required",
|
|
||||||
"privacy_sensitive",
|
|
||||||
"dr",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class _OllamaSettings(Protocol):
|
class _OllamaSettings(Protocol):
|
||||||
OLLAMA_URL: str
|
OLLAMA_URL: str
|
||||||
OLLAMA_SECONDARY_URL: str
|
OLLAMA_SECONDARY_URL: str
|
||||||
@@ -73,56 +51,47 @@ def resolve_ollama_selection(
|
|||||||
config: _OllamaSettings | None = None,
|
config: _OllamaSettings | None = None,
|
||||||
) -> OllamaEndpointSelection:
|
) -> OllamaEndpointSelection:
|
||||||
"""Return the preferred Ollama endpoint for a workload class."""
|
"""Return the preferred Ollama endpoint for a workload class."""
|
||||||
|
return resolve_ollama_order(workload_type, config=config)[0]
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_ollama_order(
|
||||||
|
workload_type: OllamaWorkloadType = "interactive",
|
||||||
|
*,
|
||||||
|
config: _OllamaSettings | None = None,
|
||||||
|
) -> tuple[OllamaEndpointSelection, ...]:
|
||||||
|
"""Return the global Ollama fallback order: GCP-A -> GCP-B -> 111."""
|
||||||
cfg = config or settings
|
cfg = config or settings
|
||||||
primary = cfg.OLLAMA_URL
|
|
||||||
secondary = cfg.OLLAMA_SECONDARY_URL
|
|
||||||
fallback = cfg.OLLAMA_FALLBACK_URL
|
|
||||||
|
|
||||||
if workload_type in _LOCAL_PREFERRED_WORKLOADS:
|
candidates = (
|
||||||
if fallback:
|
(cfg.OLLAMA_URL, "ollama_gcp_a", "global_primary_gcp_a"),
|
||||||
return OllamaEndpointSelection(
|
(cfg.OLLAMA_SECONDARY_URL, "ollama_gcp_b", "global_secondary_gcp_b"),
|
||||||
url=fallback,
|
(cfg.OLLAMA_FALLBACK_URL, "ollama_local", "global_local_111"),
|
||||||
provider_name="ollama_local",
|
)
|
||||||
|
selections: list[OllamaEndpointSelection] = []
|
||||||
|
seen: set[str] = set()
|
||||||
|
for url, provider_name, reason in candidates:
|
||||||
|
if not url or url in seen:
|
||||||
|
continue
|
||||||
|
seen.add(url)
|
||||||
|
selections.append(
|
||||||
|
OllamaEndpointSelection(
|
||||||
|
url=url,
|
||||||
|
provider_name=provider_name,
|
||||||
workload_type=workload_type,
|
workload_type=workload_type,
|
||||||
reason="local_heavy_or_privacy_lane",
|
reason=reason,
|
||||||
)
|
)
|
||||||
if secondary:
|
|
||||||
return OllamaEndpointSelection(
|
|
||||||
url=secondary,
|
|
||||||
provider_name="ollama_gcp_b",
|
|
||||||
workload_type=workload_type,
|
|
||||||
reason="local_missing_gcp_b_fallback",
|
|
||||||
)
|
|
||||||
|
|
||||||
if workload_type not in _GCP_A_PREFERRED_WORKLOADS and secondary:
|
|
||||||
return OllamaEndpointSelection(
|
|
||||||
url=secondary,
|
|
||||||
provider_name="ollama_gcp_b",
|
|
||||||
workload_type=workload_type,
|
|
||||||
reason="gcp_b_default_non_alert_lane",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if primary:
|
if selections:
|
||||||
return OllamaEndpointSelection(
|
return tuple(selections)
|
||||||
url=primary,
|
|
||||||
provider_name="ollama_gcp_a",
|
|
||||||
workload_type=workload_type,
|
|
||||||
reason="primary_interactive_lane",
|
|
||||||
)
|
|
||||||
|
|
||||||
if secondary:
|
return (
|
||||||
return OllamaEndpointSelection(
|
OllamaEndpointSelection(
|
||||||
url=secondary,
|
url="",
|
||||||
provider_name="ollama_gcp_b",
|
provider_name="ollama_unconfigured",
|
||||||
workload_type=workload_type,
|
workload_type=workload_type,
|
||||||
reason="primary_missing_gcp_b_fallback",
|
reason="no_ollama_endpoint_configured",
|
||||||
)
|
),
|
||||||
|
|
||||||
return OllamaEndpointSelection(
|
|
||||||
url=fallback,
|
|
||||||
provider_name="ollama_local",
|
|
||||||
workload_type=workload_type,
|
|
||||||
reason="gcp_missing_local_fallback",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from types import SimpleNamespace
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
@@ -32,6 +33,14 @@ async def _noop_save(*args: Any, **kwargs: Any) -> None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _fake_ollama_order(_workload_type: str) -> tuple[SimpleNamespace, ...]:
|
||||||
|
return (
|
||||||
|
SimpleNamespace(url="http://gcp-a:11434", provider_name="ollama_gcp_a"),
|
||||||
|
SimpleNamespace(url="http://gcp-b:11434", provider_name="ollama_gcp_b"),
|
||||||
|
SimpleNamespace(url="http://local-111:11434", provider_name="ollama_local"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_large_pr_uses_local_ollama_when_gemini_fallback_disabled(
|
async def test_large_pr_uses_local_ollama_when_gemini_fallback_disabled(
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
@@ -43,8 +52,8 @@ async def test_large_pr_uses_local_ollama_when_gemini_fallback_disabled(
|
|||||||
)
|
)
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
review_module,
|
review_module,
|
||||||
"resolve_ollama_endpoint",
|
"resolve_ollama_order",
|
||||||
lambda workload_type: "http://local-111:11434",
|
_fake_ollama_order,
|
||||||
)
|
)
|
||||||
|
|
||||||
client = _FakeClient()
|
client = _FakeClient()
|
||||||
@@ -68,8 +77,8 @@ async def test_large_pr_uses_local_ollama_when_gemini_fallback_disabled(
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert result is not None
|
assert result is not None
|
||||||
assert result["provider"] == "ollama"
|
assert result["provider"] == "ollama_gcp_a"
|
||||||
assert client.posted_urls == ["http://local-111:11434/api/generate"]
|
assert client.posted_urls == ["http://gcp-a:11434/api/generate"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -83,8 +92,8 @@ async def test_ollama_failure_does_not_fall_back_to_gemini_by_default(
|
|||||||
)
|
)
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
review_module,
|
review_module,
|
||||||
"resolve_ollama_endpoint",
|
"resolve_ollama_order",
|
||||||
lambda workload_type: "http://local-111:11434",
|
_fake_ollama_order,
|
||||||
)
|
)
|
||||||
|
|
||||||
client = _FakeClient(fail=True)
|
client = _FakeClient(fail=True)
|
||||||
@@ -110,7 +119,11 @@ async def test_ollama_failure_does_not_fall_back_to_gemini_by_default(
|
|||||||
assert result is not None
|
assert result is not None
|
||||||
assert result["provider"] == "ollama_unavailable"
|
assert result["provider"] == "ollama_unavailable"
|
||||||
assert result["cloud_fallback_skipped"] is True
|
assert result["cloud_fallback_skipped"] is True
|
||||||
assert client.posted_urls == ["http://local-111:11434/api/generate"]
|
assert client.posted_urls == [
|
||||||
|
"http://gcp-a:11434/api/generate",
|
||||||
|
"http://gcp-b:11434/api/generate",
|
||||||
|
"http://local-111:11434/api/generate",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|||||||
from types import SimpleNamespace
|
from types import SimpleNamespace
|
||||||
|
|
||||||
from src.services.ollama_endpoint_resolver import (
|
from src.services.ollama_endpoint_resolver import (
|
||||||
|
resolve_ollama_order,
|
||||||
resolve_ollama_selection,
|
resolve_ollama_selection,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -20,10 +21,13 @@ def _settings(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_non_sensitive_workloads_prefer_gcp_a_lane() -> None:
|
def test_all_workloads_prefer_gcp_a_lane() -> None:
|
||||||
cfg = _settings()
|
cfg = _settings()
|
||||||
|
|
||||||
for workload in (
|
for workload in (
|
||||||
|
"interactive",
|
||||||
|
"healthcheck",
|
||||||
|
"alert_fast",
|
||||||
"batch",
|
"batch",
|
||||||
"embedding",
|
"embedding",
|
||||||
"rag",
|
"rag",
|
||||||
@@ -33,29 +37,31 @@ def test_non_sensitive_workloads_prefer_gcp_a_lane() -> None:
|
|||||||
"deep_rca",
|
"deep_rca",
|
||||||
"image_analysis",
|
"image_analysis",
|
||||||
"hermes",
|
"hermes",
|
||||||
|
"local_required",
|
||||||
|
"privacy_sensitive",
|
||||||
|
"dr",
|
||||||
):
|
):
|
||||||
selection = resolve_ollama_selection(workload, config=cfg)
|
selection = resolve_ollama_selection(workload, config=cfg)
|
||||||
assert selection.url == "http://192.168.0.110:11435"
|
assert selection.url == "http://192.168.0.110:11435"
|
||||||
assert selection.provider_name == "ollama_gcp_a"
|
assert selection.provider_name == "ollama_gcp_a"
|
||||||
assert selection.reason == "primary_interactive_lane"
|
assert selection.reason == "global_primary_gcp_a"
|
||||||
|
|
||||||
|
|
||||||
def test_interactive_workloads_stay_on_gcp_a() -> None:
|
def test_all_workloads_share_global_ollama_order() -> None:
|
||||||
cfg = _settings()
|
cfg = _settings()
|
||||||
|
|
||||||
for workload in ("interactive", "healthcheck", "alert_fast"):
|
for workload in ("interactive", "deep_rca", "local_required", "privacy_sensitive", "dr"):
|
||||||
selection = resolve_ollama_selection(workload, config=cfg)
|
order = resolve_ollama_order(workload, config=cfg)
|
||||||
assert selection.url == "http://192.168.0.110:11435"
|
assert [selection.url for selection in order] == [
|
||||||
assert selection.provider_name == "ollama_gcp_a"
|
"http://192.168.0.110:11435",
|
||||||
|
"http://192.168.0.110:11436",
|
||||||
|
"http://192.168.0.110:11437",
|
||||||
def test_local_required_workloads_use_local_lane() -> None:
|
]
|
||||||
cfg = _settings()
|
assert [selection.provider_name for selection in order] == [
|
||||||
|
"ollama_gcp_a",
|
||||||
for workload in ("local_required", "privacy_sensitive", "dr"):
|
"ollama_gcp_b",
|
||||||
selection = resolve_ollama_selection(workload, config=cfg)
|
"ollama_local",
|
||||||
assert selection.url == "http://192.168.0.110:11437"
|
]
|
||||||
assert selection.provider_name == "ollama_local"
|
|
||||||
|
|
||||||
|
|
||||||
def test_non_sensitive_workloads_fall_back_to_gcp_b_when_primary_missing() -> None:
|
def test_non_sensitive_workloads_fall_back_to_gcp_b_when_primary_missing() -> None:
|
||||||
@@ -64,4 +70,10 @@ def test_non_sensitive_workloads_fall_back_to_gcp_b_when_primary_missing() -> No
|
|||||||
selection = resolve_ollama_selection("embedding", config=cfg)
|
selection = resolve_ollama_selection("embedding", config=cfg)
|
||||||
assert selection.url == "http://192.168.0.110:11436"
|
assert selection.url == "http://192.168.0.110:11436"
|
||||||
assert selection.provider_name == "ollama_gcp_b"
|
assert selection.provider_name == "ollama_gcp_b"
|
||||||
assert selection.reason == "primary_missing_gcp_b_fallback"
|
assert selection.reason == "global_secondary_gcp_b"
|
||||||
|
|
||||||
|
order = resolve_ollama_order("embedding", config=cfg)
|
||||||
|
assert [selection.provider_name for selection in order] == [
|
||||||
|
"ollama_gcp_b",
|
||||||
|
"ollama_local",
|
||||||
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user