fix(ai): isolate heavy Ollama workloads from GCP alert lane
All checks were successful
CD Pipeline / tests (push) Successful in 54s
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / build-and-deploy (push) Successful in 3m19s
CD Pipeline / post-deploy-checks (push) Successful in 3m12s

This commit is contained in:
Your Name
2026-05-05 23:05:59 +08:00
parent 1dcc6d61dc
commit c4854bb355
17 changed files with 146 additions and 84 deletions

View File

@@ -31,8 +31,8 @@ from typing import Protocol, runtime_checkable
import httpx
import structlog
from src.core.config import settings
from src.services.model_registry import get_model_registry
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
logger = structlog.get_logger(__name__)
@@ -549,7 +549,7 @@ class IntentClassifier:
# 呼叫 Ollama
async with httpx.AsyncClient() as client:
response = await client.post(
- f"{settings.OLLAMA_URL}/api/generate",
+ f"{resolve_ollama_endpoint('hermes')}/api/generate",
json={
"model": model_name,
"prompt": prompt,