fix(ai): isolate heavy Ollama workloads from GCP alert lane
This commit is contained in:
@@ -31,8 +31,8 @@ from typing import Protocol, runtime_checkable
|
||||
import httpx
|
||||
import structlog
|
||||
|
||||
from src.core.config import settings
|
||||
from src.services.model_registry import get_model_registry
|
||||
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
@@ -549,7 +549,7 @@ class IntentClassifier:
|
||||
# 呼叫 Ollama
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(
|
||||
f"{settings.OLLAMA_URL}/api/generate",
|
||||
f"{resolve_ollama_endpoint('hermes')}/api/generate",
|
||||
json={
|
||||
"model": model_name,
|
||||
"prompt": prompt,
|
||||
|
||||
Reference in New Issue
Block a user