fix(ai): isolate heavy Ollama workloads from GCP alert lane
All checks were successful
CD Pipeline / tests (push) Successful in 54s
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / build-and-deploy (push) Successful in 3m19s
CD Pipeline / post-deploy-checks (push) Successful in 3m12s

This commit is contained in:
Your Name
2026-05-05 23:05:59 +08:00
parent 1dcc6d61dc
commit c4854bb355
17 changed files with 146 additions and 84 deletions

View File

@@ -500,7 +500,7 @@ class Settings(BaseSettings):
default=False,
description=(
"Allow LocalCodeReviewService to fall back to Gemini when the "
"GCP-B/Ollama code-review lane fails. Default false to avoid "
"local Ollama code-review lane fails. Default false to avoid "
"unexpected cloud spend from Gitea push/PR alerts."
),
)

View File

@@ -9,6 +9,7 @@ Layer 1 意圖路由(關鍵字正則)→ Ollama 本地模型111→ Tel
debugger/vuln → deepseek-r1:14b推理; code agents → qwen2.5-coder:7b; 其他 → qwen2.5:7b-instruct
"""
from __future__ import annotations
import asyncio
import re
import time
@@ -17,7 +18,6 @@ import httpx
import structlog
from sqlalchemy import text
from src.core.config import settings
from src.core.redis_client import get_redis
from src.db.base import get_db_context
from src.hermes.agent_loader import get_agent_system_prompt
@@ -266,7 +266,9 @@ async def process_nl_message(
success = False
error_type: str | None = None
try:
ollama_base = getattr(settings, "OLLAMA_URL", "http://34.143.170.20:11434") # 2026-05-03 ogt: ADR-110 GCP-A Primary
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
ollama_base = resolve_ollama_endpoint("hermes")
async with httpx.AsyncClient(timeout=_OLLAMA_TIMEOUT) as _hc:
resp = await _hc.post(
f"{ollama_base}/api/chat",

View File

@@ -26,7 +26,7 @@ from __future__ import annotations
import asyncio
import re
from dataclasses import dataclass, field
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Literal
import httpx
@@ -125,7 +125,7 @@ class DecisionFusionAdapter:
# Public API
# =========================================================================
async def fuse_decision(self, event: "AiGovernanceEvent") -> FusedDecision:
async def fuse_decision(self, event: AiGovernanceEvent) -> FusedDecision:
"""三維融合LLM × Playbook × MCP → FusedDecision。
三個維度並行評估asyncio.gather任一失敗靜默降為 0.5。
@@ -226,7 +226,7 @@ class DecisionFusionAdapter:
# =========================================================================
async def _score_llm(
self, event: "AiGovernanceEvent"
self, event: AiGovernanceEvent
) -> tuple[float, str, dict[str, Any]]:
"""Ollama LLM 推理:治理事件情境 → 建議動作 + 信心度。
@@ -254,7 +254,9 @@ class DecisionFusionAdapter:
"只輸出 CONFIDENCE 和 ACTION 兩行,不要其他解釋。"
)
ollama_url = getattr(self._settings, "OLLAMA_URL", "http://192.168.0.111:11434") # 2026-05-04 ogt: ADR-110 修正 — 111 primary
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
ollama_url = resolve_ollama_endpoint("deep_rca")
try:
async with httpx.AsyncClient(
@@ -320,7 +322,7 @@ class DecisionFusionAdapter:
# =========================================================================
async def _score_playbook(
self, event: "AiGovernanceEvent"
self, event: AiGovernanceEvent
) -> tuple[float, str | None, float | None]:
"""Playbook 相似度比對 → 取最高 trust_score。
@@ -373,7 +375,7 @@ class DecisionFusionAdapter:
# =========================================================================
async def _score_mcp(
self, event: "AiGovernanceEvent"
self, event: AiGovernanceEvent
) -> tuple[float, dict[str, Any]]:
"""Prometheus 情報採集 → MCP 感官品質分數。

View File

@@ -33,10 +33,11 @@ logger = structlog.get_logger(__name__)
# ============================================================
# 設定
# ============================================================
# 2026-05-03 ogt: ADR-110 GCP-A Primary — 改從 settings 讀取,不再硬編碼 111
# 2026-05-05 Codex: 重摘要走 111 lane,避免污染 GCP alert-fast lane
def _get_ollama_url() -> str:
from src.core.config import get_settings
return get_settings().OLLAMA_URL
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
return resolve_ollama_endpoint("deep_rca")
# D1 集中化 2026-04-11: 從 models.json providers.ollama.models.drift_summary 讀取
NARRATOR_MODEL = get_model("ollama", "drift_summary")
NARRATOR_TIMEOUT = 90.0 # seconds
@@ -120,8 +121,8 @@ class DriftNarratorService:
async def narrate_and_notify(
self,
report: "DriftReport",
interpretation: "DriftInterpretation | None" = None,
report: DriftReport,
interpretation: DriftInterpretation | None = None,
) -> None:
"""
生成人話摘要並推送 Telegram
@@ -166,7 +167,7 @@ class DriftNarratorService:
medium=report.medium_count,
)
def _should_narrate(self, report: "DriftReport") -> bool:
def _should_narrate(self, report: DriftReport) -> bool:
"""觸發條件high >= 1 or medium >= 3"""
# 過濾 HPA 白名單後重算
non_hpa_items = [
@@ -180,8 +181,8 @@ class DriftNarratorService:
async def _generate_narrative_and_items(
self,
report: "DriftReport",
interpretation: "DriftInterpretation | None",
report: DriftReport,
interpretation: DriftInterpretation | None,
) -> tuple[str, list[dict], dict]:
"""
2026-04-18 ogt + Claude Opus 4.7: B 方案 — LLM 產生 narrative + 結構化 items
@@ -354,8 +355,8 @@ class DriftNarratorService:
def _fallback_recommendation(
self,
report: "DriftReport",
interpretation: "DriftInterpretation | None",
report: DriftReport,
interpretation: DriftInterpretation | None,
) -> dict:
"""
2026-04-20 P0.2 ogt + Claude Opus 4.7: LLM 沒給 recommendation 時的 Python fallback
@@ -397,7 +398,7 @@ class DriftNarratorService:
async def _log_ai_action_to_db(
self,
report: "DriftReport",
report: DriftReport,
prompt: str,
raw_response: str | None,
narrative: str,
@@ -416,7 +417,9 @@ class DriftNarratorService:
- 若能找到該 drift 的 incident 關聯,設 parent_op_id
"""
import json as _json
from sqlalchemy import text as _sql
from src.db.base import get_db_context
input_json = _json.dumps({
@@ -511,7 +514,7 @@ class DriftNarratorService:
items_count=len(items),
)
def _format_drift_for_llm(self, report: "DriftReport") -> str:
def _format_drift_for_llm(self, report: DriftReport) -> str:
"""
2026-04-18 ogt + Claude Opus 4.7: B 方案 — 餵 LLM 用的 JSON 序列化
保留更多原始 context 給 LLM 推理,不做 30 字元暴力截斷
@@ -582,7 +585,7 @@ class DriftNarratorService:
# 一般變化
return f"{from_val}{to_val}"
def _fallback_items(self, report: "DriftReport") -> list[dict]:
def _fallback_items(self, report: DriftReport) -> list[dict]:
"""
LLM 失敗時的 Python 智能摘要 (取代舊 str()[:30])
- 過濾白名單
@@ -605,7 +608,7 @@ class DriftNarratorService:
})
return items
def _format_intent_summary(self, interpretation: "DriftInterpretation | None") -> str:
def _format_intent_summary(self, interpretation: DriftInterpretation | None) -> str:
if not interpretation:
return "無意圖分析"
return (
@@ -616,8 +619,8 @@ class DriftNarratorService:
def _fallback_narrative(
self,
report: "DriftReport",
interpretation: "DriftInterpretation | None",
report: DriftReport,
interpretation: DriftInterpretation | None,
) -> str:
"""LLM 失敗時的結構化 fallback"""
resources = list({
@@ -636,7 +639,7 @@ class DriftNarratorService:
async def _send_telegram(
self,
report: "DriftReport",
report: DriftReport,
narrative: str,
items: list[dict],
recommendation: dict | None = None,
@@ -667,7 +670,7 @@ class DriftNarratorService:
except Exception as e:
logger.warning("drift_narrator_telegram_error", error=str(e))
def _count_nontrivial_drift(self, report: "DriftReport") -> int:
def _count_nontrivial_drift(self, report: DriftReport) -> int:
"""
計算非白名單、非 trivial (K8s 自動補齊) 的 drift 數
用於 Telegram 底部「還有 N 項」顯示實際可操作數量
@@ -704,7 +707,7 @@ class DriftNarratorService:
def _render_telegram_body(
self,
report: "DriftReport",
report: DriftReport,
narrative: str,
items: list[dict],
recommendation: dict | None = None,

View File

@@ -21,8 +21,8 @@ from typing import Protocol
import httpx
import structlog
from src.core.config import settings
from src.services.model_registry import get_model as _get_model
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
logger = structlog.get_logger(__name__)
@@ -98,7 +98,7 @@ class OllamaEmbeddingService:
P1 修復 (2026-03-29): 維度配置化,支援更多模型
"""
self._model = model
self._ollama_url = ollama_url or settings.OLLAMA_URL
self._ollama_url = ollama_url or resolve_ollama_endpoint("embedding")
self._timeout = timeout
self._default_dimension = default_dimension or self.MODEL_DIMENSIONS.get(
model, self.DEFAULT_DIMENSION

View File

@@ -21,7 +21,6 @@ AWOOOI — Image Analysis Service (Phase 34, ADR-067)
from __future__ import annotations
import base64
import os
import time
from pathlib import Path
from typing import TYPE_CHECKING
@@ -29,14 +28,13 @@ from typing import TYPE_CHECKING
import httpx
import structlog
from src.core.config import get_settings
from src.services.model_registry import get_model
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
if TYPE_CHECKING:
pass
logger = structlog.get_logger(__name__)
settings = get_settings()
# D1 集中化 2026-04-11: 從 models.json providers.ollama.models.image_analysis 讀取
_MODEL = get_model("ollama", "image_analysis")
@@ -124,7 +122,7 @@ class ImageAnalysisService:
image_b64 = base64.b64encode(image_path.read_bytes()).decode()
http = await self._get_http()
resp = await http.post(
f"{settings.OLLAMA_URL}/api/generate",
f"{resolve_ollama_endpoint('image_analysis')}/api/generate",
json={
"model": _MODEL,
"prompt": question,

View File

@@ -31,8 +31,8 @@ from typing import Protocol, runtime_checkable
import httpx
import structlog
from src.core.config import settings
from src.services.model_registry import get_model_registry
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
logger = structlog.get_logger(__name__)
@@ -549,7 +549,7 @@ class IntentClassifier:
# 呼叫 Ollama
async with httpx.AsyncClient() as client:
response = await client.post(
f"{settings.OLLAMA_URL}/api/generate",
f"{resolve_ollama_endpoint('hermes')}/api/generate",
json={
"model": model_name,
"prompt": prompt,

View File

@@ -15,10 +15,11 @@ import structlog
logger = structlog.get_logger(__name__)
# 2026-05-03 ogt: ADR-110 GCP-A Primary — 改從 settings 讀取,不再硬編碼 111
# 2026-05-05 Codex: KB 萃取走 111 lane,避免污染 GCP alert-fast lane
def _get_ollama_base() -> str:
from src.core.config import get_settings
return get_settings().OLLAMA_URL
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
return resolve_ollama_endpoint("deep_rca")
_EXTRACT_MODEL = "llama3.2:3b"
_EXTRACT_TIMEOUT = 30.0 # 秒,容忍慢速
@@ -117,7 +118,11 @@ class KnowledgeExtractorService:
category = self._infer_category(incident)
# 5. 建立 KB 條目
from src.models.knowledge import EntrySource, EntryType, KnowledgeEntryCreate
from src.models.knowledge import (
EntrySource,
EntryType,
KnowledgeEntryCreate,
)
from src.services.knowledge_service import get_knowledge_service
entry_data = KnowledgeEntryCreate(

View File

@@ -20,11 +20,10 @@ from pathlib import Path
import httpx
import structlog
from src.core.config import get_settings
import src.repositories.rag_chunk_repository as rag_repo
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
logger = structlog.get_logger(__name__)
settings = get_settings()
_EMBED_MODEL = "nomic-embed-text"
_GEN_MODEL = "qwen2.5:7b-instruct"
@@ -131,7 +130,7 @@ class KnowledgeRAGService:
try:
http = await self._get_http()
resp = await http.post(
f"{settings.OLLAMA_URL}/api/embeddings",
f"{resolve_ollama_endpoint('embedding')}/api/embeddings",
json={"model": _EMBED_MODEL, "prompt": text},
)
if resp.status_code == 200:
@@ -150,7 +149,7 @@ class KnowledgeRAGService:
try:
http = await self._get_http()
resp = await http.post(
f"{settings.OLLAMA_URL}/api/generate",
f"{resolve_ollama_endpoint('rag')}/api/generate",
json={
"model": _GEN_MODEL,
"prompt": prompt,

View File

@@ -186,7 +186,7 @@ class LocalCodeReviewService:
)
return {
"review_text": (
"⚠️ Code ReviewGCP-B/Ollama 審查未完成,"
"⚠️ Code Review本地 Ollama 審查未完成,"
"已依成本策略跳過 Gemini fallback。"
),
"issues_count": 1,

View File

@@ -35,8 +35,9 @@ logger = structlog.get_logger(__name__)
# ============================================================
# 2026-05-03 ogt: ADR-110 — 不再硬編碼 111;端點改由 ollama_endpoint_resolver 以 deep_rca workload 解析,不再直接讀 settings.OLLAMA_URL
def _get_ollama_url() -> str:
from src.core.config import get_settings
return get_settings().OLLAMA_URL
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
return resolve_ollama_endpoint("deep_rca")
# D1 集中化 2026-04-11: 從 models.json providers.ollama.models.log_anomaly 讀取
SUMMARY_MODEL = get_model("ollama", "log_anomaly")
LLM_TIMEOUT = 180.0 # deepseek-r1 硬超時
@@ -145,7 +146,7 @@ class LogSummaryService:
self.summarize(pod_name, namespace),
timeout=SOFT_TIMEOUT,
)
except asyncio.TimeoutError:
except TimeoutError:
logger.info(
"log_summary_soft_timeout",
pod=pod_name,
@@ -182,7 +183,7 @@ class LogSummaryService:
def _extract_anomaly_lines(self, raw_logs: str) -> list[str]:
"""過濾異常行 + 敏感資料遮蔽,取最後 ANOMALY_TAIL_LINES 行"""
lines = raw_logs.splitlines()
anomaly = [l for l in lines if _ANOMALY_PATTERN.search(l)]
anomaly = [line for line in lines if _ANOMALY_PATTERN.search(line)]
# 取最後 N 行
anomaly = anomaly[-ANOMALY_TAIL_LINES:]
# 遮蔽敏感資料

View File

@@ -16,27 +16,37 @@ from src.core.config import settings
OllamaWorkloadType = Literal[
"interactive",
"healthcheck",
"alert_fast",
"batch",
"embedding",
"rag",
"code_review",
"shadow",
"canary",
"deep_rca",
"image_analysis",
"hermes",
"local_required",
"privacy_sensitive",
"dr",
]
_GCP_B_PREFERRED_WORKLOADS = {
_GCP_A_PREFERRED_WORKLOADS = {
"interactive",
"healthcheck",
"alert_fast",
}
_LOCAL_PREFERRED_WORKLOADS = {
"batch",
"embedding",
"rag",
"code_review",
"shadow",
"canary",
}
_LOCAL_PREFERRED_WORKLOADS = {
"deep_rca",
"image_analysis",
"hermes",
"local_required",
"privacy_sensitive",
"dr",
@@ -68,20 +78,28 @@ def resolve_ollama_selection(
secondary = cfg.OLLAMA_SECONDARY_URL
fallback = cfg.OLLAMA_FALLBACK_URL
if workload_type in _GCP_B_PREFERRED_WORKLOADS and secondary:
if workload_type in _LOCAL_PREFERRED_WORKLOADS:
if fallback:
return OllamaEndpointSelection(
url=fallback,
provider_name="ollama_local",
workload_type=workload_type,
reason="local_heavy_or_privacy_lane",
)
if secondary:
return OllamaEndpointSelection(
url=secondary,
provider_name="ollama_gcp_b",
workload_type=workload_type,
reason="local_missing_gcp_b_fallback",
)
if workload_type not in _GCP_A_PREFERRED_WORKLOADS and secondary:
return OllamaEndpointSelection(
url=secondary,
provider_name="ollama_gcp_b",
workload_type=workload_type,
reason="gcp_b_batch_lane",
)
if workload_type in _LOCAL_PREFERRED_WORKLOADS and fallback:
return OllamaEndpointSelection(
url=fallback,
provider_name="ollama_local",
workload_type=workload_type,
reason="local_privacy_or_dr_lane",
reason="gcp_b_default_non_alert_lane",
)
return OllamaEndpointSelection(

View File

@@ -29,9 +29,9 @@ from typing import Any
import httpx
import structlog
from src.core.config import settings
from src.models.playbook import Playbook, SymptomPattern
from src.repositories.interfaces import IEmbeddingCacheRepository
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
logger = structlog.get_logger(__name__)
@@ -146,7 +146,7 @@ class PlaybookRAGService:
"""
self._http_client = http_client
self._embedding_cache = embedding_cache
self.ollama_url = settings.OLLAMA_URL
self.ollama_url = resolve_ollama_endpoint("embedding")
self.embedding_model = EMBEDDING_MODEL
# =========================================================================