fix(ai): stabilize GCP Ollama alert lane
This commit is contained in:
@@ -25,7 +25,7 @@
|
||||
"log_anomaly": "deepseek-r1:14b",
|
||||
"nemoclaw": "deepseek-r1:14b",
|
||||
"playbook_draft": "qwen3:14b",
|
||||
"code_review": "qwen2.5-coder:32b",
|
||||
"code_review": "qwen2.5-coder:7b",
|
||||
"embedding": "bge-m3:latest",
|
||||
"rag_generate": "qwen3:14b",
|
||||
"image_analysis": "minicpm-v:latest",
|
||||
@@ -175,7 +175,7 @@
|
||||
},
|
||||
"pr_code_review": {
|
||||
"phase": 32,
|
||||
"model": "qwen2.5-coder:32b",
|
||||
"model": "qwen2.5-coder:7b",
|
||||
"timeout_seconds": 120,
|
||||
"purpose": "Gitea PR 自動審查"
|
||||
},
|
||||
|
||||
@@ -520,6 +520,14 @@ class Settings(BaseSettings):
|
||||
"then local 111 before cloud backup providers such as Gemini."
|
||||
),
|
||||
)
|
||||
ALERT_OLLAMA_MODEL: str = Field(
|
||||
default="gemma3:4b",
|
||||
description=(
|
||||
"Ollama model used for incident/alert diagnosis. Keep this separate "
|
||||
"from the general RCA model so alert cards stay on the fast local "
|
||||
"lane before Gemini backup is considered."
|
||||
),
|
||||
)
|
||||
# 2026-03-29 ogt: ADR-036 Nemotron Tool Calling 整合
|
||||
NVIDIA_API_KEY: str = Field(
|
||||
default="",
|
||||
|
||||
@@ -46,6 +46,7 @@ _DEDUP_TTL_SEC = 3600 # 同一告警 1 小時內不重複發送
|
||||
_TG_SILENCE_THRESHOLD = 2 # PENDING telegram_message_id IS NULL 告警門檻
|
||||
_FLYWHEEL_SUCCESS_MIN = 0.30 # 執行成功率下限
|
||||
_STUCK_ANALYSIS_THRESHOLD = 3 # Agent Debate 失敗導致卡住的告警門檻
|
||||
_TRUST_DRIFT_META_MIN_RATIO = 0.20 # 低於此比例只記治理事件,不升 Meta System
|
||||
|
||||
# 2026-05-03 ogt + Claude Opus 4.7 — feedback_silencing_alerts_recurring_violation
|
||||
# 啟動寬限期:30 分鐘內可 skip「資料還沒到」噪音;超過寬限期仍空 = 真資料管線斷,必須告警
|
||||
@@ -210,7 +211,8 @@ async def _check_once() -> None:
|
||||
from src.services.governance_agent import get_governance_agent
|
||||
trust_result = await get_governance_agent().check_trust_drift(emit_alert=False)
|
||||
drifted = trust_result.get("drifted", 0)
|
||||
if drifted > 0:
|
||||
drift_ratio = float(trust_result.get("drift_ratio") or 0.0)
|
||||
if drifted > 0 and drift_ratio >= _TRUST_DRIFT_META_MIN_RATIO:
|
||||
auto_deprecated = trust_result.get("auto_deprecated", 0)
|
||||
kept = trust_result.get("kept", 0)
|
||||
violations.append(
|
||||
@@ -219,6 +221,13 @@ async def _check_once() -> None:
|
||||
)
|
||||
# 2026-05-05 ogt W6 修復:移除動態 low_count,避免 count 微變繞過 dedup
|
||||
violation_codes.append("W6:trust_drift")
|
||||
elif drifted > 0:
|
||||
logger.info(
|
||||
"watchdog_w6_trust_drift_below_meta_threshold",
|
||||
drifted=drifted,
|
||||
drift_ratio=round(drift_ratio, 3),
|
||||
threshold=_TRUST_DRIFT_META_MIN_RATIO,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("watchdog_w6_trust_drift_check_failed", error=str(e))
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ class OllamaProvider:
|
||||
client = await self._get_client()
|
||||
|
||||
registry = get_model_registry()
|
||||
model_name = registry.get_model("ollama", "rca")
|
||||
model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama", "rca")).strip()
|
||||
options = registry.get_provider_options("ollama")
|
||||
|
||||
# P0 2026-04-04 Claude Code: per-task timeout(Option C 分情境)
|
||||
@@ -112,7 +112,13 @@ class OllamaProvider:
|
||||
tokens = data.get("eval_count", 0) + data.get("prompt_eval_count", 0)
|
||||
latency = (time.perf_counter() - start) * 1000
|
||||
|
||||
logger.info("ollama_provider_success", response_length=len(result), tokens=tokens, latency_ms=round(latency, 1))
|
||||
logger.info(
|
||||
"ollama_provider_success",
|
||||
response_length=len(result),
|
||||
tokens=tokens,
|
||||
latency_ms=round(latency, 1),
|
||||
model=model_name,
|
||||
)
|
||||
return AIResult(
|
||||
raw_response=result,
|
||||
success=True,
|
||||
@@ -158,7 +164,7 @@ class OllamaProvider:
|
||||
total_tokens = 0
|
||||
messages: list[dict] = [{"role": "user", "content": prompt}]
|
||||
registry = get_model_registry()
|
||||
model_name = registry.get_model("ollama", "rca")
|
||||
model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama", "rca")).strip()
|
||||
options = registry.get_provider_options("ollama")
|
||||
task_type = (context or {}).get("task_type", "")
|
||||
if task_type in ("diagnose", "force_local"):
|
||||
@@ -321,9 +327,9 @@ class Ollama188Provider(OllamaProvider):
|
||||
registry = get_model_registry()
|
||||
# 嘗試取 ollama_188 專屬設定,fallback 到 ollama 預設
|
||||
try:
|
||||
model_name = registry.get_model("ollama_188", "rca")
|
||||
model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama_188", "rca")).strip()
|
||||
except Exception:
|
||||
model_name = getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "qwen2.5:7b-instruct")
|
||||
model_name = str((context or {}).get("ollama_model") or getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "qwen2.5:7b-instruct")).strip()
|
||||
|
||||
try:
|
||||
options = registry.get_provider_options("ollama_188")
|
||||
@@ -364,6 +370,7 @@ class Ollama188Provider(OllamaProvider):
|
||||
tokens=tokens,
|
||||
latency_ms=round(latency, 1),
|
||||
endpoint=fallback_url,
|
||||
model=model_name,
|
||||
)
|
||||
return AIResult(
|
||||
raw_response=result,
|
||||
|
||||
@@ -165,9 +165,11 @@ class GovernanceAgent:
|
||||
auto_deprecated=len(auto_deprecated_ids),
|
||||
kept=len(kept_ids),
|
||||
)
|
||||
drift_ratio = len(drifted) / total if total > 0 else 0.0
|
||||
return {
|
||||
"checked": total,
|
||||
"drifted": len(drifted),
|
||||
"drift_ratio": drift_ratio,
|
||||
"auto_deprecated": len(auto_deprecated_ids),
|
||||
"kept": len(kept_ids),
|
||||
}
|
||||
|
||||
@@ -1142,6 +1142,8 @@ class OpenClawService:
|
||||
exec_context = dict(alert_context) if alert_context else {}
|
||||
if decision.intent == IntentType.DIAGNOSE:
|
||||
exec_context["task_type"] = "diagnose"
|
||||
if self._is_incident_alert_context(alert_context):
|
||||
exec_context["ollama_model"] = getattr(settings, "ALERT_OLLAMA_MODEL", "gemma3:4b")
|
||||
|
||||
result = await executor.execute(
|
||||
prompt=prompt,
|
||||
|
||||
@@ -73,6 +73,12 @@ spec:
|
||||
value: "http://192.168.0.110:11436" # 2026-05-04 ogt: GCP-B secondary via 110 nginx proxy(11436 → 34.21.145.224:11434)
|
||||
- name: OLLAMA_FALLBACK_URL
|
||||
value: "http://192.168.0.111:11434" # 2026-05-04 ogt: 111 兜底(K8s 內網直連,GPU RTX)
|
||||
- name: ALERT_AI_ALLOW_CLOUD_FALLBACK
|
||||
value: "true" # Gemini 只作 GCP-A → GCP-B → 111 全失敗後的備援
|
||||
- name: ALERT_AI_ENFORCE_OLLAMA_FIRST
|
||||
value: "true" # 告警診斷強制先走 GCP-A → GCP-B → 111
|
||||
- name: ALERT_OLLAMA_MODEL
|
||||
value: "gemma3:4b" # 2026-05-05 Codex: qwen3:14b 告警 JSON prompt 會拖到 504
|
||||
- name: OPENCLAW_DEFAULT_MODEL
|
||||
value: "qwen2.5:7b-instruct"
|
||||
- name: OPENCLAW_TIMEOUT
|
||||
|
||||
Reference in New Issue
Block a user