fix(ai): stabilize GCP Ollama alert lane
Some checks failed
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / build-and-deploy (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / tests (push) Has been cancelled

This commit is contained in:
Your Name
2026-05-05 22:20:10 +08:00
parent a4e9a04982
commit bf847ad045
7 changed files with 42 additions and 8 deletions

View File

@@ -25,7 +25,7 @@
"log_anomaly": "deepseek-r1:14b",
"nemoclaw": "deepseek-r1:14b",
"playbook_draft": "qwen3:14b",
"code_review": "qwen2.5-coder:32b",
"code_review": "qwen2.5-coder:7b",
"embedding": "bge-m3:latest",
"rag_generate": "qwen3:14b",
"image_analysis": "minicpm-v:latest",
@@ -175,7 +175,7 @@
},
"pr_code_review": {
"phase": 32,
"model": "qwen2.5-coder:32b",
"model": "qwen2.5-coder:7b",
"timeout_seconds": 120,
"purpose": "Gitea PR 自動審查"
},

View File

@@ -520,6 +520,14 @@ class Settings(BaseSettings):
"then local 111 before cloud backup providers such as Gemini."
),
)
ALERT_OLLAMA_MODEL: str = Field(
default="gemma3:4b",
description=(
"Ollama model used for incident/alert diagnosis. Keep this separate "
"from the general RCA model so alert cards stay on the fast local "
"lane before Gemini backup is considered."
),
)
# 2026-03-29 ogt: ADR-036 Nemotron Tool Calling 整合
NVIDIA_API_KEY: str = Field(
default="",

View File

@@ -46,6 +46,7 @@ _DEDUP_TTL_SEC = 3600 # 同一告警 1 小時內不重複發送
_TG_SILENCE_THRESHOLD = 2 # PENDING telegram_message_id IS NULL 告警門檻
_FLYWHEEL_SUCCESS_MIN = 0.30 # 執行成功率下限
_STUCK_ANALYSIS_THRESHOLD = 3 # Agent Debate 失敗導致卡住的告警門檻
_TRUST_DRIFT_META_MIN_RATIO = 0.20 # 低於此比例只記治理事件,不升 Meta System
# 2026-05-03 ogt + Claude Opus 4.7 — feedback_silencing_alerts_recurring_violation
# 啟動寬限期(30 分鐘)內可 skip「資料還沒到」噪音;超過寬限期仍空 = 真資料管線斷,必須告警
@@ -210,7 +211,8 @@ async def _check_once() -> None:
from src.services.governance_agent import get_governance_agent
trust_result = await get_governance_agent().check_trust_drift(emit_alert=False)
drifted = trust_result.get("drifted", 0)
if drifted > 0:
drift_ratio = float(trust_result.get("drift_ratio") or 0.0)
if drifted > 0 and drift_ratio >= _TRUST_DRIFT_META_MIN_RATIO:
auto_deprecated = trust_result.get("auto_deprecated", 0)
kept = trust_result.get("kept", 0)
violations.append(
@@ -219,6 +221,13 @@ async def _check_once() -> None:
)
# 2026-05-05 ogt W6 修復:移除動態 low_count,避免 count 微變繞過 dedup
violation_codes.append("W6:trust_drift")
elif drifted > 0:
logger.info(
"watchdog_w6_trust_drift_below_meta_threshold",
drifted=drifted,
drift_ratio=round(drift_ratio, 3),
threshold=_TRUST_DRIFT_META_MIN_RATIO,
)
except Exception as e:
logger.warning("watchdog_w6_trust_drift_check_failed", error=str(e))

View File

@@ -77,7 +77,7 @@ class OllamaProvider:
client = await self._get_client()
registry = get_model_registry()
model_name = registry.get_model("ollama", "rca")
model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama", "rca")).strip()
options = registry.get_provider_options("ollama")
# P0 2026-04-04 Claude Code: per-task timeout(Option C 分情境)
@@ -112,7 +112,13 @@ class OllamaProvider:
tokens = data.get("eval_count", 0) + data.get("prompt_eval_count", 0)
latency = (time.perf_counter() - start) * 1000
logger.info("ollama_provider_success", response_length=len(result), tokens=tokens, latency_ms=round(latency, 1))
logger.info(
"ollama_provider_success",
response_length=len(result),
tokens=tokens,
latency_ms=round(latency, 1),
model=model_name,
)
return AIResult(
raw_response=result,
success=True,
@@ -158,7 +164,7 @@ class OllamaProvider:
total_tokens = 0
messages: list[dict] = [{"role": "user", "content": prompt}]
registry = get_model_registry()
model_name = registry.get_model("ollama", "rca")
model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama", "rca")).strip()
options = registry.get_provider_options("ollama")
task_type = (context or {}).get("task_type", "")
if task_type in ("diagnose", "force_local"):
@@ -321,9 +327,9 @@ class Ollama188Provider(OllamaProvider):
registry = get_model_registry()
# 嘗試取 ollama_188 專屬設定,fallback 到 ollama 預設
try:
model_name = registry.get_model("ollama_188", "rca")
model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama_188", "rca")).strip()
except Exception:
model_name = getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "qwen2.5:7b-instruct")
model_name = str((context or {}).get("ollama_model") or getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "qwen2.5:7b-instruct")).strip()
try:
options = registry.get_provider_options("ollama_188")
@@ -364,6 +370,7 @@ class Ollama188Provider(OllamaProvider):
tokens=tokens,
latency_ms=round(latency, 1),
endpoint=fallback_url,
model=model_name,
)
return AIResult(
raw_response=result,

View File

@@ -165,9 +165,11 @@ class GovernanceAgent:
auto_deprecated=len(auto_deprecated_ids),
kept=len(kept_ids),
)
drift_ratio = len(drifted) / total if total > 0 else 0.0
return {
"checked": total,
"drifted": len(drifted),
"drift_ratio": drift_ratio,
"auto_deprecated": len(auto_deprecated_ids),
"kept": len(kept_ids),
}

View File

@@ -1142,6 +1142,8 @@ class OpenClawService:
exec_context = dict(alert_context) if alert_context else {}
if decision.intent == IntentType.DIAGNOSE:
exec_context["task_type"] = "diagnose"
if self._is_incident_alert_context(alert_context):
exec_context["ollama_model"] = getattr(settings, "ALERT_OLLAMA_MODEL", "gemma3:4b")
result = await executor.execute(
prompt=prompt,

View File

@@ -73,6 +73,12 @@ spec:
value: "http://192.168.0.110:11436" # 2026-05-04 ogt: GCP-B secondary via 110 nginx proxy(11436 → 34.21.145.224:11434)
- name: OLLAMA_FALLBACK_URL
value: "http://192.168.0.111:11434" # 2026-05-04 ogt: 111 兜底(K8s 內網直連,GPU RTX)
- name: ALERT_AI_ALLOW_CLOUD_FALLBACK
value: "true" # Gemini 只作 GCP-A → GCP-B → 111 全失敗後的備援
- name: ALERT_AI_ENFORCE_OLLAMA_FIRST
value: "true" # 告警診斷強制先走 GCP-A → GCP-B → 111
- name: ALERT_OLLAMA_MODEL
value: "gemma3:4b" # 2026-05-05 Codex: qwen3:14b 告警 JSON prompt 會拖到 504
- name: OPENCLAW_DEFAULT_MODEL
value: "qwen2.5:7b-instruct"
- name: OPENCLAW_TIMEOUT