From bf847ad045d52f1ca688dfd5c07a69e09b6dc4b8 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 5 May 2026 22:20:10 +0800 Subject: [PATCH] fix(ai): stabilize GCP Ollama alert lane --- apps/api/models.json | 4 ++-- apps/api/src/core/config.py | 8 ++++++++ apps/api/src/jobs/ai_slo_watchdog_job.py | 11 ++++++++++- apps/api/src/services/ai_providers/ollama.py | 17 ++++++++++++----- apps/api/src/services/governance_agent.py | 2 ++ apps/api/src/services/openclaw.py | 2 ++ k8s/awoooi-prod/06-deployment-api.yaml | 6 ++++++ 7 files changed, 42 insertions(+), 8 deletions(-) diff --git a/apps/api/models.json b/apps/api/models.json index 57333ee0..56992111 100644 --- a/apps/api/models.json +++ b/apps/api/models.json @@ -25,7 +25,7 @@ "log_anomaly": "deepseek-r1:14b", "nemoclaw": "deepseek-r1:14b", "playbook_draft": "qwen3:14b", - "code_review": "qwen2.5-coder:32b", + "code_review": "qwen2.5-coder:7b", "embedding": "bge-m3:latest", "rag_generate": "qwen3:14b", "image_analysis": "minicpm-v:latest", @@ -175,7 +175,7 @@ }, "pr_code_review": { "phase": 32, - "model": "qwen2.5-coder:32b", + "model": "qwen2.5-coder:7b", "timeout_seconds": 120, "purpose": "Gitea PR 自動審查" }, diff --git a/apps/api/src/core/config.py b/apps/api/src/core/config.py index ba322b6b..dcb43443 100644 --- a/apps/api/src/core/config.py +++ b/apps/api/src/core/config.py @@ -520,6 +520,14 @@ class Settings(BaseSettings): "then local 111 before cloud backup providers such as Gemini." ), ) + ALERT_OLLAMA_MODEL: str = Field( + default="gemma3:4b", + description=( + "Ollama model used for incident/alert diagnosis. Keep this separate " + "from the general RCA model so alert cards stay on the fast local " + "lane before Gemini backup is considered." + ), + ) # 2026-03-29 ogt: ADR-036 Nemotron Tool Calling 整合 NVIDIA_API_KEY: str = Field( default="", diff --git a/apps/api/src/jobs/ai_slo_watchdog_job.py b/apps/api/src/jobs/ai_slo_watchdog_job.py index 0af6664d..29d0f676 100644 --- a/apps/api/src/jobs/ai_slo_watchdog_job.py +++ b/apps/api/src/jobs/ai_slo_watchdog_job.py @@ -46,6 +46,7 @@ _DEDUP_TTL_SEC = 3600 # 同一告警 1 小時內不重複發送 _TG_SILENCE_THRESHOLD = 2 # PENDING telegram_message_id IS NULL 告警門檻 _FLYWHEEL_SUCCESS_MIN = 0.30 # 執行成功率下限 _STUCK_ANALYSIS_THRESHOLD = 3 # Agent Debate 失敗導致卡住的告警門檻 +_TRUST_DRIFT_META_MIN_RATIO = 0.20 # 低於此比例只記治理事件,不升 Meta System # 2026-05-03 ogt + Claude Opus 4.7 — feedback_silencing_alerts_recurring_violation # 啟動寬限期:30 分鐘內可 skip「資料還沒到」噪音;超過寬限期仍空 = 真資料管線斷,必須告警 @@ -210,7 +211,8 @@ async def _check_once() -> None: from src.services.governance_agent import get_governance_agent trust_result = await get_governance_agent().check_trust_drift(emit_alert=False) drifted = trust_result.get("drifted", 0) - if drifted > 0: + drift_ratio = float(trust_result.get("drift_ratio") or 0.0) + if drifted > 0 and drift_ratio >= _TRUST_DRIFT_META_MIN_RATIO: auto_deprecated = trust_result.get("auto_deprecated", 0) kept = trust_result.get("kept", 0) violations.append( @@ -219,6 +221,13 @@ async def _check_once() -> None: ) # 2026-05-05 ogt W6 修復:移除動態 low_count,避免 count 微變繞過 dedup violation_codes.append("W6:trust_drift") + elif drifted > 0: + logger.info( + "watchdog_w6_trust_drift_below_meta_threshold", + drifted=drifted, + drift_ratio=round(drift_ratio, 3), + threshold=_TRUST_DRIFT_META_MIN_RATIO, + ) except Exception as e: logger.warning("watchdog_w6_trust_drift_check_failed", error=str(e)) diff --git a/apps/api/src/services/ai_providers/ollama.py b/apps/api/src/services/ai_providers/ollama.py index bf31460f..d6caaf78 100644 --- a/apps/api/src/services/ai_providers/ollama.py +++ b/apps/api/src/services/ai_providers/ollama.py @@ -77,7 +77,7 @@ class OllamaProvider: client = await self._get_client() registry = get_model_registry() - model_name = registry.get_model("ollama", "rca") + model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama", "rca")).strip() options = registry.get_provider_options("ollama") # P0 2026-04-04 Claude Code: per-task timeout(Option C 分情境) @@ -112,7 +112,13 @@ class OllamaProvider: tokens = data.get("eval_count", 0) + data.get("prompt_eval_count", 0) latency = (time.perf_counter() - start) * 1000 - logger.info("ollama_provider_success", response_length=len(result), tokens=tokens, latency_ms=round(latency, 1)) + logger.info( + "ollama_provider_success", + response_length=len(result), + tokens=tokens, + latency_ms=round(latency, 1), + model=model_name, + ) return AIResult( raw_response=result, success=True, @@ -158,7 +164,7 @@ class OllamaProvider: total_tokens = 0 messages: list[dict] = [{"role": "user", "content": prompt}] registry = get_model_registry() - model_name = registry.get_model("ollama", "rca") + model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama", "rca")).strip() options = registry.get_provider_options("ollama") task_type = (context or {}).get("task_type", "") if task_type in ("diagnose", "force_local"): @@ -321,9 +327,9 @@ class Ollama188Provider(OllamaProvider): registry = get_model_registry() # 嘗試取 ollama_188 專屬設定,fallback 到 ollama 預設 try: - model_name = registry.get_model("ollama_188", "rca") + model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama_188", "rca")).strip() except Exception: - model_name = getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "qwen2.5:7b-instruct") + model_name = str((context or {}).get("ollama_model") or getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "qwen2.5:7b-instruct")).strip() try: options = registry.get_provider_options("ollama_188") @@ -364,6 +370,7 @@ class Ollama188Provider(OllamaProvider): tokens=tokens, latency_ms=round(latency, 1), endpoint=fallback_url, + model=model_name, ) return AIResult( raw_response=result, diff --git a/apps/api/src/services/governance_agent.py b/apps/api/src/services/governance_agent.py index cf16b40c..5241afc0 100644 --- a/apps/api/src/services/governance_agent.py +++ b/apps/api/src/services/governance_agent.py @@ -165,9 +165,11 @@ class GovernanceAgent: auto_deprecated=len(auto_deprecated_ids), kept=len(kept_ids), ) + drift_ratio = len(drifted) / total if total > 0 else 0.0 return { "checked": total, "drifted": len(drifted), + "drift_ratio": drift_ratio, "auto_deprecated": len(auto_deprecated_ids), "kept": len(kept_ids), } diff --git a/apps/api/src/services/openclaw.py b/apps/api/src/services/openclaw.py index f3b15d1f..62c19bb4 100644 --- a/apps/api/src/services/openclaw.py +++ b/apps/api/src/services/openclaw.py @@ -1142,6 +1142,8 @@ class OpenClawService: exec_context = dict(alert_context) if alert_context else {} if decision.intent == IntentType.DIAGNOSE: exec_context["task_type"] = "diagnose" + if self._is_incident_alert_context(alert_context): + exec_context["ollama_model"] = getattr(settings, "ALERT_OLLAMA_MODEL", "gemma3:4b") result = await executor.execute( prompt=prompt, diff --git a/k8s/awoooi-prod/06-deployment-api.yaml b/k8s/awoooi-prod/06-deployment-api.yaml index 2acaa001..41799f72 100644 --- a/k8s/awoooi-prod/06-deployment-api.yaml +++ b/k8s/awoooi-prod/06-deployment-api.yaml @@ -73,6 +73,12 @@ spec: value: "http://192.168.0.110:11436" # 2026-05-04 ogt: GCP-B secondary via 110 nginx proxy(11436 → 34.21.145.224:11434) - name: OLLAMA_FALLBACK_URL value: "http://192.168.0.111:11434" # 2026-05-04 ogt: 111 兜底(K8s 內網直連,GPU RTX) + - name: ALERT_AI_ALLOW_CLOUD_FALLBACK + value: "true" # Gemini 只作 GCP-A → GCP-B → 111 全失敗後的備援 + - name: ALERT_AI_ENFORCE_OLLAMA_FIRST + value: "true" # 告警診斷強制先走 GCP-A → GCP-B → 111 + - name: ALERT_OLLAMA_MODEL + value: "gemma3:4b" # 2026-05-05 Codex: qwen3:14b 告警 JSON prompt 會拖到 504 - name: OPENCLAW_DEFAULT_MODEL value: "qwen2.5:7b-instruct" - name: OPENCLAW_TIMEOUT