diff --git a/apps/api/src/core/config.py b/apps/api/src/core/config.py index a81e8a2b..26b97a56 100644 --- a/apps/api/src/core/config.py +++ b/apps/api/src/core/config.py @@ -370,9 +370,10 @@ class Settings(BaseSettings): ) return v - # 2026-04-25 Claude Engineer-C (P1.1): Ollama 健康檢測推理測試模型 + # 2026-05-05 Codex: health inference must stay on alert-fast model; qwen2.5 + # keeps reloading a 7B model on CPU-only GCP and slows incident fallback. OLLAMA_HEALTH_CHECK_MODEL: str = Field( - default="qwen2.5:7b-instruct", + default="gemma3:4b", description="OllamaHealthMonitor 推理測試使用模型(P1.1)", ) # 2026-04-12 ogt: 心跳必須確認載入的 Ollama 模型清單 diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 45aa2727..3a738b70 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -3201,6 +3201,7 @@ bash scripts/ops/ollama-topology-check.sh - `interactive` / `healthcheck` / `alert_fast` 保持 GCP-A 優先 - `code_review` / `rag` / `embedding` / `deep_rca` / `image_analysis` / `hermes` 改為 111 優先 - 111 不可用時才回 GCP-B,避免 GCP-A/B 在告警 canary 期間被 7B/14B/32B 模型污染 +- `OLLAMA_HEALTH_CHECK_MODEL` 改為 `gemma3:4b`,避免 health probe 自己把 `qwen2.5:7b-instruct` 載入 GCP-A 驗證: diff --git a/k8s/awoooi-prod/06-deployment-api.yaml b/k8s/awoooi-prod/06-deployment-api.yaml index 41799f72..e5aa26e7 100644 --- a/k8s/awoooi-prod/06-deployment-api.yaml +++ b/k8s/awoooi-prod/06-deployment-api.yaml @@ -79,6 +79,8 @@ spec: value: "true" # 告警診斷強制先走 GCP-A → GCP-B → 111 - name: ALERT_OLLAMA_MODEL value: "gemma3:4b" # 2026-05-05 Codex: qwen3:14b 告警 JSON prompt 會拖到 504 + - name: OLLAMA_HEALTH_CHECK_MODEL + value: "gemma3:4b" # 2026-05-05 Codex: 避免 health probe 載入 qwen2.5 7B 污染 GCP alert lane - name: OPENCLAW_DEFAULT_MODEL value: "qwen2.5:7b-instruct" - name: OPENCLAW_TIMEOUT