fix(alerts): bypass proxy timeout for GCP Ollama

This commit is contained in:
Your Name
2026-05-06 08:55:02 +08:00
parent df5e6c6626
commit 9ef9633aff
11 changed files with 74 additions and 34 deletions

View File

@@ -215,8 +215,8 @@ class Settings(BaseSettings):
description="Phase 25 P0: DIAGNOSE NIM timeout (秒),實測 2.2-27.3s avg 10.6s60s 含 buffer",
)
OLLAMA_DIAGNOSE_TIMEOUT_SECONDS: int = Field(
default=200,
description="Phase 25 P0: Ollama timeout (秒),實測 CPU-only 238s保留欄位但 DIAGNOSE 不再走 Ollama",
default=300,
description="Ollama diagnose timeout (秒)。GCP qwen3:14b CPU-only can exceed the old 120s proxy limit.",
)
# ==========================================================================
@@ -534,7 +534,7 @@ class Settings(BaseSettings):
),
)
INCIDENT_LLM_TIMEOUT_SECONDS: int = Field(
default=240,
default=360,
description=(
"Outer timeout for incident OpenClaw proposal generation. This must "
"be long enough for the GCP-A/GCP-B/111 Ollama lane to complete "

View File

@@ -70,11 +70,11 @@ def _agent_debate_global_timeout_seconds() -> float:
90s guard. Keep a hard ceiling, but make it an explicit deployment knob.
"""
raw = os.environ.get("AGENT_DEBATE_GLOBAL_TIMEOUT_SEC", "260.0")
raw = os.environ.get("AGENT_DEBATE_GLOBAL_TIMEOUT_SEC", "420.0")
try:
timeout = float(raw)
except (TypeError, ValueError):
timeout = 260.0
timeout = 420.0
return max(timeout, 90.0)

View File

@@ -143,7 +143,7 @@ class OllamaProvider:
options = registry.get_provider_options("ollama")
# P0 2026-04-04 Claude Code: per-task timeout(Option C 分情境)
# FORCE_LOCAL/diagnose → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS (200s,實測 ~173s)
# FORCE_LOCAL/diagnose → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS
# 其他 → OPENCLAW_TIMEOUT(既有設定)
task_type = (context or {}).get("task_type", "")
if task_type in ("diagnose", "force_local"):

View File

@@ -1137,7 +1137,7 @@ class OpenClawService:
# 2026-04-29 ogt + Claude Code: 注入 task_type 讓 Ollama 用正確 timeout
# 根因: ai_providers/ollama.py:77 讀 context["task_type"] 決定 timeout
# - "diagnose"/"force_local" → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS=200s
# - "diagnose"/"force_local" → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS
# - 其他/未注入 → OPENCLAW_TIMEOUT=30s(不夠 qwen2.5:7b 推理)
# webhooks alert_context 從未注入 task_type → Ollama fallback 永遠 30s timeout
# 對齊 decision.intent 後 Ollama fallback 真正能跑完

View File

@@ -71,8 +71,8 @@ class TestTimeoutDefaults:
f"Critic default timeout 期望 15.0,實際 {mod.AGENT_CRITIC_TIMEOUT_SEC}"
)
def test_agent_debate_global_timeout_default_is_260(self, monkeypatch):
"""Agent debate global timeout defaults to the GCP Ollama-first budget."""
def test_agent_debate_global_timeout_default_is_420(self, monkeypatch):
"""Agent debate global timeout defaults to the direct GCP qwen3 budget."""
monkeypatch.delenv("AGENT_DEBATE_GLOBAL_TIMEOUT_SEC", raising=False)
if "src.services.agent_orchestrator" in sys.modules:
@@ -80,7 +80,7 @@ class TestTimeoutDefaults:
import src.services.agent_orchestrator as mod
importlib.reload(mod)
assert mod.GLOBAL_TIMEOUT_SEC == 260.0
assert mod.GLOBAL_TIMEOUT_SEC == 420.0
def test_deprecated_alias_matches_new_constant_diagnostician(self, monkeypatch):
"""PHASE2_STEP_TIMEOUT_SEC alias 應等於 AGENT_DIAGNOSTICIAN_TIMEOUT_SEC相容性保證"""
@@ -183,7 +183,7 @@ class TestEnvOverride:
assert mod.PHASE2_STEP_TIMEOUT_SEC == mod.AGENT_CRITIC_TIMEOUT_SEC
def test_agent_debate_global_timeout_env_override(self, monkeypatch):
"""AGENT_DEBATE_GLOBAL_TIMEOUT_SEC=300 覆蓋 default 260.0"""
"""AGENT_DEBATE_GLOBAL_TIMEOUT_SEC=300 覆蓋 default 420.0"""
monkeypatch.setenv("AGENT_DEBATE_GLOBAL_TIMEOUT_SEC", "300")
if "src.services.agent_orchestrator" in sys.modules:

View File

@@ -1,3 +1,16 @@
## 2026-05-06 | GCP Ollama direct endpoint hotfix for alert diagnosis
**背景**:生產 log 顯示 alert path 的 provider order 已是 `ollama_gcp_a → ollama_gcp_b → ollama_local → gemini`,但 GCP-A/GCP-B 經 110 nginx bridge 各跑滿 120s 後回 `504 Gateway Time-out`,因此最後仍 fallback 到 Gemini 並產生成本。110 同時存在 `conf.d/ollama-gcp-proxy.conf`(120s)與 `sites-enabled/110-ollama-proxy.conf`(300s),較早載入的 `conf.d` 實際截斷了 qwen3:14b。
**本次修補**:
- production active endpoint 暫改 direct GCP:`OLLAMA_URL=http://34.143.170.20:11434`、`OLLAMA_SECONDARY_URL=http://34.21.145.224:11434`;111 維持最後 Ollama fallback。
- `OLLAMA_DIAGNOSE_TIMEOUT_SECONDS=300`、`INCIDENT_LLM_TIMEOUT_SECONDS=360`、`AGENT_DEBATE_GLOBAL_TIMEOUT_SEC=420`,讓 qwen3:14b 有足夠時間完成。
- ADR-125 / GCP proxy runbook 補註 direct endpoint 只是 110 bridge timeout 衝突的 stopgap;長期仍走 WireGuard mesh + AwoooP Inference Gateway。
**驗證**:
- API pod 直連 `34.143.170.20:11434/api/tags`、`34.21.145.224:11434/api/tags` 均 200。
- `bge-m3:latest` embedding 已在 GCP-A/GCP-B 回傳 1024 維;RAG 不再打舊 `nomic-embed-text`。
## 2026-05-06 | CD host-key prompt unblock for AwoooP Ollama rollout
**背景**:`09256be6` 已推到 Gitea main,但 CD `build-and-deploy` 卡在 SSH 到 `192.168.0.121` 的 host-key authenticity prompt;runner 無互動輸入,導致新 image tag 尚未注入 `kustomization.yaml`。

View File

@@ -58,6 +58,14 @@ Ollama endpoints after cutover:
The current `192.168.0.110:11435/11436` nginx proxy remains an emergency bridge
only until the mesh cutover passes shadow and canary gates.
2026-05-06 operational amendment: the active production env temporarily uses the
GCP public Ollama IPs directly (`34.143.170.20:11434`,
`34.21.145.224:11434`) because the 110 bridge has an older duplicate
`/etc/nginx/conf.d/ollama-gcp-proxy.conf` with a 120s read timeout that returns
504 before `qwen3:14b` can finish. This is a stopgap, not the target transport.
The next durable transport remains WireGuard mesh plus the AwoooP Inference
Gateway.
### D2 - Public Ollama exposure is forbidden after cutover
After mesh cutover:
@@ -110,10 +118,13 @@ gateway explicitly opens a maintenance window.
## Migration Plan
### Phase 0 - Current bridge
### Phase 0 - Current bridge / direct stopgap
- Keep `192.168.0.110:11435` and `192.168.0.110:11436` active.
- Alert path uses `ALERT_OLLAMA_MODEL=gemma3:4b`.
- Active production may use direct GCP IPs until the 110 bridge timeout conflict
is removed.
- Alert path uses `ALERT_OLLAMA_MODEL=qwen3:14b` when the user prioritizes
problem resolution quality over fast card delivery.
- Gemini remains paid emergency fallback only.
### Phase 1 - Mesh build in parallel
@@ -184,4 +195,3 @@ Paid provider fallback must remain budget-gated.
- AwoooP can manage Ollama as a platform resource shared by all tenants.
- CPU-only GCP performance remains a capacity constraint; routing must keep
heavy jobs off the alert lane or use GPU-capable GCP nodes.

View File

@@ -10,8 +10,12 @@
## 背景
GCP Ollama (34.143.170.20 / 34.21.145.224) 已部署完成,但 K3s 叢集內無法直接連線 GCP 外網 IP。
透過 192.168.0.110 (DevOps 金庫) 架設 nginx 反向代理,讓 K3s Pod 走內網連線 GCP Ollama。
GCP Ollama (34.143.170.20 / 34.21.145.224) 已部署完成。
最初透過 192.168.0.110 (DevOps 金庫) 架設 nginx 反向代理,讓 K3s Pod 走內網連線 GCP Ollama。
2026-05-06 現況:K3s Pod 已可直連 GCP-A/GCP-B `11434/tcp`,且 production
暫時改用 direct endpoint,避開 110 上舊 `conf.d/ollama-gcp-proxy.conf`
的 120s `proxy_read_timeout`。正式長期方案仍是 ADR-125 WireGuard mesh。
---
@@ -84,7 +88,7 @@ curl http://192.168.0.110:11435/api/tags
kubectl edit configmap -n awoooi-prod awoooi-config
```
修改以下欄位:
若使用 nginx bridge,修改以下欄位:
```yaml
# 修改前
@@ -104,6 +108,16 @@ OLLAMA_FALLBACK_URL: "http://192.168.0.111:11434" # Local GPU 最後防線
kubectl rollout restart deployment/awoooi-api -n awoooi-prod
```
若需要繞過 110 bridge timeout,使用 direct GCP endpoint:
```yaml
OLLAMA_URL: "http://34.143.170.20:11434"
OLLAMA_SECONDARY_URL: "http://34.21.145.224:11434"
OLLAMA_FALLBACK_URL: "http://192.168.0.111:11434"
OLLAMA_DIAGNOSE_TIMEOUT_SECONDS: "300"
INCIDENT_LLM_TIMEOUT_SECONDS: "360"
```
---
## 確認模型已載入

View File

@@ -165,9 +165,10 @@ spec:
# Gitea — CI/CD 主倉 probe + monitoring
- protocol: TCP
port: 3001
# 2026-05-04 ogt: GCP Ollama nginx proxy
# K8s → GCP-A/B:11434 外網路由不通(NetworkPolicy 外網 egress 只開 443)
# 110 架設 nginx 反向代理,K8s 走內網 110:11435(GCP-A) / 110:11436(GCP-B)
# 2026-05-04 ogt: GCP Ollama nginx proxy bridge.
# 2026-05-06 Codex: production active inference temporarily bypasses
# this bridge because an older 110 nginx conf.d server block still has
# a 120s read timeout. Keep the bridge ports for rollback/emergency use.
- protocol: TCP
port: 11435
- protocol: TCP

View File

@@ -17,14 +17,13 @@ data:
# 服務端點 (非機密)
# 2026-04-16 ogt + Claude Sonnet 4.6: 改指向 111(GPU 機,RTX)
# 188 = CPU-only Ollama(推理極慢,>60s),111 有 GPU(avg 10s)
# 2026-05-04 ogt: ADR-110 三層容災正式路由(nginx proxy 架設完成後恢復 GCP 優先)
# GCP-A(via 110:11435) → GCP-B(via 110:11436) → Local(via 110:11437) 統一走 nginx proxy
# 110 nginx proxy 轉發:11435→GCP-A, 11436→GCP-B, 11437→192.168.0.111:11434
# K8s pods 不可直連 GCP:11434(NetworkPolicy 外網 egress 只開 443)
# 2026-05-05 Codex: 110:11437 proxy 從 pod 內 connection refused,暫回直連 111,
# 保持告警成本路由為 GCP-A → GCP-B → 111 → Gemini backup。
OLLAMA_URL: "http://192.168.0.110:11435"
OLLAMA_SECONDARY_URL: "http://192.168.0.110:11436"
# 2026-05-06 Codex: bypass the transitional 110 nginx bridge for active
# inference because /etc/nginx/conf.d/ollama-gcp-proxy.conf still has a 120s
# read timeout and returns 504 before qwen3:14b can finish. NetworkPolicy
# already allows direct GCP-A/GCP-B:11434. Target architecture remains
# ADR-125 WireGuard mesh + AwoooP Inference Gateway.
OLLAMA_URL: "http://34.143.170.20:11434"
OLLAMA_SECONDARY_URL: "http://34.21.145.224:11434"
OLLAMA_FALLBACK_URL: "http://192.168.0.111:11434"
OPENCLAW_URL: "http://192.168.0.188:8088"
KALI_SCANNER_URL: "http://192.168.0.112:8080"
@@ -67,8 +66,9 @@ data:
OLLAMA_EMBEDDING_MODEL: "bge-m3:latest"
OPENCLAW_DEFAULT_MODEL: "qwen2.5:7b-instruct"
OPENCLAW_TIMEOUT: "120"
INCIDENT_LLM_TIMEOUT_SECONDS: "240"
AGENT_DEBATE_GLOBAL_TIMEOUT_SEC: "260"
OLLAMA_DIAGNOSE_TIMEOUT_SECONDS: "300"
INCIDENT_LLM_TIMEOUT_SECONDS: "360"
AGENT_DEBATE_GLOBAL_TIMEOUT_SEC: "420"
AGENT_DIAGNOSTICIAN_TIMEOUT_SEC: "100"
AGENT_SOLVER_TIMEOUT_SEC: "80"
# ADR-105 P1: OpenClaw Agent Loop shadow canary.

View File

@@ -68,9 +68,9 @@ spec:
- name: TELEGRAM_ENABLE_POLLING
value: "true"
- name: OLLAMA_URL
value: "http://192.168.0.110:11435" # 2026-05-04 ogt: GCP-A primary via 110 nginx proxy11435 → 34.143.170.20:11434
value: "http://34.143.170.20:11434" # 2026-05-06 Codex: GCP-A direct; avoids 110 nginx 120s bridge timeout
- name: OLLAMA_SECONDARY_URL
value: "http://192.168.0.110:11436" # 2026-05-04 ogt: GCP-B secondary via 110 nginx proxy11436 → 34.21.145.224:11434
value: "http://34.21.145.224:11434" # 2026-05-06 Codex: GCP-B direct; mesh gateway remains target architecture
- name: OLLAMA_FALLBACK_URL
value: "http://192.168.0.111:11434" # 2026-05-04 ogt: 111 兜底K8s 內網直連GPU RTX
- name: ALERT_AI_ALLOW_CLOUD_FALLBACK
@@ -87,10 +87,12 @@ spec:
value: "qwen2.5:7b-instruct"
- name: OPENCLAW_TIMEOUT
value: "120"
- name: OLLAMA_DIAGNOSE_TIMEOUT_SECONDS
value: "300"
- name: INCIDENT_LLM_TIMEOUT_SECONDS
value: "240"
value: "360"
- name: AGENT_DEBATE_GLOBAL_TIMEOUT_SEC
value: "260"
value: "420"
- name: AGENT_DIAGNOSTICIAN_TIMEOUT_SEC
value: "100"
- name: AGENT_SOLVER_TIMEOUT_SEC