diff --git a/apps/api/src/core/config.py b/apps/api/src/core/config.py index e9e6d465..7a66c4f6 100644 --- a/apps/api/src/core/config.py +++ b/apps/api/src/core/config.py @@ -215,8 +215,8 @@ class Settings(BaseSettings): description="Phase 25 P0: DIAGNOSE NIM timeout (秒),實測 2.2-27.3s avg 10.6s,60s 含 buffer", ) OLLAMA_DIAGNOSE_TIMEOUT_SECONDS: int = Field( - default=200, - description="Phase 25 P0: Ollama timeout (秒),實測 CPU-only 238s,保留欄位但 DIAGNOSE 不再走 Ollama", + default=300, + description="Ollama diagnose timeout (秒)。GCP qwen3:14b CPU-only can exceed the old 120s proxy limit.", ) # ========================================================================== @@ -534,7 +534,7 @@ class Settings(BaseSettings): ), ) INCIDENT_LLM_TIMEOUT_SECONDS: int = Field( - default=240, + default=360, description=( "Outer timeout for incident OpenClaw proposal generation. This must " "be long enough for the GCP-A/GCP-B/111 Ollama lane to complete " diff --git a/apps/api/src/services/agent_orchestrator.py b/apps/api/src/services/agent_orchestrator.py index a4605304..65c2dada 100644 --- a/apps/api/src/services/agent_orchestrator.py +++ b/apps/api/src/services/agent_orchestrator.py @@ -70,11 +70,11 @@ def _agent_debate_global_timeout_seconds() -> float: 90s guard. Keep a hard ceiling, but make it an explicit deployment knob. """ - raw = os.environ.get("AGENT_DEBATE_GLOBAL_TIMEOUT_SEC", "260.0") + raw = os.environ.get("AGENT_DEBATE_GLOBAL_TIMEOUT_SEC", "420.0") try: timeout = float(raw) except (TypeError, ValueError): - timeout = 260.0 + timeout = 420.0 return max(timeout, 90.0) diff --git a/apps/api/src/services/ai_providers/ollama.py b/apps/api/src/services/ai_providers/ollama.py index 19f8d888..9c47aa76 100644 --- a/apps/api/src/services/ai_providers/ollama.py +++ b/apps/api/src/services/ai_providers/ollama.py @@ -143,7 +143,7 @@ class OllamaProvider: options = registry.get_provider_options("ollama") # P0 2026-04-04 Claude Code: per-task timeout(Option C 分情境) - # FORCE_LOCAL/diagnose → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS (200s,實測 ~173s) + # FORCE_LOCAL/diagnose → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS # 其他 → OPENCLAW_TIMEOUT(既有設定) task_type = (context or {}).get("task_type", "") if task_type in ("diagnose", "force_local"): diff --git a/apps/api/src/services/openclaw.py b/apps/api/src/services/openclaw.py index e0d5b696..bbb2ddfb 100644 --- a/apps/api/src/services/openclaw.py +++ b/apps/api/src/services/openclaw.py @@ -1137,7 +1137,7 @@ class OpenClawService: # 2026-04-29 ogt + Claude Code: 注入 task_type 讓 Ollama 用正確 timeout # 根因: ai_providers/ollama.py:77 讀 context["task_type"] 決定 timeout - # - "diagnose"/"force_local" → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS=200s + # - "diagnose"/"force_local" → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS # - 其他/未注入 → OPENCLAW_TIMEOUT=30s(不夠 qwen2.5:7b 推理) # webhooks alert_context 從未注入 task_type → Ollama fallback 永遠 30s timeout # 對齊 decision.intent 後 Ollama fallback 真正能跑完 diff --git a/apps/api/tests/test_agent_step_timeouts.py b/apps/api/tests/test_agent_step_timeouts.py index 1ee1a49d..df5d2154 100644 --- a/apps/api/tests/test_agent_step_timeouts.py +++ b/apps/api/tests/test_agent_step_timeouts.py @@ -71,8 +71,8 @@ class TestTimeoutDefaults: f"Critic default timeout 期望 15.0,實際 {mod.AGENT_CRITIC_TIMEOUT_SEC}" ) - def test_agent_debate_global_timeout_default_is_260(self, monkeypatch): - """Agent debate global timeout defaults to the GCP Ollama-first budget.""" + def test_agent_debate_global_timeout_default_is_420(self, monkeypatch): + """Agent debate global timeout defaults to the direct GCP qwen3 budget.""" monkeypatch.delenv("AGENT_DEBATE_GLOBAL_TIMEOUT_SEC", raising=False) if "src.services.agent_orchestrator" in sys.modules: @@ -80,7 +80,7 @@ class TestTimeoutDefaults: import src.services.agent_orchestrator as mod importlib.reload(mod) - assert mod.GLOBAL_TIMEOUT_SEC == 260.0 + assert mod.GLOBAL_TIMEOUT_SEC == 420.0 def test_deprecated_alias_matches_new_constant_diagnostician(self, monkeypatch): """PHASE2_STEP_TIMEOUT_SEC alias 應等於 AGENT_DIAGNOSTICIAN_TIMEOUT_SEC(相容性保證)""" @@ -183,7 +183,7 @@ class TestEnvOverride: assert mod.PHASE2_STEP_TIMEOUT_SEC == mod.AGENT_CRITIC_TIMEOUT_SEC def test_agent_debate_global_timeout_env_override(self, monkeypatch): - """AGENT_DEBATE_GLOBAL_TIMEOUT_SEC=300 覆蓋 default 260.0""" + """AGENT_DEBATE_GLOBAL_TIMEOUT_SEC=300 覆蓋 default 420.0""" monkeypatch.setenv("AGENT_DEBATE_GLOBAL_TIMEOUT_SEC", "300") if "src.services.agent_orchestrator" in sys.modules: diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 6eea00e7..5313d210 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,16 @@ +## 2026-05-06 | GCP Ollama direct endpoint hotfix for alert diagnosis + +**背景**:生產 log 顯示 alert path 的 provider order 已是 `ollama_gcp_a → ollama_gcp_b → ollama_local → gemini`,但 GCP-A/GCP-B 經 110 nginx bridge 各跑滿 120s 後回 `504 Gateway Time-out`,因此最後仍 fallback 到 Gemini 並產生成本。110 同時存在 `conf.d/ollama-gcp-proxy.conf`(120s)與 `sites-enabled/110-ollama-proxy.conf`(300s),較早載入的 `conf.d` 實際截斷了 qwen3:14b。 + +**本次修補**: +- production active endpoint 暫改 direct GCP:`OLLAMA_URL=http://34.143.170.20:11434`、`OLLAMA_SECONDARY_URL=http://34.21.145.224:11434`,111 維持最後 Ollama fallback。 +- `OLLAMA_DIAGNOSE_TIMEOUT_SECONDS=300`、`INCIDENT_LLM_TIMEOUT_SECONDS=360`、`AGENT_DEBATE_GLOBAL_TIMEOUT_SEC=420`,讓 qwen3:14b 有足夠時間完成。 +- ADR-125 / GCP proxy runbook 補註 direct endpoint 只是 110 bridge timeout 衝突的 stopgap;長期仍走 WireGuard mesh + AwoooP Inference Gateway。 + +**驗證**: +- API pod 直連 `34.143.170.20:11434/api/tags` 與 `34.21.145.224:11434/api/tags` 均 200。 +- `bge-m3:latest` embedding 已在 GCP-A/GCP-B 回傳 1024 維,RAG 不再打舊 `nomic-embed-text`。 + ## 2026-05-06 | CD host-key prompt unblock for AwoooP Ollama rollout **背景**:`09256be6` 已推到 Gitea main,但 CD `build-and-deploy` 卡在 SSH 到 `192.168.0.121` 的 host-key authenticity prompt,runner 無互動輸入,導致新 image tag 尚未注入 `kustomization.yaml`。 diff --git a/docs/adr/ADR-125-gcp-ollama-private-mesh-inference-gateway.md b/docs/adr/ADR-125-gcp-ollama-private-mesh-inference-gateway.md index 99676bb9..f79b1fbc 100644 --- a/docs/adr/ADR-125-gcp-ollama-private-mesh-inference-gateway.md +++ b/docs/adr/ADR-125-gcp-ollama-private-mesh-inference-gateway.md @@ -58,6 +58,14 @@ Ollama endpoints after cutover: The current `192.168.0.110:11435/11436` nginx proxy remains an emergency bridge only until the mesh cutover passes shadow and canary gates. +2026-05-06 operational amendment: the active production env temporarily uses the +GCP public Ollama IPs directly (`34.143.170.20:11434`, +`34.21.145.224:11434`) because the 110 bridge has an older duplicate +`/etc/nginx/conf.d/ollama-gcp-proxy.conf` with a 120s read timeout that returns +504 before `qwen3:14b` can finish. This is a stopgap, not the target transport. +The next durable transport remains WireGuard mesh plus the AwoooP Inference +Gateway. + ### D2 - Public Ollama exposure is forbidden after cutover After mesh cutover: @@ -110,10 +118,13 @@ gateway explicitly opens a maintenance window. ## Migration Plan -### Phase 0 - Current bridge +### Phase 0 - Current bridge / direct stopgap - Keep `192.168.0.110:11435` and `192.168.0.110:11436` active. -- Alert path uses `ALERT_OLLAMA_MODEL=gemma3:4b`. +- Active production may use direct GCP IPs until the 110 bridge timeout conflict + is removed. +- Alert path uses `ALERT_OLLAMA_MODEL=qwen3:14b` when the user prioritizes + problem resolution quality over fast card delivery. - Gemini remains paid emergency fallback only. ### Phase 1 - Mesh build in parallel @@ -184,4 +195,3 @@ Paid provider fallback must remain budget-gated. - AwoooP can manage Ollama as a platform resource shared by all tenants. - CPU-only GCP performance remains a capacity constraint; routing must keep heavy jobs off the alert lane or use GPU-capable GCP nodes. - diff --git a/docs/runbooks/DEPLOY-GCP-OLLAMA-PROXY.md b/docs/runbooks/DEPLOY-GCP-OLLAMA-PROXY.md index 77a39ad5..d9e8f531 100644 --- a/docs/runbooks/DEPLOY-GCP-OLLAMA-PROXY.md +++ b/docs/runbooks/DEPLOY-GCP-OLLAMA-PROXY.md @@ -10,8 +10,12 @@ ## 背景 -GCP Ollama (34.143.170.20 / 34.21.145.224) 已部署完成,但 K3s 叢集內無法直接連線 GCP 外網 IP。 -透過 192.168.0.110 (DevOps 金庫) 架設 nginx 反向代理,讓 K3s Pod 走內網連線 GCP Ollama。 +GCP Ollama (34.143.170.20 / 34.21.145.224) 已部署完成。 +最初透過 192.168.0.110 (DevOps 金庫) 架設 nginx 反向代理,讓 K3s Pod 走內網連線 GCP Ollama。 + +2026-05-06 現況:K3s Pod 已可直連 GCP-A/GCP-B `11434/tcp`,且 production +暫時改用 direct endpoint,避開 110 上舊 `conf.d/ollama-gcp-proxy.conf` +的 120s `proxy_read_timeout`。正式長期方案仍是 ADR-125 WireGuard mesh。 --- @@ -84,7 +88,7 @@ curl http://192.168.0.110:11435/api/tags kubectl edit configmap -n awoooi-prod awoooi-config ``` -修改以下欄位: +若使用 nginx bridge,修改以下欄位: ```yaml # 修改前 @@ -104,6 +108,16 @@ OLLAMA_FALLBACK_URL: "http://192.168.0.111:11434" # Local GPU 最後防線 kubectl rollout restart deployment/awoooi-api -n awoooi-prod ``` +若需要繞過 110 bridge timeout,使用 direct GCP endpoint: + +```yaml +OLLAMA_URL: "http://34.143.170.20:11434" +OLLAMA_SECONDARY_URL: "http://34.21.145.224:11434" +OLLAMA_FALLBACK_URL: "http://192.168.0.111:11434" +OLLAMA_DIAGNOSE_TIMEOUT_SECONDS: "300" +INCIDENT_LLM_TIMEOUT_SECONDS: "360" +``` + --- ## 確認模型已載入 diff --git a/k8s/awoooi-prod/02-network-policy.yaml b/k8s/awoooi-prod/02-network-policy.yaml index 64989df0..484130ee 100644 --- a/k8s/awoooi-prod/02-network-policy.yaml +++ b/k8s/awoooi-prod/02-network-policy.yaml @@ -165,9 +165,10 @@ spec: # Gitea — CI/CD 主倉 probe + monitoring - protocol: TCP port: 3001 - # 2026-05-04 ogt: GCP Ollama nginx proxy - # K8s → GCP-A/B:11434 外網路由不通(NetworkPolicy 外網 egress 只開 443) - # 在 110 架設 nginx 反向代理,K8s 走內網 110:11435(GCP-A) / 110:11436(GCP-B) + # 2026-05-04 ogt: GCP Ollama nginx proxy bridge. + # 2026-05-06 Codex: production active inference temporarily bypasses + # this bridge because an older 110 nginx conf.d server block still has + # a 120s read timeout. Keep the bridge ports for rollback/emergency use. - protocol: TCP port: 11435 - protocol: TCP diff --git a/k8s/awoooi-prod/04-configmap.yaml b/k8s/awoooi-prod/04-configmap.yaml index 6ea0b5ec..0272154d 100644 --- a/k8s/awoooi-prod/04-configmap.yaml +++ b/k8s/awoooi-prod/04-configmap.yaml @@ -17,14 +17,13 @@ data: # 服務端點 (非機密) # 2026-04-16 ogt + Claude Sonnet 4.6: 改指向 111(GPU 機,RTX) # 188 = CPU-only Ollama,推理極慢(>60s);111 有 GPU,avg 10s - # 2026-05-04 ogt: ADR-110 三層容災正式路由(nginx proxy 架設完成後恢復 GCP 優先) - # GCP-A(via 110:11435) → GCP-B(via 110:11436) → Local(via 110:11437) 統一走 nginx proxy - # 110 nginx proxy 轉發:11435→GCP-A, 11436→GCP-B, 11437→192.168.0.111:11434 - # K8s pods 不可直連 GCP:11434(NetworkPolicy 外網 egress 只開 443) - # 2026-05-05 Codex: 110:11437 proxy 從 pod 內 connection refused,暫回直連 111; - # 保持告警成本路由為 GCP-A → GCP-B → 111 → Gemini backup。 - OLLAMA_URL: "http://192.168.0.110:11435" - OLLAMA_SECONDARY_URL: "http://192.168.0.110:11436" + # 2026-05-06 Codex: bypass the transitional 110 nginx bridge for active + # inference because /etc/nginx/conf.d/ollama-gcp-proxy.conf still has a 120s + # read timeout and returns 504 before qwen3:14b can finish. NetworkPolicy + # already allows direct GCP-A/GCP-B:11434. Target architecture remains + # ADR-125 WireGuard mesh + AwoooP Inference Gateway. + OLLAMA_URL: "http://34.143.170.20:11434" + OLLAMA_SECONDARY_URL: "http://34.21.145.224:11434" OLLAMA_FALLBACK_URL: "http://192.168.0.111:11434" OPENCLAW_URL: "http://192.168.0.188:8088" KALI_SCANNER_URL: "http://192.168.0.112:8080" @@ -67,8 +66,9 @@ data: OLLAMA_EMBEDDING_MODEL: "bge-m3:latest" OPENCLAW_DEFAULT_MODEL: "qwen2.5:7b-instruct" OPENCLAW_TIMEOUT: "120" - INCIDENT_LLM_TIMEOUT_SECONDS: "240" - AGENT_DEBATE_GLOBAL_TIMEOUT_SEC: "260" + OLLAMA_DIAGNOSE_TIMEOUT_SECONDS: "300" + INCIDENT_LLM_TIMEOUT_SECONDS: "360" + AGENT_DEBATE_GLOBAL_TIMEOUT_SEC: "420" AGENT_DIAGNOSTICIAN_TIMEOUT_SEC: "100" AGENT_SOLVER_TIMEOUT_SEC: "80" # ADR-105 P1: OpenClaw Agent Loop shadow canary. diff --git a/k8s/awoooi-prod/06-deployment-api.yaml b/k8s/awoooi-prod/06-deployment-api.yaml index 91a73da1..e1550ff1 100644 --- a/k8s/awoooi-prod/06-deployment-api.yaml +++ b/k8s/awoooi-prod/06-deployment-api.yaml @@ -68,9 +68,9 @@ spec: - name: TELEGRAM_ENABLE_POLLING value: "true" - name: OLLAMA_URL - value: "http://192.168.0.110:11435" # 2026-05-04 ogt: GCP-A primary via 110 nginx proxy(11435 → 34.143.170.20:11434) + value: "http://34.143.170.20:11434" # 2026-05-06 Codex: GCP-A direct; avoids 110 nginx 120s bridge timeout - name: OLLAMA_SECONDARY_URL - value: "http://192.168.0.110:11436" # 2026-05-04 ogt: GCP-B secondary via 110 nginx proxy(11436 → 34.21.145.224:11434) + value: "http://34.21.145.224:11434" # 2026-05-06 Codex: GCP-B direct; mesh gateway remains target architecture - name: OLLAMA_FALLBACK_URL value: "http://192.168.0.111:11434" # 2026-05-04 ogt: 111 兜底(K8s 內網直連,GPU RTX) - name: ALERT_AI_ALLOW_CLOUD_FALLBACK @@ -87,10 +87,12 @@ spec: value: "qwen2.5:7b-instruct" - name: OPENCLAW_TIMEOUT value: "120" + - name: OLLAMA_DIAGNOSE_TIMEOUT_SECONDS + value: "300" - name: INCIDENT_LLM_TIMEOUT_SECONDS - value: "240" + value: "360" - name: AGENT_DEBATE_GLOBAL_TIMEOUT_SEC - value: "260" + value: "420" - name: AGENT_DIAGNOSTICIAN_TIMEOUT_SEC value: "100" - name: AGENT_SOLVER_TIMEOUT_SEC