fix(ai): isolate heavy Ollama workloads from GCP alert lane
All checks were successful
CD Pipeline / tests (push) Successful in 54s
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / build-and-deploy (push) Successful in 3m19s
CD Pipeline / post-deploy-checks (push) Successful in 3m12s

This commit is contained in:
Your Name
2026-05-05 23:05:59 +08:00
parent 1dcc6d61dc
commit c4854bb355
17 changed files with 146 additions and 84 deletions

View File

@@ -500,7 +500,7 @@ class Settings(BaseSettings):
default=False,
description=(
"Allow LocalCodeReviewService to fall back to Gemini when the "
"GCP-B/Ollama code-review lane fails. Default false to avoid "
"local Ollama code-review lane fails. Default false to avoid "
"unexpected cloud spend from Gitea push/PR alerts."
),
)

View File

@@ -9,6 +9,7 @@ Layer 1 意圖路由(關鍵字正則)→ Ollama 本地模型111→ Tel
debugger/vuln → deepseek-r1:14b推理; code agents → qwen2.5-coder:7b; 其他 → qwen2.5:7b-instruct
"""
from __future__ import annotations
import asyncio
import re
import time
@@ -17,7 +18,6 @@ import httpx
import structlog
from sqlalchemy import text
from src.core.config import settings
from src.core.redis_client import get_redis
from src.db.base import get_db_context
from src.hermes.agent_loader import get_agent_system_prompt
@@ -266,7 +266,9 @@ async def process_nl_message(
success = False
error_type: str | None = None
try:
ollama_base = getattr(settings, "OLLAMA_URL", "http://34.143.170.20:11434") # 2026-05-03 ogt: ADR-110 GCP-A Primary
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
ollama_base = resolve_ollama_endpoint("hermes")
async with httpx.AsyncClient(timeout=_OLLAMA_TIMEOUT) as _hc:
resp = await _hc.post(
f"{ollama_base}/api/chat",

View File

@@ -26,7 +26,7 @@ from __future__ import annotations
import asyncio
import re
from dataclasses import dataclass, field
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Literal
import httpx
@@ -125,7 +125,7 @@ class DecisionFusionAdapter:
# Public API
# =========================================================================
async def fuse_decision(self, event: "AiGovernanceEvent") -> FusedDecision:
async def fuse_decision(self, event: AiGovernanceEvent) -> FusedDecision:
"""三維融合LLM × Playbook × MCP → FusedDecision。
三個維度並行評估(asyncio.gather),任一失敗靜默降為 0.5。
@@ -226,7 +226,7 @@ class DecisionFusionAdapter:
# =========================================================================
async def _score_llm(
self, event: "AiGovernanceEvent"
self, event: AiGovernanceEvent
) -> tuple[float, str, dict[str, Any]]:
"""Ollama LLM 推理:治理事件情境 → 建議動作 + 信心度。
@@ -254,7 +254,9 @@ class DecisionFusionAdapter:
"只輸出 CONFIDENCE 和 ACTION 兩行,不要其他解釋。"
)
ollama_url = getattr(self._settings, "OLLAMA_URL", "http://192.168.0.111:11434") # 2026-05-04 ogt: ADR-110 修正 — 111 primary
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
ollama_url = resolve_ollama_endpoint("deep_rca")
try:
async with httpx.AsyncClient(
@@ -320,7 +322,7 @@ class DecisionFusionAdapter:
# =========================================================================
async def _score_playbook(
self, event: "AiGovernanceEvent"
self, event: AiGovernanceEvent
) -> tuple[float, str | None, float | None]:
"""Playbook 相似度比對 → 取最高 trust_score。
@@ -373,7 +375,7 @@ class DecisionFusionAdapter:
# =========================================================================
async def _score_mcp(
self, event: "AiGovernanceEvent"
self, event: AiGovernanceEvent
) -> tuple[float, dict[str, Any]]:
"""Prometheus 情報採集 → MCP 感官品質分數。

View File

@@ -33,10 +33,11 @@ logger = structlog.get_logger(__name__)
# ============================================================
# 設定
# ============================================================
# 2026-05-03 ogt: ADR-110 GCP-A Primary — 改從 settings 讀取,不再硬編碼 111
# 2026-05-05 Codex: 重摘要走 111 lane,避免污染 GCP alert-fast lane
def _get_ollama_url() -> str:
from src.core.config import get_settings
return get_settings().OLLAMA_URL
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
return resolve_ollama_endpoint("deep_rca")
# D1 集中化 2026-04-11: 從 models.json providers.ollama.models.drift_summary 讀取
NARRATOR_MODEL = get_model("ollama", "drift_summary")
NARRATOR_TIMEOUT = 90.0 # seconds
@@ -120,8 +121,8 @@ class DriftNarratorService:
async def narrate_and_notify(
self,
report: "DriftReport",
interpretation: "DriftInterpretation | None" = None,
report: DriftReport,
interpretation: DriftInterpretation | None = None,
) -> None:
"""
生成人話摘要並推送 Telegram
@@ -166,7 +167,7 @@ class DriftNarratorService:
medium=report.medium_count,
)
def _should_narrate(self, report: "DriftReport") -> bool:
def _should_narrate(self, report: DriftReport) -> bool:
"""觸發條件high >= 1 or medium >= 3"""
# 過濾 HPA 白名單後重算
non_hpa_items = [
@@ -180,8 +181,8 @@ class DriftNarratorService:
async def _generate_narrative_and_items(
self,
report: "DriftReport",
interpretation: "DriftInterpretation | None",
report: DriftReport,
interpretation: DriftInterpretation | None,
) -> tuple[str, list[dict], dict]:
"""
2026-04-18 ogt + Claude Opus 4.7: B 方案 — LLM 產生 narrative + 結構化 items
@@ -354,8 +355,8 @@ class DriftNarratorService:
def _fallback_recommendation(
self,
report: "DriftReport",
interpretation: "DriftInterpretation | None",
report: DriftReport,
interpretation: DriftInterpretation | None,
) -> dict:
"""
2026-04-20 P0.2 ogt + Claude Opus 4.7: LLM 沒給 recommendation 時的 Python fallback
@@ -397,7 +398,7 @@ class DriftNarratorService:
async def _log_ai_action_to_db(
self,
report: "DriftReport",
report: DriftReport,
prompt: str,
raw_response: str | None,
narrative: str,
@@ -416,7 +417,9 @@ class DriftNarratorService:
- 若能找到該 drift 的 incident 關聯,設 parent_op_id
"""
import json as _json
from sqlalchemy import text as _sql
from src.db.base import get_db_context
input_json = _json.dumps({
@@ -511,7 +514,7 @@ class DriftNarratorService:
items_count=len(items),
)
def _format_drift_for_llm(self, report: "DriftReport") -> str:
def _format_drift_for_llm(self, report: DriftReport) -> str:
"""
2026-04-18 ogt + Claude Opus 4.7: B 方案 — 餵 LLM 用的 JSON 序列化
保留更多原始 context 給 LLM 推理,不做 30 字元暴力截斷
@@ -582,7 +585,7 @@ class DriftNarratorService:
# 一般變化
return f"{from_val}{to_val}"
def _fallback_items(self, report: "DriftReport") -> list[dict]:
def _fallback_items(self, report: DriftReport) -> list[dict]:
"""
LLM 失敗時的 Python 智能摘要 (取代舊 str()[:30])
- 過濾白名單
@@ -605,7 +608,7 @@ class DriftNarratorService:
})
return items
def _format_intent_summary(self, interpretation: "DriftInterpretation | None") -> str:
def _format_intent_summary(self, interpretation: DriftInterpretation | None) -> str:
if not interpretation:
return "無意圖分析"
return (
@@ -616,8 +619,8 @@ class DriftNarratorService:
def _fallback_narrative(
self,
report: "DriftReport",
interpretation: "DriftInterpretation | None",
report: DriftReport,
interpretation: DriftInterpretation | None,
) -> str:
"""LLM 失敗時的結構化 fallback"""
resources = list({
@@ -636,7 +639,7 @@ class DriftNarratorService:
async def _send_telegram(
self,
report: "DriftReport",
report: DriftReport,
narrative: str,
items: list[dict],
recommendation: dict | None = None,
@@ -667,7 +670,7 @@ class DriftNarratorService:
except Exception as e:
logger.warning("drift_narrator_telegram_error", error=str(e))
def _count_nontrivial_drift(self, report: "DriftReport") -> int:
def _count_nontrivial_drift(self, report: DriftReport) -> int:
"""
計算非白名單、非 trivial (K8s 自動補齊) 的 drift 數
用於 Telegram 底部「還有 N 項」顯示實際可操作數量
@@ -704,7 +707,7 @@ class DriftNarratorService:
def _render_telegram_body(
self,
report: "DriftReport",
report: DriftReport,
narrative: str,
items: list[dict],
recommendation: dict | None = None,

View File

@@ -21,8 +21,8 @@ from typing import Protocol
import httpx
import structlog
from src.core.config import settings
from src.services.model_registry import get_model as _get_model
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
logger = structlog.get_logger(__name__)
@@ -98,7 +98,7 @@ class OllamaEmbeddingService:
P1 修復 (2026-03-29): 維度配置化,支援更多模型
"""
self._model = model
self._ollama_url = ollama_url or settings.OLLAMA_URL
self._ollama_url = ollama_url or resolve_ollama_endpoint("embedding")
self._timeout = timeout
self._default_dimension = default_dimension or self.MODEL_DIMENSIONS.get(
model, self.DEFAULT_DIMENSION

View File

@@ -21,7 +21,6 @@ AWOOOI — Image Analysis Service (Phase 34, ADR-067)
from __future__ import annotations
import base64
import os
import time
from pathlib import Path
from typing import TYPE_CHECKING
@@ -29,14 +28,13 @@ from typing import TYPE_CHECKING
import httpx
import structlog
from src.core.config import get_settings
from src.services.model_registry import get_model
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
if TYPE_CHECKING:
pass
logger = structlog.get_logger(__name__)
settings = get_settings()
# D1 集中化 2026-04-11: 從 models.json providers.ollama.models.image_analysis 讀取
_MODEL = get_model("ollama", "image_analysis")
@@ -124,7 +122,7 @@ class ImageAnalysisService:
image_b64 = base64.b64encode(image_path.read_bytes()).decode()
http = await self._get_http()
resp = await http.post(
f"{settings.OLLAMA_URL}/api/generate",
f"{resolve_ollama_endpoint('image_analysis')}/api/generate",
json={
"model": _MODEL,
"prompt": question,

View File

@@ -31,8 +31,8 @@ from typing import Protocol, runtime_checkable
import httpx
import structlog
from src.core.config import settings
from src.services.model_registry import get_model_registry
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
logger = structlog.get_logger(__name__)
@@ -549,7 +549,7 @@ class IntentClassifier:
# 呼叫 Ollama
async with httpx.AsyncClient() as client:
response = await client.post(
f"{settings.OLLAMA_URL}/api/generate",
f"{resolve_ollama_endpoint('hermes')}/api/generate",
json={
"model": model_name,
"prompt": prompt,

View File

@@ -15,10 +15,11 @@ import structlog
logger = structlog.get_logger(__name__)
# 2026-05-03 ogt: ADR-110 GCP-A Primary — 改從 settings 讀取,不再硬編碼 111
# 2026-05-05 Codex: KB 萃取走 111 lane,避免污染 GCP alert-fast lane
def _get_ollama_base() -> str:
from src.core.config import get_settings
return get_settings().OLLAMA_URL
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
return resolve_ollama_endpoint("deep_rca")
_EXTRACT_MODEL = "llama3.2:3b"
_EXTRACT_TIMEOUT = 30.0 # 秒,容忍慢速
@@ -117,7 +118,11 @@ class KnowledgeExtractorService:
category = self._infer_category(incident)
# 5. 建立 KB 條目
from src.models.knowledge import EntrySource, EntryType, KnowledgeEntryCreate
from src.models.knowledge import (
EntrySource,
EntryType,
KnowledgeEntryCreate,
)
from src.services.knowledge_service import get_knowledge_service
entry_data = KnowledgeEntryCreate(

View File

@@ -20,11 +20,10 @@ from pathlib import Path
import httpx
import structlog
from src.core.config import get_settings
import src.repositories.rag_chunk_repository as rag_repo
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
logger = structlog.get_logger(__name__)
settings = get_settings()
_EMBED_MODEL = "nomic-embed-text"
_GEN_MODEL = "qwen2.5:7b-instruct"
@@ -131,7 +130,7 @@ class KnowledgeRAGService:
try:
http = await self._get_http()
resp = await http.post(
f"{settings.OLLAMA_URL}/api/embeddings",
f"{resolve_ollama_endpoint('embedding')}/api/embeddings",
json={"model": _EMBED_MODEL, "prompt": text},
)
if resp.status_code == 200:
@@ -150,7 +149,7 @@ class KnowledgeRAGService:
try:
http = await self._get_http()
resp = await http.post(
f"{settings.OLLAMA_URL}/api/generate",
f"{resolve_ollama_endpoint('rag')}/api/generate",
json={
"model": _GEN_MODEL,
"prompt": prompt,

View File

@@ -186,7 +186,7 @@ class LocalCodeReviewService:
)
return {
"review_text": (
"⚠️ Code ReviewGCP-B/Ollama 審查未完成,"
"⚠️ Code Review本地 Ollama 審查未完成,"
"已依成本策略跳過 Gemini fallback。"
),
"issues_count": 1,

View File

@@ -35,8 +35,9 @@ logger = structlog.get_logger(__name__)
# ============================================================
# 2026-05-03 ogt: ADR-110 GCP-A Primary — 改從 settings 讀取,不再硬編碼 111
def _get_ollama_url() -> str:
from src.core.config import get_settings
return get_settings().OLLAMA_URL
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
return resolve_ollama_endpoint("deep_rca")
# D1 集中化 2026-04-11: 從 models.json providers.ollama.models.log_anomaly 讀取
SUMMARY_MODEL = get_model("ollama", "log_anomaly")
LLM_TIMEOUT = 180.0 # deepseek-r1 硬超時
@@ -145,7 +146,7 @@ class LogSummaryService:
self.summarize(pod_name, namespace),
timeout=SOFT_TIMEOUT,
)
except asyncio.TimeoutError:
except TimeoutError:
logger.info(
"log_summary_soft_timeout",
pod=pod_name,
@@ -182,7 +183,7 @@ class LogSummaryService:
def _extract_anomaly_lines(self, raw_logs: str) -> list[str]:
"""過濾異常行 + 敏感資料遮蔽,取最後 ANOMALY_TAIL_LINES 行"""
lines = raw_logs.splitlines()
anomaly = [l for l in lines if _ANOMALY_PATTERN.search(l)]
anomaly = [line for line in lines if _ANOMALY_PATTERN.search(line)]
# 取最後 N 行
anomaly = anomaly[-ANOMALY_TAIL_LINES:]
# 遮蔽敏感資料

View File

@@ -16,27 +16,37 @@ from src.core.config import settings
OllamaWorkloadType = Literal[
"interactive",
"healthcheck",
"alert_fast",
"batch",
"embedding",
"rag",
"code_review",
"shadow",
"canary",
"deep_rca",
"image_analysis",
"hermes",
"local_required",
"privacy_sensitive",
"dr",
]
_GCP_B_PREFERRED_WORKLOADS = {
_GCP_A_PREFERRED_WORKLOADS = {
"interactive",
"healthcheck",
"alert_fast",
}
_LOCAL_PREFERRED_WORKLOADS = {
"batch",
"embedding",
"rag",
"code_review",
"shadow",
"canary",
}
_LOCAL_PREFERRED_WORKLOADS = {
"deep_rca",
"image_analysis",
"hermes",
"local_required",
"privacy_sensitive",
"dr",
@@ -68,20 +78,28 @@ def resolve_ollama_selection(
secondary = cfg.OLLAMA_SECONDARY_URL
fallback = cfg.OLLAMA_FALLBACK_URL
if workload_type in _GCP_B_PREFERRED_WORKLOADS and secondary:
if workload_type in _LOCAL_PREFERRED_WORKLOADS:
if fallback:
return OllamaEndpointSelection(
url=fallback,
provider_name="ollama_local",
workload_type=workload_type,
reason="local_heavy_or_privacy_lane",
)
if secondary:
return OllamaEndpointSelection(
url=secondary,
provider_name="ollama_gcp_b",
workload_type=workload_type,
reason="local_missing_gcp_b_fallback",
)
if workload_type not in _GCP_A_PREFERRED_WORKLOADS and secondary:
return OllamaEndpointSelection(
url=secondary,
provider_name="ollama_gcp_b",
workload_type=workload_type,
reason="gcp_b_batch_lane",
)
if workload_type in _LOCAL_PREFERRED_WORKLOADS and fallback:
return OllamaEndpointSelection(
url=fallback,
provider_name="ollama_local",
workload_type=workload_type,
reason="local_privacy_or_dr_lane",
reason="gcp_b_default_non_alert_lane",
)
return OllamaEndpointSelection(

View File

@@ -29,9 +29,9 @@ from typing import Any
import httpx
import structlog
from src.core.config import settings
from src.models.playbook import Playbook, SymptomPattern
from src.repositories.interfaces import IEmbeddingCacheRepository
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
logger = structlog.get_logger(__name__)
@@ -146,7 +146,7 @@ class PlaybookRAGService:
"""
self._http_client = http_client
self._embedding_cache = embedding_cache
self.ollama_url = settings.OLLAMA_URL
self.ollama_url = resolve_ollama_endpoint("embedding")
self.embedding_model = EMBEDDING_MODEL
# =========================================================================

View File

@@ -33,7 +33,7 @@ async def _noop_save(*args: Any, **kwargs: Any) -> None:
@pytest.mark.asyncio
async def test_large_pr_uses_gcp_b_ollama_when_gemini_fallback_disabled(
async def test_large_pr_uses_local_ollama_when_gemini_fallback_disabled(
monkeypatch: pytest.MonkeyPatch,
) -> None:
monkeypatch.setattr(
@@ -44,7 +44,7 @@ async def test_large_pr_uses_gcp_b_ollama_when_gemini_fallback_disabled(
monkeypatch.setattr(
review_module,
"resolve_ollama_endpoint",
lambda workload_type: "http://gcp-b:11436",
lambda workload_type: "http://local-111:11434",
)
client = _FakeClient()
@@ -69,7 +69,7 @@ async def test_large_pr_uses_gcp_b_ollama_when_gemini_fallback_disabled(
assert result is not None
assert result["provider"] == "ollama"
assert client.posted_urls == ["http://gcp-b:11436/api/generate"]
assert client.posted_urls == ["http://local-111:11434/api/generate"]
@pytest.mark.asyncio
@@ -84,7 +84,7 @@ async def test_ollama_failure_does_not_fall_back_to_gemini_by_default(
monkeypatch.setattr(
review_module,
"resolve_ollama_endpoint",
lambda workload_type: "http://gcp-b:11436",
lambda workload_type: "http://local-111:11434",
)
client = _FakeClient(fail=True)
@@ -110,7 +110,7 @@ async def test_ollama_failure_does_not_fall_back_to_gemini_by_default(
assert result is not None
assert result["provider"] == "ollama_unavailable"
assert result["cloud_fallback_skipped"] is True
assert client.posted_urls == ["http://gcp-b:11436/api/generate"]
assert client.posted_urls == ["http://local-111:11434/api/generate"]
@pytest.mark.asyncio

View File

@@ -3,7 +3,6 @@ from __future__ import annotations
from types import SimpleNamespace
from src.services.ollama_endpoint_resolver import (
resolve_ollama_endpoint,
resolve_ollama_selection,
)
@@ -21,20 +20,30 @@ def _settings(
)
def test_batch_workloads_prefer_gcp_b() -> None:
def test_heavy_workloads_prefer_local_lane() -> None:
cfg = _settings()
for workload in ("batch", "embedding", "rag", "code_review", "shadow", "canary"):
for workload in (
"batch",
"embedding",
"rag",
"code_review",
"shadow",
"canary",
"deep_rca",
"image_analysis",
"hermes",
):
selection = resolve_ollama_selection(workload, config=cfg)
assert selection.url == "http://192.168.0.110:11436"
assert selection.provider_name == "ollama_gcp_b"
assert selection.reason == "gcp_b_batch_lane"
assert selection.url == "http://192.168.0.110:11437"
assert selection.provider_name == "ollama_local"
assert selection.reason == "local_heavy_or_privacy_lane"
def test_interactive_workloads_stay_on_gcp_a() -> None:
cfg = _settings()
for workload in ("interactive", "healthcheck"):
for workload in ("interactive", "healthcheck", "alert_fast"):
selection = resolve_ollama_selection(workload, config=cfg)
assert selection.url == "http://192.168.0.110:11435"
assert selection.provider_name == "ollama_gcp_a"
@@ -49,7 +58,10 @@ def test_local_required_workloads_use_local_lane() -> None:
assert selection.provider_name == "ollama_local"
def test_batch_workloads_fall_back_to_primary_when_secondary_missing() -> None:
cfg = _settings(secondary="")
def test_heavy_workloads_fall_back_to_gcp_b_when_local_missing() -> None:
cfg = _settings(fallback="")
assert resolve_ollama_endpoint("embedding", config=cfg) == "http://192.168.0.110:11435"
selection = resolve_ollama_selection("embedding", config=cfg)
assert selection.url == "http://192.168.0.110:11436"
assert selection.provider_name == "ollama_gcp_b"
assert selection.reason == "local_missing_gcp_b_fallback"

View File

@@ -3193,3 +3193,26 @@ bash scripts/ops/ollama-topology-check.sh
```
結論:GCP-A/B 可作 `alert-fast` lane,但目前不應承擔 14B/32B 同步告警推理;重模型必須由 AwoooP Inference Gateway 隔離到 async / 111 / GPU 節點。
### Runtime 過渡護欄
在 Inference Gateway 尚未接管所有 provider 前,先調整 `ollama_endpoint_resolver`:
- `interactive` / `healthcheck` / `alert_fast` 保持 GCP-A 優先
- `code_review` / `rag` / `embedding` / `deep_rca` / `image_analysis` / `hermes` 改為 111 優先
- 111 不可用時才回 GCP-B,避免 GCP-A/B 在告警 canary 期間被 7B/14B/32B 模型污染
驗證:
```bash
/Users/ogt/awoooi/apps/api/.venv/bin/python -m ruff check apps/api/src/core/config.py apps/api/src/services/ollama_endpoint_resolver.py apps/api/src/services/knowledge_rag_service.py apps/api/src/services/playbook_rag.py apps/api/src/services/log_summary_service.py apps/api/src/services/image_analysis_service.py apps/api/src/services/local_code_review_service.py apps/api/src/hermes/nl_gateway.py apps/api/tests/test_ollama_endpoint_resolver.py apps/api/tests/test_local_code_review_cloud_fallback.py
# All checks passed
DATABASE_URL=postgresql+asyncpg://u:p@localhost:5432/test REDIS_URL=redis://localhost:6379/0 \
/Users/ogt/awoooi/apps/api/.venv/bin/python -m pytest \
apps/api/tests/test_ollama_endpoint_resolver.py \
apps/api/tests/test_local_code_review_cloud_fallback.py \
apps/api/tests/test_ollama_provider_endpoints.py \
apps/api/tests/test_openclaw_alert_cloud_fallback_gate.py -q
# 15 passed
```

View File

@@ -35,8 +35,8 @@ gateway must own runtime scheduling.
| Lane | Model | Allowed hosts | Notes |
|------|-------|---------------|-------|
| `alert-fast` | `gemma3:4b` | GCP-A, GCP-B, 111 | Synchronous, protected |
| `code-review` | `qwen2.5-coder:7b` | GCP-B, 111 | Never 32B on GCP during alert canary |
| `embedding` | `bge-m3` | GCP-A, GCP-B, 111 | Short timeout |
| `code-review` | `qwen2.5-coder:7b` | 111, then GCP-B | Transitional: keep GCP-B clean during alert canary |
| `embedding` | `bge-m3` | 111, then GCP-B | Transitional: keep GCP-A/B clean during alert canary |
| `deep-rca` | 14B-class model | 111 or GPU node | Async only |
| `paid-emergency` | Gemini / Claude | Cloud | Budget-gated emergency fallback |
@@ -88,8 +88,8 @@ if lane == alert-fast:
if lane == code-review:
model = qwen2.5-coder:7b
try GCP-B with 90s timeout
try 111 with 120s timeout
try GCP-B with 90s timeout only if 111 is unavailable
if lane == deep-rca:
reject synchronous request
@@ -150,4 +150,3 @@ OLLAMA_FALLBACK_URL: "http://192.168.0.111:11434"
```
Do not disable budget hard kill during rollback.