diff --git a/apps/api/src/api/v1/health.py b/apps/api/src/api/v1/health.py index 0cf4bcad..dc6228e6 100644 --- a/apps/api/src/api/v1/health.py +++ b/apps/api/src/api/v1/health.py @@ -11,7 +11,7 @@ Endpoints: Components Checked: - PostgreSQL (192.168.0.188:5432) - Redis (192.168.0.188:6380) -- Ollama (192.168.0.188:11434) +- Ollama (settings.OLLAMA_URL / ADR-110 provider pool) - OpenClaw (192.168.0.188:8089) - SigNoz (192.168.0.188:3301) """ diff --git a/apps/api/src/core/config.py b/apps/api/src/core/config.py index 8a85bbce..08aec070 100644 --- a/apps/api/src/core/config.py +++ b/apps/api/src/core/config.py @@ -145,7 +145,7 @@ class Settings(BaseSettings): # ========================================================================== # ADR-104: LLM Playbook Generator # 成功修復且未命中既有 Playbook 時,用本地 LLM 生成 DRAFT/REVIEW Playbook。 - # 成本護欄:實作層只走 local provider(Ollama 111 → Ollama 188),不新增雲端 fallback。 + # 成本護欄:實作層只走 local provider(GCP-A → GCP-B → 111),不新增雲端 fallback。 # 回滾指令: kubectl set env deployment/awoooi-api ENABLE_LLM_PLAYBOOK_GENERATION=false # ========================================================================== ENABLE_LLM_PLAYBOOK_GENERATION: bool = Field( @@ -899,7 +899,7 @@ class Settings(BaseSettings): # ========================================================================== # MCP Phase 2b: Prometheus MCP Server (ADR-071, 2026-04-11 Claude Sonnet 4.6) # ========================================================================== - # 2026-04-29 ogt + Claude Opus 4.7: drift fix — 188 是 Ollama Hub,Prometheus 實際在 110 + # 2026-04-29 ogt + Claude Opus 4.7: drift fix — Prometheus 實際在 110 # ConfigMap 04-configmap.yaml 也是 110;governance_agent / SLO check 連 188 會 timeout # 此 drift 是 SPF-4 (governance_agent silently fail) 根因之一 PROMETHEUS_URL: str = Field( @@ -973,7 +973,7 @@ class Settings(BaseSettings): "devops": "192.168.0.110", # Harbor, GH Runner "security": "192.168.0.112", # Kali Scanner "k3s_master": "192.168.0.120", # K3s Master - "ai_web": "192.168.0.188", # Nginx, Postgres, Redis, Ollama + "ai_web": "192.168.0.188", # Nginx, Postgres, Redis, SignOz } diff --git a/apps/api/src/jobs/asset_scanner_job.py b/apps/api/src/jobs/asset_scanner_job.py index 32fb9918..1ab8282c 100644 --- a/apps/api/src/jobs/asset_scanner_job.py +++ b/apps/api/src/jobs/asset_scanner_job.py @@ -479,7 +479,7 @@ async def _collect_all_k8s_assets() -> tuple[list[dict[str, Any]], list[dict[str # 6. Prometheus targets — 補齊 host-install services (110/112/188/125 等非 K8s) # Gap 1 修補 (2026-04-19 audit): 原本 asset_inventory 只涵蓋 K8s, - # 110 Harbor/Gitea/監控 + 188 PostgreSQL/Redis/Ollama host-install 全漏 + # 110 Harbor/Gitea/監控 + 188 PostgreSQL/Redis host-install 全漏 # 用 Prometheus /api/v1/targets 自動發現全節點服務 try: prom_assets, host_relationships = await _collect_prometheus_targets() diff --git a/apps/api/src/jobs/capacity_forecaster_job.py b/apps/api/src/jobs/capacity_forecaster_job.py index 73a147a6..18b27c2c 100644 --- a/apps/api/src/jobs/capacity_forecaster_job.py +++ b/apps/api/src/jobs/capacity_forecaster_job.py @@ -172,7 +172,7 @@ _LLM_FORECAST_PROMPT = """你是 AWOOOI 容量規劃專家。以下 host 過去 {findings_json} ## 當前主機環境資訊 - - 主機架構: 110 (Harbor/Gitea/監控), 112 (Security), 120/121 (K3s), 125 (K3s backup), 188 (PG/Redis/Ollama/MinIO) + - 主機架構: 110 (Harbor/Gitea/監控), 112 (Security), 120/121 (K3s), 125 (K3s backup), 188 (PG/Redis/MinIO) - 判斷請考慮: 該主機上跑什麼服務、常見瓶頸模式 ## 輸出規格 (必須是合法 JSON,純 JSON 無前後文字) diff --git a/apps/api/src/main.py b/apps/api/src/main.py index 139010a9..bc5ac052 100644 --- a/apps/api/src/main.py +++ b/apps/api/src/main.py @@ -683,7 +683,7 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]: logger.warning("ollama_failover_system_start_failed", error=str(e)) # 2026-04-27 P3.2.2 by Claude — AI Provider 版本追蹤(每 1 小時) - # 探測 5 Provider(ollama/ollama_188/gemini/claude/openclaw_nemo)版本 + # 探測 5 Provider(ollama/ollama_local/gemini/claude/openclaw_nemo)版本 # 寫入 ai_provider_version_history;版本變更時 log warning,P3.2.3 alerter 後續整合 try: async def _run_model_version_tracker_loop() -> None: diff --git a/apps/api/src/observability/agent_step_metrics.py b/apps/api/src/observability/agent_step_metrics.py index 1b9d1465..c284d70c 100644 --- a/apps/api/src/observability/agent_step_metrics.py +++ b/apps/api/src/observability/agent_step_metrics.py @@ -29,7 +29,7 @@ from __future__ import annotations from prometheus_client import Histogram # Buckets 對齊 NIM 實測分佈(2-27s),並覆蓋三段 timeout 30/20/15s 邊界 -# 低端(0.5-5s):快速路徑(Ollama 188 本地) +# 低端(0.5-5s):快速路徑(Ollama provider pool) # 中端(5-20s):NIM + Gemini fallback # 高端(20-60s):超時 / 慢速 Provider _AGENT_STEP_BUCKETS = [0.5, 1.0, 2.0, 5.0, 10.0, 15.0, 20.0, 30.0, 45.0, 60.0] diff --git a/apps/api/src/routes/agent.py b/apps/api/src/routes/agent.py index 6afdd407..cf67de47 100644 --- a/apps/api/src/routes/agent.py +++ b/apps/api/src/routes/agent.py @@ -104,7 +104,7 @@ async def get_agent_thinking( ) -> StreamingResponse: """ OpenClaw 思考軌跡 (SSE 串流) - Phase 1.2: 真實串接 Ollama at 192.168.0.188:11434 + Phase 1.2: 真實串接設定中的 Ollama provider pool """ async def generate_thinking_stream(): diff --git a/apps/api/src/services/ai_providers/ollama.py b/apps/api/src/services/ai_providers/ollama.py index 9c47aa76..de6a9f7f 100644 --- a/apps/api/src/services/ai_providers/ollama.py +++ b/apps/api/src/services/ai_providers/ollama.py @@ -1,10 +1,10 @@ """ Ollama Provider - Phase 24 ADR-052 ==================================== -本地 LLM 推理 (192.168.0.188 VMware VM, CPU-only) +本地 / 私有 LLM 推理 Provider。 搬移自: openclaw.py _call_ollama (L349-409) -特性: 免費、隱私安全 (local)、但 CPU 慢 (~97s/30tokens for qwen2.5:7b) +特性: 免費、隱私安全 (local)、可依 ADR-110 指向 GCP-A/GCP-B/111。 2026-04-02 ogt: Phase 24-A 從 openclaw.py 抽出 """ @@ -335,33 +335,27 @@ class OllamaProvider: self._http_client = None -# 2026-04-26 Wave5 B1-fix by Claude Engineer-A4 — OLLAMA_188 provider 註冊 -class Ollama188Provider(OllamaProvider): +# 2026-05-06 Codex — 188 不再作為 Ollama Provider;本地備援統一命名為 ollama_local。 +class OllamaLocalProvider(OllamaProvider): """ - Ollama 188 CPU-only 備援 Provider + Ollama Local fallback Provider - 繼承 OllamaProvider,但使用 OLLAMA_FALLBACK_URL(192.168.0.188:11434) - 作為推理端點,模型預設 OLLAMA_HEALTH_CHECK_MODEL(qwen2.5:7b-instruct)。 - - B1 修復:原本 _init_registry 未登錄此 provider,導致 - executor.execute() 遇到 "ollama_188" → not_registered → 跳過, - 188 從未被打到。此類別補全登錄鏈路。 - - 2026-04-26 Wave5 B1-fix by Claude Engineer-A4 + 使用 OLLAMA_FALLBACK_URL 作為本地最後防線端點。 + ADR-110 目前設定為 110 nginx proxy → 111 Ollama;188 不得再作為 Ollama provider。 """ @property def name(self) -> str: - return "ollama_188" + return "ollama_local" @property def is_enabled(self) -> bool: import os - # 優先查 ENABLE_OLLAMA_188;若未設定(預設 true)則看 OLLAMA_FALLBACK_URL 是否有值 - env_override = os.getenv("ENABLE_OLLAMA_188", "true").lower() == "true" + # 優先查 ENABLE_OLLAMA_LOCAL;若未設定(預設 true)則看 OLLAMA_FALLBACK_URL 是否有值。 + env_override = os.getenv("ENABLE_OLLAMA_LOCAL", "true").lower() == "true" if not env_override: return False - # OLLAMA_FALLBACK_URL 空字串 → 未設定 188 節點 → 停用 + # OLLAMA_FALLBACK_URL 空字串 → 未設定本地節點 → 停用。 return bool(getattr(settings, "OLLAMA_FALLBACK_URL", "")) def _endpoint_url(self) -> str: @@ -386,18 +380,18 @@ class Ollama188Provider(OllamaProvider): client = await self._get_client() registry = get_model_registry() - # 嘗試取 ollama_188 專屬設定,fallback 到 ollama 預設 + # 嘗試取本地 fallback 專屬設定,fallback 到 ollama 預設。 try: - model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama_188", "rca")).strip() + model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama_local", "rca")).strip() except Exception: model_name = str((context or {}).get("ollama_model") or getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "qwen2.5:7b-instruct")).strip() try: - options = registry.get_provider_options("ollama_188") + options = registry.get_provider_options("ollama_local") except Exception: options = registry.get_provider_options("ollama") - # CPU-only 備援:固定使用較長 timeout(CPU 推理慢) + # 本地備援:固定使用較長 timeout,避免 111 模型載入時被過早判死。 task_type = (context or {}).get("task_type", "") if task_type in ("diagnose", "force_local"): read_timeout = float(getattr(settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", 200)) @@ -426,7 +420,7 @@ class Ollama188Provider(OllamaProvider): latency = (time.perf_counter() - start) * 1000 logger.info( - "ollama_188_provider_success", + "ollama_local_provider_success", response_length=len(result), tokens=tokens, latency_ms=round(latency, 1), @@ -443,12 +437,12 @@ class Ollama188Provider(OllamaProvider): except httpx.TimeoutException as e: latency = (time.perf_counter() - start) * 1000 - logger.warning("ollama_188_provider_timeout", error=str(e), latency_ms=round(latency, 1)) + logger.warning("ollama_local_provider_timeout", error=str(e), latency_ms=round(latency, 1)) return AIResult(raw_response="", success=False, provider=self.name, latency_ms=latency, error=f"Timeout: {e}") except Exception as e: latency = (time.perf_counter() - start) * 1000 - logger.warning("ollama_188_provider_failed", error=str(e), latency_ms=round(latency, 1)) + logger.warning("ollama_local_provider_failed", error=str(e), latency_ms=round(latency, 1)) return AIResult(raw_response="", success=False, provider=self.name, latency_ms=latency, error=str(e)) async def health_check(self) -> bool: diff --git a/apps/api/src/services/ai_router.py b/apps/api/src/services/ai_router.py index d8c4935d..3e27dd68 100644 --- a/apps/api/src/services/ai_router.py +++ b/apps/api/src/services/ai_router.py @@ -73,10 +73,6 @@ class AIProviderEnum(str, Enum): """AI 提供者""" OLLAMA = "ollama" - # 2026-04-25 critic-fix Part2 B2 by Claude Engineer-C2 - # P1.1b OllamaFailoverManager 使用 provider_name="ollama_188", - # 但 AIProviderEnum 沒有此值 → P1.2 整合時 lookup 失敗 - OLLAMA_188 = "ollama_188" # 188 CPU-only 備援節點(P1.1b) # 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP 三層容災 # OllamaFailoverManager 回傳 provider_name="ollama_gcp_a"/"ollama_gcp_b"/"ollama_local" # 缺少 enum 值 → AIProviderEnum(primary_str) 拋 ValueError → fallback chain 清空 → 直跳 Gemini @@ -96,8 +92,6 @@ class AIProviderEnum(str, Enum): # Provider 對應延遲預算 (ms) PROVIDER_LATENCY_BUDGET: dict[AIProviderEnum, int] = { AIProviderEnum.OLLAMA: 60000, # 本地,允許較長處理時間 - # 2026-04-25 critic-fix Part2 B2 by Claude Engineer-C2 — 188 CPU-only 推理較慢 - AIProviderEnum.OLLAMA_188: 120000, # 120s budget for CPU inference # 2026-05-04 ogt: ADR-110 GCP 三層容災 — GCP NVMe SSD 推理快,60s 足夠 AIProviderEnum.OLLAMA_GCP_A: 60000, AIProviderEnum.OLLAMA_GCP_B: 60000, @@ -432,7 +426,7 @@ class AIRouter: model = failover_result.primary.model reason = f"{reason} [failover→{primary_str}]" except ValueError: - # provider_name 無法對應已知 enum(理論上不應發生,OLLAMA_188 已加) + # provider_name 無法對應已知 enum;避免未知 provider 靜默進入執行層。 logger.warning( "ai_router_unknown_failover_provider", provider=primary_str, @@ -1364,7 +1358,7 @@ def _init_registry() -> AIProviderRegistry: """初始化 Provider Registry (首次呼叫時自動註冊所有 Provider)""" from src.services.ai_providers.ollama import ( OllamaProvider, - Ollama188Provider, + OllamaLocalProvider, OllamaGcpBProvider, # 2026-05-04 ADR-110 GCP-B ) from src.services.ai_providers.gemini import GeminiProvider @@ -1385,8 +1379,9 @@ def _init_registry() -> AIProviderRegistry: from src.services.ai_providers.nemotron import NemotronProvider registry.register(NemotronProvider()) - # 2026-04-26 Wave5 B1-fix by Claude Engineer-A4 — 補登 OLLAMA_188 備援 provider - ollama_local = Ollama188Provider() + # 2026-05-06 Codex: 188 不再作為 Ollama provider。 + # Local fallback 統一命名為 ollama_local,端點由 OLLAMA_FALLBACK_URL 指向 111/110 proxy。 + ollama_local = OllamaLocalProvider() registry.register(ollama_local) # 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP 三層容災修復 @@ -1395,7 +1390,7 @@ def _init_registry() -> AIProviderRegistry: # 修復: # "ollama_gcp_a" alias → 同 OllamaProvider(OLLAMA_URL = GCP-A) # "ollama_gcp_b" → 新 OllamaGcpBProvider(OLLAMA_SECONDARY_URL = GCP-B) - # "ollama_local" alias → 同 Ollama188Provider(OLLAMA_FALLBACK_URL = 111) + # "ollama_local" → OllamaLocalProvider(OLLAMA_FALLBACK_URL = 111 / 110:11437) registry._providers["ollama_gcp_a"] = ollama_gcp_a registry.register(OllamaGcpBProvider()) registry._providers["ollama_local"] = ollama_local diff --git a/apps/api/src/services/decision_manager.py b/apps/api/src/services/decision_manager.py index 96d8ec26..797f434c 100644 --- a/apps/api/src/services/decision_manager.py +++ b/apps/api/src/services/decision_manager.py @@ -637,7 +637,7 @@ async def _nemoclaw_second_opinion(incident: "Incident", primary_result: dict) - """ MCP Phase 4a: NemoClaw second opinion — 信心 < 0.7 時觸發 ============================================================ - 用 deepseek-r1:14b (Ollama 188) 對同一份資料做獨立推理, + 用 deepseek-r1:14b (設定的 Ollama primary) 對同一份資料做獨立推理, 輸出純文字 advisory_note,不執行任何操作。 2026-04-11 Claude Sonnet 4.6 Asia/Taipei @@ -696,7 +696,7 @@ async def _generate_playbook_draft_if_new(incident: "Incident") -> None: MCP Phase 4c: Playbook 無命中時,自動生成 AI 草稿 Playbook 寫入 KM ===================================================================== - 僅在 KM 中不存在同 alertname 的 Playbook 時觸發(避免重複) - - 用 qwen2.5:7b-instruct (Ollama 188) 生成結構化 Playbook 草稿 + - 用 qwen2.5:7b-instruct (設定的 Ollama primary) 生成結構化 Playbook 草稿 - 寫入 KnowledgeEntry,status=DRAFT,需人工審核後升為 APPROVED - 寫入 AlertOperationLog PLAYBOOK_DRAFT_CREATED 事件 diff --git a/apps/api/src/services/host_aggregator.py b/apps/api/src/services/host_aggregator.py index 7e1fcc52..ef1a2475 100644 --- a/apps/api/src/services/host_aggregator.py +++ b/apps/api/src/services/host_aggregator.py @@ -7,7 +7,7 @@ Hosts: - 192.168.0.110: DevOps 金庫 (Harbor, GH Runner) - 192.168.0.112: Kali Security (Scanner API) - 192.168.0.120: K3s Master (awoooi-prod namespace) -- 192.168.0.188: AI+Web 中心 (Nginx, PostgreSQL, Redis, Ollama, OpenClaw, SigNoz) +- 192.168.0.188: AI+Web 中心 (Nginx, PostgreSQL, Redis, OpenClaw, SigNoz) Features: - asyncio.gather for parallel fetching diff --git a/apps/api/src/services/model_version_probe.py b/apps/api/src/services/model_version_probe.py index e715870f..df6f94ab 100644 --- a/apps/api/src/services/model_version_probe.py +++ b/apps/api/src/services/model_version_probe.py @@ -5,7 +5,7 @@ AI Provider 版本探測 — 為每個 Provider 提供 get_version() Provider: - ollama : 34.143.170.20 GCP-A Ollama (primary) — 2026-05-03 ogt: ADR-110 GCP-A Primary - - ollama_188 : 192.168.0.188 Ollama (fallback) + - ollama_local : 192.168.0.111 / 110 proxy Ollama (local fallback) - gemini : Google Gemini API (版本 = model name) - claude : Anthropic Claude (版本 = model name) - openclaw_nemo : OpenClaw NemoTron (版本 = OPENCLAW_DEFAULT_MODEL) @@ -31,7 +31,7 @@ TAIPEI_TZ = timezone(timedelta(hours=8)) class ProviderVersionInfo: """AI Provider 版本快照""" - provider: str # "ollama" / "ollama_188" / "gemini" / "claude" / "openclaw_nemo" + provider: str # "ollama" / "ollama_local" / "gemini" / "claude" / "openclaw_nemo" model: str version: str # version string 或 tag(Ollama 用 modified_at,其他用 model name) digest: str | None = None # SHA256 digest(僅 Ollama 有) @@ -43,7 +43,7 @@ class ProviderVersionInfo: # ============================================================================= async def probe_ollama_version(url: str, model: str) -> ProviderVersionInfo: - """探測 Ollama(GCP-A 或 188):GET /api/tags 取 model digest + modified_at + """探測 Ollama(GCP-A/GCP-B 或本地 111):GET /api/tags 取 model digest + modified_at Args: url: Ollama base URL,例如 "http://34.143.170.20:11434"(GCP-A Primary) @@ -58,15 +58,12 @@ async def probe_ollama_version(url: str, model: str) -> ProviderVersionInfo: """ import httpx - # 2026-05-03 ogt: ADR-110 GCP-A Primary — 擴展 provider 判斷邏輯支援 GCP 三層容災 - # 188 保留 ollama_188 命名(CPU-only 主機,雖移出 routing chain 但仍可被 probe) + # 2026-05-06 Codex: 188 不再作為 Ollama provider;local fallback 一律標示 ollama_local。 _GCP_OLLAMA_IPS = {"34.143.170.20", "34.21.145.224"} if any(ip in url for ip in _GCP_OLLAMA_IPS): provider_name = "ollama" - elif "192.168.0.111" in url: + elif "192.168.0.111" in url or "192.168.0.110:11437" in url: provider_name = "ollama_local" - elif "192.168.0.188" in url: - provider_name = "ollama_188" else: provider_name = "ollama_remote" @@ -179,7 +176,7 @@ async def probe_claude_version() -> ProviderVersionInfo: async def probe_openclaw_nemo_version() -> ProviderVersionInfo: """OpenClaw NemoTron:版本字串從 settings.OPENCLAW_DEFAULT_MODEL 讀取 - NemoTron 運行在 OpenClaw 188 節點(使用 Ollama 推理), + NemoTron 運行在 OpenClaw 節點, 透過 OPENCLAW_URL /api/tags 探測,模型名稱即版本識別。 Returns: @@ -195,18 +192,18 @@ async def probe_openclaw_nemo_version() -> ProviderVersionInfo: # OpenClaw 底層是 Ollama,使用 OPENCLAW_URL 的 host:port 加上 Ollama port # OPENCLAW_URL 是 8088(OpenClaw API),Ollama 通常在 11434 - # 188 的 Ollama URL 若有設定則直接用 OLLAMA_FALLBACK_URL - ollama_188_url = settings.OLLAMA_FALLBACK_URL - if not ollama_188_url: + # OpenClaw 底層 tags 來源優先使用本地 fallback Ollama URL。 + ollama_local_url = settings.OLLAMA_FALLBACK_URL + if not ollama_local_url: # fallback:從 OPENCLAW_URL host 構建 Ollama URL from urllib.parse import urlparse parsed = urlparse(settings.OPENCLAW_URL) - ollama_188_url = f"{parsed.scheme}://{parsed.hostname}:11434" + ollama_local_url = f"{parsed.scheme}://{parsed.hostname}:11434" import httpx async with httpx.AsyncClient(timeout=5.0) as client: - resp = await client.get(f"{ollama_188_url}/api/tags") + resp = await client.get(f"{ollama_local_url}/api/tags") resp.raise_for_status() models = resp.json().get("models", []) @@ -220,7 +217,7 @@ async def probe_openclaw_nemo_version() -> ProviderVersionInfo: ) # model 不在清單時:version 用 model name,digest=None - logger.warning("openclaw_nemo_model_not_in_tags", model=model, url=ollama_188_url) + logger.warning("openclaw_nemo_model_not_in_tags", model=model, url=ollama_local_url) return ProviderVersionInfo( provider="openclaw_nemo", model=model, @@ -257,7 +254,7 @@ async def probe_all_providers() -> list[ProviderVersionInfo]: raw = await asyncio.gather(*tasks, return_exceptions=True) results: list[ProviderVersionInfo] = [] - provider_labels = ["ollama", "ollama_188", "gemini", "claude", "openclaw_nemo"] + provider_labels = ["ollama", "ollama_local", "gemini", "claude", "openclaw_nemo"] for label, outcome in zip(provider_labels, raw, strict=True): if isinstance(outcome, ProviderVersionInfo): results.append(outcome) diff --git a/apps/api/src/services/openclaw.py b/apps/api/src/services/openclaw.py index bbb2ddfb..946a6550 100644 --- a/apps/api/src/services/openclaw.py +++ b/apps/api/src/services/openclaw.py @@ -1945,7 +1945,7 @@ Focus on: from src.services.ai_router import get_ai_registry ai_registry = get_ai_registry() - provider = ai_registry.get("ollama") or ai_registry.get("ollama_188") + provider = ai_registry.get("ollama") or ai_registry.get("ollama_local") if provider is None or not hasattr(provider, "analyze_with_tools"): logger.warning( "openclaw_agent_loop_shadow_skipped", diff --git a/apps/api/src/services/playbook_generator.py b/apps/api/src/services/playbook_generator.py index 7fceba57..a383f63c 100644 --- a/apps/api/src/services/playbook_generator.py +++ b/apps/api/src/services/playbook_generator.py @@ -4,7 +4,7 @@ LLM Playbook Generator - ADR-104 T1/T2/T6 從成功修復案例生成可治理的 Playbook 草稿。 設計重點: -- 只用 local provider 順序(Ollama 111 -> Ollama 188),避免新增雲端成本。 +- 只用 local/provider pool 順序(GCP-A -> 111 local),避免新增雲端成本。 - LLM 產出必須經 Pydantic + action_parser 安全收斂。 - 不直接 APPROVED;先 DRAFT/REVIEW,再交治理 job 晉級。 """ @@ -30,7 +30,6 @@ from src.models.playbook import ( RiskLevel, SymptomPattern, ) -from src.services.action_parser import is_safe_kubectl_action from src.services.action_parser import kubectl_safety_reason logger = structlog.get_logger(__name__) @@ -218,7 +217,7 @@ class LLMPlaybookGenerator: executor = get_ai_executor() result = await executor.execute( prompt=prompt, - provider_order=["ollama", "ollama_188"], + provider_order=["ollama", "ollama_local"], context=context, cache_ttl=86400, require_local=True, diff --git a/apps/api/tests/test_ai_router_diagnose_fallback.py b/apps/api/tests/test_ai_router_diagnose_fallback.py index 24d5852b..11a301f9 100644 --- a/apps/api/tests/test_ai_router_diagnose_fallback.py +++ b/apps/api/tests/test_ai_router_diagnose_fallback.py @@ -124,8 +124,9 @@ def test_diagnose_fallback_chain_ollama_primary(): assert AIProviderEnum.OPENCLAW_NEMO in providers_in_chain assert AIProviderEnum.GEMINI in providers_in_chain assert AIProviderEnum.CLAUDE in providers_in_chain - # OLLAMA_188 (CPU-only 備援) 仍排除(M1 Pro 111 才是 GPU 主推理) - assert AIProviderEnum.OLLAMA_188 not in providers_in_chain + # 188 不得作為 Ollama provider;本地備援只允許 ollama_local。 + provider_values = {p.value for p in providers_in_chain} + assert "ollama_188" not in provider_values def test_diagnose_fallback_chain_contains_cloud_providers(): @@ -159,7 +160,7 @@ async def test_diagnose_route_primary_is_ollama(): # 雲端 fallback 仍在(OpenClaw / Gemini / Claude 救命備援) fb_providers = [p for p, _ in decision.fallback_chain] - # ollama_failover_manager 可能轉到 ollama_188,但 ollama variant 必須有 + # ollama_failover_manager 可能轉到 GCP-B / ollama_local,但雲端救命備援仍必須存在。 has_cloud_fallback = ( AIProviderEnum.GEMINI in fb_providers or AIProviderEnum.CLAUDE in fb_providers ) diff --git a/apps/api/tests/test_ai_router_failover_integration.py b/apps/api/tests/test_ai_router_failover_integration.py index be1e9948..f0b25402 100644 --- a/apps/api/tests/test_ai_router_failover_integration.py +++ b/apps/api/tests/test_ai_router_failover_integration.py @@ -83,7 +83,7 @@ async def test_router_uses_failover_when_ollama_initial_provider(): return_value=_make_failover_result( primary_provider="gemini", primary_model="gemini-1.5-flash", - fallback=[("ollama_188", "qwen2.5:7b-instruct"), ("nemotron", "nvidia/nemotron-mini-4b-instruct")], + fallback=[("ollama_local", "qwen2.5:7b-instruct"), ("nemotron", "nvidia/nemotron-mini-4b-instruct")], ) ) @@ -109,14 +109,14 @@ async def test_router_uses_failover_when_ollama_initial_provider(): @pytest.mark.asyncio async def test_router_failover_fallback_chain_converted(): - """failover_manager 回傳 fallback_chain → decision.fallback_chain 包含 OLLAMA_188""" + """failover_manager 回傳 fallback_chain → decision.fallback_chain 包含 OLLAMA_LOCAL""" mock_fm = MagicMock() mock_fm.select_provider = AsyncMock( return_value=_make_failover_result( primary_provider="gemini", primary_model="gemini-1.5-flash", fallback=[ - ("ollama_188", "qwen2.5:7b-instruct"), + ("ollama_local", "qwen2.5:7b-instruct"), ("nemotron", "nvidia/nemotron-mini-4b-instruct"), ("claude", "claude-haiku-4-5-20251001"), ], @@ -134,8 +134,8 @@ async def test_router_failover_fallback_chain_converted(): decision = await router.route("test alert message") fb_providers = [p for p, _ in decision.fallback_chain] - assert AIProviderEnum.OLLAMA_188 in fb_providers, ( - f"OLLAMA_188 not in fallback_chain: {fb_providers}" + assert AIProviderEnum.OLLAMA_LOCAL in fb_providers, ( + f"OLLAMA_LOCAL not in fallback_chain: {fb_providers}" ) assert AIProviderEnum.NEMOTRON in fb_providers assert AIProviderEnum.CLAUDE in fb_providers diff --git a/apps/api/tests/test_failover_alerter.py b/apps/api/tests/test_failover_alerter.py index 72b95500..db193d7b 100644 --- a/apps/api/tests/test_failover_alerter.py +++ b/apps/api/tests/test_failover_alerter.py @@ -68,7 +68,7 @@ async def test_alert_failover_dedup(mock_redis, mock_telegram_send): "to_provider": "gemini", "reason": "111 unhealthy", "model": "qwen3:8b", - "fallback_chain_str": "gemini → ollama_188", + "fallback_chain_str": "gemini → ollama_local", } # 第 1 次:dedup pass,發送 diff --git a/apps/api/tests/test_failover_e2e_dispatch.py b/apps/api/tests/test_failover_e2e_dispatch.py index 5b1bfc3c..9ea7a015 100644 --- a/apps/api/tests/test_failover_e2e_dispatch.py +++ b/apps/api/tests/test_failover_e2e_dispatch.py @@ -1,16 +1,15 @@ -# apps/api/tests/test_failover_e2e_dispatch.py | 2026-04-26 @ Asia/Taipei -# 2026-04-26 Wave5 B4 by Claude Engineer-A4 — E2E executor dispatch 測試 -# 驗證 failover 切到 OLLAMA_188 後,HTTP 請求真的打到 OLLAMA_FALLBACK_URL +# apps/api/tests/test_failover_e2e_dispatch.py | 2026-05-06 @ Asia/Taipei +# 2026-05-06 Codex — 188 不再作為 Ollama Provider;驗證 ollama_local dispatch。 """ E2E:executor dispatch 層驗證 =============================== 測試覆蓋(補全 B4 — 整合測試只驗決策層,未驗執行層): -1. registry 確實有 ollama_188 provider(B1 修復後基本健全性) -2. Ollama188Provider.is_enabled 在有 OLLAMA_FALLBACK_URL 時為 True -3. Ollama188Provider.is_enabled 在 OLLAMA_FALLBACK_URL 空字串時為 False -4. Ollama188Provider.analyze() 真的把 HTTP 打到 OLLAMA_FALLBACK_URL(攔截 httpx) -5. executor.execute(provider_order=["ollama_188"]) 真的路由到 188 URL +1. registry 確實有 ollama_local provider,且沒有 ollama_188 provider +2. OllamaLocalProvider.is_enabled 在有 OLLAMA_FALLBACK_URL 時為 True +3. OllamaLocalProvider.is_enabled 在 OLLAMA_FALLBACK_URL 空字串時為 False +4. OllamaLocalProvider.analyze() 真的把 HTTP 打到 OLLAMA_FALLBACK_URL(攔截 httpx) +5. executor.execute(provider_order=["ollama_local"]) 真的路由到 local URL 6. Gemini quota pipeline 並行 5 次不超發(B3 atomic 驗證) 7. Gemini quota TTL 第一次呼叫即設定 """ @@ -28,31 +27,30 @@ import pytest # ============================================================================= -def test_registry_has_ollama_188_provider(): - """B1 基本健全性:_init_registry() 後 registry 必須有 ollama_188""" +def test_registry_has_ollama_local_provider_without_ollama_188(): + """_init_registry() 後 registry 必須有 ollama_local,且不得有 ollama_188""" from src.services.ai_router import _init_registry registry = _init_registry() # registry.get() 只返回 is_enabled=True 的 provider # 用 _providers dict 直接檢查(不管 is_enabled) - assert "ollama_188" in registry._providers, ( - "ollama_188 not found in registry._providers — B1 fix 未生效" - ) + assert "ollama_local" in registry._providers + assert "ollama_188" not in registry._providers -def test_ollama_188_provider_name(): - """Ollama188Provider.name == 'ollama_188'""" - from src.services.ai_providers.ollama import Ollama188Provider +def test_ollama_local_provider_name(): + """OllamaLocalProvider.name == 'ollama_local'""" + from src.services.ai_providers.ollama import OllamaLocalProvider - p = Ollama188Provider() - assert p.name == "ollama_188" + p = OllamaLocalProvider() + assert p.name == "ollama_local" -def test_ollama_188_provider_privacy_level(): - """Ollama188Provider.privacy_level == 'local'(本地推理,可接機密資料)""" - from src.services.ai_providers.ollama import Ollama188Provider +def test_ollama_local_provider_privacy_level(): + """OllamaLocalProvider.privacy_level == 'local'(本地推理,可接機密資料)""" + from src.services.ai_providers.ollama import OllamaLocalProvider - p = Ollama188Provider() + p = OllamaLocalProvider() assert p.privacy_level == "local" @@ -61,45 +59,44 @@ def test_ollama_188_provider_privacy_level(): # ============================================================================= -def test_ollama_188_is_enabled_with_fallback_url(monkeypatch): - """OLLAMA_FALLBACK_URL 有值 + ENABLE_OLLAMA_188 未設 → is_enabled == True""" - from src.services.ai_providers.ollama import Ollama188Provider - from src.core.config import get_settings +def test_ollama_local_is_enabled_with_fallback_url(monkeypatch): + """OLLAMA_FALLBACK_URL 有值 + ENABLE_OLLAMA_LOCAL 未設 → is_enabled == True""" + from src.services.ai_providers.ollama import OllamaLocalProvider - monkeypatch.setenv("ENABLE_OLLAMA_188", "true") + monkeypatch.setenv("ENABLE_OLLAMA_LOCAL", "true") # patch settings 的 OLLAMA_FALLBACK_URL mock_settings = MagicMock() - mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.188:11434" + mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.111:11434" mock_settings.OPENCLAW_TIMEOUT = "60" - p = Ollama188Provider() + p = OllamaLocalProvider() # 直接 patch module-level settings 物件 with patch("src.services.ai_providers.ollama.settings", mock_settings): assert p.is_enabled is True -def test_ollama_188_is_disabled_without_fallback_url(monkeypatch): - """OLLAMA_FALLBACK_URL 空字串 → is_enabled == False(188 節點未設定)""" - from src.services.ai_providers.ollama import Ollama188Provider +def test_ollama_local_is_disabled_without_fallback_url(monkeypatch): + """OLLAMA_FALLBACK_URL 空字串 → is_enabled == False(local 節點未設定)""" + from src.services.ai_providers.ollama import OllamaLocalProvider - monkeypatch.setenv("ENABLE_OLLAMA_188", "true") + monkeypatch.setenv("ENABLE_OLLAMA_LOCAL", "true") mock_settings = MagicMock() mock_settings.OLLAMA_FALLBACK_URL = "" - p = Ollama188Provider() + p = OllamaLocalProvider() with patch("src.services.ai_providers.ollama.settings", mock_settings): assert p.is_enabled is False -def test_ollama_188_is_disabled_by_env_flag(monkeypatch): - """ENABLE_OLLAMA_188=false → is_enabled == False(即使有 URL)""" - from src.services.ai_providers.ollama import Ollama188Provider +def test_ollama_local_is_disabled_by_env_flag(monkeypatch): + """ENABLE_OLLAMA_LOCAL=false → is_enabled == False(即使有 URL)""" + from src.services.ai_providers.ollama import OllamaLocalProvider - monkeypatch.setenv("ENABLE_OLLAMA_188", "false") + monkeypatch.setenv("ENABLE_OLLAMA_LOCAL", "false") mock_settings = MagicMock() - mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.188:11434" + mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.111:11434" - p = Ollama188Provider() + p = OllamaLocalProvider() with patch("src.services.ai_providers.ollama.settings", mock_settings): assert p.is_enabled is False @@ -110,14 +107,14 @@ def test_ollama_188_is_disabled_by_env_flag(monkeypatch): @pytest.mark.asyncio -async def test_ollama_188_analyze_dispatches_to_fallback_url(): +async def test_ollama_local_analyze_dispatches_to_fallback_url(): """ - B4 核心:Ollama188Provider.analyze() 必須把 HTTP 打到 OLLAMA_FALLBACK_URL。 - 攔截 httpx.AsyncClient.post,記錄實際呼叫 URL,斷言包含 188 IP。 + B4 核心:OllamaLocalProvider.analyze() 必須把 HTTP 打到 OLLAMA_FALLBACK_URL。 + 攔截 httpx.AsyncClient.post,記錄實際呼叫 URL,斷言包含本地 fallback IP。 """ - from src.services.ai_providers.ollama import Ollama188Provider + from src.services.ai_providers.ollama import OllamaLocalProvider - FALLBACK_URL = "http://192.168.0.188:11434" + FALLBACK_URL = "http://192.168.0.111:11434" captured_urls: list[str] = [] mock_response = MagicMock() @@ -149,7 +146,7 @@ async def test_ollama_188_analyze_dispatches_to_fallback_url(): "top_p": 0.9, }) - provider = Ollama188Provider() + provider = OllamaLocalProvider() with patch("src.services.ai_providers.ollama.settings", mock_settings): with patch("src.services.ai_providers.ollama.get_model_registry", return_value=mock_registry): @@ -159,45 +156,45 @@ async def test_ollama_188_analyze_dispatches_to_fallback_url(): result = await provider.analyze("test prompt", context={}) assert len(captured_urls) > 0, "analyze() 未發出任何 HTTP 請求" - assert any("192.168.0.188" in url for url in captured_urls), ( - f"HTTP 請求未打到 188,實際 URL: {captured_urls}" + assert any("192.168.0.111" in url for url in captured_urls), ( + f"HTTP 請求未打到 local fallback,實際 URL: {captured_urls}" ) - assert result.provider == "ollama_188" + assert result.provider == "ollama_local" @pytest.mark.asyncio -async def test_ollama_188_analyze_returns_error_when_no_fallback_url(): +async def test_ollama_local_analyze_returns_error_when_no_fallback_url(): """OLLAMA_FALLBACK_URL 未設定 → analyze() 應返回 success=False,不發 HTTP""" - from src.services.ai_providers.ollama import Ollama188Provider + from src.services.ai_providers.ollama import OllamaLocalProvider mock_settings = MagicMock() mock_settings.OLLAMA_FALLBACK_URL = "" - provider = Ollama188Provider() + provider = OllamaLocalProvider() with patch("src.services.ai_providers.ollama.settings", mock_settings): result = await provider.analyze("test prompt") assert result.success is False - assert result.provider == "ollama_188" + assert result.provider == "ollama_local" assert "OLLAMA_FALLBACK_URL" in (result.error or "") @pytest.mark.asyncio -async def test_executor_dispatches_ollama_188_to_fallback_url(): +async def test_executor_dispatches_ollama_local_to_fallback_url(): """ - B4 執行層:AIRouterExecutor.execute(provider_order=["ollama_188"]) - 應路由到 Ollama188Provider,且 HTTP 打到 OLLAMA_FALLBACK_URL。 + B4 執行層:AIRouterExecutor.execute(provider_order=["ollama_local"]) + 應路由到 OllamaLocalProvider,且 HTTP 打到 OLLAMA_FALLBACK_URL。 """ from src.services.ai_router import AIProviderRegistry, AIRouterExecutor, reset_ai_router - from src.services.ai_providers.ollama import Ollama188Provider + from src.services.ai_providers.ollama import OllamaLocalProvider from src.services.ai_providers.interfaces import AIResult reset_ai_router() - FALLBACK_URL = "http://192.168.0.188:11434" + FALLBACK_URL = "http://192.168.0.111:11434" captured_urls: list[str] = [] - # 建立真實 registry,只登錄 ollama_188 + # 建立真實 registry,只登錄 ollama_local registry = AIProviderRegistry() # mock analyze 讓它回傳成功,但驗 URL 路徑 @@ -206,15 +203,15 @@ async def test_executor_dispatches_ollama_188_to_fallback_url(): return AIResult( raw_response='{"action_title":"ok","confidence":0.9}', success=True, - provider="ollama_188", + provider="ollama_local", tokens=10, ) mock_settings_global = MagicMock() mock_settings_global.OLLAMA_FALLBACK_URL = FALLBACK_URL - # 建立 Ollama188Provider,mock 其 analyze + is_enabled - provider = Ollama188Provider() + # 建立 OllamaLocalProvider,mock 其 analyze + is_enabled + provider = OllamaLocalProvider() provider.analyze = fake_analyze # type: ignore[method-assign] # 強制 is_enabled = True(繞過 settings patch 的複雜度) @@ -233,14 +230,14 @@ async def test_executor_dispatches_ollama_188_to_fallback_url(): mock_settings.MOCK_MODE = False result = await executor.execute( prompt="test alert", - provider_order=["ollama_188"], + provider_order=["ollama_local"], context={}, ) assert result.success is True, f"execute 失敗: {result.error}" - assert result.provider == "ollama_188", f"provider 不是 ollama_188: {result.provider}" - assert any("192.168.0.188" in u for u in captured_urls), ( - f"HTTP 未打到 188,captured: {captured_urls}" + assert result.provider == "ollama_local", f"provider 不是 ollama_local: {result.provider}" + assert any("192.168.0.111" in u for u in captured_urls), ( + f"HTTP 未打到 local fallback,captured: {captured_urls}" ) diff --git a/apps/api/tests/test_model_regression.py b/apps/api/tests/test_model_regression.py index d868f14f..d39b4614 100644 --- a/apps/api/tests/test_model_regression.py +++ b/apps/api/tests/test_model_regression.py @@ -16,7 +16,7 @@ import httpx import pytest # Ollama 伺服器配置 -OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.188:11434") +OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.111:11434") DEFAULT_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5:7b-instruct") TIMEOUT = 300 # 秒 (CPU 推理模式需 ~222-666 秒,見 2026-03-26 評估) @@ -111,7 +111,7 @@ async def check_ollama_available() -> bool: @pytest.mark.integration class TestModelRegression: - """模型回歸測試 — 需要 Ollama 服務 (192.168.0.188:11434)""" + """模型回歸測試 — 需要 Ollama 服務(預設 111,可用 OLLAMA_URL 覆寫)""" @pytest.fixture(autouse=True) async def check_ollama(self): diff --git a/apps/api/tests/test_model_version_probe.py b/apps/api/tests/test_model_version_probe.py index 1c916808..9c88d757 100644 --- a/apps/api/tests/test_model_version_probe.py +++ b/apps/api/tests/test_model_version_probe.py @@ -90,8 +90,8 @@ class TestProbeOllamaVersion: assert isinstance(info.captured_at, datetime) @pytest.mark.asyncio - async def test_success_188_provider(self): - """188 URL → provider='ollama_188'""" + async def test_success_local_provider(self): + """111 / local proxy URL → provider='ollama_local'""" model_entry = { "name": "deepseek-r1:14b", "modified_at": "2026-04-02T00:00:00Z", @@ -106,10 +106,10 @@ class TestProbeOllamaVersion: with patch("httpx.AsyncClient", return_value=mock_client): info = await probe_ollama_version( - "http://192.168.0.188:11434", "deepseek-r1:14b" + "http://192.168.0.111:11434", "deepseek-r1:14b" ) - assert info.provider == "ollama_188" + assert info.provider == "ollama_local" @pytest.mark.asyncio async def test_model_not_found_raises(self): @@ -279,7 +279,7 @@ class TestProbeOpenclawNemoVersion: mock_settings = MagicMock() mock_settings.OPENCLAW_DEFAULT_MODEL = "deepseek-r1:14b" - mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.188:11434" + mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.111:11434" with patch("src.services.model_version_probe.settings", mock_settings), \ patch("httpx.AsyncClient", return_value=mock_client): @@ -301,7 +301,7 @@ class TestProbeOpenclawNemoVersion: mock_settings = MagicMock() mock_settings.OPENCLAW_DEFAULT_MODEL = "deepseek-r1:14b" - mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.188:11434" + mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.111:11434" with patch("src.services.model_version_probe.settings", mock_settings), \ patch("httpx.AsyncClient", return_value=mock_client): @@ -333,7 +333,7 @@ class TestProbeAllProviders: """5 個 provider 全部成功 → 回傳 5 筆 ProviderVersionInfo""" fake_results = [ ProviderVersionInfo(provider="ollama", model="qwen2.5:7b-instruct", version="v1"), - ProviderVersionInfo(provider="ollama_188", model="qwen2.5:7b-instruct", version="v1"), + ProviderVersionInfo(provider="ollama_local", model="qwen2.5:7b-instruct", version="v1"), ProviderVersionInfo(provider="gemini", model="gemini-1.5-flash", version="gemini-1.5-flash"), ProviderVersionInfo(provider="claude", model="claude-haiku-4-5-20251001", version="claude-haiku-4-5-20251001"), ProviderVersionInfo(provider="openclaw_nemo", model="deepseek-r1:14b", version="v1"), @@ -347,7 +347,7 @@ class TestProbeAllProviders: mock_settings = MagicMock() mock_settings.OLLAMA_URL = "http://34.143.170.20:11434" # GCP-A(ADR-110) - mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.188:11434" + mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.111:11434" mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" with patch("src.services.model_version_probe.settings", mock_settings): @@ -364,8 +364,8 @@ class TestProbeAllProviders: raise RuntimeError("simulated failure") async def _fail_ollama(url, model): - if "188" in url: - raise RuntimeError("188 offline") + if "111" in url: + raise RuntimeError("local offline") return good with patch("src.services.model_version_probe.probe_ollama_version", side_effect=_fail_ollama), \ @@ -379,13 +379,13 @@ class TestProbeAllProviders: mock_settings = MagicMock() mock_settings.OLLAMA_URL = "http://34.143.170.20:11434" # GCP-A(ADR-110) - mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.188:11434" + mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.111:11434" mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct" with patch("src.services.model_version_probe.settings", mock_settings): results = await probe_all_providers() - # ollama(ok) + ollama_188(fail) + gemini(fail) + claude(ok) + openclaw_nemo(ok) → 3 + # ollama(ok) + ollama_local(fail) + gemini(fail) + claude(ok) + openclaw_nemo(ok) → 3 assert len(results) == 3 providers = {r.provider for r in results} assert "ollama" in providers diff --git a/apps/api/tests/test_model_version_tracker.py b/apps/api/tests/test_model_version_tracker.py index bd253648..b7e192a5 100644 --- a/apps/api/tests/test_model_version_tracker.py +++ b/apps/api/tests/test_model_version_tracker.py @@ -48,7 +48,7 @@ def _make_info(provider: str, version: str = "v1", digest: str | None = "sha256: def _make_five() -> list[ProviderVersionInfo]: return [ _make_info("ollama"), - _make_info("ollama_188"), + _make_info("ollama_local"), _make_info("gemini", digest=None), _make_info("claude", digest=None), _make_info("openclaw_nemo"), diff --git a/apps/api/tests/test_ollama_failover_manager.py b/apps/api/tests/test_ollama_failover_manager.py index 647c28c5..fe8a8aaf 100644 --- a/apps/api/tests/test_ollama_failover_manager.py +++ b/apps/api/tests/test_ollama_failover_manager.py @@ -310,7 +310,7 @@ class TestSelectProvider: ) with patch.object(manager, "_write_failover_audit", return_value=None): - result = await manager.select_provider() + await manager.select_provider() # 並行 check 三台主機(GCP-A / GCP-B / Local) assert mock_monitor.check.call_count == 3 @@ -625,7 +625,6 @@ class TestWriteFailoverAudit: @pytest.mark.asyncio async def test_audit_uses_structlog_not_db(self): """_write_failover_audit 應呼叫 structlog,不呼叫 DB""" - import structlog manager = _make_manager() from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult @@ -657,22 +656,22 @@ class TestWriteFailoverAudit: # ============================================================================= -# B2: AIProviderEnum.OLLAMA_188 存在 -# 2026-04-25 critic-fix Part2 by Claude Engineer-C2 +# B2: AIProviderEnum.OLLAMA_LOCAL 存在 +# 2026-05-06 Codex — 188 不再作為 Ollama Provider # ============================================================================= -class TestAIProviderEnumOllama188: - """B2 修復驗證:AIProviderEnum.OLLAMA_188 存在且 PROVIDER_LATENCY_BUDGET 有對應值""" +class TestAIProviderEnumOllamaLocal: + """B2 修復驗證:AIProviderEnum.OLLAMA_LOCAL 存在且 PROVIDER_LATENCY_BUDGET 有對應值""" - def test_ollama_188_enum_exists(self): + def test_ollama_local_enum_exists(self): from src.services.ai_router import AIProviderEnum - assert AIProviderEnum.OLLAMA_188.value == "ollama_188" + assert AIProviderEnum.OLLAMA_LOCAL.value == "ollama_local" - def test_ollama_188_in_latency_budget(self): + def test_ollama_local_in_latency_budget(self): from src.services.ai_router import AIProviderEnum, PROVIDER_LATENCY_BUDGET - assert AIProviderEnum.OLLAMA_188 in PROVIDER_LATENCY_BUDGET - assert PROVIDER_LATENCY_BUDGET[AIProviderEnum.OLLAMA_188] == 120000 + assert AIProviderEnum.OLLAMA_LOCAL in PROVIDER_LATENCY_BUDGET + assert PROVIDER_LATENCY_BUDGET[AIProviderEnum.OLLAMA_LOCAL] == 90000 # ============================================================================= diff --git a/apps/api/tests/test_ollama_health_monitor.py b/apps/api/tests/test_ollama_health_monitor.py index d67231cf..740a438e 100644 --- a/apps/api/tests/test_ollama_health_monitor.py +++ b/apps/api/tests/test_ollama_health_monitor.py @@ -42,7 +42,7 @@ from src.services.ollama_health_monitor import ( # ============================================================================= HOST = "http://34.143.170.20:11434" # GCP-A Primary(ADR-110 2026-05-03) -HOST_188 = "http://192.168.0.188:11434" # 歷史遺留參考常數(已移出主路由) +HOST_LOCAL = "http://192.168.0.111:11434" # Local fallback(已移出 188 主路由) @pytest.fixture(autouse=True) diff --git a/apps/api/tests/test_prompt_validation.py b/apps/api/tests/test_prompt_validation.py index cdadaefc..f8533450 100644 --- a/apps/api/tests/test_prompt_validation.py +++ b/apps/api/tests/test_prompt_validation.py @@ -18,7 +18,7 @@ import pytest from src.core.prompts import OPENCLAW_TEST_PROMPT # Ollama 配置 -OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.188:11434") +OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.111:11434") DEFAULT_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5:7b-instruct") TIMEOUT = 300 # 秒 (CPU 推理模式需 ~222-666 秒,見 2026-03-26 評估) diff --git a/docs/runbooks/RUNBOOK-OLLAMA-FAILOVER.md b/docs/runbooks/RUNBOOK-OLLAMA-FAILOVER.md index c6f65d39..931357a4 100644 --- a/docs/runbooks/RUNBOOK-OLLAMA-FAILOVER.md +++ b/docs/runbooks/RUNBOOK-OLLAMA-FAILOVER.md @@ -13,15 +13,15 @@ Dashboard 路徑:`Ollama 容災監控`(uid: `ollama-failover-p23`) ### Panel 1 — Ollama 可用性 (Stat) -**看什麼**:`up{job=~"ollama_111|ollama_188"}` × 100,顯示每台 Ollama 主機的 scrape 存活狀態。 +**看什麼**:`up{job=~"ollama_gcp_a|ollama_gcp_b|ollama_local|ollama_111"}` × 100,顯示每個 Ollama provider endpoint 的 scrape 存活狀態。 | 顏色 | 意義 | |------|------| | 綠色 100% | Prometheus 探測正常,主機在線 | -| 黃色 50% | 一台離線,另一台在線(容災中) | -| 紅色 0% | 兩台全離線,高風險 | +| 黃色 | 部分 endpoint 離線,系統應進入容災 | +| 紅色 0% | Ollama provider pool 全離線,高風險 | -**注意**:此面板反映 Prometheus scrape 狀態,需要 scrape job 命名為 `ollama_111` / `ollama_188`。 +**注意**:此面板反映 Prometheus scrape 狀態,需要 scrape job 命名對齊 `ollama_gcp_a` / `ollama_gcp_b` / `ollama_local`。 設定檔位於 `ops/monitoring/generated/prometheus-scrape-generated.yaml`。 --- @@ -47,9 +47,10 @@ Dashboard 路徑:`Ollama 容災監控`(uid: `ollama-failover-p23`) | 分布 | 意義 | |------|------| -| ollama 佔 >90% | 正常,111 健康 | -| gemini 佔多數 | 111 SLOW/DEGRADED/OFFLINE,容災中 | -| ollama_188 出現 | Gemini 配額耗盡備援,或 111 和 Gemini 同時失敗 | +| ollama / ollama_gcp_a 佔 >90% | 正常,GCP-A 健康 | +| ollama_gcp_b 佔多數 | GCP-A SLOW/DEGRADED/OFFLINE,容災到 GCP-B | +| ollama_local 出現 | GCP-A/B 均不可用,容災到 111 local | +| gemini 佔多數 | Ollama provider pool 全部不可用,使用付費備援 | | 全部 nemotron/claude | 極端情況,所有主力 provider 失敗 | --- @@ -71,10 +72,10 @@ Dashboard 路徑:`Ollama 容災監控`(uid: `ollama-failover-p23`) ### `OllamaInstanceDown` — Ollama 主機離線 -**觸發條件**:`up{job=~"ollama_111|ollama_188"} == 0` 持續 2 分鐘。 +**觸發條件**:`up{job=~"ollama_gcp_a|ollama_gcp_b|ollama_local|ollama_111"} == 0` 持續 2 分鐘。 **影響評估**: -- 系統應已自動切至 Gemini(查 Panel 3 確認) +- 系統應已依序切至 GCP-B / 111 local / Gemini(查 Panel 3 確認) - 查 Panel 4 是否有 Failover 計數上升 **排查步驟**: @@ -82,11 +83,9 @@ Dashboard 路徑:`Ollama 容災監控`(uid: `ollama-failover-p23`) ```bash # 步驟 1:確認主機存活 ping -c 3 192.168.0.111 -ping -c 3 192.168.0.188 # 步驟 2:SSH 進主機確認 ollama 服務狀態 ssh wooo@192.168.0.111 'systemctl status ollama' -ssh wooo@192.168.0.188 'systemctl status ollama' # 步驟 3:查 ollama 最近的 journal log ssh wooo@192.168.0.111 'journalctl -u ollama -n 50 --no-pager' @@ -210,8 +209,9 @@ ssh wooo@192.168.0.111 'systemctl status ollama && nvidia-smi' | Metric | 類型 | 狀態 | 說明 | |--------|------|------|------| -| `up{job="ollama_111"}` | Gauge | ✅ 現有 | Prometheus scrape 存活 | -| `up{job="ollama_188"}` | Gauge | ✅ 現有 | Prometheus scrape 存活 | +| `up{job="ollama_gcp_a"}` | Gauge | ✅ 現有 | Prometheus scrape 存活 | +| `up{job="ollama_gcp_b"}` | Gauge | ✅ 現有 | Prometheus scrape 存活 | +| `up{job="ollama_local"}` | Gauge | ✅ 現有 | Prometheus scrape 存活 | | `ollama_failover_triggered_total` | Counter | ✅ P2.3 補入 | failover 切換次數,labels: from_provider, to_provider | | `ollama_recovery_triggered_total` | Counter | ✅ P2.3 補入 | recovery 切回次數,labels: from_provider | | `ollama_health_status{host}` | Gauge | ✅ P2.3 補入 | 健康狀態 1=healthy, 0=not_healthy | diff --git a/k8s/awoooi-prod/04-configmap.yaml.patch-188-fallback b/k8s/awoooi-prod/04-configmap.yaml.patch-188-fallback deleted file mode 100644 index e252be70..00000000 --- a/k8s/awoooi-prod/04-configmap.yaml.patch-188-fallback +++ /dev/null @@ -1,34 +0,0 @@ -# ============================================================================ -# PATCH: 188 CPU-only Ollama 備援端點 -# 日期: 2026-04-25 (台北時區) -# 負責人: ogt + Claude Sonnet 4.6 -# ADR 參考: plan_complete_v3.md P0.5 -# 診斷實測數據: -# 主機: 192.168.0.188, Intel Xeon Silver 4214 @ 2.2GHz, 12 核, CPU-only -# RAM: 62GB (used 14GB), Disk: 982GB (used 221GB) -# GPU: 無 -# 現有模型: qwen2.5:7b-instruct (4.5GB), llama3.2:3b (1.9GB), -# deepseek-r1:14b (8.5GB), nomic-embed-text (261MB) -# 推理延遲實測: qwen2.5:7b-instruct → total=111s, eval_rate=0.09 token/s -# llama3.2:3b → total=155s (cold start, 比 7b 更慢) -# 目標 ~30s 無法達到 (CPU 推理硬上限 ~0.09 token/s) -# 決策: qwen2.5:7b-instruct 已存在,設為備援 (111s 延遲,使用者需知情) -# 連通性: 110 → 188:11434 ✅ 已驗證 -# ⚠️ 注意: 188 推理極慢(~111s),應只在 111 GPU Ollama 完全失效時啟用 -# 建議: 程式碼層應設 OLLAMA_FALLBACK_188_TIMEOUT_SEC = 150 -# ============================================================================ -# -# 將以下兩行加入 /Users/ogt/awoooi/k8s/awoooi-prod/04-configmap.yaml -# 建議位置: OLLAMA_URL 行 (第 20 行) 之後 -# -# --- 新增內容 --- - # 2026-04-25 ogt + Claude Sonnet 4.6: 188 CPU-only Ollama 備援 (plan_complete_v3 P0.5) - # ⚠️ 188 推理延遲實測 ~111s (0.09 token/s, CPU-only Xeon 4214),僅作 111 完全失效時的降級備援 - # 模型已存在: qwen2.5:7b-instruct (4.5GB), 無需重拉 - OLLAMA_FALLBACK_188: "http://192.168.0.188:11434" - OLLAMA_188_MODEL: "qwen2.5:7b-instruct" -# --- 新增內容結束 --- -# -# 使用方式 (需用戶 review 後手動 apply): -# kubectl -n awoooi-prod apply -f k8s/awoooi-prod/04-configmap.yaml -# kubectl -n awoooi-prod rollout restart deployment/awoooi-api diff --git a/ops/monitoring/generated/blackbox-targets-generated.yaml b/ops/monitoring/generated/blackbox-targets-generated.yaml index 9866993c..c4dac3b5 100644 --- a/ops/monitoring/generated/blackbox-targets-generated.yaml +++ b/ops/monitoring/generated/blackbox-targets-generated.yaml @@ -26,8 +26,18 @@ - labels: criticality: P0 owner: ai-team - service: ollama - url: http://192.168.0.188:11434/api/tags + service: ollama-gcp-a + url: http://192.168.0.110:11435/api/tags +- labels: + criticality: P0 + owner: ai-team + service: ollama-gcp-b + url: http://192.168.0.110:11436/api/tags +- labels: + criticality: P0 + owner: ai-team + service: ollama-local + url: http://192.168.0.110:11437/api/tags - labels: criticality: P0 owner: ai-team diff --git a/ops/monitoring/generated/prometheus-scrape-generated.yaml b/ops/monitoring/generated/prometheus-scrape-generated.yaml index 5e8ce209..5c33abfa 100644 --- a/ops/monitoring/generated/prometheus-scrape-generated.yaml +++ b/ops/monitoring/generated/prometheus-scrape-generated.yaml @@ -92,7 +92,9 @@ scrape_configs: service: ollama type: docker targets: - - 192.168.0.188:11434 + - 192.168.0.110:11435 + - 192.168.0.110:11436 + - 192.168.0.110:11437 - job_name: openclaw static_configs: - labels: diff --git a/ops/monitoring/grafana/dashboards/ollama_failover.json b/ops/monitoring/grafana/dashboards/ollama_failover.json index 2fda28c1..b185e565 100644 --- a/ops/monitoring/grafana/dashboards/ollama_failover.json +++ b/ops/monitoring/grafana/dashboards/ollama_failover.json @@ -82,11 +82,11 @@ "textMode": "auto" }, "title": "Ollama 可用性", - "description": "up{job=~\"ollama_111|ollama_188\"} × 100\n- 綠色 100% = 主機在線\n- 紅色 0% = 主機離線(容災應已觸發)\n\n資料來源: Prometheus scrape job ollama_111 / ollama_188", + "description": "up{job=~\"ollama_gcp_a|ollama_gcp_b|ollama_local|ollama_111\"} × 100\n- 綠色 100% = 主機在線\n- 紅色 0% = 主機離線(容災應已觸發)\n\n資料來源: Prometheus scrape job ollama_gcp_a / ollama_gcp_b / ollama_local", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "up{job=~\"ollama_111|ollama_188\"} * 100", + "expr": "up{job=~\"ollama_gcp_a|ollama_gcp_b|ollama_local|ollama_111\"} * 100", "legendFormat": "{{ job }}", "refId": "A" } @@ -188,7 +188,7 @@ "tooltip": { "mode": "single", "sort": "none" } }, "title": "AI Provider 路由分布", - "description": "sum by (provider) (rate(ai_router_selected_provider_total[5m]))\n- 正常狀態: ollama 佔大多數\n- failover 中: gemini / ollama_188 比例上升\n- 全走 gemini = 111 完全 offline\n\n資料來源: OLLAMA_FAILOVER_TRIGGERED_TOTAL + AI_ROUTER_PROVIDER_TOTAL (src/core/metrics.py)", + "description": "sum by (provider) (rate(ai_router_selected_provider_total[5m]))\n- 正常狀態: ollama / ollama_gcp_a 佔大多數\n- failover 中: ollama_gcp_b / ollama_local / gemini 比例上升\n- 全走 gemini = Ollama provider pool 完全 offline\n\n資料來源: OLLAMA_FAILOVER_TRIGGERED_TOTAL + AI_ROUTER_PROVIDER_TOTAL (src/core/metrics.py)", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, diff --git a/ops/monitoring/ollama_health_rules.yaml b/ops/monitoring/ollama_health_rules.yaml index b9ab5637..b6e653ca 100644 --- a/ops/monitoring/ollama_health_rules.yaml +++ b/ops/monitoring/ollama_health_rules.yaml @@ -6,7 +6,7 @@ # 部署方式: 手動合併至 alerts-unified.yml,或 scripts/ops/deploy-alerts.sh 支援多檔時直接引用 # # 標籤規範 (對齊 alerts-unified.yml): -# layer: systemd-188 | docker-188 (Ollama 跑在 188 主機) +# layer: ai-provider # team: ai # auto_repair: "true" | "false" # @@ -28,16 +28,16 @@ groups: # ----------------------------------------------------------------------- # 🔴 [ACTIVE] Ollama 主機離線 - # metric: up{job=~"ollama_111|ollama_188"} - # 前置條件: Prometheus scrape job 命名為 ollama_111 / ollama_188 + # metric: up{job=~"ollama_gcp_a|ollama_gcp_b|ollama_local|ollama_111"} + # 前置條件: Prometheus scrape job 命名對齊 ADR-110 provider pool # (設定位於 ops/monitoring/generated/prometheus-scrape-generated.yaml) # ----------------------------------------------------------------------- - alert: OllamaInstanceDown - expr: up{job=~"ollama_111|ollama_188"} == 0 + expr: up{job=~"ollama_gcp_a|ollama_gcp_b|ollama_local|ollama_111"} == 0 for: 2m labels: severity: critical - layer: systemd-188 + layer: ai-provider team: ai auto_repair: "false" alert_category: "ollama_failover" @@ -57,7 +57,7 @@ groups: for: 10m labels: severity: warning - layer: systemd-188 + layer: ai-provider team: ai auto_repair: "false" alert_category: "ollama_failover" diff --git a/scripts/ai_code_reviewer.py b/scripts/ai_code_reviewer.py index 8c5694cd..fd17792e 100755 --- a/scripts/ai_code_reviewer.py +++ b/scripts/ai_code_reviewer.py @@ -19,6 +19,7 @@ Exit Codes: """ import json +import os import subprocess import sys from pathlib import Path @@ -29,7 +30,7 @@ import httpx # Configuration # ============================================================================= -OLLAMA_URL = "http://192.168.0.188:11434/api/generate" +OLLAMA_URL = os.getenv("OLLAMA_GENERATE_URL", "http://192.168.0.111:11434/api/generate") MODEL = "llama3.2:8b" PROJECT_ROOT = Path(__file__).parent.parent RULES_FILE = PROJECT_ROOT / ".awoooi-agent-rules.md" diff --git a/scripts/health_check_session.sh b/scripts/health_check_session.sh index d93924cb..d98a2f69 100755 --- a/scripts/health_check_session.sh +++ b/scripts/health_check_session.sh @@ -62,7 +62,6 @@ check_url "ArgoCD (121)" "https://192.168.0.121:30443" echo "" echo "--- AI 推理層 ---" check_url "Ollama 111 GPU" "http://192.168.0.111:11434/api/tags" -check_url "Ollama 188 Hub" "http://192.168.0.188:11434/api/tags" echo "" echo "--- 觀測層 ---" diff --git a/scripts/setup-guardrails.sh b/scripts/setup-guardrails.sh index b1f8d5d4..24673ae4 100755 --- a/scripts/setup-guardrails.sh +++ b/scripts/setup-guardrails.sh @@ -92,10 +92,10 @@ fi echo "" echo "🤖 Step 6: Verifying Ollama connection..." -OLLAMA_URL="http://192.168.0.188:11434/api/tags" +OLLAMA_URL="${OLLAMA_URL:-http://192.168.0.111:11434/api/tags}" if curl -s --connect-timeout 5 "$OLLAMA_URL" > /dev/null 2>&1; then - echo " ✅ Ollama reachable at 192.168.0.188:11434" + echo " ✅ Ollama reachable at ${OLLAMA_URL}" # Check if llama3.2:8b is available MODELS=$(curl -s "$OLLAMA_URL" | grep -o '"name":"[^"]*"' || echo "")