#!/usr/bin/env python3
"""Operation Ollama-First v5.0 deploy doctor.

Run on host 188 or locally:  python3 scripts/deploy_doctor_v5.py

Checks the v5.0 deployment state and lists the manual steps that have not
been done yet.

Exit code: 0 = all green, 1 = at least one WARN, 2 = at least one FAIL.
"""
from __future__ import annotations

import os
import platform
import shutil
import subprocess
import sys
import urllib.request
from typing import Optional, Sequence, Tuple

# ─────────────────────────────────────────────────────────────────────────
# v5.0 env vars: name → (criticality, expected_value_or_None, hint)
# ─────────────────────────────────────────────────────────────────────────
ENV_SPEC = {
    # API keys (FAIL if missing — v5.0 breaks outright without these)
    'ANTHROPIC_API_KEY': ('FAIL', None, 'Claude SDK 用;需 https://console.anthropic.com 申請'),
    'GOOGLE_API_KEY': ('FAIL', None, 'Gemini fallback 用;舊有'),
    'DEEPSEEK_API_KEY': ('WARN', None, '若 DEEPSEEK_DIRECT_ENABLED=true 需要'),
    'TAVILY_API_KEY': ('WARN', None, 'MCP omnisearch 用;可選'),
    'EXA_API_KEY': ('WARN', None, 'MCP omnisearch 用;可選'),
    'TELEGRAM_ADMIN_CHAT_ID': ('WARN', None, 'Phase 28 PromotionGate Telegram 推播 audience'),
    'TELEGRAM_BOT_TOKEN': ('FAIL', None, 'Telegram 推播必備'),
    # Feature flags (must be ON to take effect)
    'MODEL_ROUTER_ENABLED': ('WARN', 'true', 'caller × context 動態路由(預設 true)'),
    'COST_THROTTLE_ENABLED': ('WARN', 'true', '成本超 110% 自動 throttle(預設 true)'),
    'MCP_ROUTER_ENABLED': ('WARN', 'true', 'MCP 4-server 統一路由(預設 false)'),
    'PPT_VISION_ENABLED': ('WARN', 'true', 'PPT minicpm-v 視覺審核(預設 false)'),
    'DEEPSEEK_DIRECT_ENABLED': ('INFO', None, '直連 DeepSeek API;省 NIM 中間層'),
    # RAG / embedding
    'RAG_EMBED_MODEL': ('INFO', 'bge-m3', 'embedding 模型;ADR-026 鎖定 bge-m3'),
    'RAG_EMBED_DIM': ('INFO', '1024', 'bge-m3 維度;migration 027 已寫死 1024'),
}

# Tables that must exist: table name → number of the migration creating it.
REQUIRED_TABLES = {
    'ai_calls': '024',
    'mcp_calls': '025',
    'ai_call_budgets': '025',
    'rag_query_log': '027',
    'learning_episodes': '028',
}

# The three Ollama hosts: (label, host:port).
OLLAMA_HOSTS = [
    ('Primary GCP', '34.143.170.20:11434'),
    ('Secondary GCP', '34.21.145.224:11434'),
    ('111 Mac', '192.168.0.111:11434'),
]

# ANSI SGR codes understood by color().
_ANSI_CODES = {'red': 31, 'green': 32, 'yellow': 33, 'cyan': 36, 'bold': 1}

# A secret is previewed by its first _SECRET_PREFIX_LEN characters, but only
# when it is at least _SECRET_MIN_LEN long, so short values never leak whole.
_SECRET_PREFIX_LEN = 8
_SECRET_MIN_LEN = 16


def color(s: str, c: str) -> str:
    """Wrap *s* in the ANSI colour *c*; return *s* untouched when not a TTY.

    Unknown colour names fall back to SGR code 0 (reset).
    """
    if not sys.stdout.isatty():
        return s
    return f'\x1b[{_ANSI_CODES.get(c, 0)}m{s}\x1b[0m'


def status(level: str) -> str:
    """Return the coloured badge for *level*, or *level* itself if unknown."""
    return {
        'OK': color(' OK ', 'green'),
        'WARN': color(' WARN ', 'yellow'),
        'FAIL': color(' FAIL ', 'red'),
        'INFO': color(' INFO ', 'cyan'),
    }.get(level, level)


def _mask_secret(value: str) -> str:
    """Return a log-safe preview of a secret value."""
    if len(value) < _SECRET_MIN_LEN:
        # Too short to preview: a prefix would reveal most or all of it.
        return '***'
    return value[:_SECRET_PREFIX_LEN] + '...'


# ─────────────────────────────────────────────────────────────────────────
# Checks
# ─────────────────────────────────────────────────────────────────────────
def check_env() -> Tuple[int, int]:
    """Check every variable in ENV_SPEC and print one line per variable.

    An unset (or blank) variable counts according to its criticality; INFO
    entries are reported but not counted.  A set variable whose value does
    not match the expected one (case-insensitive) always counts as a WARN.

    Returns:
        (warn_count, fail_count)
    """
    print(color('\n[1/5] 環境變數', 'bold'))
    warn = fail = 0
    for name, (crit, expected, hint) in ENV_SPEC.items():
        val = os.getenv(name, '').strip()
        if not val:
            print(f' {status(crit)} {name:30} <未設> — {hint}')
            if crit == 'FAIL':
                fail += 1
            elif crit == 'WARN':
                warn += 1
            continue
        if expected and val.lower() != expected.lower():
            print(f' {status("WARN")} {name:30} ={val!r} 期望={expected!r} — {hint}')
            warn += 1
            continue
        # Names containing KEY or TOKEN hold credentials; never echo them whole.
        is_secret = 'KEY' in name or 'TOKEN' in name
        shown = _mask_secret(val) if is_secret else val
        print(f' {status("OK")} {name:30} ={shown}')
    return warn, fail


def check_db() -> Tuple[int, int]:
    """Check that the required tables exist and ai_call_budgets is seeded.

    The check is skipped with a single WARN when DATABASE_URL is unset or
    psycopg2 is not installed.  A failed connection is a single FAIL.

    Returns:
        (warn_count, fail_count)
    """
    print(color('\n[2/5] 資料庫 migrations', 'bold'))
    warn = fail = 0
    db_url = os.getenv('DATABASE_URL', '').strip()
    if not db_url:
        print(f' {status("WARN")} DATABASE_URL 未設 — 跳過 DB 檢查')
        return 1, 0
    try:
        import psycopg2  # type: ignore
    except ImportError:
        print(f' {status("WARN")} psycopg2 not installed — 跳過 DB 檢查')
        return 1, 0
    try:
        conn = psycopg2.connect(db_url, connect_timeout=5)
    except Exception as e:
        print(f' {status("FAIL")} DB 無法連線: {type(e).__name__}: {str(e)[:100]}')
        return 0, 1
    try:
        cur = conn.cursor()
        missing = set()
        for table, mig in REQUIRED_TABLES.items():
            cur.execute(
                'SELECT 1 FROM information_schema.tables WHERE table_name = %s',
                (table,),
            )
            if cur.fetchone():
                print(f' {status("OK")} {table:25} (migration {mig})')
            else:
                print(f' {status("FAIL")} {table:25} 不存在 — 跑 migrations/{mig}_*.sql')
                fail += 1
                missing.add(table)
        # Seed check for ai_call_budgets.  Skipped when the table is missing:
        # that is already reported above, and querying it would raise and
        # count the same problem as a second FAIL.
        if 'ai_call_budgets' not in missing:
            cur.execute('SELECT COUNT(*) FROM ai_call_budgets')
            n = cur.fetchone()[0]
            if n < 8:
                print(f' {status("WARN")} ai_call_budgets 只有 {n} 筆 (期望 ≥8) — migration 025 seed 可能漏掉')
                warn += 1
            else:
                print(f' {status("OK")} ai_call_budgets {n} 筆 seed')
    except Exception as e:
        # Diagnostic boundary: report any query error instead of crashing.
        print(f' {status("FAIL")} 查詢失敗: {e}')
        fail += 1
    finally:
        conn.close()
    return warn, fail


def check_ollama(
    hosts: Optional[Sequence[Tuple[str, str]]] = None,
    timeout: float = 3,
) -> Tuple[int, int]:
    """Probe ``/api/tags`` on each Ollama host.

    Args:
        hosts: (label, "host:port") pairs to probe; defaults to OLLAMA_HOSTS.
        timeout: per-request timeout in seconds.

    Returns:
        (warn_count, fail_count): one WARN per unreachable or non-200 host,
        plus one FAIL when no host at all is healthy.
    """
    print(color('\n[3/5] Ollama 三主機', 'bold'))
    if hosts is None:
        hosts = OLLAMA_HOSTS
    warn = fail = 0
    healthy = 0
    for label, host in hosts:
        try:
            with urllib.request.urlopen(f'http://{host}/api/tags', timeout=timeout) as r:
                http_status = r.status
        except Exception as e:
            print(f' {status("WARN")} {label:18} {host} — {type(e).__name__}')
            warn += 1
            continue
        if http_status == 200:
            print(f' {status("OK")} {label:18} {host}')
            healthy += 1
        else:
            # Any other answer used to be dropped silently; surface it.
            print(f' {status("WARN")} {label:18} {host} — HTTP {http_status}')
            warn += 1
    total = len(hosts)
    if healthy == 0:
        print(f' {status("FAIL")} 三主機全 DOWN — v5.0 無法運作')
        fail += 1
    elif healthy < total:
        print(f' {status("INFO")} {healthy}/{total} healthy — retry 鏈有 fallback,可運作')
    return warn, fail


def check_libreoffice() -> Tuple[int, int]:
    """Check that LibreOffice is installed and runnable.

    A missing binary is a FAIL only when PPT_VISION_ENABLED is "true";
    otherwise it is reported as INFO and not counted.

    Returns:
        (warn_count, fail_count)
    """
    print(color('\n[4/5] LibreOffice (PPT vision)', 'bold'))
    bin_path = shutil.which('libreoffice') or shutil.which('soffice')
    if bin_path:
        try:
            ver = subprocess.check_output(
                [bin_path, '--version'], timeout=5, text=True,
            ).strip()
        except Exception:
            # Found on PATH but it exits non-zero, hangs, or cannot be run.
            print(f' {status("WARN")} {bin_path} 找到但跑不起來')
            return 1, 0
        print(f' {status("OK")} {bin_path} — {ver}')
        return 0, 0
    if os.getenv('PPT_VISION_ENABLED', '').lower() == 'true':
        print(f' {status("FAIL")} 未安裝;PPT_VISION_ENABLED=true 但 .pptx→.png 會失敗')
        print(f' apt install libreoffice # 188 上跑')
        return 0, 1
    print(f' {status("INFO")} 未安裝;PPT_VISION_ENABLED 未開,可忽略')
    return 0, 0


def check_mcp() -> Tuple[int, int]:
    """Probe ``<url>/health`` for each configured MCP server.

    Skipped entirely unless MCP_ROUTER_ENABLED is "true".  An unset URL or
    a failed request is one WARN each; this check never produces a FAIL.

    Returns:
        (warn_count, 0)
    """
    print(color('\n[5/5] MCP servers', 'bold'))
    if os.getenv('MCP_ROUTER_ENABLED', '').lower() != 'true':
        print(f' {status("INFO")} MCP_ROUTER_ENABLED=false — 跳過')
        return 0, 0
    warn = 0
    for var in ('MCP_FIRECRAWL_URL', 'MCP_OMNISEARCH_URL',
                'MCP_FILESYSTEM_URL', 'MCP_POSTGRES_URL'):
        url = os.getenv(var, '').strip()
        if not url:
            print(f' {status("WARN")} {var} 未設')
            warn += 1
            continue
        try:
            # Opened only to see whether the request succeeds; the context
            # manager closes the response.
            with urllib.request.urlopen(url.rstrip('/') + '/health', timeout=2):
                print(f' {status("OK")} {var:22} {url}')
        except Exception as e:
            print(f' {status("WARN")} {var:22} {url} — {type(e).__name__}')
            warn += 1
    return warn, 0


def main() -> int:
    """Run every check, print a summary and return the process exit code.

    Returns:
        0 when everything is green, 1 when there are WARNs only, 2 when
        there is at least one FAIL.
    """
    print(color('═══ Operation Ollama-First v5.0 Deploy Doctor ═══', 'bold'))
    print(f' cwd: {os.getcwd()}')
    # platform.node() instead of os.uname(): the latter does not exist on
    # Windows and would crash before any check runs.
    print(f' host: {platform.node()}')
    total_warn = total_fail = 0
    for fn in (check_env, check_db, check_ollama, check_libreoffice, check_mcp):
        try:
            w, f = fn()
            total_warn += w
            total_fail += f
        except Exception as e:
            # A crashing check must not hide the results of the others.
            print(f' {status("FAIL")} {fn.__name__} 自身爆炸: {type(e).__name__}: {e}')
            total_fail += 1
    print(color('\n═══ 總結 ═══', 'bold'))
    print(f' WARN: {total_warn}')
    print(f' FAIL: {total_fail}')
    if total_fail:
        print(color(' ❌ 有 FAIL — v5.0 部署未完成', 'red'))
        return 2
    if total_warn:
        print(color(' ⚠️ 有 WARN — 可運作但部分 feature 未啟用', 'yellow'))
        return 1
    print(color(' ✅ 全綠 — v5.0 部署完整', 'green'))
    return 0


if __name__ == '__main__':
    sys.exit(main())