Files
ewoooc/scripts/deploy_doctor_v5.py
OoO f2fbe5f929
All checks were successful
CD Pipeline / deploy (push) Successful in 2m33s
feat(p30): admin nav 互聯 + deploy doctor v5.0 腳本
(1) 6 個 admin 頁底部導覽全互聯(之前缺 Phase 29 兩頁的反向連結)
   - ai_calls / promotion_review / quality_trend / host_health
     全部加 |Budget|PPT Audit| 連結
   - 統帥從任一頁都可一鍵跳到其他 5 頁

(2) scripts/deploy_doctor_v5.py — 統帥手動待辦自助檢查
   5 階段檢查:env vars / DB migrations / Ollama 三主機 /
                LibreOffice / MCP servers
   - 14 個 v5.0 env vars(含 criticality 分級 FAIL/WARN/INFO)
   - 5 張 v5.0 必備 table(ai_calls/mcp_calls/ai_call_budgets/
     rag_query_log/learning_episodes)
   - ai_call_budgets seed ≥8 筆檢查
   - 三主機 /api/tags HTTP probe + healthy 數判定
   - 退出碼:0=全綠 1=WARN 2=FAIL(可進 CI)
   - SSH 188 / 本機都能跑:python3 scripts/deploy_doctor_v5.py

統帥之後想知道「v5.0 還有啥沒部署」直接跑 doctor 看清單,
不用再口頭追問哪些 env vars / 哪幾張 migration。
2026-05-04 13:48:06 +08:00

245 lines
9.4 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""Operation Ollama-First v5.0 deploy doctor.
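Run on host 188 or locally: python3 scripts/deploy_doctor_v5.py
Checks the v5.0 deployment state and lists the manual steps that are still outstanding.
Exit codes: 0 = all green, 1 = at least one WARN, 2 = at least one FAIL.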
"""
from __future__ import annotations
import os
import shutil
import subprocess
import sys
from typing import Tuple
# ─────────────────────────────────────────────────────────────────────────
# v5.0 env vars: name → (criticality, expected_value_or_None, hint)
# ─────────────────────────────────────────────────────────────────────────
# Each entry maps an environment-variable name to a 3-tuple
# (criticality, expected_value_or_None, hint):
#   * criticality is 'FAIL', 'WARN' or 'INFO'; check_env() reports an unset
#     variable at this level and only counts FAIL and WARN.
#   * expected value, when not None, is compared case-insensitively with the
#     actual value by check_env(); a mismatch is reported as WARN.
#   * hint is the operator-facing text printed next to the variable.
ENV_SPEC = {
    # API keys (FAIL if missing — v5.0 breaks outright without these;
    # the optional ones are marked WARN)
    'ANTHROPIC_API_KEY': ('FAIL', None, 'Claude SDK 用;需 https://console.anthropic.com 申請'),
    'GOOGLE_API_KEY': ('FAIL', None, 'Gemini fallback 用;舊有'),
    'DEEPSEEK_API_KEY': ('WARN', None, '若 DEEPSEEK_DIRECT_ENABLED=true 需要'),
    'TAVILY_API_KEY': ('WARN', None, 'MCP omnisearch 用;可選'),
    'EXA_API_KEY': ('WARN', None, 'MCP omnisearch 用;可選'),
    'TELEGRAM_ADMIN_CHAT_ID': ('WARN', None, 'Phase 28 PromotionGate Telegram 推播 audience'),
    'TELEGRAM_BOT_TOKEN': ('FAIL', None, 'Telegram 推播必備'),
    # Feature flags (must be ON to take effect)
    'MODEL_ROUTER_ENABLED': ('WARN', 'true', 'caller × context 動態路由(預設 true'),
    'COST_THROTTLE_ENABLED': ('WARN', 'true', '成本超 110% 自動 throttle預設 true'),
    'MCP_ROUTER_ENABLED': ('WARN', 'true', 'MCP 4-server 統一路由(預設 false'),
    'PPT_VISION_ENABLED': ('WARN', 'true', 'PPT minicpm-v 視覺審核(預設 false'),
    'DEEPSEEK_DIRECT_ENABLED': ('INFO', None, '直連 DeepSeek API省 NIM 中間層'),
    # RAG / embedding
    'RAG_EMBED_MODEL': ('INFO', 'bge-m3', 'embedding 模型ADR-026 鎖定 bge-m3'),
    'RAG_EMBED_DIM': ('INFO', '1024', 'bge-m3 維度migration 027 已寫死 1024'),
}
# Required migrations: table name -> migration number.
# check_db() looks each table up in information_schema.tables and, when it is
# missing, tells the operator to run migrations/<number>_*.sql.
REQUIRED_TABLES = {
    'ai_calls': '024',
    'mcp_calls': '025',
    'ai_call_budgets': '025',
    'rag_query_log': '027',
    'learning_episodes': '028',
}
# The three Ollama hosts as (label, 'host:port') pairs.
# check_ollama() probes each one with GET http://<host:port>/api/tags.
OLLAMA_HOSTS = [
    ('Primary GCP', '34.143.170.20:11434'),
    ('Secondary GCP', '34.21.145.224:11434'),
    ('111 Mac', '192.168.0.111:11434'),
]
def color(s: str, c: str) -> str:
    """Wrap *s* in an ANSI colour escape when stdout is a terminal.

    Args:
        s: Text to colourise.
        c: Colour/style name: 'red', 'green', 'yellow', 'cyan' or 'bold'.
            Any other name maps to SGR code 0.

    Returns:
        *s* unchanged when stdout is not a TTY (pipes, CI logs); otherwise
        *s* surrounded by the escape sequence and a trailing reset.
    """
    if sys.stdout.isatty():
        sgr = {'red': 31, 'green': 32, 'yellow': 33, 'cyan': 36, 'bold': 1}.get(c, 0)
        return f'\x1b[{sgr}m{s}\x1b[0m'
    # Not a terminal: emit plain text so redirected output stays readable.
    return s
def status(level: str) -> str:
    """Return the coloured badge for a severity level.

    Args:
        level: 'OK', 'WARN', 'FAIL' or 'INFO'.

    Returns:
        The space-padded label passed through color(); any other *level*
        is returned unchanged.
    """
    badges = {
        'OK': (' OK ', 'green'),
        'WARN': (' WARN ', 'yellow'),
        'FAIL': (' FAIL ', 'red'),
        'INFO': (' INFO ', 'cyan'),
    }
    if level not in badges:
        return level
    label, tint = badges[level]
    return color(label, tint)
# ─────────────────────────────────────────────────────────────────────────
# Checks
# ─────────────────────────────────────────────────────────────────────────
def check_env() -> Tuple[int, int]:
    """Check every environment variable listed in ENV_SPEC.

    An unset (or blank) variable is reported at its own criticality; only
    FAIL and WARN are counted. A set variable whose value differs
    (case-insensitively) from the expected value is reported as WARN.
    Otherwise the variable is reported OK, with secrets masked.

    Returns:
        (warn_count, fail_count).
    """
    print(color('\n[1/5] 環境變數', 'bold'))
    warn = fail = 0
    for name, (crit, expected, hint) in ENV_SPEC.items():
        val = os.getenv(name, '').strip()
        if not val:
            print(f' {status(crit)} {name:30} <未設> — {hint}')
            if crit == 'FAIL':
                fail += 1
            elif crit == 'WARN':
                warn += 1
            continue
        if expected and val.lower() != expected.lower():
            # Separate the expected value from the hint, matching the unset line.
            print(f' {status("WARN")} {name:30} ={val!r} 期望={expected!r} — {hint}')
            warn += 1
            continue
        if 'KEY' in name or 'TOKEN' in name:
            # Only reveal a prefix when it is a small part of the secret;
            # a short value would otherwise be printed (almost) in full.
            shown = val[:8] + '...' if len(val) >= 16 else '***'
        else:
            shown = val
        print(f' {status("OK")} {name:30} ={shown}')
    return warn, fail
def check_db() -> Tuple[int, int]:
    """Check that the v5.0 tables exist and that ai_call_budgets is seeded.

    The check is skipped with one WARN when DATABASE_URL is unset or
    psycopg2 is not importable. A connection failure counts as one FAIL.
    Each table in REQUIRED_TABLES that is missing counts as one FAIL.
    The ai_call_budgets seed count (expected >= 8, else WARN) is only
    queried when that table was found, so a missing table is reported once
    instead of also surfacing as a query error.

    Returns:
        (warn_count, fail_count).
    """
    print(color('\n[2/5] 資料庫 migrations', 'bold'))
    warn = fail = 0
    db_url = os.getenv('DATABASE_URL', '').strip()
    if not db_url:
        print(f' {status("WARN")} DATABASE_URL 未設 — 跳過 DB 檢查')
        return 1, 0
    try:
        import psycopg2  # type: ignore
    except ImportError:
        print(f' {status("WARN")} psycopg2 not installed — 跳過 DB 檢查')
        return 1, 0
    try:
        conn = psycopg2.connect(db_url, connect_timeout=5)
    except Exception as e:
        print(f' {status("FAIL")} DB 無法連線: {type(e).__name__}: {str(e)[:100]}')
        return 0, 1
    try:
        cur = conn.cursor()
        missing = set()
        for table, mig in REQUIRED_TABLES.items():
            cur.execute(
                'SELECT 1 FROM information_schema.tables WHERE table_name = %s',
                (table,),
            )
            if cur.fetchone():
                print(f' {status("OK")} {table:25} (migration {mig})')
            else:
                print(f' {status("FAIL")} {table:25} 不存在 — 跑 migrations/{mig}_*.sql')
                fail += 1
                missing.add(table)
        # Seed check for ai_call_budgets. Counting rows of a table that was
        # just reported missing would raise and double-count the failure.
        if 'ai_call_budgets' not in missing:
            cur.execute('SELECT COUNT(*) FROM ai_call_budgets')
            n = cur.fetchone()[0]
            if n < 8:
                print(f' {status("WARN")} ai_call_budgets 只有 {n} 筆 (期望 ≥8) — migration 025 seed 可能漏掉')
                warn += 1
            else:
                print(f' {status("OK")} ai_call_budgets {n} 筆 seed')
    except Exception as e:
        print(f' {status("FAIL")} 查詢失敗: {e}')
        fail += 1
    finally:
        conn.close()
    return warn, fail
def check_ollama() -> Tuple[int, int]:
    """Probe every host in OLLAMA_HOSTS with GET http://<host>/api/tags.

    A host that answers HTTP 200 is healthy. A host that raises (timeout,
    refused connection, HTTP error) or answers with any other status
    counts as one WARN. If no host is healthy the check adds one FAIL; if
    only some are healthy an INFO line is printed.

    Returns:
        (warn_count, fail_count).
    """
    print(color('\n[3/5] Ollama 三主機', 'bold'))
    warn = fail = 0
    try:
        import urllib.request
    except ImportError:
        return 0, 0
    total = len(OLLAMA_HOSTS)
    healthy = 0
    for label, host in OLLAMA_HOSTS:
        try:
            with urllib.request.urlopen(f'http://{host}/api/tags', timeout=3) as r:
                http_status = r.status
        except Exception as e:
            print(f' {status("WARN")} {label:18} {host} — {type(e).__name__}')
            warn += 1
            continue
        if http_status == 200:
            print(f' {status("OK")} {label:18} {host}')
            healthy += 1
        else:
            # A non-200 answer used to be dropped silently; report it.
            print(f' {status("WARN")} {label:18} {host} — HTTP {http_status}')
            warn += 1
    if healthy == 0:
        print(f' {status("FAIL")} 三主機全 DOWN — v5.0 無法運作')
        fail += 1
    elif healthy < total:
        # Derive the total from OLLAMA_HOSTS so the list can change freely.
        print(f' {status("INFO")} {healthy}/{total} healthy — retry 鏈有 fallback可運作')
    return warn, fail
def check_libreoffice() -> Tuple[int, int]:
    """Check that a LibreOffice binary is installed and runnable.

    Looks for 'libreoffice' then 'soffice' on PATH and runs it with
    --version (5 s timeout). A binary that is found but fails to run counts
    as one WARN. When no binary is found the result is one FAIL if
    PPT_VISION_ENABLED is 'true', otherwise only an INFO line.

    Returns:
        (warn_count, fail_count).
    """
    print(color('\n[4/5] LibreOffice (PPT vision)', 'bold'))
    bin_path = shutil.which('libreoffice') or shutil.which('soffice')
    if bin_path:
        # Only the subprocess call is guarded, so an output error is not
        # misreported as "binary found but will not run".
        try:
            ver = subprocess.check_output([bin_path, '--version'], timeout=5, text=True).strip()
        except Exception as e:
            print(f' {status("WARN")} {bin_path} 找到但跑不起來 — {type(e).__name__}')
            return 1, 0
        print(f' {status("OK")} {bin_path} — {ver}')
        return 0, 0
    # Strip the flag like check_env() does, so ' true ' is treated the same.
    if os.getenv('PPT_VISION_ENABLED', '').strip().lower() == 'true':
        print(f' {status("FAIL")} 未安裝PPT_VISION_ENABLED=true 但 .pptx→.png 會失敗')
        print(' apt install libreoffice # 188 上跑')
        return 0, 1
    print(f' {status("INFO")} 未安裝PPT_VISION_ENABLED 未開,可忽略')
    return 0, 0
def check_mcp() -> Tuple[int, int]:
    """Probe the four MCP server health endpoints.

    Skipped (no warnings) unless MCP_ROUTER_ENABLED is 'true'. For each of
    MCP_FIRECRAWL_URL, MCP_OMNISEARCH_URL, MCP_FILESYSTEM_URL and
    MCP_POSTGRES_URL, an unset variable or a failed GET <url>/health
    (2 s timeout) counts as one WARN. This check never reports FAIL.

    Returns:
        (warn_count, 0).
    """
    print(color('\n[5/5] MCP servers', 'bold'))
    # Strip the flag like check_env() does, so ' true ' is treated the same.
    if os.getenv('MCP_ROUTER_ENABLED', '').strip().lower() != 'true':
        print(f' {status("INFO")} MCP_ROUTER_ENABLED=false — 跳過')
        return 0, 0
    warn = 0
    try:
        import urllib.request
    except ImportError:
        return 0, 0
    for var in ('MCP_FIRECRAWL_URL', 'MCP_OMNISEARCH_URL', 'MCP_FILESYSTEM_URL', 'MCP_POSTGRES_URL'):
        url = os.getenv(var, '').strip()
        if not url:
            print(f' {status("WARN")} {var} 未設')
            warn += 1
            continue
        try:
            # The response body is not needed; urlopen raises on HTTP errors.
            with urllib.request.urlopen(url.rstrip('/') + '/health', timeout=2):
                pass
        except Exception as e:
            print(f' {status("WARN")} {var:22} {url} — {type(e).__name__}')
            warn += 1
        else:
            print(f' {status("OK")} {var:22} {url}')
    return warn, 0
def main() -> int:
    """Run all five checks and print a summary.

    Each check returns (warn_count, fail_count). A check that raises is
    reported and counted as one FAIL so the remaining checks still run.

    Returns:
        The process exit code: 2 if any FAIL was counted, 1 if any WARN
        was counted, otherwise 0.
    """
    # os.uname() exists only on Unix; platform.node() works everywhere.
    import platform

    print(color('═══ Operation Ollama-First v5.0 Deploy Doctor ═══', 'bold'))
    print(f' cwd: {os.getcwd()}')
    print(f' host: {platform.node()}')
    total_warn = total_fail = 0
    for fn in (check_env, check_db, check_ollama, check_libreoffice, check_mcp):
        try:
            w, f = fn()
            total_warn += w
            total_fail += f
        except Exception as e:
            print(f' {status("FAIL")} {fn.__name__} 自身爆炸: {type(e).__name__}: {e}')
            total_fail += 1
    print(color('\n═══ 總結 ═══', 'bold'))
    print(f' WARN: {total_warn}')
    print(f' FAIL: {total_fail}')
    if total_fail:
        print(color(' ❌ 有 FAIL — v5.0 部署未完成', 'red'))
        return 2
    if total_warn:
        print(color(' ⚠️ 有 WARN — 可運作但部分 feature 未啟用', 'yellow'))
        return 1
    print(color(' ✅ 全綠 — v5.0 部署完整', 'green'))
    return 0
# Script entry point: main()'s return value becomes the process exit code.
if __name__ == '__main__':
    sys.exit(main())