Files
ewoooc/migrations/029_create_host_health_probes.sql
OoO ba5fe06b13
Some checks failed
CD Pipeline / deploy (push) Has been cancelled
fix: update ollama primary host
2026-06-18 14:24:55 +08:00

49 lines
2.4 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- =============================================================================
-- Migration 029: host_health_probes — 三主機健康歷史
-- Operation Ollama-First v5.0 — Phase 38
-- 日期: 2026-05-04 台北
-- 對應頁面: /observability/host_health
-- =============================================================================
-- 說明:
-- 原本 host_health 頁面每次刷新都即時 HTTP probe 三主機 /api/tags
-- 無歷史 → 無法看趨勢、無法回查「昨天 GCP 是不是有掛過」。
-- 本 migration 加表,每次 probe 寫一筆,留 30 天歷史(cron 清理)。
--
-- 寫入點:
-- 1. routes/admin_observability_routes.py::host_health_dashboard 每次 render 寫
-- 2. scheduler.py 加每 5 分鐘 background probe(即使無人開頁也記錄)
--
-- 索引設計:
-- - (probed_at DESC) 給最新 N 筆查詢
-- - (host_label, probed_at DESC) 給「某台主機過去 24h 趨勢」
-- =============================================================================
-- host_health_probes: one row per health probe of an Ollama host.
-- IF NOT EXISTS makes this statement a no-op when the table is already present.
CREATE TABLE IF NOT EXISTS host_health_probes (
    -- Surrogate key.
    id              BIGSERIAL       PRIMARY KEY,

    -- Probe timestamp; filled with NOW() when the writer does not supply one.
    probed_at       TIMESTAMPTZ     NOT NULL DEFAULT NOW(),

    -- Host label: 'Primary (GCP)' / 'Secondary (GCP)' / 'Fallback (111)'.
    host_label      VARCHAR(64)     NOT NULL,

    -- Probed base URL, e.g. http://34.87.90.216:11434.
    host_url        VARCHAR(256)    NOT NULL,

    healthy         BOOLEAN         NOT NULL,

    -- Mirrors _is_unhealthy(host).
    unhealthy_mark  BOOLEAN         NOT NULL DEFAULT FALSE,

    -- Number of loaded models (nullable; defaults to 0).
    models_count    INTEGER         DEFAULT 0,

    -- HTTP probe duration in milliseconds (nullable).
    response_ms     INTEGER,

    -- Exception text on failure (truncated to 500 characters by the writer;
    -- the TEXT type itself imposes no limit).
    error_msg       TEXT,

    -- Only the three known host labels are accepted.
    CONSTRAINT chk_host_label_029
        CHECK (host_label IN ('Primary (GCP)', 'Secondary (GCP)', 'Fallback (111)'))
);
-- Serves "latest N probes" lookups across all hosts.
CREATE INDEX IF NOT EXISTS idx_host_health_probes_at
    ON host_health_probes (probed_at DESC);

-- Serves per-host trend lookups (e.g. one host over the past 24h).
CREATE INDEX IF NOT EXISTS idx_host_health_probes_label_at
    ON host_health_probes (host_label, probed_at DESC);
-- Retention: keep 30 days of history. The scheduler runs this daily at 03:00:
--   DELETE FROM host_health_probes WHERE probed_at < NOW() - INTERVAL '30 days';

-- Catalog comments (stored in the database; text kept verbatim).
COMMENT ON TABLE host_health_probes
    IS '三主機 Ollama 健康歷史;每次 host_health 頁面 render 或 scheduler 5min cron 寫入';

COMMENT ON COLUMN host_health_probes.host_label
    IS 'services/ollama_service.py::get_host_label() 對應標籤';