migrations 024/025/026 — 統一 LLM 遙測 + 預算告警 + RAG 一致性護欄 - 024: ai_calls 表 + 5 索引 + 6 CHECK constraint(H1/H2/M3/L3) - 025: mcp_calls + ai_call_budgets + 10 種子預算(含 ollama_secondary) - 026: ai_insights.embedding_signature + pgcrypto + CONCURRENTLY index A11 critic 三輪審查記錄完整保留: - Phase 1 schema review: 2 BLOCKER + 4 HIGH + 6 MEDIUM 全處理 - Phase 1 final sign-off: 0 BLOCKER + 2 HIGH + 4 MEDIUM - Phase 6 ADR review: 5 BLOCKER + 6 HIGH 全修 Operation Ollama-First v5.0 / Phase 0+1+6 護欄 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
156 lines
8.1 KiB
SQL
156 lines
8.1 KiB
SQL
-- =============================================================================
|
||
-- Migration 024: ai_calls — 統一 LLM 呼叫遙測表
|
||
-- Operation Ollama-First v5.0 — Phase 1
|
||
-- 日期: 2026-05-03 台北
|
||
-- 對應戰役: docs/phase0_audit_report_20260503.md(34 個 LLM 呼叫點,AIGenerationHistory 覆蓋率 11.8%)
|
||
-- =============================================================================
|
||
-- 說明:
|
||
-- 既有 ai_generation_history(4 處)/ ai_usage_tracking(通用)皆未串接其餘
|
||
-- 30 個 LLM 呼叫點,無法支撐 Phase 5 Token 日報、Phase 9 預算告警、Phase 11 RAG。
|
||
-- ai_calls 為 append-only 遙測表,所有 LLM 調用統一寫入;async fire-and-forget。
|
||
--
|
||
-- 設計決策(詳見 docs/phase1_db_design_20260503.md):
|
||
-- 1. BIGSERIAL:90 天保留 ~6.5M 筆,預留向上空間(INT4 上限 21 億夠用,但與 mcp_calls 保持一致用 BIGSERIAL)
|
||
-- 2. 不 partition(V1):6.5M / 90 天 ≈ 72k/day,PostgreSQL 單表可承受到 ~50M 才需要分區。
|
||
-- 若 Phase 5 後實測 query latency 退化或月寫入超 1M,再切 monthly partition。
|
||
-- 3. 90 天 hot data:以 created_at < NOW() - INTERVAL '90 days' DELETE,由 scheduler 跑(不 archive)
|
||
-- 4. cost_usd 預設 0:由 logger 端依 provider+model 試算填入;不可信時保 0 不誤導
|
||
-- 5. JSONB meta 不加 GIN index(V1):查詢需求未明,避免寫入放大;待 Phase 5 報表 patten 穩定再評估
|
||
--
|
||
-- 回滾腳本(緊急用):
|
||
-- DROP INDEX IF EXISTS idx_ai_calls_called_at;
|
||
-- DROP INDEX IF EXISTS idx_ai_calls_caller_called_at;
|
||
-- DROP INDEX IF EXISTS idx_ai_calls_provider_called_at;
|
||
-- DROP INDEX IF EXISTS idx_ai_calls_request_id;
|
||
-- DROP INDEX IF EXISTS idx_ai_calls_status_called_at;
|
||
-- DROP TABLE IF EXISTS ai_calls;
|
||
--
|
||
-- critic-A11 修補(2026-05-03):
|
||
-- B2 → 026 加 pgcrypto extension
|
||
-- H1 → provider CHECK 白名單(NOT VALID)
|
||
-- H2 → meta/error 大小 CHECK
|
||
-- M3 → status NOT NULL + fallback_to consistency CHECK
|
||
-- M5 → partial index 精確列舉 ('error','timeout','fallback')
|
||
-- L3 → duration_ms 範圍 CHECK
|
||
-- =============================================================================
|
||
|
||
CREATE TABLE IF NOT EXISTS ai_calls (
|
||
id BIGSERIAL PRIMARY KEY,
|
||
called_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||
|
||
-- 呼叫點識別(A1 audit 34 點命名表,logger 須限制在白名單;新增需 ADR)
|
||
-- hermes_analyst, hermes_intent, hermes_ea_prefetch,
|
||
-- km_embedding_worker, km_embedding_realtime,
|
||
-- aider_heal,
|
||
-- mcp_l1_grounding, mcp_l2_grounding, mcp_l3_ollama,
|
||
-- openclaw_daily, openclaw_weekly, openclaw_monthly, openclaw_meta, openclaw_qa,
|
||
-- nemotron_dispatch,
|
||
-- code_review_hermes, code_review_openclaw, code_review_elephant,
|
||
-- ea_engine,
|
||
-- ppt_gemini, ppt_ollama, ppt_nim,
|
||
-- sales_copy, trend_match, trend_qa, product_insights, trend_keywords,
|
||
-- tg_bot_copy, tg_bot_copy_v2,
|
||
-- openclaw_bot_main, openclaw_bot_gemini, openclaw_bot_nim,
|
||
-- bot_api_copy, trend_crawler, ai_provider_generic
|
||
caller VARCHAR(64) NOT NULL,
|
||
|
||
-- 主機/供應商標籤(A1 audit Section 1.1 主機標記原則)
|
||
-- gcp_ollama / ollama_111 / gemini / claude / nim / openrouter / nim_via_elephant
|
||
provider VARCHAR(32) NOT NULL,
|
||
|
||
model VARCHAR(128) NOT NULL,
|
||
input_tokens INTEGER NOT NULL DEFAULT 0,
|
||
output_tokens INTEGER NOT NULL DEFAULT 0,
|
||
duration_ms INTEGER,
|
||
|
||
-- ok / fallback / error / timeout / cache_only
|
||
-- fallback 表示「主路徑失敗,觸發了下游 caller」;下游本身會另寫一筆 ok/error
|
||
-- M3: status NOT NULL,且 fallback_to 必須與 status='fallback' 一致
|
||
status VARCHAR(16) NOT NULL,
|
||
fallback_to VARCHAR(64),
|
||
|
||
cost_usd NUMERIC(10,6) NOT NULL DEFAULT 0,
|
||
|
||
-- Anthropic / Gemini prompt cache 命中(Phase 5 Token 日報降本指標)
|
||
cache_hit BOOLEAN NOT NULL DEFAULT FALSE,
|
||
-- Phase 11 RAG 預留:本次調用是否實質被 RAG 取代/前置攔截
|
||
rag_hit BOOLEAN NOT NULL DEFAULT FALSE,
|
||
|
||
-- 串接「同一邏輯請求」的多筆 call(如 Code Review 三鏈、Q&A fallback 鏈)
|
||
request_id VARCHAR(64),
|
||
|
||
error TEXT,
|
||
-- prompt_hash / temperature / max_tokens / fingerprint / etc.(不存原始 prompt)
|
||
meta JSONB,
|
||
|
||
-- ─────── critic-A11 修補:白名單 + PII/膨脹護欄 ───────
|
||
-- H1: provider 白名單(NOT VALID 不檢既存資料,僅檢未來寫入)
|
||
-- 三主機架構(統帥 2026-05-03 確認):
|
||
-- gcp_ollama = Primary 34.143.170.20 (SSD)
|
||
-- ollama_secondary = Secondary 34.21.145.224 (SSD)
|
||
-- ollama_111 = Fallback 192.168.0.111 (HDD/Local)
|
||
CONSTRAINT chk_ai_calls_provider CHECK (
|
||
provider IN ('gcp_ollama','ollama_secondary','ollama_111','gemini','claude',
|
||
'nim','openrouter','nim_via_elephant')
|
||
),
|
||
-- M3: status 白名單 + fallback_to 一致性
|
||
CONSTRAINT chk_ai_calls_status CHECK (
|
||
status IN ('ok','fallback','error','timeout','cache_only')
|
||
),
|
||
CONSTRAINT chk_ai_calls_fallback_consistent CHECK (
|
||
(status = 'fallback') = (fallback_to IS NOT NULL)
|
||
),
|
||
-- L3: duration 範圍 (0 ~ 10 分鐘)
|
||
CONSTRAINT chk_ai_calls_duration_range CHECK (
|
||
duration_ms IS NULL OR (duration_ms >= 0 AND duration_ms <= 600000)
|
||
),
|
||
-- H2: meta/error 大小護欄(避免 PII 落地與膨脹)
|
||
CONSTRAINT chk_ai_calls_meta_size CHECK (
|
||
meta IS NULL OR octet_length(meta::text) <= 8192
|
||
),
|
||
CONSTRAINT chk_ai_calls_error_size CHECK (
|
||
error IS NULL OR octet_length(error) <= 4096
|
||
)
|
||
);
|
||
|
||
-- ─────────────────────────────────────────────────────────────────────────────
|
||
-- 索引設計
|
||
-- ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
-- (1) 時間範圍掃描(日報/週報「過去 24h / 7d」必用,BRIN 不適合 OLTP 隨機讀)
|
||
CREATE INDEX IF NOT EXISTS idx_ai_calls_called_at
|
||
ON ai_calls (called_at DESC);
|
||
|
||
-- (2) GROUP BY caller 報表(日報 Section 3 TOP caller / 全鏈 trace)
|
||
CREATE INDEX IF NOT EXISTS idx_ai_calls_caller_called_at
|
||
ON ai_calls (caller, called_at DESC);
|
||
|
||
-- (3) 供應商分布報表(週報 by provider 統計、預算追蹤)
|
||
CREATE INDEX IF NOT EXISTS idx_ai_calls_provider_called_at
|
||
ON ai_calls (provider, called_at DESC);
|
||
|
||
-- (4) request_id 串鏈(部分查詢,sparse 欄位不全建)
|
||
CREATE INDEX IF NOT EXISTS idx_ai_calls_request_id
|
||
ON ai_calls (request_id)
|
||
WHERE request_id IS NOT NULL;
|
||
|
||
-- (5) 異常監控(M5: 精確列舉 error/timeout/fallback,避免未知 status 污染)
|
||
CREATE INDEX IF NOT EXISTS idx_ai_calls_status_called_at
|
||
ON ai_calls (status, called_at DESC)
|
||
WHERE status IN ('error','timeout','fallback');
|
||
|
||
-- ─────────────────────────────────────────────────────────────────────────────
|
||
-- 權限(沿襲 migration 023 慣例)
|
||
-- ─────────────────────────────────────────────────────────────────────────────
|
||
GRANT ALL PRIVILEGES ON ai_calls TO momo;
|
||
GRANT USAGE, SELECT ON SEQUENCE ai_calls_id_seq TO momo;
|
||
|
||
-- 註: 90 天保留由 scheduler 任務執行 (Phase 5 排程):
|
||
-- DELETE FROM ai_calls WHERE called_at < NOW() - INTERVAL '90 days';
|
||
-- 建議每日 03:00 跑,配合 idx_ai_calls_called_at DESC 倒序掃描可控制成本。
|
||
|
||
DO $$
|
||
BEGIN
|
||
RAISE NOTICE 'Migration 024 done: ai_calls + 5 indexes (Operation Ollama-First v5.0 P1)';
|
||
END $$;
|