-- =============================================================================
-- Migration 028: learning_episodes — distillation pool / knowledge-base staging
-- Operation Ollama-First v5.0 — Phase 11
-- Date: 2026-05-03 (Taipei)
-- Related work: ADR-029 (Hermes-First) + Phase 11 PromotionGate 4-stage filter
-- =============================================================================
-- Overview:
--   LLM/MCP results are first written to learning_episodes (the distillation
--   pool) and must pass the 4-stage PromotionGate before being promoted into
--   ai_insights (the main knowledge-base table). Design goals:
--     - Quarantine unverified content so it cannot directly pollute the RAG
--       recall corpus
--     - Keep raw + distilled content reachable so retraining is possible later
--     - High weight (>= 0.8) goes through human review; low weight is
--       promoted automatically
--
-- PromotionGate state machine:
--   pending
--     ├─[Stage 1: quality<0.7]→ rejected_quality
--     ├─[Stage 2: rule-based hallucination check]→ rejected_hallucination
--     ├─[Stage 3: cosine>0.95 vs. an existing insight]→ rejected_duplicate
--     ├─[Stage 4a: weight<0.8 + passed 1-3]→ approved → write ai_insights → back-fill insight_id
--     └─[Stage 4b: weight>=0.8]→ awaiting_review → Telegram push
--          ├─[human 👍]→ approved
--          ├─[human 👎]→ rejected_human
--          └─[no feedback within 24h]→ expired (weight lowered to 0.5, re-run Stage 4a)
--
-- Design decisions:
--   1. insight_id is a soft link (no FK) — archiving ai_insights must not
--      cascade into the distillation pool
--   2. source_table + source_id soft-link to ai_calls / mcp_calls so the
--      origin can be traced for later retraining
--   3. embedding uses the same 1024 dimensions as rag_query_log, keeping
--      cross-table cosine comparisons consistent
--   4. No 90-day retention (the pool is kept long-term; cold storage of
--      approved / rejected_* rows is left to a follow-up ADR)
--      — short-term growth risk is handled by the partial index plus a
--        monthly archive scheduler
--   5. promotion_status is VARCHAR(32) + CHECK whitelist rather than ENUM,
--      because adding a new state is easier that way
--   6. The rejected_reason CHECK makes the reason mandatory for rejected_*
--      states, preventing "rejections without a reason"
--   7. human_approver stores SHA1[:8] of the Telegram username so that no PII
--      is persisted
--
-- Rollback script (emergency use):
--   DROP INDEX IF EXISTS idx_le_embedding;
--   DROP INDEX IF EXISTS idx_le_insight_id;
--   DROP INDEX IF EXISTS idx_le_episode_type;
--   DROP INDEX IF EXISTS idx_le_status;
--   DROP INDEX IF EXISTS idx_le_created_at;
--   DROP TABLE IF EXISTS learning_episodes;
-- =============================================================================

-- NOTE(review): the VECTOR type and the ivfflat access method used below come
-- from the pgvector extension; this migration presumably relies on an earlier
-- migration having installed it — confirm.
CREATE TABLE IF NOT EXISTS learning_episodes (
    id                  BIGSERIAL       PRIMARY KEY,
    created_at          TIMESTAMPTZ     NOT NULL DEFAULT NOW(),

    -- Source type
    --   mcp_result     = facts fetched by an MCP server (grounding / search / db query)
    --   llm_response   = insight / summary generated by an LLM (hermes_analyst / openclaw, etc.)
    --   user_feedback  = facts told directly by the user over Telegram (high weight, needs human confirmation)
    --   manual_curated = manually curated entry (highest weight, skips PromotionGate)
    episode_type        VARCHAR(32)     NOT NULL,

    -- Soft link to the origin row (no FK)
    source_table        VARCHAR(32),                    -- 'ai_calls' / 'mcp_calls' / NULL
    source_id           BIGINT,                         -- id within source_table

    -- Distilled text (<= 16KB; raw content is not stored here, look it up via
    -- source_table + source_id)
    distilled_text      TEXT            NOT NULL,
    embedding           VECTOR(1024),                   -- same dimension as ai_insights / rag_query_log

    -- Distillation quality score (0-1)
    --   <0.7  → Stage 1 rejects directly as rejected_quality
    --   >=0.7 → proceeds to Stage 2-3
    quality_score       NUMERIC(4,3)    NOT NULL DEFAULT 0.0,

    -- Weight (decides the promotion path)
    --   <0.8  → Stage 4a automatic promotion
    --   >=0.8 → Stage 4b human review
    weight              NUMERIC(4,3)    NOT NULL DEFAULT 0.5,

    -- PromotionGate state (see the state machine in the file header)
    promotion_status    VARCHAR(32)     NOT NULL DEFAULT 'pending',

    -- Promotion outcome
    insight_id          BIGINT,                         -- ai_insights.id after promotion (soft link, no FK)
    rejected_reason     TEXT,                           -- required when promotion_status is rejected_*
    human_approver      VARCHAR(64),                    -- Telegram username SHA1[:8]
    reviewed_at         TIMESTAMPTZ,

    -- ─────── Guard rails (aligned with the critic-A11 style) ───────
    CONSTRAINT chk_le_quality CHECK (
        quality_score BETWEEN 0 AND 1
    ),
    CONSTRAINT chk_le_weight CHECK (
        weight BETWEEN 0 AND 1
    ),
    CONSTRAINT chk_le_episode_type CHECK (
        episode_type IN ('mcp_result','llm_response','user_feedback','manual_curated')
    ),
    CONSTRAINT chk_le_status CHECK (
        promotion_status IN (
            'pending','approved','awaiting_review',
            'rejected_quality','rejected_hallucination','rejected_duplicate','rejected_human',
            'expired'
        )
    ),
    CONSTRAINT chk_le_distilled_size CHECK (
        octet_length(distilled_text) <= 16384
    ),
    -- Any rejected_* status must carry a reason. The underscore is escaped
    -- because a bare "_" in LIKE is a single-character wildcard, not a
    -- literal underscore.
    CONSTRAINT chk_le_rejected_reason CHECK (
        (promotion_status NOT LIKE 'rejected!_%' ESCAPE '!')
        OR (rejected_reason IS NOT NULL)
    ),
    -- approved rows must have an insight_id; no other status may have one.
    -- Because this is an equivalence, status='approved' and insight_id have
    -- to be written in the same statement: the ai_insights row must exist
    -- before the episode is flipped to approved.
    CONSTRAINT chk_le_approved_consistent CHECK (
        (promotion_status = 'approved') = (insight_id IS NOT NULL)
    ),
    -- source_table + source_id consistency (both NULL or both set)
    CONSTRAINT chk_le_source_consistent CHECK (
        (source_table IS NULL AND source_id IS NULL)
        OR (source_table IS NOT NULL AND source_id IS NOT NULL)
    ),
    CONSTRAINT chk_le_source_table CHECK (
        source_table IS NULL OR source_table IN ('ai_calls','mcp_calls')
    ),
    -- reviewed_at is required whenever a human approver is recorded
    CONSTRAINT chk_le_review_consistent CHECK (
        (human_approver IS NULL) OR (reviewed_at IS NOT NULL)
    )
);

-- ─────────────────────────────────────────────────────────────────────────────
-- Index design
-- ─────────────────────────────────────────────────────────────────────────────

-- (1) Time-range scans (monitoring the size of the distillation pool)
CREATE INDEX IF NOT EXISTS idx_le_created_at
    ON learning_episodes (created_at DESC);

-- (2) Work-queue lookups (PromotionGate worker / human-review dashboard)
--     Partial index to keep it small: only the two "active" states
--     pending / awaiting_review are covered.
CREATE INDEX IF NOT EXISTS idx_le_status
    ON learning_episodes (promotion_status, created_at DESC)
    WHERE promotion_status IN ('pending','awaiting_review');

-- (3) Source-type distribution reports
CREATE INDEX IF NOT EXISTS idx_le_episode_type
    ON learning_episodes (episode_type, created_at DESC);

-- (4) Reverse lookup by insight_id (from ai_insights back to its source episode)
CREATE INDEX IF NOT EXISTS idx_le_insight_id
    ON learning_episodes (insight_id)
    WHERE insight_id IS NOT NULL;

-- (5) pgvector ivfflat (primary index for the Stage 3 duplicate-detection cosine query)
--     NOTE(review): this is a partial index, so the planner can only use it for
--     queries whose WHERE clause implies "embedding IS NOT NULL" — make sure the
--     Stage 3 query includes that predicate.
--     NOTE(review): pgvector recommends building ivfflat indexes after the table
--     holds data; on a fresh install this index is built on an empty table, so
--     plan a REINDEX once the pool is populated.
CREATE INDEX IF NOT EXISTS idx_le_embedding
    ON learning_episodes
    USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100)
    WHERE embedding IS NOT NULL;

-- ─────────────────────────────────────────────────────────────────────────────
-- Permissions
-- ─────────────────────────────────────────────────────────────────────────────
GRANT ALL PRIVILEGES ON learning_episodes TO momo;
-- learning_episodes_id_seq is the sequence created implicitly by the BIGSERIAL id column.
GRANT USAGE, SELECT ON SEQUENCE learning_episodes_id_seq TO momo;

-- Note: the "expired" down-weighting worker (no feedback within 24h) is run by
-- the scheduler:
--   UPDATE learning_episodes
--     SET promotion_status='expired', weight=0.5
--     WHERE promotion_status='awaiting_review'
--       AND created_at < NOW() - INTERVAL '24 hours';
-- Afterwards PromotionGate Stage 4a re-runs that batch of expired rows through
-- the automatic promotion path.
-- NOTE(review): the 24h timer above is keyed on created_at because this table
-- has no column recording when a row entered awaiting_review; also,
-- idx_le_status does not cover 'expired' rows, so the Stage 4a re-run cannot
-- use that partial index — confirm both are acceptable.

-- Emit a completion notice. The counts match the objects defined above:
-- 5 indexes and 10 named CHECK constraints.
DO $$
BEGIN
    RAISE NOTICE 'Migration 028 done: learning_episodes + 5 indexes + 10 CHECK constraints (Operation Ollama-First v5.0 P11)';
END $$;