-- ADR-110 GCP-A primary embedding upgrade: nomic-embed-text (768 dims) -> bge-m3 (1024 dims)
-- 2026-05-04 ogt + Claude Sonnet 4.6
--
-- Background:
--   GCP-A (34.143.170.20) does not have nomic-embed-text and uses bge-m3:latest
--   (a dedicated embedding model) instead.
--   bge-m3 produces 1024-dimensional vectors; the existing vector(768) columns are
--   incompatible and INSERTs would fail outright.
--
-- Scope:
--   1. knowledge_entries.embedding    vector(768) -> vector(1024)
--   2. rag_chunks.embedding           vector(768) -> vector(1024)
--   3. playbook_embeddings.embedding  vector(768) -> vector(1024)
--
-- Migration strategy:
--   Existing vectors are cleared only when the column is not already vector(1024);
--   after the dimension switch the re-embed script repopulates them.
--   Re-running this migration where the column is already vector(1024) must
--   preserve the existing vector data.
--   To keep the old vectors, dump them first as a nomic-format backup
--   (vectors of the old dimension cannot be converted).
--
-- Preconditions:
--   1. pgvector >= 0.5.0 (already satisfied)
--   2. Decide whether existing vector data needs a backup
--      (recommended for important playbooks)
--   3. The embedding service has been switched to bge-m3 (models.json v1.4.0)
--
-- Rollback: run embedding_rollback_768.sql
--   (requires re-embedding into nomic-embed-text format)
--
-- Implementation notes:
--   * Column dimensions are read from pg_attribute.atttypmod; pgvector stores the
--     configured dimension there.
--   * Tables are resolved with to_regclass(), which follows search_path exactly
--     like the unqualified ALTER TABLE statements below, so the catalog check
--     always inspects the relation that is actually altered (a same-named table
--     in another schema can no longer be picked up by accident).

BEGIN;

-- 1. knowledge_entries: back up the old vectors, clear them, change the dimension
DO $$
DECLARE
    v_dim integer;
BEGIN
    SELECT a.atttypmod
    INTO v_dim
    FROM pg_attribute AS a
    WHERE a.attrelid = to_regclass('knowledge_entries')
      AND a.attname = 'embedding'
      AND NOT a.attisdropped;

    IF v_dim IS DISTINCT FROM 1024 THEN
        -- Create the backup table only if it is missing (WITH NO DATA), then
        -- always append the current vectors. A plain
        -- CREATE TABLE IF NOT EXISTS ... AS SELECT would silently skip the
        -- backup when the table already exists (e.g. rollback followed by a
        -- re-migration) and the vectors would then be cleared without a copy.
        EXECUTE $sql$
            CREATE TABLE IF NOT EXISTS knowledge_entries_embedding_backup_20260505 AS
            SELECT
                id,
                embedding::text AS embedding_768,
                NOW() AS backed_up_at
            FROM knowledge_entries
            WITH NO DATA
        $sql$;

        EXECUTE $sql$
            INSERT INTO knowledge_entries_embedding_backup_20260505
                (id, embedding_768, backed_up_at)
            SELECT
                id,
                embedding::text,
                NOW()
            FROM knowledge_entries
            WHERE embedding IS NOT NULL
        $sql$;

        -- USING NULL discards the old values: 768-dim vectors cannot be cast
        -- to vector(1024).
        EXECUTE $sql$
            ALTER TABLE knowledge_entries
                ALTER COLUMN embedding TYPE vector(1024) USING NULL
        $sql$;

        RAISE NOTICE 'knowledge_entries.embedding migrated from vector(%) to vector(1024); old embeddings were backed up and cleared', v_dim;
    ELSE
        RAISE NOTICE 'knowledge_entries.embedding already vector(1024); existing embeddings preserved';
    END IF;
END $$;

COMMENT ON COLUMN knowledge_entries.embedding IS
    'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-05 ADR-110 follow-up)';

-- 2. rag_chunks: clear the vector data, change the dimension
--    The ivfflat index must be dropped before ALTER COLUMN.
DO $$
DECLARE
    v_dim integer;
BEGIN
    SELECT a.atttypmod
    INTO v_dim
    FROM pg_attribute AS a
    WHERE a.attrelid = to_regclass('rag_chunks')
      AND a.attname = 'embedding'
      AND NOT a.attisdropped;

    IF v_dim IS DISTINCT FROM 1024 THEN
        EXECUTE 'DROP INDEX IF EXISTS idx_rag_chunks_embedding';

        EXECUTE $sql$
            ALTER TABLE rag_chunks
                ALTER COLUMN embedding TYPE vector(1024) USING NULL
        $sql$;

        RAISE NOTICE 'rag_chunks.embedding migrated from vector(%) to vector(1024); old embeddings were cleared', v_dim;
    ELSE
        RAISE NOTICE 'rag_chunks.embedding already vector(1024); existing embeddings preserved';
    END IF;
END $$;

-- Rebuild the ivfflat index (per the original author, lists = 100 suits
-- datasets of up to roughly 10k rows).
-- NOTE(review): right after a dimension change every embedding is NULL, so the
-- index is built without data; pgvector recommends building ivfflat indexes
-- once the table has data. Consider REINDEX INDEX idx_rag_chunks_embedding
-- after the re-embed script has finished.
CREATE INDEX IF NOT EXISTS idx_rag_chunks_embedding
    ON rag_chunks
    USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100);

COMMENT ON COLUMN rag_chunks.embedding IS
    'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-04 ADR-110)';

-- 3. playbook_embeddings: clear the vector data, change the dimension
DO $$
DECLARE
    v_dim integer;
BEGIN
    SELECT a.atttypmod
    INTO v_dim
    FROM pg_attribute AS a
    WHERE a.attrelid = to_regclass('playbook_embeddings')
      AND a.attname = 'embedding'
      AND NOT a.attisdropped;

    IF v_dim IS DISTINCT FROM 1024 THEN
        EXECUTE 'DROP INDEX IF EXISTS ix_playbook_embeddings_vec';

        EXECUTE $sql$
            ALTER TABLE playbook_embeddings
                ALTER COLUMN embedding TYPE vector(1024) USING NULL
        $sql$;

        RAISE NOTICE 'playbook_embeddings.embedding migrated from vector(%) to vector(1024); old embeddings were cleared', v_dim;
    ELSE
        RAISE NOTICE 'playbook_embeddings.embedding already vector(1024); existing embeddings preserved';
    END IF;
END $$;

-- NOTE(review): same caveat as idx_rag_chunks_embedding — consider
-- REINDEX INDEX ix_playbook_embeddings_vec after re-embedding.
CREATE INDEX IF NOT EXISTS ix_playbook_embeddings_vec
    ON playbook_embeddings
    USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100);

COMMENT ON COLUMN playbook_embeddings.embedding IS
    'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-04 ADR-110)';

COMMENT ON TABLE playbook_embeddings IS
    'Playbook 向量索引 — ADR-110 GCP-A bge-m3 1024 維 (2026-05-04)';

-- 4. Verify the migration result
--    Any mismatch raises an exception, which aborts the transaction so that
--    nothing above is committed.
DO $$
DECLARE
    v_km_dim  integer;
    v_rag_dim integer;
    v_pb_dim  integer;
BEGIN
    SELECT a.atttypmod
    INTO v_km_dim
    FROM pg_attribute AS a
    WHERE a.attrelid = to_regclass('knowledge_entries')
      AND a.attname = 'embedding'
      AND NOT a.attisdropped;

    SELECT a.atttypmod
    INTO v_rag_dim
    FROM pg_attribute AS a
    WHERE a.attrelid = to_regclass('rag_chunks')
      AND a.attname = 'embedding'
      AND NOT a.attisdropped;

    SELECT a.atttypmod
    INTO v_pb_dim
    FROM pg_attribute AS a
    WHERE a.attrelid = to_regclass('playbook_embeddings')
      AND a.attname = 'embedding'
      AND NOT a.attisdropped;

    -- pgvector atttypmod stores the configured dimension.
    -- IS DISTINCT FROM (not !=) so that a NULL result — column or table not
    -- found — fails verification instead of slipping through: NULL != 1024
    -- evaluates to NULL, which IF treats as false.
    IF v_km_dim IS DISTINCT FROM 1024 THEN
        RAISE EXCEPTION 'knowledge_entries.embedding 維度驗證失敗:expected 1024, got %', v_km_dim;
    END IF;

    IF v_rag_dim IS DISTINCT FROM 1024 THEN
        RAISE EXCEPTION 'rag_chunks.embedding 維度驗證失敗:expected 1024, got %', v_rag_dim;
    END IF;

    IF v_pb_dim IS DISTINCT FROM 1024 THEN
        RAISE EXCEPTION 'playbook_embeddings.embedding 維度驗證失敗:expected 1024, got %', v_pb_dim;
    END IF;

    RAISE NOTICE '✅ embedding 遷移驗證通過:knowledge_entries、rag_chunks、playbook_embeddings 均為 vector(1024)';
END $$;

COMMIT;