diff --git a/apps/api/migrations/embedding_bge_m3_1024.sql b/apps/api/migrations/embedding_bge_m3_1024.sql
index 9ea09246..f13504ea 100644
--- a/apps/api/migrations/embedding_bge_m3_1024.sql
+++ b/apps/api/migrations/embedding_bge_m3_1024.sql
@@ -10,7 +10,8 @@
 -- 2. rag_chunks.embedding vector(768) → vector(1024)
 -- 3. playbook_embeddings.embedding vector(768) → vector(1024)
 --
--- 遷移策略:清空現有向量資料,切換維度後由 re-embed script 重新嵌入
+-- Migration strategy: clear existing vector data only when the column is not already vector(1024); after the dimension switch the re-embed script re-embeds everything
+-- When this migration is re-run in an environment that is already vector(1024), the existing vector data must be preserved.
 -- 現有向量資料若要保留,需先 dump 用 nomic 格式備份(舊維度無法轉換)
 --
 -- 執行前置條件:
@@ -23,17 +24,38 @@
 BEGIN;
 
 -- 1. knowledge_entries:備份舊向量並清空,變更欄位維度
-CREATE TABLE IF NOT EXISTS knowledge_entries_embedding_backup_20260505 AS
-SELECT
-  id,
-  embedding::text AS embedding_768,
-  NOW() AS backed_up_at
-FROM knowledge_entries
-WHERE embedding IS NOT NULL;
+DO $$ -- Idempotency guard: back up, clear and retype the column only when it is not already vector(1024).
+DECLARE
+  v_dim integer; -- type modifier of the embedding column; stays NULL when the column is not found
+BEGIN
+  SELECT a.atttypmod INTO v_dim -- the guard relies on pgvector exposing the declared dimension as the type modifier
+  FROM pg_attribute AS a
+  WHERE a.attrelid = 'knowledge_entries'::regclass -- resolved via search_path, i.e. the same table the statements below target
+    AND a.attname = 'embedding'
+    AND NOT a.attisdropped;
 
-ALTER TABLE knowledge_entries
-  ALTER COLUMN embedding TYPE vector(1024)
-  USING NULL; -- 清空現有 768 維向量(維度不可轉換)
+  IF v_dim IS DISTINCT FROM 1024 THEN -- IS DISTINCT FROM also takes this branch when v_dim is NULL
+    EXECUTE $sql$
+      CREATE TABLE IF NOT EXISTS knowledge_entries_embedding_backup_20260505 AS
+      SELECT
+        id,
+        embedding::text AS embedding_768,
+        NOW() AS backed_up_at
+      FROM knowledge_entries
+      WHERE embedding IS NOT NULL
+    $sql$;
+
+    EXECUTE $sql$
+      ALTER TABLE knowledge_entries
+        ALTER COLUMN embedding TYPE vector(1024)
+        USING NULL
+    $sql$;
+
+    RAISE NOTICE 'knowledge_entries.embedding migrated from vector(%) to vector(1024); old embeddings were backed up and cleared', v_dim;
+  ELSE
+    RAISE NOTICE 'knowledge_entries.embedding already vector(1024); existing embeddings preserved';
+  END IF;
+END $$;
 
 COMMENT ON COLUMN knowledge_entries.embedding IS
   'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-05 ADR-110 follow-up)';
@@ -41,11 +63,29 @@ COMMENT ON COLUMN knowledge_entries.embedding IS
 
 -- 2. rag_chunks:清空向量資料,變更欄位維度
 -- ivfflat index 必須先 DROP 才能 ALTER COLUMN
-DROP INDEX IF EXISTS idx_rag_chunks_embedding;
+DO $$ -- Idempotency guard: drop the index and clear/retype the column only when it is not already vector(1024).
+DECLARE
+  v_dim integer; -- type modifier of the embedding column; stays NULL when the column is not found
+BEGIN
+  SELECT a.atttypmod INTO v_dim -- the guard relies on pgvector exposing the declared dimension as the type modifier
+  FROM pg_attribute AS a
+  WHERE a.attrelid = 'rag_chunks'::regclass -- resolved via search_path, i.e. the same table the ALTER below targets
+    AND a.attname = 'embedding'
+    AND NOT a.attisdropped;
 
-ALTER TABLE rag_chunks
-  ALTER COLUMN embedding TYPE vector(1024)
-  USING NULL; -- 清空現有 768 維向量(維度不可轉換)
+  IF v_dim IS DISTINCT FROM 1024 THEN -- IS DISTINCT FROM also takes this branch when v_dim is NULL
+    EXECUTE 'DROP INDEX IF EXISTS idx_rag_chunks_embedding';
+    EXECUTE $sql$
+      ALTER TABLE rag_chunks
+        ALTER COLUMN embedding TYPE vector(1024)
+        USING NULL
+    $sql$;
+
+    RAISE NOTICE 'rag_chunks.embedding migrated from vector(%) to vector(1024); old embeddings were cleared', v_dim;
+  ELSE
+    RAISE NOTICE 'rag_chunks.embedding already vector(1024); existing embeddings preserved';
+  END IF;
+END $$;
 
 -- 重建 ivfflat index(lists=100 適合 ~10k 筆以下資料)
 CREATE INDEX IF NOT EXISTS idx_rag_chunks_embedding
@@ -58,11 +98,29 @@ COMMENT ON COLUMN rag_chunks.embedding IS
 
 
 -- 3. playbook_embeddings:清空向量資料,變更欄位維度
-DROP INDEX IF EXISTS ix_playbook_embeddings_vec;
+DO $$ -- Idempotency guard: drop the index and clear/retype the column only when it is not already vector(1024).
+DECLARE
+  v_dim integer; -- type modifier of the embedding column; stays NULL when the column is not found
+BEGIN
+  SELECT a.atttypmod INTO v_dim -- the guard relies on pgvector exposing the declared dimension as the type modifier
+  FROM pg_attribute AS a
+  WHERE a.attrelid = 'playbook_embeddings'::regclass -- resolved via search_path, i.e. the same table the ALTER below targets
+    AND a.attname = 'embedding'
+    AND NOT a.attisdropped;
 
-ALTER TABLE playbook_embeddings
-  ALTER COLUMN embedding TYPE vector(1024)
-  USING NULL; -- 清空現有 768 維向量
+  IF v_dim IS DISTINCT FROM 1024 THEN -- IS DISTINCT FROM also takes this branch when v_dim is NULL
+    EXECUTE 'DROP INDEX IF EXISTS ix_playbook_embeddings_vec';
+    EXECUTE $sql$
+      ALTER TABLE playbook_embeddings
+        ALTER COLUMN embedding TYPE vector(1024)
+        USING NULL
+    $sql$;
+
+    RAISE NOTICE 'playbook_embeddings.embedding migrated from vector(%) to vector(1024); old embeddings were cleared', v_dim;
+  ELSE
+    RAISE NOTICE 'playbook_embeddings.embedding already vector(1024); existing embeddings preserved';
+  END IF;
+END $$;
 
 CREATE INDEX IF NOT EXISTS ix_playbook_embeddings_vec
   ON playbook_embeddings