Some checks failed
Code Review / ai-code-review (push) Successful in 10s
run-migration / migrate (push) Successful in 7s
CD Pipeline / build-and-deploy (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / tests (push) Has been cancelled
116 lines
4.2 KiB
PL/PgSQL
116 lines
4.2 KiB
PL/PgSQL
-- ADR-110 GCP-A primary embedding upgrade: nomic-embed-text (768 dims) -> bge-m3 (1024 dims)
-- 2026-05-04 ogt + Claude Sonnet 4.6
--
-- Background:
--   GCP-A (34.143.170.20) does not have nomic-embed-text, so it uses bge-m3:latest
--   (a dedicated embedding model) instead.
--   bge-m3 produces 1024-dimensional vectors, which are incompatible with the existing
--   vector(768) schema; INSERTs would fail outright.
--
-- Affected columns:
--   1. knowledge_entries.embedding    vector(768) -> vector(1024)
--   2. rag_chunks.embedding           vector(768) -> vector(1024)
--   3. playbook_embeddings.embedding  vector(768) -> vector(1024)
--
-- Migration strategy: clear the existing vector data, switch the dimension, then let the
--   re-embed script regenerate the embeddings.
--   If the existing vectors must be kept, dump them first as a nomic-format backup
--   (vectors of the old dimension cannot be converted).
--
-- Preconditions:
--   1. pgvector >= 0.5.0 (already satisfied)
--   2. Confirm whether the existing vector data needs a backup
--      (backing up important playbooks first is recommended)
--   3. The embedding service has already been switched to bge-m3 (models.json v1.4.0)
--
-- Rollback: run embedding_rollback_768.sql (requires re-embedding in nomic-embed-text format)
|
||
|
||
BEGIN;

-- 1. knowledge_entries: back up the old vectors, clear them, and change the
--    column dimension to 1024.
--
-- The backup and the conversion only run while the column is NOT yet
-- vector(1024). `ALTER COLUMN ... USING NULL` discards every stored vector, so
-- running it unconditionally would wipe the re-embedded 1024-dim data if this
-- script is ever executed a second time, and the backup would then capture
-- 1024-dim vectors under the `embedding_768` column name.
DO $$
DECLARE
    v_current_dim integer;  -- pgvector stores the declared dimension in atttypmod
BEGIN
    SELECT a.atttypmod
      INTO v_current_dim
      FROM pg_attribute AS a
     WHERE a.attrelid = 'knowledge_entries'::regclass
       AND a.attname = 'embedding'
       AND NOT a.attisdropped;

    IF v_current_dim IS NOT DISTINCT FROM 1024 THEN
        RAISE NOTICE 'knowledge_entries.embedding is already vector(1024); skipping backup and conversion';
    ELSE
        -- Snapshot the old vectors as text so they survive the type change.
        CREATE TABLE IF NOT EXISTS knowledge_entries_embedding_backup_20260505 AS
        SELECT
            id,
            embedding::text AS embedding_768,
            NOW() AS backed_up_at
        FROM knowledge_entries
        WHERE embedding IS NOT NULL;

        -- Clears the existing 768-dim vectors (the dimension cannot be converted).
        ALTER TABLE knowledge_entries
            ALTER COLUMN embedding TYPE vector(1024)
            USING NULL;
    END IF;
END $$;

COMMENT ON COLUMN knowledge_entries.embedding IS
    'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-05 ADR-110 follow-up)';
|
||
|
||
|
||
-- 2. rag_chunks: clear the vector data and change the column dimension to 1024.
--
-- The index drop and the conversion only run while the column is NOT yet
-- vector(1024). `ALTER COLUMN ... USING NULL` discards every stored vector, so
-- running it unconditionally would wipe the re-embedded data on a re-run.
DO $$
DECLARE
    v_current_dim integer;  -- pgvector stores the declared dimension in atttypmod
BEGIN
    SELECT a.atttypmod
      INTO v_current_dim
      FROM pg_attribute AS a
     WHERE a.attrelid = 'rag_chunks'::regclass
       AND a.attname = 'embedding'
       AND NOT a.attisdropped;

    IF v_current_dim IS NOT DISTINCT FROM 1024 THEN
        RAISE NOTICE 'rag_chunks.embedding is already vector(1024); skipping conversion';
    ELSE
        -- Original author's note: the ivfflat index has to be dropped before
        -- the column type can be altered.
        DROP INDEX IF EXISTS idx_rag_chunks_embedding;

        -- Clears the existing 768-dim vectors (the dimension cannot be converted).
        ALTER TABLE rag_chunks
            ALTER COLUMN embedding TYPE vector(1024)
            USING NULL;
    END IF;
END $$;

-- Rebuild the ivfflat index (original sizing note: lists = 100 was chosen for
-- data sets of roughly 10k rows or fewer).
-- NOTE(review): right after the conversion the column is all NULL, and ivfflat
-- derives its lists from the rows present at build time, so the index should
-- be rebuilt (REINDEX) once the re-embed script has repopulated the column.
-- Confirm this against the pgvector documentation.
CREATE INDEX IF NOT EXISTS idx_rag_chunks_embedding
    ON rag_chunks
    USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100);

COMMENT ON COLUMN rag_chunks.embedding IS
    'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-04 ADR-110)';
|
||
|
||
|
||
-- 3. playbook_embeddings: clear the vector data and change the column
--    dimension to 1024.
--
-- The index drop and the conversion only run while the column is NOT yet
-- vector(1024). `ALTER COLUMN ... USING NULL` discards every stored vector, so
-- running it unconditionally would wipe the re-embedded data on a re-run.
DO $$
DECLARE
    v_current_dim integer;  -- pgvector stores the declared dimension in atttypmod
BEGIN
    SELECT a.atttypmod
      INTO v_current_dim
      FROM pg_attribute AS a
     WHERE a.attrelid = 'playbook_embeddings'::regclass
       AND a.attname = 'embedding'
       AND NOT a.attisdropped;

    IF v_current_dim IS NOT DISTINCT FROM 1024 THEN
        RAISE NOTICE 'playbook_embeddings.embedding is already vector(1024); skipping conversion';
    ELSE
        DROP INDEX IF EXISTS ix_playbook_embeddings_vec;

        -- Clears the existing 768-dim vectors.
        ALTER TABLE playbook_embeddings
            ALTER COLUMN embedding TYPE vector(1024)
            USING NULL;
    END IF;
END $$;

-- NOTE(review): right after the conversion the column is all NULL, and ivfflat
-- derives its lists from the rows present at build time, so the index should
-- be rebuilt (REINDEX) once the re-embed script has repopulated the column.
-- Confirm this against the pgvector documentation.
CREATE INDEX IF NOT EXISTS ix_playbook_embeddings_vec
    ON playbook_embeddings
    USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100);

COMMENT ON COLUMN playbook_embeddings.embedding IS
    'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-04 ADR-110)';

COMMENT ON TABLE playbook_embeddings IS
    'Playbook 向量索引 — ADR-110 GCP-A bge-m3 1024 維 (2026-05-04)';
|
||
|
||
|
||
-- 4. Verify the migration result.
--
-- Checks that the `embedding` column of each migrated table is declared as
-- vector(1024) and aborts the transaction (RAISE EXCEPTION) otherwise.
DO $$
DECLARE
    v_table text;     -- table currently being checked
    v_dim   integer;  -- declared dimension read from the catalog
BEGIN
    FOREACH v_table IN ARRAY ARRAY['knowledge_entries', 'rag_chunks', 'playbook_embeddings']
    LOOP
        -- Resolve the table through regclass so the lookup follows search_path
        -- exactly like the ALTER TABLE statements do, instead of matching any
        -- same-named relation in an arbitrary schema.
        -- pgvector atttypmod stores the configured dimension.
        SELECT a.atttypmod
          INTO v_dim
          FROM pg_attribute AS a
         WHERE a.attrelid = v_table::regclass
           AND a.attname = 'embedding'
           AND NOT a.attisdropped;

        -- IS DISTINCT FROM also fails the check when no row was found
        -- (v_dim is NULL); a plain `!=` would yield NULL and silently pass.
        IF v_dim IS DISTINCT FROM 1024 THEN
            RAISE EXCEPTION '%.embedding 維度驗證失敗:expected 1024, got %', v_table, v_dim;
        END IF;
    END LOOP;

    RAISE NOTICE '✅ embedding 遷移驗證通過:knowledge_entries、rag_chunks、playbook_embeddings 均為 vector(1024)';
END $$;

COMMIT;
|