feat(embedding): ADR-110 升級 bge-m3:latest 1024 維向量
GCP-A (34.143.170.20) 無 nomic-embed-text,改用 bge-m3:latest(專用 多語言 embedding 模型),產生 1024 維向量。 變更: - embedding_service.py: 加入 bge-m3:latest=1024 維到 MODEL_DIMENSIONS, 預設模型改為 bge-m3:latest,更新文件說明 - playbook_embedding_repository.py + interfaces.py: 更新維度說明 - migrations/embedding_bge_m3_1024.sql: pgvector schema 遷移 rag_chunks + playbook_embeddings vector(768) → vector(1024) - scripts/reembed_bge_m3.py: 遷移後重新嵌入現有資料的 script 遷移步驟: 1. 執行 embedding_bge_m3_1024.sql(清空現有 768 維向量,變更維度) 2. 執行 python scripts/reembed_bge_m3.py 重新嵌入 2026-05-04 ogt + Claude Sonnet 4.6 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
88
apps/api/migrations/embedding_bge_m3_1024.sql
Normal file
88
apps/api/migrations/embedding_bge_m3_1024.sql
Normal file
@@ -0,0 +1,88 @@
|
||||
-- ADR-110 GCP-A Primary Embedding 升級:nomic-embed-text 768 → bge-m3 1024 維
|
||||
-- 2026-05-04 ogt + Claude Sonnet 4.6
|
||||
--
|
||||
-- 背景:
|
||||
-- GCP-A (34.143.170.20) 無 nomic-embed-text,改用 bge-m3:latest(專用 embedding 模型)
|
||||
-- bge-m3 產生 1024 維向量,現有 schema vector(768) 不相容,INSERT 會直接失敗
|
||||
--
|
||||
-- 影響範圍:
|
||||
-- 1. rag_chunks.embedding vector(768) → vector(1024)
|
||||
-- 2. playbook_embeddings.embedding vector(768) → vector(1024)
|
||||
--
|
||||
-- 遷移策略:清空現有向量資料,切換維度後由 re-embed script 重新嵌入
|
||||
-- 現有向量資料若要保留,需先 dump 用 nomic 格式備份(舊維度無法轉換)
|
||||
--
|
||||
-- 執行前置條件:
|
||||
-- 1. pgvector >= 0.5.0 (已滿足)
|
||||
-- 2. 確認現有向量資料是否需要備份(重要 playbook 建議先備份)
|
||||
-- 3. embedding service 已切換到 bge-m3(models.json v1.4.0)
|
||||
--
|
||||
-- 回滾方式:執行 embedding_rollback_768.sql(需重新嵌入至 nomic-embed-text 格式)
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- 1. rag_chunks:清空向量資料,變更欄位維度
|
||||
-- ivfflat index 必須先 DROP 才能 ALTER COLUMN
|
||||
DROP INDEX IF EXISTS idx_rag_chunks_embedding;
|
||||
|
||||
ALTER TABLE rag_chunks
|
||||
ALTER COLUMN embedding TYPE vector(1024)
|
||||
USING NULL; -- 清空現有 768 維向量(維度不可轉換)
|
||||
|
||||
-- 重建 ivfflat index(lists=100 適合 ~10k 筆以下資料)
|
||||
CREATE INDEX IF NOT EXISTS idx_rag_chunks_embedding
|
||||
ON rag_chunks
|
||||
USING ivfflat (embedding vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
COMMENT ON COLUMN rag_chunks.embedding IS
|
||||
'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-04 ADR-110)';
|
||||
|
||||
|
||||
-- 2. playbook_embeddings:清空向量資料,變更欄位維度
|
||||
DROP INDEX IF EXISTS ix_playbook_embeddings_vec;
|
||||
|
||||
ALTER TABLE playbook_embeddings
|
||||
ALTER COLUMN embedding TYPE vector(1024)
|
||||
USING NULL; -- 清空現有 768 維向量
|
||||
|
||||
CREATE INDEX IF NOT EXISTS ix_playbook_embeddings_vec
|
||||
ON playbook_embeddings
|
||||
USING ivfflat (embedding vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
COMMENT ON COLUMN playbook_embeddings.embedding IS
|
||||
'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-04 ADR-110)';
|
||||
|
||||
COMMENT ON TABLE playbook_embeddings IS
|
||||
'Playbook 向量索引 — ADR-110 GCP-A bge-m3 1024 維 (2026-05-04)';
|
||||
|
||||
|
||||
-- 3. 驗證遷移結果
|
||||
DO $$
|
||||
DECLARE
|
||||
v_rag_dim integer;
|
||||
v_pb_dim integer;
|
||||
BEGIN
|
||||
SELECT atttypmod INTO v_rag_dim
|
||||
FROM pg_attribute
|
||||
JOIN pg_class ON attrelid = pg_class.oid
|
||||
WHERE relname = 'rag_chunks' AND attname = 'embedding';
|
||||
|
||||
SELECT atttypmod INTO v_pb_dim
|
||||
FROM pg_attribute
|
||||
JOIN pg_class ON attrelid = pg_class.oid
|
||||
WHERE relname = 'playbook_embeddings' AND attname = 'embedding';
|
||||
|
||||
-- atttypmod for vector(1024) = 1024 + 1 = 1025
|
||||
IF v_rag_dim != 1025 THEN
|
||||
RAISE EXCEPTION 'rag_chunks.embedding 維度驗證失敗:expected 1025, got %', v_rag_dim;
|
||||
END IF;
|
||||
IF v_pb_dim != 1025 THEN
|
||||
RAISE EXCEPTION 'playbook_embeddings.embedding 維度驗證失敗:expected 1025, got %', v_pb_dim;
|
||||
END IF;
|
||||
|
||||
RAISE NOTICE '✅ embedding 遷移驗證通過:rag_chunks 和 playbook_embeddings 均為 vector(1024)';
|
||||
END $$;
|
||||
|
||||
COMMIT;
|
||||
187
apps/api/scripts/reembed_bge_m3.py
Normal file
187
apps/api/scripts/reembed_bge_m3.py
Normal file
@@ -0,0 +1,187 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Re-embed Script: bge-m3:latest 1024 維重新嵌入
|
||||
===============================================
|
||||
遷移 embedding_bge_m3_1024.sql 後執行,重新嵌入:
|
||||
1. rag_chunks(embedding IS NULL 的筆數)
|
||||
2. playbook_embeddings(embedding IS NULL 的筆數)
|
||||
|
||||
用法:
|
||||
cd apps/api
|
||||
python scripts/reembed_bge_m3.py [--dry-run] [--batch 50]
|
||||
|
||||
前置條件:
|
||||
1. embedding_bge_m3_1024.sql 已執行(schema 已升為 vector(1024))
|
||||
2. GCP-A Ollama (34.143.170.20:11434) 可連線且有 bge-m3:latest
|
||||
3. DATABASE_URL 環境變數已設定(或 .env 存在)
|
||||
|
||||
2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP-A Primary Embedding 升級
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# 確保 src 在 import 路徑
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
import asyncpg
|
||||
import httpx
|
||||
import structlog
|
||||
|
||||
logging = structlog.get_logger(__name__)
|
||||
|
||||
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://34.143.170.20:11434")
|
||||
EMBEDDING_MODEL = "bge-m3:latest"
|
||||
EXPECTED_DIM = 1024
|
||||
|
||||
|
||||
async def embed_text(client: httpx.AsyncClient, text: str) -> list[float]:
|
||||
"""呼叫 Ollama bge-m3 嵌入單一文本"""
|
||||
resp = await client.post(
|
||||
f"{OLLAMA_URL}/api/embeddings",
|
||||
json={"model": EMBEDDING_MODEL, "prompt": text},
|
||||
timeout=60.0,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
embedding = resp.json().get("embedding", [])
|
||||
if len(embedding) != EXPECTED_DIM:
|
||||
raise ValueError(f"bge-m3 維度錯誤: got {len(embedding)}, expected {EXPECTED_DIM}")
|
||||
return embedding
|
||||
|
||||
|
||||
async def reembed_rag_chunks(
|
||||
conn: asyncpg.Connection,
|
||||
client: httpx.AsyncClient,
|
||||
batch_size: int,
|
||||
dry_run: bool,
|
||||
) -> int:
|
||||
rows = await conn.fetch(
|
||||
"SELECT id, content FROM rag_chunks WHERE embedding IS NULL ORDER BY id LIMIT $1",
|
||||
batch_size * 10,
|
||||
)
|
||||
if not rows:
|
||||
logging.info("rag_chunks_all_embedded")
|
||||
return 0
|
||||
|
||||
done = 0
|
||||
for row in rows:
|
||||
try:
|
||||
vec = await embed_text(client, row["content"])
|
||||
if not dry_run:
|
||||
vec_str = "[" + ",".join(f"{v:.8f}" for v in vec) + "]"
|
||||
await conn.execute(
|
||||
"UPDATE rag_chunks SET embedding = $1::vector WHERE id = $2",
|
||||
vec_str, row["id"],
|
||||
)
|
||||
done += 1
|
||||
if done % 10 == 0:
|
||||
logging.info("rag_chunks_progress", done=done, total=len(rows))
|
||||
except Exception as e:
|
||||
logging.error("rag_chunk_embed_failed", id=row["id"], error=str(e))
|
||||
|
||||
return done
|
||||
|
||||
|
||||
async def reembed_playbook_embeddings(
|
||||
conn: asyncpg.Connection,
|
||||
client: httpx.AsyncClient,
|
||||
batch_size: int,
|
||||
dry_run: bool,
|
||||
) -> int:
|
||||
# playbook_embeddings 關聯 playbooks 表取原始內容
|
||||
rows = await conn.fetch("""
|
||||
SELECT pe.playbook_id, p.title, p.description, p.steps
|
||||
FROM playbook_embeddings pe
|
||||
JOIN playbooks p ON pe.playbook_id = p.id
|
||||
WHERE pe.embedding IS NULL
|
||||
ORDER BY pe.playbook_id
|
||||
LIMIT $1
|
||||
""", batch_size * 10)
|
||||
|
||||
if not rows:
|
||||
logging.info("playbook_embeddings_all_embedded")
|
||||
return 0
|
||||
|
||||
done = 0
|
||||
for row in rows:
|
||||
text_parts = [row["title"] or "", row["description"] or ""]
|
||||
if row["steps"]:
|
||||
if isinstance(row["steps"], list):
|
||||
text_parts.extend(str(s) for s in row["steps"])
|
||||
else:
|
||||
text_parts.append(str(row["steps"]))
|
||||
text = "\n".join(p for p in text_parts if p)
|
||||
|
||||
try:
|
||||
vec = await embed_text(client, text)
|
||||
if not dry_run:
|
||||
vec_str = "[" + ",".join(f"{v:.8f}" for v in vec) + "]"
|
||||
await conn.execute(
|
||||
"UPDATE playbook_embeddings SET embedding = $1::vector WHERE playbook_id = $2",
|
||||
vec_str, row["playbook_id"],
|
||||
)
|
||||
done += 1
|
||||
if done % 10 == 0:
|
||||
logging.info("playbook_embed_progress", done=done, total=len(rows))
|
||||
except Exception as e:
|
||||
logging.error("playbook_embed_failed", playbook_id=row["playbook_id"], error=str(e))
|
||||
|
||||
return done
|
||||
|
||||
|
||||
async def main(dry_run: bool, batch_size: int) -> None:
|
||||
database_url = os.getenv("DATABASE_URL")
|
||||
if not database_url:
|
||||
# 嘗試讀 .env
|
||||
env_file = Path(__file__).parent.parent / ".env"
|
||||
if env_file.exists():
|
||||
for line in env_file.read_text().splitlines():
|
||||
if line.startswith("DATABASE_URL="):
|
||||
database_url = line.split("=", 1)[1].strip().strip('"\'')
|
||||
break
|
||||
if not database_url:
|
||||
print("❌ DATABASE_URL 未設定,請設定環境變數或 .env 檔案", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if dry_run:
|
||||
print("🔍 DRY RUN 模式 — 不會實際更新 DB")
|
||||
|
||||
async with httpx.AsyncClient() as http_client:
|
||||
# 先驗證 bge-m3 可用且維度正確
|
||||
print(f"🔗 驗證 GCP-A Ollama ({OLLAMA_URL}) bge-m3 連線...")
|
||||
try:
|
||||
test_vec = await embed_text(http_client, "連線測試")
|
||||
print(f"✅ bge-m3 可用,維度 = {len(test_vec)}")
|
||||
except Exception as e:
|
||||
print(f"❌ bge-m3 連線失敗: {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
conn = await asyncpg.connect(database_url)
|
||||
try:
|
||||
# 統計待嵌入筆數
|
||||
rag_null = await conn.fetchval("SELECT COUNT(*) FROM rag_chunks WHERE embedding IS NULL")
|
||||
pb_null = await conn.fetchval("SELECT COUNT(*) FROM playbook_embeddings WHERE embedding IS NULL")
|
||||
print(f"📊 待嵌入:rag_chunks={rag_null} 筆,playbook_embeddings={pb_null} 筆")
|
||||
|
||||
if rag_null == 0 and pb_null == 0:
|
||||
print("✅ 所有向量已嵌入,無需重新處理")
|
||||
return
|
||||
|
||||
rag_done = await reembed_rag_chunks(conn, http_client, batch_size, dry_run)
|
||||
pb_done = await reembed_playbook_embeddings(conn, http_client, batch_size, dry_run)
|
||||
|
||||
print(f"{'[DRY RUN] ' if dry_run else ''}✅ 完成: rag_chunks={rag_done}, playbook_embeddings={pb_done}")
|
||||
finally:
|
||||
await conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Re-embed script for bge-m3 1024 維遷移")
|
||||
parser.add_argument("--dry-run", action="store_true", help="只統計,不寫 DB")
|
||||
parser.add_argument("--batch", type=int, default=50, help="每批次處理筆數")
|
||||
args = parser.parse_args()
|
||||
asyncio.run(main(dry_run=args.dry_run, batch_size=args.batch))
|
||||
@@ -274,7 +274,7 @@ class IKnowledgeRepository(Protocol):
|
||||
...
|
||||
|
||||
async def save_embedding(self, entry_id: str, embedding: list[float]) -> bool:
|
||||
"""儲存向量 embedding (768 維, pgvector)"""
|
||||
"""儲存向量 embedding (1024 維, pgvector, bge-m3:latest)"""
|
||||
...
|
||||
|
||||
async def semantic_search(
|
||||
|
||||
@@ -23,7 +23,7 @@ class PlaybookEmbeddingRepository:
|
||||
Playbook Embedding Repository
|
||||
|
||||
職責: playbook_embeddings 表 CRUD
|
||||
使用 pgvector 儲存 nomic-embed-text 768 維向量
|
||||
使用 pgvector 儲存 bge-m3:latest 1024 維向量(ADR-110 2026-05-04 升級自 768 維)
|
||||
|
||||
Args:
|
||||
db: SQLAlchemy AsyncSession (DI 注入)
|
||||
@@ -47,7 +47,7 @@ class PlaybookEmbeddingRepository:
|
||||
|
||||
Args:
|
||||
playbook_id: Playbook ID
|
||||
embedding: 768 維浮點向量 (list[float])
|
||||
embedding: 1024 維浮點向量 (list[float]),bge-m3:latest
|
||||
alert_names: 索引時的 alert_names 快照
|
||||
keywords: 索引時的 keywords 快照
|
||||
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
"""
|
||||
Embedding Service - Ollama BGE-M3 替代方案
|
||||
==========================================
|
||||
Embedding Service - Ollama bge-m3:latest 專用向量化
|
||||
===================================================
|
||||
|
||||
使用 Ollama qwen2.5:7b-instruct 提供文本向量化功能。
|
||||
雖非專用 embedding 模型,但支援多語言 (繁中/英文)。
|
||||
使用 Ollama bge-m3:latest 提供文本向量化功能(1024 維)。
|
||||
bge-m3 為專用多語言 embedding 模型,支援繁中/英文語義搜尋。
|
||||
|
||||
Phase 13.2 #84 - RAG Tool 基礎設施
|
||||
ADR-110 2026-05-04: GCP-A Primary 升級 bge-m3(768→1024 維遷移)
|
||||
|
||||
版本: v1.1
|
||||
版本: v1.2
|
||||
建立日期: 2026-03-26 20:30 (台北時區)
|
||||
更新日期: 2026-03-29 20:50 (台北時區)
|
||||
更新日期: 2026-05-04 (台北時區) — ADR-110 bge-m3 升級
|
||||
建立者: Claude Code
|
||||
更新者: Claude Code (P1 修復: 維度配置化)
|
||||
更新者: ogt + Claude Sonnet 4.6 (ADR-110 GCP-A Primary)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
@@ -58,7 +59,7 @@ class OllamaEmbeddingService:
|
||||
Ollama Embedding Service
|
||||
|
||||
使用 Ollama API 進行文本向量化。
|
||||
預設使用 qwen2.5:7b-instruct (3584 維向量)。
|
||||
預設使用 bge-m3:latest (1024 維向量),來自 GCP-A (34.143.170.20)。
|
||||
|
||||
Usage:
|
||||
service = OllamaEmbeddingService()
|
||||
@@ -71,12 +72,16 @@ class OllamaEmbeddingService:
|
||||
"qwen2.5:3b-instruct": 2048,
|
||||
"llama3.2:3b": 3072,
|
||||
"nomic-embed-text": 768,
|
||||
# 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP-A Primary — bge-m3 專用 embedding 模型
|
||||
# bge-m3 產生 1024 維向量;pgvector schema 已遷移至 vector(1024)(見 embedding_bge_m3_1024.sql)
|
||||
"bge-m3:latest": 1024,
|
||||
"bge-m3": 1024,
|
||||
}
|
||||
DEFAULT_DIMENSION = 3584 # 未知模型的預設值
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model: str = "qwen2.5:7b-instruct",
|
||||
model: str = "bge-m3:latest",
|
||||
ollama_url: str | None = None,
|
||||
timeout: float = 30.0,
|
||||
default_dimension: int | None = None,
|
||||
|
||||
Reference in New Issue
Block a user