fix(rag): use bge embeddings on GCP Ollama lane
This commit is contained in:
@@ -1,13 +1,13 @@
|
||||
"""
|
||||
AWOOOI — Knowledge RAG Service (Phase 33, ADR-067)
|
||||
==================================================
|
||||
本地 RAG 知識庫:nomic-embed-text 768維向量 + pgvector
|
||||
本地 RAG 知識庫:bge-m3 1024維向量 + pgvector
|
||||
|
||||
索引策略:
|
||||
- 初期 < 100 筆: 線性搜尋
|
||||
- 超過 100 筆: 執行 CREATE INDEX ivfflat (手動觸發)
|
||||
|
||||
向量模型: nomic-embed-text (Ollama 111, 768維) — 188:11434 被 NetworkPolicy v1.3 封閉
|
||||
向量模型: bge-m3 (GCP-A/GCP-B/111 Ollama lane, 1024維)
|
||||
生成模型: qwen2.5:7b-instruct (Ollama 111)
|
||||
|
||||
leWOOOgo: Service 層只處理業務邏輯,DB 存取委派 rag_chunk_repository
|
||||
@@ -21,11 +21,12 @@ import httpx
|
||||
import structlog
|
||||
|
||||
import src.repositories.rag_chunk_repository as rag_repo
|
||||
from src.core.config import settings
|
||||
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
_EMBED_MODEL = "nomic-embed-text"
|
||||
_EMBED_MODEL = "bge-m3:latest"
|
||||
_GEN_MODEL = "qwen2.5:7b-instruct"
|
||||
_TOP_K = 5
|
||||
|
||||
@@ -131,7 +132,10 @@ class KnowledgeRAGService:
|
||||
http = await self._get_http()
|
||||
resp = await http.post(
|
||||
f"{resolve_ollama_endpoint('embedding')}/api/embeddings",
|
||||
json={"model": _EMBED_MODEL, "prompt": text},
|
||||
json={
|
||||
"model": getattr(settings, "OLLAMA_EMBEDDING_MODEL", _EMBED_MODEL),
|
||||
"prompt": text,
|
||||
},
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
return resp.json().get("embedding")
|
||||
|
||||
@@ -4,7 +4,7 @@ Playbook RAG Service - Phase 3 向量化語意搜尋
|
||||
ADR-030: 智能自動修復系統
|
||||
|
||||
使用 Embedding 進行 Playbook 語意搜尋:
|
||||
1. Ollama nomic-embed-text 生成向量
|
||||
1. Ollama bge-m3 生成向量
|
||||
2. Redis 儲存向量 (JSON 格式)
|
||||
3. 餘弦相似度搜尋
|
||||
|
||||
@@ -41,9 +41,9 @@ logger = structlog.get_logger(__name__)
|
||||
# Constants
|
||||
# =============================================================================
|
||||
|
||||
# Embedding Model (Ollama 本地)
|
||||
EMBEDDING_MODEL = "nomic-embed-text"
|
||||
EMBEDDING_DIM = 768 # nomic-embed-text 向量維度
|
||||
# Embedding Model (Ollama)
|
||||
EMBEDDING_MODEL = "bge-m3:latest"
|
||||
EMBEDDING_DIM = 1024 # bge-m3 向量維度
|
||||
|
||||
|
||||
def _dedupe_urls(urls: list[str]) -> list[str]:
|
||||
@@ -170,7 +170,7 @@ class PlaybookRAGService:
|
||||
getattr(settings, "OLLAMA_FALLBACK_URL", ""),
|
||||
]
|
||||
)
|
||||
self.embedding_model = EMBEDDING_MODEL
|
||||
self.embedding_model = str(getattr(settings, "OLLAMA_EMBEDDING_MODEL", EMBEDDING_MODEL) or EMBEDDING_MODEL)
|
||||
|
||||
# =========================================================================
|
||||
# Embedding Operations
|
||||
|
||||
Reference in New Issue
Block a user