diff --git a/apps/api/src/api/v1/rag.py b/apps/api/src/api/v1/rag.py index a385570a..ad806ff6 100644 --- a/apps/api/src/api/v1/rag.py +++ b/apps/api/src/api/v1/rag.py @@ -8,9 +8,10 @@ leWOOOgo 原則: Router 只做 HTTP 轉發,業務邏輯在 KnowledgeRAGService 建立者: Claude Code (Phase 33 ADR-067) """ -from fastapi import APIRouter, BackgroundTasks, HTTPException +from fastapi import APIRouter, BackgroundTasks from pydantic import BaseModel +from src.core.config import get_settings from src.services.knowledge_rag_service import get_knowledge_rag_service router = APIRouter(prefix="/rag", tags=["RAG Knowledge Base"]) @@ -43,9 +44,10 @@ async def trigger_index(background_tasks: BackgroundTasks) -> RagIndexResponse: - .agents/skills/*.md """ background_tasks.add_task(_run_index) + model = get_settings().OLLAMA_EMBEDDING_MODEL return RagIndexResponse( status="accepted", - message="索引已排程,背景執行中(nomic-embed-text @ Ollama 111)", + message=f"索引已排程,背景執行中({model} @ Ollama GCP-A/GCP-B/111)", ) @@ -76,15 +78,16 @@ async def rag_debug() -> dict: try: async with httpx.AsyncClient(timeout=10.0) as c: from src.core.config import get_settings as _gs + settings = _gs() r = await c.post( - f"{_gs().OLLAMA_URL}/api/embeddings", - json={"model": "nomic-embed-text", "prompt": "test"}, + f"{settings.OLLAMA_URL}/api/embeddings", + json={"model": settings.OLLAMA_EMBEDDING_MODEL, "prompt": "test"}, ) ollama_ok = r.status_code == 200 if r.status_code == 200 else f"http_{r.status_code}" except Exception as e: ollama_ok = f"error: {type(e).__name__}: {e}" - return {"cwd": os.getcwd(), "paths": paths_check, "ollama_111_embed": ollama_ok} + return {"cwd": os.getcwd(), "paths": paths_check, "ollama_embedding": ollama_ok} @router.get("/stats", summary="索引統計") diff --git a/apps/api/src/core/config.py b/apps/api/src/core/config.py index 351bddc6..e9e6d465 100644 --- a/apps/api/src/core/config.py +++ b/apps/api/src/core/config.py @@ -376,6 +376,10 @@ class Settings(BaseSettings): default="gemma3:4b", description="OllamaHealthMonitor 推理測試使用模型(P1.1)", ) + OLLAMA_EMBEDDING_MODEL: str = Field( + default="bge-m3:latest", + description="Ollama embedding model. ADR-110 migrated embeddings from nomic-embed-text to bge-m3.", + ) # 2026-04-12 ogt: 心跳必須確認載入的 Ollama 模型清單 # 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP 升級,更新必要模型清單(nomic→bge-m3 + 新增 qwen3:14b + hermes3) OLLAMA_REQUIRED_MODELS: list[str] = Field( diff --git a/apps/api/src/services/knowledge_rag_service.py b/apps/api/src/services/knowledge_rag_service.py index 93191722..57c046df 100644 --- a/apps/api/src/services/knowledge_rag_service.py +++ b/apps/api/src/services/knowledge_rag_service.py @@ -1,13 +1,13 @@ """ AWOOOI — Knowledge RAG Service (Phase 33, ADR-067) ================================================== -本地 RAG 知識庫:nomic-embed-text 768維向量 + pgvector +本地 RAG 知識庫:bge-m3 1024維向量 + pgvector 索引策略: - 初期 < 100 筆: 線性搜尋 - 超過 100 筆: 執行 CREATE INDEX ivfflat (手動觸發) -向量模型: nomic-embed-text (Ollama 111, 768維) — 188:11434 被 NetworkPolicy v1.3 封閉 +向量模型: bge-m3 (GCP-A/GCP-B/111 Ollama lane, 1024維) 生成模型: qwen2.5:7b-instruct (Ollama 111) leWOOOgo: Service 層只處理業務邏輯,DB 存取委派 rag_chunk_repository @@ -21,11 +21,12 @@ import httpx import structlog import src.repositories.rag_chunk_repository as rag_repo +from src.core.config import settings from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint logger = structlog.get_logger(__name__) -_EMBED_MODEL = "nomic-embed-text" +_EMBED_MODEL = "bge-m3:latest" _GEN_MODEL = "qwen2.5:7b-instruct" _TOP_K = 5 @@ -131,7 +132,10 @@ class KnowledgeRAGService: http = await self._get_http() resp = await http.post( f"{resolve_ollama_endpoint('embedding')}/api/embeddings", - json={"model": _EMBED_MODEL, "prompt": text}, + json={ + "model": getattr(settings, "OLLAMA_EMBEDDING_MODEL", _EMBED_MODEL), + "prompt": text, + }, ) if resp.status_code == 200: return resp.json().get("embedding") diff --git a/apps/api/src/services/playbook_rag.py b/apps/api/src/services/playbook_rag.py index 62fe3b57..db818c73 100644 --- a/apps/api/src/services/playbook_rag.py +++ b/apps/api/src/services/playbook_rag.py @@ -4,7 +4,7 @@ Playbook RAG Service - Phase 3 向量化語意搜尋 ADR-030: 智能自動修復系統 使用 Embedding 進行 Playbook 語意搜尋: -1. Ollama nomic-embed-text 生成向量 +1. Ollama bge-m3 生成向量 2. Redis 儲存向量 (JSON 格式) 3. 餘弦相似度搜尋 @@ -41,9 +41,9 @@ logger = structlog.get_logger(__name__) # Constants # ============================================================================= -# Embedding Model (Ollama 本地) -EMBEDDING_MODEL = "nomic-embed-text" -EMBEDDING_DIM = 768 # nomic-embed-text 向量維度 +# Embedding Model (Ollama) +EMBEDDING_MODEL = "bge-m3:latest" +EMBEDDING_DIM = 1024 # bge-m3 向量維度 def _dedupe_urls(urls: list[str]) -> list[str]: @@ -170,7 +170,7 @@ class PlaybookRAGService: getattr(settings, "OLLAMA_FALLBACK_URL", ""), ] ) - self.embedding_model = EMBEDDING_MODEL + self.embedding_model = str(getattr(settings, "OLLAMA_EMBEDDING_MODEL", EMBEDDING_MODEL) or EMBEDDING_MODEL) # ========================================================================= # Embedding Operations diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 78dc65e3..4a3c5b12 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -14,7 +14,7 @@ - 新增 `INCIDENT_LLM_TIMEOUT_SECONDS`,production 設為 240s;Incident LLM 外層 guard 不再硬編 25s,且不得低於 `OPENCLAW_TIMEOUT`。 - 新增 `AGENT_DEBATE_GLOBAL_TIMEOUT_SEC`,production 設為 260s;Phase 2 debate 不再被 90s 固定值卡死。 - `ollama_endpoint_resolver` 改為非敏感工作(embedding/RAG/deep_rca/Hermes/code_review 等)GCP-A 優先、GCP-B 備援、111 兜底;只有 `local_required` / `privacy_sensitive` / `dr` 維持 local-first。 -- `PlaybookRAGService.embed_text()` 改為依序嘗試配置的 Ollama endpoints,單一 endpoint 失敗不再直接放棄 RAG。 +- `PlaybookRAGService.embed_text()` 改為依序嘗試配置的 Ollama endpoints,單一 endpoint 失敗不再直接放棄 RAG;Playbook/Knowledge RAG embedding model 改為 ADR-110 的 `bge-m3:latest`,避免 GCP-A/B 因舊 `nomic-embed-text` 回 404 後再掉到不可達的 111。 **驗證**: - `py_compile` touched backend files OK;ruff `E9,F401,F821,F841` OK。 diff --git a/k8s/awoooi-prod/04-configmap.yaml b/k8s/awoooi-prod/04-configmap.yaml index dd75197d..6ea0b5ec 100644 --- a/k8s/awoooi-prod/04-configmap.yaml +++ b/k8s/awoooi-prod/04-configmap.yaml @@ -64,6 +64,7 @@ data: ALERT_AI_ENFORCE_OLLAMA_FIRST: "true" ALERT_OLLAMA_MODEL: "qwen3:14b" OLLAMA_HEALTH_CHECK_MODEL: "gemma3:4b" + OLLAMA_EMBEDDING_MODEL: "bge-m3:latest" OPENCLAW_DEFAULT_MODEL: "qwen2.5:7b-instruct" OPENCLAW_TIMEOUT: "120" INCIDENT_LLM_TIMEOUT_SECONDS: "240" diff --git a/k8s/awoooi-prod/06-deployment-api.yaml b/k8s/awoooi-prod/06-deployment-api.yaml index 209f3695..91a73da1 100644 --- a/k8s/awoooi-prod/06-deployment-api.yaml +++ b/k8s/awoooi-prod/06-deployment-api.yaml @@ -81,6 +81,8 @@ spec: value: "qwen3:14b" # 2026-05-05 Codex: 告警以解決問題為目標,可等待深度診斷 - name: OLLAMA_HEALTH_CHECK_MODEL value: "gemma3:4b" # 2026-05-05 Codex: 避免 health probe 載入 qwen2.5 7B 污染 GCP alert lane + - name: OLLAMA_EMBEDDING_MODEL + value: "bge-m3:latest" - name: OPENCLAW_DEFAULT_MODEL value: "qwen2.5:7b-instruct" - name: OPENCLAW_TIMEOUT