Files
awoooi/apps/api/src/services/playbook_rag.py
Your Name 09256be62c
Some checks failed
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m22s
CD Pipeline / build-and-deploy (push) Failing after 2h14m5s
CD Pipeline / post-deploy-checks (push) Has been cancelled
fix(rag): use bge embeddings on GCP Ollama lane
2026-05-06 05:49:37 +08:00

649 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Playbook RAG Service - Phase 3 向量化語意搜尋
=============================================
ADR-030: 智能自動修復系統
使用 Embedding 進行 Playbook 語意搜尋:
1. Ollama bge-m3 生成向量
2. Redis 儲存向量 (JSON 格式)
3. 餘弦相似度搜尋
設計原則:
- Embedding 快取,避免重複計算
- 混合搜尋 (向量 + Jaccard)
- Fallback: Embedding 失敗時用 Jaccard
- 2026-03-27 ogt: 模組化改造 (P1 違規修復)
- Repository Pattern for Redis
- DI httpx client from Lifespan
版本: v1.1
建立: 2026-03-26 (台北時區)
修改: 2026-03-27 (模組化改造)
"""
import math
from dataclasses import dataclass, field
from datetime import UTC, datetime
from typing import Any
import httpx
import structlog
from src.core.config import settings
from src.models.playbook import Playbook, SymptomPattern
from src.repositories.interfaces import IEmbeddingCacheRepository
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
# Module-level structured logger used by the functions and service class below.
logger = structlog.get_logger(__name__)
# =============================================================================
# Constants
# =============================================================================
# Embedding Model (Ollama)
# Default model tag; PlaybookRAGService falls back to it when
# settings.OLLAMA_EMBEDDING_MODEL is missing or empty.
EMBEDDING_MODEL = "bge-m3:latest"
# NOTE(review): EMBEDDING_DIM is not referenced anywhere in the reviewed part of this file.
EMBEDDING_DIM = 1024 # vector dimension of bge-m3
def _dedupe_urls(urls: list[str]) -> list[str]:
"""Return configured Ollama URLs in order without blanks or duplicates."""
deduped: list[str] = []
seen: set[str] = set()
for url in urls:
normalized = (url or "").rstrip("/")
if not normalized or normalized in seen:
continue
deduped.append(normalized)
seen.add(normalized)
return deduped
# =============================================================================
# Data Models
# =============================================================================
@dataclass
class PlaybookMatch:
    """One playbook hit returned by a search."""

    playbook_id: str
    similarity_score: float  # 0.0 ~ 1.0
    match_type: str  # "vector", "jaccard", "hybrid"
    matched_keywords: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view with the score rounded to 4 decimal places."""
        payload: dict[str, Any] = {"playbook_id": self.playbook_id}
        payload["similarity_score"] = round(self.similarity_score, 4)
        payload["match_type"] = self.match_type
        # The keyword list is passed through by reference, not copied.
        payload["matched_keywords"] = self.matched_keywords
        return payload
@dataclass
class EmbeddingResult:
"""Embedding 結果"""
text: str
vector: list[float]
model: str
created_at: datetime = field(default_factory=lambda: datetime.now(UTC))
def to_dict(self) -> dict[str, Any]:
return {
"text_hash": hash(self.text) % 2**32, # 不存完整 text
"vector": self.vector,
"model": self.model,
"created_at": self.created_at.isoformat(),
}
# =============================================================================
# Vector Utilities
# =============================================================================
def cosine_similarity(vec_a: list[float], vec_b: list[float]) -> float:
"""計算餘弦相似度"""
if len(vec_a) != len(vec_b):
return 0.0
dot_product = sum(a * b for a, b in zip(vec_a, vec_b, strict=True))
norm_a = math.sqrt(sum(a * a for a in vec_a))
norm_b = math.sqrt(sum(b * b for b in vec_b))
if norm_a == 0 or norm_b == 0:
return 0.0
return dot_product / (norm_a * norm_b)
def normalize_vector(vec: list[float]) -> list[float]:
    """Scale a vector to unit length (L2 norm).

    Args:
        vec: Vector to normalise.

    Returns:
        A new list with every component divided by the vector's L2 norm.
        When the norm is zero (all-zero or empty input) the original list
        object is returned unchanged.
    """
    length = math.sqrt(sum(component * component for component in vec))
    return vec if length == 0 else [component / length for component in vec]
# =============================================================================
# Playbook RAG Service
# =============================================================================
class PlaybookRAGService:
    """
    Playbook RAG service.

    Capabilities:
    1. Vectorise playbooks and persist the vectors through the injected
       embedding cache repository.
    2. Semantic (cosine-similarity) search for similar playbooks.
    3. Hybrid search combining vector scores with caller-supplied Jaccard scores.

    2026-03-27 ogt: modularisation
    - ``http_client`` and ``embedding_cache`` are supplied through DI.
    """

    def __init__(
        self,
        http_client: httpx.AsyncClient,
        embedding_cache: IEmbeddingCacheRepository,
    ):
        """
        Initialise the RAG service.

        The endpoint list is the resolver result for ``"embedding"`` followed by
        ``settings.OLLAMA_URL``, ``OLLAMA_SECONDARY_URL`` and
        ``OLLAMA_FALLBACK_URL`` (each optional), normalised and de-duplicated.
        The model name comes from ``settings.OLLAMA_EMBEDDING_MODEL`` and falls
        back to ``EMBEDDING_MODEL`` when that setting is missing or empty.

        Args:
            http_client: httpx AsyncClient (injected; from Lifespan per the original note).
            embedding_cache: Embedding cache repository (injected).
        """
        self._http_client = http_client
        self._embedding_cache = embedding_cache
        self.ollama_url = resolve_ollama_endpoint("embedding")
        self.ollama_urls = _dedupe_urls(
            [
                self.ollama_url,
                getattr(settings, "OLLAMA_URL", ""),
                getattr(settings, "OLLAMA_SECONDARY_URL", ""),
                getattr(settings, "OLLAMA_FALLBACK_URL", ""),
            ]
        )
        configured_model = getattr(settings, "OLLAMA_EMBEDDING_MODEL", EMBEDDING_MODEL)
        self.embedding_model = str(configured_model or EMBEDDING_MODEL)

    # =========================================================================
    # Embedding Operations
    # =========================================================================
    async def _get_http_client(self) -> httpx.AsyncClient:
        """
        Return a usable HTTP client, replacing it first if it has been closed.

        2026-04-04 ogt: fixes embedding failures caused by ``is_closed=True``
        after a rolling restart.
        """
        if self._http_client.is_closed:
            logger.warning("playbook_rag_http_client_closed_rebuilding")
            # Deferred import, same pattern as the module-level factory.
            from src.core.http_client import get_general_client

            self._http_client = await get_general_client()
        return self._http_client

    async def embed_text(self, text: str) -> list[float] | None:
        """
        Generate a text embedding through Ollama.

        Each configured endpoint is tried in order; the first one that answers
        HTTP 200 with a non-empty ``embedding`` wins. Failures are logged and
        never raised (best-effort).

        2026-03-27 ogt: uses the DI-injected http_client.
        2026-04-04 ogt: added automatic rebuild when the client is closed.

        Args:
            text: Text to vectorise.

        Returns:
            The L2-normalised embedding, or ``None`` when every endpoint failed.
            The vector length is whatever the model returns and is not validated
            here (the module documents 1024 for bge-m3; the former "768" note
            was stale).
        """
        try:
            client = await self._get_http_client()
            last_error = ""
            for endpoint_url in self.ollama_urls:
                try:
                    response = await client.post(
                        f"{endpoint_url}/api/embeddings",
                        json={
                            "model": self.embedding_model,
                            "prompt": text,
                        },
                        timeout=30.0,  # per-request timeout
                    )
                    if response.status_code != 200:
                        last_error = f"http_{response.status_code}"
                        logger.warning(
                            "ollama_embedding_failed",
                            endpoint=endpoint_url,
                            status_code=response.status_code,
                            text_preview=text[:50],
                        )
                        continue
                    result = response.json()
                    embedding = result.get("embedding", [])
                    if not embedding:
                        last_error = "empty_embedding"
                        logger.warning(
                            "ollama_embedding_empty",
                            endpoint=endpoint_url,
                            text_preview=text[:50],
                        )
                        continue
                    logger.info("ollama_embedding_success", endpoint=endpoint_url)
                    return normalize_vector(embedding)
                except Exception as endpoint_error:
                    # Deliberately broad: any failure on one endpoint must not
                    # prevent the next endpoint from being tried.
                    last_error = str(endpoint_error)
                    logger.warning(
                        "ollama_embedding_endpoint_error",
                        endpoint=endpoint_url,
                        error=last_error,
                        text_preview=text[:50],
                    )
            # Reached only when no endpoint produced a usable embedding.
            logger.warning(
                "ollama_embedding_error",
                error=last_error or "all endpoints failed",
                text_preview=text[:50],
            )
            return None
        except Exception as e:
            # Covers failures outside the per-endpoint loop (e.g. obtaining the client).
            logger.warning(
                "ollama_embedding_error",
                error=str(e),
                text_preview=text[:50],
            )
            return None

    async def embed_playbook(self, playbook: Playbook) -> list[float] | None:
        """
        Vectorise a playbook.

        The embedded text combines, in this order, the symptom pattern (alert
        names, affected services, keywords), the name, the description and up
        to the first five repair steps.

        Returns:
            The embedding, or ``None`` when embedding failed.
        """
        # Build the textual representation.
        text_parts = []
        # Symptoms
        if playbook.symptom_pattern:
            sp = playbook.symptom_pattern
            if sp.alert_names:
                text_parts.append(f"告警: {', '.join(sp.alert_names)}")
            if sp.affected_services:
                text_parts.append(f"服務: {', '.join(sp.affected_services)}")
            if sp.keywords:
                text_parts.append(f"關鍵字: {', '.join(sp.keywords)}")
        # Name and description
        text_parts.append(f"名稱: {playbook.name}")
        if playbook.description:
            text_parts.append(f"描述: {playbook.description}")
        # Repair steps
        # 2026-04-04 ogt: field names corrected (s.sequence -> s.step_number,
        # s.description -> s.command).
        if playbook.repair_steps:
            steps_text = "; ".join(
                f"{s.step_number}. {s.command}"
                for s in playbook.repair_steps[:5]  # at most 5 steps
            )
            text_parts.append(f"步驟: {steps_text}")
        text = "\n".join(text_parts)
        return await self.embed_text(text)

    async def embed_incident_query(
        self,
        alert_names: list[str],
        affected_services: list[str],
        description: str | None = None,
    ) -> list[float] | None:
        """
        Generate the query embedding for an incident.

        Used to search for similar playbooks.

        Returns:
            The embedding, or ``None`` when all three inputs are empty or the
            embedding request failed.
        """
        text_parts = []
        if alert_names:
            text_parts.append(f"告警: {', '.join(alert_names)}")
        if affected_services:
            text_parts.append(f"服務: {', '.join(affected_services)}")
        if description:
            text_parts.append(f"描述: {description}")
        if not text_parts:
            return None
        text = "\n".join(text_parts)
        return await self.embed_text(text)

    # =========================================================================
    # Storage Operations (delegated to the repository)
    # 2026-03-27 ogt: uses the DI-injected embedding_cache
    # =========================================================================
    async def store_playbook_embedding(
        self,
        playbook_id: str,
        embedding: list[float],
        metadata: dict | None = None,
    ) -> bool:
        """Store a playbook vector (delegated to the embedding cache repository)."""
        return await self._embedding_cache.store(playbook_id, embedding, metadata)

    async def get_playbook_embedding(
        self,
        playbook_id: str,
    ) -> list[float] | None:
        """Fetch one playbook vector (delegated to the embedding cache repository)."""
        return await self._embedding_cache.get(playbook_id)

    async def get_all_playbook_embeddings(self) -> dict[str, list[float]]:
        """Fetch every playbook vector (delegated to the embedding cache repository)."""
        return await self._embedding_cache.get_all()

    # =========================================================================
    # Search Operations
    # =========================================================================
    async def search_similar(
        self,
        query_embedding: list[float],
        top_k: int = 5,
        min_similarity: float = 0.5,
    ) -> list[PlaybookMatch]:
        """
        Vector similarity search.

        Scores the query against every cached playbook vector with cosine
        similarity.

        Args:
            query_embedding: Query vector.
            top_k: Maximum number of results; values <= 0 yield no results.
            min_similarity: Minimum similarity for a playbook to be kept.

        Returns:
            ``PlaybookMatch`` list sorted by descending similarity.
        """
        # Fetch every cached playbook vector.
        all_embeddings = await self.get_all_playbook_embeddings()
        if not all_embeddings:
            logger.debug("search_similar_no_embeddings")
            return []
        # Score each playbook.
        query_dim = len(query_embedding)
        dimension_mismatches = 0
        similarities: list[tuple[str, float]] = []
        for playbook_id, embedding in all_embeddings.items():
            if len(embedding) != query_dim:
                # cosine_similarity scores a length mismatch as 0.0; count it so
                # the condition is visible in the logs instead of silent.
                dimension_mismatches += 1
            sim = cosine_similarity(query_embedding, embedding)
            if sim >= min_similarity:
                similarities.append((playbook_id, sim))
        if dimension_mismatches:
            logger.warning(
                "playbook_vector_dimension_mismatch",
                query_dim=query_dim,
                mismatched=dimension_mismatches,
                total_embeddings=len(all_embeddings),
            )
        # Sort descending by score.
        similarities.sort(key=lambda x: x[1], reverse=True)
        # Keep the top K. Clamping avoids a negative top_k slicing
        # "everything except the last k" instead of returning nothing.
        limit = max(top_k, 0)
        results = [
            PlaybookMatch(
                playbook_id=pid,
                similarity_score=sim,
                match_type="vector",
            )
            for pid, sim in similarities[:limit]
        ]
        logger.info(
            "playbook_vector_search",
            total_embeddings=len(all_embeddings),
            matches_found=len(results),
            top_score=results[0].similarity_score if results else 0,
        )
        return results

    async def search_by_incident(
        self,
        alert_names: list[str],
        affected_services: list[str],
        description: str | None = None,
        top_k: int = 5,
        min_similarity: float = 0.5,
    ) -> list[PlaybookMatch]:
        """
        Search for playbooks similar to an incident.

        Convenience method that embeds the incident and then runs
        ``search_similar``.

        Returns:
            Matching playbooks, or an empty list when no query embedding could
            be produced.
        """
        # Build the query vector.
        query_embedding = await self.embed_incident_query(
            alert_names=alert_names,
            affected_services=affected_services,
            description=description,
        )
        if not query_embedding:
            logger.warning(
                "search_by_incident_embedding_failed",
                alert_names=alert_names,
            )
            return []
        return await self.search_similar(
            query_embedding=query_embedding,
            top_k=top_k,
            min_similarity=min_similarity,
        )

    # =========================================================================
    # Hybrid Search (Vector + Jaccard)
    # =========================================================================
    async def hybrid_search(
        self,
        symptoms: SymptomPattern,
        jaccard_results: list[tuple[str, float]],  # (playbook_id, jaccard_score)
        top_k: int = 5,
        vector_weight: float = 0.6,
        jaccard_weight: float = 0.4,
    ) -> list[PlaybookMatch]:
        """
        Hybrid search (vector + Jaccard).

        Combines vector similarity with the caller-supplied Jaccard scores as
        ``v * vector_weight + j * jaccard_weight``; a playbook missing from one
        side contributes 0.0 for that side. The query text uses only
        ``symptoms.alert_names`` and ``symptoms.affected_services``. When no
        query embedding can be produced, only the Jaccard results are ranked
        (still scaled by ``jaccard_weight``).

        Args:
            symptoms: Symptom pattern to search for.
            jaccard_results: Jaccard matches as ``(playbook_id, score)`` pairs.
            top_k: Maximum number of results; values <= 0 yield no results.
            vector_weight: Weight applied to the vector score.
            jaccard_weight: Weight applied to the Jaccard score.

        Returns:
            Results sorted by descending hybrid score; ties are ordered by
            ``playbook_id`` so the ranking is reproducible.
        """
        # 1. Vector search
        query_embedding = await self.embed_incident_query(
            alert_names=symptoms.alert_names,
            affected_services=symptoms.affected_services,
            description=None,
        )
        vector_scores: dict[str, float] = {}
        if query_embedding:
            vector_matches = await self.search_similar(
                query_embedding=query_embedding,
                top_k=top_k * 2,
                min_similarity=0.3,
            )
            vector_scores = {m.playbook_id: m.similarity_score for m in vector_matches}
        # 2. Jaccard scores
        jaccard_scores = dict(jaccard_results)
        # 3. Union of all playbook ids
        all_ids = set(vector_scores.keys()) | set(jaccard_scores.keys())
        # 4. Compute the hybrid score
        hybrid_results: list[tuple[str, float, str]] = []
        for pid in all_ids:
            v_score = vector_scores.get(pid, 0.0)
            j_score = jaccard_scores.get(pid, 0.0)
            hybrid_score = (v_score * vector_weight) + (j_score * jaccard_weight)
            # Decide the reported match type.
            if v_score > 0 and j_score > 0:
                match_type = "hybrid"
            elif v_score > 0:
                match_type = "vector"
            else:
                match_type = "jaccard"
            hybrid_results.append((pid, hybrid_score, match_type))
        # 5. Sort and keep the top K.
        # all_ids is a set of str, whose iteration order varies between
        # interpreter runs (str hash randomisation); breaking ties on the
        # playbook id keeps the ranking reproducible.
        hybrid_results.sort(key=lambda x: (-x[1], x[0]))
        limit = max(top_k, 0)  # a negative top_k must not slice from the end
        results = [
            PlaybookMatch(
                playbook_id=pid,
                similarity_score=score,
                match_type=match_type,
            )
            for pid, score, match_type in hybrid_results[:limit]
        ]
        logger.info(
            "playbook_hybrid_search",
            vector_count=len(vector_scores),
            jaccard_count=len(jaccard_scores),
            hybrid_count=len(results),
            top_score=results[0].similarity_score if results else 0,
        )
        return results

    # =========================================================================
    # Index Management
    # =========================================================================
    async def index_playbook(self, playbook: Playbook) -> bool:
        """
        Build the vector index entry for a playbook.

        Intended to be called when a playbook is created or updated.

        Returns:
            ``False`` when the embedding could not be produced; otherwise the
            result of storing the vector together with the playbook's name,
            status value and tags.
        """
        embedding = await self.embed_playbook(playbook)
        if not embedding:
            logger.warning(
                "playbook_index_embedding_failed",
                playbook_id=playbook.playbook_id,
            )
            return False
        return await self.store_playbook_embedding(
            playbook_id=playbook.playbook_id,
            embedding=embedding,
            metadata={
                "name": playbook.name,
                "status": playbook.status.value if playbook.status else None,
                "tags": playbook.tags,
            },
        )

    async def remove_playbook_index(self, playbook_id: str) -> bool:
        """Remove a playbook's vector (delegated to the embedding cache repository)."""
        return await self._embedding_cache.remove(playbook_id)

    async def reindex_all_playbooks(
        self,
        playbooks: list[Playbook],
    ) -> tuple[int, int]:
        """
        Rebuild the vector index for every given playbook, one at a time.

        Returns:
            ``(success_count, failure_count)``.
        """
        success = 0
        failed = 0
        for playbook in playbooks:
            if await self.index_playbook(playbook):
                success += 1
            else:
                failed += 1
        logger.info(
            "playbook_reindex_complete",
            success=success,
            failed=failed,
            total=len(playbooks),
        )
        return success, failed
# =============================================================================
# Factory (DI-aware)
# 2026-03-27 ogt: 模組化改造 - 支援 DI 注入
# =============================================================================
# Lazily created module-wide service instance; populated by get_playbook_rag_service().
_rag_service: PlaybookRAGService | None = None


async def get_playbook_rag_service() -> PlaybookRAGService:
    """
    Return the Playbook RAG service singleton, creating it lazily.

    2026-03-27 ogt: dependencies (http_client and Redis) are obtained through DI helpers.
    2026-04-04 ogt: the singleton is rebuilt when its http_client reports
    ``is_closed`` (seen after rolling restarts).
    """
    global _rag_service

    existing = _rag_service
    if existing is not None and not existing._http_client.is_closed:
        return existing

    # Deferred imports avoid a circular dependency.
    from src.core.http_client import get_general_client
    from src.core.redis_client import get_redis
    from src.repositories.embedding_repository import EmbeddingCacheRepository

    client = await get_general_client()
    cache = EmbeddingCacheRepository(get_redis())
    _rag_service = PlaybookRAGService(http_client=client, embedding_cache=cache)
    return _rag_service
def create_playbook_rag_service(
    http_client: httpx.AsyncClient,
    embedding_cache: IEmbeddingCacheRepository,
) -> PlaybookRAGService:
    """
    Factory function that builds a fresh PlaybookRAGService.

    Meant for tests or for callers that need to supply their own dependencies;
    it does not touch the module-level singleton.

    Args:
        http_client: httpx AsyncClient to use for embedding requests.
        embedding_cache: Embedding cache repository.

    Returns:
        A new PlaybookRAGService instance.
    """
    dependencies = {
        "http_client": http_client,
        "embedding_cache": embedding_cache,
    }
    return PlaybookRAGService(**dependencies)