All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 12m47s
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
279 lines
10 KiB
Python
279 lines
10 KiB
Python
"""
|
||
Knowledge Service - 業務邏輯層
|
||
===============================
|
||
Knowledge Base Phase 1: CRUD + 狀態流轉 + 搜尋
|
||
|
||
建立時間: 2026-04-02 (台北時區)
|
||
建立者: Claude Code (Knowledge Base Phase 1)
|
||
|
||
遵循 leWOOOgo 積木化原則:
|
||
- Service 層封裝業務邏輯
|
||
- 依賴 IKnowledgeRepository Protocol
|
||
- Router 層禁止直接存取 DB
|
||
"""
|
||
|
||
import asyncio
|
||
|
||
import structlog
|
||
|
||
from src.db.base import get_db_context
|
||
from src.models.knowledge import (
|
||
CategoryCount,
|
||
EntryStatus,
|
||
EntryType,
|
||
KnowledgeEntry,
|
||
KnowledgeEntryCreate,
|
||
KnowledgeEntryUpdate,
|
||
KnowledgeListResponse,
|
||
)
|
||
from src.repositories.interfaces import IKnowledgeRepository
|
||
from src.repositories.knowledge_repository import KnowledgeDBRepository
|
||
from src.services.embedding_service import OllamaEmbeddingService
|
||
|
||
logger = structlog.get_logger(__name__)
|
||
|
||
# =============================================================================
|
||
# Singleton
|
||
# =============================================================================
|
||
|
||
_knowledge_service: "KnowledgeService | None" = None
|
||
|
||
|
||
def get_knowledge_service() -> "KnowledgeService":
|
||
"""取得 Knowledge Service 實例"""
|
||
global _knowledge_service
|
||
if _knowledge_service is None:
|
||
_knowledge_service = KnowledgeService()
|
||
return _knowledge_service
|
||
|
||
|
||
class KnowledgeService:
    """Business logic for the Knowledge Base: CRUD, status changes and search.

    Each public method opens its own database context through
    ``get_db_context()`` and reaches storage through ``KnowledgeDBRepository``.
    Embeddings are produced with a single ``OllamaEmbeddingService`` instance
    owned by the service.
    """

    # Number of leading content characters included in the text that is sent
    # to the embedding model.
    _EMBED_CONTENT_CHARS = 2000

    def __init__(self) -> None:
        # I2: build the embedding client once so calls do not create a new
        # instance every time.
        self._embed_svc = OllamaEmbeddingService(model="nomic-embed-text", timeout=15.0)
        # I1: keep strong references to background tasks; the event loop only
        # holds weak references, so an unreferenced task may be collected
        # before it finishes.
        self._pending_tasks: set[asyncio.Task[None]] = set()

    @classmethod
    def _build_document_text(cls, title: str, content: str | None) -> str:
        """Build the text embedded for a stored entry.

        Args:
            title: Entry title.
            content: Entry body; ``None`` is treated as an empty body so the
                title alone is embedded instead of raising ``TypeError``.

        Returns:
            ``"search_document: <title>\\n\\n<body>"`` where ``<body>`` is
            limited to ``_EMBED_CONTENT_CHARS`` characters.
        """
        body = (content or "")[: cls._EMBED_CONTENT_CHARS]
        return f"search_document: {title}\n\n{body}"

    async def create_entry(self, data: KnowledgeEntryCreate) -> KnowledgeEntry:
        """Create a knowledge entry and schedule its embedding in the background.

        Args:
            data: Payload describing the entry to create.

        Returns:
            The entry returned by the repository's ``create`` call.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            entry = await repo.create(data)
            logger.info(
                "knowledge_entry_created",
                entry_id=entry.id,
                entry_type=entry.entry_type,
                source=entry.source,
            )

        # Generate the embedding without blocking the response. The task is
        # scheduled after the DB context has exited, and a reference is kept
        # until it completes so it cannot be garbage collected early.
        task = asyncio.create_task(self._embed_entry(entry.id, data.title, data.content))
        self._pending_tasks.add(task)
        task.add_done_callback(self._pending_tasks.discard)
        return entry

    async def _embed_entry(self, entry_id: str, title: str, content: str) -> None:
        """Background task: generate and store the embedding for one entry.

        Best-effort by design: any exception is logged as a warning and
        swallowed, because nothing awaits this task's result.
        """
        try:
            text = self._build_document_text(title, content)
            embedding = await self._embed_svc.embed_text(text)
            if not embedding:
                logger.warning("knowledge_embedding_empty", entry_id=entry_id)
                return
            async with get_db_context() as db:
                repo = KnowledgeDBRepository(db)
                await repo.save_embedding(entry_id, embedding)
            logger.info("knowledge_embedding_saved", entry_id=entry_id)
        except Exception as e:
            logger.warning("knowledge_embedding_failed", entry_id=entry_id, error=str(e))

    async def get_entry(self, entry_id: str) -> KnowledgeEntry | None:
        """Fetch one entry and count the access as a view.

        Returns:
            The entry with ``view_count`` already incremented in the returned
            object, or ``None`` when the repository finds no such entry.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            entry = await repo.get_by_id(entry_id)
            if entry:
                await repo.increment_view_count(entry_id)
                # Mirror the stored increment on the object handed back.
                entry.view_count += 1
            return entry

    async def update_entry(
        self, entry_id: str, data: KnowledgeEntryUpdate
    ) -> KnowledgeEntry | None:
        """Apply a partial update to an entry.

        Fields that are ``None`` in ``data`` are dropped, so they are left
        unchanged. When nothing remains to update, the current entry is
        returned as-is.

        Returns:
            Whatever the repository returns for the update (or the lookup).
        """
        update_data = data.model_dump(exclude_none=True)
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            if not update_data:
                return await repo.get_by_id(entry_id)
            return await repo.update(entry_id, update_data)

    async def approve_entry(self, entry_id: str) -> KnowledgeEntry | None:
        """Mark an entry as approved (intended for draft/review entries).

        Returns:
            ``None`` if the entry does not exist, the unchanged entry if it is
            already approved, otherwise the result of the status update.

        NOTE(review): every status other than APPROVED is transitioned,
        including archived entries - confirm that this is intended.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            entry = await repo.get_by_id(entry_id)
            if not entry:
                return None
            if entry.status == EntryStatus.APPROVED:
                return entry
            return await repo.update(entry_id, {"status": EntryStatus.APPROVED})

    async def archive_entry(self, entry_id: str) -> bool:
        """Archive an entry by delegating to the repository's ``delete``.

        NOTE(review): the original author describes this as a soft delete;
        that depends on ``repo.delete`` and cannot be confirmed from here.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            return await repo.delete(entry_id)

    async def list_entries(
        self,
        category: str | None = None,
        entry_type: EntryType | None = None,
        status: EntryStatus | None = None,
        tags: list[str] | None = None,
        q: str | None = None,
        limit: int = 20,
        offset: int = 0,
    ) -> KnowledgeListResponse:
        """List entries matching the filters, together with category counts.

        All arguments are forwarded unchanged to the repository.

        Returns:
            A response holding the page of items, the total reported by the
            repository, and one ``CategoryCount`` per category.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            items, total = await repo.list_entries(
                category=category,
                entry_type=entry_type,
                status=status,
                tags=tags,
                q=q,
                limit=limit,
                offset=offset,
            )
            categories_raw = await repo.get_categories()
            categories = [
                CategoryCount(category=cat, count=cnt) for cat, cnt in categories_raw
            ]
            return KnowledgeListResponse(
                items=items, total=total, categories=categories
            )

    async def get_categories(self) -> list[CategoryCount]:
        """Return category counts straight from the repository.

        This does not go through ``list_entries``.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            categories_raw = await repo.get_categories()
            return [CategoryCount(category=cat, count=cnt) for cat, cnt in categories_raw]

    async def search(self, query: str, limit: int = 20) -> list[KnowledgeEntry]:
        """Keyword search, delegated to the repository."""
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            return await repo.search(query, limit)

    async def semantic_search(
        self,
        query: str,
        limit: int = 10,
        threshold: float = 0.5,
    ) -> list[tuple[KnowledgeEntry, float]]:
        """Semantic search over stored embeddings.

        The query is embedded with a ``search_query: `` prefix and handed to
        the repository together with ``limit`` and ``threshold``.

        Returns:
            ``(entry, score)`` pairs from the repository, or an empty list when
            no embedding could be produced for the query. The original author
            documents this as pgvector cosine similarity sorted by descending
            score; that ordering is the repository's responsibility and is
            not enforced here.
        """
        query_text = f"search_query: {query}"
        embedding = await self._embed_svc.embed_text(query_text)
        if not embedding:
            logger.warning("semantic_search_embedding_failed", query=query)
            return []

        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            return await repo.semantic_search(embedding, limit=limit, threshold=threshold)

    async def embed_all_entries(self) -> dict[str, int]:
        """Generate embeddings for every entry that has none (admin use).

        Entries are processed one at a time; a failure on one entry is logged
        and counted, and processing continues with the next.

        Returns:
            ``{"total": N, "success": N, "failed": N}``
        """
        # C2 fix: rows come from the repository; the service does not run raw
        # SQL here.
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            rows = await repo.list_unembedded_entries()

        success = failed = 0
        for entry_id, title, content in rows:
            try:
                text = self._build_document_text(title, content)
                embedding = await self._embed_svc.embed_text(text)
                if embedding:
                    # A fresh context per entry, so each save is independent
                    # of the others.
                    async with get_db_context() as db:
                        writer = KnowledgeDBRepository(db)
                        await writer.save_embedding(entry_id, embedding)
                    success += 1
                else:
                    logger.warning("embed_all_empty_vector", entry_id=entry_id)
                    failed += 1
            except Exception as e:
                logger.warning("embed_all_failed", entry_id=entry_id, error=str(e))
                failed += 1

        logger.info("embed_all_complete", total=len(rows), success=success, failed=failed)
        return {"total": len(rows), "success": success, "failed": failed}

    async def check_anti_pattern(
        self,
        symptoms_hash: str,
        days: int = 7,
    ) -> list[KnowledgeEntry]:
        """Look up recent anti-pattern entries for a symptom hash.

        2026-04-04 Claude Code: Phase 25 P1 - anti-pattern closed-loop gate.
        Intended to let auto_repair ``decide()`` block a repair that has
        recently failed for the same symptoms.

        Entries are loaded with ``repo.get_by_id`` rather than
        ``self.get_entry`` so that this automated check does not increment
        ``view_count`` and does not open a new DB context per entry.

        Args:
            symptoms_hash: The 16-character hash from
                ``SymptomPattern.compute_hash()``.
            days: How many days back to look (default 7).

        Returns:
            Up to five matching entries, newest first; an empty list means no
            known failure case.

        NOTE(review): this method runs raw SQL in the service layer, unlike
        ``embed_all_entries``; moving it needs a repository method that is not
        visible from this file.
        NOTE(review): the query compares ``entry_type`` against lowercase
        ``'anti_pattern'`` but ``status`` against uppercase ``'ARCHIVED'`` -
        confirm both match the stored values.
        """
        # Kept as function-local imports, as in the original implementation.
        from datetime import timedelta

        from sqlalchemy import text as sa_text

        from src.utils.timezone import now_taipei

        cutoff = now_taipei() - timedelta(days=days)

        async with get_db_context() as db:
            result = await db.execute(
                sa_text(
                    "SELECT id FROM knowledge_entries "
                    "WHERE entry_type = 'anti_pattern' "
                    "AND symptoms_hash = :hash "
                    "AND created_at >= :cutoff "
                    "AND status != 'ARCHIVED' "
                    "ORDER BY created_at DESC LIMIT 5"
                ),
                {"hash": symptoms_hash, "cutoff": cutoff},
            )
            entry_ids = [row.id for row in result.fetchall()]
            if not entry_ids:
                return []

            # Hydrate through the repository in the same context; ids whose
            # lookup returns nothing are skipped.
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            entries = []
            for eid in entry_ids:
                entry = await repo.get_by_id(eid)
                if entry:
                    entries.append(entry)

        logger.info(
            "anti_pattern_check",
            symptoms_hash=symptoms_hash,
            days=days,
            found=len(entries),
        )
        return entries
|