Files
awoooi/apps/api/src/services/knowledge_service.py
OG T 286a96d1aa
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 12m47s
fix(knowledge): entrystatus enum 大小寫修正 'archived' → 'ARCHIVED'
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-06 11:25:44 +08:00

279 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Knowledge Service - 業務邏輯層
===============================
Knowledge Base Phase 1: CRUD + 狀態流轉 + 搜尋
建立時間: 2026-04-02 (台北時區)
建立者: Claude Code (Knowledge Base Phase 1)
遵循 leWOOOgo 積木化原則:
- Service 層封裝業務邏輯
- 依賴 IKnowledgeRepository Protocol
- Router 層禁止直接存取 DB
"""
import asyncio
import structlog
from src.db.base import get_db_context
from src.models.knowledge import (
CategoryCount,
EntryStatus,
EntryType,
KnowledgeEntry,
KnowledgeEntryCreate,
KnowledgeEntryUpdate,
KnowledgeListResponse,
)
from src.repositories.interfaces import IKnowledgeRepository
from src.repositories.knowledge_repository import KnowledgeDBRepository
from src.services.embedding_service import OllamaEmbeddingService
logger = structlog.get_logger(__name__)
# =============================================================================
# Singleton
# =============================================================================
_knowledge_service: "KnowledgeService | None" = None


def get_knowledge_service() -> "KnowledgeService":
    """Return the module-wide KnowledgeService, creating it on first use.

    The instance is cached in the module-level ``_knowledge_service``
    variable, so every caller in the process receives the same object.
    """
    global _knowledge_service
    cached = _knowledge_service
    if cached is not None:
        return cached
    # First call: build the service and remember it for later callers.
    cached = KnowledgeService()
    _knowledge_service = cached
    return cached
class KnowledgeService:
    """Business-logic layer of the knowledge base.

    Wraps repository calls (CRUD, status transitions, keyword and semantic
    search) and owns the embedding client used to vectorise entries. Each
    public method opens its own database context via ``get_db_context()``.
    """

    # Maximum number of content characters included in the text sent to the
    # embedding model.
    _EMBED_CONTENT_MAX_CHARS = 2000

    def __init__(self) -> None:
        # I2: keep a single embedding client instead of creating one per call.
        self._embed_svc = OllamaEmbeddingService(model="nomic-embed-text", timeout=15.0)
        # I1: hold strong references to background tasks so they are not
        # garbage-collected before they finish.
        self._pending_tasks: set[asyncio.Task] = set()  # type: ignore[type-arg]

    @staticmethod
    def _build_document_text(title: str, content: str) -> str:
        """Build the text that is embedded for a stored entry.

        The content is truncated to ``_EMBED_CONTENT_MAX_CHARS`` characters.
        The ``search_document: `` prefix is the counterpart of the
        ``search_query: `` prefix used by ``semantic_search`` (presumably the
        task prefixes expected by the embedding model; confirm against the
        model documentation).
        """
        limit = KnowledgeService._EMBED_CONTENT_MAX_CHARS
        return f"search_document: {title}\n\n{content[:limit]}"

    async def create_entry(self, data: KnowledgeEntryCreate) -> KnowledgeEntry:
        """Create a knowledge entry and schedule its embedding in the background.

        Args:
            data: Payload describing the new entry.

        Returns:
            The entry returned by the repository. The embedding is produced
            asynchronously and is not awaited here.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            entry = await repo.create(data)

        logger.info(
            "knowledge_entry_created",
            entry_id=entry.id,
            entry_type=entry.entry_type,
            source=entry.source,
        )

        # Generate the embedding without blocking the response. The task is
        # scheduled after the DB context has exited so the new row should be
        # persisted before the task writes to it.
        # NOTE(review): assumes get_db_context() commits on exit; confirm.
        task = asyncio.create_task(self._embed_entry(entry.id, data.title, data.content))
        self._pending_tasks.add(task)
        task.add_done_callback(self._pending_tasks.discard)
        return entry

    async def _embed_entry(self, entry_id: str, title: str, content: str) -> None:
        """Background task: generate and store the embedding of one entry.

        Failures are logged and swallowed on purpose; this runs detached from
        any request, so there is no caller to propagate an error to.
        """
        try:
            embedding = await self._embed_svc.embed_text(
                self._build_document_text(title, content)
            )
            if not embedding:
                logger.warning("knowledge_embedding_empty", entry_id=entry_id)
                return
            async with get_db_context() as db:
                repo = KnowledgeDBRepository(db)
                await repo.save_embedding(entry_id, embedding)
            logger.info("knowledge_embedding_saved", entry_id=entry_id)
        except Exception as e:
            logger.warning("knowledge_embedding_failed", entry_id=entry_id, error=str(e))

    async def get_entry(self, entry_id: str) -> KnowledgeEntry | None:
        """Fetch an entry and increment its view counter.

        Returns:
            The entry with ``view_count`` already incremented, or ``None``
            when the repository finds nothing for ``entry_id``.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            entry = await repo.get_by_id(entry_id)
            if entry:
                await repo.increment_view_count(entry_id)
                # Mirror the stored increment on the object handed back.
                entry.view_count += 1
            return entry

    async def update_entry(
        self, entry_id: str, data: KnowledgeEntryUpdate
    ) -> KnowledgeEntry | None:
        """Update an entry with the non-None fields of ``data``.

        Fields whose value is ``None`` are dropped before the update. When
        nothing remains, the current entry is returned unchanged.
        """
        update_data = data.model_dump(exclude_none=True)
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            if not update_data:
                return await repo.get_by_id(entry_id)
            return await repo.update(entry_id, update_data)

    async def approve_entry(self, entry_id: str) -> KnowledgeEntry | None:
        """Mark an entry as approved.

        Returns:
            ``None`` when the entry does not exist, the unchanged entry when
            it is already approved, otherwise the result of the update.
        """
        # NOTE(review): the intended transition is draft/review -> approved,
        # but no source status is checked here, so any non-approved status
        # (including archived) is moved to APPROVED. Confirm this is intended.
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            entry = await repo.get_by_id(entry_id)
            if not entry:
                return None
            if entry.status == EntryStatus.APPROVED:
                return entry
            return await repo.update(entry_id, {"status": EntryStatus.APPROVED})

    async def archive_entry(self, entry_id: str) -> bool:
        """Archive an entry (soft delete).

        Delegates to the repository's ``delete``; presumably that performs the
        soft delete. Verify against the repository implementation.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            return await repo.delete(entry_id)

    async def list_entries(
        self,
        category: str | None = None,
        entry_type: EntryType | None = None,
        status: EntryStatus | None = None,
        tags: list[str] | None = None,
        q: str | None = None,
        limit: int = 20,
        offset: int = 0,
    ) -> KnowledgeListResponse:
        """List entries matching the filters, together with category counts.

        All filter arguments are forwarded unchanged to the repository.

        Returns:
            A response holding the page of items, the total reported by the
            repository and the per-category counts.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            items, total = await repo.list_entries(
                category=category,
                entry_type=entry_type,
                status=status,
                tags=tags,
                q=q,
                limit=limit,
                offset=offset,
            )
            categories_raw = await repo.get_categories()

        categories = [
            CategoryCount(category=cat, count=cnt) for cat, cnt in categories_raw
        ]
        return KnowledgeListResponse(items=items, total=total, categories=categories)

    async def get_categories(self) -> list[CategoryCount]:
        """Return per-category counts.

        Calls the repository directly rather than going through
        ``list_entries``.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            categories_raw = await repo.get_categories()
        return [CategoryCount(category=cat, count=cnt) for cat, cnt in categories_raw]

    async def search(self, query: str, limit: int = 20) -> list[KnowledgeEntry]:
        """Keyword search, delegated to the repository."""
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            return await repo.search(query, limit)

    async def semantic_search(
        self,
        query: str,
        limit: int = 10,
        threshold: float = 0.5,
    ) -> list[tuple[KnowledgeEntry, float]]:
        """Semantic search (pgvector cosine similarity).

        Args:
            query: Free-text query; embedded before being sent to the repository.
            limit: Forwarded to the repository as ``limit``.
            threshold: Forwarded to the repository as ``threshold``.

        Returns:
            A list of ``(entry, score)`` pairs, sorted by similarity in
            descending order. Empty when the query could not be embedded.
        """
        query_text = f"search_query: {query}"
        embedding = await self._embed_svc.embed_text(query_text)
        if not embedding:
            logger.warning("semantic_search_embedding_failed", query=query)
            return []
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            return await repo.semantic_search(embedding, limit=limit, threshold=threshold)

    async def embed_all_entries(self) -> dict[str, int]:
        """Generate embeddings for every entry that has none yet (admin use).

        Entries are processed one at a time; a failure on one entry is logged
        and counted, and processing continues with the next.

        Returns:
            ``{"total": N, "success": N, "failed": N}``
        """
        # C2 fix: data is obtained through the repository; the service does
        # not run raw SQL here.
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            rows = await repo.list_unembedded_entries()

        success = failed = 0
        for entry_id, title, content in rows:
            try:
                embedding = await self._embed_svc.embed_text(
                    self._build_document_text(title, content)
                )
                if embedding:
                    async with get_db_context() as db:
                        repo = KnowledgeDBRepository(db)
                        await repo.save_embedding(entry_id, embedding)
                    success += 1
                else:
                    logger.warning("embed_all_empty_vector", entry_id=entry_id)
                    failed += 1
            except Exception as e:
                logger.warning("embed_all_failed", entry_id=entry_id, error=str(e))
                failed += 1

        logger.info("embed_all_complete", total=len(rows), success=success, failed=failed)
        return {"total": len(rows), "success": success, "failed": failed}

    async def check_anti_pattern(
        self,
        symptoms_hash: str,
        days: int = 7,
    ) -> list[KnowledgeEntry]:
        """Look up recent anti-pattern entries for a symptoms hash.

        2026-04-04 Claude Code: Phase 25 P1 - anti-pattern closed-loop gate.
        Intended for the auto_repair ``decide()`` step to intercept known
        failure cases.

        Args:
            symptoms_hash: 16-character hash from ``SymptomPattern.compute_hash()``.
            days: How many days back to look (default 7).

        Returns:
            Up to five matching entries, newest first. An empty list means no
            known failure case was found.
        """
        from datetime import timedelta

        from sqlalchemy import text as sa_text

        from src.utils.timezone import now_taipei

        cutoff = now_taipei() - timedelta(days=days)
        async with get_db_context() as db:
            result = await db.execute(
                sa_text(
                    "SELECT id FROM knowledge_entries "
                    "WHERE entry_type = 'anti_pattern' "
                    "AND symptoms_hash = :hash "
                    "AND created_at >= :cutoff "
                    "AND status != 'ARCHIVED' "
                    "ORDER BY created_at DESC LIMIT 5"
                ),
                {"hash": symptoms_hash, "cutoff": cutoff},
            )
            entry_ids = [row.id for row in result.fetchall()]
            if not entry_ids:
                return []

            # Load the entries through the repository in this same session.
            # get_entry() is deliberately not used: it increments view_count,
            # and an automated gate check should not inflate view statistics
            # (it would also open one extra DB session per entry).
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            entries = []
            for eid in entry_ids:
                entry = await repo.get_by_id(eid)
                if entry:
                    entries.append(entry)

        logger.info(
            "anti_pattern_check",
            symptoms_hash=symptoms_hash,
            days=days,
            found=len(entries),
        )
        return entries