"""
Knowledge Service - business logic layer
========================================

Knowledge Base Phase 1: CRUD + status transitions + search

Created: 2026-04-02 (Taipei time zone)
Author: Claude Code (Knowledge Base Phase 1)

Follows the leWOOOgo building-block principles:
- The service layer encapsulates business logic
- Depends on the IKnowledgeRepository Protocol
- The router layer must not access the DB directly
"""

import asyncio

import structlog

from src.db.base import get_db_context
from src.models.knowledge import (
    CategoryCount,
    EntryStatus,
    EntryType,
    KnowledgeEntry,
    KnowledgeEntryCreate,
    KnowledgeEntryUpdate,
    KnowledgeListResponse,
)
from src.repositories.interfaces import IKnowledgeRepository
from src.repositories.knowledge_repository import KnowledgeDBRepository
from src.services.embedding_service import OllamaEmbeddingService
from src.services.model_registry import get_model as _get_model

logger = structlog.get_logger(__name__)

# Maximum number of characters of an entry's content that are fed to the
# embedding model (the title is always included in full).
_EMBED_CONTENT_MAX_CHARS = 2000

# =============================================================================
# Singleton
# =============================================================================

_knowledge_service: "KnowledgeService | None" = None


def get_knowledge_service() -> "KnowledgeService":
    """Return the process-wide KnowledgeService, creating it on first use."""
    global _knowledge_service
    if _knowledge_service is None:
        _knowledge_service = KnowledgeService()
    return _knowledge_service


class KnowledgeService:
    """Business logic for the Knowledge Base.

    Every public method opens its own DB context via ``get_db_context()`` and
    talks to the database through ``KnowledgeDBRepository``.
    """

    def __init__(self) -> None:
        # I2: hold a single embedding service instead of constructing one per call.
        # D1 centralization (2026-04-11): the model name is read from
        # models.json providers.ollama.models.embedding.
        self._embed_svc = OllamaEmbeddingService(
            model=_get_model("ollama", "embedding"),
            timeout=15.0,
        )
        # I1: keep strong references to background tasks so they are not
        # garbage-collected before they finish.
        self._pending_tasks: set[asyncio.Task[None]] = set()

    @staticmethod
    def _build_document_text(title: str, content: str) -> str:
        """Build the text that is embedded for a stored entry.

        The content is truncated to ``_EMBED_CONTENT_MAX_CHARS`` characters.
        The ``search_document: `` prefix is paired with the ``search_query: ``
        prefix used by ``semantic_search``.
        NOTE(review): presumably these are the task prefixes the configured
        embedding model expects - confirm against the model in models.json.
        """
        return f"search_document: {title}\n\n{content[:_EMBED_CONTENT_MAX_CHARS]}"

    async def create_entry(self, data: KnowledgeEntryCreate) -> KnowledgeEntry:
        """Create a knowledge entry and schedule its embedding in the background.

        Args:
            data: Payload for the new entry.

        Returns:
            The entry as returned by the repository. Its embedding is produced
            asynchronously and may not exist yet when this method returns.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            entry = await repo.create(data)

        logger.info(
            "knowledge_entry_created",
            entry_id=entry.id,
            entry_type=entry.entry_type,
            source=entry.source,
        )

        # Generate the embedding in the background (does not block the
        # response). Scheduled after the DB context above has exited so the
        # task never runs while the creating context is still open. The task
        # is kept in _pending_tasks until done so it cannot be collected early.
        task = asyncio.create_task(self._embed_entry(entry.id, data.title, data.content))
        self._pending_tasks.add(task)
        task.add_done_callback(self._pending_tasks.discard)
        return entry

    async def _embed_entry(self, entry_id: str, title: str, content: str) -> None:
        """Background task: generate and store the embedding for one entry.

        Best-effort by design: any failure is logged as a warning and
        swallowed, because nothing awaits this task.
        """
        try:
            embedding = await self._embed_svc.embed_text(
                self._build_document_text(title, content)
            )
            if not embedding:
                logger.warning("knowledge_embedding_empty", entry_id=entry_id)
                return
            async with get_db_context() as db:
                repo = KnowledgeDBRepository(db)
                await repo.save_embedding(entry_id, embedding)
            logger.info("knowledge_embedding_saved", entry_id=entry_id)
        except Exception as e:
            logger.warning("knowledge_embedding_failed", entry_id=entry_id, error=str(e))

    async def get_entry(self, entry_id: str) -> KnowledgeEntry | None:
        """Fetch one entry and increment its view count.

        Returns:
            The entry with ``view_count`` already reflecting this read, or
            ``None`` when the repository finds no such entry.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            entry = await repo.get_by_id(entry_id)
            if entry:
                await repo.increment_view_count(entry_id)
                # Mirror the stored increment on the object fetched before it.
                entry.view_count += 1
            return entry

    async def update_entry(
        self, entry_id: str, data: KnowledgeEntryUpdate
    ) -> KnowledgeEntry | None:
        """Update an entry with the non-None fields of ``data``.

        When ``data`` carries no non-None field, nothing is written and the
        current entry is returned instead.

        NOTE(review): because of ``exclude_none=True`` a caller cannot clear a
        field by sending ``None`` - confirm this is intended.
        """
        update_data = data.model_dump(exclude_none=True)
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            if not update_data:
                return await repo.get_by_id(entry_id)
            return await repo.update(entry_id, update_data)

    async def approve_entry(self, entry_id: str) -> KnowledgeEntry | None:
        """Mark an entry as approved (intended flow: draft/review -> approved).

        Returns:
            ``None`` when the entry does not exist, the unchanged entry when
            it is already approved, otherwise the repository's update result.

        NOTE(review): the source status is not validated, so an entry in any
        other status (e.g. archived) is approved as well - confirm intent.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            entry = await repo.get_by_id(entry_id)
            if entry is None:
                return None
            if entry.status == EntryStatus.APPROVED:
                return entry
            return await repo.update(entry_id, {"status": EntryStatus.APPROVED})

    async def archive_entry(self, entry_id: str) -> bool:
        """Archive an entry (soft delete) by delegating to ``repo.delete``."""
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            return await repo.delete(entry_id)

    async def list_entries(
        self,
        category: str | None = None,
        entry_type: EntryType | None = None,
        status: EntryStatus | None = None,
        tags: list[str] | None = None,
        q: str | None = None,
        limit: int = 20,
        offset: int = 0,
    ) -> KnowledgeListResponse:
        """List entries matching the filters, together with category counts.

        All filter arguments are passed through to the repository unchanged.

        Returns:
            A response holding the page of items, the total reported by the
            repository, and the per-category counts.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            items, total = await repo.list_entries(
                category=category,
                entry_type=entry_type,
                status=status,
                tags=tags,
                q=q,
                limit=limit,
                offset=offset,
            )
            categories_raw = await repo.get_categories()

        categories = [
            CategoryCount(category=cat, count=cnt) for cat, cnt in categories_raw
        ]
        return KnowledgeListResponse(items=items, total=total, categories=categories)

    async def get_categories(self) -> list[CategoryCount]:
        """Return category counts straight from the repository.

        Does not go through ``list_entries``.
        """
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            categories_raw = await repo.get_categories()
        return [CategoryCount(category=cat, count=cnt) for cat, cnt in categories_raw]

    async def search(self, query: str, limit: int = 20) -> list[KnowledgeEntry]:
        """Keyword search, delegated to the repository."""
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            return await repo.search(query, limit)

    async def semantic_search(
        self,
        query: str,
        limit: int = 10,
        threshold: float = 0.5,
    ) -> list[tuple[KnowledgeEntry, float]]:
        """Semantic search (pgvector cosine similarity).

        Args:
            query: Free-text query; embedded with a ``search_query: `` prefix.
            limit: Passed to the repository as the result limit.
            threshold: Passed to the repository as the similarity threshold.

        Returns:
            List of ``(entry, score)`` pairs sorted by descending similarity,
            or an empty list when the embedding service returns nothing.
        """
        query_text = f"search_query: {query}"
        embedding = await self._embed_svc.embed_text(query_text)
        if not embedding:
            logger.warning("semantic_search_embedding_failed", query=query)
            return []

        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            return await repo.semantic_search(embedding, limit=limit, threshold=threshold)

    async def embed_all_entries(self) -> dict[str, int]:
        """Generate embeddings for every entry that has none yet (admin use).

        Entries are processed one at a time; a failure on one entry is logged
        and counted but does not stop the batch.

        Returns:
            ``{"total": N, "success": N, "failed": N}``
        """
        # C2 fix: data is obtained through the repository; the service does
        # not run raw SQL here.
        async with get_db_context() as db:
            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            rows = await repo.list_unembedded_entries()

        success = failed = 0
        for entry_id, title, content in rows:
            try:
                embedding = await self._embed_svc.embed_text(
                    self._build_document_text(title, content)
                )
                if embedding:
                    # A fresh context per entry, so each save stands alone.
                    async with get_db_context() as session:
                        writer = KnowledgeDBRepository(session)
                        await writer.save_embedding(entry_id, embedding)
                    success += 1
                else:
                    logger.warning("embed_all_empty_vector", entry_id=entry_id)
                    failed += 1
            except Exception as e:
                logger.warning("embed_all_failed", entry_id=entry_id, error=str(e))
                failed += 1

        logger.info("embed_all_complete", total=len(rows), success=success, failed=failed)
        return {"total": len(rows), "success": success, "failed": failed}

    async def check_anti_pattern(
        self,
        symptoms_hash: str,
        days: int = 7,
    ) -> list[KnowledgeEntry]:
        """Look up recent anti-pattern entries for a symptoms hash.

        2026-04-04 Claude Code: Phase 25 P1 - anti-pattern closed-loop gate.
        Finds recent failure cases by ``symptoms_hash`` so that auto_repair
        ``decide()`` can intercept a repair that is known to fail.

        Entries are loaded with ``repo.get_by_id`` in the same DB context as
        the lookup query, NOT through ``get_entry``: this automated check must
        not increment ``view_count``, and it needs only one DB context.

        Args:
            symptoms_hash: The 16-character hash from
                ``SymptomPattern.compute_hash()``.
            days: How many days back to look (default 7).

        Returns:
            Up to 5 matching entries, newest first; an empty list means there
            is no known failure case.
        """
        from datetime import timedelta

        from sqlalchemy import text as sa_text

        from src.utils.timezone import now_taipei

        cutoff = now_taipei() - timedelta(days=days)

        async with get_db_context() as db:
            # NOTE(review): raw SQL in the service layer goes against this
            # module's own rule (see the C2 fix in embed_all_entries); consider
            # moving this query into the repository.
            # NOTE(review): entry_type is compared in lowercase but status in
            # uppercase - verify both literals against the stored values.
            result = await db.execute(
                sa_text(
                    "SELECT id FROM knowledge_entries "
                    "WHERE entry_type = 'anti_pattern' "
                    "AND symptoms_hash = :hash "
                    "AND created_at >= :cutoff "
                    "AND status != 'ARCHIVED' "
                    "ORDER BY created_at DESC LIMIT 5"
                ),
                {"hash": symptoms_hash, "cutoff": cutoff},
            )
            entry_ids = [row.id for row in result.fetchall()]
            if not entry_ids:
                return []

            repo: IKnowledgeRepository = KnowledgeDBRepository(db)
            entries: list[KnowledgeEntry] = []
            for eid in entry_ids:
                entry = await repo.get_by_id(eid)
                if entry is not None:
                    entries.append(entry)

        logger.info(
            "anti_pattern_check",
            symptoms_hash=symptoms_hash,
            days=days,
            found=len(entries),
        )
        return entries