P0 - DIAGNOSE Privacy-First Routing: - ai_router.py: _local_fallback_chain [NEMOTRON→OLLAMA→REJECT] - DIAGNOSE 意圖 override 改為 NEMOTRON (原 OLLAMA) - DIAGNOSE fallback 使用 local-only 鏈,不觸碰雲端 - 全部失敗時 REJECT + Telegram 通知 - config.py: NEMOTRON_DIAGNOSE_TIMEOUT_SECONDS=30, OLLAMA_DIAGNOSE_TIMEOUT_SECONDS=60 - nemotron.py: 根據 context[task_type] 選擇 timeout P1 - Knowledge Auto-Harvesting: - models/knowledge.py: EntryType.AUTO_RUNBOOK + ANTI_PATTERN + symptoms_hash - EntryStatus.PUBLISHED (ANTI_PATTERN 直接發布,無需審核) - models/playbook.py: SymptomPattern.compute_hash() (16字元確定性 hash) - services/runbook_generator.py: NemotronRunbookGenerator (v1.1) - generate_runbook() → AUTO_RUNBOOK (DRAFT) + Telegram 審核 card - generate_anti_pattern() → ANTI_PATTERN (PUBLISHED) + Telegram 通知 - 使用 nvidia.chat() (正確介面),Nemotron 超時時 Minimal fallback - knowledge_service.py: check_anti_pattern(symptoms_hash, days=7) - db/models.py: symptoms_hash VARCHAR(16) + ix_knowledge_symptoms_hash - repositories/knowledge_repository.py: create() 支援 symptoms_hash + status - auto_repair_service.py: anti_pattern_gate 在 decide() + runbook hook 在 execute() - migrations/phase8_symptoms_hash.sql: ALTER TABLE + partial index + PUBLISHED constraint P2 - Config Drift Detection: - models/drift.py: DriftItem/DriftReport/DriftLevel/DriftIntent/DriftStatus - services/drift_detector.py: GitStateReader + K8sStateReader + DriftDetector - services/drift_analyzer.py: 白名單過濾 + DriftLevel 分級 - services/drift_interpreter.py: NemotronDriftInterpreter(意圖分析,不生成修復指令) - services/drift_remediator.py: rollback(kubectl apply) + adopt(git push gitea) - api/v1/drift.py: POST /scan, GET /reports, POST /rollback, POST /adopt - migrations/phase9_drift_reports.sql: drift_reports 表 - k8s/drift-cronjob.yaml: 每小時自動掃描 CronJob Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
281 lines
10 KiB
Python
281 lines
10 KiB
Python
"""
|
||
Knowledge Repository - PostgreSQL 實作
|
||
=======================================
|
||
Knowledge Base Phase 1: CRUD + 搜尋
|
||
|
||
建立時間: 2026-04-02 (台北時區)
|
||
建立者: Claude Code (Knowledge Base Phase 1)
|
||
|
||
遵循 leWOOOgo 積木化原則:
|
||
- 實作 IKnowledgeRepository Protocol
|
||
- 只做資料存取,業務邏輯在 Service 層
|
||
"""
|
||
|
||
import structlog
|
||
from sqlalchemy import String, func, or_, select, update
|
||
from sqlalchemy.ext.asyncio import AsyncSession
|
||
|
||
from src.db.models import KnowledgeEntryRecord
|
||
from src.models.knowledge import (
|
||
EntryStatus,
|
||
EntryType,
|
||
KnowledgeEntry,
|
||
KnowledgeEntryCreate,
|
||
)
|
||
|
||
logger = structlog.get_logger(__name__)
|
||
|
||
|
||
class KnowledgeDBRepository:
|
||
"""
|
||
Knowledge Repository - PostgreSQL 實作
|
||
|
||
實作 IKnowledgeRepository Protocol
|
||
"""
|
||
|
||
def __init__(self, db: AsyncSession):
|
||
self.db = db
|
||
|
||
async def create(self, data: KnowledgeEntryCreate) -> KnowledgeEntry:
|
||
"""建立知識條目"""
|
||
record = KnowledgeEntryRecord(
|
||
title=data.title,
|
||
content=data.content,
|
||
entry_type=data.entry_type,
|
||
category=data.category,
|
||
tags=data.tags,
|
||
source=data.source,
|
||
# 2026-04-04 ogt: Phase 25 P1 — 支援指定 status(ANTI_PATTERN 直接 PUBLISHED)
|
||
status=data.status,
|
||
related_incident_id=data.related_incident_id,
|
||
related_playbook_id=data.related_playbook_id,
|
||
# 2026-04-04 ogt: Phase 25 P1 — Anti-Pattern 閉環用症狀 hash
|
||
symptoms_hash=data.symptoms_hash,
|
||
created_by=data.created_by,
|
||
)
|
||
self.db.add(record)
|
||
await self.db.flush()
|
||
logger.info("knowledge_entry_created", entry_id=record.id, title=record.title)
|
||
return self._to_model(record)
|
||
|
||
async def get_by_id(self, entry_id: str) -> KnowledgeEntry | None:
|
||
"""根據 ID 取得知識條目(排除 archived)"""
|
||
result = await self.db.execute(
|
||
select(KnowledgeEntryRecord).where(
|
||
KnowledgeEntryRecord.id == entry_id,
|
||
KnowledgeEntryRecord.status != EntryStatus.ARCHIVED,
|
||
)
|
||
)
|
||
record = result.scalar_one_or_none()
|
||
return self._to_model(record) if record else None
|
||
|
||
async def update(self, entry_id: str, data: dict) -> KnowledgeEntry | None:
|
||
"""更新知識條目"""
|
||
result = await self.db.execute(
|
||
select(KnowledgeEntryRecord).where(KnowledgeEntryRecord.id == entry_id)
|
||
)
|
||
record = result.scalar_one_or_none()
|
||
if not record:
|
||
return None
|
||
|
||
for key, value in data.items():
|
||
if value is not None and hasattr(record, key):
|
||
setattr(record, key, value)
|
||
|
||
await self.db.flush()
|
||
logger.info("knowledge_entry_updated", entry_id=entry_id)
|
||
return self._to_model(record)
|
||
|
||
async def delete(self, entry_id: str) -> bool:
|
||
"""軟刪除 → status = archived"""
|
||
result = await self.db.execute(
|
||
update(KnowledgeEntryRecord)
|
||
.where(KnowledgeEntryRecord.id == entry_id)
|
||
.values(status=EntryStatus.ARCHIVED)
|
||
)
|
||
return result.rowcount > 0
|
||
|
||
async def list_entries(
|
||
self,
|
||
category: str | None = None,
|
||
entry_type: EntryType | None = None,
|
||
status: EntryStatus | None = None,
|
||
tags: list[str] | None = None,
|
||
q: str | None = None,
|
||
limit: int = 20,
|
||
offset: int = 0,
|
||
) -> tuple[list[KnowledgeEntry], int]:
|
||
"""列出知識條目 (支援篩選)"""
|
||
query = select(KnowledgeEntryRecord).where(
|
||
KnowledgeEntryRecord.status != EntryStatus.ARCHIVED
|
||
)
|
||
count_query = select(func.count()).select_from(KnowledgeEntryRecord).where(
|
||
KnowledgeEntryRecord.status != EntryStatus.ARCHIVED
|
||
)
|
||
|
||
if category:
|
||
query = query.where(KnowledgeEntryRecord.category == category)
|
||
count_query = count_query.where(KnowledgeEntryRecord.category == category)
|
||
if entry_type:
|
||
query = query.where(KnowledgeEntryRecord.entry_type == entry_type)
|
||
count_query = count_query.where(KnowledgeEntryRecord.entry_type == entry_type)
|
||
if status:
|
||
query = query.where(KnowledgeEntryRecord.status == status)
|
||
count_query = count_query.where(KnowledgeEntryRecord.status == status)
|
||
if tags:
|
||
for tag in tags:
|
||
tag_filter = KnowledgeEntryRecord.tags.op('@>')(f'["{tag}"]')
|
||
query = query.where(tag_filter)
|
||
count_query = count_query.where(tag_filter)
|
||
if q:
|
||
like_q = f"%{q}%"
|
||
filter_cond = or_(
|
||
KnowledgeEntryRecord.title.ilike(like_q),
|
||
KnowledgeEntryRecord.content.ilike(like_q),
|
||
)
|
||
query = query.where(filter_cond)
|
||
count_query = count_query.where(filter_cond)
|
||
|
||
total_result = await self.db.execute(count_query)
|
||
total = total_result.scalar() or 0
|
||
|
||
query = query.order_by(KnowledgeEntryRecord.updated_at.desc())
|
||
query = query.limit(limit).offset(offset)
|
||
|
||
result = await self.db.execute(query)
|
||
records = result.scalars().all()
|
||
|
||
return [self._to_model(r) for r in records], total
|
||
|
||
async def get_categories(self) -> list[tuple[str, int]]:
|
||
"""取得分類統計"""
|
||
result = await self.db.execute(
|
||
select(
|
||
KnowledgeEntryRecord.category,
|
||
func.count().label("cnt"),
|
||
)
|
||
.where(KnowledgeEntryRecord.status != EntryStatus.ARCHIVED)
|
||
.group_by(KnowledgeEntryRecord.category)
|
||
.order_by(func.count().desc())
|
||
)
|
||
return [(row.category, row.cnt) for row in result.all()]
|
||
|
||
async def search(self, query: str, limit: int = 20) -> list[KnowledgeEntry]:
|
||
"""關鍵字搜尋 (title + content + tags)"""
|
||
like_q = f"%{query}%"
|
||
result = await self.db.execute(
|
||
select(KnowledgeEntryRecord)
|
||
.where(
|
||
KnowledgeEntryRecord.status != EntryStatus.ARCHIVED,
|
||
or_(
|
||
KnowledgeEntryRecord.title.ilike(like_q),
|
||
KnowledgeEntryRecord.content.ilike(like_q),
|
||
KnowledgeEntryRecord.tags.cast(String).ilike(like_q),
|
||
),
|
||
)
|
||
.order_by(KnowledgeEntryRecord.view_count.desc())
|
||
.limit(limit)
|
||
)
|
||
records = result.scalars().all()
|
||
return [self._to_model(r) for r in records]
|
||
|
||
async def increment_view_count(self, entry_id: str) -> bool:
|
||
"""view_count +1"""
|
||
result = await self.db.execute(
|
||
update(KnowledgeEntryRecord)
|
||
.where(KnowledgeEntryRecord.id == entry_id)
|
||
.values(view_count=KnowledgeEntryRecord.view_count + 1)
|
||
)
|
||
return result.rowcount > 0
|
||
|
||
async def list_unembedded_entries(self) -> list[tuple[str, str, str]]:
|
||
"""列出尚未產生 embedding 的條目 [(id, title, content)]"""
|
||
from sqlalchemy import text as sa_text
|
||
result = await self.db.execute(
|
||
sa_text(
|
||
"SELECT id, title, content FROM knowledge_entries "
|
||
"WHERE embedding IS NULL AND status != 'ARCHIVED'"
|
||
)
|
||
)
|
||
return [(row.id, row.title, row.content) for row in result.fetchall()]
|
||
|
||
async def save_embedding(self, entry_id: str, embedding: list[float]) -> bool:
|
||
"""儲存向量 embedding (768 維)
|
||
|
||
注意: asyncpg 不支援 :param::type 語法,必須用 CAST(:param AS vector)
|
||
"""
|
||
from sqlalchemy import text as sa_text
|
||
result = await self.db.execute(
|
||
sa_text(
|
||
"UPDATE knowledge_entries SET embedding = CAST(:emb AS vector) WHERE id = :id"
|
||
),
|
||
{"emb": str(embedding), "id": entry_id},
|
||
)
|
||
return result.rowcount > 0
|
||
|
||
async def semantic_search(
|
||
self,
|
||
query_embedding: list[float],
|
||
limit: int = 10,
|
||
threshold: float = 0.5,
|
||
) -> list[tuple[KnowledgeEntry, float]]:
|
||
"""
|
||
語意搜尋 — cosine similarity (pgvector)
|
||
|
||
Returns:
|
||
list of (entry, similarity_score) 已按分數降序排列
|
||
"""
|
||
from sqlalchemy import text as sa_text
|
||
sql = sa_text("""
|
||
SELECT id, 1 - (embedding <=> CAST(:emb AS vector)) AS score
|
||
FROM knowledge_entries
|
||
WHERE status != 'ARCHIVED'
|
||
AND embedding IS NOT NULL
|
||
AND 1 - (embedding <=> CAST(:emb AS vector)) >= :threshold
|
||
ORDER BY embedding <=> CAST(:emb AS vector)
|
||
LIMIT :limit
|
||
""")
|
||
rows = await self.db.execute(
|
||
sql,
|
||
{"emb": str(query_embedding), "threshold": threshold, "limit": limit},
|
||
)
|
||
rows = rows.fetchall()
|
||
|
||
if not rows:
|
||
return []
|
||
|
||
# 批次取得完整 entry
|
||
ids = [r[0] for r in rows]
|
||
scores = {r[0]: float(r[1]) for r in rows}
|
||
|
||
result = await self.db.execute(
|
||
select(KnowledgeEntryRecord).where(KnowledgeEntryRecord.id.in_(ids))
|
||
)
|
||
records = {r.id: r for r in result.scalars().all()}
|
||
|
||
return [
|
||
(self._to_model(records[entry_id]), scores[entry_id])
|
||
for entry_id in ids
|
||
if entry_id in records
|
||
]
|
||
|
||
def _to_model(self, record: KnowledgeEntryRecord) -> KnowledgeEntry:
|
||
"""ORM Record → Pydantic Model"""
|
||
return KnowledgeEntry(
|
||
id=record.id,
|
||
title=record.title,
|
||
content=record.content,
|
||
entry_type=record.entry_type,
|
||
category=record.category,
|
||
tags=record.tags or [],
|
||
source=record.source,
|
||
status=record.status,
|
||
related_incident_id=record.related_incident_id,
|
||
related_playbook_id=record.related_playbook_id,
|
||
symptoms_hash=getattr(record, "symptoms_hash", None),
|
||
view_count=record.view_count,
|
||
created_by=record.created_by,
|
||
created_at=record.created_at,
|
||
updated_at=record.updated_at,
|
||
)
|