Files
awoooi/apps/api/src/repositories/knowledge_repository.py
OG T 3455044457
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 38s
Type Sync Check / check-type-sync (push) Failing after 35s
feat(phase25): Nemotron 主動防禦三方向 P0+P1+P2 完整實作
P0 - DIAGNOSE Privacy-First Routing:
- ai_router.py: _local_fallback_chain [NEMOTRON→OLLAMA→REJECT]
- DIAGNOSE 意圖 override 改為 NEMOTRON (原 OLLAMA)
- DIAGNOSE fallback 使用 local-only 鏈,不觸碰雲端
- 全部失敗時 REJECT + Telegram 通知
- config.py: NEMOTRON_DIAGNOSE_TIMEOUT_SECONDS=30, OLLAMA_DIAGNOSE_TIMEOUT_SECONDS=60
- nemotron.py: 根據 context[task_type] 選擇 timeout

P1 - Knowledge Auto-Harvesting:
- models/knowledge.py: EntryType.AUTO_RUNBOOK + ANTI_PATTERN + symptoms_hash
- EntryStatus.PUBLISHED (ANTI_PATTERN 直接發布,無需審核)
- models/playbook.py: SymptomPattern.compute_hash() (16字元確定性 hash)
- services/runbook_generator.py: NemotronRunbookGenerator (v1.1)
  - generate_runbook() → AUTO_RUNBOOK (DRAFT) + Telegram 審核 card
  - generate_anti_pattern() → ANTI_PATTERN (PUBLISHED) + Telegram 通知
  - 使用 nvidia.chat() (正確介面),Nemotron 超時時 Minimal fallback
- knowledge_service.py: check_anti_pattern(symptoms_hash, days=7)
- db/models.py: symptoms_hash VARCHAR(16) + ix_knowledge_symptoms_hash
- repositories/knowledge_repository.py: create() 支援 symptoms_hash + status
- auto_repair_service.py: anti_pattern_gate 在 decide() + runbook hook 在 execute()
- migrations/phase8_symptoms_hash.sql: ALTER TABLE + partial index + PUBLISHED constraint

P2 - Config Drift Detection:
- models/drift.py: DriftItem/DriftReport/DriftLevel/DriftIntent/DriftStatus
- services/drift_detector.py: GitStateReader + K8sStateReader + DriftDetector
- services/drift_analyzer.py: 白名單過濾 + DriftLevel 分級
- services/drift_interpreter.py: NemotronDriftInterpreter(意圖分析,不生成修復指令)
- services/drift_remediator.py: rollback(kubectl apply) + adopt(git push gitea)
- api/v1/drift.py: POST /scan, GET /reports, POST /rollback, POST /adopt
- migrations/phase9_drift_reports.sql: drift_reports 表
- k8s/drift-cronjob.yaml: 每小時自動掃描 CronJob

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 12:35:05 +08:00

281 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Knowledge Repository - PostgreSQL 實作
=======================================
Knowledge Base Phase 1: CRUD + 搜尋
建立時間: 2026-04-02 (台北時區)
建立者: Claude Code (Knowledge Base Phase 1)
遵循 leWOOOgo 積木化原則:
- 實作 IKnowledgeRepository Protocol
- 只做資料存取,業務邏輯在 Service 層
"""
import structlog
from sqlalchemy import String, func, or_, select, update
from sqlalchemy.ext.asyncio import AsyncSession
from src.db.models import KnowledgeEntryRecord
from src.models.knowledge import (
EntryStatus,
EntryType,
KnowledgeEntry,
KnowledgeEntryCreate,
)
logger = structlog.get_logger(__name__)
class KnowledgeDBRepository:
"""
Knowledge Repository - PostgreSQL 實作
實作 IKnowledgeRepository Protocol
"""
def __init__(self, db: AsyncSession):
self.db = db
async def create(self, data: KnowledgeEntryCreate) -> KnowledgeEntry:
"""建立知識條目"""
record = KnowledgeEntryRecord(
title=data.title,
content=data.content,
entry_type=data.entry_type,
category=data.category,
tags=data.tags,
source=data.source,
# 2026-04-04 ogt: Phase 25 P1 — 支援指定 statusANTI_PATTERN 直接 PUBLISHED
status=data.status,
related_incident_id=data.related_incident_id,
related_playbook_id=data.related_playbook_id,
# 2026-04-04 ogt: Phase 25 P1 — Anti-Pattern 閉環用症狀 hash
symptoms_hash=data.symptoms_hash,
created_by=data.created_by,
)
self.db.add(record)
await self.db.flush()
logger.info("knowledge_entry_created", entry_id=record.id, title=record.title)
return self._to_model(record)
async def get_by_id(self, entry_id: str) -> KnowledgeEntry | None:
"""根據 ID 取得知識條目(排除 archived"""
result = await self.db.execute(
select(KnowledgeEntryRecord).where(
KnowledgeEntryRecord.id == entry_id,
KnowledgeEntryRecord.status != EntryStatus.ARCHIVED,
)
)
record = result.scalar_one_or_none()
return self._to_model(record) if record else None
async def update(self, entry_id: str, data: dict) -> KnowledgeEntry | None:
"""更新知識條目"""
result = await self.db.execute(
select(KnowledgeEntryRecord).where(KnowledgeEntryRecord.id == entry_id)
)
record = result.scalar_one_or_none()
if not record:
return None
for key, value in data.items():
if value is not None and hasattr(record, key):
setattr(record, key, value)
await self.db.flush()
logger.info("knowledge_entry_updated", entry_id=entry_id)
return self._to_model(record)
async def delete(self, entry_id: str) -> bool:
"""軟刪除 → status = archived"""
result = await self.db.execute(
update(KnowledgeEntryRecord)
.where(KnowledgeEntryRecord.id == entry_id)
.values(status=EntryStatus.ARCHIVED)
)
return result.rowcount > 0
async def list_entries(
self,
category: str | None = None,
entry_type: EntryType | None = None,
status: EntryStatus | None = None,
tags: list[str] | None = None,
q: str | None = None,
limit: int = 20,
offset: int = 0,
) -> tuple[list[KnowledgeEntry], int]:
"""列出知識條目 (支援篩選)"""
query = select(KnowledgeEntryRecord).where(
KnowledgeEntryRecord.status != EntryStatus.ARCHIVED
)
count_query = select(func.count()).select_from(KnowledgeEntryRecord).where(
KnowledgeEntryRecord.status != EntryStatus.ARCHIVED
)
if category:
query = query.where(KnowledgeEntryRecord.category == category)
count_query = count_query.where(KnowledgeEntryRecord.category == category)
if entry_type:
query = query.where(KnowledgeEntryRecord.entry_type == entry_type)
count_query = count_query.where(KnowledgeEntryRecord.entry_type == entry_type)
if status:
query = query.where(KnowledgeEntryRecord.status == status)
count_query = count_query.where(KnowledgeEntryRecord.status == status)
if tags:
for tag in tags:
tag_filter = KnowledgeEntryRecord.tags.op('@>')(f'["{tag}"]')
query = query.where(tag_filter)
count_query = count_query.where(tag_filter)
if q:
like_q = f"%{q}%"
filter_cond = or_(
KnowledgeEntryRecord.title.ilike(like_q),
KnowledgeEntryRecord.content.ilike(like_q),
)
query = query.where(filter_cond)
count_query = count_query.where(filter_cond)
total_result = await self.db.execute(count_query)
total = total_result.scalar() or 0
query = query.order_by(KnowledgeEntryRecord.updated_at.desc())
query = query.limit(limit).offset(offset)
result = await self.db.execute(query)
records = result.scalars().all()
return [self._to_model(r) for r in records], total
async def get_categories(self) -> list[tuple[str, int]]:
"""取得分類統計"""
result = await self.db.execute(
select(
KnowledgeEntryRecord.category,
func.count().label("cnt"),
)
.where(KnowledgeEntryRecord.status != EntryStatus.ARCHIVED)
.group_by(KnowledgeEntryRecord.category)
.order_by(func.count().desc())
)
return [(row.category, row.cnt) for row in result.all()]
async def search(self, query: str, limit: int = 20) -> list[KnowledgeEntry]:
"""關鍵字搜尋 (title + content + tags)"""
like_q = f"%{query}%"
result = await self.db.execute(
select(KnowledgeEntryRecord)
.where(
KnowledgeEntryRecord.status != EntryStatus.ARCHIVED,
or_(
KnowledgeEntryRecord.title.ilike(like_q),
KnowledgeEntryRecord.content.ilike(like_q),
KnowledgeEntryRecord.tags.cast(String).ilike(like_q),
),
)
.order_by(KnowledgeEntryRecord.view_count.desc())
.limit(limit)
)
records = result.scalars().all()
return [self._to_model(r) for r in records]
async def increment_view_count(self, entry_id: str) -> bool:
"""view_count +1"""
result = await self.db.execute(
update(KnowledgeEntryRecord)
.where(KnowledgeEntryRecord.id == entry_id)
.values(view_count=KnowledgeEntryRecord.view_count + 1)
)
return result.rowcount > 0
async def list_unembedded_entries(self) -> list[tuple[str, str, str]]:
"""列出尚未產生 embedding 的條目 [(id, title, content)]"""
from sqlalchemy import text as sa_text
result = await self.db.execute(
sa_text(
"SELECT id, title, content FROM knowledge_entries "
"WHERE embedding IS NULL AND status != 'ARCHIVED'"
)
)
return [(row.id, row.title, row.content) for row in result.fetchall()]
async def save_embedding(self, entry_id: str, embedding: list[float]) -> bool:
"""儲存向量 embedding (768 維)
注意: asyncpg 不支援 :param::type 語法,必須用 CAST(:param AS vector)
"""
from sqlalchemy import text as sa_text
result = await self.db.execute(
sa_text(
"UPDATE knowledge_entries SET embedding = CAST(:emb AS vector) WHERE id = :id"
),
{"emb": str(embedding), "id": entry_id},
)
return result.rowcount > 0
async def semantic_search(
self,
query_embedding: list[float],
limit: int = 10,
threshold: float = 0.5,
) -> list[tuple[KnowledgeEntry, float]]:
"""
語意搜尋 — cosine similarity (pgvector)
Returns:
list of (entry, similarity_score) 已按分數降序排列
"""
from sqlalchemy import text as sa_text
sql = sa_text("""
SELECT id, 1 - (embedding <=> CAST(:emb AS vector)) AS score
FROM knowledge_entries
WHERE status != 'ARCHIVED'
AND embedding IS NOT NULL
AND 1 - (embedding <=> CAST(:emb AS vector)) >= :threshold
ORDER BY embedding <=> CAST(:emb AS vector)
LIMIT :limit
""")
rows = await self.db.execute(
sql,
{"emb": str(query_embedding), "threshold": threshold, "limit": limit},
)
rows = rows.fetchall()
if not rows:
return []
# 批次取得完整 entry
ids = [r[0] for r in rows]
scores = {r[0]: float(r[1]) for r in rows}
result = await self.db.execute(
select(KnowledgeEntryRecord).where(KnowledgeEntryRecord.id.in_(ids))
)
records = {r.id: r for r in result.scalars().all()}
return [
(self._to_model(records[entry_id]), scores[entry_id])
for entry_id in ids
if entry_id in records
]
def _to_model(self, record: KnowledgeEntryRecord) -> KnowledgeEntry:
"""ORM Record → Pydantic Model"""
return KnowledgeEntry(
id=record.id,
title=record.title,
content=record.content,
entry_type=record.entry_type,
category=record.category,
tags=record.tags or [],
source=record.source,
status=record.status,
related_incident_id=record.related_incident_id,
related_playbook_id=record.related_playbook_id,
symptoms_hash=getattr(record, "symptoms_hash", None),
view_count=record.view_count,
created_by=record.created_by,
created_at=record.created_at,
updated_at=record.updated_at,
)