feat(drift): B4 drift_reports DB 持久化 + CronJob 修復
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 12m17s

- drift_repository.py: DriftReportRepository (save/get/list/update)
- drift.py router: 移除 in-memory dict,改用 DB repository
- drift-cronjob.yaml: 修正 SA/NetworkPolicy/NodePort 問題
- allow-intra-namespace NetworkPolicy (已套用至 prod)
- migrate-phase8/9: symptoms_hash + drift_reports migration Job YAML

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-04-09 20:28:55 +08:00
parent b1e207ffae
commit c92cdeea0f
5 changed files with 356 additions and 22 deletions

View File

@@ -21,6 +21,7 @@ from src.models.drift import (
DriftScanRequest,
DriftScanResponse,
)
from src.repositories.drift_repository import get_drift_repository
from src.services.drift_analyzer import get_drift_analyzer
from src.services.drift_detector import get_drift_detector
from src.services.drift_interpreter import get_drift_interpreter
@@ -28,8 +29,7 @@ from src.services.drift_remediator import get_drift_remediator
router = APIRouter(prefix="/drift", tags=["drift"])
# 本次 session 的漂移報告暫存prod 應存 DB
_recent_reports: dict[str, DriftReport] = {}
# 2026-04-09 Claude Sonnet 4.6: B4 drift_reports 持久化 — 改用 DB repository
@router.post("/scan", response_model=DriftScanResponse, summary="觸發漂移掃描")
@@ -49,6 +49,7 @@ async def trigger_drift_scan(
detector = get_drift_detector()
analyzer = get_drift_analyzer()
repo = get_drift_repository()
all_items = []
last_report: DriftReport | None = None
@@ -57,18 +58,12 @@ async def trigger_drift_scan(
classified_report = analyzer.classify(raw_report)
all_items.extend(classified_report.items)
if analyzer.needs_alert(classified_report):
# Nemotron 意圖分析(背景執行,避免阻塞)
background_tasks.add_task(
_analyze_and_notify, classified_report
)
last_report = classified_report
# 持久化到 DB
await repo.save(classified_report)
# 暫存(最多 50 筆)
_recent_reports[classified_report.report_id] = classified_report
if len(_recent_reports) > 50:
oldest_key = next(iter(_recent_reports))
del _recent_reports[oldest_key]
if analyzer.needs_alert(classified_report):
background_tasks.add_task(_analyze_and_notify, classified_report)
last_report = classified_report
# 若多 namespace彙總第一個 report 的計數
if last_report:
@@ -94,7 +89,8 @@ async def trigger_drift_scan(
@router.get("/reports", response_model=DriftListResponse, summary="列出最近漂移報告")
async def list_drift_reports() -> DriftListResponse:
"""列出最近 50 筆漂移報告(倒序)"""
items = list(reversed(list(_recent_reports.values())))
repo = get_drift_repository()
items = await repo.list_recent(limit=50)
return DriftListResponse(items=items, total=len(items))
@@ -105,7 +101,8 @@ async def rollback_drift(report_id: str) -> dict:
人工確認後才執行DriftRemediator 負責確定性修復
"""
report = _recent_reports.get(report_id)
repo = get_drift_repository()
report = await repo.get(report_id)
if not report:
raise HTTPException(status_code=404, detail=f"Report {report_id} not found")
@@ -122,7 +119,8 @@ async def adopt_drift(report_id: str) -> dict:
2026-04-05 Claude Code: ADR-057 實作 — 改用 Gitea PR API不再 git push main
流程: 建立 drift/adopt-* branch → commit YAML 注解 → 建立 PR → Telegram 通知 SRE
"""
report = _recent_reports.get(report_id)
repo = get_drift_repository()
report = await repo.get(report_id)
if not report:
raise HTTPException(status_code=404, detail=f"Report {report_id} not found")
@@ -161,8 +159,8 @@ async def _analyze_and_notify(report: DriftReport) -> None:
analyzer = get_drift_analyzer()
interpretation = await interpreter.analyze(report)
updated = report.model_copy(update={"interpretation": interpretation})
_recent_reports[report.report_id] = updated
repo = get_drift_repository()
await repo.update_interpretation(report.report_id, interpretation)
diff_summary = analyzer.format_diff_summary(report)
intent_label = {
@@ -201,11 +199,12 @@ async def _run_full_scan(namespaces: list[str]) -> None:
detector = get_drift_detector()
analyzer = get_drift_analyzer()
repo = get_drift_repository()
for namespace in namespaces:
try:
raw = await detector.scan(namespace, triggered_by="cron")
classified = analyzer.classify(raw)
_recent_reports[classified.report_id] = classified
await repo.save(classified)
if analyzer.needs_alert(classified):
await _analyze_and_notify(classified)

View File

@@ -0,0 +1,164 @@
"""
Drift Report Repository - PostgreSQL 實作
==========================================
Phase 25 P2 B4: drift_reports 表 DB 持久化
職責: DriftReport 的 CRUD 操作(取代 in-memory dict
設計: raw SQL via SQLAlchemy text()(表由 phase9 migration 建立)
版本: v1.0
建立: 2026-04-09 (台北時區)
建立者: Claude Sonnet 4.6 (B4 drift_reports 持久化)
"""
import json
from datetime import datetime
import structlog
from sqlalchemy import text
from src.db.base import get_db_context
from src.models.drift import DriftInterpretation, DriftIntent, DriftItem, DriftLevel, DriftReport, DriftStatus
logger = structlog.get_logger(__name__)
_MAX_REPORTS = 200 # DB 最多保留筆數(定期清理)
def _report_to_row(report: DriftReport) -> dict:
"""DriftReport → DB row dict"""
return {
"report_id": report.report_id,
"namespace": report.namespace,
"triggered_by": report.triggered_by,
"scanned_at": report.scanned_at,
"high_count": report.high_count,
"medium_count": report.medium_count,
"info_count": report.info_count,
"items": json.dumps([item.model_dump() for item in report.items]),
"interpretation": json.dumps(report.interpretation.model_dump()) if report.interpretation else None,
"status": report.status.value,
"created_at": report.created_at,
"resolved_at": report.resolved_at,
}
def _row_to_report(row) -> DriftReport:
"""DB row → DriftReport"""
items = []
for item_data in (row.items or []):
item_data["drift_level"] = DriftLevel(item_data.get("drift_level", "medium"))
items.append(DriftItem(**item_data))
interpretation = None
if row.interpretation:
d = row.interpretation
interpretation = DriftInterpretation(
intent=DriftIntent(d.get("intent", "unknown")),
explanation=d.get("explanation", ""),
risk=d.get("risk", "MEDIUM"),
confidence=d.get("confidence", 0.0),
)
return DriftReport(
report_id=row.report_id,
namespace=row.namespace,
triggered_by=row.triggered_by,
scanned_at=row.scanned_at,
high_count=row.high_count,
medium_count=row.medium_count,
info_count=row.info_count,
items=items,
interpretation=interpretation,
status=DriftStatus(row.status),
created_at=row.created_at,
resolved_at=row.resolved_at,
)
class DriftReportRepository:
"""drift_reports 表的 CRUD 操作"""
async def save(self, report: DriftReport) -> None:
"""新增或更新漂移報告upsert"""
row = _report_to_row(report)
async with get_db_context() as db:
await db.execute(
text("""
INSERT INTO drift_reports
(report_id, namespace, triggered_by, scanned_at,
high_count, medium_count, info_count,
items, interpretation, status, created_at, resolved_at)
VALUES
(:report_id, :namespace, :triggered_by, :scanned_at,
:high_count, :medium_count, :info_count,
:items::jsonb, :interpretation::jsonb, :status, :created_at, :resolved_at)
ON CONFLICT (report_id) DO UPDATE SET
items = EXCLUDED.items,
interpretation = EXCLUDED.interpretation,
status = EXCLUDED.status,
resolved_at = EXCLUDED.resolved_at,
high_count = EXCLUDED.high_count,
medium_count = EXCLUDED.medium_count,
info_count = EXCLUDED.info_count
"""),
row,
)
logger.info("drift_report_saved", report_id=report.report_id, namespace=report.namespace)
async def get(self, report_id: str) -> DriftReport | None:
"""依 report_id 查詢"""
async with get_db_context() as db:
result = await db.execute(
text("SELECT * FROM drift_reports WHERE report_id = :report_id"),
{"report_id": report_id},
)
row = result.fetchone()
return _row_to_report(row) if row else None
async def list_recent(self, limit: int = 50) -> list[DriftReport]:
"""列出最近 N 筆(倒序)"""
async with get_db_context() as db:
result = await db.execute(
text("SELECT * FROM drift_reports ORDER BY created_at DESC LIMIT :limit"),
{"limit": limit},
)
rows = result.fetchall()
return [_row_to_report(r) for r in rows]
async def update_status(self, report_id: str, status: DriftStatus, resolved_at: datetime | None = None) -> None:
"""更新處理狀態"""
async with get_db_context() as db:
await db.execute(
text("""
UPDATE drift_reports
SET status = :status, resolved_at = :resolved_at
WHERE report_id = :report_id
"""),
{"report_id": report_id, "status": status.value, "resolved_at": resolved_at},
)
async def update_interpretation(self, report_id: str, interpretation: DriftInterpretation) -> None:
"""更新 Nemotron 意圖分析結果"""
async with get_db_context() as db:
await db.execute(
text("""
UPDATE drift_reports
SET interpretation = :interpretation::jsonb
WHERE report_id = :report_id
"""),
{
"report_id": report_id,
"interpretation": json.dumps(interpretation.model_dump()),
},
)
_drift_repo: DriftReportRepository | None = None
def get_drift_repository() -> DriftReportRepository:
global _drift_repo
if _drift_repo is None:
_drift_repo = DriftReportRepository()
return _drift_repo