diff --git a/scripts/backfill_km_from_approvals.py b/scripts/backfill_km_from_approvals.py new file mode 100644 index 00000000..a935ec91 --- /dev/null +++ b/scripts/backfill_km_from_approvals.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +""" +Phase 26: 從 approval_records 歷史資料補寫 KM +2026-04-06 ogt + +問題: 258 筆 Approval 記錄,只有 9 筆 KM entry, + 大量告警處理歷史沒有沉澱到 Knowledge Base + +做法: 遍歷所有 APPROVED / EXECUTION_SUCCESS / EXECUTION_FAILED 的記錄, + 建立 INCIDENT_CASE 類型的 KM entry (不重複) +""" + +import asyncio +import os +import sys + +import asyncpg + + +async def backfill(): + db_url = os.environ.get("DATABASE_URL", "").replace("postgresql+asyncpg://", "postgresql://") + if not db_url: + print("ERROR: DATABASE_URL not set") + sys.exit(1) + + conn = await asyncpg.connect(db_url) + + # 取得所有有意義的 approval records (已批准/執行) + approvals = await conn.fetch(""" + SELECT id, action, description, status, risk_level, created_at, incident_id + FROM approval_records + WHERE status IN ('APPROVED', 'EXECUTION_SUCCESS', 'EXECUTION_FAILED') + ORDER BY created_at ASC + """) + + print(f"找到 {len(approvals)} 筆 Approval 記錄") + + # 取得已存在的 KM entries(避免重複) + existing = await conn.fetch(""" + SELECT related_approval_id FROM knowledge_entries + WHERE related_approval_id IS NOT NULL + """) + existing_ids = {r["related_approval_id"] for r in existing} + print(f"已有 {len(existing_ids)} 筆 KM entry 有 related_approval_id") + + inserted = 0 + skipped = 0 + + for ap in approvals: + ap_id = str(ap["id"]) + + if ap_id in existing_ids: + skipped += 1 + continue + + status = ap["status"] + status_icon = "✅" if status == "EXECUTION_SUCCESS" else ("⏳" if status == "APPROVED" else "❌") + status_text = {"EXECUTION_SUCCESS": "成功", "APPROVED": "已批准(未執行)", "EXECUTION_FAILED": "失敗"}[status] + + title = f"[告警處理記錄] {status_icon} {(ap['action'] or '')[:60]}" + content = ( + f"# {status_icon} 告警處理記錄\n\n" + f"**操作**: {ap['action'] or '無'}\n" + f"**說明**: {ap['description'] or '無'}\n" + f"**執行結果**: {status_text}\n" + f"**風險等級**: {ap['risk_level'] or '未知'}\n" + f"**Approval ID**: {ap_id}\n" + f"**Incident ID**: {ap['incident_id'] or '未關聯'}\n" + f"**建立時間**: {ap['created_at']}\n" + ) + + import json, uuid + tags_json = json.dumps(["backfill", "approval", status.lower()]) + entry_id = str(uuid.uuid4()) + await conn.execute(""" + INSERT INTO knowledge_entries + (id, title, content, entry_type, category, tags, source, status, + view_count, related_incident_id, related_approval_id, + created_by, created_at, updated_at) + VALUES + ($1, $2, $3, 'INCIDENT_CASE', 'alert_handling', + $4::json, 'AI_EXTRACTED', 'APPROVED', + 0, $5, $6, 'backfill_script', NOW(), NOW()) + """, + entry_id, + title, + content, + tags_json, + ap["incident_id"], + ap_id, + ) + inserted += 1 + + print(f"補寫完成: 新增 {inserted} 筆,略過 {skipped} 筆") + + # 確認最終狀態 + total = await conn.fetchrow("SELECT count(*) as cnt FROM knowledge_entries") + by_type = await conn.fetch("SELECT entry_type, count(*) FROM knowledge_entries GROUP BY entry_type ORDER BY count DESC") + print(f"\nKM 最終狀態 (共 {total['cnt']} 筆):") + for r in by_type: + print(f" {r['entry_type']}: {r['count']}") + + await conn.close() + + +if __name__ == "__main__": + asyncio.run(backfill())