diff --git a/apps/api/migrations/phase7_playbooks_table.sql b/apps/api/migrations/phase7_playbooks_table.sql
new file mode 100644
index 00000000..61bc1c85
--- /dev/null
+++ b/apps/api/migrations/phase7_playbooks_table.sql
@@ -0,0 +1,58 @@
+-- Phase 7: Playbook extraction feature — playbooks table
+-- Created: 2026-04-04 (Taipei time zone)
+-- Author: Claude Code (Phase 7 backfill migration)
+-- Design reference: memory/project_playbook_design.md
+-- Corresponding model: apps/api/src/models/playbook.py
+
+CREATE TABLE IF NOT EXISTS playbooks (
+    -- Identity. NOTE(review): no PRIMARY KEY is declared here, only UNIQUE NOT NULL — confirm this is intended
+    playbook_id                 VARCHAR(32)   UNIQUE NOT NULL,
+
+    -- Metadata
+    name                        VARCHAR(256)  NOT NULL,
+    description                 TEXT          NOT NULL DEFAULT '',
+    status                      VARCHAR(32)   NOT NULL DEFAULT 'draft',      -- draft|approved|deprecated
+    source                      VARCHAR(32)   NOT NULL DEFAULT 'extracted',  -- extracted|manual
+
+    -- Symptom pattern (SymptomPattern JSON)
+    symptom_pattern             JSONB         NOT NULL DEFAULT '{}',
+
+    -- Repair steps (list[RepairStep] JSON)
+    repair_steps                JSONB         NOT NULL DEFAULT '[]',
+    estimated_duration_minutes  INT           NOT NULL DEFAULT 5,
+
+    -- Source traceability
+    source_incident_ids         TEXT[]        NOT NULL DEFAULT '{}',
+    ai_confidence               DECIMAL(4,3)  NOT NULL DEFAULT 0.0,
+
+    -- Usage statistics
+    success_count               INT           NOT NULL DEFAULT 0,
+    failure_count               INT           NOT NULL DEFAULT 0,
+    last_used_at                TIMESTAMPTZ,
+
+    -- Manual review markers
+    approved_by                 VARCHAR(128),
+    approved_at                 TIMESTAMPTZ,
+    tags                        TEXT[]        NOT NULL DEFAULT '{}',
+    notes                       TEXT,
+
+    -- Timestamps
+    created_at                  TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
+    updated_at                  TIMESTAMPTZ   NOT NULL DEFAULT NOW()
+);
+
+-- Indexes
+CREATE INDEX IF NOT EXISTS idx_playbooks_status
+    ON playbooks(status);
+
+CREATE INDEX IF NOT EXISTS idx_playbooks_tags
+    ON playbooks USING GIN(tags);
+
+CREATE INDEX IF NOT EXISTS idx_playbooks_alert_names
+    ON playbooks USING GIN((symptom_pattern->'alert_names'));  -- expression index on the alert_names key of the JSONB column
+
+CREATE INDEX IF NOT EXISTS idx_playbooks_source_incidents
+    ON playbooks USING GIN(source_incident_ids);
+
+CREATE INDEX IF NOT EXISTS idx_playbooks_created_at
+    ON playbooks(created_at DESC);
diff --git a/apps/api/src/services/approval_execution.py
b/apps/api/src/services/approval_execution.py
index d07e2bf0..f584a4f9 100644
--- a/apps/api/src/services/approval_execution.py
+++ b/apps/api/src/services/approval_execution.py
@@ -239,6 +239,13 @@ class ApprovalExecutionService:
                 result=result,
             )
 
+            # 2026-04-04 ogt: persist the execution result to KM as a background task
+            # Project rule: every anomaly and auto-repair record must be written back to KM
+            import asyncio
+            asyncio.create_task(
+                self._write_execution_result_to_km(approval, success, error_message)
+            )  # NOTE(review): the returned Task is not stored — confirm it cannot be garbage-collected before it finishes
+
         except Exception as e:
             # 學習失敗不影響主流程
             logger.warning(
@@ -247,6 +254,57 @@ class ApprovalExecutionService:
                 error=str(e),
             )
 
+    async def _write_execution_result_to_km(
+        self,
+        approval: "ApprovalRequest",
+        success: bool,
+        error_message: str | None,
+    ) -> None:
+        """
+        Persist an approval's execution result to the KM (Knowledge Base).
+
+        2026-04-04 ogt: project rule — both successful and failed executions must be written back to KM; any exception here is logged as a warning and swallowed.
+        """
+        try:
+            from src.models.knowledge import EntrySource, EntryType, KnowledgeEntryCreate
+            from src.services.knowledge_service import get_knowledge_service
+
+            status_icon = "✅" if success else "❌"
+            status_text = "成功" if success else f"失敗: {error_message or '未知原因'}"  # falls back to '未知原因' when error_message is empty/None
+
+            content = (
+                f"# {status_icon} 執行記錄: {approval.action[:80]}\n\n"
+                f"**Approval ID**: {approval.id}\n"
+                f"**Incident ID**: {approval.incident_id or '未關聯'}\n"
+                f"**執行結果**: {status_text}\n"
+                f"**風險等級**: {approval.risk_level.value if approval.risk_level else '未知'}\n\n"
+                f"## 操作內容\n{approval.description or '無描述'}\n"
+            )
+
+            entry_data = KnowledgeEntryCreate(
+                title=f"[執行記錄] {status_icon} {approval.action[:60]}",  # action is cut to 60 chars here, 80 in the body
+                content=content,
+                entry_type=EntryType.INCIDENT_CASE,
+                category="execution_result",
+                tags=["execution", "auto_repair" if success else "execution_failed"],  # second tag depends on the outcome
+                source=EntrySource.AI_EXTRACTED,
+                related_incident_id=approval.incident_id,
+                created_by="approval_execution",
+            )
+            await get_knowledge_service().create_entry(entry_data)
+
+            logger.debug(
+                "execution_result_written_to_km",
+                approval_id=str(approval.id),
+                success=success,
+            )
+        except Exception as e:
+            logger.warning(
+                "execution_result_km_write_failed",
+                approval_id=str(approval.id),
+                error=str(e),
+            )
+
     async def _send_execution_notification(
         self,
         approval: ApprovalRequest,
diff --git a/apps/api/src/services/playbook_service.py b/apps/api/src/services/playbook_service.py
index a6b4c67c..1b2ba267 100644
--- a/apps/api/src/services/playbook_service.py
+++ b/apps/api/src/services/playbook_service.py
@@ -176,6 +176,10 @@ class PlaybookService:
         import asyncio
         asyncio.create_task(self._index_playbook_async(playbook))
 
+        # 9. 2026-04-04 ogt: persist to KM (Knowledge Base)
+        # Project rule: every anomaly and auto-repair record must be written back to KM
+        asyncio.create_task(self._write_to_km(playbook, incident))  # NOTE(review): the returned Task is not stored — confirm it cannot be garbage-collected before it finishes
+
         logger.info(
             "playbook_extracted",
             playbook_id=playbook.playbook_id,
@@ -186,6 +190,62 @@ class PlaybookService:
 
         return playbook
 
+    async def _write_to_km(self, playbook: Playbook, incident: Incident) -> None:
+        """
+        Persist a freshly extracted Playbook to the KM (Knowledge Base).
+
+        2026-04-04 ogt: project rule — anomaly and auto-repair records must be written back to KM.
+        Fire-and-forget: a failure is logged as a warning and does not affect the main flow.
+        """
+        try:
+            from src.models.knowledge import EntrySource, EntryType, KnowledgeEntryCreate
+            from src.services.knowledge_service import get_knowledge_service
+
+            # Build a numbered summary of the Playbook's repair steps
+            steps_text = "\n".join(
+                f"{i+1}. [{s.action_type}] {s.command}"
+                for i, s in enumerate(playbook.repair_steps)
+            ) or "(無明確修復步驟)"  # placeholder text when there are no repair steps
+
+            alert_names = ", ".join(playbook.symptom_pattern.alert_names) or "未知"  # "未知" when the list is empty
+            services = ", ".join(playbook.symptom_pattern.affected_services) or "未知"
+
+            content = (
+                f"# Playbook: {playbook.name}\n\n"
+                f"**來源 Incident**: {', '.join(playbook.source_incident_ids)}\n"
+                f"**AI 信心度**: {playbook.ai_confidence:.0%}\n"  # rendered as a whole-number percentage
+                f"**狀態**: {playbook.status.value}\n\n"
+                f"## 症狀模式\n"
+                f"- 告警: {alert_names}\n"
+                f"- 受影響服務: {services}\n\n"
+                f"## 修復步驟\n{steps_text}\n\n"
+                f"## 描述\n{playbook.description}"
+            )
+
+            entry_data = KnowledgeEntryCreate(
+                title=f"[Playbook] {playbook.name}",
+                content=content,
+                entry_type=EntryType.INCIDENT_CASE,
+                category="auto_repair",
+                tags=[*playbook.tags, "playbook", "auto_extracted", playbook.status.value],  # playbook's own tags plus fixed markers and its status
+                source=EntrySource.AI_EXTRACTED,
+                related_incident_id=incident.incident_id,
+                created_by="playbook_service",
+            )
+            await get_knowledge_service().create_entry(entry_data)
+
+            logger.info(
+                "playbook_written_to_km",
+                playbook_id=playbook.playbook_id,
+                incident_id=incident.incident_id,
+            )
+        except Exception as e:
+            logger.warning(
+                "playbook_km_write_failed",
+                playbook_id=playbook.playbook_id,
+                error=str(e),
+            )
+
     async def _index_playbook_async(self, playbook: Playbook) -> None:
         """非同步建立 Playbook 向量索引 (ADR-030 Phase 3)"""
         try: