diff --git a/apps/api/migrations/phase7_playbooks_table.sql b/apps/api/migrations/phase7_playbooks_table.sql index 61bc1c85..0376567a 100644 --- a/apps/api/migrations/phase7_playbooks_table.sql +++ b/apps/api/migrations/phase7_playbooks_table.sql @@ -6,7 +6,8 @@ CREATE TABLE IF NOT EXISTS playbooks ( -- 識別 - playbook_id VARCHAR(32) UNIQUE NOT NULL, + -- 2026-04-04 ogt: 首席架構師 Review — 加 PRIMARY KEY,移除多餘 UNIQUE + playbook_id VARCHAR(32) PRIMARY KEY, -- 元資料 name VARCHAR(256) NOT NULL, diff --git a/apps/api/src/services/approval_execution.py b/apps/api/src/services/approval_execution.py index f584a4f9..87c81c06 100644 --- a/apps/api/src/services/approval_execution.py +++ b/apps/api/src/services/approval_execution.py @@ -239,13 +239,6 @@ class ApprovalExecutionService: result=result, ) - # 2026-04-04 ogt: 執行結果沉澱到 KM - # 統帥鐵律: 所有異常與自動修復紀錄必須回寫 KM - import asyncio - asyncio.create_task( - self._write_execution_result_to_km(approval, success, error_message) - ) - except Exception as e: # 學習失敗不影響主流程 logger.warning( @@ -254,6 +247,12 @@ class ApprovalExecutionService: error=str(e), ) + # 2026-04-04 ogt: 執行結果沉澱到 KM — 移出 try/except 確保 learning 失敗也寫入 + # 統帥鐵律: 所有異常與自動修復紀錄必須回寫 KM + asyncio.create_task( + self._write_execution_result_to_km(approval, success, error_message) + ) + async def _write_execution_result_to_km( self, approval: "ApprovalRequest", @@ -288,7 +287,7 @@ class ApprovalExecutionService: category="execution_result", tags=["execution", "auto_repair" if success else "execution_failed"], source=EntrySource.AI_EXTRACTED, - related_incident_id=approval.incident_id, + related_incident_id=approval.incident_id or None, created_by="approval_execution", ) await get_knowledge_service().create_entry(entry_data) @@ -395,8 +394,8 @@ class ApprovalExecutionService: # 暫時從 description 或 action 解析 incident_id = self._extract_incident_id_from_approval(approval) if not incident_id: - logger.debug( - "playbook_extraction_skip", + logger.info( + "playbook_extraction_skipped", approval_id=str(approval.id), reason="No incident_id found", ) @@ -409,43 +408,46 @@ class ApprovalExecutionService: incident = await incident_service.get_incident(incident_id) if not incident: - logger.debug( - "playbook_extraction_skip", + logger.info( + "playbook_extraction_skipped", approval_id=str(approval.id), incident_id=incident_id, reason="Incident not found", ) return - # 3. 檢查 Incident 狀態 - from src.models.incident import IncidentStatus + # 3. 執行成功後自動設定 outcome (冷啟動關鍵) + # 2026-04-04 ogt: 首席架構師 Review — 補上 execution_success + effectiveness_score + # 確保 Playbook 萃取前置條件能成立,不再依賴人工填分 + from src.models.incident import IncidentOutcome, IncidentStatus + from src.utils.timezone import now_taipei + if incident.outcome is None: + incident.outcome = IncidentOutcome() + if not incident.outcome.execution_success: + incident.outcome.execution_success = True + if incident.outcome.effectiveness_score is None or incident.outcome.effectiveness_score < 4: + incident.outcome.effectiveness_score = 4 # 系統判斷:K8s 執行成功 = 有效 if incident.status not in [IncidentStatus.RESOLVED, IncidentStatus.CLOSED]: - logger.debug( - "playbook_extraction_skip", - approval_id=str(approval.id), - incident_id=incident_id, - incident_status=incident.status.value, - reason="Incident not resolved", - ) - return + incident.status = IncidentStatus.RESOLVED + incident.resolved_at = now_taipei() - # 4. 檢查 effectiveness_score - effectiveness = incident.outcome.effectiveness_score if incident.outcome else 0 - if effectiveness < 4: - logger.debug( - "playbook_extraction_skip", - approval_id=str(approval.id), - incident_id=incident_id, - effectiveness=effectiveness, - reason="Low effectiveness score", - ) - return + # 回存 Incident(fire-and-forget 路徑,失敗不影響主流程) + await incident_service.save_to_working_memory(incident) - # 5. 觸發萃取 + logger.info( + "playbook_extraction_incident_updated", + approval_id=str(approval.id), + incident_id=incident_id, + effectiveness_score=incident.outcome.effectiveness_score, + status=incident.status.value, + ) + + # 4. 觸發萃取(effectiveness 已保證 >= 4) from src.services.playbook_service import get_playbook_service playbook_service = get_playbook_service() + effectiveness = incident.outcome.effectiveness_score or 4 playbook = await playbook_service.extract_from_incident( incident=incident, auto_approve=effectiveness >= 5, # 滿分自動核准 diff --git a/apps/api/src/services/playbook_rag.py b/apps/api/src/services/playbook_rag.py index 86b9059e..41a0c9f9 100644 --- a/apps/api/src/services/playbook_rag.py +++ b/apps/api/src/services/playbook_rag.py @@ -239,9 +239,10 @@ class PlaybookRAGService: text_parts.append(f"描述: {playbook.description}") # 修復步驟 + # 2026-04-04 ogt: 修正欄位名稱 s.sequence→s.step_number, s.description→s.command if playbook.repair_steps: steps_text = "; ".join( - f"{s.sequence}. {s.description}" + f"{s.step_number}. {s.command}" for s in playbook.repair_steps[:5] # 最多 5 步 ) text_parts.append(f"步驟: {steps_text}") diff --git a/apps/api/src/services/playbook_service.py b/apps/api/src/services/playbook_service.py index 1b2ba267..d51a40a5 100644 --- a/apps/api/src/services/playbook_service.py +++ b/apps/api/src/services/playbook_service.py @@ -87,14 +87,16 @@ class PlaybookService: def __init__(self, repository: IPlaybookRepository | None = None): self._repository = repository or get_playbook_repository() - # 2026-03-27 ogt: RAG Service 改為 lazy initialization (async factory) - self._rag_service = None async def _get_rag_service(self): - """Lazy initialization for RAG service (2026-03-27 async factory)""" - if self._rag_service is None: - self._rag_service = await get_playbook_rag_service() - return self._rag_service + """ + 取得 RAG Service — 每次走工廠,不在 Service 層快取 + + 2026-04-04 ogt: 首席架構師 Review — 移除 Service 層快取 + 原因: PlaybookService 快取舊實例會繞過工廠的 is_closed 重建邏輯 + 由 get_playbook_rag_service() 工廠統一管理生命週期 + """ + return await get_playbook_rag_service() # === Core Operations ===