""" Playbook Models - #7 Playbook 萃取 ================================== 從成功案例萃取的修復劇本資料模型 Phase 7.1: 資料模型定義 建立時間: 2026-03-26 (台北時區) 作者: Claude Code (Phase 7) 遵循 leWOOOgo 積木化原則: - Pydantic BaseModel 定義 - 支援 PostgreSQL + Redis 雙層儲存 """ from datetime import datetime from enum import Enum from typing import Any from uuid import uuid4 from pydantic import BaseModel, ConfigDict, Field from src.utils.timezone import now_taipei # ============================================================================= # Enums # ============================================================================= class PlaybookStatus(str, Enum): """Playbook 狀態""" DRAFT = "draft" # AI 萃取,待人工審核 REVIEW = "review" # AI 生成且安全檢查通過,等待治理晉級 APPROVED = "approved" # 人工核准,可用於推薦 DEPRECATED = "deprecated" # 已棄用 (有更好方案) class PlaybookSource(str, Enum): """Playbook 來源""" EXTRACTED = "extracted" # 從 Incident 自動萃取 LLM_GENERATED = "llm_generated" # ADR-104: LLM 從成功案例生成 MANUAL = "manual" # 人工建立 YAML_RULE = "yaml_rule" # 從 alert_rules.yaml 匯入(2026-04-15 ogt) class ActionType(str, Enum): """執行類型""" KUBECTL = "kubectl" # Kubernetes 命令 SCRIPT = "script" # 腳本執行 MANUAL = "manual" # 需人工操作 SSH_COMMAND = "ssh_command" # 2026-04-05 Claude Code: 主機層 SSH 修復 (Sprint 3) class RiskLevel(str, Enum): """風險等級""" LOW = "LOW" MEDIUM = "MEDIUM" HIGH = "HIGH" CRITICAL = "CRITICAL" # ============================================================================= # Sub-Models # ============================================================================= class SymptomPattern(BaseModel): """ 症狀模式 - 用於相似度比對 設計: 多維度特徵向量 - alert_names: 告警名稱集合 - affected_services: 受影響服務集合 - severity: 嚴重度 - labels: Prometheus 標籤 (k8s namespace, deployment, etc.) """ alert_names: list[str] = Field( default_factory=list, description="告警名稱模式 (如 HighCPU*, PodCrash*)", ) affected_services: list[str] = Field( default_factory=list, description="受影響服務模式", ) severity_range: list[str] = Field( default=["P1", "P2"], description="適用嚴重度範圍", ) label_patterns: dict[str, str] = Field( default_factory=dict, description="標籤匹配 (regex)", ) keywords: list[str] = Field( default_factory=list, description="關鍵字 (從 annotations 提取)", ) model_config = ConfigDict(extra="ignore") def compute_hash(self) -> str: """ 2026-04-04 Claude Code: Phase 25 P1 — Anti-Pattern 閉環攔截用 確定性 hash:alert_names + affected_services + label_patterns 目的:O(1) 精確比對,避免純語意搜尋的模糊性 """ import hashlib import json key = ( "|".join(sorted(self.alert_names)) + "||" + "|".join(sorted(self.affected_services)) + "||" + json.dumps(self.label_patterns, sort_keys=True) ) return hashlib.sha256(key.encode()).hexdigest()[:16] class RepairStep(BaseModel): """ 修復步驟 設計: 支援多種執行類型 - kubectl: Kubernetes 命令 - script: 腳本執行 - manual: 需人工操作 """ step_number: int = Field(ge=1, description="步驟序號") action_type: ActionType = Field(description="執行類型") command: str = Field(description="執行命令或操作描述") expected_result: str | None = Field(None, description="預期結果") rollback_command: str | None = Field(None, description="回滾命令") requires_approval: bool = Field(default=False, description="是否需要人工審核") risk_level: RiskLevel = Field(default=RiskLevel.MEDIUM, description="風險等級") model_config = ConfigDict(extra="ignore") # ============================================================================= # Core Model # ============================================================================= def generate_playbook_id() -> str: """生成 Playbook ID (台北時區)""" return f"PB-{now_taipei().strftime('%Y%m%d')}-{uuid4().hex[:6].upper()}" class Playbook(BaseModel): """ Playbook - 修復劇本 三層記憶位置: - Working Memory (Redis): playbook:{playbook_id} TTL 7天 - Episodic Memory (PostgreSQL): playbooks 表 - Semantic Memory (Vector DB): 向量化症狀特徵 (Phase 8+) 設計遵循: - ADR-003 leWOOOgo 模組化架構 - ADR-007 資料保留策略 """ # === 識別 === playbook_id: str = Field( default_factory=generate_playbook_id, description="Playbook 唯一識別碼", ) # === 元資料 === name: str = Field(description="Playbook 名稱 (人類可讀)") description: str = Field(description="問題描述與修復策略摘要") status: PlaybookStatus = Field(default=PlaybookStatus.DRAFT) source: PlaybookSource = Field(default=PlaybookSource.EXTRACTED) # === 症狀模式 === symptom_pattern: SymptomPattern = Field( default_factory=SymptomPattern, description="觸發此 Playbook 的症狀模式", ) # === 修復步驟 === repair_steps: list[RepairStep] = Field( default_factory=list, description="修復步驟列表", ) estimated_duration_minutes: int = Field( default=5, ge=1, le=480, description="預估修復時間 (分鐘)", ) # === 來源追溯 === source_incident_ids: list[str] = Field( default_factory=list, description="萃取來源的 Incident ID", ) version: int = Field(default=1, ge=1, description="Playbook lineage version") parent_playbook_id: str | None = Field(None, description="Root Playbook ID for this lineage") supersedes_playbook_id: str | None = Field(None, description="Previous Playbook version superseded by this one") version_reason: str | None = Field(None, description="Why this version was created") ai_confidence: float = Field( default=0.0, ge=0.0, le=1.0, description="AI 萃取信心度", ) # === 統計數據 === success_count: int = Field(default=0, ge=0, description="成功執行次數") failure_count: int = Field(default=0, ge=0, description="失敗執行次數") last_used_at: datetime | None = Field(None, description="最後使用時間") # ADR-083 Phase 3: EWMA 信任度(0.0-1.0,初值 0.3) # 成功: trust_new = 0.9 * trust_old + 0.1 * 1.0 # 失敗: trust_new = 0.8 * trust_old + 0.2 * 0.0(2x 衰減) # trust < 0.1 → 自動封存(由 Evolver Agent 處理) # 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 3 EWMA 負向強化 trust_score: float = Field(default=0.3, ge=0.0, le=1.0, description="EWMA 動態信任度(Phase 3 新增)") # === 人工標記 === approved_by: str | None = Field(None, description="核准者") approved_at: datetime | None = Field(None, description="核准時間") tags: list[str] = Field(default_factory=list, description="標籤") notes: str | None = Field(None, description="人工補充說明") # === Sprint 5.1 資料安全護欄 (2026-04-08 Claude Sonnet 4.6 Asia/Taipei) === requires_approval_level: str = Field( default="auto", description="auto=直接執行, standard=1票, critical=2票MultiSig(由 Service Registry 決定)", ) stateful_targets: list[str] = Field( default_factory=list, description="此 Playbook 操作的 Stateful 服務清單,對應 service-registry.yaml", ) requires_pre_backup: bool = Field( default=False, description="執行前是否需要 Pre-flight 備份檢查", ) review_required: bool = Field( default=False, description="KM/治理累積觸發人工或 AI 複審信號", ) # === 時間軸 === created_at: datetime = Field(default_factory=now_taipei) updated_at: datetime = Field(default_factory=now_taipei) model_config = ConfigDict(extra="ignore") @property def success_rate(self) -> float: """成功率""" total = self.success_count + self.failure_count return self.success_count / total if total > 0 else 0.0 @property def failure_rate(self) -> float: """失敗率 (2026-03-30 Claude Code: Learning Service 信心度調整用)""" total = self.success_count + self.failure_count return self.failure_count / total if total > 0 else 0.0 @property def is_high_quality(self) -> bool: """ 是否為高品質 Playbook (供 #8 自動升級參考) 條件: - 狀態為 APPROVED - 成功率 >= 80% (冷啟動期: 原 95%,2026-04-05 ogt 降低以打破零執行惡性循環) - 成功次數 >= 3 (冷啟動期: 原 10,累積足夠後再收緊) 待成熟後收緊為: success_rate >= 0.95, success_count >= 10 """ # 2026-04-05 ogt: 冷啟動調整,打破 zero-execution 惡性循環 return ( self.status == PlaybookStatus.APPROVED and self.success_rate >= 0.80 and self.success_count >= 3 ) @property def total_executions(self) -> int: """總執行次數""" return self.success_count + self.failure_count def to_redis_dict(self) -> dict[str, Any]: """轉換為 Redis 儲存格式""" return self.model_dump(mode="json") @classmethod def from_redis_dict(cls, data: dict[str, Any]) -> "Playbook": """從 Redis 資料還原""" return cls.model_validate(data) # ============================================================================= # Response Models # ============================================================================= class PlaybookRecommendation(BaseModel): """Playbook 推薦結果""" playbook: Playbook similarity_score: float = Field(ge=0.0, le=1.0, description="相似度分數") matched_symptoms: list[str] = Field( default_factory=list, description="匹配的症狀", ) reason: str = Field(description="推薦原因") model_config = ConfigDict(extra="ignore") class PlaybookResponse(BaseModel): """單一 Playbook 回應""" playbook: Playbook success_rate: float = Field(ge=0.0, le=1.0) is_high_quality: bool @classmethod def from_playbook(cls, playbook: Playbook) -> "PlaybookResponse": """從 Playbook 建立回應""" return cls( playbook=playbook, success_rate=playbook.success_rate, is_high_quality=playbook.is_high_quality, ) class PlaybookListResponse(BaseModel): """Playbook 列表回應""" items: list[PlaybookResponse] total: int limit: int offset: int # ============================================================================= # Request Models # ============================================================================= class PlaybookCreateRequest(BaseModel): """建立 Playbook 請求 (人工建立)""" name: str = Field(min_length=1, max_length=256) description: str = Field(min_length=1, max_length=2000) symptom_pattern: SymptomPattern repair_steps: list[RepairStep] = Field(min_length=1) estimated_duration_minutes: int = Field(default=5, ge=1, le=480) tags: list[str] = Field(default_factory=list) notes: str | None = None class PlaybookUpdateRequest(BaseModel): """更新 Playbook 請求""" name: str | None = Field(None, min_length=1, max_length=256) description: str | None = Field(None, min_length=1, max_length=2000) symptom_pattern: SymptomPattern | None = None repair_steps: list[RepairStep] | None = None estimated_duration_minutes: int | None = Field(None, ge=1, le=480) tags: list[str] | None = None notes: str | None = None status: PlaybookStatus | None = None class PlaybookApproveRequest(BaseModel): """核准 Playbook 請求""" approved_by: str = Field(min_length=1, max_length=128) notes: str | None = Field(None, max_length=1000) class SymptomPatternRequest(BaseModel): """症狀模式查詢請求""" alert_names: list[str] = Field(default_factory=list) affected_services: list[str] = Field(default_factory=list) severity: str | None = None keywords: list[str] = Field(default_factory=list) def to_symptom_pattern(self) -> SymptomPattern: """轉換為 SymptomPattern""" return SymptomPattern( alert_names=self.alert_names, affected_services=self.affected_services, severity_range=[self.severity] if self.severity else ["P1", "P2"], keywords=self.keywords, )