Files
awoooi/apps/api/src/models/playbook.py
Your Name f154ac022e
All checks were successful
CD Pipeline / tests (push) Successful in 1m34s
Code Review / ai-code-review (push) Successful in 28s
Type Sync Check / check-type-sync (push) Successful in 1m10s
CD Pipeline / build-and-deploy (push) Successful in 10m19s
CD Pipeline / post-deploy-checks (push) Successful in 3m1s
feat(playbook): version generated playbooks
2026-04-30 23:59:39 +08:00

396 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Playbook Models - #7 Playbook 萃取
==================================
從成功案例萃取的修復劇本資料模型
Phase 7.1: 資料模型定義
建立時間: 2026-03-26 (台北時區)
作者: Claude Code (Phase 7)
遵循 leWOOOgo 積木化原則:
- Pydantic BaseModel 定義
- 支援 PostgreSQL + Redis 雙層儲存
"""
from datetime import datetime
from enum import Enum
from typing import Any
from uuid import uuid4
from pydantic import BaseModel, ConfigDict, Field
from src.utils.timezone import now_taipei
# =============================================================================
# Enums
# =============================================================================
class PlaybookStatus(str, Enum):
    """Lifecycle state of a playbook."""

    # Extracted by AI; waiting for human review.
    DRAFT = "draft"
    # AI-generated and passed the safety checks; waiting for governance promotion.
    REVIEW = "review"
    # Approved by a human; may be used for recommendations.
    APPROVED = "approved"
    # Retired (a better approach exists).
    DEPRECATED = "deprecated"
class PlaybookSource(str, Enum):
    """Where a playbook originated from."""

    # Automatically extracted from an Incident.
    EXTRACTED = "extracted"
    # ADR-104: generated by an LLM from successful cases.
    LLM_GENERATED = "llm_generated"
    # Created by hand.
    MANUAL = "manual"
    # Imported from alert_rules.yaml (2026-04-15 ogt).
    YAML_RULE = "yaml_rule"
class ActionType(str, Enum):
    """Kind of action a repair step performs."""

    # Kubernetes command.
    KUBECTL = "kubectl"
    # Script execution.
    SCRIPT = "script"
    # Requires a human operator.
    MANUAL = "manual"
    # 2026-04-05 Claude Code: host-level SSH remediation (Sprint 3).
    SSH_COMMAND = "ssh_command"
class RiskLevel(str, Enum):
    """Risk level; each member's value is its own upper-case name."""

    LOW = "LOW"
    MEDIUM = "MEDIUM"
    HIGH = "HIGH"
    CRITICAL = "CRITICAL"
# =============================================================================
# Sub-Models
# =============================================================================
class SymptomPattern(BaseModel):
    """
    Symptom pattern used for similarity matching.

    Design: a multi-dimensional feature set
    - alert_names: alert-name patterns
    - affected_services: affected-service patterns
    - severity_range: severities the pattern applies to
    - label_patterns: Prometheus label matchers (k8s namespace, deployment, etc.)
    - keywords: keywords taken from annotations
    """

    # Unknown keys in the input are dropped rather than rejected.
    model_config = ConfigDict(extra="ignore")

    alert_names: list[str] = Field(
        description="告警名稱模式 (如 HighCPU*, PodCrash*)",
        default_factory=list,
    )
    affected_services: list[str] = Field(
        description="受影響服務模式",
        default_factory=list,
    )
    severity_range: list[str] = Field(
        description="適用嚴重度範圍",
        default=["P1", "P2"],
    )
    label_patterns: dict[str, str] = Field(
        description="標籤匹配 (regex)",
        default_factory=dict,
    )
    keywords: list[str] = Field(
        description="關鍵字 (從 annotations 提取)",
        default_factory=list,
    )

    def compute_hash(self) -> str:
        """
        Return a deterministic 16-hex-character fingerprint of this pattern.

        2026-04-04 Claude Code: Phase 25 P1 - used by the anti-pattern
        closed-loop interception.

        The fingerprint covers alert_names, affected_services and
        label_patterns only; severity_range and keywords do not take part.
        Purpose: O(1) exact matching, avoiding the fuzziness of a purely
        semantic search.

        Returns:
            The first 16 hex characters of the SHA-256 digest of the key.
        """
        import hashlib
        import json

        # Lists are sorted and the dict is dumped with sorted keys so that
        # input ordering never changes the fingerprint.
        # NOTE(review): "|" is the item separator, so a name that itself
        # contains "|" is indistinguishable from two separate names.
        segments = (
            "|".join(sorted(self.alert_names)),
            "|".join(sorted(self.affected_services)),
            json.dumps(self.label_patterns, sort_keys=True),
        )
        digest = hashlib.sha256("||".join(segments).encode())
        return digest.hexdigest()[:16]
class RepairStep(BaseModel):
    """
    A single repair step of a playbook.

    Design: several execution types are supported (see ActionType)
    - kubectl: Kubernetes command
    - script: script execution
    - manual: requires a human operator
    - ssh_command: command run over SSH
    """

    # Unknown keys in the input are dropped rather than rejected.
    model_config = ConfigDict(extra="ignore")

    # Position of the step; validated to be >= 1.
    step_number: int = Field(description="步驟序號", ge=1)
    action_type: ActionType = Field(description="執行類型")
    # Command to run, or a description of the operation for manual steps.
    command: str = Field(description="執行命令或操作描述")
    expected_result: str | None = Field(default=None, description="預期結果")
    rollback_command: str | None = Field(default=None, description="回滾命令")
    requires_approval: bool = Field(default=False, description="是否需要人工審核")
    risk_level: RiskLevel = Field(default=RiskLevel.MEDIUM, description="風險等級")
# =============================================================================
# Core Model
# =============================================================================
def generate_playbook_id() -> str:
    """
    Build a new playbook ID of the form ``PB-<YYYYMMDD>-<6 hex chars>``.

    The date part is formatted from ``now_taipei()`` (Taipei time zone, per
    the helper's name); the suffix is the first six hex digits of a fresh
    UUID4, upper-cased.
    """
    date_part = now_taipei().strftime("%Y%m%d")
    random_part = uuid4().hex[:6].upper()
    return f"PB-{date_part}-{random_part}"
class Playbook(BaseModel):
    """
    Playbook - a repair playbook.

    Three-tier memory placement:
    - Working Memory (Redis): playbook:{playbook_id}, TTL 7 days
    - Episodic Memory (PostgreSQL): the playbooks table
    - Semantic Memory (Vector DB): vectorised symptom features (Phase 8+)

    Design follows:
    - ADR-003 leWOOOgo modular architecture
    - ADR-007 data retention policy
    """

    # Unknown keys in the input are dropped rather than rejected.
    model_config = ConfigDict(extra="ignore")

    # === Identity ===
    playbook_id: str = Field(
        description="Playbook 唯一識別碼",
        default_factory=generate_playbook_id,
    )

    # === Metadata ===
    name: str = Field(description="Playbook 名稱 (人類可讀)")
    description: str = Field(description="問題描述與修復策略摘要")
    status: PlaybookStatus = PlaybookStatus.DRAFT
    source: PlaybookSource = PlaybookSource.EXTRACTED

    # === Symptom pattern ===
    symptom_pattern: SymptomPattern = Field(
        description="觸發此 Playbook 的症狀模式",
        default_factory=SymptomPattern,
    )

    # === Repair steps ===
    repair_steps: list[RepairStep] = Field(
        description="修復步驟列表",
        default_factory=list,
    )
    # Estimated repair time in minutes; validated to 1..480.
    estimated_duration_minutes: int = Field(
        description="預估修復時間 (分鐘)",
        default=5,
        ge=1,
        le=480,
    )

    # === Provenance ===
    source_incident_ids: list[str] = Field(
        description="萃取來源的 Incident ID",
        default_factory=list,
    )
    version: int = Field(default=1, ge=1, description="Playbook lineage version")
    parent_playbook_id: str | None = Field(
        default=None,
        description="Root Playbook ID for this lineage",
    )
    supersedes_playbook_id: str | None = Field(
        default=None,
        description="Previous Playbook version superseded by this one",
    )
    version_reason: str | None = Field(
        default=None,
        description="Why this version was created",
    )
    ai_confidence: float = Field(
        description="AI 萃取信心度",
        default=0.0,
        ge=0.0,
        le=1.0,
    )

    # === Statistics ===
    success_count: int = Field(default=0, ge=0, description="成功執行次數")
    failure_count: int = Field(default=0, ge=0, description="失敗執行次數")
    last_used_at: datetime | None = Field(default=None, description="最後使用時間")

    # ADR-083 Phase 3: EWMA trust score (0.0-1.0, initial value 0.3)
    #   success: trust_new = 0.9 * trust_old + 0.1 * 1.0
    #   failure: trust_new = 0.8 * trust_old + 0.2 * 0.0 (2x decay)
    #            NOTE(review): the failure formula was garbled in the original
    #            comment; this reading is a reconstruction - confirm.
    #   trust < 0.1 -> archived automatically (handled by the Evolver Agent)
    # 2026-04-15 ogt + Claude Sonnet 4.6 (APAC): Phase 3 EWMA negative reinforcement
    # NOTE(review): the description text looks like it lost an opening
    # parenthesis; it is a runtime string and is kept byte-for-byte.
    trust_score: float = Field(
        description="EWMA 動態信任度Phase 3 新增)",
        default=0.3,
        ge=0.0,
        le=1.0,
    )

    # === Human annotations ===
    approved_by: str | None = Field(default=None, description="核准者")
    approved_at: datetime | None = Field(default=None, description="核准時間")
    tags: list[str] = Field(description="標籤", default_factory=list)
    notes: str | None = Field(default=None, description="人工補充說明")

    # === Sprint 5.1 data-safety guardrails (2026-04-08 Claude Sonnet 4.6 Asia/Taipei) ===
    # auto = run directly, standard = 1 vote, critical = 2 votes (MultiSig);
    # decided by the Service Registry. Typed as a plain str, so the value is
    # not restricted to those three by this model.
    # NOTE(review): the description text looks like it lost an opening
    # parenthesis; it is a runtime string and is kept byte-for-byte.
    requires_approval_level: str = Field(
        description="auto=直接執行, standard=1票, critical=2票MultiSig由 Service Registry 決定)",
        default="auto",
    )
    # Stateful services this playbook touches; maps to service-registry.yaml.
    stateful_targets: list[str] = Field(
        description="此 Playbook 操作的 Stateful 服務清單,對應 service-registry.yaml",
        default_factory=list,
    )
    # Whether a pre-flight backup check is required before execution.
    requires_pre_backup: bool = Field(
        description="執行前是否需要 Pre-flight 備份檢查",
        default=False,
    )
    # Signal, accumulated by KM/governance, that a human or AI re-review is due.
    review_required: bool = Field(
        description="KM/治理累積觸發人工或 AI 複審信號",
        default=False,
    )

    # === Timeline ===
    created_at: datetime = Field(default_factory=now_taipei)
    updated_at: datetime = Field(default_factory=now_taipei)

    @property
    def success_rate(self) -> float:
        """Fraction of executions that succeeded; 0.0 when never executed."""
        runs = self.success_count + self.failure_count
        if runs <= 0:
            return 0.0
        return self.success_count / runs

    @property
    def failure_rate(self) -> float:
        """
        Fraction of executions that failed; 0.0 when never executed.

        (2026-03-30 Claude Code: used by the Learning Service to adjust
        confidence.)
        """
        runs = self.success_count + self.failure_count
        if runs <= 0:
            return 0.0
        return self.failure_count / runs

    @property
    def is_high_quality(self) -> bool:
        """
        Whether this is a high-quality playbook (reference for #8 auto-promotion).

        Conditions:
        - status is APPROVED
        - success rate >= 80% (cold-start period: originally 95%; lowered
          2026-04-05 by ogt to break the zero-execution vicious cycle)
        - success count >= 3 (cold-start period: originally 10; to be
          tightened once enough executions have accumulated)

        To be tightened once mature: success_rate >= 0.95, success_count >= 10
        """
        # 2026-04-05 ogt: cold-start thresholds, see docstring.
        if self.status != PlaybookStatus.APPROVED:
            return False
        if self.success_count < 3:
            return False
        return self.success_rate >= 0.80

    @property
    def total_executions(self) -> int:
        """Total number of executions (successes plus failures)."""
        return sum((self.success_count, self.failure_count))

    def to_redis_dict(self) -> dict[str, Any]:
        """Serialise to the Redis storage format (a JSON-mode model dump)."""
        payload = self.model_dump(mode="json")
        return payload

    @classmethod
    def from_redis_dict(cls, data: dict[str, Any]) -> "Playbook":
        """Rebuild a Playbook from data read back from Redis, with validation."""
        restored = cls.model_validate(data)
        return restored
# =============================================================================
# Response Models
# =============================================================================
class PlaybookRecommendation(BaseModel):
    """A recommended playbook together with why it was recommended."""

    # Unknown keys in the input are dropped rather than rejected.
    model_config = ConfigDict(extra="ignore")

    playbook: Playbook
    # Similarity between the query and the playbook; validated to 0.0..1.0.
    similarity_score: float = Field(description="相似度分數", ge=0.0, le=1.0)
    matched_symptoms: list[str] = Field(
        description="匹配的症狀",
        default_factory=list,
    )
    reason: str = Field(description="推薦原因")
class PlaybookResponse(BaseModel):
    """Response wrapper for one playbook plus its derived quality figures."""

    playbook: Playbook
    # Copied from Playbook.success_rate by from_playbook(); validated to 0.0..1.0.
    success_rate: float = Field(ge=0.0, le=1.0)
    # Copied from Playbook.is_high_quality by from_playbook().
    is_high_quality: bool

    @classmethod
    def from_playbook(cls, playbook: Playbook) -> "PlaybookResponse":
        """Build a response from a Playbook, snapshotting its computed properties."""
        rate = playbook.success_rate
        high_quality = playbook.is_high_quality
        return cls(
            playbook=playbook,
            success_rate=rate,
            is_high_quality=high_quality,
        )
class PlaybookListResponse(BaseModel):
    """Response model for a list of playbooks."""

    # Playbook entries included in this response.
    items: list[PlaybookResponse]
    # All four fields are required; the integers carry no range constraints here.
    total: int
    limit: int
    offset: int
# =============================================================================
# Request Models
# =============================================================================
class PlaybookCreateRequest(BaseModel):
    """Request body for creating a playbook (manual creation)."""

    # 1..256 characters.
    name: str = Field(min_length=1, max_length=256)
    # 1..2000 characters.
    description: str = Field(min_length=1, max_length=2000)
    symptom_pattern: SymptomPattern
    # At least one repair step is required.
    repair_steps: list[RepairStep] = Field(min_length=1)
    # Minutes; validated to 1..480, defaults to 5.
    estimated_duration_minutes: int = Field(default=5, ge=1, le=480)
    tags: list[str] = Field(default_factory=list)
    notes: str | None = Field(default=None)
class PlaybookUpdateRequest(BaseModel):
    """
    Request body for updating a playbook.

    Every field is optional and defaults to None.
    """

    # 1..256 characters when given.
    name: str | None = Field(default=None, min_length=1, max_length=256)
    # 1..2000 characters when given.
    description: str | None = Field(default=None, min_length=1, max_length=2000)
    symptom_pattern: SymptomPattern | None = Field(default=None)
    # No minimum length is enforced here, so an empty list is accepted.
    repair_steps: list[RepairStep] | None = Field(default=None)
    # Minutes; validated to 1..480 when given.
    estimated_duration_minutes: int | None = Field(default=None, ge=1, le=480)
    tags: list[str] | None = Field(default=None)
    notes: str | None = Field(default=None)
    status: PlaybookStatus | None = Field(default=None)
class PlaybookApproveRequest(BaseModel):
    """Request body for approving a playbook."""

    # Identifier of the approver; required, 1..128 characters.
    approved_by: str = Field(min_length=1, max_length=128)
    # Optional note, at most 1000 characters.
    notes: str | None = Field(default=None, max_length=1000)
class SymptomPatternRequest(BaseModel):
    """Query request describing a symptom pattern."""

    alert_names: list[str] = Field(default_factory=list)
    affected_services: list[str] = Field(default_factory=list)
    # A single severity; None (or an empty string) means "use the default range".
    severity: str | None = Field(default=None)
    keywords: list[str] = Field(default_factory=list)

    def to_symptom_pattern(self) -> SymptomPattern:
        """
        Convert this request into a SymptomPattern.

        A truthy ``severity`` becomes a one-element ``severity_range``;
        otherwise the range is ["P1", "P2"]. ``label_patterns`` is not set
        here, so the SymptomPattern default applies.
        """
        if self.severity:
            severities = [self.severity]
        else:
            severities = ["P1", "P2"]
        return SymptomPattern(
            alert_names=self.alert_names,
            affected_services=self.affected_services,
            severity_range=severities,
            keywords=self.keywords,
        )