All checks were successful
CD Pipeline / tests (push) Successful in 1m34s
Code Review / ai-code-review (push) Successful in 28s
Type Sync Check / check-type-sync (push) Successful in 1m10s
CD Pipeline / build-and-deploy (push) Successful in 10m19s
CD Pipeline / post-deploy-checks (push) Successful in 3m1s
396 lines
13 KiB
Python
396 lines
13 KiB
Python
"""
|
||
Playbook Models - #7 Playbook 萃取
|
||
==================================
|
||
從成功案例萃取的修復劇本資料模型
|
||
|
||
Phase 7.1: 資料模型定義
|
||
建立時間: 2026-03-26 (台北時區)
|
||
作者: Claude Code (Phase 7)
|
||
|
||
遵循 leWOOOgo 積木化原則:
|
||
- Pydantic BaseModel 定義
|
||
- 支援 PostgreSQL + Redis 雙層儲存
|
||
"""
|
||
|
||
from datetime import datetime
|
||
from enum import Enum
|
||
from typing import Any
|
||
from uuid import uuid4
|
||
|
||
from pydantic import BaseModel, ConfigDict, Field
|
||
|
||
from src.utils.timezone import now_taipei
|
||
|
||
# =============================================================================
# Enums
# =============================================================================
|
||
|
||
|
||
class PlaybookStatus(str, Enum):
    """Lifecycle status of a playbook."""

    DRAFT = "draft"  # extracted by AI, awaiting human review
    REVIEW = "review"  # AI-generated and safety-checked, awaiting governance promotion
    APPROVED = "approved"  # approved by a human, eligible for recommendation
    DEPRECATED = "deprecated"  # retired (a better solution exists)
|
||
|
||
|
||
class PlaybookSource(str, Enum):
    """Origin of a playbook."""

    EXTRACTED = "extracted"  # automatically extracted from an Incident
    LLM_GENERATED = "llm_generated"  # ADR-104: generated by an LLM from successful cases
    MANUAL = "manual"  # created by a human
    YAML_RULE = "yaml_rule"  # imported from alert_rules.yaml (2026-04-15 ogt)
|
||
|
||
|
||
class ActionType(str, Enum):
    """How a repair step is executed."""

    KUBECTL = "kubectl"  # Kubernetes command
    SCRIPT = "script"  # script execution
    MANUAL = "manual"  # requires a human operator
    SSH_COMMAND = "ssh_command"  # 2026-04-05 Claude Code: host-level SSH repair (Sprint 3)
|
||
|
||
|
||
class RiskLevel(str, Enum):
    """Risk level; the values are upper-case strings."""

    LOW = "LOW"
    MEDIUM = "MEDIUM"
    HIGH = "HIGH"
    CRITICAL = "CRITICAL"
|
||
|
||
|
||
# =============================================================================
# Sub-Models
# =============================================================================
|
||
|
||
|
||
class SymptomPattern(BaseModel):
    """
    Symptom pattern used for similarity matching.

    Design: a multi-dimensional feature set
    - alert_names: alert name patterns
    - affected_services: affected service patterns
    - severity_range: severities the pattern applies to
    - label_patterns: Prometheus label matchers (k8s namespace, deployment, etc.)
    - keywords: keywords (extracted from annotations, per the field description)
    """

    alert_names: list[str] = Field(
        default_factory=list,
        description="告警名稱模式 (如 HighCPU*, PodCrash*)",
    )
    affected_services: list[str] = Field(
        default_factory=list,
        description="受影響服務模式",
    )
    severity_range: list[str] = Field(
        default=["P1", "P2"],
        description="適用嚴重度範圍",
    )
    label_patterns: dict[str, str] = Field(
        default_factory=dict,
        description="標籤匹配 (regex)",
    )
    keywords: list[str] = Field(
        default_factory=list,
        description="關鍵字 (從 annotations 提取)",
    )

    # Unknown keys in the input are dropped instead of raising.
    model_config = ConfigDict(extra="ignore")

    def compute_hash(self) -> str:
        """
        Return a deterministic 16-hex-character fingerprint of this pattern.

        2026-04-04 Claude Code: Phase 25 P1 - used by the Anti-Pattern
        closed-loop interception. Purpose: exact-match comparison, avoiding
        the fuzziness of purely semantic search.

        Only alert_names, affected_services and label_patterns contribute;
        severity_range and keywords do not affect the result. Both lists are
        sorted and the label dict is serialized with sorted keys, so ordering
        differences do not change the fingerprint.

        Returns:
            The first 16 hex digits of the SHA-256 digest of the canonical key.
        """
        import hashlib
        import json

        # NOTE(review): the key layout must stay stable if hashes are persisted
        # anywhere - confirm before changing separators or serialization.
        key = (
            "|".join(sorted(self.alert_names)) + "||"
            + "|".join(sorted(self.affected_services)) + "||"
            + json.dumps(self.label_patterns, sort_keys=True)
        )
        return hashlib.sha256(key.encode()).hexdigest()[:16]
|
||
|
||
|
||
class RepairStep(BaseModel):
    """
    A single repair step of a playbook.

    Design: supports several execution types (see ActionType)
    - kubectl: Kubernetes command
    - script: script execution
    - manual: requires a human operator
    - ssh_command: host-level SSH repair
    """

    step_number: int = Field(ge=1, description="步驟序號")
    action_type: ActionType = Field(description="執行類型")
    command: str = Field(description="執行命令或操作描述")
    expected_result: str | None = Field(None, description="預期結果")
    rollback_command: str | None = Field(None, description="回滾命令")
    requires_approval: bool = Field(default=False, description="是否需要人工審核")
    risk_level: RiskLevel = Field(default=RiskLevel.MEDIUM, description="風險等級")

    # Unknown keys in the input are dropped instead of raising.
    model_config = ConfigDict(extra="ignore")
|
||
|
||
|
||
# =============================================================================
# Core Model
# =============================================================================
|
||
|
||
|
||
def generate_playbook_id() -> str:
    """
    Generate a playbook ID of the form ``PB-YYYYMMDD-XXXXXX``.

    The date part comes from ``now_taipei()``; the suffix is the first six hex
    digits of a fresh UUID4, upper-cased.
    """
    return f"PB-{now_taipei().strftime('%Y%m%d')}-{uuid4().hex[:6].upper()}"
|
||
|
||
|
||
class Playbook(BaseModel):
    """
    Playbook - a repair playbook.

    Three memory tiers (design notes; the storage itself is not implemented
    in this class):
    - Working Memory (Redis): playbook:{playbook_id}, TTL 7 days
    - Episodic Memory (PostgreSQL): playbooks table
    - Semantic Memory (Vector DB): vectorized symptom features (Phase 8+)

    Design follows:
    - ADR-003 leWOOOgo modular architecture
    - ADR-007 data retention policy
    """

    # === Identity ===
    playbook_id: str = Field(
        default_factory=generate_playbook_id,
        description="Playbook 唯一識別碼",
    )

    # === Metadata ===
    name: str = Field(description="Playbook 名稱 (人類可讀)")
    description: str = Field(description="問題描述與修復策略摘要")
    status: PlaybookStatus = Field(default=PlaybookStatus.DRAFT)
    source: PlaybookSource = Field(default=PlaybookSource.EXTRACTED)

    # === Symptom pattern ===
    symptom_pattern: SymptomPattern = Field(
        default_factory=SymptomPattern,
        description="觸發此 Playbook 的症狀模式",
    )

    # === Repair steps ===
    repair_steps: list[RepairStep] = Field(
        default_factory=list,
        description="修復步驟列表",
    )
    estimated_duration_minutes: int = Field(
        default=5,
        ge=1,
        le=480,
        description="預估修復時間 (分鐘)",
    )

    # === Provenance ===
    source_incident_ids: list[str] = Field(
        default_factory=list,
        description="萃取來源的 Incident ID",
    )
    version: int = Field(default=1, ge=1, description="Playbook lineage version")
    parent_playbook_id: str | None = Field(
        None,
        description="Root Playbook ID for this lineage",
    )
    supersedes_playbook_id: str | None = Field(
        None,
        description="Previous Playbook version superseded by this one",
    )
    version_reason: str | None = Field(None, description="Why this version was created")
    ai_confidence: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="AI 萃取信心度",
    )

    # === Statistics ===
    success_count: int = Field(default=0, ge=0, description="成功執行次數")
    failure_count: int = Field(default=0, ge=0, description="失敗執行次數")
    last_used_at: datetime | None = Field(None, description="最後使用時間")
    # ADR-083 Phase 3: EWMA trust score (0.0-1.0, initial value 0.3)
    # success: trust_new = 0.9 * trust_old + 0.1 * 1.0
    # failure: trust_new = 0.8 * trust_old + 0.2 * 0.0 (2x decay)
    # trust < 0.1 -> auto-archived (handled by the Evolver Agent)
    # The update itself is not performed in this class; only the value is stored.
    # 2026-04-15 ogt + Claude Sonnet 4.6 (Asia-Pacific): Phase 3 EWMA negative reinforcement
    trust_score: float = Field(
        default=0.3,
        ge=0.0,
        le=1.0,
        description="EWMA 動態信任度(Phase 3 新增)",
    )

    # === Human annotations ===
    approved_by: str | None = Field(None, description="核准者")
    approved_at: datetime | None = Field(None, description="核准時間")
    tags: list[str] = Field(default_factory=list, description="標籤")
    notes: str | None = Field(None, description="人工補充說明")

    # === Sprint 5.1 data-safety guardrails (2026-04-08 Claude Sonnet 4.6 Asia/Taipei) ===
    requires_approval_level: str = Field(
        default="auto",
        description="auto=直接執行, standard=1票, critical=2票MultiSig(由 Service Registry 決定)",
    )
    stateful_targets: list[str] = Field(
        default_factory=list,
        description="此 Playbook 操作的 Stateful 服務清單,對應 service-registry.yaml",
    )
    requires_pre_backup: bool = Field(
        default=False,
        description="執行前是否需要 Pre-flight 備份檢查",
    )
    review_required: bool = Field(
        default=False,
        description="KM/治理累積觸發人工或 AI 複審信號",
    )

    # === Timeline ===
    # Each timestamp calls now_taipei() separately, so the two defaults may
    # differ slightly on a freshly created instance.
    created_at: datetime = Field(default_factory=now_taipei)
    updated_at: datetime = Field(default_factory=now_taipei)

    # Unknown keys in the input are dropped instead of raising.
    model_config = ConfigDict(extra="ignore")

    @property
    def success_rate(self) -> float:
        """Fraction of executions that succeeded; 0.0 when there are none."""
        total = self.success_count + self.failure_count
        return self.success_count / total if total > 0 else 0.0

    @property
    def failure_rate(self) -> float:
        """
        Fraction of executions that failed; 0.0 when there are none.

        2026-03-30 Claude Code: used by the Learning Service for confidence
        adjustment.
        """
        total = self.success_count + self.failure_count
        return self.failure_count / total if total > 0 else 0.0

    @property
    def is_high_quality(self) -> bool:
        """
        Whether this is a high-quality playbook (reference for #8 auto-promotion).

        Conditions:
        - status is APPROVED
        - success rate >= 80% (cold-start period: originally 95%; lowered on
          2026-04-05 by ogt to break the zero-execution vicious cycle)
        - success count >= 3 (cold-start period: originally 10; to be
          tightened once enough executions have accumulated)

        To be tightened once mature: success_rate >= 0.95, success_count >= 10
        """
        # 2026-04-05 ogt: cold-start adjustment to break the zero-execution vicious cycle
        return (
            self.status == PlaybookStatus.APPROVED
            and self.success_rate >= 0.80
            and self.success_count >= 3
        )

    @property
    def total_executions(self) -> int:
        """Total number of executions (successes plus failures)."""
        return self.success_count + self.failure_count

    def to_redis_dict(self) -> dict[str, Any]:
        """Convert to the Redis storage format (a JSON-mode model dump)."""
        return self.model_dump(mode="json")

    @classmethod
    def from_redis_dict(cls, data: dict[str, Any]) -> "Playbook":
        """Rebuild a Playbook from Redis data by validating the given dict."""
        return cls.model_validate(data)
|
||
|
||
|
||
# =============================================================================
# Response Models
# =============================================================================
|
||
|
||
|
||
class PlaybookRecommendation(BaseModel):
    """A playbook recommendation result."""

    playbook: Playbook
    similarity_score: float = Field(ge=0.0, le=1.0, description="相似度分數")
    matched_symptoms: list[str] = Field(
        default_factory=list,
        description="匹配的症狀",
    )
    reason: str = Field(description="推薦原因")

    # Unknown keys in the input are dropped instead of raising.
    model_config = ConfigDict(extra="ignore")
|
||
|
||
|
||
class PlaybookResponse(BaseModel):
    """Response wrapping a single playbook together with its derived metrics."""

    playbook: Playbook
    success_rate: float = Field(ge=0.0, le=1.0)
    is_high_quality: bool

    @classmethod
    def from_playbook(cls, playbook: Playbook) -> "PlaybookResponse":
        """
        Build a response from a Playbook.

        Copies the playbook's computed ``success_rate`` and
        ``is_high_quality`` properties into the response fields.
        """
        return cls(
            playbook=playbook,
            success_rate=playbook.success_rate,
            is_high_quality=playbook.is_high_quality,
        )
|
||
|
||
|
||
class PlaybookListResponse(BaseModel):
    """Response carrying a list of playbooks plus total/limit/offset values."""

    items: list[PlaybookResponse]
    total: int
    limit: int
    offset: int
|
||
|
||
|
||
# =============================================================================
# Request Models
# =============================================================================
|
||
|
||
|
||
class PlaybookCreateRequest(BaseModel):
    """Request to create a playbook (manual creation)."""

    name: str = Field(min_length=1, max_length=256)
    description: str = Field(min_length=1, max_length=2000)
    symptom_pattern: SymptomPattern
    # At least one repair step is required.
    repair_steps: list[RepairStep] = Field(min_length=1)
    estimated_duration_minutes: int = Field(default=5, ge=1, le=480)
    tags: list[str] = Field(default_factory=list)
    notes: str | None = None
|
||
|
||
|
||
class PlaybookUpdateRequest(BaseModel):
    """
    Request to update a playbook.

    Every field is optional and defaults to None.
    """

    name: str | None = Field(None, min_length=1, max_length=256)
    description: str | None = Field(None, min_length=1, max_length=2000)
    symptom_pattern: SymptomPattern | None = None
    repair_steps: list[RepairStep] | None = None
    estimated_duration_minutes: int | None = Field(None, ge=1, le=480)
    tags: list[str] | None = None
    notes: str | None = None
    status: PlaybookStatus | None = None
|
||
|
||
|
||
class PlaybookApproveRequest(BaseModel):
    """Request to approve a playbook."""

    approved_by: str = Field(min_length=1, max_length=128)
    notes: str | None = Field(None, max_length=1000)
|
||
|
||
|
||
class SymptomPatternRequest(BaseModel):
    """Symptom pattern query request."""

    alert_names: list[str] = Field(default_factory=list)
    affected_services: list[str] = Field(default_factory=list)
    severity: str | None = None
    keywords: list[str] = Field(default_factory=list)

    def to_symptom_pattern(self) -> SymptomPattern:
        """
        Convert this request into a SymptomPattern.

        A truthy ``severity`` becomes a one-element ``severity_range``; a
        missing or empty severity falls back to ``["P1", "P2"]``.
        ``label_patterns`` is not set here, so it takes the model default.
        """
        return SymptomPattern(
            alert_names=self.alert_names,
            affected_services=self.affected_services,
            severity_range=[self.severity] if self.severity else ["P1", "P2"],
            keywords=self.keywords,
        )
|