問題根因: 1. create_incident_for_approval 只存 Redis,不存 PostgreSQL → TTL 7天後消失,Playbook 萃取永遠找不到 Incident 2. ApprovalRecord 無 incident_id 欄位 → _trigger_playbook_extraction 靠 regex 掃中文文字找 INC-,永遠失敗 3. operation_parser namespace fallback 是 "default" → 所有 deployment 在 awoooi-prod,203 次執行全失敗 修復: - Incident 同時寫入 Redis + PostgreSQL (save_to_episodic_memory) - ApprovalRecord 加入 incident_id 欄位 (model + ORM + migration) - alertmanager_webhook 建立 Approval 後回寫 incident_id - _trigger_playbook_extraction 直接用 approval.incident_id - operation_parser DEFAULT_NAMESPACE = "awoooi-prod" Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
577 lines
17 KiB
Python
577 lines
17 KiB
Python
"""
|
||
Database Models
|
||
===============
|
||
CTO-201: Approval & AuditLog persistence
|
||
|
||
Schema 設計原則:
|
||
- UUID 主鍵 (PostgreSQL 相容)
|
||
- JSON 欄位儲存複雜結構
|
||
- 完整時間戳記
|
||
- 索引優化查詢
|
||
"""
|
||
|
||
from datetime import datetime
|
||
from typing import Any
|
||
from uuid import uuid4
|
||
|
||
from sqlalchemy import (
|
||
JSON,
|
||
DateTime,
|
||
Index,
|
||
Integer,
|
||
String,
|
||
Text,
|
||
)
|
||
from sqlalchemy import (
|
||
Enum as SQLEnum,
|
||
)
|
||
from sqlalchemy.orm import Mapped, mapped_column
|
||
|
||
from src.db.base import Base
|
||
from src.models.approval import ApprovalStatus, RiskLevel
|
||
from src.models.incident import IncidentStatus, Severity
|
||
from src.models.knowledge import EntrySource, EntryStatus, EntryType
|
||
|
||
# =============================================================================
|
||
# Helper Functions
|
||
# =============================================================================
|
||
|
||
def taipei_now() -> datetime:
    """Return the current time in the Taipei timezone (UTC+8).

    HARD RULE: the whole system uses the Taipei timezone; UTC is forbidden.
    2026-04-02 Claude Code: C1 timezone unification migration
    (chief architect review).

    Used as the ``default`` / ``onupdate`` callable for the timestamp
    columns declared in this module.
    """
    # Resolved at call time rather than at module import
    # (presumably to avoid an import cycle -- TODO confirm).
    from src.utils.timezone import now_taipei

    current_time = now_taipei()
    return current_time
|
||
|
||
|
||
def generate_uuid() -> str:
    """Return a freshly generated random (version 4) UUID as a string."""
    new_id = uuid4()
    return str(new_id)
|
||
|
||
|
||
# =============================================================================
|
||
# ApprovalRecord - 授權記錄持久化
|
||
# =============================================================================
|
||
|
||
class ApprovalRecord(Base):
    """
    Persistent approval record, mirroring the Pydantic ``ApprovalRequest``.

    Note: kept in sync with the in-memory TrustEngine ``ApprovalRequest``.
    """

    __tablename__ = "approval_records"

    # Primary key: 36-character string produced by generate_uuid() on insert.
    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=generate_uuid,
    )

    # Core fields
    action: Mapped[str] = mapped_column(String(500), nullable=False)
    description: Mapped[str] = mapped_column(Text, nullable=False)
    status: Mapped[str] = mapped_column(
        SQLEnum(ApprovalStatus),
        default=ApprovalStatus.PENDING,
        nullable=False,
    )
    risk_level: Mapped[str] = mapped_column(
        SQLEnum(RiskLevel),
        nullable=False,
    )

    # Signature tracking
    required_signatures: Mapped[int] = mapped_column(Integer, default=1)
    current_signatures: Mapped[int] = mapped_column(Integer, default=0)
    # The column default is ``list``, so a new row stores a JSON array.
    # Annotated as a list (previously ``dict[str, Any]``) to agree with that
    # default; the element shape is not visible here, hence ``Any``.
    signatures: Mapped[list[Any]] = mapped_column(JSON, default=list)

    # Blast radius (JSON)
    blast_radius: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)

    # Dry-run checks (JSON)
    dry_run_checks: Mapped[list[dict[str, Any]]] = mapped_column(JSON, default=list)

    # Metadata
    requested_by: Mapped[str] = mapped_column(String(100), nullable=False)
    rejection_reason: Mapped[str | None] = mapped_column(Text, nullable=True)
    extra_metadata: Mapped[dict[str, Any] | None] = mapped_column(JSON, nullable=True)

    # ==========================================================================
    # Strategy B: Alert Storm Convergence
    # ==========================================================================
    # Alert fingerprint: a hash derived from namespace + deployment + alert_name.
    fingerprint: Mapped[str | None] = mapped_column(
        String(64),
        nullable=True,
        index=True,
        comment="SHA256 hash of alert identity (namespace:deployment:alert_name)",
    )
    # Aggregation count: cumulative number of alerts sharing this fingerprint.
    hit_count: Mapped[int] = mapped_column(
        Integer,
        default=1,
        nullable=False,
        comment="Number of times this alert pattern was triggered",
    )
    # Last time an alert with this fingerprint was seen.
    last_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=taipei_now,
        nullable=False,
        comment="Last time this alert pattern was seen",
    )

    # 2026-04-06 ogt: Phase 26 -- associated Incident ID.
    # Playbook extraction and KM writes must know the incident_id directly
    # instead of recovering it by parsing free text.
    incident_id: Mapped[str | None] = mapped_column(
        String(64),
        nullable=True,
        index=True,
        comment="Associated Incident ID (INC-YYYYMMDD-XXXXXX)",
    )

    # Timestamps (timezone-aware columns, defaulting to taipei_now)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=taipei_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=taipei_now,
        onupdate=taipei_now,
    )
    expires_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    resolved_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )

    # Indexes
    # NOTE(review): ``fingerprint`` is also declared with ``index=True`` above,
    # so the explicit ``ix_approval_fingerprint`` entry likely yields a second
    # index on the same column -- confirm against the migrations before
    # removing either one.
    __table_args__ = (
        Index("ix_approval_status", "status"),
        Index("ix_approval_risk_level", "risk_level"),
        Index("ix_approval_created_at", "created_at"),
        Index("ix_approval_requested_by", "requested_by"),
        Index("ix_approval_fingerprint", "fingerprint"),  # Strategy B: fingerprint lookup
    )
|
||
|
||
|
||
# =============================================================================
|
||
# TimelineEvent & AuditLog - timeline events and audit log
|
||
# =============================================================================
|
||
|
||
class TimelineEvent(Base):
    """
    Timeline event row for the Phase 4 Action Timeline.

    Event types (stored in ``event_type``):
    - system: system alert received
    - agent: OpenClaw AI analysis
    - security: blocked by permissions
    - human: human authorization
    - exec: execution completed
    """

    __tablename__ = "timeline_events"

    # --- Primary key: string UUID generated on insert ---
    id: Mapped[str] = mapped_column(String(36), default=generate_uuid, primary_key=True)

    # --- Event type & status ---
    event_type: Mapped[str] = mapped_column(
        String(20),
        comment="system, agent, security, human, exec",
        nullable=False,
    )
    status: Mapped[str] = mapped_column(
        String(20),
        default="info",
        comment="info, success, warning, error",
        nullable=False,
    )

    # --- Content ---
    title: Mapped[str] = mapped_column(
        String(500),
        nullable=False,
    )
    description: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
    )

    # --- Actor ---
    actor: Mapped[str | None] = mapped_column(
        String(100),
        nullable=True,
    )
    actor_role: Mapped[str | None] = mapped_column(
        String(50),
        nullable=True,
    )

    # --- Context ---
    risk_level: Mapped[str | None] = mapped_column(
        String(20),
        nullable=True,
    )
    # Plain string column (no ForeignKey is declared here); indexed for lookup.
    approval_id: Mapped[str | None] = mapped_column(
        String(36),
        index=True,
        nullable=True,
    )

    # --- Timestamp ---
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=taipei_now)

    # --- Indexes ---
    __table_args__ = (
        Index("ix_timeline_event_type", "event_type"),
        Index("ix_timeline_created_at", "created_at"),
    )
|
||
|
||
|
||
class AuditLog(Base):
    """
    Audit log -- records every execution result.

    One row is written after each K8s operation completes.
    """

    __tablename__ = "audit_logs"

    # --- Primary key: string UUID generated on insert ---
    id: Mapped[str] = mapped_column(String(36), default=generate_uuid, primary_key=True)

    # --- Reference to the approval (plain string; no ForeignKey declared here) ---
    approval_id: Mapped[str] = mapped_column(String(36), index=True, nullable=False)

    # --- Operation details ---
    operation_type: Mapped[str] = mapped_column(
        String(50),
        comment="e.g., RESTART_DEPLOYMENT, DELETE_POD",
        nullable=False,
    )
    target_resource: Mapped[str] = mapped_column(
        String(200),
        comment="e.g., deployment/api-backend, pod/nginx-xxx",
        nullable=False,
    )
    namespace: Mapped[str] = mapped_column(String(63), nullable=False, default="default")

    # --- Execution result ---
    success: Mapped[bool] = mapped_column(nullable=False, default=False)
    error_message: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
    )

    # --- Raw K8s response ---
    k8s_response: Mapped[dict[str, Any] | None] = mapped_column(
        JSON,
        comment="Raw Kubernetes API response",
        nullable=True,
    )

    # --- Execution context ---
    executed_by: Mapped[str] = mapped_column(
        String(100),
        comment="Who triggered the execution",
        nullable=False,
    )
    execution_duration_ms: Mapped[int | None] = mapped_column(
        Integer,
        comment="Execution time in milliseconds",
        nullable=True,
    )

    # --- Dry-run result (pre-execution validation) ---
    dry_run_passed: Mapped[bool] = mapped_column(nullable=False, default=True)
    dry_run_message: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
    )

    # ==========================================================================
    # Phase 18: automatic failure-repair loop fields (2026-03-26)
    # ==========================================================================

    # Authorization source tracking
    authorization_channel: Mapped[str | None] = mapped_column(
        String(20),
        comment="Authorization source: web, telegram, auto",
        nullable=True,
    )

    # Retry and repair tracking
    retry_count: Mapped[int] = mapped_column(
        Integer,
        comment="Number of retry attempts",
        default=0,
        nullable=False,
    )
    failure_classification: Mapped[str | None] = mapped_column(
        String(50),
        comment="Failure type: TIMEOUT, K8S_ERROR, NETWORK_ERROR, PERMISSION_DENIED",
        nullable=True,
    )
    source_approval_id: Mapped[str | None] = mapped_column(
        String(36),
        comment="Original approval ID if this is a repair attempt",
        index=True,
        nullable=True,
    )

    # Auto-repair status
    auto_repair_attempted: Mapped[bool] = mapped_column(
        comment="Whether auto-repair was attempted",
        default=False,
        nullable=False,
    )
    auto_repair_result: Mapped[str | None] = mapped_column(
        Text,
        comment="Auto-repair result: AI analysis and repair outcome",
        nullable=True,
    )

    # --- Timestamp ---
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=taipei_now)

    # --- Indexes ---
    # NOTE(review): ``approval_id`` is also declared with ``index=True`` above,
    # so ``ix_audit_approval_id`` likely yields a second index on the same
    # column -- confirm against the migrations before removing either one.
    __table_args__ = (
        Index("ix_audit_approval_id", "approval_id"),
        Index("ix_audit_operation_type", "operation_type"),
        Index("ix_audit_success", "success"),
        Index("ix_audit_created_at", "created_at"),
        Index("ix_audit_authorization_channel", "authorization_channel"),  # Phase 18
        Index("ix_audit_failure_classification", "failure_classification"),  # Phase 18
    )
|
||
|
||
|
||
# =============================================================================
|
||
# IncidentRecord - Phase 6.2 Episodic Memory (PostgreSQL)
|
||
# =============================================================================
|
||
|
||
class IncidentRecord(Base):
    """
    Incident record, mirroring the Pydantic Incident Schema v0.3.

    Phase 6.2: Episodic Memory (long-term memory)
    - Migrated over from Working Memory (Redis)
    - Retained permanently, for RAG retrieval
    - Complex structures live in JSON columns (declared below with the
      generic SQLAlchemy ``JSON`` type; the original notes call them JSONB)

    Three-tier memory architecture:
    - Working Memory (Redis): 7-day TTL
    - Episodic Memory (PostgreSQL): this table, retained permanently
    - Semantic Memory (Vector DB): Phase 6.3+
    """

    __tablename__ = "incidents"

    # === Primary key (supplied by the caller; no default is declared) ===
    incident_id: Mapped[str] = mapped_column(
        String(30),
        comment="事件唯一識別碼 (如 INC-20260322-A1B2C3)",
        primary_key=True,
    )

    # === Status and severity ===
    status: Mapped[str] = mapped_column(
        SQLEnum(IncidentStatus),
        comment="事件狀態 (investigating, mitigating, resolved, closed, escalated)",
        default=IncidentStatus.INVESTIGATING,
        nullable=False,
    )
    severity: Mapped[str] = mapped_column(
        SQLEnum(Severity),
        comment="事件嚴重度 (P0, P1, P2, P3)",
        nullable=False,
    )

    # === Perception layer (signals) ===
    signals: Mapped[list[dict[str, Any]]] = mapped_column(
        JSON,
        comment="關聯的告警信號列表 (JSONB)",
        default=list,
        nullable=False,
    )
    affected_services: Mapped[list[str]] = mapped_column(
        JSON,
        comment="受影響的服務列表",
        default=list,
        nullable=False,
    )

    # === Cognition layer (AI decision chain) ===
    decision_chain: Mapped[dict[str, Any] | None] = mapped_column(
        JSON,
        comment="AI 決策鏈 (完整推論過程)",
        nullable=True,
    )

    # === Decision layer (proposals) ===
    proposal_ids: Mapped[list[str]] = mapped_column(
        JSON,
        comment="關聯的 ApprovalRequest ID 列表",
        default=list,
        nullable=False,
    )

    # === Outcome layer ===
    outcome: Mapped[dict[str, Any] | None] = mapped_column(
        JSON,
        comment="事件結果與人類回饋",
        nullable=True,
    )

    # === Timeline ===
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
        onupdate=taipei_now,
    )
    resolved_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    closed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)

    # === Memory management ===
    ttl_days: Mapped[int] = mapped_column(
        Integer,
        comment="Working Memory TTL (天)",
        default=7,
        nullable=False,
    )
    vectorized: Mapped[bool] = mapped_column(
        comment="是否已向量化到 Vector DB (Semantic Memory)",
        default=False,
        nullable=False,
    )

    # === Indexes ===
    __table_args__ = (
        Index("ix_incident_status", "status"),
        Index("ix_incident_severity", "severity"),
        Index("ix_incident_created_at", "created_at"),
        Index("ix_incident_resolved_at", "resolved_at"),
    )
|
||
|
||
|
||
# =============================================================================
|
||
# KnowledgeEntry - Knowledge Base Phase 1
|
||
# =============================================================================
|
||
|
||
class KnowledgeEntryRecord(Base):
    """
    Knowledge base entry -- Knowledge Base Phase 1.

    Two-layer architecture:
    - KnowledgeEntry: knowledge entries (this table)
    - Playbook: stored separately in Redis, linked via ``related_playbook_id``

    Created: 2026-04-02 (Taipei timezone)
    Author: Claude Code (Knowledge Base Phase 1)
    """

    __tablename__ = "knowledge_entries"

    # --- Primary key: string UUID generated on insert ---
    id: Mapped[str] = mapped_column(String(36), default=generate_uuid, primary_key=True)

    # --- Core fields ---
    title: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
    )
    content: Mapped[str] = mapped_column(
        Text,
        nullable=False,
    )
    entry_type: Mapped[str] = mapped_column(
        SQLEnum(EntryType),
        comment="incident_case / runbook / best_practice / postmortem",
        nullable=False,
    )
    category: Mapped[str] = mapped_column(
        String(100),
        comment="分類樹節點 (基礎設施/應用層/AI系統/安全合規)",
        nullable=False,
    )
    tags: Mapped[list[str]] = mapped_column(
        JSON,
        comment="標籤列表 (JSONB string array)",
        default=list,
        nullable=False,
    )

    # --- Source & status ---
    source: Mapped[str] = mapped_column(
        SQLEnum(EntrySource),
        comment="ai_extracted / human",
        nullable=False,
    )
    status: Mapped[str] = mapped_column(
        SQLEnum(EntryStatus),
        comment="draft / review / approved / archived",
        default=EntryStatus.DRAFT,
        nullable=False,
    )

    # --- Relations (soft references, not foreign keys) ---
    related_incident_id: Mapped[str | None] = mapped_column(
        String(30),
        comment="關聯 Incident ID",
        nullable=True,
    )
    related_playbook_id: Mapped[str | None] = mapped_column(
        String(255),
        comment="關聯 Playbook Redis Key",
        nullable=True,
    )
    # 2026-04-04 ogt: Phase 25 P1 -- symptom hash used by the Anti-Pattern
    # closed-loop interception (SymptomPattern.compute_hash()).
    symptoms_hash: Mapped[str | None] = mapped_column(
        String(16),
        comment="症狀模式 hash (16字元 SHA256 前綴),Anti-Pattern 閉環攔截使用",
        nullable=True,
    )

    # --- Metrics ---
    view_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)

    # --- Metadata ---
    created_by: Mapped[str | None] = mapped_column(
        String(100),
        nullable=True,
    )
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=taipei_now)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        onupdate=taipei_now,
        default=taipei_now,
    )

    # --- Indexes ---
    __table_args__ = (
        Index("ix_knowledge_entry_type", "entry_type"),
        Index("ix_knowledge_category", "category"),
        Index("ix_knowledge_status", "status"),
        Index("ix_knowledge_created_at", "created_at"),
        # 2026-04-04 ogt: Phase 25 P1 -- fast Anti-Pattern lookup
        Index("ix_knowledge_symptoms_hash", "symptoms_hash"),
    )
|