""" Database Models =============== CTO-201: Approval & AuditLog persistence Schema 設計原則: - UUID 主鍵 (PostgreSQL 相容) - JSON 欄位儲存複雜結構 - 完整時間戳記 - 索引優化查詢 """ from datetime import datetime from typing import Any from uuid import uuid4 from sqlalchemy import ( JSON, BigInteger, Boolean, CheckConstraint, DateTime, Float, Index, Integer, String, Text, text, ) from sqlalchemy import ( Enum as SQLEnum, ) from sqlalchemy.dialects.postgresql import ENUM as PgEnum from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import Mapped, mapped_column from src.db.base import Base from src.models.approval import ApprovalStatus, RiskLevel from src.models.incident import IncidentStatus, Severity from src.models.knowledge import EntrySource, EntryStatus, EntryType # ============================================================================= # Helper Functions # ============================================================================= def taipei_now() -> datetime: """取得台北時區當前時間 (UTC+8) 🔴 HARD RULE: 全系統使用台北時區,禁止 UTC 2026-04-02 Claude Code: C1 時區統一遷移 (首席架構師審查) """ from src.utils.timezone import now_taipei return now_taipei() def generate_uuid() -> str: """Generate UUID string""" return str(uuid4()) # ============================================================================= # ApprovalRecord - 授權記錄持久化 # ============================================================================= class ApprovalRecord(Base): """ 授權記錄 - 對應 Pydantic ApprovalRequest Note: 與 in-memory TrustEngine 的 ApprovalRequest 同步 """ __tablename__ = "approval_records" # Primary Key id: Mapped[str] = mapped_column( String(36), primary_key=True, default=generate_uuid, ) # Core Fields action: Mapped[str] = mapped_column(String(500), nullable=False) description: Mapped[str] = mapped_column(Text, nullable=False) status: Mapped[str] = mapped_column( SQLEnum(ApprovalStatus), default=ApprovalStatus.PENDING, nullable=False, ) risk_level: Mapped[str] = mapped_column( SQLEnum(RiskLevel), nullable=False, ) # Signature Tracking required_signatures: Mapped[int] = mapped_column(Integer, default=1) current_signatures: Mapped[int] = mapped_column(Integer, default=0) signatures: Mapped[dict[str, Any]] = mapped_column(JSON, default=list) # Blast Radius (JSON) blast_radius: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict) # Dry-Run Checks (JSON) dry_run_checks: Mapped[list[dict[str, Any]]] = mapped_column(JSON, default=list) # Metadata requested_by: Mapped[str] = mapped_column(String(100), nullable=False) rejection_reason: Mapped[str | None] = mapped_column(Text, nullable=True) extra_metadata: Mapped[dict[str, Any] | None] = mapped_column(JSON, nullable=True) # ========================================================================== # 戰略 B: 告警風暴收斂 (Alert Storm Convergence) # ========================================================================== # 告警指紋 - 根據 namespace + deployment + alert_name 產生的唯一 Hash fingerprint: Mapped[str | None] = mapped_column( String(64), nullable=True, index=True, comment="SHA256 hash of alert identity (namespace:deployment:alert_name)", ) # 聚合次數 - 相同指紋告警的累計觸發次數 hit_count: Mapped[int] = mapped_column( Integer, default=1, nullable=False, comment="Number of times this alert pattern was triggered", ) # 最後觸發時間 - 同指紋告警最近一次出現的時間 last_seen_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=taipei_now, nullable=False, comment="Last time this alert pattern was seen", ) # Sprint 5.1 MultiSig 雙簽核支援 (2026-04-08 Claude Sonnet 4.6 Asia/Taipei,ADR-062 Q3) approval_level: Mapped[str] = mapped_column( String(20), default="standard", nullable=False, comment="standard=1票審核, critical=2票MultiSig", ) approval_votes: Mapped[list[dict[str, Any]]] = mapped_column( JSON, default=list, nullable=False, comment="[{user_id, voted_at, action}]", ) required_votes: Mapped[int] = mapped_column( Integer, default=1, nullable=False, comment="standard=1, critical=2", ) # 2026-04-06 ogt: Phase 26 — 關聯 Incident ID # Playbook 萃取和 KM 寫入必須知道 incident_id,不能靠文字解析 incident_id: Mapped[str | None] = mapped_column( String(64), nullable=True, index=True, comment="Associated Incident ID (INC-YYYYMMDD-XXXXXX)", ) # 2026-04-09 Claude Sonnet 4.6: Telegram 訊息持久化 # Redis tg_msg:{id} TTL 24h 過期後仍可查詢,支援跨 Session 狀態更新 telegram_message_id: Mapped[int | None] = mapped_column( Integer, nullable=True, comment="Telegram message_id of the approval card sent to operator", ) telegram_chat_id: Mapped[int | None] = mapped_column( BigInteger, nullable=True, comment="Telegram chat_id where the approval card was sent (BIGINT: 支援群組負數 ID)", ) # B2 fix 2026-04-24 ogt + Claude Sonnet 4.6: Playbook 學習閉環斷鏈修復 # 原欄位缺失 → 人工審核後 matched_playbook_id 永遠 NULL → EWMA 無法更新 # 2026-04-25 db-expert-fix by Claude Engineer-B: 移除 index=True 避免自動生成 full index # Partial index 改在 __table_args__ 宣告(WHERE matched_playbook_id IS NOT NULL) matched_playbook_id: Mapped[str | None] = mapped_column( String(36), nullable=True, comment="匹配的 Playbook ID,學習服務用以更新 EWMA trust score", ) # 2026-04-26 P2-DB-Fix by Claude — db-expert P0 三修(P0.3): P2.1 DecisionFusionEngine 欄位 # composite_score / complexity_tier / decision_fusion_details # 僅在 AIOPS_P2_FUSION_ENABLED=True 且 fusion 成功時填入(nullable=True) composite_score: Mapped[float | None] = mapped_column( Float, nullable=True, comment="P2.1 DecisionFusion 合成分數(0.0-1.0),方法 III 加權結果", ) complexity_tier: Mapped[str | None] = mapped_column( String(16), nullable=True, comment="P2.1 告警複雜度分層:low / medium / high / critical", ) decision_fusion_details: Mapped[dict | None] = mapped_column( JSONB, nullable=True, comment=( "P2.1 DecisionFusionEngine: openclaw_score / hermes_score / " "playbook_score / mcp_health_score / elephant_score" ), ) # Timestamps created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=taipei_now, ) updated_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=taipei_now, onupdate=taipei_now, ) expires_at: Mapped[datetime | None] = mapped_column( DateTime(timezone=True), nullable=True, ) resolved_at: Mapped[datetime | None] = mapped_column( DateTime(timezone=True), nullable=True, ) # Indexes __table_args__ = ( Index("ix_approval_status", "status"), Index("ix_approval_risk_level", "risk_level"), Index("ix_approval_created_at", "created_at"), Index("ix_approval_requested_by", "requested_by"), Index("ix_approval_fingerprint", "fingerprint"), # 戰略 B: 指紋查詢優化 # 2026-04-25 db-expert-fix by Claude Engineer-B: 改為 partial index,只索引非 NULL 值 # 原 full index 與 index=True 三重宣告衝突已修復(一個來源真相:此處) Index( "ix_approval_matched_playbook", "matched_playbook_id", postgresql_where=text("matched_playbook_id IS NOT NULL"), ), # 2026-04-26 P2-DB-Fix by Claude — db-expert P0 三修(P0.3): P2 DecisionFusion 欄位 # partial index:fusion fill rate 預期 <50%,只索引有值的行 Index( "ix_approval_composite_score", "composite_score", postgresql_where=text("composite_score IS NOT NULL"), ), Index( "ix_approval_complexity_tier", "complexity_tier", postgresql_where=text("complexity_tier IS NOT NULL"), ), CheckConstraint( "complexity_tier IN ('low','medium','high','critical') OR complexity_tier IS NULL", name="chk_complexity_tier", ), ) # ============================================================================= # AuditLog - 稽核日誌 # ============================================================================= class TimelineEvent(Base): """ 時間軸事件 - Phase 4 Action Timeline 事件類型: - system: 系統告警接收 - agent: OpenClaw AI 分析 - security: 權限阻擋 - human: 人類授權 - exec: 執行完成 """ __tablename__ = "timeline_events" # Primary Key id: Mapped[str] = mapped_column( String(36), primary_key=True, default=generate_uuid, ) # Event Type & Status event_type: Mapped[str] = mapped_column( String(20), nullable=False, comment="system, agent, security, human, exec", ) status: Mapped[str] = mapped_column( String(20), nullable=False, default="info", comment="info, success, warning, error", ) # Content title: Mapped[str] = mapped_column(String(500), nullable=False) description: Mapped[str | None] = mapped_column(Text, nullable=True) # Actor actor: Mapped[str | None] = mapped_column(String(100), nullable=True) actor_role: Mapped[str | None] = mapped_column(String(50), nullable=True) # Context risk_level: Mapped[str | None] = mapped_column(String(20), nullable=True) approval_id: Mapped[str | None] = mapped_column(String(36), nullable=True, index=True) # P1.6 fix 2026-04-24 ogt + Claude Sonnet 4.6: pre_decision_investigator raw SQL 寫不存在欄位 # 原本 INSERT INTO timeline_events (incident_id, ...) 失敗 → 每天+1 錯誤靜默吞 incident_id: Mapped[str | None] = mapped_column( String(64), nullable=True, index=True, comment="關聯的 Incident ID(MCP 事件稽核用)", ) # Timestamp created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=taipei_now, ) # Indexes __table_args__ = ( Index("ix_timeline_event_type", "event_type"), Index("ix_timeline_created_at", "created_at"), Index("ix_timeline_incident_id", "incident_id"), # P1.6 fix ) class AuditLog(Base): """ 稽核日誌 - 記錄所有執行結果 每次 K8s 操作完成後寫入一筆記錄 """ __tablename__ = "audit_logs" # Primary Key id: Mapped[str] = mapped_column( String(36), primary_key=True, default=generate_uuid, ) # Reference to Approval approval_id: Mapped[str] = mapped_column( String(36), nullable=False, index=True, ) # Operation Details operation_type: Mapped[str] = mapped_column( String(50), nullable=False, comment="e.g., RESTART_DEPLOYMENT, DELETE_POD", ) target_resource: Mapped[str] = mapped_column( String(200), nullable=False, comment="e.g., deployment/api-backend, pod/nginx-xxx", ) namespace: Mapped[str] = mapped_column( String(63), default="default", nullable=False, ) # Execution Result success: Mapped[bool] = mapped_column(default=False, nullable=False) error_message: Mapped[str | None] = mapped_column(Text, nullable=True) # K8s Response (Raw) k8s_response: Mapped[dict[str, Any] | None] = mapped_column( JSON, nullable=True, comment="Raw Kubernetes API response", ) # Execution Context executed_by: Mapped[str] = mapped_column( String(100), nullable=False, comment="Who triggered the execution", ) execution_duration_ms: Mapped[int | None] = mapped_column( Integer, nullable=True, comment="Execution time in milliseconds", ) # Dry-Run Result (pre-execution validation) dry_run_passed: Mapped[bool] = mapped_column( default=True, nullable=False, ) dry_run_message: Mapped[str | None] = mapped_column(Text, nullable=True) # ========================================================================== # Phase 18: 失敗自動修復閉環欄位 (2026-03-26) # ========================================================================== # 授權來源追蹤 authorization_channel: Mapped[str | None] = mapped_column( String(20), nullable=True, comment="Authorization source: web, telegram, auto", ) # 重試與修復追蹤 retry_count: Mapped[int] = mapped_column( Integer, default=0, nullable=False, comment="Number of retry attempts", ) failure_classification: Mapped[str | None] = mapped_column( String(50), nullable=True, comment="Failure type: TIMEOUT, K8S_ERROR, NETWORK_ERROR, PERMISSION_DENIED", ) source_approval_id: Mapped[str | None] = mapped_column( String(36), nullable=True, index=True, comment="Original approval ID if this is a repair attempt", ) # 自動修復狀態 auto_repair_attempted: Mapped[bool] = mapped_column( default=False, nullable=False, comment="Whether auto-repair was attempted", ) auto_repair_result: Mapped[str | None] = mapped_column( Text, nullable=True, comment="Auto-repair result: AI analysis and repair outcome", ) # Timestamps created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=taipei_now, ) # Indexes __table_args__ = ( Index("ix_audit_approval_id", "approval_id"), Index("ix_audit_operation_type", "operation_type"), Index("ix_audit_success", "success"), Index("ix_audit_created_at", "created_at"), Index("ix_audit_authorization_channel", "authorization_channel"), # Phase 18 Index("ix_audit_failure_classification", "failure_classification"), # Phase 18 ) # ============================================================================= # AutoRepairExecution - Phase 10 操作記錄 # 2026-04-08 Claude Code: 統帥指令「所有操作都必須被記錄,寫入資料庫」 # ============================================================================= class AutoRepairExecution(Base): """ 自動修復執行記錄 每次 evaluate_auto_repair 觸發並執行 (成功或失敗) 都寫入此表。 不依賴 approval_id(自動修復不需人工批准)。 """ __tablename__ = "auto_repair_executions" id: Mapped[str] = mapped_column(String(36), primary_key=True, default=generate_uuid) # 關聯 incident_id: Mapped[str] = mapped_column(String(30), nullable=False, index=True) playbook_id: Mapped[str] = mapped_column(String(36), nullable=False, index=True) playbook_name: Mapped[str] = mapped_column(String(200), nullable=False) # 執行結果 success: Mapped[bool] = mapped_column(default=False, nullable=False) executed_steps: Mapped[list] = mapped_column(JSON, default=list, nullable=False) error_message: Mapped[str | None] = mapped_column(Text, nullable=True) # 執行上下文 triggered_by: Mapped[str] = mapped_column( String(50), default="auto_repair", nullable=False, comment="auto_repair / cold_start_trust", ) similarity_score: Mapped[float | None] = mapped_column(nullable=True) risk_level: Mapped[str | None] = mapped_column(String(20), nullable=True) execution_time_ms: Mapped[int | None] = mapped_column(Integer, nullable=True) # 時間戳 (台北時區) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=taipei_now) __table_args__ = ( Index("ix_are_created_at", "created_at"), Index("ix_are_success", "success"), ) # ============================================================================= # AlertOperationLog - Phase 11 告警操作溯源 (Event Sourcing) # 2026-04-08 Claude Code: 統帥指令「所有操作都必須被記錄,寫入資料庫」 # 不可變 — 只 INSERT,不 UPDATE/DELETE # ============================================================================= class AlertOperationLog(Base): """ 告警操作完整溯源 Event Sourcing 模式:每個告警生命週期的每個事件都寫一筆。 不可變 (Immutable)。 event_type 值: ALERT_RECEIVED / TELEGRAM_SENT / USER_ACTION / AUTO_REPAIR_TRIGGERED / EXECUTION_STARTED / EXECUTION_COMPLETED / TELEGRAM_RESULT_SENT / RESOLVED / SILENCED / ESCALATED """ __tablename__ = "alert_operation_log" id: Mapped[str] = mapped_column(String(36), primary_key=True, default=generate_uuid) # 關聯 (允許 NULL,不同事件有不同關聯) incident_id: Mapped[str | None] = mapped_column(String(30), nullable=True, index=True) approval_id: Mapped[str | None] = mapped_column(String(36), nullable=True, index=True) audit_log_id: Mapped[str | None] = mapped_column(String(36), nullable=True) auto_repair_id: Mapped[str | None] = mapped_column(String(36), nullable=True) # 事件核心 # 2026-04-08 Claude Sonnet 4.6: Sprint 5.1 — 修正 enum 型別不符 (String→PgEnum, create_type=False) event_type: Mapped[str] = mapped_column( PgEnum( "ALERT_RECEIVED", "TELEGRAM_SENT", "USER_ACTION", "AUTO_REPAIR_TRIGGERED", "EXECUTION_STARTED", "EXECUTION_COMPLETED", "TELEGRAM_RESULT_SENT", "RESOLVED", "SILENCED", "ESCALATED", "GUARDRAIL_BLOCKED", "PRE_FLIGHT_PASSED", "PRE_FLIGHT_FAILED", "BACKUP_TRIGGERED", "BACKUP_COMPLETED", "BACKUP_FAILED", "APPROVAL_ESCALATED", "CHANGE_APPLIED", name="alert_event_type", create_type=False, ), nullable=False, index=True, ) actor: Mapped[str | None] = mapped_column(String(100), nullable=True, index=True) action_detail: Mapped[str | None] = mapped_column(String(200), nullable=True) # 執行結果 (NULL = 不適用) success: Mapped[bool | None] = mapped_column(nullable=True) error_message: Mapped[str | None] = mapped_column(Text, nullable=True) # 結構化上下文 context: Mapped[dict] = mapped_column(JSON, default=dict, nullable=False) # 時間戳 (台北時區,不可變) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=taipei_now) __table_args__ = ( Index("ix_aol_created_at", "created_at"), ) # ============================================================================= # IncidentRecord - Phase 6.2 Episodic Memory (PostgreSQL) # ============================================================================= class IncidentRecord(Base): """ 事件記錄 - 對應 Pydantic Incident Schema v0.3 Phase 6.2: Episodic Memory (長期記憶) - 從 Working Memory (Redis) 遷移過來 - 永久保留,供 RAG 檢索 - 複雜結構使用 JSONB 欄位 三層記憶架構: - Working Memory (Redis): 7 天 TTL - Episodic Memory (PostgreSQL): 此表,永久保留 - Semantic Memory (Vector DB): Phase 6.3+ """ __tablename__ = "incidents" # === 主鍵 === incident_id: Mapped[str] = mapped_column( String(30), primary_key=True, comment="事件唯一識別碼 (如 INC-20260322-A1B2C3)", ) # === 狀態與嚴重度 === status: Mapped[str] = mapped_column( SQLEnum(IncidentStatus), default=IncidentStatus.INVESTIGATING, nullable=False, comment="事件狀態 (investigating, mitigating, resolved, closed, escalated)", ) severity: Mapped[str] = mapped_column( SQLEnum(Severity), nullable=False, comment="事件嚴重度 (P0, P1, P2, P3)", ) # === 感知層 (Signals) - JSONB === signals: Mapped[list[dict[str, Any]]] = mapped_column( JSON, default=list, nullable=False, comment="關聯的告警信號列表 (JSONB)", ) affected_services: Mapped[list[str]] = mapped_column( JSON, default=list, nullable=False, comment="受影響的服務列表", ) # === 認知層 (AI Decision Chain) - JSONB === decision_chain: Mapped[dict[str, Any] | None] = mapped_column( JSON, nullable=True, comment="AI 決策鏈 (完整推論過程)", ) # === 決策層 (Proposals) === proposal_ids: Mapped[list[str]] = mapped_column( JSON, default=list, nullable=False, comment="關聯的 ApprovalRequest ID 列表", ) # === 結果層 (Outcome) - JSONB === outcome: Mapped[dict[str, Any] | None] = mapped_column( JSON, nullable=True, comment="事件結果與人類回饋", ) # === ADR-073 Phase 2 欄位 (2026-04-12 ogt) === alertname: Mapped[str | None] = mapped_column( String(100), nullable=True, comment="告警名稱 (從 signals labels 抽取)", ) notification_type: Mapped[str | None] = mapped_column( String(10), nullable=True, comment="通知類型 TYPE-1/2/3/4/4D (早期分診)", ) alert_category: Mapped[str | None] = mapped_column( String(50), nullable=True, comment="告警類別 config_drift/info/backup/infrastructure/kubernetes/database/general", ) # === 頻率快照 (Phase 27, 2026-04-10 ogt) === # frequency_stats 原本只存記憶體/Redis(TTL=35天),Pod重啟或超期即失 # 此欄位在 incident 建立時寫入快照,永久保存當時的頻率統計 frequency_snapshot: Mapped[dict[str, Any] | None] = mapped_column( JSON, nullable=True, comment="建立時刻的 AnomalyFrequency 快照,永久保存 (Phase 27)", ) # === 時間軸 === created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=taipei_now, nullable=False, ) updated_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=taipei_now, onupdate=taipei_now, nullable=False, ) resolved_at: Mapped[datetime | None] = mapped_column( DateTime(timezone=True), nullable=True, ) closed_at: Mapped[datetime | None] = mapped_column( DateTime(timezone=True), nullable=True, ) # === 記憶管理 === ttl_days: Mapped[int] = mapped_column( Integer, default=7, nullable=False, comment="Working Memory TTL (天)", ) vectorized: Mapped[bool] = mapped_column( default=False, nullable=False, comment="是否已向量化到 Vector DB (Semantic Memory)", ) # === 索引 === __table_args__ = ( Index("ix_incident_status", "status"), Index("ix_incident_severity", "severity"), Index("ix_incident_created_at", "created_at"), Index("ix_incident_resolved_at", "resolved_at"), ) # ============================================================================= # KnowledgeEntry - Knowledge Base Phase 1 # ============================================================================= class KnowledgeEntryRecord(Base): """ 知識庫條目 - Knowledge Base Phase 1 兩層架構: - KnowledgeEntry: 知識條目 (此表) - Playbook: 獨立 Redis,透過 related_playbook_id 關聯 建立時間: 2026-04-02 (台北時區) 建立者: Claude Code (Knowledge Base Phase 1) """ __tablename__ = "knowledge_entries" # Primary Key id: Mapped[str] = mapped_column( String(36), primary_key=True, default=generate_uuid, ) # Core Fields title: Mapped[str] = mapped_column(String(255), nullable=False) content: Mapped[str] = mapped_column(Text, nullable=False) entry_type: Mapped[str] = mapped_column( SQLEnum(EntryType), nullable=False, comment="incident_case / runbook / best_practice / postmortem", ) category: Mapped[str] = mapped_column( String(100), nullable=False, comment="分類樹節點 (基礎設施/應用層/AI系統/安全合規)", ) tags: Mapped[list[str]] = mapped_column( JSON, default=list, nullable=False, comment="標籤列表 (JSONB string array)", ) # Source & Status source: Mapped[str] = mapped_column( SQLEnum(EntrySource), nullable=False, comment="ai_extracted / human", ) status: Mapped[str] = mapped_column( SQLEnum(EntryStatus), default=EntryStatus.DRAFT, nullable=False, comment="draft / review / approved / archived", ) # Relations (soft references, not FK) related_incident_id: Mapped[str | None] = mapped_column( String(30), nullable=True, comment="關聯 Incident ID", ) related_playbook_id: Mapped[str | None] = mapped_column( String(255), nullable=True, comment="關聯 Playbook Redis Key", ) # 2026-04-04 ogt: Phase 25 P1 — Anti-Pattern 閉環攔截用症狀 hash (SymptomPattern.compute_hash()) symptoms_hash: Mapped[str | None] = mapped_column( String(16), nullable=True, comment="症狀模式 hash (16字元 SHA256 前綴),Anti-Pattern 閉環攔截使用", ) # P1-1 2026-04-28 ogt + Claude Sonnet 4.6: M4 補反查鏈 # phase26_incident_km_integration.sql 已建立欄位與 partial index # KMWriter.write() 會自動填入並回填 Path A 條目(approval → KM 雙向追蹤) related_approval_id: Mapped[str | None] = mapped_column( String(36), nullable=True, comment="關聯 ApprovalRequest ID,P1-1 反查鏈修復(approval → KM 追蹤)", ) # P1-1 M3 2026-04-28 ogt + Claude Sonnet 4.6: 冪等 key 的一部分 # migration: p1_1_km_idempotent_path_type.sql # unique index: uix_knowledge_incident_path (related_incident_id, path_type) WHERE both NOT NULL path_type: Mapped[str | None] = mapped_column( String(50), nullable=True, comment="KMWriter 路徑類型,與 related_incident_id 構成冪等 key", ) # Metrics view_count: Mapped[int] = mapped_column( Integer, default=0, nullable=False, ) # Metadata created_by: Mapped[str | None] = mapped_column(String(100), nullable=True) created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=taipei_now, ) updated_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=taipei_now, onupdate=taipei_now, ) # Indexes __table_args__ = ( Index("ix_knowledge_entry_type", "entry_type"), Index("ix_knowledge_category", "category"), Index("ix_knowledge_status", "status"), Index("ix_knowledge_created_at", "created_at"), # 2026-04-04 ogt: Phase 25 P1 — Anti-Pattern 快速查詢 Index("ix_knowledge_symptoms_hash", "symptoms_hash"), # P1-1 2026-04-28 ogt + Claude Sonnet 4.6: M4 反查鏈 partial index(配合 phase26 migration) Index( "ix_knowledge_related_approval", "related_approval_id", postgresql_where=text("related_approval_id IS NOT NULL"), ), # P1-1 M3 2026-04-28 ogt + Claude Sonnet 4.6: 冪等 unique index # migration: p1_1_km_idempotent_path_type.sql Index( "uix_knowledge_incident_path", "related_incident_id", "path_type", unique=True, postgresql_where=text( "related_incident_id IS NOT NULL AND path_type IS NOT NULL" ), ), ) # IncidentEvidence — ADR-081 Phase 1 EvidenceSnapshot 持久化 # 2026-04-15 ogt + Claude Sonnet 4.6: AI 自主化飛輪 Phase 1 初始建立 class IncidentEvidence(Base): """ 不可變事件證據快照表 每次決策前 PreDecisionInvestigator 拍攝一次 EvidenceSnapshot, 寫入此表以供: - 決策溯源(LLM 推理過程的完整情報上下文) - 學習訓練(Phase 3 fine-tune pipeline 金礦資料) - 異常驗證(執行前 vs 執行後 state diff) ADR-081: PreDecisionInvestigator + EvidenceSnapshot 設計原則:只追加寫入,禁止 UPDATE(event sourcing 對齊) """ __tablename__ = "incident_evidence" id: Mapped[str] = mapped_column(String(36), primary_key=True, default=generate_uuid) # 關聯 incident_id: Mapped[str] = mapped_column(String(30), nullable=False) # index via __table_args__ # Phase 3 填充:matched_playbook_id 目前永久 null,Phase 3 修復 matched_playbook_id: Mapped[str | None] = mapped_column(String(36), nullable=True) # Schema 版本(方便 fine-tune pipeline 過濾相容版本) schema_version: Mapped[str] = mapped_column(String(10), default="v1", nullable=False) # 8D 感官數據(各維度 nullable — MCP 失敗時部分缺失) k8s_state: Mapped[dict | None] = mapped_column( JSON, nullable=True, comment="D1: kubectl describe pod + events" ) recent_logs: Mapped[str | None] = mapped_column( Text, nullable=True, comment="D2: container stderr tail-50,經 SanitizationService 清洗" ) metrics_snapshot: Mapped[dict | None] = mapped_column( JSON, nullable=True, comment="D3: Prometheus 5min vs 1h baseline 對比" ) recent_deployments: Mapped[list | None] = mapped_column( JSON, nullable=True, comment="D4: ArgoCD/Gitea 過去 1h 部署 diff" ) business_metrics: Mapped[dict | None] = mapped_column( JSON, nullable=True, comment="D5: 訂單量 / 登入成功率 / P0 SLI" ) historical_context: Mapped[str | None] = mapped_column( Text, nullable=True, comment="D6: 過去 30 天同 alertname 處置歷史摘要" ) peer_health: Mapped[dict | None] = mapped_column( JSON, nullable=True, comment="D7: 同 Deployment 其他 replica 健康度" ) dependency_topology: Mapped[dict | None] = mapped_column( JSON, nullable=True, comment="D8: Istio/Service Mesh 上下游 latency/error rate" ) # Phase 4 ADR-084: 動態異常偵測增強感官(DynamicBaseline + LogAnomaly + TrendPredictor) # 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 4 8D 升級 anomaly_context: Mapped[dict | None] = mapped_column( JSON, nullable=True, comment="Phase 4 動態異常上下文:baseline_anomalies / log_patterns / trend_breaches" ) # 感官品質指標 mcp_health: Mapped[dict] = mapped_column( JSON, default=dict, nullable=False, comment="各 MCP 呼叫成敗 {tool_name: bool},用於 decision_fusion 權重調整" ) collection_duration_ms: Mapped[int | None] = mapped_column( Integer, nullable=True, comment="情報蒐集總耗時(ms),P99 目標 < 8000" ) sensors_attempted: Mapped[int] = mapped_column( default=0, nullable=False, comment="嘗試啟動的感官數" ) sensors_succeeded: Mapped[int] = mapped_column( default=0, nullable=False, comment="成功回傳資料的感官數" ) # LLM 輸入摘要(不超 8K tokens,由 Investigator 壓縮) evidence_summary: Mapped[str | None] = mapped_column( Text, nullable=True, comment="最終餵給 LLM 的情報摘要(UTF-8,< 8K tokens)" ) # 執行前後 State(PostExecutionVerifier 填入 post_execution_state) pre_execution_state: Mapped[dict | None] = mapped_column( JSON, nullable=True, comment="執行前環境狀態快照(PostExecutionVerifier 基準線)" ) post_execution_state: Mapped[dict | None] = mapped_column( JSON, nullable=True, comment="執行後環境狀態(PostExecutionVerifier 抓取,Phase 1 接線)" ) verification_result: Mapped[str | None] = mapped_column( String(20), nullable=True, comment="success / degraded / failed / timeout(PostExecutionVerifier 填入)" ) # W2 PR-V1: SelfHealingValidator 自愈品質分數 (2026-04-28 ogt + Claude Sonnet 4.6) # 0.0-1.0:1.0=完全自愈,<0.5=觸發 rollback 提案(Telegram 警示) # base.py ALTER IF NOT EXISTS 補欄對應下方 self_healing_score: Mapped[float | None] = mapped_column( Float, nullable=True, comment="W2 PR-V1 SelfHealingValidator 自愈品質分數(0.0-1.0),<0.5 觸發 rollback 提案", ) self_healing_detail: Mapped[dict | None] = mapped_column( JSON, nullable=True, comment="W2 PR-V1 SelfHealingValidator 評估明細:root_cause_cleared/regressions/detail", ) # 時間戳(台北時區) collected_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=taipei_now, nullable=False ) __table_args__ = ( Index("ix_incident_evidence_incident_id", "incident_id"), Index("ix_incident_evidence_collected_at", "collected_at"), Index("ix_incident_evidence_playbook_id", "matched_playbook_id"), ) # ============================================================================= # PlaybookRecord — Phase 3.5 Playbook PostgreSQL 持久化 (System of Record) # ADR-085: AI 學習成果不可存在 Cache — Playbook 是 AI 的肌肉記憶 # 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 3.5 初始建立 # # 核心鐵律: # - PostgreSQL = System of Record(永久保存,AI 的長期記憶) # - Redis = Warm Cache(7天 TTL,加速讀取,DB 為 source of truth) # - trust_score, EWMA, 統計數據必須持久化 — 不能因 Redis TTL 消失 # ============================================================================= class PlaybookRecord(Base): """ Playbook 修復劇本 PostgreSQL ORM 與 Pydantic Playbook 模型對應。 Redis 為 warm cache(7d TTL),PostgreSQL 為 source of truth。 設計原則: - AI 的學習成果(trust_score、success_count、failure_count)永久保存 - EWMA 信任度在 Redis TTL 後不會重置,Pod 重啟後 AI 記憶不失 - 雙寫:create/update 先寫 PG,再更新 Redis cache - 讀取:Redis-first(cache hit),miss 時從 PG 載入並回填 Redis """ __tablename__ = "playbooks" # Primary Key playbook_id: Mapped[str] = mapped_column( String(36), primary_key=True, comment="Playbook 唯一識別碼 (PB-YYYYMMDD-XXXXXX)", ) # Core Fields name: Mapped[str] = mapped_column(String(256), nullable=False) description: Mapped[str] = mapped_column(Text, default="", nullable=False) status: Mapped[str] = mapped_column(String(20), default="draft", nullable=False) source: Mapped[str] = mapped_column(String(20), default="extracted", nullable=False) # Complex structures (JSONB) symptom_pattern: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict, nullable=False) repair_steps: Mapped[list[dict[str, Any]]] = mapped_column(JSON, default=list, nullable=False) # Timing estimated_duration_minutes: Mapped[int] = mapped_column(Integer, default=5, nullable=False) # Source tracing source_incident_ids: Mapped[list[str]] = mapped_column(JSON, default=list, nullable=False) ai_confidence: Mapped[float] = mapped_column(default=0.0, nullable=False) # Stats — MUST be in PG (AI learning artifacts, cannot expire) success_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False) failure_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False) last_used_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) # EWMA trust score — ADR-083 Phase 3, 絕對不能用 Redis TTL 管理 # trust_score 是 AI 累積學習的結晶,TTL 到期就歸零 = AI 記憶全部消失 trust_score: Mapped[float] = mapped_column(default=0.3, nullable=False, comment="EWMA 動態信任度 (Phase 3)。成功 α=0.1,失敗 α=0.2(2x 衰減)。< 0.1 → 封存") # Approval metadata approved_by: Mapped[str | None] = mapped_column(String(100), nullable=True) approved_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) tags: Mapped[list[str]] = mapped_column(JSON, default=list, nullable=False) notes: Mapped[str | None] = mapped_column(Text, nullable=True) # Sprint 5.1 護欄欄位 (2026-04-08) requires_approval_level: Mapped[str] = mapped_column( String(20), default="auto", nullable=False, comment="auto=直接執行, standard=1票, critical=2票MultiSig", ) stateful_targets: Mapped[list[str]] = mapped_column(JSON, default=list, nullable=False) requires_pre_backup: Mapped[bool] = mapped_column(default=False, nullable=False) # W2 PR-L1 2026-04-28 ogt + Claude Sonnet 4.6: KM→Playbook 互饋回路(飛輪 C3 修復) # 同 symptom_pattern_hash 累積 N=5 條 KM 後,LearningService 自動設 True # 人工 review 後可重設為 False(由 playbook_service 負責清除) review_required: Mapped[bool] = mapped_column( Boolean, default=False, nullable=False, comment="W2 PR-L1: True=KM 累積觸發人工複審信號(symptom_hash≥5 條),review 後清為 False", ) # Timestamps created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=taipei_now, nullable=False) updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=taipei_now, onupdate=taipei_now, nullable=False) __table_args__ = ( Index("ix_playbook_status", "status"), Index("ix_playbook_trust_score", "trust_score"), Index("ix_playbook_created_at", "created_at"), # W2 PR-L1: 快速查詢需要人工 review 的 Playbook(預期數量少,partial index 最省空間) Index( "ix_playbook_review_required", "review_required", postgresql_where=text("review_required = true"), ), ) # ============================================================================= # DynamicBaselineRecord — Phase 4 Holt-Winters 訓練基線持久化 # ADR-084: 動態基線不能只存 Redis — AI 每天重學「正常」不是在學習 # 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 4 初始建立 # # 核心鐵律: # - 訓練好的 Holt-Winters 模型必須在 PG 長期保存 # - Redis 為 24h warm cache(加速 is_anomaly() 讀取) # - 基線消失 = AI 對「正常」的認識消失 = 每天從頭學習 = 不是 AI # ============================================================================= class DynamicBaselineRecord(Base): """ 動態基線訓練結果 PostgreSQL ORM Holt-Winters 訓練完成後: 1. 先寫入 PG(永久保存) 2. 再寫入 Redis(24h warm cache,加速讀取) Redis key: baseline:{metric_name} PG: 此表,metric_name 為主鍵,最新一筆 = 有效基線 """ __tablename__ = "dynamic_baselines" id: Mapped[str] = mapped_column(String(36), primary_key=True, default=generate_uuid) # 基線識別 metric_name: Mapped[str] = mapped_column( String(200), nullable=False, index=True, comment="基線識別名 (e.g. cpu_usage_node_mon)", ) # 訓練結果(Holt-Winters 統計) mean: Mapped[float] = mapped_column(nullable=False, comment="擬合值均值") std: Mapped[float] = mapped_column(nullable=False, comment="殘差標準差") # 24h 季節性因子(JSON 陣列,長度 24) seasonal_factors: Mapped[list[float]] = mapped_column( JSON, default=list, nullable=False, comment="24h 週期季節性因子(乘法形式,均值 ≈ 1.0)", ) # 訓練元資料 datapoint_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False) promql: Mapped[str] = mapped_column(Text, default="", nullable=False, comment="訓練使用的 PromQL 查詢") lookback_hours: Mapped[int] = mapped_column(Integer, default=336, nullable=False) # Timestamps trained_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=taipei_now, nullable=False) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=taipei_now, nullable=False) __table_args__ = ( Index("ix_dynamic_baseline_metric", "metric_name"), Index("ix_dynamic_baseline_trained_at", "trained_at"), ) # ============================================================================= # LogClusterRecord — Phase 4 Drain3 學習到的 Log Pattern 持久化 # ADR-084: Drain3 模板不能只存 Redis — 每次重啟 AI 把已知 pattern 當新 pattern # 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 4 初始建立 # # 核心鐵律: # - Drain3 學到的 log cluster template 必須在 PG 長期保存 # - 新 cluster 事件列表 (log_anomaly:new) 才存 Redis(短期工作記憶) # - 基礎知識庫(已學到的 pattern)必須在 PG # ============================================================================= class LogClusterRecord(Base): """ Drain3 Log Cluster Template 持久化 每個新 pattern 首次偵測到時: 1. 寫入 PG(永久保存,AI 的 log 語意理解) 2. 推送到 Redis list log_anomaly:new(短期工作記憶) Re-detect 相同 template 時只更新 last_seen_at + size,不重複寫入 PG。 """ __tablename__ = "log_clusters" id: Mapped[str] = mapped_column(String(36), primary_key=True, default=generate_uuid) # Cluster 識別(MD5[:8] of template) cluster_id: Mapped[str] = mapped_column( String(16), nullable=False, unique=True, index=True, comment="模板 MD5[:8].upper(),穩定 ID", ) # Drain3 模板 template: Mapped[str] = mapped_column( Text, nullable=False, comment="Drain3 萃取的 log 模板 (e.g. 'ERROR <*> connection failed to <*>')", ) # 統計 size: Mapped[int] = mapped_column(Integer, default=1, nullable=False, comment="命中次數(第一次 = 1)") source: Mapped[str] = mapped_column(String(50), default="k8s_pod", nullable=False, comment="k8s_pod | host_syslog | app_log") # 樣本日誌(保留首次觸發的原始行,供事後分析) sample_log: Mapped[str | None] = mapped_column(Text, nullable=True, comment="首次觸發的原始 log 行(前 500 字元)") # Timestamps first_seen_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=taipei_now, nullable=False) last_seen_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=taipei_now, onupdate=taipei_now, nullable=False) __table_args__ = ( Index("ix_log_cluster_first_seen", "first_seen_at"), Index("ix_log_cluster_source", "source"), ) # ============================================================================= # AgentSession — Phase 2 多 Agent 辯證 Audit Trail # ============================================================================= class AgentSession(Base): """ ADR-082 Phase 2: 多 Agent 辯證 Immutable Event Log 每個 Agent 每次「發言」寫一行。 session_id 串連同一次 Incident 決策的所有 Agent turns。 不可刪除 — 只能新增(Immutable Event Sourcing)。 Phase 3 學習閉環依賴此表(Critic 挑戰成功作為負向學習信號)。 ADR-082: 多 Agent 協作架構 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 2 初始建立 """ __tablename__ = "agent_sessions" id: Mapped[str] = mapped_column( String(36), primary_key=True, default=lambda: str(uuid4()), comment="行主鍵(UUID)" ) session_id: Mapped[str] = mapped_column( String(36), nullable=False, comment="辯證 Session ID(一次 Incident 決策的所有 turns 共用同一 session_id)" ) incident_id: Mapped[str] = mapped_column( String(50), nullable=False, comment="關聯 Incident ID" ) agent_role: Mapped[str] = mapped_column( String(20), nullable=False, comment="Agent 角色:diagnostician / solver / reviewer / critic / coordinator" ) # 輸入指紋(sha256[:16])— 用於查重、快取命中追蹤 input_hash: Mapped[str] = mapped_column( String(16), nullable=False, default="", comment="sha256(input_json)[:16],供查重與快取命中追蹤" ) # Agent 輸出(完整 JSON,供 Phase 3 學習 + 事後複盤) output_json: Mapped[dict] = mapped_column( JSON, nullable=False, default=dict, comment="Agent 原始輸出(DiagnosisReport / ActionPlan / 等序列化 dict)" ) # 品質指標 latency_ms: Mapped[int] = mapped_column( Integer, nullable=False, default=0, comment="此 Agent 的執行耗時(ms)" ) vote: Mapped[str] = mapped_column( String(20), nullable=False, default="abstain", comment="Agent 投票:approve / reject / request_revision / abstain / degraded" ) degraded: Mapped[bool] = mapped_column( nullable=False, default=False, comment="True = 此 Agent 因熔斷/超時降級,輸出為 rule-based mock" ) # 時間戳(台北時區) created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=taipei_now, nullable=False ) __table_args__ = ( Index("ix_agent_sessions_session_id", "session_id"), Index("ix_agent_sessions_incident_id", "incident_id"), Index("ix_agent_sessions_created_at", "created_at"), # 查詢某 session 中特定 role 的 turn(Coordinator 聚合時常用) Index("ix_agent_sessions_session_role", "session_id", "agent_role"), ) # ============================================================================= # AiGovernanceEvent — Phase 6 自我治理事件溯源(不可刪除) # ADR-087: AI 自我治理閉環:SLO 違反 / 信任漂移 / KB 腐爛 / 自我降級 # 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 6 初始建立 # # 核心鐵律: # - 不可變 Event Sourcing — 只 INSERT,禁止 UPDATE/DELETE # - 所有治理事件必須落地 PG,SLO dashboard 依賴此表 # - resolved=True 僅由人工或下次計算時補填,不可自動翻轉未解決項目 # ============================================================================= class AiGovernanceEvent(Base): """ AI 自我治理事件記錄(不可變) event_type 值: slo_violation — SLO 計算結果違反閾值 trust_drift — Playbook 信任度分布偏態(全高或全低) kb_stale — KB 條目引用已廢棄 K8s API / Prometheus query self_demotion — 信心閾值自動調高(自我降級) conservative_mode — 連續 SLO 違反,全系統切保守模式 replay_degraded — 離線回放一致率連續下降 immutable — 只 INSERT,禁 UPDATE / DELETE """ __tablename__ = "ai_governance_events" id: Mapped[str] = mapped_column( String(36), primary_key=True, default=generate_uuid, comment="主鍵(UUID)" ) event_type: Mapped[str] = mapped_column( String(40), nullable=False, comment="slo_violation / trust_drift / kb_stale / self_demotion / conservative_mode / replay_degraded" ) triggered_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=taipei_now, nullable=False, comment="事件觸發時間(台北時區)" ) details: Mapped[dict] = mapped_column( JSON, nullable=False, default=dict, comment="事件詳情 JSONB(SLO 數值、漂移分布等)" ) resolved: Mapped[bool] = mapped_column( default=False, nullable=False, comment="是否已解決(人工確認或下次計算恢復正常後補填)" ) resolved_at: Mapped[datetime | None] = mapped_column( DateTime(timezone=True), nullable=True, comment="解決時間(僅人工/系統補填,不得自動反轉未解決項目)" ) __table_args__ = ( Index("ix_ai_governance_event_type", "event_type"), Index("ix_ai_governance_triggered_at", "triggered_at"), Index("ix_ai_governance_resolved", "resolved"), ) # ============================================================================= # TrustRecordDB - ADR-088 TrustScore 持久化 # ============================================================================= class TrustRecordDB(Base): """ Trust Score 持久化記錄 ADR-088: TrustScoreManager 從記憶體升級為 PostgreSQL 持久化。 Pod 重啟後分數不歸零,AI 能真正累積信任達到 L4 自動放行。 score >= 5: MEDIUM → LOW (自動執行) score >= 10: HIGH → MEDIUM (降一級) 2026-04-17 ogt + Claude Sonnet 4.6(亞太): Phase 4 信任持久化 """ __tablename__ = "trust_records" action_pattern: Mapped[str] = mapped_column( String(255), primary_key=True, comment="操作模式,例如 delete:nginx-frontend-*" ) score: Mapped[int] = mapped_column( Integer, nullable=False, default=0, comment="累積信任分數。+1/approve,reject 歸零" ) total_approvals: Mapped[int] = mapped_column( Integer, nullable=False, default=0, ) total_rejections: Mapped[int] = mapped_column( Integer, nullable=False, default=0, ) last_approval_by: Mapped[str | None] = mapped_column(String(100), nullable=True) last_approval_at: Mapped[datetime | None] = mapped_column( DateTime(timezone=True), nullable=True, ) last_rejection_by: Mapped[str | None] = mapped_column(String(100), nullable=True) last_rejection_at: Mapped[datetime | None] = mapped_column( DateTime(timezone=True), nullable=True, ) created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), nullable=False, default=taipei_now, ) updated_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), nullable=False, default=taipei_now, onupdate=taipei_now, ) __table_args__ = ( Index("ix_trust_records_score", "score"), Index("ix_trust_records_updated", "updated_at"), ) # ============================================================================= # AIProviderVersionHistory - AI Provider 版本歷史 # 2026-04-27 P3.2.2 by Claude # ============================================================================= class AIProviderVersionHistory(Base): """AI Provider 版本探測歷史記錄 每次 ModelVersionTracker.run_probe_cycle() 寫入一筆。 changed=True 表示本次探測到版本或 digest 與上一筆不同。 Migration: apps/api/migrations/p3_2_provider_version_history.sql """ __tablename__ = "ai_provider_version_history" id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) provider: Mapped[str] = mapped_column(String(40), nullable=False, index=True) model: Mapped[str] = mapped_column(String(100), nullable=False) version: Mapped[str | None] = mapped_column(String(200), nullable=True) digest: Mapped[str | None] = mapped_column(String(80), nullable=True) captured_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), nullable=False, default=taipei_now, ) prev_version: Mapped[str | None] = mapped_column(String(200), nullable=True) changed: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) __table_args__ = ( Index("ix_provider_version_captured", "provider", "captured_at"), )