Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 1m38s
W2 (onboarder 4 週飛輪 80→90 路徑第二週) + critic PR review 5 個 critical/major 全部修完,default flag=false 安全無爆炸風險。 ## W2 三件 PR ### PR-R2 — AOL → catalog confidence EWMA 回灌(修飛輪斷鏈 C2) - 新檔 `apps/api/src/jobs/aol_to_catalog_writeback_job.py` - 邏輯:每小時掃 AOL 計算 EWMA confidence (alpha=0.3) 回灌 alert_rule_catalog - 失敗閾值 N=5 連續低成功率 → review_status='draft' - Hermes _fetch_noisy_rules SQL 加 OR review_status='draft' - ENABLE_AOL_WRITEBACK_JOB=false (default) - 8 個測試(mock path 修正:lazy import → patch src.db.base.get_db_context) ### PR-V1 — self_healing_validator 串接 (修飛輪斷鏈 C6) - 新檔 `apps/api/src/services/self_healing_validator.py`(純函數 assess_self_healing) - post_execution_verifier.py step 5 串接(feature flag gate) - evidence_snapshot.py 加 self_healing_score / self_healing_detail 欄位 - db/models.py + base.py ALTER IF NOT EXISTS - score < 0.5 → 觸發 rollback 提案 Telegram alert(不自動執行) - ENABLE_SELF_HEALING_VALIDATOR=false (default) - 7 個測試 ### PR-L1 — KM ↔ Playbook 雙向回路 (修飛輪斷鏈 C3+C4) - learning_service.py 三條新邏輯: 1. _write_playbook_evolution_km:promote/demote 寫 KM 演化條目 2. _check_and_mark_playbook_review:N=5 累積觸發 review_required 3. _demote_alert_rule_catalog_confidence:DEPRECATED → confidence×=0.5 - PlaybookRecord 加 review_required 欄位(schema migration via base.py) - ENABLE_KM_PLAYBOOK_FEEDBACK_LOOP=false (default) - KM_PLAYBOOK_REVIEW_THRESHOLD=5 可調 - 6 個測試 ## KMWriter Critic 5 個 Critical/Major 修復(之前 critic PR review 發現) 之前 push commitc5753e1c已修,本 commit 補回 stash 中的對應檔案: - C1 km_writer.py:194 backfill 自打臉(已修:同步 await + DLQ) - C2 km_writer.py:391 KM_WRITE_AWAIT=false 路徑收緊 - M1 decision_manager.py:2178/2203 移除 _fire_and_forget - M2 incident_service.py:1099 自製 path 加 retry+DLQ - M3 km_writer.py:166 冪等聲明對齊(UPSERT + partial unique index) ## 驗證 - 1635 unit tests 全綠(+27 from 1608) - 與fb0c72db(推翻 A2 Ollama primary) 共存無衝突 - 所有新 Job/Service default flag=false(不爆炸) ## 期望影響 飛輪斷鏈 C2 + C3 + C4 + C6 全修 飛輪自主化評分:65 → 85 預估(W2 完成後) 啟用順序(待 prodfb0c72db驗證 OLLAMA primary 跑得起來後): 1. ENABLE_AOL_WRITEBACK_JOB=true 2. 
ENABLE_KM_PLAYBOOK_FEEDBACK_LOOP=true 3. ENABLE_SELF_HEALING_VALIDATOR=true Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1393 lines
52 KiB
Python
1393 lines
52 KiB
Python
"""
|
||
Database Models
|
||
===============
|
||
CTO-201: Approval & AuditLog persistence
|
||
|
||
Schema 設計原則:
|
||
- UUID 主鍵 (PostgreSQL 相容)
|
||
- JSON 欄位儲存複雜結構
|
||
- 完整時間戳記
|
||
- 索引優化查詢
|
||
"""
|
||
|
||
from datetime import datetime
|
||
from typing import Any
|
||
from uuid import uuid4
|
||
|
||
from sqlalchemy import (
|
||
JSON,
|
||
BigInteger,
|
||
Boolean,
|
||
CheckConstraint,
|
||
DateTime,
|
||
Float,
|
||
Index,
|
||
Integer,
|
||
String,
|
||
Text,
|
||
text,
|
||
)
|
||
from sqlalchemy import (
|
||
Enum as SQLEnum,
|
||
)
|
||
from sqlalchemy.dialects.postgresql import ENUM as PgEnum
|
||
from sqlalchemy.dialects.postgresql import JSONB
|
||
from sqlalchemy.orm import Mapped, mapped_column
|
||
|
||
from src.db.base import Base
|
||
from src.models.approval import ApprovalStatus, RiskLevel
|
||
from src.models.incident import IncidentStatus, Severity
|
||
from src.models.knowledge import EntrySource, EntryStatus, EntryType
|
||
|
||
# =============================================================================
|
||
# Helper Functions
|
||
# =============================================================================
|
||
|
||
def taipei_now() -> datetime:
    """Return the current time in the Taipei timezone (UTC+8).

    HARD RULE: the whole system uses Taipei time; UTC is forbidden.
    2026-04-02 Claude Code: C1 timezone unification migration
    (chief architect review).

    The value is produced by ``src.utils.timezone.now_taipei``.
    """
    # Function-scope import, resolved at call time.
    # NOTE(review): presumably deferred to avoid an import cycle — confirm.
    from src.utils.timezone import now_taipei as _now_taipei

    current = _now_taipei()
    return current
|
||
|
||
|
||
def generate_uuid() -> str:
    """Return a newly generated UUID4 rendered as its canonical string."""
    fresh_id = uuid4()
    return str(fresh_id)
|
||
|
||
|
||
# =============================================================================
|
||
# ApprovalRecord - 授權記錄持久化
|
||
# =============================================================================
|
||
|
||
class ApprovalRecord(Base):
    """
    Approval record — ORM counterpart of the Pydantic ApprovalRequest.

    Note: kept in sync with the in-memory TrustEngine ApprovalRequest.
    """

    __tablename__ = "approval_records"

    # Primary key
    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=generate_uuid,
    )

    # Core fields
    action: Mapped[str] = mapped_column(String(500), nullable=False)
    description: Mapped[str] = mapped_column(Text, nullable=False)
    status: Mapped[str] = mapped_column(
        SQLEnum(ApprovalStatus),
        default=ApprovalStatus.PENDING,
        nullable=False,
    )
    risk_level: Mapped[str] = mapped_column(
        SQLEnum(RiskLevel),
        nullable=False,
    )

    # Signature tracking
    required_signatures: Mapped[int] = mapped_column(Integer, default=1)
    current_signatures: Mapped[int] = mapped_column(Integer, default=0)
    # The column default is an empty list, so the annotation is a list (it was
    # previously annotated as a dict, contradicting the default).
    # NOTE(review): elements are presumed to be dicts like the sibling JSON
    # list columns — confirm against the writer.
    signatures: Mapped[list[dict[str, Any]]] = mapped_column(JSON, default=list)

    # Blast radius (JSON)
    blast_radius: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)

    # Dry-run checks (JSON)
    dry_run_checks: Mapped[list[dict[str, Any]]] = mapped_column(JSON, default=list)

    # Metadata
    requested_by: Mapped[str] = mapped_column(String(100), nullable=False)
    rejection_reason: Mapped[str | None] = mapped_column(Text, nullable=True)
    extra_metadata: Mapped[dict[str, Any] | None] = mapped_column(JSON, nullable=True)

    # ==========================================================================
    # Strategy B: Alert Storm Convergence
    # ==========================================================================
    # Alert fingerprint — a unique hash derived from
    # namespace + deployment + alert_name.
    # No index=True here: the column is already indexed by
    # "ix_approval_fingerprint" in __table_args__, and declaring both would
    # create two identical indexes on the same column.
    # NOTE(review): databases created from the earlier model may still carry
    # the auto-named index; dropping it needs a migration — confirm.
    fingerprint: Mapped[str | None] = mapped_column(
        String(64),
        nullable=True,
        comment="SHA256 hash of alert identity (namespace:deployment:alert_name)",
    )
    # Aggregation count — cumulative number of triggers sharing the fingerprint
    hit_count: Mapped[int] = mapped_column(
        Integer,
        default=1,
        nullable=False,
        comment="Number of times this alert pattern was triggered",
    )
    # Last seen — most recent time an alert with this fingerprint appeared
    last_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=taipei_now,
        nullable=False,
        comment="Last time this alert pattern was seen",
    )

    # Sprint 5.1 MultiSig dual-approval support
    # (2026-04-08 Claude Sonnet 4.6 Asia/Taipei, ADR-062 Q3)
    approval_level: Mapped[str] = mapped_column(
        String(20),
        default="standard",
        nullable=False,
        comment="standard=1票審核, critical=2票MultiSig",
    )
    approval_votes: Mapped[list[dict[str, Any]]] = mapped_column(
        JSON,
        default=list,
        nullable=False,
        comment="[{user_id, voted_at, action}]",
    )
    required_votes: Mapped[int] = mapped_column(
        Integer,
        default=1,
        nullable=False,
        comment="standard=1, critical=2",
    )

    # 2026-04-06 ogt: Phase 26 — associated Incident ID.
    # Playbook extraction and KM writes must know the incident_id and cannot
    # rely on text parsing.
    incident_id: Mapped[str | None] = mapped_column(
        String(64),
        nullable=True,
        index=True,
        comment="Associated Incident ID (INC-YYYYMMDD-XXXXXX)",
    )

    # 2026-04-09 Claude Sonnet 4.6: Telegram message persistence.
    # Still queryable after the Redis tg_msg:{id} 24h TTL expires; supports
    # cross-session status updates.
    telegram_message_id: Mapped[int | None] = mapped_column(
        Integer,
        nullable=True,
        comment="Telegram message_id of the approval card sent to operator",
    )
    telegram_chat_id: Mapped[int | None] = mapped_column(
        BigInteger,
        nullable=True,
        comment="Telegram chat_id where the approval card was sent (BIGINT: 支援群組負數 ID)",
    )

    # B2 fix 2026-04-24 ogt + Claude Sonnet 4.6: repair of the broken Playbook
    # learning loop. The column was missing, so matched_playbook_id stayed NULL
    # after human approval and the EWMA could not be updated.
    # 2026-04-25 db-expert-fix by Claude Engineer-B: index=True removed to avoid
    # an auto-generated full index; the partial index is declared in
    # __table_args__ (WHERE matched_playbook_id IS NOT NULL).
    matched_playbook_id: Mapped[str | None] = mapped_column(
        String(36),
        nullable=True,
        comment="匹配的 Playbook ID,學習服務用以更新 EWMA trust score",
    )

    # 2026-04-26 P2-DB-Fix by Claude — db-expert P0 fixes (P0.3):
    # P2.1 DecisionFusionEngine columns
    # composite_score / complexity_tier / decision_fusion_details.
    # Populated only when AIOPS_P2_FUSION_ENABLED=True and fusion succeeds
    # (hence nullable=True).
    composite_score: Mapped[float | None] = mapped_column(
        Float,
        nullable=True,
        comment="P2.1 DecisionFusion 合成分數(0.0-1.0),方法 III 加權結果",
    )
    complexity_tier: Mapped[str | None] = mapped_column(
        String(16),
        nullable=True,
        comment="P2.1 告警複雜度分層:low / medium / high / critical",
    )
    decision_fusion_details: Mapped[dict | None] = mapped_column(
        JSONB,
        nullable=True,
        comment=(
            "P2.1 DecisionFusionEngine: openclaw_score / hermes_score / "
            "playbook_score / mcp_health_score / elephant_score"
        ),
    )

    # Timestamps
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=taipei_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=taipei_now,
        onupdate=taipei_now,
    )
    expires_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    resolved_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )

    # Indexes and constraints
    __table_args__ = (
        Index("ix_approval_status", "status"),
        Index("ix_approval_risk_level", "risk_level"),
        Index("ix_approval_created_at", "created_at"),
        Index("ix_approval_requested_by", "requested_by"),
        # Strategy B: fingerprint lookup (the only index on this column)
        Index("ix_approval_fingerprint", "fingerprint"),
        # 2026-04-25 db-expert-fix by Claude Engineer-B: partial index that only
        # covers non-NULL values; this is the single source of truth for the
        # matched_playbook_id index.
        Index(
            "ix_approval_matched_playbook",
            "matched_playbook_id",
            postgresql_where=text("matched_playbook_id IS NOT NULL"),
        ),
        # 2026-04-26 P2-DB-Fix by Claude — db-expert P0 fixes (P0.3):
        # P2 DecisionFusion columns. Partial indexes: the fusion fill rate is
        # expected to be <50%, so only rows with a value are indexed.
        Index(
            "ix_approval_composite_score",
            "composite_score",
            postgresql_where=text("composite_score IS NOT NULL"),
        ),
        Index(
            "ix_approval_complexity_tier",
            "complexity_tier",
            postgresql_where=text("complexity_tier IS NOT NULL"),
        ),
        CheckConstraint(
            "complexity_tier IN ('low','medium','high','critical') OR complexity_tier IS NULL",
            name="chk_complexity_tier",
        ),
    )
|
||
|
||
|
||
# =============================================================================
|
||
# TimelineEvent & AuditLog - Action timeline events and audit logs
|
||
# =============================================================================
|
||
|
||
class TimelineEvent(Base):
    """
    Timeline event — Phase 4 Action Timeline.

    Event types:
    - system: system alert received
    - agent: OpenClaw AI analysis
    - security: permission block
    - human: human authorization
    - exec: execution completed
    """

    __tablename__ = "timeline_events"

    # Primary key
    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=generate_uuid,
    )

    # Event type & status
    event_type: Mapped[str] = mapped_column(
        String(20),
        nullable=False,
        comment="system, agent, security, human, exec",
    )
    status: Mapped[str] = mapped_column(
        String(20),
        nullable=False,
        default="info",
        comment="info, success, warning, error",
    )

    # Content
    title: Mapped[str] = mapped_column(String(500), nullable=False)
    description: Mapped[str | None] = mapped_column(Text, nullable=True)

    # Actor
    actor: Mapped[str | None] = mapped_column(String(100), nullable=True)
    actor_role: Mapped[str | None] = mapped_column(String(50), nullable=True)

    # Context
    risk_level: Mapped[str | None] = mapped_column(String(20), nullable=True)
    approval_id: Mapped[str | None] = mapped_column(String(36), nullable=True, index=True)
    # P1.6 fix 2026-04-24 ogt + Claude Sonnet 4.6: pre_decision_investigator's
    # raw SQL wrote to a column that did not exist. The original
    # INSERT INTO timeline_events (incident_id, ...) failed and the error was
    # silently swallowed every day.
    # No index=True here: the column is already indexed by
    # "ix_timeline_incident_id" in __table_args__, and declaring both would
    # create two identical indexes on the same column.
    incident_id: Mapped[str | None] = mapped_column(
        String(64),
        nullable=True,
        comment="關聯的 Incident ID(MCP 事件稽核用)",
    )

    # Timestamp
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=taipei_now,
    )

    # Indexes
    __table_args__ = (
        Index("ix_timeline_event_type", "event_type"),
        Index("ix_timeline_created_at", "created_at"),
        Index("ix_timeline_incident_id", "incident_id"),  # P1.6 fix
    )
|
||
|
||
|
||
class AuditLog(Base):
    """
    Audit log — records every execution result.

    One row is written after each K8s operation completes.
    """

    __tablename__ = "audit_logs"

    # Primary key
    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=generate_uuid,
    )

    # Reference to the approval.
    # No index=True here: the column is already indexed by
    # "ix_audit_approval_id" in __table_args__, and declaring both would
    # create two identical indexes on the same column.
    approval_id: Mapped[str] = mapped_column(
        String(36),
        nullable=False,
    )

    # Operation details
    operation_type: Mapped[str] = mapped_column(
        String(50),
        nullable=False,
        comment="e.g., RESTART_DEPLOYMENT, DELETE_POD",
    )
    target_resource: Mapped[str] = mapped_column(
        String(200),
        nullable=False,
        comment="e.g., deployment/api-backend, pod/nginx-xxx",
    )
    namespace: Mapped[str] = mapped_column(
        String(63),
        default="default",
        nullable=False,
    )

    # Execution result
    success: Mapped[bool] = mapped_column(default=False, nullable=False)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)

    # K8s response (raw)
    k8s_response: Mapped[dict[str, Any] | None] = mapped_column(
        JSON,
        nullable=True,
        comment="Raw Kubernetes API response",
    )

    # Execution context
    executed_by: Mapped[str] = mapped_column(
        String(100),
        nullable=False,
        comment="Who triggered the execution",
    )
    execution_duration_ms: Mapped[int | None] = mapped_column(
        Integer,
        nullable=True,
        comment="Execution time in milliseconds",
    )

    # Dry-run result (pre-execution validation)
    dry_run_passed: Mapped[bool] = mapped_column(
        default=True,
        nullable=False,
    )
    dry_run_message: Mapped[str | None] = mapped_column(Text, nullable=True)

    # ==========================================================================
    # Phase 18: failure auto-repair closed-loop columns (2026-03-26)
    # ==========================================================================

    # Authorization source tracking
    authorization_channel: Mapped[str | None] = mapped_column(
        String(20),
        nullable=True,
        comment="Authorization source: web, telegram, auto",
    )

    # Retry and repair tracking
    retry_count: Mapped[int] = mapped_column(
        Integer,
        default=0,
        nullable=False,
        comment="Number of retry attempts",
    )
    failure_classification: Mapped[str | None] = mapped_column(
        String(50),
        nullable=True,
        comment="Failure type: TIMEOUT, K8S_ERROR, NETWORK_ERROR, PERMISSION_DENIED",
    )
    source_approval_id: Mapped[str | None] = mapped_column(
        String(36),
        nullable=True,
        index=True,
        comment="Original approval ID if this is a repair attempt",
    )

    # Auto-repair state
    auto_repair_attempted: Mapped[bool] = mapped_column(
        default=False,
        nullable=False,
        comment="Whether auto-repair was attempted",
    )
    auto_repair_result: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
        comment="Auto-repair result: AI analysis and repair outcome",
    )

    # Timestamps
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=taipei_now,
    )

    # Indexes
    __table_args__ = (
        Index("ix_audit_approval_id", "approval_id"),
        Index("ix_audit_operation_type", "operation_type"),
        Index("ix_audit_success", "success"),
        Index("ix_audit_created_at", "created_at"),
        Index("ix_audit_authorization_channel", "authorization_channel"),  # Phase 18
        Index("ix_audit_failure_classification", "failure_classification"),  # Phase 18
    )
|
||
|
||
|
||
# =============================================================================
|
||
# AutoRepairExecution - Phase 10 操作記錄
|
||
# 2026-04-08 Claude Code: 統帥指令「所有操作都必須被記錄,寫入資料庫」
|
||
# =============================================================================
|
||
|
||
class AutoRepairExecution(Base):
    """
    Auto-repair execution record.

    A row is written every time evaluate_auto_repair triggers and executes,
    whether it succeeds or fails. It does not depend on an approval_id
    (auto-repair needs no human approval).
    """

    __tablename__ = "auto_repair_executions"

    id: Mapped[str] = mapped_column(
        String(36),
        default=generate_uuid,
        primary_key=True,
    )

    # --- Associations ---
    incident_id: Mapped[str] = mapped_column(
        String(30),
        index=True,
        nullable=False,
    )
    playbook_id: Mapped[str] = mapped_column(
        String(36),
        index=True,
        nullable=False,
    )
    playbook_name: Mapped[str] = mapped_column(
        String(200),
        nullable=False,
    )

    # --- Execution result ---
    success: Mapped[bool] = mapped_column(nullable=False, default=False)
    executed_steps: Mapped[list] = mapped_column(
        JSON,
        nullable=False,
        default=list,
    )
    error_message: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
    )

    # --- Execution context ---
    triggered_by: Mapped[str] = mapped_column(
        String(50),
        nullable=False,
        default="auto_repair",
        comment="auto_repair / cold_start_trust",
    )
    similarity_score: Mapped[float | None] = mapped_column(nullable=True)
    risk_level: Mapped[str | None] = mapped_column(
        String(20),
        nullable=True,
    )
    execution_time_ms: Mapped[int | None] = mapped_column(
        Integer,
        nullable=True,
    )

    # --- Timestamp (Taipei timezone) ---
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=taipei_now,
    )

    __table_args__ = (
        Index("ix_are_created_at", "created_at"),
        Index("ix_are_success", "success"),
    )
|
||
|
||
|
||
# =============================================================================
|
||
# AlertOperationLog - Phase 11 告警操作溯源 (Event Sourcing)
|
||
# 2026-04-08 Claude Code: 統帥指令「所有操作都必須被記錄,寫入資料庫」
|
||
# 不可變 — 只 INSERT,不 UPDATE/DELETE
|
||
# =============================================================================
|
||
|
||
class AlertOperationLog(Base):
    """
    Full provenance trail of alert operations.

    Event Sourcing pattern: one row is written for every event in an alert's
    lifecycle. Rows are intended to be immutable (insert only).

    event_type values (as declared in the ``alert_event_type`` enum below):
        ALERT_RECEIVED / TELEGRAM_SENT / USER_ACTION /
        AUTO_REPAIR_TRIGGERED / EXECUTION_STARTED / EXECUTION_COMPLETED /
        TELEGRAM_RESULT_SENT / RESOLVED / SILENCED / ESCALATED /
        GUARDRAIL_BLOCKED / PRE_FLIGHT_PASSED / PRE_FLIGHT_FAILED /
        BACKUP_TRIGGERED / BACKUP_COMPLETED / BACKUP_FAILED /
        APPROVAL_ESCALATED / CHANGE_APPLIED
    """

    __tablename__ = "alert_operation_log"

    id: Mapped[str] = mapped_column(
        String(36),
        default=generate_uuid,
        primary_key=True,
    )

    # --- Associations (nullable: different events carry different links) ---
    incident_id: Mapped[str | None] = mapped_column(
        String(30),
        index=True,
        nullable=True,
    )
    approval_id: Mapped[str | None] = mapped_column(
        String(36),
        index=True,
        nullable=True,
    )
    audit_log_id: Mapped[str | None] = mapped_column(
        String(36),
        nullable=True,
    )
    auto_repair_id: Mapped[str | None] = mapped_column(
        String(36),
        nullable=True,
    )

    # --- Event core ---
    # 2026-04-08 Claude Sonnet 4.6: Sprint 5.1 — fixed the enum type mismatch
    # (String -> PgEnum, create_type=False).
    event_type: Mapped[str] = mapped_column(
        PgEnum(
            "ALERT_RECEIVED",
            "TELEGRAM_SENT",
            "USER_ACTION",
            "AUTO_REPAIR_TRIGGERED",
            "EXECUTION_STARTED",
            "EXECUTION_COMPLETED",
            "TELEGRAM_RESULT_SENT",
            "RESOLVED",
            "SILENCED",
            "ESCALATED",
            "GUARDRAIL_BLOCKED",
            "PRE_FLIGHT_PASSED",
            "PRE_FLIGHT_FAILED",
            "BACKUP_TRIGGERED",
            "BACKUP_COMPLETED",
            "BACKUP_FAILED",
            "APPROVAL_ESCALATED",
            "CHANGE_APPLIED",
            name="alert_event_type",
            create_type=False,
        ),
        index=True,
        nullable=False,
    )
    actor: Mapped[str | None] = mapped_column(
        String(100),
        index=True,
        nullable=True,
    )
    action_detail: Mapped[str | None] = mapped_column(
        String(200),
        nullable=True,
    )

    # --- Execution result (NULL = not applicable) ---
    success: Mapped[bool | None] = mapped_column(nullable=True)
    error_message: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
    )

    # --- Structured context ---
    context: Mapped[dict] = mapped_column(
        JSON,
        nullable=False,
        default=dict,
    )

    # --- Timestamp (Taipei timezone, immutable) ---
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=taipei_now,
    )

    __table_args__ = (
        Index("ix_aol_created_at", "created_at"),
    )
|
||
|
||
|
||
# =============================================================================
|
||
# IncidentRecord - Phase 6.2 Episodic Memory (PostgreSQL)
|
||
# =============================================================================
|
||
|
||
class IncidentRecord(Base):
    """
    Incident record — ORM counterpart of the Pydantic Incident Schema v0.3.

    Phase 6.2: Episodic Memory (long-term memory)
    - Migrated over from Working Memory (Redis)
    - Retained permanently for RAG retrieval
    - Complex structures live in JSON columns

    Three-tier memory architecture:
    - Working Memory (Redis): 7-day TTL
    - Episodic Memory (PostgreSQL): this table, retained permanently
    - Semantic Memory (Vector DB): Phase 6.3+
    """

    __tablename__ = "incidents"

    # --- Primary key ---
    incident_id: Mapped[str] = mapped_column(
        String(30),
        comment="事件唯一識別碼 (如 INC-20260322-A1B2C3)",
        primary_key=True,
    )

    # --- Status and severity ---
    status: Mapped[str] = mapped_column(
        SQLEnum(IncidentStatus),
        nullable=False,
        default=IncidentStatus.INVESTIGATING,
        comment="事件狀態 (investigating, mitigating, resolved, closed, escalated)",
    )
    severity: Mapped[str] = mapped_column(
        SQLEnum(Severity),
        comment="事件嚴重度 (P0, P1, P2, P3)",
        nullable=False,
    )

    # --- Perception layer (signals); declared with the generic JSON type ---
    signals: Mapped[list[dict[str, Any]]] = mapped_column(
        JSON,
        nullable=False,
        default=list,
        comment="關聯的告警信號列表 (JSONB)",
    )
    affected_services: Mapped[list[str]] = mapped_column(
        JSON,
        nullable=False,
        default=list,
        comment="受影響的服務列表",
    )

    # --- Cognition layer (AI decision chain) ---
    decision_chain: Mapped[dict[str, Any] | None] = mapped_column(
        JSON,
        comment="AI 決策鏈 (完整推論過程)",
        nullable=True,
    )

    # --- Decision layer (proposals) ---
    proposal_ids: Mapped[list[str]] = mapped_column(
        JSON,
        nullable=False,
        default=list,
        comment="關聯的 ApprovalRequest ID 列表",
    )

    # --- Outcome layer ---
    outcome: Mapped[dict[str, Any] | None] = mapped_column(
        JSON,
        comment="事件結果與人類回饋",
        nullable=True,
    )

    # --- ADR-073 Phase 2 columns (2026-04-12 ogt) ---
    alertname: Mapped[str | None] = mapped_column(
        String(100),
        comment="告警名稱 (從 signals labels 抽取)",
        nullable=True,
    )
    notification_type: Mapped[str | None] = mapped_column(
        String(10),
        comment="通知類型 TYPE-1/2/3/4/4D (早期分診)",
        nullable=True,
    )
    alert_category: Mapped[str | None] = mapped_column(
        String(50),
        comment="告警類別 config_drift/info/backup/infrastructure/kubernetes/database/general",
        nullable=True,
    )

    # --- Frequency snapshot (Phase 27, 2026-04-10 ogt) ---
    # frequency_stats used to live only in memory/Redis (TTL = 35 days) and was
    # lost on Pod restart or expiry. This column stores a snapshot taken when
    # the incident is created, preserving the statistics permanently.
    frequency_snapshot: Mapped[dict[str, Any] | None] = mapped_column(
        JSON,
        comment="建立時刻的 AnomalyFrequency 快照,永久保存 (Phase 27)",
        nullable=True,
    )

    # --- Timeline ---
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
        onupdate=taipei_now,
    )
    resolved_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    closed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)

    # --- Memory management ---
    ttl_days: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=7,
        comment="Working Memory TTL (天)",
    )
    vectorized: Mapped[bool] = mapped_column(
        nullable=False,
        default=False,
        comment="是否已向量化到 Vector DB (Semantic Memory)",
    )

    # --- Indexes ---
    __table_args__ = (
        Index("ix_incident_status", "status"),
        Index("ix_incident_severity", "severity"),
        Index("ix_incident_created_at", "created_at"),
        Index("ix_incident_resolved_at", "resolved_at"),
    )
|
||
|
||
|
||
# =============================================================================
|
||
# KnowledgeEntry - Knowledge Base Phase 1
|
||
# =============================================================================
|
||
|
||
class KnowledgeEntryRecord(Base):
    """
    Knowledge base entry — Knowledge Base Phase 1.

    Two-tier architecture:
    - KnowledgeEntry: the knowledge entry (this table)
    - Playbook: stored separately in Redis, linked via related_playbook_id

    Created: 2026-04-02 (Taipei timezone)
    Author: Claude Code (Knowledge Base Phase 1)
    """

    __tablename__ = "knowledge_entries"

    # --- Primary key ---
    id: Mapped[str] = mapped_column(String(36), default=generate_uuid, primary_key=True)

    # --- Core fields ---
    title: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
    )
    content: Mapped[str] = mapped_column(
        Text,
        nullable=False,
    )
    entry_type: Mapped[str] = mapped_column(
        SQLEnum(EntryType),
        comment="incident_case / runbook / best_practice / postmortem",
        nullable=False,
    )
    category: Mapped[str] = mapped_column(
        String(100),
        comment="分類樹節點 (基礎設施/應用層/AI系統/安全合規)",
        nullable=False,
    )
    tags: Mapped[list[str]] = mapped_column(
        JSON,
        nullable=False,
        default=list,
        comment="標籤列表 (JSONB string array)",
    )

    # --- Source & status ---
    source: Mapped[str] = mapped_column(
        SQLEnum(EntrySource),
        comment="ai_extracted / human",
        nullable=False,
    )
    status: Mapped[str] = mapped_column(
        SQLEnum(EntryStatus),
        nullable=False,
        default=EntryStatus.DRAFT,
        comment="draft / review / approved / archived",
    )

    # --- Relations (soft references, not foreign keys) ---
    related_incident_id: Mapped[str | None] = mapped_column(
        String(30),
        comment="關聯 Incident ID",
        nullable=True,
    )
    related_playbook_id: Mapped[str | None] = mapped_column(
        String(255),
        comment="關聯 Playbook Redis Key",
        nullable=True,
    )
    # 2026-04-04 ogt: Phase 25 P1 — symptom hash used by the Anti-Pattern
    # closed-loop interception (SymptomPattern.compute_hash()).
    symptoms_hash: Mapped[str | None] = mapped_column(
        String(16),
        comment="症狀模式 hash (16字元 SHA256 前綴),Anti-Pattern 閉環攔截使用",
        nullable=True,
    )
    # P1-1 2026-04-28 ogt + Claude Sonnet 4.6: M4 reverse-lookup chain.
    # phase26_incident_km_integration.sql already created the column and its
    # partial index. KMWriter.write() fills it automatically and backfills
    # Path A entries (approval -> KM two-way tracing).
    related_approval_id: Mapped[str | None] = mapped_column(
        String(36),
        comment="關聯 ApprovalRequest ID,P1-1 反查鏈修復(approval → KM 追蹤)",
        nullable=True,
    )
    # P1-1 M3 2026-04-28 ogt + Claude Sonnet 4.6: part of the idempotency key.
    # migration: p1_1_km_idempotent_path_type.sql
    # unique index: uix_knowledge_incident_path
    # (related_incident_id, path_type) WHERE both NOT NULL
    path_type: Mapped[str | None] = mapped_column(
        String(50),
        comment="KMWriter 路徑類型,與 related_incident_id 構成冪等 key",
        nullable=True,
    )

    # --- Metrics ---
    view_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)

    # --- Metadata ---
    created_by: Mapped[str | None] = mapped_column(
        String(100),
        nullable=True,
    )
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=taipei_now)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        onupdate=taipei_now,
        default=taipei_now,
    )

    # --- Indexes ---
    __table_args__ = (
        Index("ix_knowledge_entry_type", "entry_type"),
        Index("ix_knowledge_category", "category"),
        Index("ix_knowledge_status", "status"),
        Index("ix_knowledge_created_at", "created_at"),
        # 2026-04-04 ogt: Phase 25 P1 — fast Anti-Pattern lookup
        Index("ix_knowledge_symptoms_hash", "symptoms_hash"),
        # P1-1 2026-04-28 ogt + Claude Sonnet 4.6: M4 reverse-lookup partial
        # index (pairs with the phase26 migration)
        Index(
            "ix_knowledge_related_approval",
            "related_approval_id",
            postgresql_where=text("related_approval_id IS NOT NULL"),
        ),
        # P1-1 M3 2026-04-28 ogt + Claude Sonnet 4.6: idempotency unique index
        # migration: p1_1_km_idempotent_path_type.sql
        Index(
            "uix_knowledge_incident_path",
            "related_incident_id",
            "path_type",
            postgresql_where=text(
                "related_incident_id IS NOT NULL AND path_type IS NOT NULL"
            ),
            unique=True,
        ),
    )
|
||
|
||
|
||
# IncidentEvidence — ADR-081 Phase 1 EvidenceSnapshot 持久化
|
||
# 2026-04-15 ogt + Claude Sonnet 4.6: AI 自主化飛輪 Phase 1 初始建立
|
||
class IncidentEvidence(Base):
    """
    Immutable incident evidence snapshot table.

    Before each decision the PreDecisionInvestigator captures one
    EvidenceSnapshot and writes it to this table, to support:
    - Decision traceability (the full intelligence context behind LLM reasoning)
    - Learning/training (source data for the Phase 3 fine-tune pipeline)
    - Anomaly verification (pre-execution vs post-execution state diff)

    ADR-081: PreDecisionInvestigator + EvidenceSnapshot
    Design principle: append-only writes, UPDATE is forbidden
    (aligned with event sourcing).
    """

    __tablename__ = "incident_evidence"

    id: Mapped[str] = mapped_column(
        String(36),
        default=generate_uuid,
        primary_key=True,
    )

    # --- Associations ---
    # Indexed through __table_args__ rather than index=True.
    incident_id: Mapped[str] = mapped_column(
        String(30),
        nullable=False,
    )
    # To be populated in Phase 3: matched_playbook_id is currently always null.
    matched_playbook_id: Mapped[str | None] = mapped_column(
        String(36),
        nullable=True,
    )

    # --- Schema version (lets the fine-tune pipeline filter compatible rows) ---
    schema_version: Mapped[str] = mapped_column(
        String(10),
        nullable=False,
        default="v1",
    )

    # --- 8D sensor data (each dimension nullable: partially missing when an
    # MCP call fails) ---
    k8s_state: Mapped[dict | None] = mapped_column(
        JSON,
        comment="D1: kubectl describe pod + events",
        nullable=True,
    )
    recent_logs: Mapped[str | None] = mapped_column(
        Text,
        comment="D2: container stderr tail-50,經 SanitizationService 清洗",
        nullable=True,
    )
    metrics_snapshot: Mapped[dict | None] = mapped_column(
        JSON,
        comment="D3: Prometheus 5min vs 1h baseline 對比",
        nullable=True,
    )
    recent_deployments: Mapped[list | None] = mapped_column(
        JSON,
        comment="D4: ArgoCD/Gitea 過去 1h 部署 diff",
        nullable=True,
    )
    business_metrics: Mapped[dict | None] = mapped_column(
        JSON,
        comment="D5: 訂單量 / 登入成功率 / P0 SLI",
        nullable=True,
    )
    historical_context: Mapped[str | None] = mapped_column(
        Text,
        comment="D6: 過去 30 天同 alertname 處置歷史摘要",
        nullable=True,
    )
    peer_health: Mapped[dict | None] = mapped_column(
        JSON,
        comment="D7: 同 Deployment 其他 replica 健康度",
        nullable=True,
    )
    dependency_topology: Mapped[dict | None] = mapped_column(
        JSON,
        comment="D8: Istio/Service Mesh 上下游 latency/error rate",
        nullable=True,
    )
    # Phase 4 ADR-084: enhanced sensing for dynamic anomaly detection
    # (DynamicBaseline + LogAnomaly + TrendPredictor).
    # 2026-04-15 ogt + Claude Sonnet 4.6 (APAC): Phase 4 8D upgrade.
    anomaly_context: Mapped[dict | None] = mapped_column(
        JSON,
        comment="Phase 4 動態異常上下文:baseline_anomalies / log_patterns / trend_breaches",
        nullable=True,
    )

    # --- Sensor quality indicators ---
    mcp_health: Mapped[dict] = mapped_column(
        JSON,
        nullable=False,
        default=dict,
        comment="各 MCP 呼叫成敗 {tool_name: bool},用於 decision_fusion 權重調整",
    )
    collection_duration_ms: Mapped[int | None] = mapped_column(
        Integer,
        comment="情報蒐集總耗時(ms),P99 目標 < 8000",
        nullable=True,
    )
    sensors_attempted: Mapped[int] = mapped_column(
        nullable=False,
        default=0,
        comment="嘗試啟動的感官數",
    )
    sensors_succeeded: Mapped[int] = mapped_column(
        nullable=False,
        default=0,
        comment="成功回傳資料的感官數",
    )

    # --- LLM input summary (kept under 8K tokens, compressed by the Investigator) ---
    evidence_summary: Mapped[str | None] = mapped_column(
        Text,
        comment="最終餵給 LLM 的情報摘要(UTF-8,< 8K tokens)",
        nullable=True,
    )

    # --- Pre/post execution state (PostExecutionVerifier fills post_execution_state) ---
    pre_execution_state: Mapped[dict | None] = mapped_column(
        JSON,
        comment="執行前環境狀態快照(PostExecutionVerifier 基準線)",
        nullable=True,
    )
    post_execution_state: Mapped[dict | None] = mapped_column(
        JSON,
        comment="執行後環境狀態(PostExecutionVerifier 抓取,Phase 1 接線)",
        nullable=True,
    )
    verification_result: Mapped[str | None] = mapped_column(
        String(20),
        comment="success / degraded / failed / timeout(PostExecutionVerifier 填入)",
        nullable=True,
    )

    # W2 PR-V1: SelfHealingValidator self-healing quality score
    # (2026-04-28 ogt + Claude Sonnet 4.6).
    # Range 0.0-1.0: 1.0 = fully self-healed, <0.5 = triggers a rollback
    # proposal (Telegram alert). The matching column is added by the
    # ALTER ... IF NOT EXISTS in base.py.
    self_healing_score: Mapped[float | None] = mapped_column(
        Float,
        comment="W2 PR-V1 SelfHealingValidator 自愈品質分數(0.0-1.0),<0.5 觸發 rollback 提案",
        nullable=True,
    )
    self_healing_detail: Mapped[dict | None] = mapped_column(
        JSON,
        comment="W2 PR-V1 SelfHealingValidator 評估明細:root_cause_cleared/regressions/detail",
        nullable=True,
    )

    # --- Timestamp (Taipei timezone) ---
    collected_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
    )

    __table_args__ = (
        Index("ix_incident_evidence_incident_id", "incident_id"),
        Index("ix_incident_evidence_collected_at", "collected_at"),
        Index("ix_incident_evidence_playbook_id", "matched_playbook_id"),
    )
|
||
|
||
|
||
# =============================================================================
|
||
# PlaybookRecord — Phase 3.5 Playbook PostgreSQL 持久化 (System of Record)
|
||
# ADR-085: AI 學習成果不可存在 Cache — Playbook 是 AI 的肌肉記憶
|
||
# 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 3.5 初始建立
|
||
#
|
||
# 核心鐵律:
|
||
# - PostgreSQL = System of Record(永久保存,AI 的長期記憶)
|
||
# - Redis = Warm Cache(7天 TTL,加速讀取,DB 為 source of truth)
|
||
# - trust_score, EWMA, 統計數據必須持久化 — 不能因 Redis TTL 消失
|
||
# =============================================================================
|
||
|
||
class PlaybookRecord(Base):
    """PostgreSQL ORM row for a remediation playbook.

    Mirrors the Pydantic ``Playbook`` model. Per the design notes for this
    table, PostgreSQL is the source of truth and Redis is only a warm cache
    (7-day TTL).

    Design principles (stated by the authors; the read/write paths live
    outside this model):
    - Learning artifacts (``trust_score``, ``success_count``,
      ``failure_count``) are kept permanently.
    - The EWMA trust score must survive Redis TTL expiry and pod restarts.
    - Dual write: create/update writes PG first, then refreshes the Redis
      cache.
    - Reads are Redis-first; on a cache miss the row is loaded from PG and
      written back to Redis.
    """

    __tablename__ = "playbooks"

    # --- Primary key -------------------------------------------------------
    playbook_id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        comment="Playbook 唯一識別碼 (PB-YYYYMMDD-XXXXXX)",
    )

    # --- Core fields -------------------------------------------------------
    name: Mapped[str] = mapped_column(
        String(256),
        nullable=False,
    )
    description: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        default="",
    )
    status: Mapped[str] = mapped_column(
        String(20),
        nullable=False,
        default="draft",
    )
    source: Mapped[str] = mapped_column(
        String(20),
        nullable=False,
        default="extracted",
    )

    # --- Structured payloads (stored through the generic JSON type) --------
    symptom_pattern: Mapped[dict[str, Any]] = mapped_column(
        JSON,
        nullable=False,
        default=dict,
    )
    repair_steps: Mapped[list[dict[str, Any]]] = mapped_column(
        JSON,
        nullable=False,
        default=list,
    )

    # --- Timing ------------------------------------------------------------
    estimated_duration_minutes: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=5,
    )

    # --- Source tracing ----------------------------------------------------
    source_incident_ids: Mapped[list[str]] = mapped_column(
        JSON,
        nullable=False,
        default=list,
    )
    ai_confidence: Mapped[float] = mapped_column(
        nullable=False,
        default=0.0,
    )

    # --- Usage statistics --------------------------------------------------
    # These are learning artifacts and must live in PG so they never expire.
    success_count: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=0,
    )
    failure_count: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=0,
    )
    last_used_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )

    # --- EWMA trust score (ADR-083 Phase 3) --------------------------------
    # Must never be governed by a Redis TTL: the score is the accumulated
    # result of learning, and letting it expire would wipe that memory.
    trust_score: Mapped[float] = mapped_column(
        nullable=False,
        default=0.3,
        comment="EWMA 動態信任度 (Phase 3)。成功 α=0.1,失敗 α=0.2(2x 衰減)。< 0.1 → 封存",
    )

    # --- Approval metadata -------------------------------------------------
    approved_by: Mapped[str | None] = mapped_column(
        String(100),
        nullable=True,
    )
    approved_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    tags: Mapped[list[str]] = mapped_column(
        JSON,
        nullable=False,
        default=list,
    )
    notes: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
    )

    # --- Sprint 5.1 guard-rail fields (2026-04-08) -------------------------
    requires_approval_level: Mapped[str] = mapped_column(
        String(20),
        nullable=False,
        default="auto",
        comment="auto=直接執行, standard=1票, critical=2票MultiSig",
    )
    stateful_targets: Mapped[list[str]] = mapped_column(
        JSON,
        nullable=False,
        default=list,
    )
    requires_pre_backup: Mapped[bool] = mapped_column(
        nullable=False,
        default=False,
    )

    # --- W2 PR-L1 (2026-04-28): KM -> Playbook feedback loop (flywheel C3) --
    # Per the authors' notes: once N=5 KM entries accumulate for the same
    # symptom_pattern_hash, LearningService sets this flag to True; after a
    # human review it can be reset to False (playbook_service is responsible
    # for clearing it).
    review_required: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
        comment="W2 PR-L1: True=KM 累積觸發人工複審信號(symptom_hash≥5 條),review 後清為 False",
    )

    # --- Timestamps --------------------------------------------------------
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
        onupdate=taipei_now,
    )

    __table_args__ = (
        Index("ix_playbook_status", "status"),
        Index("ix_playbook_trust_score", "trust_score"),
        Index("ix_playbook_created_at", "created_at"),
        # W2 PR-L1: fast lookup of playbooks awaiting human review. Few rows
        # are expected to match, so a partial index keeps it small.
        Index(
            "ix_playbook_review_required",
            "review_required",
            postgresql_where=text("review_required = true"),
        ),
    )
|
||
|
||
|
||
# =============================================================================
|
||
# DynamicBaselineRecord — Phase 4 Holt-Winters 訓練基線持久化
|
||
# ADR-084: 動態基線不能只存 Redis — AI 每天重學「正常」不是在學習
|
||
# 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 4 初始建立
|
||
#
|
||
# 核心鐵律:
|
||
# - 訓練好的 Holt-Winters 模型必須在 PG 長期保存
|
||
# - Redis 為 24h warm cache(加速 is_anomaly() 讀取)
|
||
# - 基線消失 = AI 對「正常」的認識消失 = 每天從頭學習 = 不是 AI
|
||
# =============================================================================
|
||
|
||
class DynamicBaselineRecord(Base):
    """PostgreSQL ORM row holding one trained dynamic-baseline result.

    Per the design notes, after a Holt-Winters training run the result is:
    1. written to PG first (permanent storage), then
    2. written to Redis (24h warm cache to speed up reads).

    Redis key: ``baseline:{metric_name}``

    In PG the primary key is the surrogate UUID ``id``. ``metric_name`` is
    indexed but not unique, so several rows may exist for one metric; the
    design notes treat the most recent row as the effective baseline.
    """

    __tablename__ = "dynamic_baselines"

    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=generate_uuid,
    )

    # --- Baseline identity -------------------------------------------------
    # No ``index=True`` here: ``__table_args__`` already declares the named
    # index ``ix_dynamic_baseline_metric`` on this column. Setting both would
    # create two identical single-column indexes on the same table.
    metric_name: Mapped[str] = mapped_column(
        String(200),
        nullable=False,
        comment="基線識別名 (e.g. cpu_usage_node_mon)",
    )

    # --- Training result (Holt-Winters statistics) -------------------------
    mean: Mapped[float] = mapped_column(
        nullable=False,
        comment="擬合值均值",
    )
    std: Mapped[float] = mapped_column(
        nullable=False,
        comment="殘差標準差",
    )

    # 24h seasonal factors (JSON array; the authors' notes say length 24).
    seasonal_factors: Mapped[list[float]] = mapped_column(
        JSON,
        nullable=False,
        default=list,
        comment="24h 週期季節性因子(乘法形式,均值 ≈ 1.0)",
    )

    # --- Training metadata -------------------------------------------------
    datapoint_count: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=0,
    )
    promql: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        default="",
        comment="訓練使用的 PromQL 查詢",
    )
    # Default look-back window of 336 hours (14 days).
    lookback_hours: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=336,
    )

    # --- Timestamps --------------------------------------------------------
    trained_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
    )

    __table_args__ = (
        Index("ix_dynamic_baseline_metric", "metric_name"),
        Index("ix_dynamic_baseline_trained_at", "trained_at"),
    )
|
||
|
||
|
||
# =============================================================================
|
||
# LogClusterRecord — Phase 4 Drain3 學習到的 Log Pattern 持久化
|
||
# ADR-084: Drain3 模板不能只存 Redis — 每次重啟 AI 把已知 pattern 當新 pattern
|
||
# 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 4 初始建立
|
||
#
|
||
# 核心鐵律:
|
||
# - Drain3 學到的 log cluster template 必須在 PG 長期保存
|
||
# - 新 cluster 事件列表 (log_anomaly:new) 才存 Redis(短期工作記憶)
|
||
# - 基礎知識庫(已學到的 pattern)必須在 PG
|
||
# =============================================================================
|
||
|
||
class LogClusterRecord(Base):
    """Persistent store for a Drain3 log-cluster template.

    Per the design notes, when a new pattern is detected for the first time
    it is:
    1. written to PG (permanent storage), and
    2. pushed onto the Redis list ``log_anomaly:new`` (short-term working
       memory).

    When the same template is detected again, only ``last_seen_at`` and
    ``size`` are updated; no additional PG row is inserted.
    """

    __tablename__ = "log_clusters"

    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=generate_uuid,
    )

    # --- Cluster identity (MD5[:8] of the template) ------------------------
    cluster_id: Mapped[str] = mapped_column(
        String(16),
        nullable=False,
        index=True,
        unique=True,
        comment="模板 MD5[:8].upper(),穩定 ID",
    )

    # --- Drain3 template ---------------------------------------------------
    template: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        comment="Drain3 萃取的 log 模板 (e.g. 'ERROR <*> connection failed to <*>')",
    )

    # --- Statistics --------------------------------------------------------
    size: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=1,
        comment="命中次數(第一次 = 1)",
    )
    source: Mapped[str] = mapped_column(
        String(50),
        nullable=False,
        default="k8s_pod",
        comment="k8s_pod | host_syslog | app_log",
    )

    # --- Sample log --------------------------------------------------------
    # Keeps the raw line that first triggered the cluster, for later analysis.
    sample_log: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
        comment="首次觸發的原始 log 行(前 500 字元)",
    )

    # --- Timestamps --------------------------------------------------------
    first_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
    )
    last_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
        onupdate=taipei_now,
    )

    __table_args__ = (
        Index("ix_log_cluster_first_seen", "first_seen_at"),
        Index("ix_log_cluster_source", "source"),
    )
|
||
|
||
|
||
# =============================================================================
|
||
# AgentSession — Phase 2 多 Agent 辯證 Audit Trail
|
||
# =============================================================================
|
||
|
||
class AgentSession(Base):
    """ADR-082 Phase 2: event log for multi-agent deliberation.

    One row is written for every "turn" an agent takes. ``session_id`` ties
    together all agent turns belonging to the same incident decision.

    Per the design notes this table is append-only (immutable event
    sourcing: rows are added, never deleted), and the Phase 3 learning loop
    depends on it (a successful Critic challenge is used as a negative
    learning signal).

    ADR-082: multi-agent collaboration architecture.
    2026-04-15 ogt + Claude Sonnet 4.6 (APAC): initial Phase 2 version.
    """

    __tablename__ = "agent_sessions"

    id: Mapped[str] = mapped_column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid4()),
        comment="行主鍵(UUID)",
    )
    session_id: Mapped[str] = mapped_column(
        String(36),
        nullable=False,
        comment="辯證 Session ID(一次 Incident 決策的所有 turns 共用同一 session_id)",
    )
    incident_id: Mapped[str] = mapped_column(
        String(50),
        nullable=False,
        comment="關聯 Incident ID",
    )
    agent_role: Mapped[str] = mapped_column(
        String(20),
        nullable=False,
        comment="Agent 角色:diagnostician / solver / reviewer / critic / coordinator",
    )

    # --- Input fingerprint (sha256[:16]) -----------------------------------
    # Used for de-duplication and for tracking cache hits.
    input_hash: Mapped[str] = mapped_column(
        String(16),
        default="",
        nullable=False,
        comment="sha256(input_json)[:16],供查重與快取命中追蹤",
    )

    # --- Agent output ------------------------------------------------------
    # Full JSON, kept for Phase 3 learning and post-incident review.
    output_json: Mapped[dict] = mapped_column(
        JSON,
        default=dict,
        nullable=False,
        comment="Agent 原始輸出(DiagnosisReport / ActionPlan / 等序列化 dict)",
    )

    # --- Quality metrics ---------------------------------------------------
    latency_ms: Mapped[int] = mapped_column(
        Integer,
        default=0,
        nullable=False,
        comment="此 Agent 的執行耗時(ms)",
    )
    vote: Mapped[str] = mapped_column(
        String(20),
        default="abstain",
        nullable=False,
        comment="Agent 投票:approve / reject / request_revision / abstain / degraded",
    )
    degraded: Mapped[bool] = mapped_column(
        default=False,
        nullable=False,
        comment="True = 此 Agent 因熔斷/超時降級,輸出為 rule-based mock",
    )

    # --- Timestamp (Taipei time zone) --------------------------------------
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
    )

    __table_args__ = (
        Index("ix_agent_sessions_session_id", "session_id"),
        Index("ix_agent_sessions_incident_id", "incident_id"),
        Index("ix_agent_sessions_created_at", "created_at"),
        # Look up the turn of a given role within a session (commonly needed
        # when the Coordinator aggregates results).
        Index("ix_agent_sessions_session_role", "session_id", "agent_role"),
    )
|
||
|
||
|
||
# =============================================================================
|
||
# AiGovernanceEvent — Phase 6 自我治理事件溯源(不可刪除)
|
||
# ADR-087: AI 自我治理閉環:SLO 違反 / 信任漂移 / KB 腐爛 / 自我降級
|
||
# 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 6 初始建立
|
||
#
|
||
# 核心鐵律:
|
||
# - 不可變 Event Sourcing — 只 INSERT,禁止 UPDATE/DELETE
|
||
# - 所有治理事件必須落地 PG,SLO dashboard 依賴此表
|
||
# - resolved=True 僅由人工或下次計算時補填,不可自動翻轉未解決項目
|
||
# =============================================================================
|
||
|
||
class AiGovernanceEvent(Base):
    """Record of an AI self-governance event.

    ``event_type`` values:
        slo_violation     - an SLO computation breached its threshold
        trust_drift       - playbook trust distribution is skewed (all high
                            or all low)
        kb_stale          - a KB entry references a deprecated K8s API or
                            Prometheus query
        self_demotion     - confidence threshold raised automatically
                            (self-demotion)
        conservative_mode - consecutive SLO violations switched the whole
                            system to conservative mode
        replay_degraded   - offline replay consistency rate keeps dropping

    Per the design notes the table is treated as immutable: INSERT only,
    UPDATE / DELETE forbidden.
    """

    __tablename__ = "ai_governance_events"

    id: Mapped[str] = mapped_column(
        String(36),
        default=generate_uuid,
        primary_key=True,
        comment="主鍵(UUID)",
    )
    event_type: Mapped[str] = mapped_column(
        String(40),
        nullable=False,
        comment="slo_violation / trust_drift / kb_stale / self_demotion / conservative_mode / replay_degraded",
    )
    triggered_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
        comment="事件觸發時間(台北時區)",
    )
    details: Mapped[dict] = mapped_column(
        JSON,
        default=dict,
        nullable=False,
        comment="事件詳情 JSONB(SLO 數值、漂移分布等)",
    )
    resolved: Mapped[bool] = mapped_column(
        nullable=False,
        default=False,
        comment="是否已解決(人工確認或下次計算恢復正常後補填)",
    )
    resolved_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
        comment="解決時間(僅人工/系統補填,不得自動反轉未解決項目)",
    )

    __table_args__ = (
        Index("ix_ai_governance_event_type", "event_type"),
        Index("ix_ai_governance_triggered_at", "triggered_at"),
        Index("ix_ai_governance_resolved", "resolved"),
    )
|
||
|
||
|
||
# =============================================================================
|
||
# TrustRecordDB - ADR-088 TrustScore 持久化
|
||
# =============================================================================
|
||
|
||
class TrustRecordDB(Base):
    """Persistent trust-score record, keyed by action pattern.

    ADR-088: TrustScoreManager is upgraded from in-memory state to
    PostgreSQL persistence so that scores are not reset when a pod restarts
    and trust can accumulate up to L4 auto-approval.

    Thresholds described by the design notes (enforced outside this model):
        score >= 5:  MEDIUM -> LOW    (executed automatically)
        score >= 10: HIGH   -> MEDIUM (lowered by one level)

    2026-04-17 ogt + Claude Sonnet 4.6 (APAC): Phase 4 trust persistence.
    """

    __tablename__ = "trust_records"

    action_pattern: Mapped[str] = mapped_column(
        String(255),
        primary_key=True,
        comment="操作模式,例如 delete:nginx-frontend-*",
    )
    score: Mapped[int] = mapped_column(
        Integer,
        default=0,
        nullable=False,
        comment="累積信任分數。+1/approve,reject 歸零",
    )

    # --- Lifetime counters -------------------------------------------------
    total_approvals: Mapped[int] = mapped_column(
        Integer,
        default=0,
        nullable=False,
    )
    total_rejections: Mapped[int] = mapped_column(
        Integer,
        default=0,
        nullable=False,
    )

    # --- Most recent approval / rejection ----------------------------------
    last_approval_by: Mapped[str | None] = mapped_column(
        String(100),
        nullable=True,
    )
    last_approval_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    last_rejection_by: Mapped[str | None] = mapped_column(
        String(100),
        nullable=True,
    )
    last_rejection_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )

    # --- Timestamps --------------------------------------------------------
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=taipei_now,
        nullable=False,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=taipei_now,
        onupdate=taipei_now,
        nullable=False,
    )

    __table_args__ = (
        Index("ix_trust_records_score", "score"),
        Index("ix_trust_records_updated", "updated_at"),
    )
|
||
|
||
|
||
# =============================================================================
|
||
# AIProviderVersionHistory - AI Provider 版本歷史
|
||
# 2026-04-27 P3.2.2 by Claude
|
||
# =============================================================================
|
||
|
||
class AIProviderVersionHistory(Base):
    """History of AI-provider version probes.

    Per the design notes, one row is written for every
    ``ModelVersionTracker.run_probe_cycle()`` call. ``changed=True`` means
    the probed version or digest differs from the previous row.

    Migration: apps/api/migrations/p3_2_provider_version_history.sql
    """

    __tablename__ = "ai_provider_version_history"

    id: Mapped[int] = mapped_column(
        Integer,
        autoincrement=True,
        primary_key=True,
    )

    # --- What was probed ---------------------------------------------------
    provider: Mapped[str] = mapped_column(
        String(40),
        index=True,
        nullable=False,
    )
    model: Mapped[str] = mapped_column(
        String(100),
        nullable=False,
    )

    # --- Probe result ------------------------------------------------------
    version: Mapped[str | None] = mapped_column(
        String(200),
        nullable=True,
    )
    digest: Mapped[str | None] = mapped_column(
        String(80),
        nullable=True,
    )
    captured_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=taipei_now,
        nullable=False,
    )

    # --- Change tracking ---------------------------------------------------
    prev_version: Mapped[str | None] = mapped_column(
        String(200),
        nullable=True,
    )
    changed: Mapped[bool] = mapped_column(
        Boolean,
        default=False,
        nullable=False,
    )

    __table_args__ = (
        Index("ix_provider_version_captured", "provider", "captured_at"),
    )
|