-- Migration 013: AIOps 自動修復三張表 -- incidents / playbooks / heal_logs -- 建立日期:2026-04-19 -- ───────────────────────────────────────────────── -- 表 1: playbooks (PlayBook 規則庫) -- ───────────────────────────────────────────────── CREATE TABLE IF NOT EXISTS playbooks ( id SERIAL PRIMARY KEY, name VARCHAR(200) NOT NULL UNIQUE, error_type VARCHAR(50) NOT NULL, match_pattern TEXT NOT NULL, -- JSON 陣列 severity_min VARCHAR(5) DEFAULT 'P3', action_type VARCHAR(30) NOT NULL, -- SSH_CMD / DOCKER_RESTART / ALERT_ONLY / WAIT_RETRY action_params TEXT, -- JSON 物件 cooldown_min INTEGER DEFAULT 30, max_retries INTEGER DEFAULT 3, is_active BOOLEAN DEFAULT TRUE, success_count INTEGER DEFAULT 0, fail_count INTEGER DEFAULT 0, km_synced BOOLEAN DEFAULT FALSE, created_at TIMESTAMP NOT NULL DEFAULT NOW(), updated_at TIMESTAMP NOT NULL DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_playbook_error_type ON playbooks(error_type, is_active); -- ───────────────────────────────────────────────── -- 表 2: incidents (事件主表) -- ───────────────────────────────────────────────── CREATE TABLE IF NOT EXISTS incidents ( id SERIAL PRIMARY KEY, task_name VARCHAR(100) NOT NULL, error_type VARCHAR(50) NOT NULL, error_message TEXT NOT NULL, error_traceback TEXT, severity VARCHAR(5) NOT NULL DEFAULT 'P2', status VARCHAR(20) NOT NULL DEFAULT 'open', playbook_id INTEGER REFERENCES playbooks(id), retry_count INTEGER DEFAULT 0, resolved_at TIMESTAMP, created_at TIMESTAMP NOT NULL DEFAULT NOW(), updated_at TIMESTAMP NOT NULL DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_incident_status_created ON incidents(status, created_at); CREATE INDEX IF NOT EXISTS idx_incident_task_error ON incidents(task_name, error_type); -- ───────────────────────────────────────────────── -- 表 3: heal_logs (修復執行紀錄) -- ───────────────────────────────────────────────── CREATE TABLE IF NOT EXISTS heal_logs ( id SERIAL PRIMARY KEY, incident_id INTEGER NOT NULL REFERENCES incidents(id), playbook_id INTEGER REFERENCES playbooks(id), action_type VARCHAR(30), action_detail TEXT, result VARCHAR(20) DEFAULT 'pending', -- success / failed / skipped result_output TEXT, duration_ms FLOAT DEFAULT 0, created_at TIMESTAMP NOT NULL DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_heal_log_incident ON heal_logs(incident_id, created_at); CREATE INDEX IF NOT EXISTS idx_heal_log_result ON heal_logs(result, created_at); -- ───────────────────────────────────────────────── -- 種子 PlayBook 資料(首次初始化,已存在則略過) -- ───────────────────────────────────────────────── INSERT INTO playbooks (name, error_type, match_pattern, severity_min, action_type, action_params, cooldown_min, max_retries) SELECT * FROM (VALUES ( 'Docker DNS 解析失敗修復', 'DNS_FAIL', '["name resolution", "could not translate host name", "Temporary failure in name resolution"]', 'P2', 'ALERT_ONLY', '{"message": "DB/DNS 異常,依 ADR-011 不重啟 momo-db,轉人工檢查。"}', 30, 3 ), ( 'DB 連線被拒修復', 'DB_UNREACHABLE', '["connection refused", "Connection reset by peer", "could not connect to server"]', 'P2', 'ALERT_ONLY', '{"message": "DB 無法連線,依 ADR-011 不重啟 momo-db,轉人工檢查。"}', 30, 3 ), ( 'App OOM 自動重啟', 'OOM', '["SIGKILL", "out of memory", "Worker was sent SIGKILL", "MemoryError"]', 'P1', 'DOCKER_RESTART', '{"container": "momo-pro-system"}', 60, 2 ), ( 'Scheduler OOM 自動重啟', 'OOM', '["SIGKILL", "Worker was sent SIGKILL"]', 'P1', 'DOCKER_RESTART', '{"container": "momo-scheduler"}', 60, 2 ), ( 'PostgreSQL SSL 連線中斷', 'SSL_FAIL', '["SSL connection has been closed unexpectedly", "SSL SYSCALL error"]', 'P2', 'DOCKER_RESTART', '{"container": "momo-pro-system"}', 15, 3 ), ( 'Google Drive 認證失敗告警', 'AUTH_FAIL', '["invalid_grant", "google_token.pickle", "Token has been expired or revoked"]', 'P2', 'ALERT_ONLY', '{"message": "Google Drive OAuth Token 已過期,請人工重新認證。參閱 docs/guides/google_drive_setup.md"}', 240, 1 ), ( '爬蟲 HTTP 429 限流等待', 'CRAWLER_FAIL', '["429 Too Many Requests", "rate limit", "Retry-After"]', 'P3', 'WAIT_RETRY', '{"wait_minutes": 30}', 30, 2 ) ) AS v(name, error_type, match_pattern, severity_min, action_type, action_params, cooldown_min, max_retries) WHERE NOT EXISTS (SELECT 1 FROM playbooks WHERE playbooks.name = v.name);