Some checks failed
CD Pipeline / deploy (push) Failing after 5m18s
🔴 Critical - auto_heal_service: 補 import re + sqlalchemy.text + 修正 orchestrator 變數名 + autoheal_playbook→playbooks 表名 + _alert_and_store cooldown 修復 - aider_heal_executor: shell injection 改 shell=False + list 參數 - docker-compose: DISABLE_LOGIN 改 env var + 移除密碼 fallback + POSTGRES_HOST 修正 - app.py: /api/backup /api/run_task 等 6 個管理 API 加 @login_required - config.py + pg_sync + e2e_test: 移除 wooo_pg_2026 hardcoded 密碼 fallback - pg_backup.sh: 移除 TELEGRAM_TOKEN= 中間變數,直接用 $TELEGRAM_BOT_TOKEN - migration 014: trigger_pattern→match_pattern + 補 error_type NOT NULL 欄位 🟡 High - telegram_bot_service: str(e) 改通用訊息 + session try/finally + 移除 pa:/pr: 舊 callback - run_scheduler: ElephantAlpha thread 死亡監控 + 自動重啟 + Telegram 告警 + agent_context 03:30 TTL 定時清理任務 - openclaw_learning_service: build_rag_context 兩路徑加 .limit(200) - hooks: commit-quality + momo-prod-guard 空 catch 改 stderr+exit(1) - scripts/code_review: auto_yes 預設改 false - db_backup_service: PGPASSWORD 透過 env dict 傳遞 📦 Migrations - 013_autoheal: 修正建表順序 playbooks→incidents(外鍵前向引用) - 018_add_missing_indexes: heal_logs/incidents 外鍵索引 + cleanup_expired_agent_context() 🟢 Infrastructure - requirements.txt: 加版本下界 Flask>=2.3 SQLAlchemy>=1.4 等 - cd.yaml: 新增 run_scheduler.py + run_telegram_bot.py 監聽路徑 - .gitignore: insert_playbook_local.py 加入忽略 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
130 lines
5.8 KiB
SQL
130 lines
5.8 KiB
SQL
-- Migration 013: three tables for AIOps automated remediation
-- playbooks / incidents / heal_logs (created in this order)
-- Created: 2026-04-19
|
||
|
||
-- ─────────────────────────────────────────────────
-- Table 1: playbooks (playbook rule library)
-- Created first: incidents and heal_logs both reference playbooks(id).
-- ─────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS playbooks (
    id             SERIAL,
    name           VARCHAR(200) NOT NULL,
    error_type     VARCHAR(50)  NOT NULL,
    match_pattern  TEXT         NOT NULL,               -- JSON array, stored as text
    severity_min   VARCHAR(5)   DEFAULT 'P3',
    action_type    VARCHAR(30)  NOT NULL,               -- SSH_CMD / DOCKER_RESTART / ALERT_ONLY / WAIT_RETRY
    action_params  TEXT,                                -- JSON object, stored as text
    cooldown_min   INTEGER      DEFAULT 30,
    max_retries    INTEGER      DEFAULT 3,
    is_active      BOOLEAN      DEFAULT TRUE,
    success_count  INTEGER      DEFAULT 0,
    fail_count     INTEGER      DEFAULT 0,
    km_synced      BOOLEAN      DEFAULT FALSE,
    created_at     TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at     TIMESTAMP    NOT NULL DEFAULT NOW(),
    -- Constraints are left unnamed so PostgreSQL assigns its default names.
    PRIMARY KEY (id),
    UNIQUE (name)
);

-- Composite index on (error_type, is_active).
CREATE INDEX IF NOT EXISTS idx_playbook_error_type
    ON playbooks (error_type, is_active);
|
||
|
||
-- ─────────────────────────────────────────────────
-- Table 2: incidents (main incident table)
-- ─────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS incidents (
    id               SERIAL,
    task_name        VARCHAR(100) NOT NULL,
    error_type       VARCHAR(50)  NOT NULL,
    error_message    TEXT         NOT NULL,
    error_traceback  TEXT,
    severity         VARCHAR(5)   NOT NULL DEFAULT 'P2',
    status           VARCHAR(20)  NOT NULL DEFAULT 'open',
    playbook_id      INTEGER,                           -- nullable reference to playbooks(id)
    retry_count      INTEGER      DEFAULT 0,
    resolved_at      TIMESTAMP,
    created_at       TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at       TIMESTAMP    NOT NULL DEFAULT NOW(),
    -- Constraints are left unnamed so PostgreSQL assigns its default names.
    PRIMARY KEY (id),
    FOREIGN KEY (playbook_id) REFERENCES playbooks (id)
);

-- Composite index on (status, created_at).
CREATE INDEX IF NOT EXISTS idx_incident_status_created
    ON incidents (status, created_at);

-- Composite index on (task_name, error_type).
CREATE INDEX IF NOT EXISTS idx_incident_task_error
    ON incidents (task_name, error_type);
|
||
|
||
-- ─────────────────────────────────────────────────
-- Table 3: heal_logs (remediation execution log)
-- ─────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS heal_logs (
    id             SERIAL,
    incident_id    INTEGER      NOT NULL,               -- required reference to incidents(id)
    playbook_id    INTEGER,                             -- nullable reference to playbooks(id)
    action_type    VARCHAR(30),
    action_detail  TEXT,
    result         VARCHAR(20)  DEFAULT 'pending',      -- success / failed / skipped
    result_output  TEXT,
    duration_ms    FLOAT        DEFAULT 0,
    created_at     TIMESTAMP    NOT NULL DEFAULT NOW(),
    -- Constraints are left unnamed so PostgreSQL assigns its default names.
    PRIMARY KEY (id),
    FOREIGN KEY (incident_id) REFERENCES incidents (id),
    FOREIGN KEY (playbook_id) REFERENCES playbooks (id)
);

-- Composite index on (incident_id, created_at).
CREATE INDEX IF NOT EXISTS idx_heal_log_incident
    ON heal_logs (incident_id, created_at);

-- Composite index on (result, created_at).
CREATE INDEX IF NOT EXISTS idx_heal_log_result
    ON heal_logs (result, created_at);
|
||
|
||
-- ─────────────────────────────────────────────────
-- Seed playbook data (first-time initialization).
-- A seed row is inserted only when no playbook with the same name exists,
-- so rows that are already present are skipped.
-- ─────────────────────────────────────────────────
WITH seed (
    name,
    error_type,
    match_pattern,
    severity_min,
    action_type,
    action_params,
    cooldown_min,
    max_retries
) AS (
    VALUES
        (
            'Docker DNS 解析失敗修復',
            'DNS_FAIL',
            '["name resolution", "could not translate host name", "Temporary failure in name resolution"]',
            'P2',
            'DOCKER_RESTART',
            '{"container": "momo-db"}',
            30,
            3
        ),
        (
            'DB 連線被拒修復',
            'DB_UNREACHABLE',
            '["connection refused", "Connection reset by peer", "could not connect to server"]',
            'P2',
            'DOCKER_RESTART',
            '{"container": "momo-db", "compose": true}',
            30,
            3
        ),
        (
            'App OOM 自動重啟',
            'OOM',
            '["SIGKILL", "out of memory", "Worker was sent SIGKILL", "MemoryError"]',
            'P1',
            'DOCKER_RESTART',
            '{"container": "momo-pro-system"}',
            60,
            2
        ),
        (
            'Scheduler OOM 自動重啟',
            'OOM',
            '["SIGKILL", "Worker was sent SIGKILL"]',
            'P1',
            'DOCKER_RESTART',
            '{"container": "momo-scheduler"}',
            60,
            2
        ),
        (
            'PostgreSQL SSL 連線中斷',
            'SSL_FAIL',
            '["SSL connection has been closed unexpectedly", "SSL SYSCALL error"]',
            'P2',
            'DOCKER_RESTART',
            '{"container": "momo-pro-system"}',
            15,
            3
        ),
        (
            'Google Drive 認證失敗告警',
            'AUTH_FAIL',
            '["invalid_grant", "google_token.pickle", "Token has been expired or revoked"]',
            'P2',
            'ALERT_ONLY',
            '{"message": "Google Drive OAuth Token 已過期,請人工重新認證。參閱 docs/guides/google_drive_setup.md"}',
            240,
            1
        ),
        (
            '爬蟲 HTTP 429 限流等待',
            'CRAWLER_FAIL',
            '["429 Too Many Requests", "rate limit", "Retry-After"]',
            'P3',
            'WAIT_RETRY',
            '{"wait_minutes": 30}',
            30,
            2
        )
)
INSERT INTO playbooks (
    name,
    error_type,
    match_pattern,
    severity_min,
    action_type,
    action_params,
    cooldown_min,
    max_retries
)
SELECT
    seed.name,
    seed.error_type,
    seed.match_pattern,
    seed.severity_min,
    seed.action_type,
    seed.action_params,
    seed.cooldown_min,
    seed.max_retries
FROM seed
WHERE NOT EXISTS (
    SELECT 1
    FROM playbooks AS existing
    WHERE existing.name = seed.name
);
|