From caa6263872b10e6dc52fc668bac57bba69a86e08 Mon Sep 17 00:00:00 2001
From: OoO
Date: Tue, 12 May 2026 23:31:33 +0800
Subject: [PATCH] =?UTF-8?q?=E5=90=8C=E6=AD=A5=20incidents=20=E7=9B=B8?=
 =?UTF-8?q?=E5=AE=B9=E6=AC=84=E4=BD=8D=E5=AF=AB=E5=85=A5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this section is not part of the commit message):
  * Migration 036: the FK existence checks are scoped to the incidents table
    through conrelid, because constraint names are not unique across tables.
  * tests: the legacy playbook_id assertion no longer passes vacuously on the
    "matched_playbook_id" substring.
  * Comments and docstrings were added to the new lines; the blob ids on the
    "index" lines predate these edits.

 database/autoheal_models.py                   |  6 +-
 .../036_normalize_incidents_dual_columns.sql  | 93 +++++++++++++++++++
 services/auto_heal_service.py                 | 26 ++++-
 tests/test_auto_heal_safety.py                | 54 +++++++++++
 4 files changed, 177 insertions(+), 2 deletions(-)
 create mode 100644 migrations/036_normalize_incidents_dual_columns.sql

diff --git a/database/autoheal_models.py b/database/autoheal_models.py
index 862006b..65c55bb 100644
--- a/database/autoheal_models.py
+++ b/database/autoheal_models.py
@@ -111,16 +111,20 @@ class Incident(Base):
     task_name = Column(String(100), nullable=False)
     error_type = Column(String(50), nullable=False)
     error_message = Column(Text, nullable=False)
+    error_traceback = Column(Text)
     traceback_str = Column(Text)
     severity = Column(String(20), default='medium')
     status = Column(String(20), default='open')  # open/healing/closed/escalated
     retry_count = Column(Integer, default=0)
+    playbook_id = Column(Integer, ForeignKey('playbooks.id'), nullable=True)
     matched_playbook_id = Column(Integer, ForeignKey('playbooks.id'), nullable=True)
+    resolved_at = Column(DateTime, nullable=True)
     created_at = Column(DateTime, default=datetime.now)
     updated_at = Column(DateTime, default=datetime.now)
 
     # Relationship
-    playbook = relationship("Playbook", backref="incidents")
+    # incidents now has two FKs to playbooks; pin the join to matched_playbook_id.
+    playbook = relationship("Playbook", foreign_keys=[matched_playbook_id], backref="incidents")
 
 
 class Playbook(Base):
diff --git a/migrations/036_normalize_incidents_dual_columns.sql b/migrations/036_normalize_incidents_dual_columns.sql
new file mode 100644
index 0000000..7e21f7e
--- /dev/null
+++ b/migrations/036_normalize_incidents_dual_columns.sql
@@ -0,0 +1,93 @@
+-- =============================================================================
+-- Migration 036: normalize incidents legacy/current columns
+-- Date: 2026-05-12 (Taipei)
+-- =============================================================================
+-- Background:
+--   Migration 013 created incidents.error_traceback / playbook_id / resolved_at;
+--   the ORM later switched to traceback_str / matched_playbook_id. Migration 031
+--   added the new columns, but create_all-only legacy environments may lack the
+--   legacy columns, and new writes only updated the new columns.
+--
+-- Design:
+--   1. Keep both column sets for backward compatibility; no destructive
+--      drop/rename.
+--   2. Add any missing columns and backfill both directions so analytics
+--      queries can safely COALESCE the pairs.
+--   3. Add the playbook_id and matched_playbook_id FKs; skip any that exist.
+-- =============================================================================
+
+ALTER TABLE IF EXISTS incidents
+    ADD COLUMN IF NOT EXISTS error_traceback TEXT;
+
+ALTER TABLE IF EXISTS incidents
+    ADD COLUMN IF NOT EXISTS traceback_str TEXT;
+
+ALTER TABLE IF EXISTS incidents
+    ADD COLUMN IF NOT EXISTS playbook_id INTEGER;
+
+ALTER TABLE IF EXISTS incidents
+    ADD COLUMN IF NOT EXISTS matched_playbook_id INTEGER;
+
+ALTER TABLE IF EXISTS incidents
+    ADD COLUMN IF NOT EXISTS resolved_at TIMESTAMP;
+
+ALTER TABLE IF EXISTS incidents
+    ALTER COLUMN severity TYPE VARCHAR(20);
+
+-- Backfill each legacy/current pair in both directions; only NULL targets change.
+UPDATE incidents
+SET traceback_str = error_traceback
+WHERE traceback_str IS NULL
+  AND error_traceback IS NOT NULL;
+
+UPDATE incidents
+SET error_traceback = traceback_str
+WHERE error_traceback IS NULL
+  AND traceback_str IS NOT NULL;
+
+UPDATE incidents
+SET matched_playbook_id = playbook_id
+WHERE matched_playbook_id IS NULL
+  AND playbook_id IS NOT NULL;
+
+UPDATE incidents
+SET playbook_id = matched_playbook_id
+WHERE playbook_id IS NULL
+  AND matched_playbook_id IS NOT NULL;
+
+-- pg_constraint.conname is not unique across tables, so each existence check
+-- is scoped to incidents through conrelid.
+DO $$
+BEGIN
+    IF NOT EXISTS (
+        SELECT 1
+        FROM pg_constraint
+        WHERE conname = 'incidents_playbook_id_fkey'
+          AND conrelid = 'incidents'::regclass
+    ) THEN
+        ALTER TABLE incidents
+            ADD CONSTRAINT incidents_playbook_id_fkey
+            FOREIGN KEY (playbook_id)
+            REFERENCES playbooks(id);
+    END IF;
+
+    IF NOT EXISTS (
+        SELECT 1
+        FROM pg_constraint
+        WHERE conname = 'incidents_matched_playbook_id_fkey'
+          AND conrelid = 'incidents'::regclass
+    ) THEN
+        ALTER TABLE incidents
+            ADD CONSTRAINT incidents_matched_playbook_id_fkey
+            FOREIGN KEY (matched_playbook_id)
+            REFERENCES playbooks(id);
+    END IF;
+END $$;
+
+CREATE INDEX IF NOT EXISTS idx_incidents_matched_playbook_id
+    ON incidents(matched_playbook_id);
+
+DO $$
+BEGIN
+    RAISE NOTICE 'Migration 036 done: incidents legacy/current columns normalized';
+END $$;
diff --git a/services/auto_heal_service.py b/services/auto_heal_service.py
index 96f1bf1..9137748 100644
--- a/services/auto_heal_service.py
+++ b/services/auto_heal_service.py
@@ -232,6 +232,13 @@ class AutoHealService:
             record_autoheal_action(action="NO_PLAYBOOK", error_type=error_type, success=False)
             return AutoHealResult(success=False, action=None, message=msg)
 
+        # Stash the matched playbook id under both the current and the legacy
+        # context key; the incident status UPDATE reads these keys back.
+        playbook_id = int(playbook.get("id") or 0)
+        if playbook_id > 0:
+            context["matched_playbook_id"] = playbook_id
+            context["playbook_id"] = playbook_id
+
         if playbook["action_type"] not in _ALLOWED_ACTION_TYPES:
             msg = f"action_type '{playbook['action_type']}' is not allowed"
             self._log.warning("[AutoHeal] %s", msg)
@@ -317,6 +324,8 @@ class AutoHealService:
                 error_type=error_type,
                 error_message=str(context.get("exception") or context.get("error_message") or error_type)[:2000],
                 traceback_str=str(context.get("traceback_str") or "")[:8000] or None,
+                # Mirror the same truncated traceback into the legacy column.
+                error_traceback=str(context.get("traceback_str") or "")[:8000] or None,
                 severity=str(context.get("severity") or "medium"),
                 status="healing",
                 retry_count=0,
@@ -569,15 +578,30 @@ class AutoHealService:
         session = None
         try:
             session = get_session()
+            status = "closed" if result.success else "escalated"
+            # Accept either context key; a missing or zero id becomes None so
+            # the COALESCEs below keep whatever ids are already stored.
+            playbook_id = int(
+                context.get("matched_playbook_id")
+                or context.get("playbook_id")
+                or 0
+            ) or None
             session.execute(
                 text("""
                     UPDATE incidents
                     SET status = :status,
+                        matched_playbook_id = COALESCE(:playbook_id, matched_playbook_id),
+                        playbook_id = COALESCE(:playbook_id, playbook_id),
+                        resolved_at = CASE
+                            WHEN :status = 'closed' THEN COALESCE(resolved_at, NOW())
+                            ELSE resolved_at
+                        END,
                         updated_at = NOW()
                     WHERE id = :incident_id
                 """),
                 {
-                    "status": "closed" if result.success else "escalated",
+                    "status": status,
+                    "playbook_id": playbook_id,
                     "incident_id": int(incident_id),
                 },
             )
diff --git a/tests/test_auto_heal_safety.py b/tests/test_auto_heal_safety.py
index 3e9f4f0..b3fa2ae 100644
--- a/tests/test_auto_heal_safety.py
+++ b/tests/test_auto_heal_safety.py
@@ -40,3 +40,57 @@ def test_auto_heal_code_fix_writes_audit(monkeypatch):
     assert result.success is True
     assert calls
     assert calls[0][2].action == "CODE_FIX"
+
+
+def test_incident_model_keeps_legacy_and_current_columns():
+    """The incidents table metadata must expose both legacy and current columns."""
+    from database.manager import Base
+
+    columns = set(Base.metadata.tables["incidents"].columns.keys())
+
+    assert {
+        "error_traceback",
+        "traceback_str",
+        "playbook_id",
+        "matched_playbook_id",
+        "resolved_at",
+    } <= columns
+
+
+def test_auto_heal_status_update_backfills_dual_playbook_columns(monkeypatch):
+    """_update_incident_status must write the playbook id to both id columns."""
+    from services.auto_heal_service import AutoHealResult, AutoHealService
+
+    captured = {}
+
+    class Session:
+        def execute(self, stmt, params):
+            captured["sql"] = str(stmt)
+            captured["params"] = params
+
+        def commit(self):
+            captured["committed"] = True
+
+        def rollback(self):
+            captured["rolled_back"] = True
+
+        def close(self):
+            captured["closed"] = True
+
+    monkeypatch.setattr("services.auto_heal_service.get_session", lambda: Session())
+
+    svc = AutoHealService()
+    svc._update_incident_status(
+        {"incident_id": 123, "matched_playbook_id": 77},
+        AutoHealResult(True, "ALERT_ONLY", "ok"),
+    )
+
+    assert "matched_playbook_id" in captured["sql"]
+    # "playbook_id" alone is a substring of "matched_playbook_id", so pin the
+    # legacy column's own assignment instead.
+    assert "playbook_id = COALESCE(:playbook_id, playbook_id)" in captured["sql"]
+    assert "resolved_at" in captured["sql"]
+    assert captured["params"]["status"] == "closed"
+    assert captured["params"]["playbook_id"] == 77
+    assert captured["params"]["incident_id"] == 123
+    assert captured["committed"] is True