From de2d34d4cd08411eb473be09d14757a87803c4f8 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 20 Apr 2026 20:18:03 +0800 Subject: [PATCH] =?UTF-8?q?fix(playbook):=20C1-C4=20=E5=85=A8=E6=B5=81?= =?UTF-8?q?=E7=A8=8B=E4=B8=B2=E6=8E=A5=20=E2=80=94=20evolver=E4=BF=9D?= =?UTF-8?q?=E8=AD=B7+seeder=E5=BE=A9=E6=B4=BB+=E8=A6=8F=E5=89=87=E5=8D=B3?= =?UTF-8?q?=E6=99=82=E5=BB=BA=E7=AB=8B+watchdog=20W-4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit C1: playbook_evolver — yaml_rule source playbooks 加 YAML_RULE guard, evolver 不再封存 seeder 建立的 APPROVED playbook,保護自動修復鏈路 C2: playbook_seed_service — idempotency SQL 排除 DEPRECATED 記錄, evolver 封存後重啟可復活 yaml_rule playbooks C3: alert_rule_engine — AI 自動生成規則成功後立即呼叫 seed_playbooks_from_rules(), 不等下次重啟即可建立對應 APPROVED Playbook C4: ai_slo_watchdog_job — 新增 W-4 APPROVED playbook 數量為 0 告警, 鏈路斷裂立即 TYPE-8M;total checks 由 3 升為 4 Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/api/src/jobs/ai_slo_watchdog_job.py | 20 ++++++++++++++++++- apps/api/src/services/alert_rule_engine.py | 4 ++++ apps/api/src/services/playbook_evolver.py | 6 +++++- .../api/src/services/playbook_seed_service.py | 5 ++++- 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/apps/api/src/jobs/ai_slo_watchdog_job.py b/apps/api/src/jobs/ai_slo_watchdog_job.py index 02b1bc6a..74978187 100644 --- a/apps/api/src/jobs/ai_slo_watchdog_job.py +++ b/apps/api/src/jobs/ai_slo_watchdog_job.py @@ -79,8 +79,16 @@ async def _check_once() -> None: except Exception as e: logger.warning("watchdog_w3_flywheel_check_failed", error=str(e)) + # W-4: 無 APPROVED Playbook(自動修復鏈路斷裂) + try: + approved_count = await _count_approved_playbooks() + if approved_count == 0: + violations.append("無 APPROVED Playbook — 自動修復鏈路斷裂(evolver 可能全部封存)") + except Exception as e: + logger.warning("watchdog_w4_playbook_check_failed", error=str(e)) + if not violations: - logger.debug("ai_slo_watchdog_all_ok", checks=3) + logger.debug("ai_slo_watchdog_all_ok", checks=4) return # 去重:violations 相同內容 1 小時內不重複發 @@ -146,3 +154,13 @@ async def _count_pending_no_tg_sent() -> int: silent += 1 return silent + + +async def _count_approved_playbooks() -> int: + """查詢 APPROVED 狀態 Playbook 數量,為 0 代表自動修復鏈路斷裂。""" + from sqlalchemy import text as sa_text + async with get_db_context() as db: + result = await db.execute( + sa_text("SELECT COUNT(*) FROM playbooks WHERE status = 'approved'") + ) + return result.scalar() or 0 diff --git a/apps/api/src/services/alert_rule_engine.py b/apps/api/src/services/alert_rule_engine.py index c8cda91e..5a2e6cd5 100644 --- a/apps/api/src/services/alert_rule_engine.py +++ b/apps/api/src/services/alert_rule_engine.py @@ -716,6 +716,10 @@ async def auto_generate_rule( success = _append_rule_to_yaml(yaml_block, alertname_safe) if success: logger.info("auto_rule_success", alertname=alertname_safe, rule_id=rule_id) + # 立即為新規則建立 APPROVED Playbook(不等下次重啟) + import asyncio as _asyncio + from src.services.playbook_seed_service import seed_playbooks_from_rules + _asyncio.create_task(seed_playbooks_from_rules()) else: logger.warning("auto_rule_failed_validation", alertname=alertname_safe) diff --git a/apps/api/src/services/playbook_evolver.py b/apps/api/src/services/playbook_evolver.py index bcd439b9..3859a7bf 100644 --- a/apps/api/src/services/playbook_evolver.py +++ b/apps/api/src/services/playbook_evolver.py @@ -28,7 +28,7 @@ from datetime import timedelta import structlog -from src.models.playbook import Playbook, PlaybookStatus +from src.models.playbook import Playbook, PlaybookSource, PlaybookStatus from src.utils.timezone import now_taipei logger = structlog.get_logger(__name__) @@ -129,6 +129,8 @@ async def _archive_low_trust(playbooks: list[Playbook], report: EvolverReport) - for pb in playbooks: if pb.status == PlaybookStatus.DEPRECATED: continue + if pb.source == PlaybookSource.YAML_RULE: + continue # yaml_rule playbooks 由 seeder 管理,不受 trust 封存,保護自動修復鏈路 if pb.trust_score < TRUST_ARCHIVE_THRESHOLD: try: await service.update_with_validation( @@ -164,6 +166,8 @@ async def _archive_dormant(playbooks: list[Playbook], report: EvolverReport) -> for pb in playbooks: if pb.status == PlaybookStatus.DEPRECATED: continue + if pb.source == PlaybookSource.YAML_RULE: + continue # yaml_rule playbooks 由 seeder 管理,不受休眠封存,保護自動修復鏈路 if pb.last_used_at is None: # 從未使用過 — 只在 trust 低於閾值時封存 if pb.trust_score >= DORMANT_TRUST_THRESHOLD: diff --git a/apps/api/src/services/playbook_seed_service.py b/apps/api/src/services/playbook_seed_service.py index 2f45b2fa..0417ce6c 100644 --- a/apps/api/src/services/playbook_seed_service.py +++ b/apps/api/src/services/playbook_seed_service.py @@ -49,7 +49,10 @@ async def seed_playbooks_from_rules() -> None: from sqlalchemy import text as sa_text async with get_db_context() as db: rows = await db.execute( - sa_text("SELECT name FROM playbooks WHERE source = 'yaml_rule'") + sa_text( + "SELECT name FROM playbooks WHERE source = 'yaml_rule'" + " AND status != 'deprecated'" + ) ) existing_names = {r[0] for r in rows.fetchall()}