# Review notes (text before the first "diff --git" header is skipped by git apply):
# * Line structure was restored from a collapsed paste; the indentation of the
#   context lines is inferred and must be checked against the target tree.
# * Added lines were edited during review, so the post-image blob ids on the
#   "index" lines no longer match the patched files.
diff --git a/apps/api/src/jobs/ai_slo_watchdog_job.py b/apps/api/src/jobs/ai_slo_watchdog_job.py
index 02b1bc6a..74978187 100644
--- a/apps/api/src/jobs/ai_slo_watchdog_job.py
+++ b/apps/api/src/jobs/ai_slo_watchdog_job.py
@@ -79,8 +79,16 @@ async def _check_once() -> None:
     except Exception as e:
         logger.warning("watchdog_w3_flywheel_check_failed", error=str(e))
 
+    # W-4: no APPROVED playbook (auto-remediation chain is broken)
+    try:
+        approved_count = await _count_approved_playbooks()
+        if approved_count == 0:
+            violations.append("無 APPROVED Playbook — 自動修復鏈路斷裂(evolver 可能全部封存)")
+    except Exception as e:
+        logger.warning("watchdog_w4_playbook_check_failed", error=str(e))
+
     if not violations:
-        logger.debug("ai_slo_watchdog_all_ok", checks=3)
+        logger.debug("ai_slo_watchdog_all_ok", checks=4)
         return
 
     # 去重:violations 相同內容 1 小時內不重複發
@@ -146,3 +154,17 @@ async def _count_pending_no_tg_sent() -> int:
             silent += 1
 
     return silent
+
+
+async def _count_approved_playbooks() -> int:
+    """Return the number of playbooks whose status is 'approved'.
+
+    A result of 0 means the auto-remediation chain is broken.
+    """
+    from sqlalchemy import text as sa_text
+
+    async with get_db_context() as db:
+        result = await db.execute(
+            sa_text("SELECT COUNT(*) FROM playbooks WHERE status = 'approved'")
+        )
+        return result.scalar() or 0
diff --git a/apps/api/src/services/alert_rule_engine.py b/apps/api/src/services/alert_rule_engine.py
index c8cda91e..5a2e6cd5 100644
--- a/apps/api/src/services/alert_rule_engine.py
+++ b/apps/api/src/services/alert_rule_engine.py
@@ -716,6 +716,19 @@ async def auto_generate_rule(
     success = _append_rule_to_yaml(yaml_block, alertname_safe)
     if success:
         logger.info("auto_rule_success", alertname=alertname_safe, rule_id=rule_id)
+        # Seed an APPROVED playbook for the new rule right away instead of
+        # waiting for the next restart. Awaited rather than fired through
+        # create_task: the event loop only keeps weak references to tasks, and
+        # awaiting lets a seeding failure be logged here with the alert name.
+        from src.services.playbook_seed_service import seed_playbooks_from_rules
+        try:
+            await seed_playbooks_from_rules()
+        except Exception as e:
+            logger.warning(
+                "auto_rule_playbook_seed_failed",
+                alertname=alertname_safe,
+                error=str(e),
+            )
     else:
         logger.warning("auto_rule_failed_validation", alertname=alertname_safe)
 
diff --git a/apps/api/src/services/playbook_evolver.py b/apps/api/src/services/playbook_evolver.py
index bcd439b9..3859a7bf 100644
--- a/apps/api/src/services/playbook_evolver.py
+++ b/apps/api/src/services/playbook_evolver.py
@@ -28,7 +28,7 @@ from datetime import timedelta
 
 import structlog
 
-from src.models.playbook import Playbook, PlaybookStatus
+from src.models.playbook import Playbook, PlaybookSource, PlaybookStatus
 from src.utils.timezone import now_taipei
 
 logger = structlog.get_logger(__name__)
@@ -129,6 +129,10 @@ async def _archive_low_trust(playbooks: list[Playbook], report: EvolverReport) -
     for pb in playbooks:
         if pb.status == PlaybookStatus.DEPRECATED:
             continue
+        # yaml_rule playbooks are managed by the seeder; exempt them from
+        # trust-based archiving to protect the auto-remediation chain.
+        if pb.source == PlaybookSource.YAML_RULE:
+            continue
         if pb.trust_score < TRUST_ARCHIVE_THRESHOLD:
             try:
                 await service.update_with_validation(
@@ -164,6 +168,10 @@ async def _archive_dormant(playbooks: list[Playbook], report: EvolverReport) ->
     for pb in playbooks:
         if pb.status == PlaybookStatus.DEPRECATED:
             continue
+        # yaml_rule playbooks are managed by the seeder; exempt them from
+        # dormancy-based archiving to protect the auto-remediation chain.
+        if pb.source == PlaybookSource.YAML_RULE:
+            continue
         if pb.last_used_at is None:
             # 從未使用過 — 只在 trust 低於閾值時封存
             if pb.trust_score >= DORMANT_TRUST_THRESHOLD:
diff --git a/apps/api/src/services/playbook_seed_service.py b/apps/api/src/services/playbook_seed_service.py
index 2f45b2fa..0417ce6c 100644
--- a/apps/api/src/services/playbook_seed_service.py
+++ b/apps/api/src/services/playbook_seed_service.py
@@ -49,7 +49,13 @@ async def seed_playbooks_from_rules() -> None:
     from sqlalchemy import text as sa_text
     async with get_db_context() as db:
+        # NOTE(review): deprecated yaml_rule rows no longer count as existing,
+        # presumably so archived playbooks get re-seeded - confirm that
+        # playbooks.name has no UNIQUE constraint before relying on this.
         rows = await db.execute(
-            sa_text("SELECT name FROM playbooks WHERE source = 'yaml_rule'")
+            sa_text(
+                "SELECT name FROM playbooks WHERE source = 'yaml_rule'"
+                " AND status != 'deprecated'"
+            )
         )
         existing_names = {r[0] for r in rows.fetchall()}
 