fix(playbook): C1-C4 全流程串接 — evolver保護+seeder復活+規則即時建立+watchdog W-4
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
C1: playbook_evolver — yaml_rule source playbooks 加 YAML_RULE guard,
evolver 不再封存 seeder 建立的 APPROVED playbook,保護自動修復鏈路
C2: playbook_seed_service — idempotency SQL 排除 DEPRECATED 記錄,
evolver 封存後重啟可復活 yaml_rule playbooks
C3: alert_rule_engine — AI 自動生成規則成功後立即呼叫 seed_playbooks_from_rules(),
不等下次重啟即可建立對應 APPROVED Playbook
C4: ai_slo_watchdog_job — 新增 W-4 APPROVED playbook 數量為 0 告警,
鏈路斷裂立即 TYPE-8M;total checks 由 3 升為 4
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -79,8 +79,16 @@ async def _check_once() -> None:
|
||||
except Exception as e:
|
||||
logger.warning("watchdog_w3_flywheel_check_failed", error=str(e))
|
||||
|
||||
# W-4: 無 APPROVED Playbook(自動修復鏈路斷裂)
|
||||
try:
|
||||
approved_count = await _count_approved_playbooks()
|
||||
if approved_count == 0:
|
||||
violations.append("無 APPROVED Playbook — 自動修復鏈路斷裂(evolver 可能全部封存)")
|
||||
except Exception as e:
|
||||
logger.warning("watchdog_w4_playbook_check_failed", error=str(e))
|
||||
|
||||
if not violations:
|
||||
logger.debug("ai_slo_watchdog_all_ok", checks=3)
|
||||
logger.debug("ai_slo_watchdog_all_ok", checks=4)
|
||||
return
|
||||
|
||||
# 去重:violations 相同內容 1 小時內不重複發
|
||||
@@ -146,3 +154,13 @@ async def _count_pending_no_tg_sent() -> int:
|
||||
silent += 1
|
||||
|
||||
return silent
|
||||
|
||||
|
||||
async def _count_approved_playbooks() -> int:
    """Return how many rows in ``playbooks`` have status ``'approved'``.

    The watchdog's W-4 check treats a result of 0 as a broken
    auto-remediation chain and records a violation for it.

    Returns:
        The row count reported by the database, or 0 when the scalar
        result is empty/falsy.
    """
    from sqlalchemy import text as sa_text

    async with get_db_context() as session:
        count_query = sa_text(
            "SELECT COUNT(*) FROM playbooks WHERE status = 'approved'"
        )
        outcome = await session.execute(count_query)
        approved_total = outcome.scalar()
        # Normalise a missing/None scalar to 0 so callers always get an int.
        return approved_total if approved_total else 0
|
||||
|
||||
@@ -716,6 +716,10 @@ async def auto_generate_rule(
|
||||
success = _append_rule_to_yaml(yaml_block, alertname_safe)
|
||||
if success:
|
||||
logger.info("auto_rule_success", alertname=alertname_safe, rule_id=rule_id)
|
||||
# 立即為新規則建立 APPROVED Playbook(不等下次重啟)
|
||||
import asyncio as _asyncio
|
||||
from src.services.playbook_seed_service import seed_playbooks_from_rules
|
||||
_asyncio.create_task(seed_playbooks_from_rules())
|
||||
else:
|
||||
logger.warning("auto_rule_failed_validation", alertname=alertname_safe)
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ from datetime import timedelta
|
||||
|
||||
import structlog
|
||||
|
||||
from src.models.playbook import Playbook, PlaybookStatus
|
||||
from src.models.playbook import Playbook, PlaybookSource, PlaybookStatus
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
@@ -129,6 +129,8 @@ async def _archive_low_trust(playbooks: list[Playbook], report: EvolverReport) -
|
||||
for pb in playbooks:
|
||||
if pb.status == PlaybookStatus.DEPRECATED:
|
||||
continue
|
||||
if pb.source == PlaybookSource.YAML_RULE:
|
||||
continue # yaml_rule playbooks 由 seeder 管理,不受 trust 封存,保護自動修復鏈路
|
||||
if pb.trust_score < TRUST_ARCHIVE_THRESHOLD:
|
||||
try:
|
||||
await service.update_with_validation(
|
||||
@@ -164,6 +166,8 @@ async def _archive_dormant(playbooks: list[Playbook], report: EvolverReport) ->
|
||||
for pb in playbooks:
|
||||
if pb.status == PlaybookStatus.DEPRECATED:
|
||||
continue
|
||||
if pb.source == PlaybookSource.YAML_RULE:
|
||||
continue # yaml_rule playbooks 由 seeder 管理,不受休眠封存,保護自動修復鏈路
|
||||
if pb.last_used_at is None:
|
||||
# 從未使用過 — 只在 trust 低於閾值時封存
|
||||
if pb.trust_score >= DORMANT_TRUST_THRESHOLD:
|
||||
|
||||
@@ -49,7 +49,10 @@ async def seed_playbooks_from_rules() -> None:
|
||||
from sqlalchemy import text as sa_text
|
||||
async with get_db_context() as db:
|
||||
rows = await db.execute(
|
||||
sa_text("SELECT name FROM playbooks WHERE source = 'yaml_rule'")
|
||||
sa_text(
|
||||
"SELECT name FROM playbooks WHERE source = 'yaml_rule'"
|
||||
" AND status != 'deprecated'"
|
||||
)
|
||||
)
|
||||
existing_names = {r[0] for r in rows.fetchall()}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user