fix(auto_repair): playbook_seed_service — 從 alert_rules.yaml 初始化 APPROVED Playbook
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
根本原因: playbooks 表空 → NO_MATCH → 永遠走審批,從不自動修復 修復: startup 時從 alert_rules.yaml seed APPROVED Playbook(冪等) 確保自動修復鏈路有規則可用 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -286,6 +286,15 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
# 2026-04-10 Claude Sonnet 4.6 Asia/Taipei
|
||||
# 目的: 確保 playbook_embeddings 表有最新向量,供語義相似度查詢
|
||||
# 使用 asyncio.create_task 非阻塞 — 不影響 API 啟動速度
|
||||
# Phase ADR-068 2026-04-10: 從 alert_rules.yaml seed Playbook(冪等)
|
||||
# 必須在 embedding indexing 之前,確保 playbook 表有資料
|
||||
try:
|
||||
from src.services.playbook_seed_service import seed_playbooks_from_rules
|
||||
asyncio.create_task(seed_playbooks_from_rules())
|
||||
logger.info("playbook_seed_scheduled")
|
||||
except Exception as e:
|
||||
logger.warning("playbook_seed_schedule_failed", error=str(e))
|
||||
|
||||
try:
|
||||
from src.services.playbook_embedding_service import ensure_playbook_embeddings_indexed
|
||||
asyncio.create_task(ensure_playbook_embeddings_indexed())
|
||||
|
||||
104
apps/api/src/services/playbook_seed_service.py
Normal file
104
apps/api/src/services/playbook_seed_service.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""
|
||||
Playbook Seed Service — 從 alert_rules.yaml 初始化 Playbook 資料
|
||||
=================================================================
|
||||
職責:
|
||||
- 啟動時讀取 alert_rules.yaml
|
||||
- 將每條規則轉換為 APPROVED Playbook 寫入 DB(冪等:已存在則跳過)
|
||||
- 確保自動修復鏈路有資料可用
|
||||
|
||||
呼叫方: main.py lifespan (asyncio.create_task — 非阻塞)
|
||||
|
||||
2026-04-10 Claude Sonnet 4.6 Asia/Taipei
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import structlog
|
||||
import yaml
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
_RULES_PATH = Path(__file__).parent.parent.parent / "alert_rules.yaml"
|
||||
|
||||
|
||||
async def seed_playbooks_from_rules() -> None:
    """Seed APPROVED playbooks from ``alert_rules.yaml`` (best-effort).

    Reads the rule file at ``_RULES_PATH`` and, for every rule that carries a
    non-empty ``response.kubectl_command``, creates one single-step
    ``Playbook`` with status ``APPROVED`` and ``source="alert_rule:<id>"``.
    Rules whose source key is already present among the APPROVED playbooks
    returned by the repository are skipped, so repeated start-ups do not
    create duplicates for those rules.

    The function never raises: a missing file, an empty or malformed
    document and repository failures are logged and swallowed, because it is
    meant to run as a fire-and-forget start-up task.

    Returns:
        None. Progress is reported through structured log events only
        (``playbook_seeded``, ``playbook_seed_failed``,
        ``playbook_seed_complete``, ``playbook_seed_error``).
    """
    try:
        if not _RULES_PATH.exists():
            logger.warning("playbook_seed_rules_not_found", path=str(_RULES_PATH))
            return

        # Decode explicitly as UTF-8 so parsing does not depend on the
        # process locale (container images frequently default to ASCII).
        data = yaml.safe_load(_RULES_PATH.read_text(encoding="utf-8"))

        # safe_load() yields None for an empty document and may yield a
        # non-mapping for a malformed one; both mean "nothing to seed".
        rules = data.get("rules") if isinstance(data, dict) else None
        if not isinstance(rules, list) or not rules:
            return

        # Imported lazily, as in the original implementation.
        # NOTE(review): the original also imported PlaybookSource without
        # using it. If Playbook.source is typed as that enum, the plain
        # string key used below will neither validate nor match existing
        # rows -- confirm against the model.
        from src.models.playbook import (
            ActionType,
            Playbook,
            PlaybookStatus,
            RepairStep,
            RiskLevel,
            SymptomPattern,
        )
        from src.repositories.playbook_repository import get_playbook_repository

        repo = get_playbook_repository()

        # Collect the source keys that already exist so they are not
        # re-created.
        # NOTE(review): only APPROVED playbooks, and at most 500 of them,
        # are inspected here. A seeded playbook whose status later changed,
        # or one beyond the limit, would presumably be created again.
        existing = await repo.list_playbooks(status=PlaybookStatus.APPROVED, limit=500)
        known_sources = {p.source for p in existing if p.source}

        # Built once instead of on every iteration. "high" was previously
        # missing, which silently downgraded high-risk rules to MEDIUM.
        risk_map = {
            "low": RiskLevel.LOW,
            "medium": RiskLevel.MEDIUM,
            "high": RiskLevel.HIGH,
            "critical": RiskLevel.HIGH,
        }

        seeded = 0
        for rule in rules:
            if not isinstance(rule, dict):
                # A malformed entry must not abort the remaining rules.
                logger.warning(
                    "playbook_seed_failed",
                    rule_id="",
                    error="rule entry is not a mapping",
                )
                continue

            rule_id = rule.get("id", "")
            source_key = f"alert_rule:{rule_id}"
            if source_key in known_sources:
                continue

            # `response: null` in YAML yields None rather than the default.
            resp = rule.get("response")
            if not isinstance(resp, dict):
                continue

            raw_cmd = resp.get("kubectl_command")
            # Only a real, non-blank string is accepted as a command; the
            # resulting playbook is pre-approved, so nothing is coerced.
            if not isinstance(raw_cmd, str) or not raw_cmd.strip():
                continue
            kubectl_cmd = raw_cmd.strip()

            risk_str = str(resp.get("risk") or "medium").lower()
            risk = risk_map.get(risk_str, RiskLevel.MEDIUM)

            match = rule.get("match")
            alertnames = _coerce_alert_names(
                match.get("alertname") if isinstance(match, dict) else None
            )

            action_type = ActionType.KUBECTL
            if kubectl_cmd.startswith("ssh"):
                action_type = ActionType.SSH_COMMAND

            rule_description = rule.get("description") or ""

            try:
                # Model construction lives inside the per-rule guard so that
                # one rule failing validation cannot abort the whole seed.
                playbook = Playbook(
                    name=rule_description or str(rule_id),
                    description=resp.get("description") or rule_description,
                    status=PlaybookStatus.APPROVED,
                    source=source_key,
                    symptom_pattern=SymptomPattern(
                        alert_names=alertnames,
                        affected_services=[],
                        severity_range=["P2", "P3"],
                    ),
                    repair_steps=[
                        RepairStep(
                            step_number=1,
                            action_type=action_type,
                            command=kubectl_cmd,
                            expected_result=resp.get("action_title") or "",
                            risk_level=risk,
                            # NOTE(review): approval is waived for every
                            # seeded step, including HIGH-risk ones --
                            # confirm this is the intended policy.
                            requires_approval=False,
                        )
                    ],
                    ai_confidence=1.0,
                    approved_by="alert_rules_yaml",
                )
                await repo.create(playbook)
            except Exception as e:
                logger.warning("playbook_seed_failed", rule_id=rule_id, error=str(e))
                continue

            seeded += 1
            # Guard against the same id appearing twice in one file. Rules
            # without an id are not tracked, which keeps their previous
            # behaviour unchanged.
            if rule_id:
                known_sources.add(source_key)
            logger.info("playbook_seeded", rule_id=rule_id, name=playbook.name)

        logger.info("playbook_seed_complete", seeded=seeded, total=len(rules))

    except Exception as e:
        # Top-level boundary of a background task: log and swallow so that a
        # seeding problem can never surface as an unhandled task exception.
        logger.error("playbook_seed_error", error=str(e))


def _coerce_alert_names(value: object) -> list[str]:
    """Normalise a YAML ``match.alertname`` value to a list of strings.

    YAML authors may write a single scalar (``alertname: Foo``) or a list;
    a missing or null value becomes an empty list.
    """
    if value is None:
        return []
    if isinstance(value, str):
        return [value]
    if isinstance(value, (list, tuple, set)):
        return [str(item) for item in value if item is not None]
    return [str(value)]
|
||||
Reference in New Issue
Block a user