Files
awoooi/apps/api/src/services/playbook_match_resolver.py
Your Name b1ecb55bd6
All checks were successful
Code Review / ai-code-review (push) Successful in 18s
CD Pipeline / tests (push) Successful in 1m2s
CD Pipeline / build-and-deploy (push) Successful in 3m31s
CD Pipeline / post-deploy-checks (push) Successful in 1m39s
fix(verification): align playbook and mcp evidence for canary alerts
2026-05-14 00:21:44 +08:00

191 lines
6.1 KiB
Python

"""Resolve alert/rule context to a real Playbook ID.
The learning loop updates EWMA trust only when ``approval_records`` carries the
actual ``playbooks.playbook_id``. YAML rule IDs such as ``host_resource_alert``
are not Playbook IDs, so this resolver bridges rule/alert context to the
canonical DB identity before an ApprovalRecord is created.
"""
from __future__ import annotations
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path
import structlog
import yaml
from sqlalchemy import text as sa_text
from src.db.base import get_db_context
logger = structlog.get_logger(__name__)
@dataclass(frozen=True)
class PlaybookMatch:
    """Immutable outcome of resolving alert context to a playbook.

    Pairs the identifier that was found with a label naming the lookup
    strategy that found it.
    """

    # Identifier of the matched playbook.
    playbook_id: str
    # Label of the lookup strategy that produced the match; this module uses
    # "exact_yaml_rule", "exact_alertname" and "symptom_recommendation".
    source: str
async def resolve_playbook_id_for_alert(
    *,
    rule_id: str | None = None,
    alertname: str | None = None,
    affected_services: list[str] | None = None,
    severity: str | None = None,
) -> str | None:
    """Look up the real ``playbooks.playbook_id`` for an alert, if one exists.

    The deterministic rule-ID / alert-name lookup runs first. The
    symptom-based recommendation lookup is consulted only when the first one
    finds nothing.

    Args:
        rule_id: Identifier of the rule that fired, if known.
        alertname: Name of the alert, if known.
        affected_services: Services touched by the alert; ``None`` is treated
            as an empty list.
        severity: Alert severity, forwarded to the recommendation lookup.

    Returns:
        The matched playbook ID, or ``None`` when neither lookup matches.
    """
    resolved = await _resolve_exact_yaml_rule(rule_id=rule_id, alertname=alertname)
    if not resolved:
        # Nothing deterministic matched: fall back to the recommendation path.
        services = affected_services if affected_services else []
        resolved = await _resolve_by_recommendation(
            alertname=alertname,
            affected_services=services,
            severity=severity,
        )
    if resolved:
        return resolved.playbook_id
    return None
# Escape character used in the ILIKE pattern below. It must stay in sync with
# the ``ESCAPE '!'`` clause in ``_PLAYBOOK_BY_RULE_ID_SQL``.
_LIKE_ESCAPE_CHAR = "!"

# Approved playbooks migrated from a YAML rule, matched either by the
# generated name or by a ``rule.id=<id>`` marker inside ``notes``.
_PLAYBOOK_BY_RULE_ID_SQL = """
    SELECT playbook_id
    FROM playbooks
    WHERE source = 'yaml_rule'
      AND status = 'approved'
      AND (
        name = ('AutoMigrated: ' || :rule_id)
        OR notes ILIKE ('%rule.id=' || :rule_id_like || '%') ESCAPE '!'
      )
    ORDER BY updated_at DESC
    LIMIT 1
"""

# Approved playbooks whose symptom pattern lists the alert name; playbooks
# that came from a YAML rule win ties, then the most recently updated one.
_PLAYBOOK_BY_ALERTNAME_SQL = """
    SELECT playbook_id
    FROM playbooks
    WHERE status = 'approved'
      AND (symptom_pattern::jsonb->'alert_names') ? :alertname
    ORDER BY
      CASE WHEN source = 'yaml_rule' THEN 0 ELSE 1 END,
      updated_at DESC
    LIMIT 1
"""


def _escape_like_literal(value: str, escape: str = _LIKE_ESCAPE_CHAR) -> str:
    """Escape LIKE/ILIKE metacharacters so *value* is matched literally."""
    # The escape character itself must be doubled first, otherwise the
    # escapes added for ``%`` and ``_`` would be escaped again.
    for special in (escape, "%", "_"):
        value = value.replace(special, escape + special)
    return value


async def _resolve_exact_yaml_rule(
    *,
    rule_id: str | None,
    alertname: str | None,
) -> PlaybookMatch | None:
    """Use deterministic DB fields before falling back to fuzzy recommendations.

    Two lookups are attempted, in order:

    1. By ``rule_id``: an approved ``yaml_rule`` playbook named
       ``AutoMigrated: <rule_id>`` or whose notes contain ``rule.id=<rule_id>``
       (case-insensitive), most recently updated first.
    2. By alert name: the given ``alertname`` followed by the alert names the
       rules index lists for ``rule_id`` (de-duplicated, order preserved).

    Args:
        rule_id: Rule identifier; ``None`` and whitespace-only are ignored.
        alertname: Alert name; ``None`` and whitespace-only are ignored.

    Returns:
        A ``PlaybookMatch`` whose ``source`` is ``"exact_yaml_rule"`` or
        ``"exact_alertname"``, or ``None`` when nothing matches or the lookup
        fails. Failures are logged as warnings and never raised.
    """
    rule_id = (rule_id or "").strip()
    alertname = (alertname or "").strip()
    if not rule_id and not alertname:
        return None

    try:
        # The rules-index lookup sits inside the ``try`` on purpose: a broken
        # rules file must degrade to "no match" exactly like a DB failure,
        # instead of raising out of a best-effort resolver.
        candidates = list(
            dict.fromkeys(
                name
                for name in (alertname, *_alertnames_for_rule_id(rule_id))
                if name
            )
        )

        async with get_db_context() as db:
            if rule_id:
                # ``_`` and ``%`` are LIKE wildcards; rule IDs such as
                # ``host_resource_alert`` contain them, so the value is
                # escaped before it is embedded in the pattern.
                # NOTE(review): this is still a substring match, so
                # ``rule.id=foo`` also matches notes containing
                # ``rule.id=foobar`` - confirm the notes format before
                # tightening it further.
                result = await db.execute(
                    sa_text(_PLAYBOOK_BY_RULE_ID_SQL),
                    {
                        "rule_id": rule_id,
                        "rule_id_like": _escape_like_literal(rule_id),
                    },
                )
                row = result.first()
                if row:
                    return PlaybookMatch(
                        playbook_id=str(row[0]),
                        source="exact_yaml_rule",
                    )

            for candidate in candidates:
                result = await db.execute(
                    sa_text(_PLAYBOOK_BY_ALERTNAME_SQL),
                    {"alertname": candidate},
                )
                row = result.first()
                if row:
                    return PlaybookMatch(
                        playbook_id=str(row[0]),
                        source="exact_alertname",
                    )
        return None
    except Exception as exc:
        logger.warning(
            "playbook_exact_match_failed",
            rule_id=rule_id,
            alertname=alertname,
            error=str(exc),
        )
        return None
def _build_rule_alertname_index(data: object) -> dict[str, tuple[str, ...]]:
    """Extract ``{rule id: alert names}`` from parsed rules YAML, tolerating bad shapes."""
    if not isinstance(data, dict):
        return {}
    rules = data.get("rules")
    if not isinstance(rules, list):
        # Covers a missing key as well as ``rules: null``.
        return {}

    index: dict[str, tuple[str, ...]] = {}
    for rule in rules:
        if not isinstance(rule, dict):
            continue
        rule_id = str(rule.get("id") or "").strip()
        if not rule_id:
            continue
        match = rule.get("match")
        if not isinstance(match, dict):
            # ``match: null`` or a scalar: treat as "no match criteria".
            match = {}
        alertnames = match.get("alertname", [])
        if isinstance(alertnames, str):
            # Accept the scalar form ``alertname: Foo`` as a one-item list.
            alertnames = [alertnames]
        if isinstance(alertnames, list):
            index[rule_id] = tuple(str(name) for name in alertnames if name)
    return index


@lru_cache(maxsize=1)
def _rule_alertname_index() -> dict[str, tuple[str, ...]]:
    """Load ``alert_rules.yaml`` and map each rule ID to its alert names.

    The file is looked up two directories above this module's directory.
    The result is memoised for the lifetime of the process, and that includes
    the empty index returned when the file cannot be read or parsed.

    Returns:
        A mapping from rule ID to the tuple of alert names listed under the
        rule's ``match.alertname``. Malformed rules are skipped, never raised.
    """
    rules_path = Path(__file__).resolve().parents[2] / "alert_rules.yaml"
    try:
        data = yaml.safe_load(rules_path.read_text(encoding="utf-8")) or {}
    except Exception as exc:
        logger.debug("playbook_rule_index_load_failed", path=str(rules_path), error=str(exc))
        return {}
    return _build_rule_alertname_index(data)
def _alertnames_for_rule_id(rule_id: str) -> tuple[str, ...]:
    """Return the alert names the rules index lists for *rule_id*.

    An empty tuple is returned for an empty ``rule_id`` (without consulting
    the index) and for a rule ID the index does not contain.
    """
    return _rule_alertname_index().get(rule_id, ()) if rule_id else ()
async def _resolve_by_recommendation(
    *,
    alertname: str | None,
    affected_services: list[str],
    severity: str | None,
) -> PlaybookMatch | None:
    """Ask the playbook service for its best symptom-based recommendation.

    Args:
        alertname: Alert name; ``None`` and whitespace-only count as absent.
        affected_services: Services touched by the alert.
        severity: Alert severity; ``"P2"`` is used when it is falsy.

    Returns:
        A ``PlaybookMatch`` with ``source="symptom_recommendation"`` when the
        top recommendation has a similarity score that is not below 0.5.
        Otherwise ``None``, including when there is nothing to search on or
        the lookup raises. Failures are logged at debug level, not raised.
    """
    alertname = (alertname or "").strip()
    if not (alertname or affected_services):
        return None

    try:
        # Imported lazily, inside the ``try``, so an import failure is handled
        # like any other lookup failure.
        from src.models.playbook import SymptomPattern
        from src.services.playbook_service import get_playbook_service

        wanted_names = [alertname] if alertname else []
        pattern = SymptomPattern(
            alert_names=wanted_names,
            affected_services=affected_services,
            severity_range=[severity or "P2"],
        )
        service = get_playbook_service()
        ranked = await service.get_recommendations(
            symptoms=pattern,
            top_k=1,
            use_rag=False,
        )
        if not ranked:
            return None

        top = ranked[0]
        if top.similarity_score < 0.5:
            # Too weak a match to be treated as the alert's playbook.
            return None
        return PlaybookMatch(
            playbook_id=top.playbook.playbook_id,
            source="symptom_recommendation",
        )
    except Exception as exc:
        logger.debug(
            "playbook_recommendation_match_skipped",
            alertname=alertname,
            affected_services=affected_services,
            error=str(exc),
        )
        return None