191 lines
6.1 KiB
Python
191 lines
6.1 KiB
Python
"""Resolve alert/rule context to a real Playbook ID.
|
|
|
|
The learning loop updates EWMA trust only when ``approval_records`` carries the
|
|
actual ``playbooks.playbook_id``. YAML rule IDs such as ``host_resource_alert``
|
|
are not Playbook IDs, so this resolver bridges rule/alert context to the
|
|
canonical DB identity before an ApprovalRecord is created.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from functools import lru_cache
|
|
from pathlib import Path
|
|
|
|
import structlog
|
|
import yaml
|
|
from sqlalchemy import text as sa_text
|
|
|
|
from src.db.base import get_db_context
|
|
|
|
# Module-level structlog logger, named after this module.
logger = structlog.get_logger(__name__)
|
|
|
|
|
|
@dataclass(frozen=True)
class PlaybookMatch:
    """Immutable result of resolving alert context to a playbook.

    Instances are produced by the resolver helpers in this module; the
    ``source`` field records which lookup strategy produced the match.
    """

    # Value of ``playbooks.playbook_id`` for the matched playbook.
    playbook_id: str
    # Strategy label, e.g. "exact_yaml_rule", "exact_alertname" or
    # "symptom_recommendation".
    source: str
|
|
|
|
|
|
async def resolve_playbook_id_for_alert(
    *,
    rule_id: str | None = None,
    alertname: str | None = None,
    affected_services: list[str] | None = None,
    severity: str | None = None,
) -> str | None:
    """Map alert context onto a real ``playbooks.playbook_id`` when possible.

    Two strategies are tried in order:

    1. an exact lookup driven by ``rule_id`` / ``alertname``;
    2. a symptom-based recommendation using ``alertname``,
       ``affected_services`` and ``severity``.

    Returns:
        The matched playbook ID, or ``None`` when neither strategy yields one.
    """
    exact = await _resolve_exact_yaml_rule(rule_id=rule_id, alertname=alertname)
    if exact:
        return exact.playbook_id

    # The recommendation helper expects a concrete list, never ``None``.
    services = affected_services if affected_services else []
    recommended = await _resolve_by_recommendation(
        alertname=alertname,
        affected_services=services,
        severity=severity,
    )
    if not recommended:
        return None
    return recommended.playbook_id
|
|
|
|
|
|
async def _resolve_exact_yaml_rule(
    *,
    rule_id: str | None,
    alertname: str | None,
) -> PlaybookMatch | None:
    """Use deterministic DB fields before falling back to fuzzy recommendations.

    Lookup order:

    1. If ``rule_id`` is given, the most recently updated approved playbook
       with ``source = 'yaml_rule'`` whose name is ``AutoMigrated: <rule_id>``
       or whose notes contain ``rule.id=<rule_id>`` (case-insensitive).
    2. Otherwise, for each candidate alert name (the given ``alertname`` first,
       then the alert names registered for ``rule_id``), an approved playbook
       whose ``symptom_pattern.alert_names`` contains that name; ``yaml_rule``
       playbooks are preferred, then the most recently updated.

    Args:
        rule_id: YAML rule identifier; blank or ``None`` skips step 1.
        alertname: Alert name from the incoming alert; may be blank or ``None``.

    Returns:
        A ``PlaybookMatch`` with ``source`` set to ``"exact_yaml_rule"`` or
        ``"exact_alertname"``, or ``None`` when nothing matches or the lookup
        fails. Database errors are logged as warnings and never raised.
    """
    rule_id = (rule_id or "").strip()
    alertname = (alertname or "").strip()
    if not rule_id and not alertname:
        return None

    # Ordered, de-duplicated candidates: the alert's own name takes priority
    # over the names configured for the rule.
    alertname_candidates = [alertname]
    alertname_candidates.extend(_alertnames_for_rule_id(rule_id))
    alertname_candidates = list(dict.fromkeys(name for name in alertname_candidates if name))

    # "%" and "_" are wildcards in LIKE/ILIKE. Rule IDs routinely contain "_"
    # (e.g. ``host_resource_alert``), so the raw ID would match unrelated notes.
    # Escape with "!" (declared via ESCAPE below) so the ID is matched literally.
    rule_id_like = rule_id.replace("!", "!!").replace("%", "!%").replace("_", "!_")

    try:
        async with get_db_context() as db:
            if rule_id:
                row = (
                    await db.execute(
                        sa_text(
                            """
                            SELECT playbook_id
                            FROM playbooks
                            WHERE source = 'yaml_rule'
                              AND status = 'approved'
                              AND (
                                name = ('AutoMigrated: ' || :rule_id)
                                OR notes ILIKE ('%rule.id=' || :rule_id_like || '%') ESCAPE '!'
                              )
                            ORDER BY updated_at DESC
                            LIMIT 1
                            """
                        ),
                        {"rule_id": rule_id, "rule_id_like": rule_id_like},
                    )
                ).first()
                if row:
                    return PlaybookMatch(playbook_id=str(row[0]), source="exact_yaml_rule")

            # One query per candidate keeps the candidate priority order intact.
            for candidate in alertname_candidates:
                row = (
                    await db.execute(
                        sa_text(
                            """
                            SELECT playbook_id
                            FROM playbooks
                            WHERE status = 'approved'
                              AND (symptom_pattern::jsonb->'alert_names') ? :alertname
                            ORDER BY
                              CASE WHEN source = 'yaml_rule' THEN 0 ELSE 1 END,
                              updated_at DESC
                            LIMIT 1
                            """
                        ),
                        {"alertname": candidate},
                    )
                ).first()
                if row:
                    return PlaybookMatch(playbook_id=str(row[0]), source="exact_alertname")

            return None
    except Exception as exc:
        # Best-effort lookup: a DB failure must not block the caller, which
        # can still fall back to the recommendation-based resolver.
        logger.warning(
            "playbook_exact_match_failed",
            rule_id=rule_id,
            alertname=alertname,
            error=str(exc),
        )
        return None
|
|
|
|
|
|
@lru_cache(maxsize=1)
def _rule_alertname_index() -> dict[str, tuple[str, ...]]:
    """Build a ``rule id -> alert names`` index from ``alert_rules.yaml``.

    The file is looked up two directory levels above the directory that
    contains this module. Each entry under the top-level ``rules`` key
    contributes ``rule["id"]`` mapped to the list at
    ``rule["match"]["alertname"]``; entries without an id, or whose
    ``alertname`` is not a list, are skipped.

    The result is memoised by ``lru_cache``, so the file is read at most once
    per process -- including when loading fails and ``{}`` is returned.

    Returns:
        Mapping of stripped rule id to a tuple of alert-name strings. Empty
        when the file is missing, unparsable, or not shaped as expected.
    """
    rules_path = Path(__file__).resolve().parents[2] / "alert_rules.yaml"
    try:
        data = yaml.safe_load(rules_path.read_text(encoding="utf-8")) or {}
    except Exception as exc:
        logger.debug("playbook_rule_index_load_failed", path=str(rules_path), error=str(exc))
        return {}

    # Valid YAML can still have an unexpected shape (top-level list,
    # ``rules: null``); treat that as "no rules" instead of raising, since
    # callers invoke this outside their own error handling.
    rules = data.get("rules") if isinstance(data, dict) else None
    if not isinstance(rules, list):
        return {}

    index: dict[str, tuple[str, ...]] = {}
    for rule in rules:
        if not isinstance(rule, dict):
            continue
        rule_id = str(rule.get("id") or "").strip()
        # ``match:`` may be absent, null, or a scalar; only a mapping carries
        # alert names.
        match = rule.get("match")
        alertnames = match.get("alertname", []) if isinstance(match, dict) else []
        if rule_id and isinstance(alertnames, list):
            index[rule_id] = tuple(str(name) for name in alertnames if name)
    return index
|
|
|
|
|
|
def _alertnames_for_rule_id(rule_id: str) -> tuple[str, ...]:
    """Return the alert names indexed for *rule_id*.

    An empty ``rule_id`` short-circuits to ``()`` without consulting the rule
    index; an unknown id also yields ``()``.
    """
    return _rule_alertname_index().get(rule_id, ()) if rule_id else ()
|
|
|
|
|
|
async def _resolve_by_recommendation(
    *,
    alertname: str | None,
    affected_services: list[str],
    severity: str | None,
) -> PlaybookMatch | None:
    """Pick a playbook via the playbook service's symptom recommendations.

    Args:
        alertname: Alert name; blank or ``None`` means "no alert name".
        affected_services: Services touched by the alert; may be empty.
        severity: Severity label; ``"P2"`` is used when falsy.

    Returns:
        A ``PlaybookMatch`` with ``source="symptom_recommendation"`` when the
        top recommendation has a similarity score of at least 0.5, else
        ``None``. Also ``None`` when there is neither an alert name nor any
        affected service, or when anything in the lookup raises (the error is
        logged at debug level).
    """
    alertname = (alertname or "").strip()
    if not (alertname or affected_services):
        return None

    try:
        # Function-scope imports (presumably to avoid an import cycle --
        # confirm); a failed import is handled like any other lookup error.
        from src.models.playbook import SymptomPattern
        from src.services.playbook_service import get_playbook_service

        names = [alertname] if alertname else []
        pattern = SymptomPattern(
            alert_names=names,
            affected_services=affected_services,
            severity_range=[severity or "P2"],
        )
        service = get_playbook_service()
        ranked = await service.get_recommendations(
            symptoms=pattern,
            top_k=1,
            use_rag=False,
        )
        if not ranked:
            return None

        top = ranked[0]
        # Reject weak matches rather than attribute feedback to the wrong playbook.
        too_weak = top.similarity_score < 0.5
        if too_weak:
            return None
        return PlaybookMatch(
            playbook_id=top.playbook.playbook_id,
            source="symptom_recommendation",
        )
    except Exception as exc:
        logger.debug(
            "playbook_recommendation_match_skipped",
            alertname=alertname,
            affected_services=affected_services,
            error=str(exc),
        )
        return None
|