fix(auto-repair): preserve exact playbook candidates
This commit is contained in:
@@ -14,6 +14,7 @@ Phase 3 ADR-030: RAG 向量搜尋整合
|
||||
- 封裝所有業務邏輯
|
||||
"""
|
||||
|
||||
import re as _re
|
||||
from typing import Protocol
|
||||
|
||||
import structlog
|
||||
@@ -32,13 +33,11 @@ from src.models.playbook import (
|
||||
)
|
||||
from src.repositories.interfaces import IPlaybookRepository
|
||||
from src.repositories.playbook_repository import get_playbook_repository
|
||||
from src.services.playbook_rag import get_playbook_rag_service
|
||||
from src.services.playbook_rag import PlaybookMatch, get_playbook_rag_service
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
import re as _re
|
||||
|
||||
|
||||
def _parse_ssh_command(ssh_cmd: str) -> tuple[str, str]:
|
||||
"""
|
||||
@@ -275,16 +274,16 @@ class PlaybookService:
|
||||
|
||||
payload = KMWritePayload(
|
||||
path_type="playbook_extract",
|
||||
entry_create_kwargs=dict(
|
||||
title=f"[Playbook] {playbook.name}",
|
||||
content=body,
|
||||
entry_type=EntryType.INCIDENT_CASE,
|
||||
category="auto_repair",
|
||||
tags=[*playbook.tags, "playbook", "auto_extracted", playbook.status.value],
|
||||
source=EntrySource.AI_EXTRACTED,
|
||||
related_incident_id=incident.incident_id,
|
||||
created_by="playbook_service",
|
||||
),
|
||||
entry_create_kwargs={
|
||||
"title": f"[Playbook] {playbook.name}",
|
||||
"content": body,
|
||||
"entry_type": EntryType.INCIDENT_CASE,
|
||||
"category": "auto_repair",
|
||||
"tags": [*playbook.tags, "playbook", "auto_extracted", playbook.status.value],
|
||||
"source": EntrySource.AI_EXTRACTED,
|
||||
"related_incident_id": incident.incident_id,
|
||||
"created_by": "playbook_service",
|
||||
},
|
||||
incident_id=incident.incident_id,
|
||||
)
|
||||
result = await km_write_with_flag(payload)
|
||||
@@ -348,6 +347,17 @@ class PlaybookService:
|
||||
vector_weight=0.6,
|
||||
jaccard_weight=0.4,
|
||||
)
|
||||
hybrid_by_id = {match.playbook_id: match for match in hybrid_matches}
|
||||
for playbook_id, jaccard_score in jaccard_results:
|
||||
if playbook_id in hybrid_by_id:
|
||||
continue
|
||||
hybrid_matches.append(
|
||||
PlaybookMatch(
|
||||
playbook_id=playbook_id,
|
||||
similarity_score=jaccard_score,
|
||||
match_type="jaccard",
|
||||
)
|
||||
)
|
||||
|
||||
# 補充 playbook_map (RAG 可能找到 Jaccard 沒找到的)
|
||||
for match in hybrid_matches:
|
||||
@@ -404,9 +414,9 @@ class PlaybookService:
|
||||
)
|
||||
)
|
||||
|
||||
# Step 4: 按綜合分數排序 (similarity * success_rate)
|
||||
# Step 4: 先保住 exact signal,避免精準 Playbook 被語意近似項擠掉。
|
||||
recommendations.sort(
|
||||
key=lambda r: r.similarity_score * (0.5 + 0.5 * r.playbook.success_rate),
|
||||
key=lambda r: self._recommendation_priority(r, symptoms),
|
||||
reverse=True,
|
||||
)
|
||||
|
||||
@@ -821,6 +831,25 @@ class PlaybookService:
|
||||
|
||||
return matched
|
||||
|
||||
@staticmethod
|
||||
def _normalized_overlap(left: list[str], right: list[str]) -> bool:
|
||||
left_values = {value.casefold() for value in left if value}
|
||||
right_values = {value.casefold() for value in right if value}
|
||||
return bool(left_values & right_values)
|
||||
|
||||
def _recommendation_priority(
    self,
    recommendation: PlaybookRecommendation,
    symptoms: SymptomPattern,
) -> tuple[bool, bool, float]:
    """Build the sort key used to rank one recommendation.

    The key is compared element by element, and the caller sorts with
    ``reverse=True``, so larger keys rank first:

    1. whether an incoming alert name matches one in the playbook's pattern,
    2. whether an incoming affected service matches one in the pattern,
    3. the similarity score scaled by ``0.5 + 0.5 * success_rate``.

    Args:
        recommendation: Candidate whose playbook and similarity are ranked.
        symptoms: Symptom pattern of the incident being matched.

    Returns:
        ``(alert_match, service_match, quality_score)``.
    """
    playbook = recommendation.playbook
    stored = playbook.symptom_pattern
    same_alert = self._normalized_overlap(symptoms.alert_names, stored.alert_names)
    same_service = self._normalized_overlap(
        symptoms.affected_services,
        stored.affected_services,
    )
    # Half of the weight is unconditional, so success_rate == 0 only halves
    # the similarity instead of zeroing it.
    success_weight = 0.5 + 0.5 * playbook.success_rate
    return (same_alert, same_service, recommendation.similarity_score * success_weight)
|
||||
|
||||
def _generate_recommendation_reason(
|
||||
self,
|
||||
playbook: Playbook,
|
||||
|
||||
@@ -25,6 +25,7 @@ from src.models.playbook import (
|
||||
RiskLevel,
|
||||
SymptomPattern,
|
||||
)
|
||||
from src.services.playbook_rag import PlaybookMatch
|
||||
from src.services.playbook_service import PlaybookService
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
@@ -282,6 +283,92 @@ class TestPlaybookService:
|
||||
# Should be empty or have very low similarity
|
||||
assert len(recommendations) == 0 or recommendations[0].similarity_score < 0.4
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_recommendations_prioritizes_exact_alert(self, service, mock_repo):
    """An alert-name match must outrank a service-only match.

    PB-EXACT-ALERT shares the incident's alert name but not its service and
    has no recorded runs (0 successes, 0 failures). PB-FUZZY-SERVICE shares
    the affected service but not the alert name and has 20 successes and no
    failures. With ``top_k=1`` and RAG disabled, the alert match must be the
    single recommendation returned.
    """
    exact_alert = create_test_playbook(
        playbook_id="PB-EXACT-ALERT",
        success_count=0,
        failure_count=0,
    )
    exact_alert.symptom_pattern.alert_names = ["AwoooPT16E"]
    exact_alert.symptom_pattern.affected_services = ["different-service"]

    competing_service = create_test_playbook(
        playbook_id="PB-FUZZY-SERVICE",
        success_count=20,
        failure_count=0,
    )
    competing_service.symptom_pattern.alert_names = ["SentryDown"]
    competing_service.symptom_pattern.affected_services = [
        "awoooi-auto-repair-canary-livefire"
    ]

    await mock_repo.create(exact_alert)
    await mock_repo.create(competing_service)

    symptoms = SymptomPattern(
        alert_names=["AwoooPT16E"],
        affected_services=["awoooi-auto-repair-canary-livefire"],
        severity_range=["P2"],
    )

    recommendations = await service.get_recommendations(
        symptoms,
        top_k=1,
        use_rag=False,
    )

    # Fail with a readable message rather than an IndexError when nothing
    # is returned at all.
    assert recommendations, "expected at least one recommendation"
    assert recommendations[0].playbook.playbook_id == "PB-EXACT-ALERT"
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_recommendations_preserves_jaccard_candidates(
    self,
    service,
    mock_repo,
):
    """RAG hybrid top-k must not drop exact Jaccard candidates.

    The stubbed RAG service returns only PB-VECTOR-ONLY (score 0.99), which
    shares the incident's service but not its alert name. PB-EXACT-JACCARD
    shares the alert name and is absent from the hybrid results, yet it must
    still be the top recommendation.
    """
    exact_alert = create_test_playbook(
        playbook_id="PB-EXACT-JACCARD",
        success_count=0,
        failure_count=0,
    )
    exact_alert.symptom_pattern.alert_names = ["AwoooPT16F"]
    exact_alert.symptom_pattern.affected_services = ["different-service"]

    competing_vector = create_test_playbook(
        playbook_id="PB-VECTOR-ONLY",
        success_count=20,
        failure_count=0,
    )
    competing_vector.symptom_pattern.alert_names = ["SentryDown"]
    competing_vector.symptom_pattern.affected_services = [
        "awoooi-auto-repair-canary-livefire"
    ]

    await mock_repo.create(exact_alert)
    await mock_repo.create(competing_vector)

    class FakeRagService:
        """Stub whose hybrid search returns only the vector-only playbook."""

        async def hybrid_search(self, **_kwargs):
            return [
                PlaybookMatch(
                    playbook_id="PB-VECTOR-ONLY",
                    similarity_score=0.99,
                    match_type="vector",
                )
            ]

    async def fake_rag_service():
        return FakeRagService()

    # Replace the RAG lookup on this instance only.
    service._get_rag_service = fake_rag_service

    symptoms = SymptomPattern(
        alert_names=["AwoooPT16F"],
        affected_services=["awoooi-auto-repair-canary-livefire"],
        severity_range=["P2"],
    )

    # NOTE(review): relies on get_recommendations using RAG by default — confirm.
    recommendations = await service.get_recommendations(symptoms, top_k=1)

    # Fail with a readable message rather than an IndexError when nothing
    # is returned at all.
    assert recommendations, "expected at least one recommendation"
    assert recommendations[0].playbook.playbook_id == "PB-EXACT-JACCARD"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_approve_playbook(self, service, mock_repo):
|
||||
"""Test approving a draft playbook"""
|
||||
|
||||
@@ -27,7 +27,7 @@ for _api_root in (_IMAGE_API_ROOT, _REPO_API_ROOT):
|
||||
sys.path.insert(0, str(_api_root))
|
||||
break
|
||||
|
||||
from src.models.playbook import (
|
||||
from src.models.playbook import ( # noqa: E402
|
||||
ActionType,
|
||||
Playbook,
|
||||
PlaybookSource,
|
||||
@@ -36,9 +36,9 @@ from src.models.playbook import (
|
||||
RiskLevel,
|
||||
SymptomPattern,
|
||||
)
|
||||
from src.core.redis_client import close_redis_pool, init_redis_pool
|
||||
from src.repositories.playbook_repository import get_playbook_repository
|
||||
from src.utils.timezone import now_taipei
|
||||
from src.core.redis_client import close_redis_pool, init_redis_pool # noqa: E402
|
||||
from src.repositories.playbook_repository import get_playbook_repository # noqa: E402
|
||||
from src.utils.timezone import now_taipei # noqa: E402
|
||||
|
||||
|
||||
# Default alert name; _playbook_id_for_alertname special-cases it and returns
# the fixed ID "PB-AWOOOP-T16-CANARY" instead of deriving one from the name.
DEFAULT_ALERTNAME = "AwoooPAutoRepairCanaryT16"
|
||||
@@ -63,10 +63,11 @@ class SeedResult:
|
||||
def _playbook_id_for_alertname(alertname: str) -> str:
    """Derive the deterministic canary playbook ID for an alert name.

    The default alert name maps to the fixed ID ``PB-AWOOOP-T16-CANARY``.
    Any other name is upper-cased, runs of characters outside ``A-Z0-9``
    become single hyphens, the ``AWOOOP-AUTO-REPAIR-CANARY-`` marker is
    removed, and the result is appended to ``PB-AWOOOP-CANARY-``.

    Args:
        alertname: Alert name to derive the playbook ID from.

    Returns:
        A playbook ID of at most 32 characters. ``T16`` is used as the
        suffix when normalization leaves nothing.
    """
    if alertname == DEFAULT_ALERTNAME:
        return "PB-AWOOOP-T16-CANARY"
    prefix = "PB-AWOOOP-CANARY-"
    suffix = re.sub(r"[^A-Z0-9]+", "-", alertname.upper()).strip("-")
    suffix = suffix.replace("AWOOOP-AUTO-REPAIR-CANARY-", "")
    # Size the suffix from the prefix length so the whole ID stays within
    # 32 characters. The earlier fixed 18-character cut allowed 35.
    # NOTE(review): presumably 32 is the playbook ID length limit — confirm.
    suffix = suffix[: 32 - len(prefix)] or "T16"
    return f"{prefix}{suffix}"
|
||||
|
||||
|
||||
async def seed_canary_playbook(
|
||||
|
||||
Reference in New Issue
Block a user