fix(auto-repair): preserve exact playbook candidates
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 5m46s
CD Pipeline / build-and-deploy (push) Successful in 4m6s
CD Pipeline / post-deploy-checks (push) Successful in 1m28s

This commit is contained in:
Your Name
2026-05-13 23:37:59 +08:00
parent 5161a9dfd6
commit a0a0731cd6
3 changed files with 138 additions and 21 deletions

View File

@@ -14,6 +14,7 @@ Phase 3 ADR-030: RAG 向量搜尋整合
- 封裝所有業務邏輯
"""
import re as _re
from typing import Protocol
import structlog
@@ -32,13 +33,11 @@ from src.models.playbook import (
)
from src.repositories.interfaces import IPlaybookRepository
from src.repositories.playbook_repository import get_playbook_repository
from src.services.playbook_rag import get_playbook_rag_service
from src.services.playbook_rag import PlaybookMatch, get_playbook_rag_service
from src.utils.timezone import now_taipei
logger = structlog.get_logger(__name__)
import re as _re
def _parse_ssh_command(ssh_cmd: str) -> tuple[str, str]:
"""
@@ -275,16 +274,16 @@ class PlaybookService:
payload = KMWritePayload(
path_type="playbook_extract",
entry_create_kwargs=dict(
title=f"[Playbook] {playbook.name}",
content=body,
entry_type=EntryType.INCIDENT_CASE,
category="auto_repair",
tags=[*playbook.tags, "playbook", "auto_extracted", playbook.status.value],
source=EntrySource.AI_EXTRACTED,
related_incident_id=incident.incident_id,
created_by="playbook_service",
),
entry_create_kwargs={
"title": f"[Playbook] {playbook.name}",
"content": body,
"entry_type": EntryType.INCIDENT_CASE,
"category": "auto_repair",
"tags": [*playbook.tags, "playbook", "auto_extracted", playbook.status.value],
"source": EntrySource.AI_EXTRACTED,
"related_incident_id": incident.incident_id,
"created_by": "playbook_service",
},
incident_id=incident.incident_id,
)
result = await km_write_with_flag(payload)
@@ -348,6 +347,17 @@ class PlaybookService:
vector_weight=0.6,
jaccard_weight=0.4,
)
hybrid_by_id = {match.playbook_id: match for match in hybrid_matches}
for playbook_id, jaccard_score in jaccard_results:
if playbook_id in hybrid_by_id:
continue
hybrid_matches.append(
PlaybookMatch(
playbook_id=playbook_id,
similarity_score=jaccard_score,
match_type="jaccard",
)
)
# 補充 playbook_map (RAG 可能找到 Jaccard 沒找到的)
for match in hybrid_matches:
@@ -404,9 +414,9 @@ class PlaybookService:
)
)
# Step 4: 按綜合分數排序 (similarity * success_rate)
# Step 4: 先保住 exact signal,避免精準 Playbook 被語意近似項擠掉。
recommendations.sort(
key=lambda r: r.similarity_score * (0.5 + 0.5 * r.playbook.success_rate),
key=lambda r: self._recommendation_priority(r, symptoms),
reverse=True,
)
@@ -821,6 +831,25 @@ class PlaybookService:
return matched
@staticmethod
def _normalized_overlap(left: list[str], right: list[str]) -> bool:
left_values = {value.casefold() for value in left if value}
right_values = {value.casefold() for value in right if value}
return bool(left_values & right_values)
def _recommendation_priority(
    self,
    recommendation: PlaybookRecommendation,
    symptoms: SymptomPattern,
) -> tuple[bool, bool, float]:
    """Build the tuple used as a sort key for one recommendation.

    The tuple is compared element by element, so an alert-name overlap
    outranks a service overlap, which in turn outranks the numeric score.

    Args:
        recommendation: Candidate whose playbook and similarity score are
            being ranked.
        symptoms: Symptom pattern of the incident being matched.

    Returns:
        ``(alert_overlap, service_overlap, quality)`` where the two flags
        come from ``_normalized_overlap`` on alert names and affected
        services, and ``quality`` is
        ``similarity_score * (0.5 + 0.5 * success_rate)``.
    """
    playbook = recommendation.playbook
    playbook_pattern = playbook.symptom_pattern
    overlaps = self._normalized_overlap
    return (
        overlaps(symptoms.alert_names, playbook_pattern.alert_names),
        overlaps(symptoms.affected_services, playbook_pattern.affected_services),
        # Success rate scales the similarity between 50% and 100% of its value.
        recommendation.similarity_score * (0.5 + 0.5 * playbook.success_rate),
    )
def _generate_recommendation_reason(
self,
playbook: Playbook,

View File

@@ -25,6 +25,7 @@ from src.models.playbook import (
RiskLevel,
SymptomPattern,
)
from src.services.playbook_rag import PlaybookMatch
from src.services.playbook_service import PlaybookService
from src.utils.timezone import now_taipei
@@ -282,6 +283,92 @@ class TestPlaybookService:
# Should be empty or have very low similarity
assert len(recommendations) == 0 or recommendations[0].similarity_score < 0.4
@pytest.mark.asyncio
async def test_get_recommendations_prioritizes_exact_alert(self, service, mock_repo):
    """A playbook sharing the exact alert name must be ranked first.

    Seeds two playbooks: one that shares only the alert name with the
    symptoms (no recorded runs) and one that shares only the affected
    service (20 recorded successes). With ``use_rag=False`` and
    ``top_k=1`` the alert-name match is expected to be returned.
    """
    alert_matched = create_test_playbook(
        playbook_id="PB-EXACT-ALERT",
        success_count=0,
        failure_count=0,
    )
    alert_matched.symptom_pattern.alert_names = ["AwoooPT16E"]
    alert_matched.symptom_pattern.affected_services = ["different-service"]

    service_matched = create_test_playbook(
        playbook_id="PB-FUZZY-SERVICE",
        success_count=20,
        failure_count=0,
    )
    service_matched.symptom_pattern.alert_names = ["SentryDown"]
    service_matched.symptom_pattern.affected_services = [
        "awoooi-auto-repair-canary-livefire"
    ]

    # Insertion order is kept: the alert-name match is stored first.
    for seeded in (alert_matched, service_matched):
        await mock_repo.create(seeded)

    incident_symptoms = SymptomPattern(
        alert_names=["AwoooPT16E"],
        affected_services=["awoooi-auto-repair-canary-livefire"],
        severity_range=["P2"],
    )
    ranked = await service.get_recommendations(
        incident_symptoms,
        top_k=1,
        use_rag=False,
    )

    best = ranked[0]
    assert best.playbook.playbook_id == "PB-EXACT-ALERT"
@pytest.mark.asyncio
async def test_get_recommendations_preserves_jaccard_candidates(
    self,
    service,
    mock_repo,
):
    """The hybrid (RAG) path must keep an exact alert-name candidate.

    The RAG service is replaced by a stub whose ``hybrid_search`` returns
    only ``PB-VECTOR-ONLY`` (score 0.99). The playbook that shares the
    alert name with the symptoms is absent from that result, yet it is
    still expected to be the top-1 recommendation.
    """
    jaccard_exact = create_test_playbook(
        playbook_id="PB-EXACT-JACCARD",
        success_count=0,
        failure_count=0,
    )
    jaccard_exact.symptom_pattern.alert_names = ["AwoooPT16F"]
    jaccard_exact.symptom_pattern.affected_services = ["different-service"]

    vector_only = create_test_playbook(
        playbook_id="PB-VECTOR-ONLY",
        success_count=20,
        failure_count=0,
    )
    vector_only.symptom_pattern.alert_names = ["SentryDown"]
    vector_only.symptom_pattern.affected_services = [
        "awoooi-auto-repair-canary-livefire"
    ]

    # Insertion order is kept: the exact-alert playbook is stored first.
    for seeded in (jaccard_exact, vector_only):
        await mock_repo.create(seeded)

    class StubRagService:
        """Minimal RAG stand-in that only ever reports the vector match."""

        async def hybrid_search(self, **_kwargs):
            # A fresh list and match per call, so caller-side mutation
            # cannot leak between invocations.
            vector_hit = PlaybookMatch(
                playbook_id="PB-VECTOR-ONLY",
                similarity_score=0.99,
                match_type="vector",
            )
            return [vector_hit]

    async def provide_stub_rag():
        return StubRagService()

    service._get_rag_service = provide_stub_rag

    incident_symptoms = SymptomPattern(
        alert_names=["AwoooPT16F"],
        affected_services=["awoooi-auto-repair-canary-livefire"],
        severity_range=["P2"],
    )
    ranked = await service.get_recommendations(incident_symptoms, top_k=1)

    best = ranked[0]
    assert best.playbook.playbook_id == "PB-EXACT-JACCARD"
@pytest.mark.asyncio
async def test_approve_playbook(self, service, mock_repo):
"""Test approving a draft playbook"""

View File

@@ -27,7 +27,7 @@ for _api_root in (_IMAGE_API_ROOT, _REPO_API_ROOT):
sys.path.insert(0, str(_api_root))
break
from src.models.playbook import (
from src.models.playbook import ( # noqa: E402
ActionType,
Playbook,
PlaybookSource,
@@ -36,9 +36,9 @@ from src.models.playbook import (
RiskLevel,
SymptomPattern,
)
from src.core.redis_client import close_redis_pool, init_redis_pool
from src.repositories.playbook_repository import get_playbook_repository
from src.utils.timezone import now_taipei
from src.core.redis_client import close_redis_pool, init_redis_pool # noqa: E402
from src.repositories.playbook_repository import get_playbook_repository # noqa: E402
from src.utils.timezone import now_taipei # noqa: E402
DEFAULT_ALERTNAME = "AwoooPAutoRepairCanaryT16"
@@ -63,10 +63,11 @@ class SeedResult:
def _playbook_id_for_alertname(alertname: str) -> str:
if alertname == DEFAULT_ALERTNAME:
return "PB-AWOOOP-T16-CANARY"
prefix = "PB-AWOOOP-CANARY-"
suffix = re.sub(r"[^A-Z0-9]+", "-", alertname.upper()).strip("-")
suffix = suffix.replace("AWOOOP-AUTO-REPAIR-CANARY-", "")
suffix = suffix[:18] or "T16"
return f"PB-AWOOOP-CANARY-{suffix}"
suffix = suffix[: 32 - len(prefix)] or "T16"
return f"{prefix}{suffix}"
async def seed_canary_playbook(