From a0a0731cd60b339573bdf0d44e9518ff3a1b7f7a Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 13 May 2026 23:37:59 +0800 Subject: [PATCH] fix(auto-repair): preserve exact playbook candidates --- apps/api/src/services/playbook_service.py | 59 +++++++++---- apps/api/tests/test_playbook_service.py | 87 +++++++++++++++++++ ...awooop-seed-auto-repair-canary-playbook.py | 13 +-- 3 files changed, 138 insertions(+), 21 deletions(-) diff --git a/apps/api/src/services/playbook_service.py b/apps/api/src/services/playbook_service.py index 9d4299b9..557e187a 100644 --- a/apps/api/src/services/playbook_service.py +++ b/apps/api/src/services/playbook_service.py @@ -14,6 +14,7 @@ Phase 3 ADR-030: RAG 向量搜尋整合 - 封裝所有業務邏輯 """ +import re as _re from typing import Protocol import structlog @@ -32,13 +33,11 @@ from src.models.playbook import ( ) from src.repositories.interfaces import IPlaybookRepository from src.repositories.playbook_repository import get_playbook_repository -from src.services.playbook_rag import get_playbook_rag_service +from src.services.playbook_rag import PlaybookMatch, get_playbook_rag_service from src.utils.timezone import now_taipei logger = structlog.get_logger(__name__) -import re as _re - def _parse_ssh_command(ssh_cmd: str) -> tuple[str, str]: """ @@ -275,16 +274,16 @@ class PlaybookService: payload = KMWritePayload( path_type="playbook_extract", - entry_create_kwargs=dict( - title=f"[Playbook] {playbook.name}", - content=body, - entry_type=EntryType.INCIDENT_CASE, - category="auto_repair", - tags=[*playbook.tags, "playbook", "auto_extracted", playbook.status.value], - source=EntrySource.AI_EXTRACTED, - related_incident_id=incident.incident_id, - created_by="playbook_service", - ), + entry_create_kwargs={ + "title": f"[Playbook] {playbook.name}", + "content": body, + "entry_type": EntryType.INCIDENT_CASE, + "category": "auto_repair", + "tags": [*playbook.tags, "playbook", "auto_extracted", playbook.status.value], + "source": EntrySource.AI_EXTRACTED, + "related_incident_id": incident.incident_id, + "created_by": "playbook_service", + }, incident_id=incident.incident_id, ) result = await km_write_with_flag(payload) @@ -348,6 +347,17 @@ class PlaybookService: vector_weight=0.6, jaccard_weight=0.4, ) + hybrid_by_id = {match.playbook_id: match for match in hybrid_matches} + for playbook_id, jaccard_score in jaccard_results: + if playbook_id in hybrid_by_id: + continue + hybrid_matches.append( + PlaybookMatch( + playbook_id=playbook_id, + similarity_score=jaccard_score, + match_type="jaccard", + ) + ) # 補充 playbook_map (RAG 可能找到 Jaccard 沒找到的) for match in hybrid_matches: @@ -404,9 +414,9 @@ class PlaybookService: ) ) - # Step 4: 按綜合分數排序 (similarity * success_rate) + # Step 4: 先保住 exact signal,避免精準 Playbook 被語意近似項擠掉。 recommendations.sort( - key=lambda r: r.similarity_score * (0.5 + 0.5 * r.playbook.success_rate), + key=lambda r: self._recommendation_priority(r, symptoms), reverse=True, ) @@ -821,6 +831,25 @@ class PlaybookService: return matched + @staticmethod + def _normalized_overlap(left: list[str], right: list[str]) -> bool: + left_values = {value.casefold() for value in left if value} + right_values = {value.casefold() for value in right if value} + return bool(left_values & right_values) + + def _recommendation_priority( + self, + recommendation: PlaybookRecommendation, + symptoms: SymptomPattern, + ) -> tuple[bool, bool, float]: + pattern = recommendation.playbook.symptom_pattern + alert_exact = self._normalized_overlap(symptoms.alert_names, pattern.alert_names) + service_exact = self._normalized_overlap(symptoms.affected_services, pattern.affected_services) + quality_score = recommendation.similarity_score * ( + 0.5 + 0.5 * recommendation.playbook.success_rate + ) + return (alert_exact, service_exact, quality_score) + def _generate_recommendation_reason( self, playbook: Playbook, diff --git a/apps/api/tests/test_playbook_service.py b/apps/api/tests/test_playbook_service.py index d7d8ff78..d2386150 100644 --- a/apps/api/tests/test_playbook_service.py +++ b/apps/api/tests/test_playbook_service.py @@ -25,6 +25,7 @@ from src.models.playbook import ( RiskLevel, SymptomPattern, ) +from src.services.playbook_rag import PlaybookMatch from src.services.playbook_service import PlaybookService from src.utils.timezone import now_taipei @@ -282,6 +283,92 @@ class TestPlaybookService: # Should be empty or have very low similarity assert len(recommendations) == 0 or recommendations[0].similarity_score < 0.4 + @pytest.mark.asyncio + async def test_get_recommendations_prioritizes_exact_alert(self, service, mock_repo): + """Exact alert matches should win over fuzzy service matches.""" + exact_alert = create_test_playbook( + playbook_id="PB-EXACT-ALERT", + success_count=0, + failure_count=0, + ) + exact_alert.symptom_pattern.alert_names = ["AwoooPT16E"] + exact_alert.symptom_pattern.affected_services = ["different-service"] + competing_service = create_test_playbook( + playbook_id="PB-FUZZY-SERVICE", + success_count=20, + failure_count=0, + ) + competing_service.symptom_pattern.alert_names = ["SentryDown"] + competing_service.symptom_pattern.affected_services = [ + "awoooi-auto-repair-canary-livefire" + ] + await mock_repo.create(exact_alert) + await mock_repo.create(competing_service) + + symptoms = SymptomPattern( + alert_names=["AwoooPT16E"], + affected_services=["awoooi-auto-repair-canary-livefire"], + severity_range=["P2"], + ) + + recommendations = await service.get_recommendations( + symptoms, + top_k=1, + use_rag=False, + ) + + assert recommendations[0].playbook.playbook_id == "PB-EXACT-ALERT" + + @pytest.mark.asyncio + async def test_get_recommendations_preserves_jaccard_candidates( + self, + service, + mock_repo, + ): + """RAG hybrid top-k must not drop exact Jaccard candidates.""" + exact_alert = create_test_playbook( + playbook_id="PB-EXACT-JACCARD", + success_count=0, + failure_count=0, + ) + exact_alert.symptom_pattern.alert_names = ["AwoooPT16F"] + exact_alert.symptom_pattern.affected_services = ["different-service"] + competing_vector = create_test_playbook( + playbook_id="PB-VECTOR-ONLY", + success_count=20, + failure_count=0, + ) + competing_vector.symptom_pattern.alert_names = ["SentryDown"] + competing_vector.symptom_pattern.affected_services = [ + "awoooi-auto-repair-canary-livefire" + ] + await mock_repo.create(exact_alert) + await mock_repo.create(competing_vector) + + class FakeRagService: + async def hybrid_search(self, **_kwargs): + return [ + PlaybookMatch( + playbook_id="PB-VECTOR-ONLY", + similarity_score=0.99, + match_type="vector", + ) + ] + + async def fake_rag_service(): + return FakeRagService() + + service._get_rag_service = fake_rag_service + symptoms = SymptomPattern( + alert_names=["AwoooPT16F"], + affected_services=["awoooi-auto-repair-canary-livefire"], + severity_range=["P2"], + ) + + recommendations = await service.get_recommendations(symptoms, top_k=1) + + assert recommendations[0].playbook.playbook_id == "PB-EXACT-JACCARD" + @pytest.mark.asyncio async def test_approve_playbook(self, service, mock_repo): """Test approving a draft playbook""" diff --git a/scripts/ops/awooop-seed-auto-repair-canary-playbook.py b/scripts/ops/awooop-seed-auto-repair-canary-playbook.py index f10e09b9..1670465a 100644 --- a/scripts/ops/awooop-seed-auto-repair-canary-playbook.py +++ b/scripts/ops/awooop-seed-auto-repair-canary-playbook.py @@ -27,7 +27,7 @@ for _api_root in (_IMAGE_API_ROOT, _REPO_API_ROOT): sys.path.insert(0, str(_api_root)) break -from src.models.playbook import ( +from src.models.playbook import ( # noqa: E402 ActionType, Playbook, PlaybookSource, @@ -36,9 +36,9 @@ from src.models.playbook import ( RiskLevel, SymptomPattern, ) -from src.core.redis_client import close_redis_pool, init_redis_pool -from src.repositories.playbook_repository import get_playbook_repository -from src.utils.timezone import now_taipei +from src.core.redis_client import close_redis_pool, init_redis_pool # noqa: E402 +from src.repositories.playbook_repository import get_playbook_repository # noqa: E402 +from src.utils.timezone import now_taipei # noqa: E402 DEFAULT_ALERTNAME = "AwoooPAutoRepairCanaryT16" @@ -63,10 +63,11 @@ class SeedResult: def _playbook_id_for_alertname(alertname: str) -> str: if alertname == DEFAULT_ALERTNAME: return "PB-AWOOOP-T16-CANARY" + prefix = "PB-AWOOOP-CANARY-" suffix = re.sub(r"[^A-Z0-9]+", "-", alertname.upper()).strip("-") suffix = suffix.replace("AWOOOP-AUTO-REPAIR-CANARY-", "") - suffix = suffix[:18] or "T16" - return f"PB-AWOOOP-CANARY-{suffix}" + suffix = suffix[: 32 - len(prefix)] or "T16" + return f"{prefix}{suffix}" async def seed_canary_playbook(