diff --git a/apps/api/src/services/drift_adopt_service.py b/apps/api/src/services/drift_adopt_service.py
index 7e30dac5..6229e384 100644
--- a/apps/api/src/services/drift_adopt_service.py
+++ b/apps/api/src/services/drift_adopt_service.py
@@ -277,6 +277,12 @@ class DriftAdoptService:
 
             # Step 3: 找出受影響的 YAML 檔並 commit 更新
             committed_files = await self._commit_drift_yaml(client, headers, branch_name, report)
+            if not committed_files:
+                return {
+                    "success": False,
+                    "message": "無直接匹配的 YAML 檔,未建立零 diff 承認 PR",
+                    "pr_url": None,
+                }
 
             # Step 4: 建立 PR
             pr_url = await self._create_pr(
@@ -292,7 +298,7 @@ class DriftAdoptService:
             logger.info("drift_adopt_pr_created", report_id=report.report_id, pr_url=pr_url)
             return {
                 "success": True,
-                "message": f"PR 已建立,請 SRE review 後 merge",
+                "message": "PR 已建立,請 SRE review 後 merge",
                 "pr_url": pr_url,
             }
 
@@ -345,7 +351,9 @@ class DriftAdoptService:
             if not item.is_allowlisted
         }
 
-        for yaml_file in sorted(self._k8s_dir.glob("*.yaml")):
+        for yaml_file in sorted(self._k8s_dir.rglob("*.yaml")):
+            if not yaml_file.is_file():
+                continue
             # 判斷此 YAML 是否與漂移相關
             file_stem = yaml_file.stem.lower()
             if not any(kind in file_stem for kind in affected_kinds):
diff --git a/apps/api/src/services/emergency_escalation_service.py b/apps/api/src/services/emergency_escalation_service.py
index 24e26509..7de5aac9 100644
--- a/apps/api/src/services/emergency_escalation_service.py
+++ b/apps/api/src/services/emergency_escalation_service.py
@@ -16,6 +16,24 @@ from src.core.redis_client import get_redis
 logger = structlog.get_logger(__name__)
 
 
+def _drift_emergency_fingerprint(report: Any) -> str:
+    """Return stable fingerprint for one drift escalation dedup window."""
+    try:
+        from src.services.drift_repeat_state import build_drift_fingerprint
+
+        return build_drift_fingerprint(
+            str(getattr(report, "namespace", "") or ""),
+            list(getattr(report, "items", []) or []),
+        )
+    except Exception as exc:
+        logger.warning(
+            "drift_emergency_fingerprint_failed",
+            report_id=getattr(report, "report_id", None),
+            error=str(exc),
+        )
+        return str(getattr(report, "report_id", "") or "unknown")
+
+
 async def escalate_auto_repair_unavailable(
     *,
     incident_id: str,
@@ -120,9 +138,14 @@ async def escalate_drift_auto_adopt_blocked(
 ) -> None:
     """Notify the emergency channel when drift cannot be auto-adopted safely."""
 
-    dedup_key = f"drift:auto_adopt_emergency:{report.report_id}"
-    if not await _dedup_first_send(dedup_key, ttl=3600, event="drift"):
-        logger.info("drift_emergency_escalation_dedup_skipped", report_id=report.report_id)
+    fingerprint = _drift_emergency_fingerprint(report)
+    dedup_key = f"drift:auto_adopt_emergency:fp:{fingerprint}"
+    if not await _dedup_first_send(dedup_key, ttl=86400, event="drift"):
+        logger.info(
+            "drift_emergency_escalation_dedup_skipped",
+            report_id=report.report_id,
+            fingerprint=fingerprint,
+        )
         return
 
     try:
@@ -150,7 +173,8 @@ async def escalate_drift_auto_adopt_blocked(
             current_impact=(
                 f"namespace={report.namespace} high={report.high_count} "
                 f"medium={report.medium_count} actionable={actionable_count} "
-                f"intent={intent} confidence={confidence:.0%} risk={risk}"
+                f"intent={intent} confidence={confidence:.0%} risk={risk} "
+                f"fingerprint={fingerprint}"
             ),
             group_chat_id=settings.SRE_GROUP_CHAT_ID or None,
         )
@@ -169,6 +193,7 @@ async def escalate_drift_auto_adopt_blocked(
                 "intent": intent,
                 "confidence": confidence,
                 "risk": risk,
+                "fingerprint": fingerprint,
             },
         )
         try:
@@ -198,6 +223,7 @@ async def escalate_drift_auto_adopt_blocked(
             high=report.high_count,
             medium=report.medium_count,
             actionable=actionable_count,
+            fingerprint=fingerprint,
         )
     except Exception as exc:
         logger.warning(
diff --git a/apps/api/tests/test_emergency_escalation_service.py b/apps/api/tests/test_emergency_escalation_service.py
index 5d4a0e55..fcd75257 100644
--- a/apps/api/tests/test_emergency_escalation_service.py
+++ b/apps/api/tests/test_emergency_escalation_service.py
@@ -10,8 +10,10 @@ async def test_drift_emergency_escalation_writes_aol_and_timeline(monkeypatch):
     sent_cards = []
     aol_calls = []
     timeline_calls = []
+    dedup_calls = []
 
     async def fake_dedup(*args, **kwargs):
+        dedup_calls.append((args, kwargs))
         return True
 
     class FakeGateway:
@@ -65,7 +67,11 @@ async def test_drift_emergency_escalation_writes_aol_and_timeline(monkeypatch):
     )
 
     assert sent_cards and sent_cards[0]["incident_id"] == "drift-123"
+    assert "fingerprint=dfp_" in sent_cards[0]["current_impact"]
+    assert dedup_calls[0][0][0].startswith("drift:auto_adopt_emergency:fp:dfp_")
+    assert dedup_calls[0][1]["ttl"] == 86400
     assert aol_calls and aol_calls[0][0][0] == "APPROVAL_ESCALATED"
     assert aol_calls[0][1]["actor"] == "drift_auto_adopt"
     assert aol_calls[0][1]["context"]["intent"] == "emergency_hotfix"
+    assert aol_calls[0][1]["context"]["fingerprint"].startswith("dfp_")
     assert timeline_calls and timeline_calls[0]["actor_role"] == "emergency_intervention"