diff --git a/apps/api/src/services/emergency_escalation_service.py b/apps/api/src/services/emergency_escalation_service.py index dd38053d..24e26509 100644 --- a/apps/api/src/services/emergency_escalation_service.py +++ b/apps/api/src/services/emergency_escalation_service.py @@ -28,12 +28,19 @@ async def escalate_auto_repair_unavailable( ) -> None: """Open an emergency channel when auto repair cannot safely continue.""" - dedup_key = f"auto_repair:emergency_escalated:{incident_id}" - if not await _dedup_first_send(dedup_key, ttl=900, event="auto_repair"): + # 2026-05-02 Claude Opus 4.7 + 統帥 ogt:dedup key 從 incident_id → fingerprint(alertname+target) + # 鐵證:4 條 ESCALATION 卡 17:35-17:36 連發(HostOutOfDiskSpace + 3×HostDiskUsageHigh,全 target=node-exporter-110) + # 原本 incident_id 是 uuid4 隨機,TTL 900s 太短 → 同症狀換 INC ID 完全不去重 + # 改成 alertname+target fingerprint + TTL 86400s,與 decision_manager.py:218 對齊。 + _alertname_fp = (alert_type or "AutoRepairBlocked").strip().lower().replace(" ", "_")[:60] + _target_fp = (target_resource or "unknown").lower()[:40] + dedup_key = f"auto_repair:emergency_escalated:fp:{_alertname_fp}:{_target_fp}" + if not await _dedup_first_send(dedup_key, ttl=86400, event="auto_repair"): logger.info( "auto_repair_escalation_dedup_skipped", incident_id=incident_id, approval_id=approval_id, + fingerprint=f"{_alertname_fp}:{_target_fp}", ) return