fix(drift): dedupe blocked auto-adopt escalations
This commit is contained in:
@@ -277,6 +277,12 @@ class DriftAdoptService:
|
||||
|
||||
# Step 3: 找出受影響的 YAML 檔並 commit 更新
|
||||
committed_files = await self._commit_drift_yaml(client, headers, branch_name, report)
|
||||
if not committed_files:
|
||||
return {
|
||||
"success": False,
|
||||
"message": "無直接匹配的 YAML 檔,未建立零 diff 承認 PR",
|
||||
"pr_url": None,
|
||||
}
|
||||
|
||||
# Step 4: 建立 PR
|
||||
pr_url = await self._create_pr(
|
||||
@@ -292,7 +298,7 @@ class DriftAdoptService:
|
||||
logger.info("drift_adopt_pr_created", report_id=report.report_id, pr_url=pr_url)
|
||||
return {
|
||||
"success": True,
|
||||
"message": f"PR 已建立,請 SRE review 後 merge",
|
||||
"message": "PR 已建立,請 SRE review 後 merge",
|
||||
"pr_url": pr_url,
|
||||
}
|
||||
|
||||
@@ -345,7 +351,9 @@ class DriftAdoptService:
|
||||
if not item.is_allowlisted
|
||||
}
|
||||
|
||||
for yaml_file in sorted(self._k8s_dir.glob("*.yaml")):
|
||||
for yaml_file in sorted(self._k8s_dir.rglob("*.yaml")):
|
||||
if not yaml_file.is_file():
|
||||
continue
|
||||
# 判斷此 YAML 是否與漂移相關
|
||||
file_stem = yaml_file.stem.lower()
|
||||
if not any(kind in file_stem for kind in affected_kinds):
|
||||
|
||||
@@ -16,6 +16,24 @@ from src.core.redis_client import get_redis
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
def _drift_emergency_fingerprint(report: Any) -> str:
    """Build the fingerprint string used to dedupe drift emergency escalations.

    The value comes from ``build_drift_fingerprint``, called with the report's
    ``namespace`` (coerced to ``str``, empty when missing or falsy) and its
    ``items`` (copied into a list, empty when missing or falsy).

    If the import or the call raises, a warning is logged and the report's
    ``report_id`` is returned as a string instead, or ``"unknown"`` when the
    id is missing or falsy.
    """
    try:
        # Imported inside the function so that an import failure is handled
        # by the fallback below rather than propagating to the caller.
        from src.services.drift_repeat_state import build_drift_fingerprint

        namespace = str(getattr(report, "namespace", "") or "")
        drift_items = list(getattr(report, "items", []) or [])
        return build_drift_fingerprint(namespace, drift_items)
    except Exception as exc:
        logger.warning(
            "drift_emergency_fingerprint_failed",
            report_id=getattr(report, "report_id", None),
            error=str(exc),
        )
        # Fall back to a per-report key so the caller still gets a string.
        fallback_id = getattr(report, "report_id", "") or "unknown"
        return str(fallback_id)
|
||||
|
||||
|
||||
async def escalate_auto_repair_unavailable(
|
||||
*,
|
||||
incident_id: str,
|
||||
@@ -120,9 +138,14 @@ async def escalate_drift_auto_adopt_blocked(
|
||||
) -> None:
|
||||
"""Notify the emergency channel when drift cannot be auto-adopted safely."""
|
||||
|
||||
dedup_key = f"drift:auto_adopt_emergency:{report.report_id}"
|
||||
if not await _dedup_first_send(dedup_key, ttl=3600, event="drift"):
|
||||
logger.info("drift_emergency_escalation_dedup_skipped", report_id=report.report_id)
|
||||
fingerprint = _drift_emergency_fingerprint(report)
|
||||
dedup_key = f"drift:auto_adopt_emergency:fp:{fingerprint}"
|
||||
if not await _dedup_first_send(dedup_key, ttl=86400, event="drift"):
|
||||
logger.info(
|
||||
"drift_emergency_escalation_dedup_skipped",
|
||||
report_id=report.report_id,
|
||||
fingerprint=fingerprint,
|
||||
)
|
||||
return
|
||||
|
||||
try:
|
||||
@@ -150,7 +173,8 @@ async def escalate_drift_auto_adopt_blocked(
|
||||
current_impact=(
|
||||
f"namespace={report.namespace} high={report.high_count} "
|
||||
f"medium={report.medium_count} actionable={actionable_count} "
|
||||
f"intent={intent} confidence={confidence:.0%} risk={risk}"
|
||||
f"intent={intent} confidence={confidence:.0%} risk={risk} "
|
||||
f"fingerprint={fingerprint}"
|
||||
),
|
||||
group_chat_id=settings.SRE_GROUP_CHAT_ID or None,
|
||||
)
|
||||
@@ -169,6 +193,7 @@ async def escalate_drift_auto_adopt_blocked(
|
||||
"intent": intent,
|
||||
"confidence": confidence,
|
||||
"risk": risk,
|
||||
"fingerprint": fingerprint,
|
||||
},
|
||||
)
|
||||
try:
|
||||
@@ -198,6 +223,7 @@ async def escalate_drift_auto_adopt_blocked(
|
||||
high=report.high_count,
|
||||
medium=report.medium_count,
|
||||
actionable=actionable_count,
|
||||
fingerprint=fingerprint,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
|
||||
Reference in New Issue
Block a user