fix(drift): dedupe blocked auto-adopt escalations
This commit is contained in:
@@ -277,6 +277,12 @@ class DriftAdoptService:
|
||||
|
||||
# Step 3: 找出受影響的 YAML 檔並 commit 更新
|
||||
committed_files = await self._commit_drift_yaml(client, headers, branch_name, report)
|
||||
if not committed_files:
|
||||
return {
|
||||
"success": False,
|
||||
"message": "無直接匹配的 YAML 檔,未建立零 diff 承認 PR",
|
||||
"pr_url": None,
|
||||
}
|
||||
|
||||
# Step 4: 建立 PR
|
||||
pr_url = await self._create_pr(
|
||||
@@ -292,7 +298,7 @@ class DriftAdoptService:
|
||||
logger.info("drift_adopt_pr_created", report_id=report.report_id, pr_url=pr_url)
|
||||
return {
|
||||
"success": True,
|
||||
"message": f"PR 已建立,請 SRE review 後 merge",
|
||||
"message": "PR 已建立,請 SRE review 後 merge",
|
||||
"pr_url": pr_url,
|
||||
}
|
||||
|
||||
@@ -345,7 +351,9 @@ class DriftAdoptService:
|
||||
if not item.is_allowlisted
|
||||
}
|
||||
|
||||
for yaml_file in sorted(self._k8s_dir.glob("*.yaml")):
|
||||
for yaml_file in sorted(self._k8s_dir.rglob("*.yaml")):
|
||||
if not yaml_file.is_file():
|
||||
continue
|
||||
# 判斷此 YAML 是否與漂移相關
|
||||
file_stem = yaml_file.stem.lower()
|
||||
if not any(kind in file_stem for kind in affected_kinds):
|
||||
|
||||
@@ -16,6 +16,24 @@ from src.core.redis_client import get_redis
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
def _drift_emergency_fingerprint(report: Any) -> str:
    """Compute the key used to deduplicate one drift escalation.

    The fingerprint is produced by ``build_drift_fingerprint`` from the
    report's ``namespace`` (coerced to ``str``, empty when missing or falsy)
    and its ``items`` (copied into a list, empty when missing or falsy).

    This helper never raises for fingerprinting problems: if the import or
    the fingerprint call fails for any reason, a warning is logged and the
    report's ``report_id`` is returned as a string instead, or the literal
    ``"unknown"`` when the report has no usable ``report_id``.

    Args:
        report: Drift report object; read only through ``getattr`` so that
            missing attributes degrade to defaults.

    Returns:
        The fingerprint string, or the fallback identifier described above.
    """
    try:
        # Imported inside the ``try`` so an import failure also takes the
        # fallback path below.
        # NOTE(review): presumably kept local to avoid an import cycle -- confirm.
        from src.services.drift_repeat_state import build_drift_fingerprint

        namespace = str(getattr(report, "namespace", "") or "")
        drift_items = list(getattr(report, "items", []) or [])
        return build_drift_fingerprint(namespace, drift_items)
    except Exception as exc:
        # Best effort: fall back to a per-report identifier rather than fail.
        fallback_id = getattr(report, "report_id", None)
        logger.warning(
            "drift_emergency_fingerprint_failed",
            report_id=fallback_id,
            error=str(exc),
        )
        return str(fallback_id or "unknown")
|
||||
|
||||
|
||||
async def escalate_auto_repair_unavailable(
|
||||
*,
|
||||
incident_id: str,
|
||||
@@ -120,9 +138,14 @@ async def escalate_drift_auto_adopt_blocked(
|
||||
) -> None:
|
||||
"""Notify the emergency channel when drift cannot be auto-adopted safely."""
|
||||
|
||||
dedup_key = f"drift:auto_adopt_emergency:{report.report_id}"
|
||||
if not await _dedup_first_send(dedup_key, ttl=3600, event="drift"):
|
||||
logger.info("drift_emergency_escalation_dedup_skipped", report_id=report.report_id)
|
||||
fingerprint = _drift_emergency_fingerprint(report)
|
||||
dedup_key = f"drift:auto_adopt_emergency:fp:{fingerprint}"
|
||||
if not await _dedup_first_send(dedup_key, ttl=86400, event="drift"):
|
||||
logger.info(
|
||||
"drift_emergency_escalation_dedup_skipped",
|
||||
report_id=report.report_id,
|
||||
fingerprint=fingerprint,
|
||||
)
|
||||
return
|
||||
|
||||
try:
|
||||
@@ -150,7 +173,8 @@ async def escalate_drift_auto_adopt_blocked(
|
||||
current_impact=(
|
||||
f"namespace={report.namespace} high={report.high_count} "
|
||||
f"medium={report.medium_count} actionable={actionable_count} "
|
||||
f"intent={intent} confidence={confidence:.0%} risk={risk}"
|
||||
f"intent={intent} confidence={confidence:.0%} risk={risk} "
|
||||
f"fingerprint={fingerprint}"
|
||||
),
|
||||
group_chat_id=settings.SRE_GROUP_CHAT_ID or None,
|
||||
)
|
||||
@@ -169,6 +193,7 @@ async def escalate_drift_auto_adopt_blocked(
|
||||
"intent": intent,
|
||||
"confidence": confidence,
|
||||
"risk": risk,
|
||||
"fingerprint": fingerprint,
|
||||
},
|
||||
)
|
||||
try:
|
||||
@@ -198,6 +223,7 @@ async def escalate_drift_auto_adopt_blocked(
|
||||
high=report.high_count,
|
||||
medium=report.medium_count,
|
||||
actionable=actionable_count,
|
||||
fingerprint=fingerprint,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
|
||||
@@ -10,8 +10,10 @@ async def test_drift_emergency_escalation_writes_aol_and_timeline(monkeypatch):
|
||||
sent_cards = []
|
||||
aol_calls = []
|
||||
timeline_calls = []
|
||||
dedup_calls = []
|
||||
|
||||
async def fake_dedup(*args, **kwargs):
|
||||
dedup_calls.append((args, kwargs))
|
||||
return True
|
||||
|
||||
class FakeGateway:
|
||||
@@ -65,7 +67,11 @@ async def test_drift_emergency_escalation_writes_aol_and_timeline(monkeypatch):
|
||||
)
|
||||
|
||||
assert sent_cards and sent_cards[0]["incident_id"] == "drift-123"
|
||||
assert "fingerprint=dfp_" in sent_cards[0]["current_impact"]
|
||||
assert dedup_calls[0][0][0].startswith("drift:auto_adopt_emergency:fp:dfp_")
|
||||
assert dedup_calls[0][1]["ttl"] == 86400
|
||||
assert aol_calls and aol_calls[0][0][0] == "APPROVAL_ESCALATED"
|
||||
assert aol_calls[0][1]["actor"] == "drift_auto_adopt"
|
||||
assert aol_calls[0][1]["context"]["intent"] == "emergency_hotfix"
|
||||
assert aol_calls[0][1]["context"]["fingerprint"].startswith("dfp_")
|
||||
assert timeline_calls and timeline_calls[0]["actor_role"] == "emergency_intervention"
|
||||
|
||||
Reference in New Issue
Block a user