fix(drift): dedupe blocked auto-adopt escalations
This commit is contained in:
@@ -277,6 +277,12 @@ class DriftAdoptService:
|
|||||||
|
|
||||||
# Step 3: 找出受影響的 YAML 檔並 commit 更新
|
# Step 3: 找出受影響的 YAML 檔並 commit 更新
|
||||||
committed_files = await self._commit_drift_yaml(client, headers, branch_name, report)
|
committed_files = await self._commit_drift_yaml(client, headers, branch_name, report)
|
||||||
|
if not committed_files:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"message": "無直接匹配的 YAML 檔,未建立零 diff 承認 PR",
|
||||||
|
"pr_url": None,
|
||||||
|
}
|
||||||
|
|
||||||
# Step 4: 建立 PR
|
# Step 4: 建立 PR
|
||||||
pr_url = await self._create_pr(
|
pr_url = await self._create_pr(
|
||||||
@@ -292,7 +298,7 @@ class DriftAdoptService:
|
|||||||
logger.info("drift_adopt_pr_created", report_id=report.report_id, pr_url=pr_url)
|
logger.info("drift_adopt_pr_created", report_id=report.report_id, pr_url=pr_url)
|
||||||
return {
|
return {
|
||||||
"success": True,
|
"success": True,
|
||||||
"message": f"PR 已建立,請 SRE review 後 merge",
|
"message": "PR 已建立,請 SRE review 後 merge",
|
||||||
"pr_url": pr_url,
|
"pr_url": pr_url,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -345,7 +351,9 @@ class DriftAdoptService:
|
|||||||
if not item.is_allowlisted
|
if not item.is_allowlisted
|
||||||
}
|
}
|
||||||
|
|
||||||
for yaml_file in sorted(self._k8s_dir.glob("*.yaml")):
|
for yaml_file in sorted(self._k8s_dir.rglob("*.yaml")):
|
||||||
|
if not yaml_file.is_file():
|
||||||
|
continue
|
||||||
# 判斷此 YAML 是否與漂移相關
|
# 判斷此 YAML 是否與漂移相關
|
||||||
file_stem = yaml_file.stem.lower()
|
file_stem = yaml_file.stem.lower()
|
||||||
if not any(kind in file_stem for kind in affected_kinds):
|
if not any(kind in file_stem for kind in affected_kinds):
|
||||||
|
|||||||
@@ -16,6 +16,24 @@ from src.core.redis_client import get_redis
|
|||||||
logger = structlog.get_logger(__name__)
|
logger = structlog.get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _drift_emergency_fingerprint(report: Any) -> str:
    """Compute the fingerprint string identifying a drift report for dedup.

    The report's ``namespace`` (coerced to ``str``, empty when missing or
    falsy) and its ``items`` (copied into a list, empty when missing or
    falsy) are handed to ``build_drift_fingerprint``.

    This helper never raises for ordinary errors: any ``Exception`` from the
    import, the attribute reads, or the fingerprint builder is logged as a
    warning and replaced by a fallback value.

    Args:
        report: Object read via ``getattr`` for ``namespace``, ``items`` and
            ``report_id``; none of the attributes is required to exist.

    Returns:
        The value produced by ``build_drift_fingerprint``; on failure,
        ``str(report.report_id)`` when that attribute is truthy, otherwise
        the literal ``"unknown"``.
    """
    try:
        # Imported at call time rather than module load.
        # NOTE(review): presumably to avoid an import cycle — confirm.
        from src.services.drift_repeat_state import build_drift_fingerprint

        namespace = str(getattr(report, "namespace", "") or "")
        drift_items = list(getattr(report, "items", []) or [])
        fingerprint = build_drift_fingerprint(namespace, drift_items)
    except Exception as exc:
        # Best-effort path: record why fingerprinting failed, then fall back
        # to the report id so the caller still receives a usable string.
        fallback_id = getattr(report, "report_id", None)
        logger.warning(
            "drift_emergency_fingerprint_failed",
            report_id=fallback_id,
            error=str(exc),
        )
        return str(fallback_id or "unknown")
    else:
        return fingerprint
|
||||||
|
|
||||||
|
|
||||||
async def escalate_auto_repair_unavailable(
|
async def escalate_auto_repair_unavailable(
|
||||||
*,
|
*,
|
||||||
incident_id: str,
|
incident_id: str,
|
||||||
@@ -120,9 +138,14 @@ async def escalate_drift_auto_adopt_blocked(
|
|||||||
) -> None:
|
) -> None:
|
||||||
"""Notify the emergency channel when drift cannot be auto-adopted safely."""
|
"""Notify the emergency channel when drift cannot be auto-adopted safely."""
|
||||||
|
|
||||||
dedup_key = f"drift:auto_adopt_emergency:{report.report_id}"
|
fingerprint = _drift_emergency_fingerprint(report)
|
||||||
if not await _dedup_first_send(dedup_key, ttl=3600, event="drift"):
|
dedup_key = f"drift:auto_adopt_emergency:fp:{fingerprint}"
|
||||||
logger.info("drift_emergency_escalation_dedup_skipped", report_id=report.report_id)
|
if not await _dedup_first_send(dedup_key, ttl=86400, event="drift"):
|
||||||
|
logger.info(
|
||||||
|
"drift_emergency_escalation_dedup_skipped",
|
||||||
|
report_id=report.report_id,
|
||||||
|
fingerprint=fingerprint,
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -150,7 +173,8 @@ async def escalate_drift_auto_adopt_blocked(
|
|||||||
current_impact=(
|
current_impact=(
|
||||||
f"namespace={report.namespace} high={report.high_count} "
|
f"namespace={report.namespace} high={report.high_count} "
|
||||||
f"medium={report.medium_count} actionable={actionable_count} "
|
f"medium={report.medium_count} actionable={actionable_count} "
|
||||||
f"intent={intent} confidence={confidence:.0%} risk={risk}"
|
f"intent={intent} confidence={confidence:.0%} risk={risk} "
|
||||||
|
f"fingerprint={fingerprint}"
|
||||||
),
|
),
|
||||||
group_chat_id=settings.SRE_GROUP_CHAT_ID or None,
|
group_chat_id=settings.SRE_GROUP_CHAT_ID or None,
|
||||||
)
|
)
|
||||||
@@ -169,6 +193,7 @@ async def escalate_drift_auto_adopt_blocked(
|
|||||||
"intent": intent,
|
"intent": intent,
|
||||||
"confidence": confidence,
|
"confidence": confidence,
|
||||||
"risk": risk,
|
"risk": risk,
|
||||||
|
"fingerprint": fingerprint,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
@@ -198,6 +223,7 @@ async def escalate_drift_auto_adopt_blocked(
|
|||||||
high=report.high_count,
|
high=report.high_count,
|
||||||
medium=report.medium_count,
|
medium=report.medium_count,
|
||||||
actionable=actionable_count,
|
actionable=actionable_count,
|
||||||
|
fingerprint=fingerprint,
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
|
|||||||
@@ -10,8 +10,10 @@ async def test_drift_emergency_escalation_writes_aol_and_timeline(monkeypatch):
|
|||||||
sent_cards = []
|
sent_cards = []
|
||||||
aol_calls = []
|
aol_calls = []
|
||||||
timeline_calls = []
|
timeline_calls = []
|
||||||
|
dedup_calls = []
|
||||||
|
|
||||||
async def fake_dedup(*args, **kwargs):
|
async def fake_dedup(*args, **kwargs):
|
||||||
|
dedup_calls.append((args, kwargs))
|
||||||
return True
|
return True
|
||||||
|
|
||||||
class FakeGateway:
|
class FakeGateway:
|
||||||
@@ -65,7 +67,11 @@ async def test_drift_emergency_escalation_writes_aol_and_timeline(monkeypatch):
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert sent_cards and sent_cards[0]["incident_id"] == "drift-123"
|
assert sent_cards and sent_cards[0]["incident_id"] == "drift-123"
|
||||||
|
assert "fingerprint=dfp_" in sent_cards[0]["current_impact"]
|
||||||
|
assert dedup_calls[0][0][0].startswith("drift:auto_adopt_emergency:fp:dfp_")
|
||||||
|
assert dedup_calls[0][1]["ttl"] == 86400
|
||||||
assert aol_calls and aol_calls[0][0][0] == "APPROVAL_ESCALATED"
|
assert aol_calls and aol_calls[0][0][0] == "APPROVAL_ESCALATED"
|
||||||
assert aol_calls[0][1]["actor"] == "drift_auto_adopt"
|
assert aol_calls[0][1]["actor"] == "drift_auto_adopt"
|
||||||
assert aol_calls[0][1]["context"]["intent"] == "emergency_hotfix"
|
assert aol_calls[0][1]["context"]["intent"] == "emergency_hotfix"
|
||||||
|
assert aol_calls[0][1]["context"]["fingerprint"].startswith("dfp_")
|
||||||
assert timeline_calls and timeline_calls[0]["actor_role"] == "emergency_intervention"
|
assert timeline_calls and timeline_calls[0]["actor_role"] == "emergency_intervention"
|
||||||
|
|||||||
Reference in New Issue
Block a user