From 93ac6030cf54d6fa045dad655a779d1fa0b7a4e2 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Thu, 18 Jun 2026 14:23:13 +0800
Subject: [PATCH] =?UTF-8?q?fix(ops):=20=E5=90=8C=E6=AD=A5=20source=20provi?=
 =?UTF-8?q?der=20freshness=20=E5=91=8A=E8=AD=A6=E8=A6=8F=E5=89=87?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer notes (free-form text after the three-dash line; not part of the
commit message and not part of the diff):

  * Adds one alerting rule, SourceProviderIngestionStale, placed directly
    before the existing AlertChainUnhealthy rule.
  * The expression takes, per `source` label, the maximum of
    awoooi_alert_chain_last_success_timestamp restricted to
    source=~"sentry|signoz", subtracts it from time(), and matches when the
    difference is greater than 86400; the rule then has `for: 15m` and
    `severity: warning`.
  * NOTE(review): a comparison over an empty vector returns no samples, so a
    source with no series at all would not match this expression -- confirm
    whether a companion absent()-style rule is wanted.
  * NOTE(review): YAML indentation in the hunk below was reconstructed from
    a whitespace-collapsed copy -- confirm it matches the target file.

 ops/monitoring/alerts-unified.yml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/ops/monitoring/alerts-unified.yml b/ops/monitoring/alerts-unified.yml
index 66294877..59895c58 100644
--- a/ops/monitoring/alerts-unified.yml
+++ b/ops/monitoring/alerts-unified.yml
@@ -651,6 +651,24 @@ groups:
           summary: "2 小時內未收到任何告警 ({{ $labels.source }})"
           description: "可能是告警鏈路問題,請執行 Smoke Test"

+      - alert: SourceProviderIngestionStale
+        expr: |
+          time() - max by (source) (
+            awoooi_alert_chain_last_success_timestamp{source=~"sentry|signoz"}
+          ) > 86400
+        for: 15m
+        labels:
+          severity: warning
+          layer: k8s
+          component: source-ingestion
+          team: platform
+          auto_repair: "false"
+          alert_category: "alertchain_provider_freshness"
+        annotations:
+          summary: "{{ $labels.source }} source ingestion 超過 24 小時未更新"
+          description: "{{ $labels.source }} webhook endpoint 可能仍健康,但 AwoooP source dossier 已超過 24 小時沒有新事件。這是 provider ingestion / upstream smoke / correlation freshness 缺口,不是 Alertmanager 主鏈路故障。"
+          runbook: "先查 /api/v1/webhooks/{{ $labels.source }}/health,再查 /api/v1/platform/events/dossier/coverage?provider={{ $labels.source }};若 endpoint OK 但 latest stale,檢查上游 Sentry/SignOz notification channel 或排程 smoke。"
+
       - alert: AlertChainUnhealthy
         expr: awoooi_alert_chain_healthy == 0
         for: 5m