fix(ops): 同步 source provider freshness 告警規則
This commit is contained in:
@@ -651,6 +651,24 @@ groups:
|
||||
summary: "2 小時內未收到任何告警 ({{ $labels.source }})"
|
||||
description: "可能是告警鏈路問題,請執行 Smoke Test"
|
||||
|
||||
# Per-source ingestion freshness check for the sentry and signoz sources.
# Fires when the newest awoooi_alert_chain_last_success_timestamp for a
# source is more than 86400 s (24 h) behind the evaluation time, and that
# condition has held for 15 minutes.
- alert: SourceProviderIngestionStale
  # Assumes the metric holds a Unix timestamp in seconds - TODO confirm
  # against the exporter that publishes it.
  # NOTE(review): when a source has no series at all, this expression
  # returns no sample for it, so the alert cannot fire for that source.
  # Confirm that the "never ingested" case is covered elsewhere.
  expr: |
    time() - max by (source) (
      awoooi_alert_chain_last_success_timestamp{source=~"sentry|signoz"}
    ) > 86400
  for: 15m
  labels:
    severity: warning
    layer: k8s
    component: source-ingestion
    team: platform
    # Quoted so the value is unambiguously a string rather than a YAML boolean.
    auto_repair: "false"
    alert_category: "alertchain_provider_freshness"
  annotations:
    # {{ $labels.source }} is filled from the `source` label kept by
    # `max by (source)` in the expression above.
    summary: "{{ $labels.source }} source ingestion 超過 24 小時未更新"
    description: "{{ $labels.source }} webhook endpoint 可能仍健康,但 AwoooP source dossier 已超過 24 小時沒有新事件。這是 provider ingestion / upstream smoke / correlation freshness 缺口,不是 Alertmanager 主鏈路故障。"
    runbook: "先查 /api/v1/webhooks/{{ $labels.source }}/health,再查 /api/v1/platform/events/dossier/coverage?provider={{ $labels.source }};若 endpoint OK 但 latest stale,檢查上游 Sentry/SignOz notification channel 或排程 smoke。"
|
||||
|
||||
- alert: AlertChainUnhealthy
|
||||
expr: awoooi_alert_chain_healthy == 0
|
||||
for: 5m
|
||||
|
||||
Reference in New Issue
Block a user