fix(ci): use internal metrics for provider freshness smoke
This commit is contained in:
@@ -61,6 +61,7 @@ jobs:
|
||||
AWOOOP_OPERATOR_ID=gitea-e2e-health \
|
||||
python3 scripts/alert_chain_smoke_test.py \
|
||||
--api-url https://awoooi.wooo.work \
|
||||
--metrics-api-url http://192.168.0.125:32334 \
|
||||
--source-provider-heartbeat \
|
||||
--json
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import unittest
|
||||
@@ -78,6 +79,50 @@ class AlertChainSmokeMetricTest(unittest.TestCase):
|
||||
self.assertFalse(result.passed)
|
||||
self.assertTrue(result.critical)
|
||||
|
||||
def test_alert_chain_metric_checks_app_metric_when_prometheus_is_stale(self):
|
||||
fresh_ts = time.time() - 30
|
||||
stale_ts = time.time() - alert_chain_smoke_test.MAX_ALERT_CHAIN_SILENCE_SECONDS - 60
|
||||
|
||||
def fake_get(url, *, params=None, timeout=None):
|
||||
if url.endswith("/api/v1/query"):
|
||||
return alert_chain_smoke_test.HttpGetResult(
|
||||
200,
|
||||
json.dumps(
|
||||
{
|
||||
"data": {
|
||||
"result": [
|
||||
{
|
||||
"metric": {"source": "sentry"},
|
||||
"value": [time.time(), str(stale_ts)],
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
),
|
||||
)
|
||||
if url.endswith("/metrics"):
|
||||
return alert_chain_smoke_test.HttpGetResult(
|
||||
200,
|
||||
'awoooi_alert_chain_last_success_timestamp{source="sentry"} '
|
||||
f"{fresh_ts}",
|
||||
)
|
||||
raise AssertionError(f"unexpected url {url}")
|
||||
|
||||
original_get = alert_chain_smoke_test.http_get
|
||||
try:
|
||||
alert_chain_smoke_test.http_get = fake_get
|
||||
result = alert_chain_smoke_test.check_alert_chain_metric(
|
||||
"http://prometheus",
|
||||
"http://api",
|
||||
source="sentry",
|
||||
)
|
||||
finally:
|
||||
alert_chain_smoke_test.http_get = original_get
|
||||
|
||||
self.assertTrue(result.passed)
|
||||
self.assertIn("app_metrics", result.message)
|
||||
self.assertIn("Prometheus scrape 尚未看到", result.message)
|
||||
|
||||
def test_source_provider_heartbeat_requires_operator_key(self):
|
||||
result = alert_chain_smoke_test.send_source_provider_heartbeat(
|
||||
"https://awoooi.example",
|
||||
|
||||
@@ -364,6 +364,7 @@ def check_alert_chain_metric(
|
||||
"awoooi_alert_chain_last_success_timestamp"
|
||||
f'{{source="{_escape_prometheus_label_value(source)}"}}'
|
||||
)
|
||||
prometheus_result: CheckResult | None = None
|
||||
try:
|
||||
resp = http_get(
|
||||
f"{prometheus_url}/api/v1/query",
|
||||
@@ -379,11 +380,17 @@ def check_alert_chain_metric(
|
||||
source,
|
||||
)
|
||||
if sample:
|
||||
return _alert_chain_metric_result(sample)
|
||||
prometheus_result = _alert_chain_metric_result(sample)
|
||||
if prometheus_result.passed:
|
||||
return prometheus_result
|
||||
except (URLError, TimeoutError, OSError, json.JSONDecodeError) as e:
|
||||
prometheus_error = _http_error_message(e)
|
||||
else:
|
||||
prometheus_error = "Prometheus 未抓到"
|
||||
prometheus_error = (
|
||||
prometheus_result.message
|
||||
if prometheus_result is not None
|
||||
else "Prometheus 未抓到"
|
||||
)
|
||||
|
||||
try:
|
||||
app_resp = http_get(f"{api_url}/metrics", timeout=TIMEOUT)
|
||||
@@ -394,8 +401,13 @@ def check_alert_chain_metric(
|
||||
source,
|
||||
)
|
||||
if app_sample:
|
||||
return _alert_chain_metric_result(app_sample, fallback=True)
|
||||
app_result = _alert_chain_metric_result(app_sample, fallback=True)
|
||||
if app_result.passed or prometheus_result is None:
|
||||
return app_result
|
||||
return prometheus_result
|
||||
except (URLError, TimeoutError, OSError) as e:
|
||||
if prometheus_result is not None:
|
||||
return prometheus_result
|
||||
return CheckResult(
|
||||
"Alert Chain Metric",
|
||||
False,
|
||||
@@ -406,6 +418,9 @@ def check_alert_chain_metric(
|
||||
critical=False,
|
||||
)
|
||||
|
||||
if prometheus_result is not None:
|
||||
return prometheus_result
|
||||
|
||||
return CheckResult(
|
||||
"Alert Chain Metric",
|
||||
False,
|
||||
@@ -610,6 +625,7 @@ def run_smoke_test(
|
||||
api_url: str,
|
||||
fail_fast: bool = False,
|
||||
*,
|
||||
metrics_api_url: str | None = None,
|
||||
source_provider_heartbeat: bool = False,
|
||||
source_providers: list[str] | None = None,
|
||||
operator_key: str | None = None,
|
||||
@@ -617,9 +633,12 @@ def run_smoke_test(
|
||||
run_ref: str | None = None,
|
||||
) -> SmokeTestReport:
|
||||
report = SmokeTestReport()
|
||||
metrics_url = metrics_api_url or api_url
|
||||
|
||||
print("\n🔍 AWOOOI Alert Chain Smoke Test")
|
||||
print(f" API: {api_url}")
|
||||
if metrics_url != api_url:
|
||||
print(f" Metrics API: {metrics_url}")
|
||||
print(f" 時間: {time.strftime('%Y-%m-%d %H:%M:%S %Z')}")
|
||||
print("-" * 50)
|
||||
|
||||
@@ -629,7 +648,7 @@ def run_smoke_test(
|
||||
return report
|
||||
|
||||
# Check 2: Alert Chain Metric
|
||||
report.add(check_alert_chain_metric(PROMETHEUS_URL, api_url))
|
||||
report.add(check_alert_chain_metric(PROMETHEUS_URL, metrics_url))
|
||||
|
||||
# Check 3: Webhook Health
|
||||
for result in check_webhook_health(api_url):
|
||||
@@ -652,7 +671,13 @@ def run_smoke_test(
|
||||
|
||||
if heartbeat_result.passed:
|
||||
for source in provider_list:
|
||||
report.add(check_alert_chain_metric(PROMETHEUS_URL, api_url, source=source))
|
||||
report.add(
|
||||
check_alert_chain_metric(
|
||||
PROMETHEUS_URL,
|
||||
metrics_url,
|
||||
source=source,
|
||||
)
|
||||
)
|
||||
|
||||
# Check 4: SigNoz
|
||||
report.add(check_signoz_reachable(SIGNOZ_URL))
|
||||
@@ -671,6 +696,14 @@ def main() -> int:
|
||||
parser.add_argument(
|
||||
"--api-url", default=DEFAULT_API_URL, help="API base URL"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--metrics-api-url",
|
||||
default=os.environ.get("ALERT_CHAIN_METRICS_API_URL"),
|
||||
help=(
|
||||
"API base URL used only for /metrics fallback; useful when public "
|
||||
"API routes /metrics to the frontend"
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fail-fast", action="store_true", help="第一個 critical 失敗即中止"
|
||||
)
|
||||
@@ -708,6 +741,7 @@ def main() -> int:
|
||||
report = run_smoke_test(
|
||||
args.api_url,
|
||||
args.fail_fast,
|
||||
metrics_api_url=args.metrics_api_url,
|
||||
source_provider_heartbeat=args.source_provider_heartbeat,
|
||||
source_providers=args.source_provider,
|
||||
operator_key=os.environ.get(args.operator_key_env),
|
||||
|
||||
Reference in New Issue
Block a user