fix(ci): use internal metrics for provider freshness smoke
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 4m0s
CD Pipeline / build-and-deploy (push) Successful in 3m30s
CD Pipeline / post-deploy-checks (push) Successful in 2m9s

This commit is contained in:
Your Name
2026-05-20 19:51:15 +08:00
parent 6003fd03ec
commit 017d57c96a
3 changed files with 85 additions and 5 deletions

View File

@@ -61,6 +61,7 @@ jobs:
AWOOOP_OPERATOR_ID=gitea-e2e-health \
python3 scripts/alert_chain_smoke_test.py \
--api-url https://awoooi.wooo.work \
--metrics-api-url http://192.168.0.125:32334 \
--source-provider-heartbeat \
--json

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import importlib.util
import json
import sys
import time
import unittest
@@ -78,6 +79,50 @@ class AlertChainSmokeMetricTest(unittest.TestCase):
self.assertFalse(result.passed)
self.assertTrue(result.critical)
def test_alert_chain_metric_checks_app_metric_when_prometheus_is_stale(self):
    """A fresh app ``/metrics`` sample passes the check despite stale Prometheus data.

    ``http_get`` is temporarily replaced with a stub that serves two
    endpoints for the ``sentry`` source:

    * ``/api/v1/query`` returns a sample whose last-success timestamp is
      60 seconds older than ``MAX_ALERT_CHAIN_SILENCE_SECONDS`` allows.
    * ``/metrics`` returns a last-success timestamp that is 30 seconds old.

    The check is expected to pass, and its message must mention both the
    ``app_metrics`` fallback and the Prometheus scrape lag.
    """
    smoke = alert_chain_smoke_test
    recent_success = time.time() - 30
    expired_success = time.time() - smoke.MAX_ALERT_CHAIN_SILENCE_SECONDS - 60

    def stub_http_get(url, *, params=None, timeout=None):
        # Prometheus instant-query endpoint: answer with the stale sample.
        if url.endswith("/api/v1/query"):
            series = {
                "metric": {"source": "sentry"},
                "value": [time.time(), str(expired_success)],
            }
            payload = {"data": {"result": [series]}}
            return smoke.HttpGetResult(200, json.dumps(payload))
        # Application metrics endpoint: answer with the fresh sample.
        if url.endswith("/metrics"):
            exposition = (
                f'awoooi_alert_chain_last_success_timestamp{{source="sentry"}} '
                f"{recent_success}"
            )
            return smoke.HttpGetResult(200, exposition)
        # Any other request means the check contacted something unexpected.
        raise AssertionError(f"unexpected url {url}")

    saved_http_get = smoke.http_get
    smoke.http_get = stub_http_get
    try:
        result = smoke.check_alert_chain_metric(
            "http://prometheus",
            "http://api",
            source="sentry",
        )
    finally:
        # Always restore the real client so other tests are unaffected.
        smoke.http_get = saved_http_get

    self.assertTrue(result.passed)
    for expected_fragment in ("app_metrics", "Prometheus scrape 尚未看到"):
        self.assertIn(expected_fragment, result.message)
def test_source_provider_heartbeat_requires_operator_key(self):
result = alert_chain_smoke_test.send_source_provider_heartbeat(
"https://awoooi.example",

View File

@@ -364,6 +364,7 @@ def check_alert_chain_metric(
"awoooi_alert_chain_last_success_timestamp"
f'{{source="{_escape_prometheus_label_value(source)}"}}'
)
prometheus_result: CheckResult | None = None
try:
resp = http_get(
f"{prometheus_url}/api/v1/query",
@@ -379,11 +380,17 @@ def check_alert_chain_metric(
source,
)
if sample:
return _alert_chain_metric_result(sample)
prometheus_result = _alert_chain_metric_result(sample)
if prometheus_result.passed:
return prometheus_result
except (URLError, TimeoutError, OSError, json.JSONDecodeError) as e:
prometheus_error = _http_error_message(e)
else:
prometheus_error = "Prometheus 未抓到"
prometheus_error = (
prometheus_result.message
if prometheus_result is not None
else "Prometheus 未抓到"
)
try:
app_resp = http_get(f"{api_url}/metrics", timeout=TIMEOUT)
@@ -394,8 +401,13 @@ def check_alert_chain_metric(
source,
)
if app_sample:
return _alert_chain_metric_result(app_sample, fallback=True)
app_result = _alert_chain_metric_result(app_sample, fallback=True)
if app_result.passed or prometheus_result is None:
return app_result
return prometheus_result
except (URLError, TimeoutError, OSError) as e:
if prometheus_result is not None:
return prometheus_result
return CheckResult(
"Alert Chain Metric",
False,
@@ -406,6 +418,9 @@ def check_alert_chain_metric(
critical=False,
)
if prometheus_result is not None:
return prometheus_result
return CheckResult(
"Alert Chain Metric",
False,
@@ -610,6 +625,7 @@ def run_smoke_test(
api_url: str,
fail_fast: bool = False,
*,
metrics_api_url: str | None = None,
source_provider_heartbeat: bool = False,
source_providers: list[str] | None = None,
operator_key: str | None = None,
@@ -617,9 +633,12 @@ def run_smoke_test(
run_ref: str | None = None,
) -> SmokeTestReport:
report = SmokeTestReport()
metrics_url = metrics_api_url or api_url
print("\n🔍 AWOOOI Alert Chain Smoke Test")
print(f" API: {api_url}")
if metrics_url != api_url:
print(f" Metrics API: {metrics_url}")
print(f" 時間: {time.strftime('%Y-%m-%d %H:%M:%S %Z')}")
print("-" * 50)
@@ -629,7 +648,7 @@ def run_smoke_test(
return report
# Check 2: Alert Chain Metric
report.add(check_alert_chain_metric(PROMETHEUS_URL, api_url))
report.add(check_alert_chain_metric(PROMETHEUS_URL, metrics_url))
# Check 3: Webhook Health
for result in check_webhook_health(api_url):
@@ -652,7 +671,13 @@ def run_smoke_test(
if heartbeat_result.passed:
for source in provider_list:
report.add(check_alert_chain_metric(PROMETHEUS_URL, api_url, source=source))
report.add(
check_alert_chain_metric(
PROMETHEUS_URL,
metrics_url,
source=source,
)
)
# Check 4: SigNoz
report.add(check_signoz_reachable(SIGNOZ_URL))
@@ -671,6 +696,14 @@ def main() -> int:
parser.add_argument(
"--api-url", default=DEFAULT_API_URL, help="API base URL"
)
parser.add_argument(
"--metrics-api-url",
default=os.environ.get("ALERT_CHAIN_METRICS_API_URL"),
help=(
"API base URL used only for /metrics fallback; useful when public "
"API routes /metrics to the frontend"
),
)
parser.add_argument(
"--fail-fast", action="store_true", help="第一個 critical 失敗即中止"
)
@@ -708,6 +741,7 @@ def main() -> int:
report = run_smoke_test(
args.api_url,
args.fail_fast,
metrics_api_url=args.metrics_api_url,
source_provider_heartbeat=args.source_provider_heartbeat,
source_providers=args.source_provider,
operator_key=os.environ.get(args.operator_key_env),