Files
awoooi/apps/api/tests/test_alert_chain_smoke_metric.py
Your Name 29a67ec775
All checks were successful
CD Pipeline / tests (push) Successful in 1m27s
Code Review / ai-code-review (push) Successful in 14s
CD Pipeline / build-and-deploy (push) Successful in 4m2s
CD Pipeline / post-deploy-checks (push) Successful in 1m45s
fix(ci): tolerate empty source link canary response
2026-06-04 20:31:43 +08:00

483 lines
19 KiB
Python

from __future__ import annotations
import importlib.util
import json
import sys
import time
import unittest
from pathlib import Path
# The smoke-test script is loaded by file path (three directories above this
# test file, under scripts/) instead of through a regular ``import``.
SCRIPT_PATH = Path(__file__).resolve().parents[3] / "scripts" / "alert_chain_smoke_test.py"
SPEC = importlib.util.spec_from_file_location("alert_chain_smoke_test", SCRIPT_PATH)
# Validate the spec BEFORE it is used: module_from_spec(None) would fail with
# an opaque AttributeError, and a plain ``assert`` disappears under ``-O``.
if SPEC is None or SPEC.loader is None:
    raise ImportError(f"cannot create an import spec for {SCRIPT_PATH}")
alert_chain_smoke_test = importlib.util.module_from_spec(SPEC)
# NOTE(review): this flag is process-wide and is never restored; presumably it
# keeps a __pycache__ directory from being written next to the script - confirm.
sys.dont_write_bytecode = True
# Register the module before executing it, as in the importlib
# "importing a source file directly" recipe, so code that looks the module up
# in sys.modules while it is executing can find it.
sys.modules[SPEC.name] = alert_chain_smoke_test
SPEC.loader.exec_module(alert_chain_smoke_test)
class AlertChainSmokeMetricTest(unittest.TestCase):
    """Unit tests for the helpers in ``scripts/alert_chain_smoke_test.py``.

    Every HTTP interaction is replaced by an in-process fake installed on the
    loaded script module, so no network access happens in these tests.
    """

    def _patch_script(self, **overrides):
        """Temporarily replace attributes on the loaded smoke-test module.

        Each keyword names an attribute of ``alert_chain_smoke_test`` and
        gives its replacement.  The original value is captured first and its
        restoration is registered with ``addCleanup``, so it is put back after
        the test finishes whether the test passes, fails or errors.

        Raises:
            AttributeError: if the module has no attribute with that name.
        """
        for name, replacement in overrides.items():
            original = getattr(alert_chain_smoke_test, name)
            self.addCleanup(setattr, alert_chain_smoke_test, name, original)
            setattr(alert_chain_smoke_test, name, replacement)

    def test_api_health_passes_when_only_provider_is_degraded(self):
        """A down ``ollama`` with all other components up still passes."""

        def fake_get(url, *, params=None, timeout=None):
            self.assertTrue(url.endswith("/api/v1/health"))
            return alert_chain_smoke_test.HttpGetResult(
                200,
                json.dumps(
                    {
                        "status": "degraded",
                        "environment": "prod",
                        "components": {
                            "api": {"status": "up"},
                            "postgresql": {"status": "up"},
                            "redis": {"status": "up"},
                            "ollama": {"status": "down", "error": "timeout"},
                            "signoz": {"status": "up"},
                        },
                    }
                ),
            )

        self._patch_script(http_get=fake_get)
        result = alert_chain_smoke_test.check_api_health("http://api")

        self.assertTrue(result.passed)
        self.assertIn("非阻塞降級: ollama", result.message)

    def test_api_health_retries_transient_connection_failure(self):
        """One ``URLError`` is retried; both attempts use the set timeout."""
        calls = []

        def fake_get(url, *, params=None, timeout=None):
            self.assertTrue(url.endswith("/api/v1/health"))
            calls.append({"url": url, "timeout": timeout})
            # Only the very first attempt fails.
            if len(calls) == 1:
                raise alert_chain_smoke_test.URLError("timed out")
            return alert_chain_smoke_test.HttpGetResult(
                200,
                json.dumps(
                    {
                        "status": "healthy",
                        "environment": "prod",
                        "components": {
                            "api": {"status": "up"},
                            "postgresql": {"status": "up"},
                            "redis": {"status": "up"},
                        },
                    }
                ),
            )

        self._patch_script(
            http_get=fake_get,
            API_HEALTH_ATTEMPTS=3,
            API_HEALTH_TIMEOUT=20,
            API_HEALTH_RETRY_DELAY=0,  # no waiting between attempts in tests
        )
        result = alert_chain_smoke_test.check_api_health("http://api")

        self.assertTrue(result.passed)
        self.assertEqual(len(calls), 2)
        self.assertEqual({call["timeout"] for call in calls}, {20})

    def test_api_health_reports_attempts_after_retry_exhaustion(self):
        """When every attempt times out the failure message names the budget."""
        calls = []

        def fake_get(url, *, params=None, timeout=None):
            self.assertTrue(url.endswith("/api/v1/health"))
            calls.append(timeout)
            raise TimeoutError("timed out")

        self._patch_script(
            http_get=fake_get,
            API_HEALTH_ATTEMPTS=2,
            API_HEALTH_TIMEOUT=7,
            API_HEALTH_RETRY_DELAY=0,  # no waiting between attempts in tests
        )
        result = alert_chain_smoke_test.check_api_health("http://api")

        self.assertFalse(result.passed)
        self.assertEqual(calls, [7, 7])
        self.assertIn("attempts=2", result.message)
        self.assertIn("timeout=7s", result.message)

    def test_api_health_fails_when_core_component_is_down(self):
        """A down ``postgresql`` fails the check and is named in the message."""

        def fake_get(url, *, params=None, timeout=None):
            self.assertTrue(url.endswith("/api/v1/health"))
            return alert_chain_smoke_test.HttpGetResult(
                200,
                json.dumps(
                    {
                        "status": "degraded",
                        "components": {
                            "api": {"status": "up"},
                            "postgresql": {"status": "down"},
                            "redis": {"status": "up"},
                            "ollama": {"status": "up"},
                        },
                    }
                ),
            )

        self._patch_script(http_get=fake_get)
        result = alert_chain_smoke_test.check_api_health("http://api")

        self.assertFalse(result.passed)
        self.assertIn("核心組件異常: postgresql", result.message)

    def test_parse_app_alert_chain_metric_samples(self):
        """Only the alert-chain timestamp lines are parsed into samples."""
        exposition = "\n".join([
            "# HELP awoooi_alert_chain_last_success_timestamp Last successful alert chain",
            'awoooi_alert_chain_last_success_timestamp{source="alertmanager"} 123.5',
            'awoooi_alert_chain_last_success_timestamp{source="sentry"} 120',
            "unrelated_metric 1",
        ])

        samples = alert_chain_smoke_test.parse_app_alert_chain_metric_samples(exposition)

        self.assertEqual(
            samples,
            [
                alert_chain_smoke_test.AlertChainMetricSample(
                    source="alertmanager",
                    timestamp=123.5,
                    evidence_path="app_metrics",
                ),
                alert_chain_smoke_test.AlertChainMetricSample(
                    source="sentry",
                    timestamp=120.0,
                    evidence_path="app_metrics",
                ),
            ],
        )

    def test_newest_sample_for_source_prefers_requested_source(self):
        """The newest sample of the requested source wins over other sources."""
        samples = [
            alert_chain_smoke_test.AlertChainMetricSample("sentry", 999.0, "prometheus"),
            alert_chain_smoke_test.AlertChainMetricSample("alertmanager", 100.0, "prometheus"),
            alert_chain_smoke_test.AlertChainMetricSample("alertmanager", 200.0, "app_metrics"),
        ]

        sample = alert_chain_smoke_test._newest_sample_for_source(samples, "alertmanager")

        self.assertEqual(sample.timestamp, 200.0)
        self.assertEqual(sample.evidence_path, "app_metrics")

    def test_alert_chain_metric_result_marks_recent_app_metric_as_scrape_delay(self):
        """A 60-second-old app metric used as fallback passes with a scrape note."""
        sample = alert_chain_smoke_test.AlertChainMetricSample(
            source="alertmanager",
            timestamp=time.time() - 60,
            evidence_path="app_metrics",
        )

        result = alert_chain_smoke_test._alert_chain_metric_result(sample, fallback=True)

        self.assertTrue(result.passed)
        self.assertIn("Prometheus scrape 尚未看到", result.message)

    def test_alert_chain_metric_result_fails_persistent_silence(self):
        """A sample older than the silence limit is a critical failure."""
        sample = alert_chain_smoke_test.AlertChainMetricSample(
            source="alertmanager",
            timestamp=time.time() - alert_chain_smoke_test.MAX_ALERT_CHAIN_SILENCE_SECONDS - 60,
            evidence_path="prometheus",
        )

        result = alert_chain_smoke_test._alert_chain_metric_result(sample)

        self.assertFalse(result.passed)
        self.assertTrue(result.critical)

    def test_alert_chain_metric_checks_app_metric_when_prometheus_is_stale(self):
        """A fresh ``/metrics`` value rescues a stale Prometheus query result."""
        fresh_ts = time.time() - 30
        stale_ts = time.time() - alert_chain_smoke_test.MAX_ALERT_CHAIN_SILENCE_SECONDS - 60

        def fake_get(url, *, params=None, timeout=None):
            # Prometheus query endpoint: reports only the stale timestamp.
            if url.endswith("/api/v1/query"):
                return alert_chain_smoke_test.HttpGetResult(
                    200,
                    json.dumps(
                        {
                            "data": {
                                "result": [
                                    {
                                        "metric": {"source": "sentry"},
                                        "value": [time.time(), str(stale_ts)],
                                    }
                                ]
                            }
                        }
                    ),
                )
            # Application metrics endpoint: exposes the fresh timestamp.
            if url.endswith("/metrics"):
                return alert_chain_smoke_test.HttpGetResult(
                    200,
                    'awoooi_alert_chain_last_success_timestamp{source="sentry"} '
                    f"{fresh_ts}",
                )
            raise AssertionError(f"unexpected url {url}")

        self._patch_script(http_get=fake_get)
        result = alert_chain_smoke_test.check_alert_chain_metric(
            "http://prometheus",
            "http://api",
            source="sentry",
        )

        self.assertTrue(result.passed)
        self.assertIn("app_metrics", result.message)
        self.assertIn("Prometheus scrape 尚未看到", result.message)

    def test_source_provider_heartbeat_requires_operator_key(self):
        """Without an operator key the heartbeat is a critical failure."""
        result = alert_chain_smoke_test.send_source_provider_heartbeat(
            "https://awoooi.example",
            providers=["sentry", "signoz"],
            operator_key=None,
            operator_id="gitea-e2e-health",
        )

        self.assertFalse(result.passed)
        self.assertTrue(result.critical)
        self.assertIn("AWOOOP_OPERATOR_API_KEY", result.message)

    def test_source_provider_heartbeat_posts_expected_payload(self):
        """The heartbeat POST carries providers, run ref and operator headers."""
        calls = []

        def fake_post(url, payload, *, headers=None, timeout=None):
            calls.append(
                {
                    "url": url,
                    "payload": payload,
                    "headers": headers,
                    "timeout": timeout,
                }
            )
            return alert_chain_smoke_test.HttpGetResult(
                200,
                (
                    '{"status":"recorded","items":['
                    '{"provider":"sentry"},{"provider":"signoz"}]}'
                ),
            )

        self._patch_script(http_post_json=fake_post)
        result = alert_chain_smoke_test.send_source_provider_heartbeat(
            "https://awoooi.example",
            providers=["sentry", "signoz"],
            operator_key="secret",
            operator_id="gitea-e2e-health",
            run_ref="run-123",
        )

        self.assertTrue(result.passed)
        self.assertEqual(
            calls[0]["url"],
            "https://awoooi.example/api/v1/platform/events/dossier/provider-heartbeat",
        )
        self.assertEqual(calls[0]["payload"]["providers"], ["sentry", "signoz"])
        self.assertEqual(calls[0]["payload"]["run_ref"], "run-123")
        self.assertEqual(calls[0]["headers"]["X-AwoooP-Operator-Id"], "gitea-e2e-health")
        self.assertEqual(calls[0]["headers"]["X-AwoooP-Operator-Key"], "secret")

    def test_source_provider_upstream_canary_requires_operator_key(self):
        """Without an operator key the upstream canary is a critical failure."""
        result = alert_chain_smoke_test.send_source_provider_upstream_canary(
            "https://awoooi.example",
            providers=["sentry", "signoz"],
            operator_key=None,
            operator_id="gitea-e2e-health",
        )

        self.assertFalse(result.passed)
        self.assertTrue(result.critical)
        self.assertIn("AWOOOP_OPERATOR_API_KEY", result.message)

    def test_source_provider_upstream_canary_posts_provider_payloads(self):
        """Sentry is posted first, then SigNoz, each with operator headers."""
        calls = []

        def fake_post(url, payload, *, headers=None, timeout=None):
            calls.append(
                {
                    "url": url,
                    "payload": payload,
                    "headers": headers,
                    "timeout": timeout,
                }
            )
            if url.endswith("/api/v1/webhooks/sentry/error"):
                return alert_chain_smoke_test.HttpGetResult(
                    200,
                    '{"status":"canary_recorded","provider":"sentry"}',
                )
            if url.endswith("/api/v1/webhooks/signoz/alert"):
                return alert_chain_smoke_test.HttpGetResult(
                    200,
                    (
                        '{"status":"ok","results":['
                        '{"status":"canary_recorded","provider":"signoz"}]}'
                    ),
                )
            raise AssertionError(f"unexpected url {url}")

        self._patch_script(http_post_json=fake_post)
        result = alert_chain_smoke_test.send_source_provider_upstream_canary(
            "https://awoooi.example",
            providers=["sentry", "signoz"],
            operator_key="secret",
            operator_id="gitea-e2e-health",
            run_ref="run/123",
        )

        self.assertTrue(result.passed)
        self.assertEqual(
            calls[0]["url"],
            "https://awoooi.example/api/v1/webhooks/sentry/error",
        )
        self.assertEqual(
            calls[1]["url"],
            "https://awoooi.example/api/v1/webhooks/signoz/alert",
        )
        self.assertEqual(calls[0]["payload"]["data"]["issue"]["title"], "AwoooPSourceProviderCanary")
        self.assertEqual(calls[1]["payload"]["alerts"][0]["labels"]["awoooi_canary"], "true")
        self.assertEqual(calls[0]["headers"]["X-AwoooP-Operator-Id"], "gitea-e2e-health")
        self.assertEqual(calls[1]["headers"]["X-AwoooP-Operator-Key"], "secret")

    def test_source_link_canary_requires_operator_key(self):
        """Without an operator key the source-link canary fails critically."""
        result = alert_chain_smoke_test.send_source_link_canary(
            "https://awoooi.example",
            target_incident_id="INC-20260505-25E744",
            operator_key=None,
            operator_id="gitea-e2e-health",
            run_ref="run-123",
        )

        self.assertFalse(result.passed)
        self.assertTrue(result.critical)
        self.assertIn("AWOOOP_OPERATOR_API_KEY", result.message)

    def test_source_link_canary_posts_dedicated_sentry_payload(self):
        """The canary posts a Sentry payload tagged with the target incident.

        The run ref ``run/123`` is expected to show up as ``run-123`` inside
        the issue id.
        """
        calls = []

        def fake_post(url, payload, *, headers=None, timeout=None):
            calls.append(
                {
                    "url": url,
                    "payload": payload,
                    "headers": headers,
                    "timeout": timeout,
                }
            )
            return alert_chain_smoke_test.HttpGetResult(
                200,
                '{"status":"canary_recorded","provider":"sentry"}',
            )

        self._patch_script(http_post_json=fake_post)
        result = alert_chain_smoke_test.send_source_link_canary(
            "https://awoooi.example",
            target_incident_id="INC-20260505-25E744",
            operator_key="secret",
            operator_id="gitea-e2e-health",
            run_ref="run/123",
        )

        self.assertTrue(result.passed)
        self.assertEqual(
            calls[0]["url"],
            "https://awoooi.example/api/v1/webhooks/sentry/error",
        )
        issue = calls[0]["payload"]["data"]["issue"]
        tags = calls[0]["payload"]["data"]["event"]["tags"]
        self.assertEqual(issue["id"], "awoooi-source-link-canary-run-123")
        self.assertEqual(issue["title"], "AwoooPSourceLinkCanary")
        self.assertIn(["source_link_canary", "true"], tags)
        self.assertIn(["target_incident_id", "INC-20260505-25E744"], tags)
        self.assertEqual(calls[0]["headers"]["X-AwoooP-Operator-Key"], "secret")

    def test_source_link_canary_accepts_empty_2xx_for_downstream_readback(self):
        """A 204 with an empty body passes and the message demands readback."""

        def fake_post(url, payload, *, headers=None, timeout=None):
            self.assertTrue(url.endswith("/api/v1/webhooks/sentry/error"))
            self.assertEqual(payload["data"]["issue"]["title"], "AwoooPSourceLinkCanary")
            return alert_chain_smoke_test.HttpGetResult(204, "")

        self._patch_script(http_post_json=fake_post)
        result = alert_chain_smoke_test.send_source_link_canary(
            "https://awoooi.example",
            target_incident_id="INC-20260505-25E744",
            operator_key="secret",
            operator_id="gitea-e2e-health",
            run_ref="run/123",
        )

        self.assertTrue(result.passed)
        self.assertIn("source-correlation smoke must verify readback", result.message)

    def test_source_link_canary_reports_http_error_before_json_parse(self):
        """A 502 HTML response fails with the status and body in the message."""

        def fake_post(url, payload, *, headers=None, timeout=None):
            self.assertTrue(url.endswith("/api/v1/webhooks/sentry/error"))
            return alert_chain_smoke_test.HttpGetResult(
                502,
                "<html><body>bad gateway</body></html>",
            )

        self._patch_script(http_post_json=fake_post)
        result = alert_chain_smoke_test.send_source_link_canary(
            "https://awoooi.example",
            target_incident_id="INC-20260505-25E744",
            operator_key="secret",
            operator_id="gitea-e2e-health",
            run_ref="run/123",
        )

        self.assertFalse(result.passed)
        self.assertIn("sentry HTTP 502", result.message)
        self.assertIn("bad gateway", result.message)
# Run the test cases with unittest's runner when this file is executed directly.
if __name__ == "__main__":
    unittest.main()