Files
awoooi/apps/api/scripts/test_phase63_aggregation.py
OG T 196d269b92 feat: add all application source code
- apps/api: FastAPI backend with Dockerfile
- apps/web: Next.js frontend with Dockerfile
- apps/sensor: Signal collection agent
- packages: shared packages

Co-Authored-By: Claude <noreply@anthropic.com>
2026-03-22 18:57:44 +08:00

181 lines
6.0 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Phase 6.3 聚合測試腳本
=======================
功能:
1. 連續打入 3 筆「同源但不同名」的測試告警
2. 證明這 3 筆告警被成功「聚合」進同一個 Incident 的 signals 陣列中
3. 驗證 affected_services 有被正確填入
使用方式:
cd apps/api
python scripts/test_phase63_aggregation.py
預期結果:
- 3 筆告警全部聚合到 1 個 Incident
- signals 陣列長度 = 3
- affected_services 包含 "payment-service"
"""
import asyncio
import json
import httpx
from datetime import datetime
import time
# API endpoints
API_BASE = "http://localhost:8000"
SIGNALS_ENDPOINT = API_BASE + "/api/v1/webhooks/signals"


def _payment_alert(
    alert_name: str,
    severity: str,
    source: str,
    fingerprint: str,
    summary: str,
) -> dict:
    """Build one alert payload for the payment-service test scenario.

    Namespace, target and labels are the same for every alert; only the
    arguments vary.  A fresh ``labels`` / ``annotations`` dict is created on
    each call so the payloads do not share mutable state.
    """
    return {
        "alert_name": alert_name,
        "severity": severity,
        "source": source,
        "namespace": "payment-prod",
        "target": "payment-service",
        "fingerprint": fingerprint,
        "labels": {"namespace": "payment-prod", "pod": "payment-service-abc123"},
        "annotations": {"summary": summary},
    }


# Test alerts: same namespace + same target, different alert_name.
# Simulates a chain of related problems on payment-service.
# NOTE: severity may only be info | warning | critical (SignalPayload definition).
TEST_ALERTS = [
    _payment_alert(
        "PaymentServiceHighLatency",
        "warning",
        "prometheus",
        "fp_latency_001",
        "Payment service latency > 500ms",
    ),
    _payment_alert(
        "PaymentServiceErrorRate",
        # Was originally "high", but the API only accepts info|warning|critical.
        "warning",
        "prometheus",
        "fp_error_001",
        "Payment service error rate > 5%",
    ),
    _payment_alert(
        "PaymentServicePodCrash",
        "critical",
        "alertmanager",
        "fp_crash_001",
        "Payment service pod crashed",
    ),
]
async def send_alert(client: httpx.AsyncClient, alert: dict, index: int) -> dict:
    """Post one alert to the signals webhook and print a short report.

    Args:
        client: HTTP client used to send the request.
        alert: Alert payload sent as the JSON body.  Its ``alert_name``,
            ``severity``, ``namespace`` and ``target`` keys are printed
            before sending, so they must be present.
        index: Zero-based position of the alert; shown one-based in the
            progress label.

    Returns:
        The decoded response body when it is a JSON object, otherwise an
        empty dict.

    Exceptions raised by ``client.post`` are not caught here and propagate
    to the caller.
    """
    # Derive the total from the fixture list so the label stays correct if
    # alerts are added or removed.
    total = len(TEST_ALERTS)
    print(f"\n[{index+1}/{total}] 發送告警: {alert['alert_name']}")
    print(f" severity: {alert['severity']}")
    print(f" namespace: {alert['namespace']}")
    print(f" target: {alert['target']}")

    response = await client.post(
        SIGNALS_ENDPOINT,
        json=alert,
        timeout=10.0,
    )
    # Report the status before decoding so it is visible even when the body
    # cannot be parsed.
    print(f" status_code: {response.status_code}")

    try:
        result = response.json()
    except ValueError:
        # Body is not valid JSON (json.JSONDecodeError is a ValueError),
        # e.g. a plain-text error page from the server.
        result = None

    if not isinstance(result, dict):
        # Show a bounded excerpt of the unexpected body instead of crashing.
        print(f" body (non-JSON): {response.text[:200]}")
        result = {}

    print(f" message_id: {result.get('message_id', 'N/A')}")
    return result
async def check_redis_incident(client: httpx.AsyncClient) -> dict | None:
    """Placeholder for looking up the aggregated Incident; not implemented.

    No Redis or API query is performed, so nothing here can fail and the
    result is always ``None``.

    Args:
        client: HTTP client reserved for a future API-based lookup; currently
            unused.

    Returns:
        Always ``None``.
    """
    # TODO: query the Incident through an API endpoint (or Redis directly)
    # once such an access path exists.
    return None
def _print_section(title: str, *, leading_newline: bool = True) -> None:
    """Print *title* framed by two 60-dash rules, optionally after a blank line."""
    rule = "-" * 60
    print(("\n" if leading_newline else "") + rule)
    print(title)
    print(rule)


def _print_manual_checks() -> None:
    """Print the Redis/log commands and the checklist used to verify the run by hand."""
    _print_section("階段三: 驗證指令")
    print()
    for line in (
        "請執行以下 Redis 指令檢查聚合結果:",
        "",
        "# 1. 查看所有 Incident keys",
        "docker exec -it awoooi-redis redis-cli KEYS 'incident:*'",
        "",
        "# 2. 查看特定 Incident 的 JSON (取代 <INCIDENT_ID>)",
        "docker exec -it awoooi-redis redis-cli GET 'incident:INC-XXXXXXXX-XXXXXX'",
        "",
        "# 3. 或直接用以下指令掃描並輸出所有 Incident:",
        # The piped form must emit bare key names: no TTY (-i, not -it) and
        # --raw, otherwise redis-cli prints formatted `1) "key"` lines that
        # cannot be passed to GET through xargs.
        "docker exec -i awoooi-redis redis-cli --raw KEYS 'incident:INC-*'"
        " | xargs -I {} docker exec -i awoooi-redis redis-cli GET {}",
        "",
    ):
        print(line)

    _print_section("檢查 API 日誌:", leading_newline=False)
    print("docker logs awoooi-api --tail 50 | grep -E '(signal_|incident_|aggregat)'")
    print()

    _print_section("驗證標準 (PASS/FAIL)", leading_newline=False)
    for criterion in (
        "[ ] 只有 1 個 Incident 被建立 (非 3 個)",
        "[ ] signals 陣列長度 = 3",
        "[ ] affected_services 包含 'payment-service'",
        "[ ] severity 升級為 'P0' (因為第三筆是 critical)",
    ):
        print(criterion)
    print()


async def main() -> None:
    """Run the aggregation scenario: health check, send alerts, wait, print checks.

    The function requests the API health endpoint, posts every entry of
    ``TEST_ALERTS`` with a short pause after each, waits three seconds, and
    then prints the commands and checklist for verifying the result
    manually.  It does not inspect the aggregation result itself.

    If the health request fails with an ``httpx.HTTPError`` a hint is printed
    and the function returns without sending any alert.
    """
    banner = "=" * 60
    print(banner)
    print("Phase 6.3 聚合測試")
    print(banner)
    print(f"時間: {datetime.now().isoformat()}")
    print("目標: 驗證 3 筆同源告警聚合到 1 個 Incident")
    print()

    async with httpx.AsyncClient() as client:
        # 1. Make sure the API is reachable before sending anything.
        print("[0] 檢查 API 健康狀態...")
        try:
            health = await client.get(f"{API_BASE}/api/v1/health", timeout=5.0)
        except httpx.HTTPError as e:
            print(f" API 連線失敗: {e}")
            print(" 請確認 API 已啟動: docker compose up -d")
            return
        print(f" API status: {health.status_code}")

        # 2. Send the alerts one after another.
        _print_section("階段一: 連續發送 3 筆告警")
        for i, alert in enumerate(TEST_ALERTS):
            await send_alert(client, alert, i)
            # Short pause so the consumer has time to process each alert.
            await asyncio.sleep(0.5)

        # 3. Give the consumer time to finish processing.
        _print_section("階段二: 等待 Consumer 處理 (3 秒)")
        await asyncio.sleep(3)

    # 4-6. Verification commands, log command and pass/fail checklist.
    _print_manual_checks()

    print(banner)
    print("測試腳本執行完成")
    print(banner)


if __name__ == "__main__":
    asyncio.run(main())