#!/usr/bin/env python3
"""
Phase 6.3 aggregation test script
=================================

What it does:
1. Sends 3 test alerts in a row that share the same origin (namespace and
   target) but have different alert names.
2. Lets you confirm that the 3 alerts were aggregated into the ``signals``
   array of a single Incident.
3. Lets you confirm that ``affected_services`` was populated.

The script does not inspect Redis itself; after sending the alerts it prints
the commands to run and a manual PASS/FAIL checklist.

Usage:
    cd apps/api
    python scripts/test_phase63_aggregation.py

Expected result:
- All 3 alerts are aggregated into 1 Incident
- The signals array has length 3
- affected_services contains "payment-service"
"""

import asyncio
from datetime import datetime

import httpx

# API endpoints
API_BASE = "http://localhost:8000"
SIGNALS_ENDPOINT = f"{API_BASE}/api/v1/webhooks/signals"

# Test alerts: same namespace + same target, different alert_name.
# Scenario: payment-service runs into a series of related problems.
# Note: severity must be one of info | warning | critical
# (per the SignalPayload definition).
TEST_ALERTS = [
    {
        "alert_name": "PaymentServiceHighLatency",
        "severity": "warning",
        "source": "prometheus",
        "namespace": "payment-prod",
        "target": "payment-service",
        "fingerprint": "fp_latency_001",
        "labels": {"namespace": "payment-prod", "pod": "payment-service-abc123"},
        "annotations": {"summary": "Payment service latency > 500ms"},
    },
    {
        "alert_name": "PaymentServiceErrorRate",
        # Was "high" originally, but the API only accepts info|warning|critical.
        "severity": "warning",
        "source": "prometheus",
        "namespace": "payment-prod",
        "target": "payment-service",
        "fingerprint": "fp_error_001",
        "labels": {"namespace": "payment-prod", "pod": "payment-service-abc123"},
        "annotations": {"summary": "Payment service error rate > 5%"},
    },
    {
        "alert_name": "PaymentServicePodCrash",
        "severity": "critical",
        "source": "alertmanager",
        "namespace": "payment-prod",
        "target": "payment-service",
        "fingerprint": "fp_crash_001",
        "labels": {"namespace": "payment-prod", "pod": "payment-service-abc123"},
        "annotations": {"summary": "Payment service pod crashed"},
    },
]


async def send_alert(client: httpx.AsyncClient, alert: dict, index: int) -> dict:
    """Post a single alert to the signals webhook and print a short report.

    Args:
        client: Open HTTP client used to send the request.
        alert: Alert payload; must contain ``alert_name``, ``severity``,
            ``namespace`` and ``target`` (they are echoed to the console).
        index: Zero-based position of the alert, used for the progress label.

    Returns:
        The decoded JSON object from the response, or an empty dict when the
        body is not a JSON object.
    """
    print(f"\n[{index + 1}/{len(TEST_ALERTS)}] 發送告警: {alert['alert_name']}")
    print(f" severity: {alert['severity']}")
    print(f" namespace: {alert['namespace']}")
    print(f" target: {alert['target']}")

    response = await client.post(
        SIGNALS_ENDPOINT,
        json=alert,
        timeout=10.0,
    )
    # Report the status before touching the body, so an error response with a
    # non-JSON body still shows its status code instead of a traceback.
    print(f" status_code: {response.status_code}")

    try:
        result = response.json()
    except ValueError:
        # Body is not valid JSON (json.JSONDecodeError is a ValueError).
        result = {}
    if not isinstance(result, dict):
        # Valid JSON but not an object; callers expect a dict.
        result = {}

    print(f" message_id: {result.get('message_id', 'N/A')}")
    return result


async def check_redis_incident(client: httpx.AsyncClient) -> dict | None:
    """Placeholder for looking up the aggregated Incident.

    No lookup is implemented: the function ignores ``client`` and always
    returns ``None``. Verification is done manually with the commands that
    ``main`` prints.
    """
    return None


async def main():
    """Run the test flow: health check, send the alerts, print the checklist."""
    print("=" * 60)
    print("Phase 6.3 聚合測試")
    print("=" * 60)
    print(f"時間: {datetime.now().isoformat()}")
    print("目標: 驗證 3 筆同源告警聚合到 1 個 Incident")
    print()

    async with httpx.AsyncClient() as client:
        # 1. Make sure the API is reachable before sending anything.
        print("[0] 檢查 API 健康狀態...")
        try:
            health = await client.get(f"{API_BASE}/api/v1/health", timeout=5.0)
        except httpx.HTTPError as exc:
            print(f" API 連線失敗: {exc}")
            print(" 請確認 API 已啟動: docker compose up -d")
            return
        print(f" API status: {health.status_code}")

        # 2. Send the alerts one after another.
        print("\n" + "-" * 60)
        print("階段一: 連續發送 3 筆告警")
        print("-" * 60)

        for i, alert in enumerate(TEST_ALERTS):
            await send_alert(client, alert, i)
            # Short pause so the consumer has time to process each signal.
            await asyncio.sleep(0.5)

        # 3. Wait for the consumer to finish processing.
        print("\n" + "-" * 60)
        print("階段二: 等待 Consumer 處理 (3 秒)")
        print("-" * 60)
        await asyncio.sleep(3)

    # 4. Print the commands for manual verification.
    print("\n" + "-" * 60)
    print("階段三: 驗證指令")
    print("-" * 60)
    print()
    print("請執行以下 Redis 指令檢查聚合結果:")
    print()
    print("# 1. 查看所有 Incident keys")
    print("docker exec -it awoooi-redis redis-cli KEYS 'incident:*'")
    print()
    # NOTE(review): the text inside the parentheses below looks like it lost a
    # placeholder (possibly an incident-id marker) — confirm the intended text.
    print("# 2. 查看特定 Incident 的 JSON (取代 )")
    print("docker exec -it awoooi-redis redis-cli GET 'incident:INC-XXXXXXXX-XXXXXX'")
    print()
    print("# 3. 或直接用以下指令掃描並輸出所有 Incident:")
    print(
        "docker exec -it awoooi-redis redis-cli --no-raw KEYS 'incident:INC-*' | "
        "xargs -I {} docker exec -i awoooi-redis redis-cli GET {}"
    )
    print()

    # 5. Print the command for checking the API logs.
    print("-" * 60)
    print("檢查 API 日誌:")
    print("-" * 60)
    print("docker logs awoooi-api --tail 50 | grep -E '(signal_|incident_|aggregat)'")
    print()

    # 6. Acceptance criteria, to be ticked off manually.
    print("-" * 60)
    print("驗證標準 (PASS/FAIL)")
    print("-" * 60)
    print("[ ] 只有 1 個 Incident 被建立 (非 3 個)")
    print("[ ] signals 陣列長度 = 3")
    print("[ ] affected_services 包含 'payment-service'")
    print("[ ] severity 升級為 'P0' (因為第三筆是 critical)")
    print()
    print("=" * 60)
    print("測試腳本執行完成")
    print("=" * 60)


if __name__ == "__main__":
    asyncio.run(main())