Files
awoooi/apps/api/scripts/tracer_bullet_2.py
OG T 4f1c8ae473 fix(ci): Resolve Python and TypeScript lint errors
- Fix 35 Python ruff errors (B904, F841, E722, E741, B007, B008)
- Add eslint config for lewooogo-core package
- Update pyproject.toml to new ruff lint config format
- Relax frontend eslint rules to warnings for unused vars
- Allow console.* for debugging (TODO: unified logger)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-24 09:20:56 +08:00

361 lines
13 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Tracer Bullet 2.0 - 全站閉環測試腳本
Phase 4: E2E Integration Test
測試流程:
1. 觸發假告警 (Mock Alert)
2. GraphRAG 分析 (Blast Radius + Root Cause)
3. 產生 ApprovalCard (Dry-Run)
4. 人類批准 (Multi-Sig)
5. MCP 模擬執行
執行方式:
cd apps/api
python scripts/tracer_bullet_2.py
"""
import asyncio
import json
from datetime import datetime, timezone
# ==================== 模擬模組導入 ====================
# 實際運行時這些會從專案導入
# from src.services import (
# topology_graph, trust_engine, multi_sig_engine, dry_run_engine
# )
# from src.plugins.finops import idle_scanner
# from src.plugins.mcp import mcp_bridge
# ==================== Test Configuration ====================
class TracerBullet2:
"""全站閉環測試器"""
def __init__(self):
self.test_id = f"tb2-{datetime.utcnow().strftime('%Y%m%d%H%M%S')}"
self.results: list[dict] = []
def log(self, step: str, status: str, data: dict | None = None):
"""記錄測試結果"""
result = {
"step": step,
"status": status,
"timestamp": datetime.utcnow().isoformat(),
"data": data or {},
}
self.results.append(result)
emoji = "" if status == "PASS" else "" if status == "FAIL" else "🔄"
print(f"{emoji} [{step}] {status}")
if data:
print(f" {json.dumps(data, indent=2, default=str)}")
# ==================== Step 1: Mock Alert ====================
async def step1_trigger_alert(self) -> dict:
"""
Step 1: 觸發假告警
模擬 Prometheus AlertManager 發送告警:
- frontend 服務 5xx 錯誤率上升
"""
print("\n" + "=" * 60)
print("STEP 1: TRIGGER MOCK ALERT")
print("=" * 60)
alert = {
"alertname": "HighErrorRate",
"service": "frontend",
"namespace": "production",
"severity": "critical",
"error_rate": 15.2, # 15% 5xx
"threshold": 5.0,
"fired_at": datetime.utcnow().isoformat(),
}
self.log("trigger_alert", "PASS", alert)
return alert
# ==================== Step 2: GraphRAG Analysis ====================
async def step2_graphrag_analysis(self, alert: dict) -> dict:
"""
Step 2: GraphRAG 分析
呼叫 TopologyGraph.get_blast_radius_and_root_cause()
分析:
- Blast Radius: frontend 掛了誰會跟著掛
- Root Cause: frontend 的依賴誰目前有問題
"""
print("\n" + "=" * 60)
print("STEP 2: GRAPHRAG ANALYSIS")
print("=" * 60)
target_service = alert["service"]
# Mock GraphRAG 結果 (實際會呼叫 topology_graph)
analysis = {
"targetService": target_service,
"blastRadius": {
"affectedServices": ["ingress"],
"affectedCount": 1,
"criticalPath": ["ingress -> frontend"],
"impactSummary": "If 'frontend' goes down, 1 upstream service (ingress) will be affected.",
},
"rootCause": {
"unhealthyDependencies": ["postgres-db"],
"dependencyChain": ["auth-service", "product-api", "order-api", "postgres-db", "redis-cache"],
"probableRootCauses": ["postgres-db"],
"analysisSummary": "Found 1 unhealthy dependency: postgres-db. Probable root cause: postgres-db.",
},
"analyzedAt": datetime.utcnow().isoformat(),
}
# 視覺化輸出
print("\n[BLAST RADIUS - Upstream Impact]")
print(" ┌─────────────────────┐")
print(" │ ingress │")
print(" └─────────┬───────────┘")
print(" │ depends on")
print("")
print(" ┌─────────────────────┐")
print(" │ frontend │ X")
print(" └─────────────────────┘")
print("\n[ROOT CAUSE - Downstream Chain]")
print(" ┌─────────────────────┐")
print(" │ frontend │ !")
print(" └─────────┬───────────┘")
print(" │ calls")
print("")
print(" ┌─────────────────────┐")
print(" │ postgres-db │ X (UNHEALTHY)")
print(" └─────────────────────┘")
self.log("graphrag_analysis", "PASS", analysis)
return analysis
# ==================== Step 3: Dry-Run & ApprovalCard ====================
async def step3_generate_approval(self, analysis: dict) -> dict:
"""
Step 3: 產生 ApprovalCard
根據分析結果,建議重啟 postgres-db
執行 Dry-Run 檢查
"""
print("\n" + "=" * 60)
print("STEP 3: DRY-RUN & APPROVAL CARD")
print("=" * 60)
root_cause = analysis["rootCause"]["probableRootCauses"][0]
# 建議動作
proposed_action = {
"operation": "restart_pod",
"parameters": {
"pod_name": f"{root_cause}-0",
"namespace": "production",
"graceful": True,
},
"reason": f"Auto-suggested based on GraphRAG root cause analysis: {root_cause} is unhealthy",
}
# Mock Dry-Run 結果
dry_run_result = {
"checks": [
{"name": "RBAC Permission", "passed": True, "message": "User has pod/delete permission"},
{"name": "Syntax Validation", "passed": True, "message": "Parameters valid"},
{"name": "Resource Exists", "passed": True, "message": "Pod postgres-db-0 exists"},
{"name": "No PDB Violation", "passed": True, "message": "PodDisruptionBudget allows 1 eviction"},
],
"overallPassed": True,
"blastRadius": {
"affectedPods": 1,
"affectedServices": ["postgres-db"],
"dataImpact": "NONE", # Graceful restart
},
"riskLevel": "high", # Database 操作
}
# 產生 ApprovalCard
approval_card = {
"approvalId": f"approval-{self.test_id}",
"action": proposed_action,
"dryRunResult": dry_run_result,
"requiredSignatures": 2, # HIGH risk = 2-sig
"allowedRoles": ["admin", "devops", "sre"],
"createdAt": datetime.utcnow().isoformat(),
"expiresAt": None, # No expiry for critical ops
}
print("\n[APPROVAL CARD]")
print(f" Action: {proposed_action['operation']}")
print(f" Target: {proposed_action['parameters']['pod_name']}")
print(f" Risk Level: {dry_run_result['riskLevel'].upper()}")
print(f" Required Signatures: {approval_card['requiredSignatures']}")
print(f" Dry-Run: {'PASSED' if dry_run_result['overallPassed'] else 'FAILED'}")
self.log("generate_approval", "PASS", approval_card)
return approval_card
# ==================== Step 4: Multi-Sig Approval ====================
async def step4_multisig_approval(self, approval_card: dict) -> dict:
"""
Step 4: 人類批准 (Multi-Sig)
模擬兩位管理者簽名:
1. DevOps Engineer
2. SRE Lead
"""
print("\n" + "=" * 60)
print("STEP 4: MULTI-SIG APPROVAL")
print("=" * 60)
approval_id = approval_card["approvalId"]
# 第一位簽名
sig1 = {
"userId": "devops-alice",
"role": "devops",
"signedAt": datetime.utcnow().isoformat(),
"comment": "GraphRAG analysis looks correct. Approving restart.",
}
print(f"\n[SIGNATURE 1] {sig1['role']}: {sig1['userId']}")
print(f" Comment: {sig1['comment']}")
# 第二位簽名
sig2 = {
"userId": "sre-bob",
"role": "sre",
"signedAt": datetime.utcnow().isoformat(),
"comment": "Verified PDB. Safe to proceed.",
}
print(f"\n[SIGNATURE 2] {sig2['role']}: {sig2['userId']}")
print(f" Comment: {sig2['comment']}")
# 批准結果
approval_result = {
"approvalId": approval_id,
"status": "APPROVED",
"signatures": [sig1, sig2],
"approvedAt": datetime.utcnow().isoformat(),
}
print(f"\n[APPROVAL STATUS] {approval_result['status']}")
print(f" Signatures: {len(approval_result['signatures'])}/{approval_card['requiredSignatures']}")
self.log("multisig_approval", "PASS", approval_result)
return approval_result
# ==================== Step 5: MCP Execution ====================
async def step5_mcp_execution(self, approval_result: dict, approval_card: dict) -> dict:
"""
Step 5: MCP 模擬執行
透過 MCP Bridge 執行操作
(Phase 3 為模擬Phase 4+ 連接真實 K8s)
"""
print("\n" + "=" * 60)
print("STEP 5: MCP EXECUTION")
print("=" * 60)
action = approval_card["action"]
# TOCTOU 保護: 再次執行 Dry-Run
print("\n[TOCTOU CHECK] Re-running dry-run before execution...")
toctou_passed = True # Mock
print(f" Result: {'PASSED' if toctou_passed else 'VOIDED'}")
if not toctou_passed:
self.log("mcp_execution", "FAIL", {"reason": "TOCTOU conflict detected"})
return {"status": "VOIDED"}
# MCP 執行
execution_result = {
"executionId": f"exec-{self.test_id}",
"operation": action["operation"],
"parameters": action["parameters"],
"status": "SUCCESS",
"output": {
"message": f"Pod {action['parameters']['pod_name']} restarted successfully",
"newPodName": "postgres-db-0", # Same name after restart
"restartTime": "2.3s",
},
"executedAt": datetime.utcnow().isoformat(),
}
print("\n[EXECUTION RESULT]")
print(f" Status: {execution_result['status']}")
print(f" Output: {execution_result['output']['message']}")
print(f" Restart Time: {execution_result['output']['restartTime']}")
# 更新 Trust Engine
print("\n[TRUST ENGINE] Recording approval for progressive autonomy...")
print(" Action Pattern: restart_pod:postgres-*")
print(" Trust Score: +1")
self.log("mcp_execution", "PASS", execution_result)
return execution_result
# ==================== Run All ====================
async def run(self):
"""執行完整測試流程"""
print("\n" + "=" * 60)
print("TRACER BULLET 2.0 - FULL LOOP TEST")
print(f"Test ID: {self.test_id}")
print("=" * 60)
try:
# Step 1: Trigger Alert
alert = await self.step1_trigger_alert()
# Step 2: GraphRAG Analysis
analysis = await self.step2_graphrag_analysis(alert)
# Step 3: Dry-Run & Approval Card
approval_card = await self.step3_generate_approval(analysis)
# Step 4: Multi-Sig Approval
approval_result = await self.step4_multisig_approval(approval_card)
# Step 5: MCP Execution
_execution_result = await self.step5_mcp_execution(approval_result, approval_card)
# Summary
print("\n" + "=" * 60)
print("TEST SUMMARY")
print("=" * 60)
passed = sum(1 for r in self.results if r["status"] == "PASS")
failed = sum(1 for r in self.results if r["status"] == "FAIL")
print(f" Total Steps: {len(self.results)}")
print(f" Passed: {passed}")
print(f" Failed: {failed}")
print(f" Status: {'ALL PASSED' if failed == 0 else 'SOME FAILED'}")
return {
"testId": self.test_id,
"status": "PASS" if failed == 0 else "FAIL",
"results": self.results,
}
except Exception as e:
self.log("unexpected_error", "FAIL", {"error": str(e)})
raise
# ==================== Main ====================
if __name__ == "__main__":
    import sys

    tracer = TracerBullet2()
    summary = asyncio.run(tracer.run())
    # Surface the verdict through the exit status so shells and CI can detect
    # a failed run; an exception raised by run() still propagates as before.
    if summary["status"] != "PASS":
        sys.exit(1)