Files
awoooi/apps/api/src/services/velero_client.py
OG T 0f5fecfef5
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 1m40s
fix(sprint5.1): 首席架構師審查修正 — S1×4 S2×2 S3×1
S1-1: service_registry/velero_client/preflight_service 改用 structlog
S1-2: velero_client datetime.now(UTC) 改用 now_taipei()(台北時區鐵律)
S1-3: Guardrail 失敗改為保守拒絕(原放行方向與安全目標相悖)
S1-4: service_registry import 移至模組頂部(移除函數內 import)
S2-1: telegram_gateway T1-T6 六個通知方法補齊 try/except
S2-2: webhooks.py Langfuse URL 改用 settings.LANGFUSE_URL(移除硬寫內網 IP)
S3-3: velero_client trigger_emergency_backup 改為 kubectl apply Backup CRD
      (原 kubectl create backup 語法不存在,審查發現靜默失敗風險)

審查評分: 70/100 → 修正後預計 90+/100

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 16:36:18 +08:00

134 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# apps/api/src/services/velero_client.py
# Velero Backup 查詢客戶端 (kubectl 方式, Q7 決策)
# 撰寫: Claude Sonnet 4.6 / 2026-04-08 Asia/Taipei
# 架構: leWOOOgo 積木化,純 Service 層
# 參考: ADR-062
from __future__ import annotations
import asyncio
import json
import time
from datetime import datetime
import structlog
from src.utils.timezone import now_taipei
# Module-level structured logger, named after this module.
logger = structlog.get_logger(__name__)
# Kubernetes namespace used for Velero Backup objects (passed to kubectl / manifests).
_VELERO_NAMESPACE = "velero"
# Upper bound applied to kubectl calls via asyncio.wait_for.
_KUBECTL_TIMEOUT = 30  # seconds
class VeleroClient:
"""
透過 kubectl 查詢 Velero 備份狀態
設計原則: 失敗時 fallback「假設備份過期」保守原則
"""
async def get_latest_backup_age_hours(self) -> float:
"""
查詢最近一次 Completed 備份距今幾小時
失敗時返回 999.0(視為嚴重過期,觸發 Abort
"""
try:
result = await asyncio.wait_for(
self._run_kubectl(
["get", "backup", "-n", _VELERO_NAMESPACE,
"-o", "json", "--field-selector", "status.phase=Completed"]
),
timeout=_KUBECTL_TIMEOUT,
)
data = json.loads(result)
items = data.get("items", [])
if not items:
logger.warning("velero_no_completed_backups")
return 999.0
latest = max(
items,
key=lambda x: x.get("status", {}).get("completionTimestamp", ""),
)
completion_ts = latest["status"].get("completionTimestamp", "")
if not completion_ts:
return 999.0
completed_at = datetime.fromisoformat(completion_ts.replace("Z", "+00:00"))
age = (now_taipei() - completed_at).total_seconds() / 3600
logger.info("velero_backup_age_checked", completion_ts=completion_ts, age_hours=round(age, 1))
return age
except asyncio.TimeoutError:
logger.error("velero_kubectl_timeout")
return 999.0
except Exception as e:
logger.error("velero_query_failed", error=str(e))
return 999.0
async def trigger_emergency_backup(self, backup_name: str | None = None) -> bool:
"""
觸發緊急備份(非同步,不等待完成)
返回 True 表示指令已成功發送
"""
# S3-3 修正: kubectl apply Backup CRD非 kubectl create backup不存在此子命令
# (2026-04-08 審查修正 Claude Sonnet 4.6 Asia/Taipei)
name = backup_name or f"emergency-{int(time.time())}"
manifest = (
f"apiVersion: velero.io/v1\n"
f"kind: Backup\n"
f"metadata:\n"
f" name: {name}\n"
f" namespace: {_VELERO_NAMESPACE}\n"
f"spec:\n"
f" includedNamespaces:\n"
f" - awoooi-prod\n"
f" ttl: 720h0m0s\n"
)
try:
# kubectl apply -f - (from stdin)
proc = await asyncio.wait_for(
asyncio.create_subprocess_exec(
"kubectl", "apply", "-f", "-",
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
),
timeout=_KUBECTL_TIMEOUT,
)
stdout, stderr = await proc.communicate(input=manifest.encode())
if proc.returncode != 0:
raise RuntimeError(f"kubectl apply 失敗: {stderr.decode()}")
logger.info("velero_emergency_backup_triggered", backup_name=name)
return True
except Exception as e:
logger.error("velero_emergency_backup_failed", backup_name=name, error=str(e))
return False
async def _run_kubectl(self, args: list[str]) -> str:
proc = await asyncio.create_subprocess_exec(
"kubectl", *args,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout, stderr = await proc.communicate()
if proc.returncode != 0:
raise RuntimeError(f"kubectl 失敗: {stderr.decode()}")
return stdout.decode()
# Lazily created module-level instance shared by all callers.
_velero_client: VeleroClient | None = None


def get_velero_client() -> VeleroClient:
    """Return the shared VeleroClient, instantiating it on first use."""
    global _velero_client
    if _velero_client is not None:
        return _velero_client
    _velero_client = VeleroClient()
    return _velero_client
def set_velero_client(client: VeleroClient) -> None:
    """
    Replace the shared VeleroClient instance.

    Intended for test injection (P4 convention): subsequent calls to
    get_velero_client() return *client*.
    """
    global _velero_client
    _velero_client = client