Layer 0 - K8s RBAC: - k8s/rbac/api-velero-reader.yaml: awoooi-executor SA Velero backup reader Layer 1 - DB Migration (已在 188 執行): - M-002: approval_records 新增 approval_level/votes/required_votes - M-003: alert_event_type ENUM 新增 8 個值 Layer 2 - IaC: - ops/config/service-registry.yaml: 全服務 Stateful 分級清單 (BLOCK/CRITICAL_HITL/STANDARD_HITL/AUTO) Layer 3 - Python Services: - service_registry.py: 讀取 YAML,提供 is_blocked/requires_multisig/get_required_votes - velero_client.py: kubectl 查詢 Velero 備份年齡,失敗 fallback 999h - preflight_service.py: Pre-flight 安全檢查 (Q2/Q4 決策) Layer 1-M001 - Playbook model: - playbook.py: 新增 requires_approval_level/stateful_targets/requires_pre_backup Layer 4 - 業務邏輯: - alert_operation_log_repository.py: 新增 8 個 event_type (Guardrail/Pre-flight/MultiSig/備份) - auto_repair_service.py: 注入 Service Registry Guardrail 檢查 (BLOCK → 直接拒絕) - webhooks.py: ALERT_RECEIVED 溯源記錄 + auto_repair flag Q9 + Langfuse trace_id Q10 - db/models.py: ApprovalRecord 同步 approval_level/votes/required_votes 欄位 - docker-health-monitor.sh: 純感知層改造(移除所有 docker restart 邏輯) Layer 5 - Telegram 通知: - telegram_gateway.py: T1-T6 六個新通知方法 (Guardrail/Pre-flight/Backup/MultiSig/ChangeApplied) 參考: ADR-062 Data Safety Guardrails, ADR-063 Service Registry IaC Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
114 lines
3.7 KiB
Python
114 lines
3.7 KiB
Python
# apps/api/src/services/velero_client.py
|
||
# Velero Backup 查詢客戶端 (kubectl 方式,Q7 決策)
|
||
# 撰寫: Claude Sonnet 4.6 / 2026-04-08 Asia/Taipei
|
||
# 架構: leWOOOgo 積木化,純 Service 層
|
||
# 參考: ADR-062
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import json
|
||
import logging
|
||
import time
|
||
from datetime import UTC, datetime
|
||
|
||
# Module-level logger named after this module (``__name__``).
logger = logging.getLogger(__name__)
|
||
|
||
# Namespace passed to kubectl via ``-n`` for every Velero command in this module.
_VELERO_NAMESPACE = "velero"
|
||
_KUBECTL_TIMEOUT = 30  # seconds; timeout given to asyncio.wait_for around each kubectl call
|
||
|
||
|
||
class VeleroClient:
|
||
"""
|
||
透過 kubectl 查詢 Velero 備份狀態
|
||
設計原則: 失敗時 fallback「假設備份過期」(保守原則)
|
||
"""
|
||
|
||
async def get_latest_backup_age_hours(self) -> float:
|
||
"""
|
||
查詢最近一次 Completed 備份距今幾小時
|
||
失敗時返回 999.0(視為嚴重過期,觸發 Abort)
|
||
"""
|
||
try:
|
||
result = await asyncio.wait_for(
|
||
self._run_kubectl(
|
||
["get", "backup", "-n", _VELERO_NAMESPACE,
|
||
"-o", "json", "--field-selector", "status.phase=Completed"]
|
||
),
|
||
timeout=_KUBECTL_TIMEOUT,
|
||
)
|
||
data = json.loads(result)
|
||
items = data.get("items", [])
|
||
if not items:
|
||
logger.warning("Velero: 找不到任何 Completed 備份")
|
||
return 999.0
|
||
|
||
latest = max(
|
||
items,
|
||
key=lambda x: x.get("status", {}).get("completionTimestamp", ""),
|
||
)
|
||
completion_ts = latest["status"].get("completionTimestamp", "")
|
||
if not completion_ts:
|
||
return 999.0
|
||
|
||
completed_at = datetime.fromisoformat(completion_ts.replace("Z", "+00:00"))
|
||
age = (datetime.now(UTC) - completed_at).total_seconds() / 3600
|
||
logger.info(f"Velero 最近備份: {completion_ts},距今 {age:.1f} 小時")
|
||
return age
|
||
|
||
except asyncio.TimeoutError:
|
||
logger.error("Velero kubectl 查詢超時")
|
||
return 999.0
|
||
except Exception as e:
|
||
logger.error(f"Velero 查詢失敗: {e}")
|
||
return 999.0
|
||
|
||
async def trigger_emergency_backup(self, backup_name: str | None = None) -> bool:
|
||
"""
|
||
觸發緊急備份(非同步,不等待完成)
|
||
返回 True 表示指令已成功發送
|
||
"""
|
||
name = backup_name or f"emergency-{int(time.time())}"
|
||
try:
|
||
await asyncio.wait_for(
|
||
self._run_kubectl([
|
||
"create", "backup", name,
|
||
"-n", _VELERO_NAMESPACE,
|
||
"--include-namespaces", "awoooi-prod",
|
||
"--wait=false",
|
||
]),
|
||
timeout=_KUBECTL_TIMEOUT,
|
||
)
|
||
logger.info(f"Velero 緊急備份已啟動: {name}")
|
||
return True
|
||
except Exception as e:
|
||
logger.error(f"Velero 緊急備份失敗: {e}")
|
||
return False
|
||
|
||
async def _run_kubectl(self, args: list[str]) -> str:
|
||
proc = await asyncio.create_subprocess_exec(
|
||
"kubectl", *args,
|
||
stdout=asyncio.subprocess.PIPE,
|
||
stderr=asyncio.subprocess.PIPE,
|
||
)
|
||
stdout, stderr = await proc.communicate()
|
||
if proc.returncode != 0:
|
||
raise RuntimeError(f"kubectl 失敗: {stderr.decode()}")
|
||
return stdout.decode()
|
||
|
||
|
||
# Module-wide VeleroClient instance; None until get_velero_client() or set_velero_client() assigns it.
_velero_client: VeleroClient | None = None
|
||
|
||
|
||
def get_velero_client() -> VeleroClient:
    """Return the shared VeleroClient, creating it on the first call."""
    global _velero_client
    client = _velero_client
    if client is None:
        client = _velero_client = VeleroClient()
    return client
|
||
|
||
|
||
def set_velero_client(client: VeleroClient) -> None:
    """Replace the shared VeleroClient instance (test injection hook, P4 convention)."""
    globals()["_velero_client"] = client
|