Files
ewoooc/services/db_backup_service.py
OoO 55e14c0332
All checks were successful
CD Pipeline / deploy (push) Successful in 9m17s
V10.605 修復當日業績匯入與資料庫備份
2026-06-15 14:52:33 +08:00

242 lines
9.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
DB Backup Service — EwoooC V10.3
負責執行 pg_dump 備份、保留策略、以及備份狀態寫入 backup_log
"""
import glob
import logging
import os
import shutil
import subprocess
import tempfile
import time
from datetime import datetime, timedelta, timezone
# Fixed UTC+8 offset (Taipei time).
TAIPEI_TZ = timezone(timedelta(hours=8))
logger = logging.getLogger(__name__)

# Directory the backup archives are written to (a mount point inside the container).
BACKUP_DIR = os.getenv("BACKUP_DIR", "/app/data/db_backups")

# Name of the database container.
# NOTE(review): not referenced by any function visible in this file — confirm it is still needed.
DB_CONTAINER = os.getenv("DB_CONTAINER", "momo-db")
DB_USER = os.getenv("POSTGRES_USER", "momo")
DB_NAME = os.getenv("POSTGRES_DB", "momo_analytics")

# Number of days a backup archive is retained.
RETENTION_DAYS = int(os.getenv("BACKUP_RETENTION_DAYS", "7"))
def _ensure_backup_dir():
    """Create ``BACKUP_DIR`` (and any missing parents); do nothing if it already exists."""
    os.makedirs(name=BACKUP_DIR, exist_ok=True)
def _remove_partial_backup(filepath: str):
    """Delete an incomplete backup file, if present.

    Best-effort: any error raised while checking for or removing the file is
    logged as a warning and swallowed, so the caller's failure handling is
    never interrupted.

    Args:
        filepath: Path of the backup file to remove.
    """
    try:
        if not os.path.exists(filepath):
            return
        os.remove(filepath)
        logger.warning(f"[Backup] 已移除不完整備份檔: {filepath}")
    except Exception as remove_error:
        logger.warning(f"[Backup] 移除不完整備份檔失敗 {filepath}: {remove_error}")
def _ensure_pg_dump_available() -> str:
    """Return the path of the ``pg_dump`` executable, installing it if missing.

    If ``pg_dump`` is not on ``PATH``, ``apt-get update`` followed by
    ``apt-get install postgresql-client`` is attempted (each step limited to
    180 seconds) and ``PATH`` is searched again.

    Returns:
        The path of ``pg_dump`` as reported by ``shutil.which``.

    Raises:
        RuntimeError: If ``pg_dump`` is missing and ``apt-get`` is not
            available, if an ``apt-get`` step fails or times out, or if
            ``pg_dump`` still cannot be found after installation.
    """
    pg_dump_path = shutil.which("pg_dump")
    if pg_dump_path:
        return pg_dump_path

    apt_get_path = shutil.which("apt-get")
    if not apt_get_path:
        raise RuntimeError("pg_dump 不存在,且容器沒有 apt-get請重建 image 並安裝 postgresql-client")

    logger.info("[Backup] pg_dump 不存在,嘗試安裝 postgresql-client...")
    commands = [
        [apt_get_path, "update", "-qq"],
        [apt_get_path, "install", "-y", "-qq", "postgresql-client"],
    ]
    for command in commands:
        try:
            proc = subprocess.run(command, capture_output=True, text=True, timeout=180)
        except subprocess.TimeoutExpired as exc:
            # Convert to RuntimeError so an install timeout is not mistaken by
            # callers for a timeout of the pg_dump run itself.
            raise RuntimeError(
                "pg_dump 不存在,自動安裝 postgresql-client 超時:"
                f"{' '.join(command)}"
            ) from exc
        if proc.returncode != 0:
            # Prefer stderr; fall back to stdout when stderr is empty.
            stderr = (proc.stderr or proc.stdout or "").strip()
            raise RuntimeError(
                "pg_dump 不存在,自動安裝 postgresql-client 失敗:"
                f"{' '.join(command)}{stderr[:500]}"
            )

    pg_dump_path = shutil.which("pg_dump")
    if not pg_dump_path:
        raise RuntimeError("postgresql-client 安裝後仍找不到 pg_dump")
    return pg_dump_path
def _log_backup(filename, file_size, duration, status, error=None, storage_path=None):
    """Insert one row describing a backup attempt into the ``backup_log`` table.

    Best-effort: any exception (import failure, connection error, SQL error)
    is logged as a warning and swallowed so it never interrupts the backup.

    Args:
        filename: Name of the backup file.
        file_size: Size of the backup file in bytes.
        duration: Duration of the backup attempt in seconds.
        status: Status label stored in the row (callers in this file pass
            ``"success"`` or ``"failed"``).
        error: Optional error message.
        storage_path: Optional storage location; ``BACKUP_DIR`` is stored
            when it is not given (or falsy).
    """
    try:
        from database.manager import DatabaseManager

        manager = DatabaseManager()
        with manager.get_session() as session:
            from sqlalchemy import text

            # os.uname is not available on every platform.
            uname = getattr(os, "uname", None)
            if uname is not None:
                host_name = os.uname().nodename
            else:
                host_name = "unknown"

            statement = text("""
                INSERT INTO backup_log
                (filename, file_size_bytes, duration_seconds, status, error_message,
                host, storage_path, completed_at)
                VALUES
                (:filename, :size, :dur, :status, :error,
                :host, :path, CURRENT_TIMESTAMP)
                """)
            row_values = {
                "filename": filename,
                "size": file_size,
                "dur": duration,
                "status": status,
                "error": error,
                "host": host_name,
                "path": storage_path or BACKUP_DIR,
            }
            session.execute(statement, row_values)
            session.commit()
    except Exception as log_error:
        logger.warning(f"[Backup] backup_log 寫入失敗(不影響備份本體): {log_error}")
def _run_dump_pipeline(pg_dump_path, filepath, db_host, db_port, pg_env, timeout=300):
    """Stream ``pg_dump | gzip`` into *filepath*.

    Args:
        pg_dump_path: Path of the ``pg_dump`` executable.
        filepath: Destination file for the gzip-compressed dump.
        db_host: Database host passed to ``pg_dump -h``.
        db_port: Database port passed to ``pg_dump -p``.
        pg_env: Environment for the ``pg_dump`` process.
        timeout: Overall time budget in seconds for the whole pipeline.

    Returns:
        ``(returncode, stderr_text)``: the exit code of ``pg_dump`` if it is
        non-zero, otherwise that of ``gzip``; and the stderr text of
        ``pg_dump`` if any, otherwise that of ``gzip``.

    Raises:
        subprocess.TimeoutExpired: If the pipeline exceeds *timeout*.
        OSError: If the output file or a process cannot be created.
    """
    deadline = time.monotonic() + timeout
    # pg_dump's stderr goes to a temporary file rather than a pipe: a pipe that
    # nobody reads while the process runs would block pg_dump once it fills up.
    with open(filepath, "wb") as out_f, tempfile.TemporaryFile() as pg_dump_err_f:
        children = []
        try:
            pg_dump_proc = subprocess.Popen(
                [pg_dump_path, "-h", db_host, "-p", db_port, "-U", DB_USER, "-d", DB_NAME,
                 "--no-password", "-Fp"],
                stdout=subprocess.PIPE, stderr=pg_dump_err_f,
                env=pg_env,
            )
            children.append(pg_dump_proc)
            gzip_proc = subprocess.Popen(
                ["gzip"],
                stdin=pg_dump_proc.stdout, stdout=out_f, stderr=subprocess.PIPE,
            )
            children.append(gzip_proc)
            # Close the parent's copy of the pipe so gzip is the only reader:
            # gzip then sees EOF when pg_dump exits, and pg_dump is signalled
            # if gzip dies first.
            pg_dump_proc.stdout.close()

            gzip_stderr = gzip_proc.communicate(timeout=timeout)[1]
            # Both waits share one deadline so the total never exceeds *timeout*.
            pg_dump_proc.wait(timeout=max(deadline - time.monotonic(), 0.1))

            pg_dump_err_f.seek(0)
            pg_dump_stderr = pg_dump_err_f.read().decode(errors="replace").strip()
            if pg_dump_proc.returncode != 0:
                returncode = pg_dump_proc.returncode
            else:
                returncode = gzip_proc.returncode
            stderr_text = pg_dump_stderr or gzip_stderr.decode(errors="replace").strip()
            return returncode, stderr_text
        finally:
            # A timeout (or any other error) does not stop the children by
            # itself: kill and reap whatever is still running, then release
            # the pipe handles.
            for child in children:
                if child.poll() is None:
                    child.kill()
                    child.wait()
                for pipe in (child.stdout, child.stderr):
                    if pipe is not None:
                        pipe.close()


def run_backup() -> dict:
    """Run a gzip-compressed ``pg_dump`` backup of the database.

    ``pg_dump`` connects directly to the host and port given by the
    ``POSTGRES_HOST`` / ``POSTGRES_PORT`` environment variables (defaults
    ``momo-db`` / ``5432``); ``POSTGRES_PASSWORD``, when set, is passed to it
    as ``PGPASSWORD``. The outcome is recorded through ``_log_backup``, and a
    partial output file is removed on failure.

    Returns:
        A dict with the keys ``success``, ``filename``, ``file_size``,
        ``duration`` and ``error``.
    """
    _ensure_backup_dir()
    now = datetime.now(TAIPEI_TZ)
    filename = f"momo_analytics_{now.strftime('%Y%m%d_%H%M%S')}.sql.gz"
    filepath = os.path.join(BACKUP_DIR, filename)
    start = time.monotonic()

    db_host = os.environ.get("POSTGRES_HOST", "momo-db")
    db_port = os.environ.get("POSTGRES_PORT", "5432")
    pg_password = os.environ.get("POSTGRES_PASSWORD")
    pg_env = {**os.environ, "PGPASSWORD": pg_password} if pg_password else dict(os.environ)

    logger.info(f"[Backup] 開始備份 → {filepath}")
    result = {"success": False, "filename": filename, "file_size": 0, "duration": 0, "error": None}

    error_msg = None  # stays None only when the backup succeeded
    file_size = 0
    try:
        pg_dump_path = _ensure_pg_dump_available()
        returncode, stderr_text = _run_dump_pipeline(
            pg_dump_path, filepath, db_host, db_port, pg_env, timeout=300
        )
        if returncode != 0:
            error_msg = stderr_text or "pg_dump 非零退出碼"
            logger.error(f"[Backup] 備份失敗: {error_msg}")
        else:
            file_size = os.path.getsize(filepath) if os.path.exists(filepath) else 0
    except subprocess.TimeoutExpired:
        error_msg = "pg_dump 超時300s"
        logger.error(f"[Backup] {error_msg}")
    except Exception as e:
        error_msg = str(e)
        logger.error(f"[Backup] 備份異常: {e}")

    duration = time.monotonic() - start
    if error_msg is not None:
        _remove_partial_backup(filepath)
        result["error"] = error_msg
        result["duration"] = duration
        _log_backup(filename, 0, duration, "failed", error=error_msg)
        return result

    logger.info(f"[Backup] 備份成功 | 大小={file_size//1024}KB | 耗時={duration:.1f}s")
    result.update({"success": True, "file_size": file_size, "duration": duration})
    _log_backup(filename, file_size, duration, "success", storage_path=filepath)
    return result
def cleanup_old_backups() -> int:
    """Delete backup archives older than ``RETENTION_DAYS`` days.

    Age is judged by each file's modification time. A failure on one file is
    logged as a warning and does not stop the remaining files from being
    processed.

    Returns:
        The number of files deleted.
    """
    _ensure_backup_dir()
    threshold = datetime.now() - timedelta(days=RETENTION_DAYS)
    pattern = os.path.join(BACKUP_DIR, "momo_analytics_*.sql.gz")
    removed_count = 0
    for backup_path in glob.glob(pattern):
        try:
            modified_at = datetime.fromtimestamp(os.path.getmtime(backup_path))
            if not modified_at < threshold:
                continue
            os.remove(backup_path)
            removed_count += 1
            logger.info(f"[Backup] 已刪除舊備份: {os.path.basename(backup_path)}")
        except Exception as cleanup_error:
            logger.warning(f"[Backup] 刪除舊備份失敗 {backup_path}: {cleanup_error}")
    return removed_count
def get_latest_backup_info() -> dict:
    """Return information about the most recent backup (for monitoring).

    The newest row of the ``backup_log`` table is preferred. If the table
    cannot be read or is empty, the backup directory is scanned and the most
    recently modified archive is reported instead.

    Returns:
        A dict that always has the keys ``filename``, ``file_size``,
        ``duration``, ``status``, ``created_at``, ``error`` and ``source``.
        ``source`` is ``"db"``, ``"filesystem"`` or ``"none"``; when no backup
        is found, ``status`` is ``"no_backup"`` and the other fields are
        ``None``.
    """
    try:
        from database.manager import DatabaseManager

        db = DatabaseManager()
        with db.get_session() as session:
            from sqlalchemy import text

            row = session.execute(text("""
                SELECT filename, file_size_bytes, duration_seconds, status, created_at, error_message
                FROM backup_log
                ORDER BY created_at DESC
                LIMIT 1
                """)).fetchone()
            if row:
                return {
                    "filename": row[0],
                    "file_size": row[1],
                    "duration": row[2],
                    "status": row[3],
                    "created_at": row[4],
                    "error": row[5],
                    "source": "db",
                }
    except Exception as e:
        logger.warning(f"[Backup] 無法從 DB 讀取最新備份資訊: {e}")

    # Fallback: scan the backup directory.
    _ensure_backup_dir()
    files = glob.glob(os.path.join(BACKUP_DIR, "momo_analytics_*.sql.gz"))
    if files:
        # Only the newest file is needed, so a full sort is unnecessary.
        latest = max(files, key=os.path.getmtime)
        mtime = datetime.fromtimestamp(os.path.getmtime(latest))
        return {
            "filename": os.path.basename(latest),
            "file_size": os.path.getsize(latest),
            "duration": None,
            # The file's existence is the only evidence available here, so it
            # is reported as a successful backup.
            "status": "success",
            "created_at": mtime,
            "error": None,
            "source": "filesystem",
        }

    # Same key set as the other results so callers can index any field safely.
    return {
        "filename": None,
        "file_size": None,
        "duration": None,
        "status": "no_backup",
        "created_at": None,
        "error": None,
        "source": "none",
    }