""" DB Backup Service — EwoooC V10.3 負責執行 pg_dump 備份、保留策略、以及備份狀態寫入 backup_log """ import os import logging import glob import shutil import subprocess from datetime import datetime, timedelta, timezone TAIPEI_TZ = timezone(timedelta(hours=8)) logger = logging.getLogger(__name__) # 備份目錄:container 內掛載點 BACKUP_DIR = os.environ.get("BACKUP_DIR", "/app/data/db_backups") # pg_dump 目標:在 momo-db container 內執行(docker exec) DB_CONTAINER = os.environ.get("DB_CONTAINER", "momo-db") DB_USER = os.environ.get("POSTGRES_USER", "momo") DB_NAME = os.environ.get("POSTGRES_DB", "momo_analytics") # 保留天數 RETENTION_DAYS = int(os.environ.get("BACKUP_RETENTION_DAYS", "7")) def _ensure_backup_dir(): os.makedirs(BACKUP_DIR, exist_ok=True) def _remove_partial_backup(filepath: str): try: if os.path.exists(filepath): os.remove(filepath) logger.warning(f"[Backup] 已移除不完整備份檔: {filepath}") except Exception as exc: logger.warning(f"[Backup] 移除不完整備份檔失敗 {filepath}: {exc}") def _ensure_pg_dump_available() -> str: pg_dump_path = shutil.which("pg_dump") if pg_dump_path: return pg_dump_path apt_get_path = shutil.which("apt-get") if not apt_get_path: raise RuntimeError("pg_dump 不存在,且容器沒有 apt-get;請重建 image 並安裝 postgresql-client") logger.info("[Backup] pg_dump 不存在,嘗試安裝 postgresql-client...") commands = [ [apt_get_path, "update", "-qq"], [apt_get_path, "install", "-y", "-qq", "postgresql-client"], ] for command in commands: proc = subprocess.run(command, capture_output=True, text=True, timeout=180) if proc.returncode != 0: stderr = (proc.stderr or proc.stdout or "").strip() raise RuntimeError( "pg_dump 不存在,自動安裝 postgresql-client 失敗:" f"{' '.join(command)} → {stderr[:500]}" ) pg_dump_path = shutil.which("pg_dump") if not pg_dump_path: raise RuntimeError("postgresql-client 安裝後仍找不到 pg_dump") return pg_dump_path def _log_backup(filename, file_size, duration, status, error=None, storage_path=None): """寫入 backup_log 表,失敗不阻斷主流程""" try: from database.manager import DatabaseManager db = DatabaseManager() with db.get_session() as session: from sqlalchemy import text session.execute(text(""" INSERT INTO backup_log (filename, file_size_bytes, duration_seconds, status, error_message, host, storage_path, completed_at) VALUES (:filename, :size, :dur, :status, :error, :host, :path, CURRENT_TIMESTAMP) """), { "filename": filename, "size": file_size, "dur": duration, "status": status, "error": error, "host": os.uname().nodename if hasattr(os, 'uname') else "unknown", "path": storage_path or BACKUP_DIR, }) session.commit() except Exception as e: logger.warning(f"[Backup] backup_log 寫入失敗(不影響備份本體): {e}") def run_backup() -> dict: """ 執行 pg_dump 備份。 因 scheduler 在 momo-scheduler container 內,pg_dump 直連 momo-db service。 回傳 dict: {success, filename, file_size, duration, error} """ _ensure_backup_dir() now = datetime.now(TAIPEI_TZ) filename = f"momo_analytics_{now.strftime('%Y%m%d_%H%M%S')}.sql.gz" filepath = os.path.join(BACKUP_DIR, filename) start = datetime.now() db_host = os.environ.get("POSTGRES_HOST", "momo-db") db_port = os.environ.get("POSTGRES_PORT", "5432") pg_password = os.environ.get("POSTGRES_PASSWORD") pg_env = {**os.environ, "PGPASSWORD": pg_password} if pg_password else dict(os.environ) logger.info(f"[Backup] 開始備份 → {filepath}") result = {"success": False, "filename": filename, "file_size": 0, "duration": 0, "error": None} try: pg_dump_path = _ensure_pg_dump_available() with open(filepath, "wb") as out_f: pg_dump_proc = subprocess.Popen( [pg_dump_path, "-h", db_host, "-p", db_port, "-U", DB_USER, "-d", DB_NAME, "--no-password", "-Fp"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=pg_env ) gzip_proc = subprocess.Popen( ["gzip"], stdin=pg_dump_proc.stdout, stdout=out_f, stderr=subprocess.PIPE ) pg_dump_proc.stdout.close() gzip_stderr = gzip_proc.communicate(timeout=300)[1] pg_dump_proc.wait(timeout=300) # 模擬 proc 介面供後續邏輯共用 class _FakeProc: def __init__(self, returncode, stderr_text): self.returncode = returncode self.stderr = stderr_text pg_dump_stderr = pg_dump_proc.stderr.read().decode(errors="replace").strip() combined_returncode = pg_dump_proc.returncode if pg_dump_proc.returncode != 0 else gzip_proc.returncode proc = _FakeProc(combined_returncode, pg_dump_stderr or gzip_stderr.decode(errors="replace").strip()) duration = (datetime.now() - start).total_seconds() if proc.returncode != 0: error_msg = proc.stderr.strip() or "pg_dump 非零退出碼" logger.error(f"[Backup] 備份失敗: {error_msg}") _remove_partial_backup(filepath) result["error"] = error_msg result["duration"] = duration _log_backup(filename, 0, duration, "failed", error=error_msg) else: file_size = os.path.getsize(filepath) if os.path.exists(filepath) else 0 logger.info(f"[Backup] 備份成功 | 大小={file_size//1024}KB | 耗時={duration:.1f}s") result.update({"success": True, "file_size": file_size, "duration": duration}) _log_backup(filename, file_size, duration, "success", storage_path=filepath) except subprocess.TimeoutExpired: duration = (datetime.now() - start).total_seconds() error_msg = "pg_dump 超時(300s)" logger.error(f"[Backup] {error_msg}") _remove_partial_backup(filepath) result["error"] = error_msg result["duration"] = duration _log_backup(filename, 0, duration, "failed", error=error_msg) except Exception as e: duration = (datetime.now() - start).total_seconds() error_msg = str(e) logger.error(f"[Backup] 備份異常: {e}") _remove_partial_backup(filepath) result["error"] = error_msg result["duration"] = duration _log_backup(filename, 0, duration, "failed", error=error_msg) return result def cleanup_old_backups() -> int: """刪除超過保留期限的備份檔,回傳刪除數量""" _ensure_backup_dir() cutoff = datetime.now() - timedelta(days=RETENTION_DAYS) deleted = 0 for f in glob.glob(os.path.join(BACKUP_DIR, "momo_analytics_*.sql.gz")): try: mtime = datetime.fromtimestamp(os.path.getmtime(f)) if mtime < cutoff: os.remove(f) deleted += 1 logger.info(f"[Backup] 已刪除舊備份: {os.path.basename(f)}") except Exception as e: logger.warning(f"[Backup] 刪除舊備份失敗 {f}: {e}") return deleted def get_latest_backup_info() -> dict: """ 回傳最新備份的資訊(供監控用)。 優先從 backup_log 讀取,fallback 掃描檔案系統。 """ try: from database.manager import DatabaseManager db = DatabaseManager() with db.get_session() as session: from sqlalchemy import text row = session.execute(text(""" SELECT filename, file_size_bytes, duration_seconds, status, created_at, error_message FROM backup_log ORDER BY created_at DESC LIMIT 1 """)).fetchone() if row: return { "filename": row[0], "file_size": row[1], "duration": row[2], "status": row[3], "created_at": row[4], "error": row[5], "source": "db", } except Exception as e: logger.warning(f"[Backup] 無法從 DB 讀取最新備份資訊: {e}") # fallback: 掃描檔案 _ensure_backup_dir() files = sorted( glob.glob(os.path.join(BACKUP_DIR, "momo_analytics_*.sql.gz")), key=os.path.getmtime, reverse=True ) if files: f = files[0] mtime = datetime.fromtimestamp(os.path.getmtime(f)) return { "filename": os.path.basename(f), "file_size": os.path.getsize(f), "duration": None, "status": "success", "created_at": mtime, "error": None, "source": "filesystem", } return {"filename": None, "status": "no_backup", "created_at": None, "source": "none"}