Some checks failed
CD Pipeline / deploy (push) Failing after 5m18s
🔴 Critical - auto_heal_service: 補 import re + sqlalchemy.text + 修正 orchestrator 變數名 + autoheal_playbook→playbooks 表名 + _alert_and_store cooldown 修復 - aider_heal_executor: shell injection 改 shell=False + list 參數 - docker-compose: DISABLE_LOGIN 改 env var + 移除密碼 fallback + POSTGRES_HOST 修正 - app.py: /api/backup /api/run_task 等 6 個管理 API 加 @login_required - config.py + pg_sync + e2e_test: 移除 wooo_pg_2026 hardcoded 密碼 fallback - pg_backup.sh: 移除 TELEGRAM_TOKEN= 中間變數,直接用 $TELEGRAM_BOT_TOKEN - migration 014: trigger_pattern→match_pattern + 補 error_type NOT NULL 欄位 🟡 High - telegram_bot_service: str(e) 改通用訊息 + session try/finally + 移除 pa:/pr: 舊 callback - run_scheduler: ElephantAlpha thread 死亡監控 + 自動重啟 + Telegram 告警 + agent_context 03:30 TTL 定時清理任務 - openclaw_learning_service: build_rag_context 兩路徑加 .limit(200) - hooks: commit-quality + momo-prod-guard 空 catch 改 stderr+exit(1) - scripts/code_review: auto_yes 預設改 false - db_backup_service: PGPASSWORD 透過 env dict 傳遞 📦 Migrations - 013_autoheal: 修正建表順序 playbooks→incidents(外鍵前向引用) - 018_add_missing_indexes: heal_logs/incidents 外鍵索引 + cleanup_expired_agent_context() 🟢 Infrastructure - requirements.txt: 加版本下界 Flask>=2.3 SQLAlchemy>=1.4 等 - cd.yaml: 新增 run_scheduler.py + run_telegram_bot.py 監聽路徑 - .gitignore: insert_playbook_local.py 加入忽略 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
207 lines
8.0 KiB
Python
207 lines
8.0 KiB
Python
"""
DB Backup Service — EwoooC V10.3
Runs pg_dump backups, applies the retention policy, and records backup status in backup_log.
"""
|
||
import os
|
||
import subprocess
|
||
import logging
|
||
import glob
|
||
from datetime import datetime, timedelta, timezone
|
||
|
||
# Fixed UTC+8 offset used to timestamp backup file names.
TAIPEI_TZ = timezone(timedelta(hours=8))
logger = logging.getLogger(__name__)

# Backup directory: the mount point inside the container.
BACKUP_DIR = os.getenv("BACKUP_DIR", "/app/data/db_backups")
# Name of the database container (original note: pg_dump target, run via docker exec).
# NOTE(review): not referenced by the functions visible here — confirm it is still needed.
DB_CONTAINER = os.getenv("DB_CONTAINER", "momo-db")
DB_USER = os.getenv("POSTGRES_USER", "momo")
DB_NAME = os.getenv("POSTGRES_DB", "momo_analytics")
# Number of days a backup file is kept before cleanup removes it.
RETENTION_DAYS = int(os.getenv("BACKUP_RETENTION_DAYS", "7"))
|
||
|
||
|
||
def _ensure_backup_dir():
    """Create ``BACKUP_DIR`` (including parents) if it does not exist yet."""
    os.makedirs(name=BACKUP_DIR, exist_ok=True)
|
||
|
||
|
||
def _log_backup(filename, file_size, duration, status, error=None, storage_path=None):
    """Insert one row describing a backup attempt into ``backup_log``.

    This is best-effort: any exception raised while importing the DB layer,
    opening the session, or running the INSERT is logged as a warning and
    swallowed, so the caller's backup flow is never interrupted.

    Args:
        filename: Value for the ``filename`` column.
        file_size: Value for the ``file_size_bytes`` column.
        duration: Value for the ``duration_seconds`` column.
        status: Value for the ``status`` column (callers in this file pass
            ``"success"`` or ``"failed"``).
        error: Optional value for the ``error_message`` column.
        storage_path: Value for the ``storage_path`` column; ``BACKUP_DIR``
            is used when this is falsy.
    """
    try:
        from database.manager import DatabaseManager

        with DatabaseManager().get_session() as session:
            from sqlalchemy import text

            # os.uname is not available on every platform.
            if hasattr(os, 'uname'):
                hostname = os.uname().nodename
            else:
                hostname = "unknown"

            insert_stmt = text("""
                INSERT INTO backup_log
                    (filename, file_size_bytes, duration_seconds, status, error_message,
                     host, storage_path, completed_at)
                VALUES
                    (:filename, :size, :dur, :status, :error,
                     :host, :path, CURRENT_TIMESTAMP)
            """)
            bind_values = dict(
                filename=filename,
                size=file_size,
                dur=duration,
                status=status,
                error=error,
                host=hostname,
                path=storage_path or BACKUP_DIR,
            )
            session.execute(insert_stmt, bind_values)
            session.commit()
    except Exception as e:
        logger.warning(f"[Backup] backup_log 寫入失敗(不影響備份本體): {e}")
|
||
|
||
|
||
def run_backup() -> dict:
    """Dump the database with ``pg_dump | gzip`` and record the outcome.

    ``pg_dump`` connects to ``POSTGRES_HOST``/``POSTGRES_PORT`` (defaults
    ``momo-db``/``5432``) as ``DB_USER`` against ``DB_NAME``. When
    ``POSTGRES_PASSWORD`` is set it is handed to ``pg_dump`` through the
    ``PGPASSWORD`` environment variable only. Output is piped through
    ``gzip`` into ``BACKUP_DIR/momo_analytics_<YYYYmmdd_HHMMSS>.sql.gz``.

    Every outcome is written to ``backup_log`` via ``_log_backup``. On any
    failure or timeout the child processes are killed and reaped, and the
    incomplete output file is removed so it cannot be mistaken for a valid
    backup by code that scans the backup directory.

    Returns:
        dict with keys ``success`` (bool), ``filename`` (str),
        ``file_size`` (bytes, 0 on failure), ``duration`` (seconds) and
        ``error`` (message string, or ``None`` on success). Failures are
        reported through this dict rather than raised.
    """
    import tempfile
    import time

    # Single time budget for the whole pipeline; matches the "300s" quoted in
    # the timeout error message below.
    timeout_s = 300

    _ensure_backup_dir()
    now = datetime.now(TAIPEI_TZ)
    filename = f"momo_analytics_{now.strftime('%Y%m%d_%H%M%S')}.sql.gz"
    filepath = os.path.join(BACKUP_DIR, filename)
    start = datetime.now()

    db_host = os.environ.get("POSTGRES_HOST", "momo-db")
    db_port = os.environ.get("POSTGRES_PORT", "5432")

    # Best-effort: never raises, so a missing apt-get cannot abort the backup
    # before the result dict is built.
    _install_pg_client_if_missing()

    pg_password = os.environ.get("POSTGRES_PASSWORD")
    pg_env = {**os.environ, "PGPASSWORD": pg_password} if pg_password else dict(os.environ)

    logger.info(f"[Backup] 開始備份 → {filepath}")
    result = {"success": False, "filename": filename, "file_size": 0, "duration": 0, "error": None}
    pg_dump_proc = None
    gzip_proc = None

    try:
        # pg_dump's stderr goes to a temp file instead of a pipe: nothing
        # reads it while the dump runs, and an unread pipe could fill up and
        # block pg_dump.
        with open(filepath, "wb") as out_f, tempfile.TemporaryFile() as pg_err_f:
            deadline = time.monotonic() + timeout_s
            pg_dump_proc = subprocess.Popen(
                ["pg_dump", "-h", db_host, "-p", db_port, "-U", DB_USER, "-d", DB_NAME,
                 "--no-password", "-Fp"],
                stdout=subprocess.PIPE, stderr=pg_err_f,
                env=pg_env,
            )
            gzip_proc = subprocess.Popen(
                ["gzip"],
                stdin=pg_dump_proc.stdout, stdout=out_f, stderr=subprocess.PIPE,
            )
            # Drop our copy of the pipe so pg_dump gets SIGPIPE if gzip exits early.
            pg_dump_proc.stdout.close()
            gzip_stderr = gzip_proc.communicate(timeout=timeout_s)[1]
            # gzip only finishes after pg_dump closed its stdout, so this is
            # normally immediate; keep at least a short grace period.
            pg_dump_proc.wait(timeout=max(deadline - time.monotonic(), 1.0))

            pg_err_f.seek(0)
            pg_dump_stderr = pg_err_f.read().decode(errors="replace").strip()

        # pg_dump's exit status takes precedence; gzip's is used only when
        # pg_dump itself succeeded.
        pg_rc = pg_dump_proc.returncode
        combined_returncode = pg_rc if pg_rc != 0 else gzip_proc.returncode
        stderr_text = pg_dump_stderr or gzip_stderr.decode(errors="replace").strip()
        duration = (datetime.now() - start).total_seconds()

        if combined_returncode != 0:
            error_msg = stderr_text or "pg_dump 非零退出碼"
            logger.error(f"[Backup] 備份失敗: {error_msg}")
            result["error"] = error_msg
            result["duration"] = duration
            _log_backup(filename, 0, duration, "failed", error=error_msg)
        else:
            file_size = os.path.getsize(filepath) if os.path.exists(filepath) else 0
            logger.info(f"[Backup] 備份成功 | 大小={file_size//1024}KB | 耗時={duration:.1f}s")
            result.update({"success": True, "file_size": file_size, "duration": duration})
            _log_backup(filename, file_size, duration, "success", storage_path=filepath)

    except subprocess.TimeoutExpired:
        duration = (datetime.now() - start).total_seconds()
        error_msg = "pg_dump 超時(300s)"
        logger.error(f"[Backup] {error_msg}")
        result["error"] = error_msg
        result["duration"] = duration
        _log_backup(filename, 0, duration, "failed", error=error_msg)
    except Exception as e:
        duration = (datetime.now() - start).total_seconds()
        error_msg = str(e)
        logger.error(f"[Backup] 備份異常: {e}")
        result["error"] = error_msg
        result["duration"] = duration
        _log_backup(filename, 0, duration, "failed", error=error_msg)
    finally:
        # A timeout does not kill the children by itself; stop them before
        # touching the output file they may still be writing to.
        _reap_process(gzip_proc)
        _reap_process(pg_dump_proc)
        if not result["success"]:
            _remove_partial_backup(filepath)

    return result


def _install_pg_client_if_missing():
    """Try to install ``postgresql-client`` when ``pg_dump`` cannot be found.

    Best-effort only: problems are logged and never raised.
    """
    import shutil

    if shutil.which("pg_dump") or os.path.exists("/usr/bin/pg_dump"):
        return

    logger.info("[Backup] pg_dump 不存在,嘗試安裝 postgresql-client...")
    try:
        install = subprocess.run(
            ["apt-get", "install", "-y", "-qq", "postgresql-client"],
            capture_output=True,
        )
    except OSError as e:
        # apt-get itself is missing or not executable.
        logger.warning(f"[Backup] 無法執行 apt-get,略過 postgresql-client 安裝: {e}")
        return

    if install.returncode != 0:
        detail = install.stderr.decode(errors="replace").strip()
        logger.warning(
            f"[Backup] postgresql-client 安裝失敗 (exit={install.returncode}): {detail}"
        )


def _reap_process(proc):
    """Kill ``proc`` if it is still running, wait for it, and close its pipes."""
    if proc is None:
        return
    try:
        if proc.poll() is None:
            proc.kill()
        proc.wait(timeout=10)
    except subprocess.TimeoutExpired:
        logger.warning(f"[Backup] 子行程未能在 10s 內結束 (pid={proc.pid})")
    except OSError as e:
        logger.warning(f"[Backup] 無法終止子行程 (pid={proc.pid}): {e}")
    for stream in (proc.stdout, proc.stderr):
        if stream is not None and not stream.closed:
            stream.close()


def _remove_partial_backup(filepath):
    """Delete the output file of a failed backup, ignoring a missing file."""
    try:
        os.remove(filepath)
    except FileNotFoundError:
        pass
    except OSError as e:
        logger.warning(f"[Backup] 無法移除未完成的備份檔 {filepath}: {e}")
|
||
|
||
|
||
def cleanup_old_backups() -> int:
    """Delete backup files older than the retention window.

    Files in ``BACKUP_DIR`` matching ``momo_analytics_*.sql.gz`` whose
    modification time is earlier than ``RETENTION_DAYS`` days ago are
    removed. A failure on one file is logged as a warning and does not stop
    the remaining files from being processed.

    Returns:
        Number of files deleted.
    """
    _ensure_backup_dir()
    threshold = datetime.now() - timedelta(days=RETENTION_DAYS)
    pattern = os.path.join(BACKUP_DIR, "momo_analytics_*.sql.gz")

    removed = 0
    for path in glob.glob(pattern):
        try:
            modified_at = datetime.fromtimestamp(os.path.getmtime(path))
            if modified_at >= threshold:
                # Still inside the retention window.
                continue
            os.remove(path)
            removed += 1
            logger.info(f"[Backup] 已刪除舊備份: {os.path.basename(path)}")
        except Exception as e:
            logger.warning(f"[Backup] 刪除舊備份失敗 {path}: {e}")
    return removed
|
||
|
||
|
||
def get_latest_backup_info() -> dict:
    """Return information about the most recent backup (for monitoring).

    The newest ``backup_log`` row (by ``created_at``) is preferred. If the
    database cannot be read, or the table has no rows, the newest
    ``momo_analytics_*.sql.gz`` file in ``BACKUP_DIR`` is reported instead,
    with its status given as ``"success"``.

    Returns:
        A dict whose ``source`` key is ``"db"``, ``"filesystem"`` or
        ``"none"``. The first two carry ``filename``, ``file_size``,
        ``duration``, ``status``, ``created_at`` and ``error``; the last
        carries only ``filename``, ``status`` (``"no_backup"``) and
        ``created_at``.
    """
    try:
        from database.manager import DatabaseManager

        with DatabaseManager().get_session() as session:
            from sqlalchemy import text

            latest = session.execute(text("""
                SELECT filename, file_size_bytes, duration_seconds, status, created_at, error_message
                FROM backup_log
                ORDER BY created_at DESC
                LIMIT 1
            """)).fetchone()
            if latest:
                # Result keys, in the same order as the selected columns.
                keys = ("filename", "file_size", "duration", "status", "created_at", "error")
                info = {key: latest[idx] for idx, key in enumerate(keys)}
                info["source"] = "db"
                return info
    except Exception as e:
        logger.warning(f"[Backup] 無法從 DB 讀取最新備份資訊: {e}")

    # Fallback: scan the backup directory.
    _ensure_backup_dir()
    candidates = glob.glob(os.path.join(BACKUP_DIR, "momo_analytics_*.sql.gz"))
    if not candidates:
        return {"filename": None, "status": "no_backup", "created_at": None, "source": "none"}

    newest = max(candidates, key=os.path.getmtime)
    modified_at = datetime.fromtimestamp(os.path.getmtime(newest))
    return {
        "filename": os.path.basename(newest),
        "file_size": os.path.getsize(newest),
        "duration": None,
        "status": "success",
        "created_at": modified_at,
        "error": None,
        "source": "filesystem",
    }
|