Files
ewoooc/services/db_backup_service.py
ogt 676c711e7a
Some checks are pending
CD Pipeline / deploy (push) Waiting to run
feat: AI 治理完備 V10.3 — 技術債清零 + DB 備份機制 + 備份 AI 監控
技術債清零 (2026-04-19):
- migrations/010: ai_insights 補 decay_exempt/avg_quality/status/ai_model/feedback 欄位
- migrations/011: embedding_retry_queue 持久化表 (ADR-009)
- migrations/012: backup_log 備份記錄表
- services/openclaw_learning_service: 記憶體 Queue → DB retry queue,時間衰減 RAG
- services/nemoton_dispatcher_service: 三個 tool 強制雙寫 ai_insights (_sink_insight_to_km)
- services/import_service: Excel 前置欄位防禦(商品名稱類 + 業績金額類)
- services/ollama_service: generate_embedding 新增 EMBEDDING_HOST env,embedding 永遠走 192.168.0.111
- SYSTEM_VERSION: V9.4 → V10.3

DB 備份機制:
- scripts/pg_backup.sh: host-level pg_dump 備份腳本,cron 每日 02:00,保留 7 天,Telegram 通知
- services/db_backup_service.py: Python 備份 service,寫入 backup_log
- scheduler: run_db_backup_task (02:00) + run_backup_monitor_task (每 6h AI Agent 監控)
- Dockerfile: 加入 postgresql-client

文件:
- CLAUDE.md: 環境架構依 ADR-008 實地重寫,含完整 SSH/Docker 部署 SOP
- PROJECT_CONSTITUTION.md: 內容已整合入 CLAUDE.md,刪除重複檔案

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-19 02:03:45 +08:00

188 lines
7.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
DB Backup Service — EwoooC V10.3
負責執行 pg_dump 備份、保留策略、以及備份狀態寫入 backup_log
"""
import glob
import gzip
import logging
import os
import shutil
import subprocess
import tempfile
import threading
import time
from datetime import datetime, timedelta, timezone
# Fixed UTC+8 offset; run_backup uses it to timestamp backup filenames.
TAIPEI_TZ = timezone(timedelta(hours=8))
logger = logging.getLogger(__name__)
# Backup directory (mount point inside the container); override with BACKUP_DIR.
BACKUP_DIR = os.environ.get("BACKUP_DIR", "/app/data/db_backups")
# pg_dump target: intended to run inside the momo-db container (docker exec).
# NOTE(review): DB_CONTAINER is not referenced by the code visible in this
# module; run_backup connects over the network via POSTGRES_HOST instead.
DB_CONTAINER = os.environ.get("DB_CONTAINER", "momo-db")
DB_USER = os.environ.get("POSTGRES_USER", "momo")
DB_NAME = os.environ.get("POSTGRES_DB", "momo_analytics")
# Number of days a backup file is kept before cleanup_old_backups removes it.
RETENTION_DAYS = int(os.environ.get("BACKUP_RETENTION_DAYS", "7"))
def _ensure_backup_dir():
    """Create BACKUP_DIR (including missing parents); no-op if it already exists."""
    os.makedirs(name=BACKUP_DIR, exist_ok=True)
def _log_backup(filename, file_size, duration, status, error=None, storage_path=None):
    """Record one backup attempt as a row in the backup_log table.

    Best-effort: any exception raised while importing the DB layer, opening
    the session or executing the INSERT is logged as a warning and swallowed,
    so a logging failure never interrupts the backup itself.

    Args:
        filename: Name of the backup file.
        file_size: Size of the backup file in bytes.
        duration: Elapsed time of the backup in seconds.
        status: Status string stored in the row (callers pass "success" or "failed").
        error: Optional error message for failed attempts.
        storage_path: Where the backup was stored; defaults to BACKUP_DIR.
    """
    try:
        # Project/third-party imports stay lazy so an import problem is
        # handled by the same best-effort except clause below.
        from database.manager import DatabaseManager

        manager = DatabaseManager()
        with manager.get_session() as session:
            from sqlalchemy import text

            insert_stmt = text("""
INSERT INTO backup_log
(filename, file_size_bytes, duration_seconds, status, error_message,
host, storage_path, completed_at)
VALUES
(:filename, :size, :dur, :status, :error,
:host, :path, CURRENT_TIMESTAMP)
""")
            # os.uname is unavailable on some platforms; fall back to a marker.
            if hasattr(os, 'uname'):
                hostname = os.uname().nodename
            else:
                hostname = "unknown"
            params = {
                "filename": filename,
                "size": file_size,
                "dur": duration,
                "status": status,
                "error": error,
                "host": hostname,
                "path": storage_path or BACKUP_DIR,
            }
            session.execute(insert_stmt, params)
            session.commit()
    except Exception as e:
        logger.warning(f"[Backup] backup_log 寫入失敗(不影響備份本體): {e}")
def _dump_to_gzip(cmd, env, dest, timeout):
    """Run *cmd* and gzip-compress its stdout into the file *dest*.

    Args:
        cmd: Argument list for the producer process (no shell involved).
        env: Environment mapping passed to the process.
        dest: Path of the gzip file to write.
        timeout: Seconds after which the process is killed.

    Returns:
        Tuple ``(returncode, stderr_text)`` of the producer process.

    Raises:
        subprocess.TimeoutExpired: The process was killed by the watchdog.
        OSError: The process could not be started or *dest* not written.
    """
    timed_out = threading.Event()
    # stderr goes to a temp file rather than a pipe so a chatty process can
    # never block on a full pipe while we are busy draining stdout.
    with tempfile.TemporaryFile() as err_buf:
        with subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=err_buf, env=env
        ) as proc:

            def _kill_on_timeout():
                timed_out.set()
                proc.kill()

            watchdog = threading.Timer(timeout, _kill_on_timeout)
            watchdog.daemon = True
            watchdog.start()
            try:
                with gzip.open(dest, "wb") as gz:
                    shutil.copyfileobj(proc.stdout, gz)
                returncode = proc.wait()
            finally:
                watchdog.cancel()
        if timed_out.is_set():
            raise subprocess.TimeoutExpired(cmd, timeout)
        err_buf.seek(0)
        stderr_text = err_buf.read().decode("utf-8", errors="replace").strip()
    return returncode, stderr_text


def run_backup() -> dict:
    """Run a pg_dump backup and record the outcome in backup_log.

    pg_dump connects over the network to POSTGRES_HOST:POSTGRES_PORT
    (defaults "momo-db":"5432"). Its plain-format output is gzip-compressed
    into BACKUP_DIR/momo_analytics_<YYYYmmdd_HHMMSS>.sql.gz.

    pg_dump is invoked directly (no shell pipeline), so its own exit status
    decides success; a shell `pg_dump | gzip` would report gzip's status and
    mark a failed dump as successful. Output is written to a ".part" file and
    renamed only on success, so a failed run never leaves a file that looks
    like a valid backup.

    Returns:
        dict with keys ``success`` (bool), ``filename`` (str),
        ``file_size`` (bytes, 0 on failure), ``duration`` (seconds) and
        ``error`` (message or None).
    """
    timeout_s = 300

    _ensure_backup_dir()
    now = datetime.now(TAIPEI_TZ)
    filename = f"momo_analytics_{now.strftime('%Y%m%d_%H%M%S')}.sql.gz"
    filepath = os.path.join(BACKUP_DIR, filename)
    partial_path = filepath + ".part"
    started = time.monotonic()  # monotonic: immune to wall-clock adjustments

    db_host = os.environ.get("POSTGRES_HOST", "momo-db")
    db_port = os.environ.get("POSTGRES_PORT", "5432")

    # The Dockerfile installs postgresql-client; this is only a best-effort
    # fallback for containers built from an older image.
    if shutil.which("pg_dump") is None:
        logger.info("[Backup] pg_dump 不存在,嘗試安裝 postgresql-client...")
        try:
            subprocess.run(
                ["apt-get", "install", "-y", "-qq", "postgresql-client"],
                capture_output=True,
            )
        except OSError as e:
            logger.warning(f"[Backup] 無法執行 apt-get: {e}")

    cmd = [
        "pg_dump",
        "-h", db_host,
        "-p", db_port,
        "-U", DB_USER,
        "-d", DB_NAME,
        "--no-password",
        "-Fp",
    ]
    # The password is passed through the environment, never the command line,
    # so it is not visible in the process list and needs no shell quoting.
    # NOTE(review): the hard-coded fallback password is kept only for backward
    # compatibility; it should be removed once POSTGRES_PASSWORD is always set.
    env = dict(os.environ)
    env["PGPASSWORD"] = os.environ.get("POSTGRES_PASSWORD", "wooo_pg_2026")

    logger.info(f"[Backup] 開始備份 → {filepath}")
    result = {"success": False, "filename": filename, "file_size": 0, "duration": 0, "error": None}

    error_msg = None
    file_size = 0
    try:
        returncode, stderr_text = _dump_to_gzip(cmd, env, partial_path, timeout_s)
        if returncode != 0:
            error_msg = stderr_text or "pg_dump 非零退出碼"
            logger.error(f"[Backup] 備份失敗: {error_msg}")
        else:
            # Publish atomically: the final name appears only for a full dump.
            os.replace(partial_path, filepath)
            file_size = os.path.getsize(filepath)
    except subprocess.TimeoutExpired:
        error_msg = "pg_dump 超時300s"
        logger.error(f"[Backup] {error_msg}")
    except Exception as e:
        error_msg = str(e)
        logger.error(f"[Backup] 備份異常: {e}")

    duration = time.monotonic() - started
    result["duration"] = duration

    if error_msg is not None:
        # Drop whatever was written so far; it is not a usable backup.
        try:
            os.remove(partial_path)
        except FileNotFoundError:
            pass
        except OSError as e:
            logger.warning(f"[Backup] 無法刪除未完成的備份檔 {partial_path}: {e}")
        result["error"] = error_msg
        _log_backup(filename, 0, duration, "failed", error=error_msg)
        return result

    logger.info(f"[Backup] 備份成功 | 大小={file_size//1024}KB | 耗時={duration:.1f}s")
    result.update({"success": True, "file_size": file_size, "duration": duration})
    _log_backup(filename, file_size, duration, "success", storage_path=filepath)
    return result
def cleanup_old_backups() -> int:
    """Remove backup files older than the retention window.

    A file matching BACKUP_DIR/momo_analytics_*.sql.gz is deleted when its
    modification time is more than RETENTION_DAYS days in the past. A failure
    on one file is logged as a warning and does not stop the sweep.

    Returns:
        Number of files actually deleted.
    """
    _ensure_backup_dir()
    pattern = os.path.join(BACKUP_DIR, "momo_analytics_*.sql.gz")
    threshold = datetime.now() - timedelta(days=RETENTION_DAYS)
    removed = 0
    for f in glob.glob(pattern):
        try:
            modified_at = datetime.fromtimestamp(os.path.getmtime(f))
            if not modified_at < threshold:
                # Still inside the retention window: keep it.
                continue
            os.remove(f)
            removed += 1
            logger.info(f"[Backup] 已刪除舊備份: {os.path.basename(f)}")
        except Exception as e:
            logger.warning(f"[Backup] 刪除舊備份失敗 {f}: {e}")
    return removed
def get_latest_backup_info() -> dict:
    """Return a description of the most recent backup (for monitoring).

    The newest backup_log row is preferred. If the query raises or returns no
    row, the newest momo_analytics_*.sql.gz file in BACKUP_DIR (by mtime) is
    reported instead, with status "success" and no duration.

    Returns:
        dict with a ``source`` key of "db", "filesystem" or "none". The first
        two carry filename, file_size, duration, status, created_at and error;
        the "none" result carries only filename, status, created_at and source.
    """
    try:
        from database.manager import DatabaseManager

        manager = DatabaseManager()
        with manager.get_session() as session:
            from sqlalchemy import text

            latest_row = session.execute(text("""
SELECT filename, file_size_bytes, duration_seconds, status, created_at, error_message
FROM backup_log
ORDER BY created_at DESC
LIMIT 1
""")).fetchone()
            if latest_row:
                # Column order of the SELECT above maps onto these keys.
                fields = ("filename", "file_size", "duration", "status", "created_at", "error")
                info = dict(zip(fields, latest_row))
                info["source"] = "db"
                return info
    except Exception as e:
        logger.warning(f"[Backup] 無法從 DB 讀取最新備份資訊: {e}")

    # Fallback: inspect the backup directory itself.
    _ensure_backup_dir()
    candidates = glob.glob(os.path.join(BACKUP_DIR, "momo_analytics_*.sql.gz"))
    if not candidates:
        return {"filename": None, "status": "no_backup", "created_at": None, "source": "none"}

    newest = max(candidates, key=os.path.getmtime)
    modified_at = datetime.fromtimestamp(os.path.getmtime(newest))
    return {
        "filename": os.path.basename(newest),
        "file_size": os.path.getsize(newest),
        "duration": None,
        "status": "success",
        "created_at": modified_at,
        "error": None,
        "source": "filesystem",
    }