From 37f1802274984b1787d01b6a2c5ee871a69c10ce Mon Sep 17 00:00:00 2001 From: OoO Date: Thu, 18 Jun 2026 13:51:15 +0800 Subject: [PATCH] fix: clean stale partial database backups --- .env.example | 1 + config.py | 2 +- .../current_execution_queue_20260524.md | 6 +++ services/db_backup_service.py | 29 ++++++++++++- tests/test_db_backup_service.py | 41 +++++++++++++++++++ 5 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 tests/test_db_backup_service.py diff --git a/.env.example b/.env.example index 1759e95..81ee068 100644 --- a/.env.example +++ b/.env.example @@ -177,6 +177,7 @@ EMBEDDING_HOST= EMBEDDING_TIMEOUT=30 OLLAMA_EMBED_MAX_TIMEOUT=30 OLLAMA_EMBED_KEEP_ALIVE=1m +PARTIAL_BACKUP_MIN_AGE_MINUTES=60 OLLAMA_EMBED_MAX_CHARS=4000 OLLAMA_EMBED_GCP_FAILURE_COOLDOWN_SEC=60 OLLAMA_EMBED_GCP_FAILURE_NOTICE_SEC=30 diff --git a/config.py b/config.py index 78ca06e..72dc166 100644 --- a/config.py +++ b/config.py @@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.627" +SYSTEM_VERSION = "V10.628" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/current_execution_queue_20260524.md b/docs/memory/current_execution_queue_20260524.md index d971367..d4e3dff 100644 --- a/docs/memory/current_execution_queue_20260524.md +++ b/docs/memory/current_execution_queue_20260524.md @@ -330,3 +330,9 @@ - V10.626 已能在 GCP-A direct timeout 後走 110 proxy,但 cache refresh 仍會先等一次 direct `/api/version` timeout。 - V10.627 新增 direct-only host health skip:`resolve_ollama_host()` 會讀最近 `host_health_probes`,若 GCP-A/GCP-B direct 在視窗內已 unhealthy,先跳過 direct endpoint,改試同順位 110 proxy;proxy rescue 不吃這個 skip,避免因 direct unhealthy 誤跳過可用 proxy。 - 新增 `OLLAMA_RESOLVE_HOST_HEALTH_SKIP_ENABLED=true` 與 `OLLAMA_RESOLVE_HOST_HEALTH_SKIP_WINDOW_MINUTES=20`;DB 讀取失敗 fail-open,回到原本網路探測。 + +## 31. 
2026-06-18 V10.628 備份 partial 檔案清理
+
+- 正式 `backup_log` 最新狀態已是 2026-06-18 02:00 成功備份,6/15 的 `pg_dump` not found 是舊失敗紀錄;`get_latest_backup_info()` 目前回 success,backup monitor 不會再因舊 row 告警。
+- 備份目錄仍殘留 0 byte `momo_analytics_*.sql.gz` partial 檔,容易讓人工查檔誤判。
+- V10.628 新增 `cleanup_partial_backups()`,`cleanup_old_backups()` 會先清除超過 `PARTIAL_BACKUP_MIN_AGE_MINUTES=60` 的 0 byte partial 備份;剛產生的 0 byte 檔不刪,避免誤傷正在寫入的備份。
diff --git a/services/db_backup_service.py b/services/db_backup_service.py
index a3a03da..dc9fd03 100644
--- a/services/db_backup_service.py
+++ b/services/db_backup_service.py
@@ -20,6 +20,7 @@ DB_USER = os.environ.get("POSTGRES_USER", "momo")
 DB_NAME = os.environ.get("POSTGRES_DB", "momo_analytics")
 # 保留天數
 RETENTION_DAYS = int(os.environ.get("BACKUP_RETENTION_DAYS", "7"))
+PARTIAL_BACKUP_MIN_AGE_MINUTES = int(os.environ.get("PARTIAL_BACKUP_MIN_AGE_MINUTES", "60"))
 
 
 def _ensure_backup_dir():
@@ -35,6 +36,56 @@ def _remove_partial_backup(filepath: str):
         logger.warning(f"[Backup] 移除不完整備份檔失敗 {filepath}: {exc}")
 
 
+def cleanup_partial_backups(min_age_minutes: "int | None" = None) -> int:
+    """Delete stale 0-byte backup files left behind by failed backup runs.
+
+    Only empty ``momo_analytics_*.sql.gz`` files in ``BACKUP_DIR`` whose mtime
+    is at least ``min_age_minutes`` old are removed; newer empty files are kept
+    because a running backup may still be writing them.
+
+    Args:
+        min_age_minutes: Minimum age, in minutes, before an empty file is
+            deleted. ``None`` (the default) uses
+            ``PARTIAL_BACKUP_MIN_AGE_MINUTES`` as read at call time. Negative
+            values are clamped to 0; values that cannot be converted to
+            ``int`` fall back to that same setting.
+
+    Returns:
+        The number of files deleted.
+    """
+    _ensure_backup_dir()
+    # Resolve the default here, not in the signature: a default argument is
+    # frozen at import time and would ignore later changes to the setting.
+    if min_age_minutes is None:
+        min_age_minutes = PARTIAL_BACKUP_MIN_AGE_MINUTES
+    try:
+        min_age_minutes = max(0, int(min_age_minutes))
+    except (TypeError, ValueError):
+        fallback = max(0, PARTIAL_BACKUP_MIN_AGE_MINUTES)
+        logger.warning(f"[Backup] min_age_minutes 無效 ({min_age_minutes!r}),改用 {fallback} 分鐘")
+        min_age_minutes = fallback
+    # Compare epoch seconds; naive local datetimes can misjudge a file's age
+    # across DST transitions.
+    cutoff_ts = datetime.now().timestamp() - min_age_minutes * 60
+    deleted = 0
+    for filepath in glob.glob(os.path.join(BACKUP_DIR, "momo_analytics_*.sql.gz")):
+        try:
+            # A single stat() gives a consistent size/mtime snapshot.
+            info = os.stat(filepath)
+            if info.st_size != 0 or info.st_mtime > cutoff_ts:
+                continue
+            os.remove(filepath)
+        except FileNotFoundError:
+            # Already gone (e.g. removed by a concurrent cleanup).
+            continue
+        except Exception as exc:
+            logger.warning(f"[Backup] 清除不完整備份失敗 {filepath}: {exc}")
+            continue
+        deleted += 1
+        logger.info(f"[Backup] 已清除 0 byte 不完整備份: {os.path.basename(filepath)}")
+    return deleted
+
+
 def _ensure_pg_dump_available() -> str:
     pg_dump_path = shutil.which("pg_dump")
     if pg_dump_path:
@@ -175,10 +226,10 @@ def run_backup() -> dict:
 
 
 def cleanup_old_backups() -> int:
-    """刪除超過保留期限的備份檔,回傳刪除數量"""
+    """刪除不完整與超過保留期限的備份檔,回傳刪除數量。"""
     _ensure_backup_dir()
     cutoff = datetime.now() - timedelta(days=RETENTION_DAYS)
-    deleted = 0
+    deleted = cleanup_partial_backups()
     for f in glob.glob(os.path.join(BACKUP_DIR, "momo_analytics_*.sql.gz")):
         try:
             mtime = datetime.fromtimestamp(os.path.getmtime(f))
diff --git a/tests/test_db_backup_service.py b/tests/test_db_backup_service.py
new file mode 100644
index 0000000..ba3f2a9
--- /dev/null
+++ b/tests/test_db_backup_service.py
@@ -0,0 +1,69 @@
+import os
+import time
+
+
+def _backdate(path, seconds=7200):
+    """Set the atime/mtime of *path* to *seconds* in the past."""
+    stamp = time.time() - seconds
+    os.utime(path, (stamp, stamp))
+
+
+def test_cleanup_partial_backups_removes_only_stale_zero_byte_files(tmp_path, monkeypatch):
+    """Only empty files older than the minimum age are deleted."""
+    from services import db_backup_service as backup
+
+    stale_zero = tmp_path / "momo_analytics_20260611_010050.sql.gz"
+    fresh_zero = tmp_path / "momo_analytics_20260618_134900.sql.gz"
+    normal_backup = tmp_path / "momo_analytics_20260618_020001.sql.gz"
+
+    stale_zero.write_bytes(b"")
+    fresh_zero.write_bytes(b"")
+    normal_backup.write_bytes(b"ok")
+
+    _backdate(stale_zero)
+    # Age the real backup too, so it survives because of its size and not
+    # merely because it is recent.
+    _backdate(normal_backup)
+
+    monkeypatch.setattr(backup, "BACKUP_DIR", str(tmp_path))
+
+    deleted = backup.cleanup_partial_backups(min_age_minutes=60)
+
+    assert deleted == 1
+    assert not stale_zero.exists()
+    assert fresh_zero.exists()
+    assert normal_backup.exists()
+
+
+def test_cleanup_partial_backups_clamps_negative_min_age(tmp_path, monkeypatch):
+    """A negative minimum age behaves like 0: every empty file is deleted."""
+    from services import db_backup_service as backup
+
+    empty = tmp_path / "momo_analytics_20260618_134900.sql.gz"
+    normal_backup = tmp_path / "momo_analytics_20260618_020001.sql.gz"
+    empty.write_bytes(b"")
+    normal_backup.write_bytes(b"ok")
+    # Nudge the mtime into the past so the result does not hinge on
+    # filesystem timestamp granularity.
+    _backdate(empty, seconds=5)
+
+    monkeypatch.setattr(backup, "BACKUP_DIR", str(tmp_path))
+
+    assert backup.cleanup_partial_backups(min_age_minutes=-5) == 1
+    assert not empty.exists()
+    assert normal_backup.exists()
+
+
+def test_cleanup_old_backups_counts_partial_cleanup(tmp_path, monkeypatch):
+    """``cleanup_old_backups`` includes removed partial files in its count."""
+    from services import db_backup_service as backup
+
+    stale_zero = tmp_path / "momo_analytics_20260611_010050.sql.gz"
+    stale_zero.write_bytes(b"")
+    _backdate(stale_zero)
+
+    monkeypatch.setattr(backup, "BACKUP_DIR", str(tmp_path))
+    monkeypatch.setattr(backup, "RETENTION_DAYS", 7)
+
+    assert backup.cleanup_old_backups() == 1
+    assert not stale_zero.exists()