fix: clean stale partial database backups
All checks were successful
CD Pipeline / deploy (push) Successful in 1m8s

This commit is contained in:
OoO
2026-06-18 13:51:15 +08:00
parent bd942e9427
commit 37f1802274
5 changed files with 76 additions and 3 deletions

View File

@@ -177,6 +177,7 @@ EMBEDDING_HOST=
EMBEDDING_TIMEOUT=30
OLLAMA_EMBED_MAX_TIMEOUT=30
OLLAMA_EMBED_KEEP_ALIVE=1m
PARTIAL_BACKUP_MIN_AGE_MINUTES=60
OLLAMA_EMBED_MAX_CHARS=4000
OLLAMA_EMBED_GCP_FAILURE_COOLDOWN_SEC=60
OLLAMA_EMBED_GCP_FAILURE_NOTICE_SEC=30

View File

@@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# 系統版本與路徑
# ==========================================
SYSTEM_VERSION = "V10.627"
SYSTEM_VERSION = "V10.628"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # 用於模板顯示

View File

@@ -330,3 +330,9 @@
- V10.626 已能在 GCP-A direct timeout 後走 110 proxy，但 cache refresh 仍會先等一次 direct `/api/version` timeout。
- V10.627 新增 direct-only host health skip：`resolve_ollama_host()` 會讀最近 `host_health_probes`，若 GCP-A/GCP-B direct 在視窗內已 unhealthy，先跳過 direct endpoint，改試同順位 110 proxy；proxy rescue 不吃這個 skip，避免因 direct unhealthy 誤跳過可用 proxy。
- 新增 `OLLAMA_RESOLVE_HOST_HEALTH_SKIP_ENABLED=true`、`OLLAMA_RESOLVE_HOST_HEALTH_SKIP_WINDOW_MINUTES=20`；DB 讀取失敗 fail-open，回到原本網路探測。
## 31. 2026-06-18 V10.628 備份 partial 檔案清理
- 正式 `backup_log` 最新狀態已是 2026-06-18 02:00 成功備份；6/15 的 `pg_dump` not found 是舊失敗紀錄；`get_latest_backup_info()` 目前回 success，backup monitor 不會再因舊 row 告警。
- 備份目錄仍殘留 0 byte `momo_analytics_*.sql.gz` partial 檔,容易讓人工查檔誤判。
- V10.628 新增 `cleanup_partial_backups()`；`cleanup_old_backups()` 會先清除超過 `PARTIAL_BACKUP_MIN_AGE_MINUTES=60` 的 0 byte partial 備份；剛產生的 0 byte 檔不刪，避免誤傷正在寫入的備份。

View File

@@ -20,6 +20,7 @@ DB_USER = os.environ.get("POSTGRES_USER", "momo")
DB_NAME = os.environ.get("POSTGRES_DB", "momo_analytics")
# 保留天數
RETENTION_DAYS = int(os.environ.get("BACKUP_RETENTION_DAYS", "7"))
PARTIAL_BACKUP_MIN_AGE_MINUTES = int(os.environ.get("PARTIAL_BACKUP_MIN_AGE_MINUTES", "60"))
def _ensure_backup_dir():
@@ -35,6 +36,30 @@ def _remove_partial_backup(filepath: str):
logger.warning(f"[Backup] 移除不完整備份檔失敗 {filepath}: {exc}")
def cleanup_partial_backups(min_age_minutes: int = PARTIAL_BACKUP_MIN_AGE_MINUTES) -> int:
    """Delete stale 0-byte backup files left behind by failed backup runs.

    Only files in ``BACKUP_DIR`` matching ``momo_analytics_*.sql.gz`` that are
    exactly 0 bytes long and whose modification time is at least
    ``min_age_minutes`` in the past are removed. Younger empty files are kept
    because they may belong to a backup that is still being written.

    Args:
        min_age_minutes: Minimum age in minutes before an empty file is
            deleted. Negative values are clamped to 0; values that cannot be
            converted to ``int`` fall back to 60.

    Returns:
        The number of files that were deleted.
    """
    # Local import so this block does not depend on the module's import list.
    import time

    _ensure_backup_dir()
    try:
        min_age_minutes = max(0, int(min_age_minutes))
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")).
        min_age_minutes = 60

    # Compare epoch seconds rather than naive local datetimes: local wall-clock
    # arithmetic is off by an hour across a DST change, which could delete a
    # file that is younger than the requested threshold.
    cutoff_ts = time.time() - min_age_minutes * 60
    deleted = 0
    for filepath in glob.glob(os.path.join(BACKUP_DIR, "momo_analytics_*.sql.gz")):
        try:
            # A single stat() gives a consistent size/mtime snapshot.
            info = os.stat(filepath)
            if info.st_size != 0 or info.st_mtime > cutoff_ts:
                continue
            os.remove(filepath)
        except OSError as exc:
            # Best effort: one unreadable or vanished file must not stop the sweep.
            logger.warning(f"[Backup] 清除不完整備份失敗 {filepath}: {exc}")
            continue
        deleted += 1
        logger.info(f"[Backup] 已清除 0 byte 不完整備份: {os.path.basename(filepath)}")
    return deleted
def _ensure_pg_dump_available() -> str:
pg_dump_path = shutil.which("pg_dump")
if pg_dump_path:
@@ -175,10 +200,10 @@ def run_backup() -> dict:
def cleanup_old_backups() -> int:
"""刪除超過保留期限的備份檔,回傳刪除數量"""
"""刪除不完整與超過保留期限的備份檔,回傳刪除數量"""
_ensure_backup_dir()
cutoff = datetime.now() - timedelta(days=RETENTION_DAYS)
deleted = 0
deleted = cleanup_partial_backups()
for f in glob.glob(os.path.join(BACKUP_DIR, "momo_analytics_*.sql.gz")):
try:
mtime = datetime.fromtimestamp(os.path.getmtime(f))

View File

@@ -0,0 +1,41 @@
import os
import time
def test_cleanup_partial_backups_removes_only_stale_zero_byte_files(tmp_path, monkeypatch):
    """Only empty backups older than the age threshold are deleted."""
    from services import db_backup_service as backup

    old_empty = tmp_path / "momo_analytics_20260611_010050.sql.gz"
    new_empty = tmp_path / "momo_analytics_20260618_134900.sql.gz"
    real_dump = tmp_path / "momo_analytics_20260618_020001.sql.gz"
    for path, payload in ((old_empty, b""), (new_empty, b""), (real_dump, b"ok")):
        path.write_bytes(payload)

    # Backdate only the stale file: two hours old against a 60-minute threshold.
    two_hours_ago = time.time() - 7200
    os.utime(old_empty, (two_hours_ago, two_hours_ago))

    monkeypatch.setattr(backup, "BACKUP_DIR", str(tmp_path))

    removed = backup.cleanup_partial_backups(min_age_minutes=60)

    assert removed == 1
    assert not old_empty.exists()
    assert new_empty.exists()
    assert real_dump.exists()
def test_cleanup_old_backups_counts_partial_cleanup(tmp_path, monkeypatch):
    """cleanup_old_backups() includes removed partial files in its return value."""
    from services import db_backup_service as backup

    old_empty = tmp_path / "momo_analytics_20260611_010050.sql.gz"
    old_empty.write_bytes(b"")

    # Two hours old: past the partial-file age threshold, within retention.
    two_hours_ago = time.time() - 7200
    os.utime(old_empty, (two_hours_ago, two_hours_ago))

    monkeypatch.setattr(backup, "BACKUP_DIR", str(tmp_path))
    monkeypatch.setattr(backup, "RETENTION_DAYS", 7)

    removed = backup.cleanup_old_backups()

    assert removed == 1
    assert not old_empty.exists()