fix: clean stale partial database backups
All checks were successful
CD Pipeline / deploy (push) Successful in 1m8s
All checks were successful
CD Pipeline / deploy (push) Successful in 1m8s
This commit is contained in:
@@ -177,6 +177,7 @@ EMBEDDING_HOST=
|
||||
EMBEDDING_TIMEOUT=30
|
||||
OLLAMA_EMBED_MAX_TIMEOUT=30
|
||||
OLLAMA_EMBED_KEEP_ALIVE=1m
|
||||
PARTIAL_BACKUP_MIN_AGE_MINUTES=60
|
||||
OLLAMA_EMBED_MAX_CHARS=4000
|
||||
OLLAMA_EMBED_GCP_FAILURE_COOLDOWN_SEC=60
|
||||
OLLAMA_EMBED_GCP_FAILURE_NOTICE_SEC=30
|
||||
|
||||
@@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
||||
# ==========================================
|
||||
# 系統版本與路徑
|
||||
# ==========================================
|
||||
SYSTEM_VERSION = "V10.627"
|
||||
SYSTEM_VERSION = "V10.628"
|
||||
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
||||
public_url = PUBLIC_URL # 用於模板顯示
|
||||
|
||||
|
||||
@@ -330,3 +330,9 @@
|
||||
- V10.626 已能在 GCP-A direct timeout 後走 110 proxy,但 cache refresh 仍會先等一次 direct `/api/version` timeout。
|
||||
- V10.627 新增 direct-only host health skip:`resolve_ollama_host()` 會讀最近 `host_health_probes`,若 GCP-A/GCP-B direct 在視窗內已 unhealthy,先跳過 direct endpoint,改試同順位 110 proxy;proxy rescue 不吃這個 skip,避免因 direct unhealthy 誤跳過可用 proxy。
|
||||
- 新增 `OLLAMA_RESOLVE_HOST_HEALTH_SKIP_ENABLED=true` 與 `OLLAMA_RESOLVE_HOST_HEALTH_SKIP_WINDOW_MINUTES=20`;DB 讀取失敗 fail-open,回到原本網路探測。
|
||||
|
||||
## 31. 2026-06-18 V10.628 備份 partial 檔案清理
|
||||
|
||||
- 正式 `backup_log` 最新狀態已是 2026-06-18 02:00 成功備份,6/15 的 `pg_dump` not found 是舊失敗紀錄;`get_latest_backup_info()` 目前回 success,backup monitor 不會再因舊 row 告警。
|
||||
- 備份目錄仍殘留 0 byte `momo_analytics_*.sql.gz` partial 檔,容易讓人工查檔誤判。
|
||||
- V10.628 新增 `cleanup_partial_backups()`,`cleanup_old_backups()` 會先清除超過 `PARTIAL_BACKUP_MIN_AGE_MINUTES=60` 的 0 byte partial 備份;剛產生的 0 byte 檔不刪,避免誤傷正在寫入的備份。
|
||||
|
||||
@@ -20,6 +20,7 @@ DB_USER = os.environ.get("POSTGRES_USER", "momo")
|
||||
DB_NAME = os.environ.get("POSTGRES_DB", "momo_analytics")
|
||||
# 保留天數
|
||||
RETENTION_DAYS = int(os.environ.get("BACKUP_RETENTION_DAYS", "7"))
|
||||
PARTIAL_BACKUP_MIN_AGE_MINUTES = int(os.environ.get("PARTIAL_BACKUP_MIN_AGE_MINUTES", "60"))
|
||||
|
||||
|
||||
def _ensure_backup_dir():
|
||||
@@ -35,6 +36,30 @@ def _remove_partial_backup(filepath: str):
|
||||
logger.warning(f"[Backup] 移除不完整備份檔失敗 {filepath}: {exc}")
|
||||
|
||||
|
||||
def cleanup_partial_backups(min_age_minutes: int = PARTIAL_BACKUP_MIN_AGE_MINUTES) -> int:
    """Delete stale zero-byte backup files left behind by failed runs.

    Only files matching ``momo_analytics_*.sql.gz`` inside ``BACKUP_DIR`` are
    considered. A file is removed when it is exactly 0 bytes AND its mtime is
    at least ``min_age_minutes`` old; a younger empty file is kept because a
    backup may still be in the middle of writing it.

    Args:
        min_age_minutes: Minimum age, in minutes, before an empty file is
            treated as stale. Negative values are clamped to 0; values that
            cannot be converted to an integer fall back to 60.

    Returns:
        The number of files actually removed. Per-file failures are logged as
        warnings and do not abort the sweep.
    """
    # Function-scope import so this block does not depend on the module's
    # top-level import list.
    import time

    _ensure_backup_dir()
    try:
        min_age_minutes = max(0, int(min_age_minutes))
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")); treat it like any other
        # unusable value instead of crashing the cleanup job.
        min_age_minutes = 60

    # Compare epoch seconds rather than naive local datetimes: wall-clock
    # arithmetic is off by an hour across a DST transition, which could make
    # a minutes-old file look stale and delete an in-progress backup.
    cutoff_ts = time.time() - min_age_minutes * 60

    deleted = 0
    for filepath in glob.glob(os.path.join(BACKUP_DIR, "momo_analytics_*.sql.gz")):
        try:
            # One stat call yields size and mtime from the same snapshot.
            info = os.stat(filepath)
            if info.st_size != 0:
                continue
            if info.st_mtime > cutoff_ts:
                continue
            os.remove(filepath)
            deleted += 1
            logger.info(f"[Backup] 已清除 0 byte 不完整備份: {os.path.basename(filepath)}")
        except OSError as exc:
            logger.warning(f"[Backup] 清除不完整備份失敗 {filepath}: {exc}")
    return deleted
|
||||
|
||||
|
||||
def _ensure_pg_dump_available() -> str:
|
||||
pg_dump_path = shutil.which("pg_dump")
|
||||
if pg_dump_path:
|
||||
@@ -175,10 +200,10 @@ def run_backup() -> dict:
|
||||
|
||||
|
||||
def cleanup_old_backups() -> int:
|
||||
"""刪除超過保留期限的備份檔,回傳刪除數量"""
|
||||
"""刪除不完整與超過保留期限的備份檔,回傳刪除數量。"""
|
||||
_ensure_backup_dir()
|
||||
cutoff = datetime.now() - timedelta(days=RETENTION_DAYS)
|
||||
deleted = 0
|
||||
deleted = cleanup_partial_backups()
|
||||
for f in glob.glob(os.path.join(BACKUP_DIR, "momo_analytics_*.sql.gz")):
|
||||
try:
|
||||
mtime = datetime.fromtimestamp(os.path.getmtime(f))
|
||||
|
||||
41
tests/test_db_backup_service.py
Normal file
41
tests/test_db_backup_service.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import os
|
||||
import time
|
||||
|
||||
|
||||
def test_cleanup_partial_backups_removes_only_stale_zero_byte_files(tmp_path, monkeypatch):
    """Only an empty backup older than the minimum age is removed."""
    from services import db_backup_service as backup

    stale_zero = tmp_path / "momo_analytics_20260611_010050.sql.gz"
    fresh_zero = tmp_path / "momo_analytics_20260618_134900.sql.gz"
    normal_backup = tmp_path / "momo_analytics_20260618_020001.sql.gz"

    # Two empty files and one with real content.
    fixtures = (
        (stale_zero, b""),
        (fresh_zero, b""),
        (normal_backup, b"ok"),
    )
    for path, payload in fixtures:
        path.write_bytes(payload)

    # Backdate only the stale file to two hours ago; the others keep "now".
    two_hours_ago = time.time() - 7200
    os.utime(stale_zero, (two_hours_ago, two_hours_ago))

    monkeypatch.setattr(backup, "BACKUP_DIR", str(tmp_path))

    removed = backup.cleanup_partial_backups(min_age_minutes=60)

    assert removed == 1
    assert not stale_zero.exists()
    assert fresh_zero.exists()
    assert normal_backup.exists()
|
||||
|
||||
|
||||
def test_cleanup_old_backups_counts_partial_cleanup(tmp_path, monkeypatch):
    """cleanup_old_backups() includes removed partial files in its count."""
    from services import db_backup_service as backup

    # A single empty backup, backdated two hours so it counts as stale.
    stale_zero = tmp_path / "momo_analytics_20260611_010050.sql.gz"
    stale_zero.write_bytes(b"")
    two_hours_ago = time.time() - 7200
    os.utime(stale_zero, (two_hours_ago, two_hours_ago))

    monkeypatch.setattr(backup, "BACKUP_DIR", str(tmp_path))
    monkeypatch.setattr(backup, "RETENTION_DAYS", 7)

    removed = backup.cleanup_old_backups()

    assert removed == 1
    assert not stale_zero.exists()
|
||||
Reference in New Issue
Block a user