From c1f43b0ae496da4fde2cf0e551458f2f6d2259b7 Mon Sep 17 00:00:00 2001 From: OoO Date: Fri, 1 May 2026 20:48:28 +0800 Subject: [PATCH] fix(campaign): persist full crawl snapshots --- app.py | 4 +- config.py | 2 +- scheduler.py | 107 +++++++++++++---------- tests/test_promo_persistence_contract.py | 40 +++++++++ 4 files changed, 104 insertions(+), 49 deletions(-) create mode 100644 tests/test_promo_persistence_contract.py diff --git a/app.py b/app.py index 5445bf1..4b91bd3 100644 --- a/app.py +++ b/app.py @@ -95,8 +95,8 @@ except Exception as e: sys_log.error(f"無法檢測磁碟空間: {e}") # 🚩 系統版本定義 (備份與顯示用) -# 🚩 2026-05-01 V10.70: Restore campaign operations table signals -SYSTEM_VERSION = "V10.70" +# 🚩 2026-05-01 V10.71: Persist full campaign crawl snapshots +SYSTEM_VERSION = "V10.71" # ========================================== # 🔒 SQL Injection 防護函數 diff --git a/config.py b/config.py index e0ebc43..8a79ba2 100644 --- a/config.py +++ b/config.py @@ -254,7 +254,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.70" +SYSTEM_VERSION = "V10.71" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/scheduler.py b/scheduler.py index 84b5fcd..5d610ea 100644 --- a/scheduler.py +++ b/scheduler.py @@ -553,6 +553,7 @@ def run_edm_task(lpn_code="O1K5FBOqsvN"): logging.info(f"[Crawler] [EDM] 📦 偵測到 {len(product_areas)} 個商品區塊") count = 0 + snapshot_count = 0 current_scan_icodes = set() seen_time_slots = set() # V9.60: 記錄本次掃描到的所有時段 changed_products = [] # V-New: 收集異動商品以發送通知 @@ -722,32 +723,34 @@ def run_edm_task(lpn_code="O1K5FBOqsvN"): status_change = "UPDATE" previous_price = prev_record.previous_price - # 只記錄有變動的 (新增 或 價格變動) - if status_change != "NONE": - new_promo = PromoProduct( - batch_id=batch_id, - i_code=i_code, - name=name, - price=price, - discount_text=discount_text, - url=link_url, - previous_price=previous_price, # V9.64: 寫入舊價格 - time_slot=time_slot, - status_change=status_change, - crawled_at=now, - activity_time_text=activity_time_text, - session_time_text=session_time_text, - remain_qty=remain_qty, - page_type=PAGE_TYPE - ) - # V9.62: 嘗試寫入圖片 (若 Model 尚未更新欄位定義,此行可能無效,但 DB 已有欄位) - new_promo.image_url = image_url - - session.add(new_promo) + is_changed = status_change != "NONE" + new_promo = PromoProduct( + batch_id=batch_id, + i_code=i_code, + name=name, + price=price, + discount_text=discount_text, + url=link_url, + previous_price=previous_price, # V9.64: 寫入舊價格 + time_slot=time_slot, + status_change=status_change if is_changed else "ACTIVE", + crawled_at=now, + activity_time_text=activity_time_text, + session_time_text=session_time_text, + remain_qty=remain_qty, + page_type=PAGE_TYPE + ) + # V9.62: 嘗試寫入圖片 (若 Model 尚未更新欄位定義,此行可能無效,但 DB 已有欄位) + new_promo.image_url = image_url + + session.add(new_promo) + snapshot_count += 1 + + if is_changed: changed_products.append(new_promo) count += 1 else: - logging.debug(f"[Crawler] [EDM] [=] 無變動跳過 | Name: {name}") + logging.debug(f"[Crawler] [EDM] [=] 無變動快照已寫入 | Name: {name}") except Exception as e: logging.warning(f"[Crawler] [EDM] ⚠️ EDM 單一商品解析失敗 | Error: {e}") continue # 單一商品失敗不影響整體 @@ -778,6 +781,7 @@ def run_edm_task(lpn_code="O1K5FBOqsvN"): ) delisted_promo.image_url = record.image_url if hasattr(record, 'image_url') else None session.add(delisted_promo) + snapshot_count += 1 changed_products.append(delisted_promo) count += 1 @@ -840,9 +844,10 @@ def run_edm_task(lpn_code="O1K5FBOqsvN"): except Exception as e: logging.error(f"[Crawler] [EDM] ❌ 發送通知時發生錯誤 | Error: {e}") - logging.info(f"[Crawler] [EDM] ✅ EDM 任務完成 | New Records: {count} | Batch: {batch_id}") + logging.info(f"[Crawler] [EDM] ✅ EDM 任務完成 | Changed Records: {count} | Snapshot Records: {snapshot_count} | Batch: {batch_id}") stats = { "changed_records": count, + "snapshot_records": snapshot_count, "batch_id": batch_id, "status": "Success" } @@ -1018,6 +1023,7 @@ def run_festival_task(lpn_code="O7ylWfihYUM"): logging.warning("[Crawler] [Festival] 🚨 未偵測到任何商品區塊 | Action: 任務提前結束 | Info: 請檢查偵錯檔案") return count = 0 + snapshot_count = 0 current_scan_items = set() seen_groups = set() changed_products = [] # V-New: 收集異動商品以發送通知 @@ -1155,19 +1161,22 @@ def run_festival_task(lpn_code="O7ylWfihYUM"): status_change = "UPDATE" logging.info(f"[Crawler] [Festival] -> 狀態: 圖片更新 (UPDATE)") - if status_change != "NONE": - new_promo = PromoProduct( - batch_id=batch_id, i_code=i_code, name=name, price=price, url=link_url, - image_url=image_url, previous_price=previous_price, time_slot=group_title, - status_change=status_change, crawled_at=now, activity_time_text=activity_name, - session_time_text=group_title, page_type=PAGE_TYPE - ) - session.add(new_promo) + is_changed = status_change != "NONE" + new_promo = PromoProduct( + batch_id=batch_id, i_code=i_code, name=name, price=price, url=link_url, + image_url=image_url, previous_price=previous_price, time_slot=group_title, + status_change=status_change if is_changed else "ACTIVE", crawled_at=now, activity_time_text=activity_name, + session_time_text=group_title, page_type=PAGE_TYPE + ) + session.add(new_promo) + snapshot_count += 1 + + if is_changed: changed_products.append(new_promo) # V-New: 收集異動商品 count += 1 logging.info(f"[Crawler] [Festival] -> 寫入資料庫: {status_change}") else: - logging.info("[Crawler] [Festival] -> 狀態: 無變動 (NONE) | Action: Skip Write") + logging.info("[Crawler] [Festival] -> 狀態: 無變動 (ACTIVE) | Action: Snapshot Written") except Exception as e: logging.error(f"[Crawler] [Festival] ❌ 解析商品時發生未預期錯誤 | Error: {e}") @@ -1184,12 +1193,13 @@ def run_festival_task(lpn_code="O7ylWfihYUM"): session_time_text=getattr(record, 'session_time_text', activity_name), page_type=PAGE_TYPE ) session.add(delisted_promo) + snapshot_count += 1 changed_products.append(delisted_promo) # V-New: 收集下架商品 count += 1 session.commit() - logging.info(f"[Crawler] [Festival] ✅ {PAGE_TYPE} 任務完成 | New Records: {count} | Batch: {batch_id}") - stats = { "changed_records": count, "batch_id": batch_id, "status": "Success" } + logging.info(f"[Crawler] [Festival] ✅ {PAGE_TYPE} 任務完成 | Changed Records: {count} | Snapshot Records: {snapshot_count} | Batch: {batch_id}") + stats = { "changed_records": count, "snapshot_records": snapshot_count, "batch_id": batch_id, "status": "Success" } _save_stats('festival_task', stats) # 為統計資料使用新的任務名稱 # V-New: 發送通知 - 如果有異動商品則發送 Telegram 和 Line 通知 @@ -1357,6 +1367,7 @@ def run_promo_event_task(lpn_code, page_type, activity_name): return count = 0 + snapshot_count = 0 current_scan_items = set() seen_groups = set() changed_products = [] @@ -1485,19 +1496,22 @@ def run_promo_event_task(lpn_code, page_type, activity_name): status_change = "UPDATE" logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 圖片更新 (UPDATE)") - if status_change != "NONE": - new_promo = PromoProduct( - batch_id=batch_id, i_code=i_code, name=name, price=price, url=link_url, - image_url=image_url, previous_price=previous_price, time_slot=group_title, - status_change=status_change, crawled_at=now, activity_time_text=activity_name, - session_time_text=group_title, page_type=page_type - ) - session.add(new_promo) + is_changed = status_change != "NONE" + new_promo = PromoProduct( + batch_id=batch_id, i_code=i_code, name=name, price=price, url=link_url, + image_url=image_url, previous_price=previous_price, time_slot=group_title, + status_change=status_change if is_changed else "ACTIVE", crawled_at=now, activity_time_text=activity_name, + session_time_text=group_title, page_type=page_type + ) + session.add(new_promo) + snapshot_count += 1 + + if is_changed: changed_products.append(new_promo) count += 1 logging.info(f"[Crawler] [{page_type.upper()}] -> 寫入資料庫: {status_change}") else: - logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 無變動 (NONE) | Action: Skip Write") + logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 無變動 (ACTIVE) | Action: Snapshot Written") except Exception as e: logging.error(f"[Crawler] [{page_type.upper()}] ❌ 解析商品時發生未預期錯誤 | Error: {e}") @@ -1513,12 +1527,13 @@ def run_promo_event_task(lpn_code, page_type, activity_name): session_time_text=getattr(record, 'session_time_text', activity_name), page_type=page_type ) session.add(delisted_promo) + snapshot_count += 1 changed_products.append(delisted_promo) count += 1 session.commit() - logging.info(f"[Crawler] [{page_type.upper()}] ✅ {page_type} 任務完成 | New Records: {count} | Batch: {batch_id}") - stats = { "changed_records": count, "batch_id": batch_id, "status": "Success" } + logging.info(f"[Crawler] [{page_type.upper()}] ✅ {page_type} 任務完成 | Changed Records: {count} | Snapshot Records: {snapshot_count} | Batch: {batch_id}") + stats = { "changed_records": count, "snapshot_records": snapshot_count, "batch_id": batch_id, "status": "Success" } _save_stats(f'{page_type}_task', stats) if changed_products: diff --git a/tests/test_promo_persistence_contract.py b/tests/test_promo_persistence_contract.py new file mode 100644 index 0000000..7ade94a --- /dev/null +++ b/tests/test_promo_persistence_contract.py @@ -0,0 +1,40 @@ +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] + + +def test_promo_crawlers_append_full_snapshots_for_unchanged_items(): + scheduler_source = (ROOT / "scheduler.py").read_text(encoding="utf-8") + + assert 'status_change=status_change if is_changed else "ACTIVE"' in scheduler_source + assert "snapshot_count = 0" in scheduler_source + assert "snapshot_count += 1" in scheduler_source + assert '"snapshot_records": snapshot_count' in scheduler_source + assert "無變動快照已寫入" in scheduler_source + assert "Snapshot Written" in scheduler_source + assert "Skip Write" not in scheduler_source + assert "無變動跳過" not in scheduler_source + + +def test_promo_product_model_contains_crawled_fields_needed_for_history(): + model_source = (ROOT / "database/edm_models.py").read_text(encoding="utf-8") + + for field in [ + "batch_id", + "crawled_at", + "time_slot", + "activity_time_text", + "session_time_text", + "i_code", + "name", + "price", + "discount_text", + "url", + "image_url", + "previous_price", + "remain_qty", + "status_change", + "page_type", + ]: + assert field in model_source