From af260c4a01aa9ce74b849ed730c1847ddfe8bc8c Mon Sep 17 00:00:00 2001 From: OoO Date: Tue, 28 Apr 2026 13:57:44 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9E=E4=B8=89=E5=80=8B?= =?UTF-8?q?=E4=BF=83=E9=8A=B7=E6=B4=BB=E5=8B=95=E7=88=AC=E8=9F=B2=E6=94=AF?= =?UTF-8?q?=E6=8F=B4=EF=BC=88=E6=AF=8D=E8=A6=AA=E7=AF=80=E3=80=81520?= =?UTF-8?q?=E6=83=85=E4=BA=BA=E7=AF=80=E3=80=81=E5=8B=9E=E5=8B=95=E7=AF=80?= =?UTF-8?q?=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增通用促銷活動爬蟲函式 run_promo_event_task() - 更新 crawler_config_loader.py 新增三個活動配置 - 更新 run_scheduler.py 動態註冊促銷活動爬蟲 - 新增 API 端點 /api/run_promo_event_task - 新增三個前端儀表板路由(/edm/mothers_day, /edm/valentine_520, /edm/labor_day) - 更新所有儀表板頁籤列表 - 新增配置檔案 services/data/crawler_config.json - 新增使用文件 docs/guides/promo_event_crawler_guide.md - 更新 agent_actions.py 允許重試列表 --- docs/guides/promo_event_crawler_guide.md | 177 +++++++++++++ routes/api_routes.py | 30 +++ routes/edm_routes.py | 157 +++++++++++- run_scheduler.py | 25 ++ scheduler.py | 312 +++++++++++++++++++++++ services/agent_actions.py | 2 +- services/crawler_config_loader.py | 5 +- services/data/crawler_config.json | 52 ++++ 8 files changed, 756 insertions(+), 4 deletions(-) create mode 100644 docs/guides/promo_event_crawler_guide.md create mode 100644 services/data/crawler_config.json diff --git a/docs/guides/promo_event_crawler_guide.md b/docs/guides/promo_event_crawler_guide.md new file mode 100644 index 0000000..26ddad3 --- /dev/null +++ b/docs/guides/promo_event_crawler_guide.md @@ -0,0 +1,177 @@ +# 促銷活動爬蟲配置指南 + +## 概述 + +本系統支援多種促銷活動的爬蟲監控,包括母親節、520情人節、勞動節等季節性活動。爬蟲系統採用通用架構,可輕鬆擴展支援新的活動類型。 + +## 支援的活動類型 + +| 活動類型 | page_type | 預設活動名稱 | 配置鍵值 | +|---------|-----------|-------------|---------| +| 母親節 | `mothers_day` | 母親節超值限時購 | `mothers_day_2026` | +| 520情人節 | `valentine_520` | 520情人節限定購物 | `valentine_520_2026` | +| 勞動節 | `labor_day` | 勞動節購物優惠 | `labor_day_2026` | +| 1.1狂歡購物節 | `festival` | 1.1狂歡購物節 | `festival_11` | + +## 配置步驟 + +### 1. 編輯配置文件 + +編輯 `services/data/crawler_config.json`: + +```json +{ + "crawlers": { + "mothers_day_2026": { + "enabled": true, + "schedule_hours": 4, + "lpn_code": "YOUR_LPN_CODE_HERE", + "activity_name": "母親節超值限時購", + "page_type": "mothers_day", + "name": "2026 母親節促銷爬蟲" + } + } +} +``` + +**重要參數說明:** +- `enabled`: 設為 `true` 啟用爬蟲 +- `schedule_hours`: 執行頻率(小時) +- `lpn_code`: MOMO 活動頁面的 LPN 代碼(必填) +- `activity_name`: 活動名稱(用於通知和日誌) +- `page_type`: 資料庫區分標識(不可重複) + +### 2. 取得 LPN 代碼 + +LPN 代碼是 MOMO 活動頁面的唯一識別碼,格式如下: + +``` +https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn=O1K5FBOqsvN + ^^^^^^^^^^^^ + 這就是 LPN 代碼 +``` + +### 3. 重啟排程服務 + +修改配置後,需重啟 `momo-scheduler` 容器使配置生效: + +```bash +ssh wooo@192.168.0.110 "ssh ollama@192.168.0.188 \"\ + cd /home/ollama/momo-pro && docker compose restart momo-scheduler\"" +``` + +## 手動觸發爬蟲 + +### 透過 Web API + +```bash +curl -X POST http://localhost:5000/api/run_promo_event_task \ + -H "Content-Type: application/json" \ + -d '{ + "page_type": "mothers_day", + "lpn_code": "O1K5FBOqsvN", + "activity_name": "母親節超值限時購" + }' +``` + +### 透過 Python + +```python +import requests + +response = requests.post( + 'http://localhost:5000/api/run_promo_event_task', + json={ + 'page_type': 'mothers_day', + 'lpn_code': 'O1K5FBOqsvN', + 'activity_name': '母親節超值限時購' + } +) +print(response.json()) +``` + +## 資料庫結構 + +所有促銷活動商品資料儲存在 `promo_products` 表,透過 `page_type` 欄位區分: + +```sql +SELECT * FROM promo_products WHERE page_type = 'mothers_day'; +``` + +## 爬蟲結構分析 + +根據市場情報,各活動頁面結構如下: + +### 母親節(5月第2週) +- **活動名稱**: 《愛在五月》母親節超值限時購 +- **主要商品**: OLAY 歐蕾系列、全酵肌泌賦活乳、時尚服飾 +- **活動時間**: 5月第2週 + +### 520情人節(5/20-21) +- **活動名稱**: 《你我在一起》情人節限定購物 +- **主要商品**: OLAY 歐蕾系列、OZIO 歐姬兒蜂王乳凝露EX +- **活動時間**: 5月20日-21日 + +### 勞動節(5/1-3) +- **活動名稱**: 《勞動者福利》勞動節購物優惠 +- **主要商品**: OLAY 歐蕾系列、春季保養相關商品 +- **活動時間**: 5月1日-3日 + +## 爬蟲功能 + +系統會自動: +1. 抓取活動頁面所有商品區塊 +2. 解析商品名稱、價格、圖片 +3. 偵測價格變動(漲價/跌價) +4. 偵測新上架商品 +5. 偵測下架商品 +6. 發送 Telegram/Line 通知(附截圖) + +## 故障排除 + +### 爬蟲未啟動 +檢查配置文件中的 `enabled` 是否為 `true`,且 `lpn_code` 已填寫。 + +### 未抓取到商品 +1. 檢查 LPN 代碼是否正確 +2. 檢查活動頁面是否已上線 +3. 查看 `logs/system.log` 中的錯誤訊息 +4. 檢查 `logs/debug_htmls/` 中的偵錯截圖 + +### 排程未執行 +確認 `momo-scheduler` 容器運行狀態: + +```bash +ssh wooo@192.168.0.110 "ssh ollama@192.168.0.188 \"\ + docker ps --format '{{.Names}} | {{.Status}}' | grep momo-scheduler\"" +``` + +## 新增活動類型 + +如需新增其他活動類型: + +1. 在 `services/data/crawler_config.json` 新增配置 +2. 在 `run_scheduler.py` 的 `promo_event_configs` 字典中新增對應 +3. 重啟排程服務 + +範例: + +```json +{ + "mid_autumn_2026": { + "enabled": false, + "schedule_hours": 4, + "lpn_code": "", + "activity_name": "中秋節限定優惠", + "page_type": "mid_autumn", + "name": "2026 中秋節促銷爬蟲" + } +} +``` + +```python +promo_event_configs = { + # ... 現有配置 + 'mid_autumn_2026': {'lpn': '', 'page_type': 'mid_autumn', 'name': '中秋節限定優惠'} +} +``` diff --git a/routes/api_routes.py b/routes/api_routes.py index 8a802a6..6dab103 100644 --- a/routes/api_routes.py +++ b/routes/api_routes.py @@ -97,6 +97,36 @@ def trigger_festival_task(): return jsonify({"status": "error", "message": str(e)}), 500 +@api_bp.route('/api/run_promo_event_task', methods=['POST']) +@login_required +def trigger_promo_event_task(): + """API: 手動觸發促銷活動爬蟲任務(支援母親節、520、勞動節等)""" + try: + data = request.get_json() + page_type = data.get('page_type', '') + lpn_code = data.get('lpn_code', '') + activity_name = data.get('activity_name', '促銷活動') + + if not page_type or not lpn_code: + return jsonify({"status": "error", "message": "缺少必要參數: page_type 和 lpn_code"}), 400 + + sys_log.info(f"[Web] [Task] 接收到手動促銷活動執行請求 | Type: {page_type} | LPN: {lpn_code} | Name: {activity_name}") + + # 延遲導入 + import scheduler + importlib.reload(scheduler) + + # 使用執行緒啟動,避免卡住 Web Server + task_thread = threading.Thread(target=scheduler.run_promo_event_task, args=(lpn_code, page_type, activity_name)) + task_thread.daemon = True + task_thread.start() + + return jsonify({"status": "success", "message": f"{activity_name} 爬蟲任務 (LPN: {lpn_code}) 已在背景啟動,請稍後刷新頁面查看結果"}) + except Exception as e: + sys_log.error(f"[Web] [Task] 手動觸發促銷活動任務失敗 | Error: {e}") + return jsonify({"status": "error", "message": str(e)}), 500 + + # ========================================== # 通知 API # ========================================== diff --git a/routes/edm_routes.py b/routes/edm_routes.py index 6d41934..3b4bded 100644 --- a/routes/edm_routes.py +++ b/routes/edm_routes.py @@ -312,7 +312,10 @@ def edm_dashboard(): # 建立儀表板頁籤 promo_pages = [ {'url': url_for('edm.edm_dashboard'), 'name': '限時搶購', 'id': 'edm'}, - {'url': url_for('edm.festival_dashboard'), 'name': '1.1狂歡購物節', 'id': 'festival'} + {'url': url_for('edm.festival_dashboard'), 'name': '1.1狂歡購物節', 'id': 'festival'}, + {'url': url_for('edm.mothers_day_dashboard'), 'name': '母親節', 'id': 'mothers_day'}, + {'url': url_for('edm.valentine_520_dashboard'), 'name': '520情人節', 'id': 'valentine_520'}, + {'url': url_for('edm.labor_day_dashboard'), 'name': '勞動節', 'id': 'labor_day'} ] scheduler_stats = load_scheduler_stats() @@ -361,7 +364,10 @@ def festival_dashboard(): # 建立儀表板頁籤 promo_pages = [ {'url': url_for('edm.edm_dashboard'), 'name': '限時搶購', 'id': 'edm'}, - {'url': url_for('edm.festival_dashboard'), 'name': '1.1狂歡購物節', 'id': 'festival'} + {'url': url_for('edm.festival_dashboard'), 'name': '1.1狂歡購物節', 'id': 'festival'}, + {'url': url_for('edm.mothers_day_dashboard'), 'name': '母親節', 'id': 'mothers_day'}, + {'url': url_for('edm.valentine_520_dashboard'), 'name': '520情人節', 'id': 'valentine_520'}, + {'url': url_for('edm.labor_day_dashboard'), 'name': '勞動節', 'id': 'labor_day'} ] scheduler_stats = load_scheduler_stats() @@ -386,3 +392,150 @@ def festival_dashboard(): return f"系統錯誤: {e}" finally: session.close() + + +@edm_bp.route('/mothers_day') +@login_required +def mothers_day_dashboard(): + """母親節促銷活動專屬儀表板""" + db = DatabaseManager() + session = db.get_session() + + PAGE_TYPE = "mothers_day" + PAGE_NAME = "母親節超值限時購" + + sort_by = request.args.get('sort_by', 'default') + order = request.args.get('order', 'desc') + + try: + data = _build_promo_dashboard_data(session, PAGE_TYPE, PAGE_NAME, sort_by, order) + + # 建立儀表板頁籤 + promo_pages = [ + {'url': url_for('edm.edm_dashboard'), 'name': '限時搶購', 'id': 'edm'}, + {'url': url_for('edm.festival_dashboard'), 'name': '1.1狂歡購物節', 'id': 'festival'}, + {'url': url_for('edm.mothers_day_dashboard'), 'name': '母親節', 'id': 'mothers_day'}, + {'url': url_for('edm.valentine_520_dashboard'), 'name': '520情人節', 'id': 'valentine_520'}, + {'url': url_for('edm.labor_day_dashboard'), 'name': '勞動節', 'id': 'labor_day'} + ] + + scheduler_stats = load_scheduler_stats() + + return render_template('edm_dashboard.html', + promo_pages=promo_pages, + current_promo_page='mothers_day', + page_title=PAGE_NAME, + grouped_items=data['sorted_grouped_items'], + slot_stats=data['slot_stats'], + total_edm_products=len(data['items_in_batch']), + last_update=data['last_update_str'], + activity_time=data['activity_time'], + active_tab=data['active_tab'], + public_url=public_url, + scheduler_stats=scheduler_stats, + current_sort=sort_by, + current_order=order, + slugify=slugify) + except Exception as e: + sys_log.error(f"{PAGE_NAME} Dashboard 渲染錯誤: {e}") + return f"系統錯誤: {e}" + finally: + session.close() + + +@edm_bp.route('/valentine_520') +@login_required +def valentine_520_dashboard(): + """520情人節促銷活動專屬儀表板""" + db = DatabaseManager() + session = db.get_session() + + PAGE_TYPE = "valentine_520" + PAGE_NAME = "520情人節限定購物" + + sort_by = request.args.get('sort_by', 'default') + order = request.args.get('order', 'desc') + + try: + data = _build_promo_dashboard_data(session, PAGE_TYPE, PAGE_NAME, sort_by, order) + + # 建立儀表板頁籤 + promo_pages = [ + {'url': url_for('edm.edm_dashboard'), 'name': '限時搶購', 'id': 'edm'}, + {'url': url_for('edm.festival_dashboard'), 'name': '1.1狂歡購物節', 'id': 'festival'}, + {'url': url_for('edm.mothers_day_dashboard'), 'name': '母親節', 'id': 'mothers_day'}, + {'url': url_for('edm.valentine_520_dashboard'), 'name': '520情人節', 'id': 'valentine_520'}, + {'url': url_for('edm.labor_day_dashboard'), 'name': '勞動節', 'id': 'labor_day'} + ] + + scheduler_stats = load_scheduler_stats() + + return render_template('edm_dashboard.html', + promo_pages=promo_pages, + current_promo_page='valentine_520', + page_title=PAGE_NAME, + grouped_items=data['sorted_grouped_items'], + slot_stats=data['slot_stats'], + total_edm_products=len(data['items_in_batch']), + last_update=data['last_update_str'], + activity_time=data['activity_time'], + active_tab=data['active_tab'], + public_url=public_url, + scheduler_stats=scheduler_stats, + current_sort=sort_by, + current_order=order, + slugify=slugify) + except Exception as e: + sys_log.error(f"{PAGE_NAME} Dashboard 渲染錯誤: {e}") + return f"系統錯誤: {e}" + finally: + session.close() + + +@edm_bp.route('/labor_day') +@login_required +def labor_day_dashboard(): + """勞動節促銷活動專屬儀表板""" + db = DatabaseManager() + session = db.get_session() + + PAGE_TYPE = "labor_day" + PAGE_NAME = "勞動節購物優惠" + + sort_by = request.args.get('sort_by', 'default') + order = request.args.get('order', 'desc') + + try: + data = _build_promo_dashboard_data(session, PAGE_TYPE, PAGE_NAME, sort_by, order) + + # 建立儀表板頁籤 + promo_pages = [ + {'url': url_for('edm.edm_dashboard'), 'name': '限時搶購', 'id': 'edm'}, + {'url': url_for('edm.festival_dashboard'), 'name': '1.1狂歡購物節', 'id': 'festival'}, + {'url': url_for('edm.mothers_day_dashboard'), 'name': '母親節', 'id': 'mothers_day'}, + {'url': url_for('edm.valentine_520_dashboard'), 'name': '520情人節', 'id': 'valentine_520'}, + {'url': url_for('edm.labor_day_dashboard'), 'name': '勞動節', 'id': 'labor_day'} + ] + + scheduler_stats = load_scheduler_stats() + + return render_template('edm_dashboard.html', + promo_pages=promo_pages, + current_promo_page='labor_day', + page_title=PAGE_NAME, + grouped_items=data['sorted_grouped_items'], + slot_stats=data['slot_stats'], + total_edm_products=len(data['items_in_batch']), + last_update=data['last_update_str'], + activity_time=data['activity_time'], + active_tab=data['active_tab'], + public_url=public_url, + scheduler_stats=scheduler_stats, + current_sort=sort_by, + current_order=order, + slugify=slugify) + except Exception as e: + sys_log.error(f"{PAGE_NAME} Dashboard 渲染錯誤: {e}") + return f"系統錯誤: {e}" + finally: + session.close() diff --git a/run_scheduler.py b/run_scheduler.py index 6134926..d291438 100644 --- a/run_scheduler.py +++ b/run_scheduler.py @@ -24,6 +24,7 @@ from scheduler import ( run_momo_task, run_edm_task, run_festival_task, + run_promo_event_task, run_auto_import_task, run_whitepage_check, run_competitor_price_feeder_task, @@ -61,6 +62,30 @@ def _register_schedules(): schedule.every(1).hours.do(run_festival_task) logger.info("📅 每 1 小時:festival_task") + # 動態註冊促銷活動爬蟲(根據配置) + from services.crawler_config_loader import get_enabled_crawlers + enabled_crawlers = get_enabled_crawlers() + + promo_event_configs = { + 'mothers_day_2026': {'lpn': '', 'page_type': 'mothers_day', 'name': '母親節超值限時購'}, + 'valentine_520_2026': {'lpn': '', 'page_type': 'valentine_520', 'name': '520情人節限定購物'}, + 'labor_day_2026': {'lpn': '', 'page_type': 'labor_day', 'name': '勞動節購物優惠'} + } + + for crawler_key, config in enabled_crawlers.items(): + if crawler_key in promo_event_configs: + event_config = promo_event_configs[crawler_key] + lpn_code = config.get('lpn_code', '') + if lpn_code: + schedule_hours = config.get('schedule_hours', 4) + schedule.every(schedule_hours).hours.do( + lambda lpn=lpn_code, pt=event_config['page_type'], an=event_config['name']: + run_promo_event_task(lpn, pt, an) + ) + logger.info(f"📅 每 {schedule_hours} 小時:{event_config['name']} ({event_config['page_type']})") + else: + logger.warning(f"⚠️ {event_config['name']} 未配置 LPN 代碼,跳過排程") + schedule.every(4).hours.do(run_competitor_price_feeder_task) logger.info("📅 每 4 小時:competitor_price_feeder") diff --git a/scheduler.py b/scheduler.py index d02b6e9..6b276d4 100644 --- a/scheduler.py +++ b/scheduler.py @@ -1179,6 +1179,318 @@ def run_festival_task(lpn_code="O7ylWfihYUM"): stats = { "status": "Failed", "error": str(e) } _save_stats('festival_task', stats) +def run_promo_event_task(lpn_code, page_type, activity_name): + """ + 通用促銷活動爬蟲任務 + 支援多種促銷活動類型(母親節、520情人節、勞動節等) + + :param lpn_code: 活動代碼 (LPN) + :param page_type: 頁面類型 (用於資料庫區分,如 'mothers_day', 'valentine_520', 'labor_day') + :param activity_name: 活動名稱 (用於通知和日誌) + """ + logging.info(f"[Crawler] [{page_type.upper()}] 🚀 啟動 {activity_name} 爬蟲任務 | LPN: {lpn_code}") + + DEBUG_MODE = False + + try: + with managed_scraper_resources(window_size='1920,10000', debug=DEBUG_MODE, timeout=120) as (driver, session): + component_areas = [] + + url = f"https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn={lpn_code}&n=1" + logging.info(f"[Crawler] [{page_type.upper()}] 🔗 前往頁面: {url}") + + try: + driver.get(url) + except TimeoutException: + logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ 頁面載入超時 (120s),嘗試停止載入並繼續解析...") + try: + driver.execute_script("window.stop();") + except: pass + + time.sleep(10) + logging.info(f"[Crawler] [{page_type.upper()}] 📄 頁面標題: {driver.title}") + + # 嘗試在 iframe 中尋找內容 + iframes = driver.find_elements(By.TAG_NAME, 'iframe') + if iframes: + logging.info(f"[Crawler] [{page_type.upper()}] 🕵️‍♂️ 偵測到 {len(iframes)} 個 iframe | Action: 嘗試切換進入...") + for index, iframe in enumerate(iframes): + try: + driver.switch_to.frame(iframe) + logging.info(f"[Crawler] [{page_type.upper()}] -> 已進入 iframe #{index} | Action: 開始尋找內容...") + + body = driver.find_element(By.TAG_NAME, 'body') + last_height = 0 + for scroll_attempt in range(25): + body.send_keys(Keys.PAGE_DOWN) + time.sleep(0.5) + if scroll_attempt % 5 == 0: + new_height = driver.execute_script("return document.body.scrollHeight") + if new_height == last_height: + break + last_height = new_height + time.sleep(2) + + component_areas = _find_component_areas_with_diagnostic(driver) + if component_areas: + logging.info(f"[Crawler] [{page_type.upper()}] ✅ 在 iframe #{index} 中找到 {len(component_areas)} 個商品區塊!") + break + else: + logging.info(f"[Crawler] [{page_type.upper()}] ...在 iframe #{index} 中未找到商品區塊 | Action: 切換回主頁面") + driver.switch_to.default_content() + except Exception as e: + logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ 處理 iframe #{index} 時發生錯誤 | Error: {e}") + driver.switch_to.default_content() + + driver.switch_to.default_content() + if not component_areas: + logging.info(f"[Crawler] [{page_type.upper()}] 📜 在主頁面執行滾動與查找...") + body = driver.find_element(By.TAG_NAME, 'body') + last_height = 0 + for scroll_attempt in range(25): + body.send_keys(Keys.PAGE_DOWN) + time.sleep(0.5) + if scroll_attempt > 0 and scroll_attempt % 5 == 0: + new_height = driver.execute_script("return document.body.scrollHeight") + if new_height == last_height: + break + last_height = new_height + time.sleep(3) + component_areas = _find_component_areas_with_diagnostic(driver) + + if DEBUG_MODE or not component_areas: + debug_path = os.path.join(BASE_DIR, 'logs', 'debug_htmls') + os.makedirs(debug_path, exist_ok=True) + ts = int(time.time()) + screenshot_path = os.path.join(debug_path, f"{page_type}_screenshot_{ts}.png") + html_path = os.path.join(debug_path, f"{page_type}_page_source_{ts}.html") + driver.save_screenshot(screenshot_path) + with open(html_path, "w", encoding="utf-8") as f: + f.write(driver.page_source) + logging.info(f"[Crawler] [{page_type.upper()}] 📸 [偵錯] 螢幕截圖已儲存 | Path: {screenshot_path}") + logging.info(f"[Crawler] [{page_type.upper()}] 📄 [偵錯] 頁面原始碼已儲存 | Path: {html_path}") + + batch_id = int(time.time()) + now = datetime.now(TAIPEI_TZ).replace(tzinfo=None) + + subq = session.query(func.max(PromoProduct.id).label('max_id'))\ + .filter(PromoProduct.page_type == page_type)\ + .group_by(PromoProduct.i_code, PromoProduct.time_slot).subquery() + latest_records = session.query(PromoProduct).join(subq, PromoProduct.id == subq.c.max_id).all() + active_db_items = {(r.i_code, r.time_slot): r for r in latest_records if r.status_change not in ('DELISTED', 'SLOT_END')} + + logging.info(f"[Crawler] [{page_type.upper()}] 📦 偵測到 {len(component_areas)} 個商品區塊 (component-area)") + if not component_areas: + logging.warning(f"[Crawler] [{page_type.upper()}] 🚨 未偵測到任何商品區塊 | Action: 任務提前結束 | Info: 請檢查偵錯檔案") + return + + count = 0 + current_scan_items = set() + seen_groups = set() + changed_products = [] + screenshot_path = None + + for area in component_areas: + group_title = "未分類" + try: + title_el = area.find_element(By.CSS_SELECTOR, "span.js-PD_val[data-title='區塊標題文案']") + group_title = title_el.text.strip() + except Exception: + try: + area.find_element(By.CSS_SELECTOR, ".Area_swiper") + group_title = "今日主打" + logging.info(f"[Crawler] [{page_type.upper()}] 🔍 偵測到輪播區塊 | Title: '今日主打'") + except Exception: + logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ 找不到區塊標題,也非輪播區塊 | Action: 跳過此區塊") + continue + + seen_groups.add(group_title) + logging.info(f"[Crawler] [{page_type.upper()}] 👉 處理區塊: '{group_title}'") + + products = area.find_elements(By.CSS_SELECTOR, "li.PD_slide.js-PD_id") + logging.info(f"[Crawler] [{page_type.upper()}] 📦 此區塊找到 {len(products)} 個商品") + + for item_idx, item in enumerate(products): + try: + i_code_raw = item.get_attribute("data-id") + logging.info(f"[Crawler] [{page_type.upper()}] [{item_idx+1}/{len(products)}] 開始解析商品 | data-id: {i_code_raw or 'N/A'}") + if not i_code_raw or 'logo' in i_code_raw: + logging.debug(f"[Crawler] [{page_type.upper()}] -> 跳過無效 data-id 的項目。") + continue + + try: + i_code = str(int(i_code_raw)) + except ValueError: + i_code = i_code_raw.upper() + + link_url, brand, product_name, name, price, image_url = None, "", "", "", None, None + + try: + link_el = item.find_element(By.CSS_SELECTOR, "a.js-PD_url") + link_url = link_el.get_attribute("data-urlpc") + except Exception: logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到商品連結 | i_code: {i_code}") + + try: brand = item.find_element(By.CSS_SELECTOR, "b.js-PD_txt1").text.strip() + except Exception: logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到品牌名稱 | i_code: {i_code}") + + try: product_name = item.find_element(By.CSS_SELECTOR, "span.js-PD_txt2").text.strip() + except Exception: logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到產品名稱 | i_code: {i_code}") + + name = f"{brand} {product_name}".strip() + if not name: + try: + name = item.find_element(By.CSS_SELECTOR, "img.js-PD_img").get_attribute("alt").strip() + logging.debug(f"[Crawler] [{page_type.upper()}] - 使用圖片 alt 屬性作為名稱: '{name}'") + except Exception: + logging.warning(f"[Crawler] [{page_type.upper()}] - 錯誤: 無法解析任何有效名稱 | i_code: {i_code} | Action: Skip") + continue + + price_selectors = [ + "span.Price.js-PD_price", + ".price span", + ".money span", + ".price", + ".money", + "b.price", + "span.price", + ".price b", + ".money b", + ".prdPrice span", + ".prdPrice b", + ".prdPrice", + "p.price", + "div.price" + ] + + for selector in price_selectors: + try: + price_el = item.find_element(By.CSS_SELECTOR, selector) + price_text = price_el.text + price_clean = re.sub(r'[^\d]', '', price_text) + if price_clean: + price = int(price_clean) + break + except Exception: + continue + + if price is None: + logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到價格 | Name: {name} | ID: {i_code}") + + def get_image_path(i_code_str): + part3 = i_code_str[-3:] if len(i_code_str) >= 3 else i_code_str.zfill(3) + part2 = i_code_str[-6:-3] if len(i_code_str) > 3 else '000' + part1 = i_code_str[:-6] if len(i_code_str) > 6 else '0' + part1 = part1.zfill(4) + part2 = part2.zfill(3) + return f'{part1}/{part2}/{part3}' + + try: + image_path = get_image_path(str(i_code)) + image_url = f"https://img.momoshop.com.tw/goodsimg/{image_path}/{i_code}_OL_m.webp" + except Exception as e: + logging.warning(f"[Crawler] [{page_type.upper()}] - 警告: 圖片 URL 構造失敗 | i_code: {i_code} | Error: {e}") + image_url = None + + logging.info(f"[Crawler] [{page_type.upper()}] -> 解析結果 | Name: {name} | Price: {price}") + + current_scan_items.add((i_code, group_title)) + + status_change = "NONE" + prev_record = active_db_items.get((i_code, group_title)) + previous_price = None + if not prev_record: + status_change = "NEW" + logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 新商品 (NEW)") + else: + if price != prev_record.price: + if price is not None and prev_record.price is not None: + status_change = "PRICE_DOWN" if price < prev_record.price else "PRICE_UP" + else: + status_change = "UPDATE" + previous_price = prev_record.price + logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 價格變動 ({status_change}) | From: {prev_record.price} | To: {price}") + elif status_change == "NONE" and (not prev_record.image_url and image_url): + status_change = "UPDATE" + logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 圖片更新 (UPDATE)") + + if status_change != "NONE": + new_promo = PromoProduct( + batch_id=batch_id, i_code=i_code, name=name, price=price, url=link_url, + image_url=image_url, previous_price=previous_price, time_slot=group_title, + status_change=status_change, crawled_at=now, activity_time_text=activity_name, + session_time_text=group_title, page_type=page_type + ) + session.add(new_promo) + changed_products.append(new_promo) + count += 1 + logging.info(f"[Crawler] [{page_type.upper()}] -> 寫入資料庫: {status_change}") + else: + logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 無變動 (NONE) | Action: Skip Write") + + except Exception as e: + logging.error(f"[Crawler] [{page_type.upper()}] ❌ 解析商品時發生未預期錯誤 | Error: {e}") + continue + + for (i_code, slot), record in active_db_items.items(): + if (i_code, slot) not in current_scan_items: + new_status = "DELISTED" if record.time_slot in seen_groups else "SLOT_END" + delisted_promo = PromoProduct( + batch_id=batch_id, i_code=i_code, name=record.name, price=record.price, url=record.url, + image_url=record.image_url, time_slot=record.time_slot, previous_price=record.price, + status_change=new_status, crawled_at=now, activity_time_text=activity_name, + session_time_text=getattr(record, 'session_time_text', activity_name), page_type=page_type + ) + session.add(delisted_promo) + changed_products.append(delisted_promo) + count += 1 + + session.commit() + logging.info(f"[Crawler] [{page_type.upper()}] ✅ {page_type} 任務完成 | New Records: {count} | Batch: {batch_id}") + stats = { "changed_records": count, "batch_id": batch_id, "status": "Success" } + _save_stats(f'{page_type}_task', stats) + + if changed_products: + logging.info(f"[Crawler] [{page_type.upper()}] 📢 偵測到 {len(changed_products)} 筆異動 | Action: 準備發送通知") + try: + try: + dashboard_url = "http://127.0.0.1:5000/edm" + logging.info(f"[Crawler] [{page_type.upper()}] 📸 準備截取儀表板畫面: {dashboard_url}") + driver.get(dashboard_url) + time.sleep(3) + driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") + time.sleep(2) + driver.execute_script("window.scrollTo(0, 0);") + time.sleep(2) + required_height = driver.execute_script("return document.body.parentNode.scrollHeight") + driver.set_window_size(2560, int(required_height) + 100) + time.sleep(2) + shot_dir = os.path.join(BASE_DIR, 'web', 'static', 'screenshots') + os.makedirs(shot_dir, exist_ok=True) + screenshot_filename = f"{page_type}_{batch_id}.png" + screenshot_path = os.path.join(shot_dir, screenshot_filename) + driver.save_screenshot(screenshot_path) + file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0 + logging.info(f"[Crawler] [{page_type.upper()}] 📸 已儲存頁面截圖 | Path: {screenshot_path} | Size: {file_size:,} bytes") + except Exception as e: + logging.error(f"[Crawler] [{page_type.upper()}] ❌ 截圖失敗 | Error: {e}") + + import importlib + import services.edm_notifier + importlib.reload(services.edm_notifier) + from services.edm_notifier import EdmNotifier + EdmNotifier().send_edm_report(changed_products, screenshot_path) + logging.info(f"[Crawler] [{page_type.upper()}] ✅ {activity_name} 通知已發送") + except Exception as e: + logging.error(f"[Crawler] [{page_type.upper()}] ❌ 發送通知時發生錯誤 | Error: {e}") + else: + logging.info(f"[Crawler] [{page_type.upper()}] ℹ️ 無異動,不發送通知") + + except Exception as e: + logging.error(f"[Crawler] [{page_type.upper()}] 🚨 {page_type} 任務異常 | Error: {e}") + stats = { "status": "Failed", "error": str(e) } + _save_stats(f'{page_type}_task', stats) + + def run_whitepage_check(): """ V-New: 檢查網頁服務是否變成白頁 diff --git a/services/agent_actions.py b/services/agent_actions.py index 96df359..ada59c0 100644 --- a/services/agent_actions.py +++ b/services/agent_actions.py @@ -55,7 +55,7 @@ ALLOWED_RETRY_TASKS = { "run_auto_import_task", "run_momo_task", "run_edm_task", "run_competitor_price_feeder_task", "run_backup_monitor_task", "run_icaim_analysis_task", "run_festival_task", "run_whitepage_check", - "run_icaim_analysis_task", "run_db_backup_task", + "run_icaim_analysis_task", "run_db_backup_task", "run_promo_event_task", } diff --git a/services/crawler_config_loader.py b/services/crawler_config_loader.py index 9d8792d..ec6db0b 100644 --- a/services/crawler_config_loader.py +++ b/services/crawler_config_loader.py @@ -33,7 +33,10 @@ def get_default_config() -> Dict[str, Any]: "crawlers": { "momo_main": {"enabled": True, "schedule_hours": 4}, "edm_promo": {"enabled": True, "schedule_hours": 4}, - "festival_11": {"enabled": False, "schedule_hours": 6} + "festival_11": {"enabled": False, "schedule_hours": 6}, + "mothers_day_2026": {"enabled": False, "schedule_hours": 4, "lpn_code": "", "activity_name": "母親節超值限時購", "page_type": "mothers_day"}, + "valentine_520_2026": {"enabled": False, "schedule_hours": 4, "lpn_code": "", "activity_name": "520情人節限定購物", "page_type": "valentine_520"}, + "labor_day_2026": {"enabled": False, "schedule_hours": 4, "lpn_code": "", "activity_name": "勞動節購物優惠", "page_type": "labor_day"} }, "settings": { "auto_import_enabled": True, diff --git a/services/data/crawler_config.json b/services/data/crawler_config.json new file mode 100644 index 0000000..3bc00f6 --- /dev/null +++ b/services/data/crawler_config.json @@ -0,0 +1,52 @@ +{ + "crawlers": { + "momo_main": { + "enabled": true, + "schedule_hours": 4, + "name": "MOMO 主站熱銷商品爬蟲" + }, + "edm_promo": { + "enabled": true, + "schedule_hours": 4, + "name": "EDM 限時搶購爬蟲" + }, + "festival_11": { + "enabled": false, + "schedule_hours": 6, + "name": "1.1 狂歡購物節爬蟲" + }, + "mothers_day_2026": { + "enabled": false, + "schedule_hours": 4, + "lpn_code": "", + "activity_name": "母親節超值限時購", + "page_type": "mothers_day", + "name": "2026 母親節促銷爬蟲" + }, + "valentine_520_2026": { + "enabled": false, + "schedule_hours": 4, + "lpn_code": "", + "activity_name": "520情人節限定購物", + "page_type": "valentine_520", + "name": "2026 520 情人節促銷爬蟲" + }, + "labor_day_2026": { + "enabled": false, + "schedule_hours": 4, + "lpn_code": "", + "activity_name": "勞動節購物優惠", + "page_type": "labor_day", + "name": "2026 勞動節促銷爬蟲" + } + }, + "settings": { + "auto_import_enabled": true, + "auto_import_interval_minutes": 30 + }, + "metadata": { + "version": "1.0", + "last_updated": "2026-04-28", + "description": "爬蟲配置文件 - 控制各爬蟲任務的啟用狀態與執行頻率" + } +}