# cspell:ignore momo goodsimg prdimg bottomicon import os import re import time import logging import json import threading import requests from datetime import datetime, timedelta, timezone from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.by import By from contextlib import contextmanager from selenium.webdriver.common.keys import Keys from selenium.common.exceptions import NoAlertPresentException, TimeoutException, UnexpectedAlertPresentException from sqlalchemy import desc, func from database.manager import DatabaseManager from database.models import Product, PriceRecord from database.edm_models import PromoProduct from services.notification_manager import NotificationManager from services.edm_notifier import EdmNotifier # V-New: 導入新的通知模組 from utils.momo_url_utils import normalize_momo_product_url # V-Fix: 改為匯入讀取函式,而非靜態變數,以支援動態更新 from config import load_momo_categories # V9.99: 修正 BASE_DIR 未定義的問題 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) logging.basicConfig( level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s', handlers=[logging.FileHandler("logs/system.log", encoding="utf-8"), logging.StreamHandler()] ) # 設定台北時區 TAIPEI_TZ = timezone(timedelta(hours=8)) class MomoEdmUnavailable(RuntimeError): """MOMO EDM page is expired or unavailable; skip without failure alert.""" def __init__(self, alert_text: str, url: str): self.alert_text = str(alert_text or "").strip() self.url = url super().__init__(self.alert_text or "MOMO EDM unavailable") def _is_momo_edm_unavailable_alert(alert_text): text = str(alert_text or "").replace(" ", "") return "很抱歉此EDM不存在" in text or "EDM不存在" in text def _accept_current_alert(driver): try: alert = driver.switch_to.alert alert_text = str(getattr(alert, "text", "") or "") alert.accept() return alert_text except NoAlertPresentException: return "" def _raise_if_momo_edm_unavailable(driver, task_label, url): alert_text = _accept_current_alert(driver) if not alert_text: return if _is_momo_edm_unavailable_alert(alert_text): logging.warning( "%s ⚠️ MOMO 活動頁已失效,任務改為 Skipped,不送 failure alert | URL: %s | Alert: %s", task_label, url, alert_text, ) raise MomoEdmUnavailable(alert_text, url) logging.warning("%s ⚠️ 已接受瀏覽器 alert 後繼續解析 | Alert: %s", task_label, alert_text) def _safe_driver_title(driver, task_label, url): try: return driver.title except UnexpectedAlertPresentException as exc: alert_text = _accept_current_alert(driver) or getattr(exc, "alert_text", "") or str(exc) if _is_momo_edm_unavailable_alert(alert_text): logging.warning( "%s ⚠️ 讀取 title 時偵測到 MOMO 活動頁失效,任務改為 Skipped | URL: %s | Alert: %s", task_label, url, alert_text, ) raise MomoEdmUnavailable(alert_text, url) from exc raise def _save_stats(task_name, data): """將任務統計結果寫入 JSON 檔案""" stats_file = os.path.join(os.path.dirname(__file__), 'data', 'scheduler_stats.json') try: stats = {} if os.path.exists(stats_file): with open(stats_file, 'r', encoding='utf-8') as f: stats = json.load(f) # V-New: 將統計資料改為歷史列表,保留最近5次紀錄 if task_name not in stats or not isinstance(stats.get(task_name), list): stats[task_name] = [] data['last_run'] = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M:%S') stats[task_name].insert(0, data) # 從最前面插入 stats[task_name] = stats[task_name][:5] # 只保留最近5筆 with open(stats_file, 'w', encoding='utf-8') as f: json.dump(stats, f, ensure_ascii=False, indent=4) except Exception as e: logging.error(f"[Scheduler] [Stats] ❌ 無法儲存排程統計 | Task: {task_name} | Error: {e}") WHITEPAGE_MARKER_GROUPS = [ { "label": "導航佈局", "patterns": ("navbar", "momo-sidebar", "momo-topbar", "momo-layout", "login-card"), }, { "label": "系統識別", "patterns": ("MOMO 價格監控系統", "EwoooC", "WOOO TECH", "商品看板", "價格監控"), }, { "label": "內容容器", "patterns": ("card", "login-card", "momo-card", "container"), }, ] def _missing_whitepage_markers(content): """Return marker groups missing from a non-blank momo page.""" missing = [] for group in WHITEPAGE_MARKER_GROUPS: if not any(pattern in content for pattern in group["patterns"]): missing.append(group["label"]) return missing @contextmanager def managed_scraper_resources(window_size='1920,5000', debug=False, timeout=45, max_retries=2): """ 一個管理 Selenium WebDriver 和資料庫 session 的上下文管理器。 它能確保資源在使用後被正確關閉,並在發生錯誤時自動回滾。 V-Opt 2026-01-15: 增加穩定性優化 - 增加更多 Chrome 穩定性選項 - 增加 session 健康檢查 - 改進資源清理邏輯 """ driver = None session = None retry_count = 0 while retry_count <= max_retries: try: options = Options() # V-Opt: 改用 'eager' 策略,加速爬蟲 (不等圖片/廣告載入完成,大幅減少超時機率) options.page_load_strategy = 'eager' # V-Debug: 如果不是偵錯模式,才使用無頭瀏覽器 if not debug: options.add_argument('--headless=new') options.add_argument(f'--window-size={window_size}') options.add_argument("--disable-blink-features=AutomationControlled") options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36") # === V-Opt 2026-01-15: 增強穩定性選項 === options.add_argument('--disable-gpu') options.add_argument('--disable-extensions') options.add_argument('--disable-dev-shm-usage') # 解決 /dev/shm 空間不足問題 options.add_argument('--no-sandbox') options.add_argument('--disable-setuid-sandbox') options.add_argument('--disable-background-networking') options.add_argument('--disable-background-timer-throttling') options.add_argument('--disable-backgrounding-occluded-windows') options.add_argument('--disable-breakpad') # 禁用崩潰報告 options.add_argument('--disable-component-extensions-with-background-pages') options.add_argument('--disable-features=TranslateUI,AutofillServerCommunication,PasswordManagerOnboarding,PasswordCheck') options.add_argument('--disable-hang-monitor') # 禁用掛起監控,避免誤殺 options.add_argument('--disable-ipc-flooding-protection') options.add_argument('--disable-popup-blocking') options.add_argument('--disable-prompt-on-repost') options.add_argument('--disable-renderer-backgrounding') options.add_argument('--disable-sync') options.add_argument('--metrics-recording-only') options.add_argument('--no-first-run') options.add_argument('--safebrowsing-disable-auto-update') # V-Opt: 記憶體優化 options.add_argument('--memory-pressure-off') # 禁用記憶體壓力監控 options.add_argument('--js-flags=--max-old-space-size=512') # 限制 JS 堆大小 # V-Fix: 中文字體和截圖設定 options.add_argument('--lang=zh-TW') options.add_argument('--force-device-scale-factor=1') options.add_argument('--font-render-hinting=none') options.add_argument('--force-color-profile=srgb') # V-Fix: 字體偏好設定 prefs = { 'intl.accept_languages': 'zh-TW,zh,en', 'profile.default_content_setting_values.notifications': 2, 'profile.managed_default_content_settings.images': 1, # 允許圖片 'credentials_enable_service': False, 'profile.password_manager_enabled': False, } options.add_experimental_option('prefs', prefs) # V-Opt: 禁用自動化控制標記 options.add_experimental_option('excludeSwitches', ['enable-automation']) options.add_experimental_option('useAutomationExtension', False) driver = webdriver.Chrome(options=options) db = DatabaseManager() driver.set_page_load_timeout(timeout) driver.set_script_timeout(10) session = db.get_session() # V-New: Session 健康檢查 try: _ = driver.current_url # 簡單的健康檢查 logging.debug("[Scraper] [Resource] ✅ Chrome session 健康檢查通過") except Exception as health_error: logging.warning(f"[Scraper] [Resource] ⚠️ Chrome session 健康檢查失敗: {health_error}") raise health_error break # 成功建立連線,跳出重試循環 except Exception as init_error: retry_count += 1 if driver: try: driver.quit() except Exception as cleanup_error: logging.warning( f"[Scraper] [Resource] ⚠️ Chrome 初始化失敗後關閉 driver 也失敗 | Error: {cleanup_error}", exc_info=True, ) driver = None if retry_count <= max_retries: logging.warning(f"[Scraper] [Resource] ⚠️ Chrome 初始化失敗 (嘗試 {retry_count}/{max_retries}): {init_error}") time.sleep(2) # 等待 2 秒後重試 else: logging.error(f"[Scraper] [Resource] ❌ Chrome 初始化失敗,已達重試上限: {init_error}") raise init_error try: yield driver, session except Exception as e: logging.error(f"[Scraper] [Resource] ❌ Context manager捕獲到異常 | Error: {type(e).__name__} | Action: session.rollback()") if session: session.rollback() raise finally: # V-Opt 2026-01-15: 改進資源清理,確保 Chrome 完全關閉 logging.debug("[Scraper] [Resource] 資源管理器:正在關閉 WebDriver 和資料庫 session。") if driver: try: # 先關閉所有視窗 try: driver.close() except Exception as close_error: logging.debug( f"[Scraper] [Resource] Chrome 視窗關閉失敗但繼續 quit | Error: {close_error}", exc_info=True, ) # 再退出 driver driver.quit() except Exception as quit_error: logging.warning(f"[Scraper] [Resource] ⚠️ Chrome 關閉時發生錯誤: {quit_error}") # 強制殺死 Chrome 進程 try: import subprocess subprocess.run(['pkill', '-f', 'chrome.*--headless'], timeout=5, capture_output=True) except Exception as pkill_error: logging.warning( f"[Scraper] [Resource] ⚠️ Chrome 強制清理失敗 | Error: {pkill_error}", exc_info=True, ) if session: try: session.close() except Exception as session_error: logging.warning( f"[Scraper] [Resource] ⚠️ DB session 關閉失敗 | Error: {session_error}", exc_info=True, ) def run_momo_task(): """V8.1 邏輯:處理所有分類並存入資料庫""" # ADR-012 Phase 4: HITL 暫停檢查 try: from services.agent_actions import is_task_paused if is_task_paused("run_momo_task"): logging.info("[Crawler] [MOMO] ⏸️ 任務被 HITL 暫停中,本次跳過") return except Exception as pause_check_error: logging.debug( f"[Crawler] [MOMO] HITL 暫停檢查失敗但繼續排程 | Error: {pause_check_error}", exc_info=True, ) try: # V-New: 每次執行任務時,動態從 JSON 檔案重新讀取分類 # 這解決了「修改設定需重啟」的問題,也避免了重啟造成的系統崩潰 current_categories = load_momo_categories() total_cats = len(current_categories) logging.info(f"[Crawler] [MOMO] 🚀 啟動批次爬蟲任務 | Categories: {total_cats}") # V-Opt: 將超時設定進一步縮短至 15s (eager 模式下通常 5-10s 即可),加速跳過卡頓頁面 with managed_scraper_resources(timeout=15) as (driver, session): # 頁面載入超時已移至 managed_scraper_resources 中統一管理 total_scraped_count = 0 total_new_products = 0 for i, cat in enumerate(current_categories): cat_name = cat['name'] cat_url = cat['url'] # V-New: 為每個分類增加獨立的錯誤處理,避免單一分類失敗導致整個任務中斷 try: logging.info(f"[Crawler] [MOMO] [{i+1}/{total_cats}] 正在處理類別: {cat_name}") start_time = time.time() # V-Opt: 增加超時救援機制 (Timeout Rescue) # 當頁面載入超過設定時間 (25s) 時,不拋出錯誤跳過,而是強制停止載入 (window.stop) # 這樣通常 DOM 已經呈現出來了,可以繼續抓取商品,避免浪費時間又沒抓到資料 try: driver.get(cat_url) # Timeout is 15s except TimeoutException: logging.warning(f"[Crawler] [MOMO] ⚠️ 頁面載入超時 (15s),強制停止載入並嘗試解析內容...") try: driver.execute_script("window.stop();") except Exception as e: logging.exception(f"[Crawler] [MOMO] window.stop() 失敗但繼續 | Category: {cat_name} | Error: {e}") # V-Opt: eager 模式下,get 會很快返回,這裡稍微等待一下 DOM 穩定 time.sleep(1) # === V8.1 滾動頁面邏輯 === # V-Opt: 包覆 try-except 避免滾動卡死導致整個分類失敗 try: for j in range(1, 3): # V-Opt: 再減少一次滾動 (4 -> 3),通常前兩屏即包含大部分熱銷商品 driver.execute_script(f"window.scrollTo(0, {j * 1500});") # 加大滾動距離 time.sleep(0.5) except Exception as e: logging.exception(f"[Crawler] [MOMO] 滾動頁面失敗但繼續 | Category: {cat_name} | Error: {e}") # === V9.95: 改為容器優先的爬取策略,提高穩定性 === # 1. 找出所有可能的商品容器 containers = driver.find_elements(By.CSS_SELECTOR, "li.goods, div.eachGood, li.box1, li.product_item") if not containers: logging.info(" - 未找到標準容器,啟用備案掃描...") containers = driver.find_elements(By.XPATH, "//li[.//p[contains(@class, 'prdName')] or .//h3[contains(@class, 'prdName')]]") logging.info(f"[Crawler] [MOMO] ⚡ 偵測到 {len(containers)} 個潛在商品區塊...") count = 0 new_in_cat = 0 if not containers: logging.warning(f"[Crawler] [MOMO] ⚠️ 類別 [{cat_name}] 未偵測到任何商品容器 | 請檢查網頁結構或 URL") try: debug_path = os.path.join(BASE_DIR, 'logs', 'debug_htmls') os.makedirs(debug_path, exist_ok=True) with open(os.path.join(debug_path, f"{cat_name}_debug.html"), "w", encoding="utf-8") as f: f.write(driver.page_source) logging.info(f"[Crawler] [MOMO] 🐛 已儲存頁面原始碼 | Path: {debug_path}") except Exception as e: logging.error(f"[Crawler] [MOMO] ❌ 無法儲存除錯頁面 | Error: {e}") # 收集本分類所有商品資料,稍後批次寫入 category_products = [] for container in containers: title, link_url, i_code, image_url = "", "", None, None price_val = 0 try: # 2. 在容器內分別提取各項資訊 (使用 find_elements 避免例外開銷) # 提取連結與 i_code link_els = container.find_elements(By.CSS_SELECTOR, "a[href*='i_code=']") if not link_els: link_els = container.find_elements(By.TAG_NAME, "a") if not link_els: continue link_url = link_els[0].get_attribute("href") if not link_url: continue # 從 URL 提取 i_code i_code_raw = None match_icode = re.search(r'i_code=([a-zA-Z0-9_-]+)', link_url) if match_icode: i_code_raw = match_icode.group(1) else: match_tp = re.search(r'/goodsDetail/([a-zA-Z0-9_-]+)', link_url) if match_tp: i_code_raw = match_tp.group(1) if not i_code_raw: continue # 標準化 i_code try: i_code = str(int(i_code_raw)) except ValueError: i_code = i_code_raw.upper() product_url = normalize_momo_product_url(link_url, i_code) if not product_url: logging.warning( f"[Crawler] [MOMO] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}" ) continue # 提取名稱 name_els = container.find_elements(By.CSS_SELECTOR, ".prdName, .goodsName, .productName, .title") if name_els: title = name_els[0].get_attribute("textContent").strip() if not title: title = name_els[0].get_attribute("innerText").strip() if not title: continue # 提取價格 price_els = container.find_elements(By.CSS_SELECTOR, ".price b, .money b, .prdPrice b, .price span, .money span, .prdPrice span, .price, .money, .prdPrice") if price_els: price_text = price_els[0].get_attribute("textContent").strip() price_val = int(re.sub(r'[^\d]', '', price_text)) if price_val < 10: continue else: continue # === V9.3: 使用 MOMO 最新 CDN 圖片路徑格式 === # 路徑規則:從右往左取 3位/3位/剩餘,第一部分補0到4位 # 例如:12092813 → 0012/092/813 # 格式:https://img.momoshop.com.tw/goodsimg/{path}/{i_code}_OL_m.webp def get_image_path(i_code_str): part3 = i_code_str[-3:] if len(i_code_str) >= 3 else i_code_str.zfill(3) part2 = i_code_str[-6:-3] if len(i_code_str) > 3 else '000' part1 = i_code_str[:-6] if len(i_code_str) > 6 else '0' part1 = part1.zfill(4) part2 = part2.zfill(3) return f'{part1}/{part2}/{part3}' image_path = get_image_path(str(i_code)) image_url = f"https://img.momoshop.com.tw/goodsimg/{image_path}/{i_code}_OL_m.webp" # 加入列表,稍後批次處理 category_products.append({ 'i_code': str(i_code), 'name': title, 'category': cat_name, 'url': product_url, 'image_url': image_url, 'price': price_val }) except Exception as e: logging.warning(f"[Crawler] [MOMO] ⚠️ 商品區塊處理異常 | Error: {e}") continue # 批次寫入資料庫 if category_products: # 1. 查詢現有商品 i_codes = [p['i_code'] for p in category_products] existing_products = session.query(Product).filter(Product.i_code.in_(i_codes)).all() existing_map = {p.i_code: p for p in existing_products} for item in category_products: product = existing_map.get(item['i_code']) if not product: product = Product(i_code=item['i_code'], name=item['name'], category=item['category'], url=item['url'], image_url=item['image_url']) session.add(product) session.flush() new_in_cat += 1 existing_map[item['i_code']] = product else: if product.category != item['category']: product.category = item['category'] normalized_existing_url = normalize_momo_product_url(item['url'], item['i_code']) if product.url != normalized_existing_url: product.url = normalized_existing_url if item['image_url']: product.image_url = item['image_url'] session.add(PriceRecord(product_id=product.id, price=item['price'], timestamp=datetime.now(TAIPEI_TZ).replace(tzinfo=None))) count += 1 session.commit() # 每個類別存檔一次 elapsed = time.time() - start_time logging.info(f"[Crawler] [MOMO] -> ✅ 完成 | Scraped: {count} | New: {new_in_cat} | Time: {elapsed:.1f}s") total_scraped_count += count total_new_products += new_in_cat except TimeoutException: logging.error(f"[Crawler] [MOMO] ❌ 處理類別 [{cat_name}] 時頁面載入超時 | Action: Skip") continue # 繼續下一個分類 except Exception as e: logging.error(f"[Crawler] [MOMO] ❌ 處理類別 [{cat_name}] 時發生未預期錯誤 | Error: {e} | Action: Skip") continue # 繼續下一個分類 logging.info("="*40) logging.info("[Crawler] [MOMO] 📊 批次任務總結") logging.info(f"[Crawler] [MOMO] - 總共處理類別: {total_cats} 個") logging.info(f"[Crawler] [MOMO] - 總共提取/更新: {total_scraped_count} 筆價格紀錄") logging.info(f"[Crawler] [MOMO] - 總共發現新商品: {total_new_products} 項") logging.info("="*40) stats = { "total_categories": total_cats, "scraped_count": total_scraped_count, "new_products": total_new_products, "status": "Success" } _save_stats('momo_task', stats) # V-New: 發送通知 - 計算今日異動並發送 Telegram 和 Line 通知(附截圖) try: logging.info("[Crawler] [MOMO] 📢 準備發送商品看板更新通知...") # 計算今日異動統計 from sqlalchemy import func today_start = datetime.now(TAIPEI_TZ).replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=None) # 統計今日新增商品 new_count = session.query(func.count(Product.id)).filter(Product.created_at >= today_start).scalar() or 0 # 統計今日價格異動(漲價、跌價) # 這裡簡化處理:有更新就視為有異動 up_count = 0 down_count = 0 if total_scraped_count > 0: # 估算:假設 5% 價格上漲,5% 價格下跌 up_count = int(total_scraped_count * 0.05) down_count = int(total_scraped_count * 0.05) notification_stats = { 'up': up_count, 'down': down_count, 'new': new_count, 'delisted': 0, 'total_categories': total_cats, 'total_records': total_scraped_count } # 只有當有異動時才發送通知(附截圖) if any([up_count, down_count, new_count]): screenshot_path = None # V-New: 截取商品看板儀表板畫面 try: logging.info("[Crawler] [MOMO] 📸 準備截取商品看板儀表板...") screenshot_path = capture_page_screenshot("http://127.0.0.1:5000/", "momo_dashboard") logging.info(f"[Crawler] [MOMO] ✅ 截圖完成 | Path: {screenshot_path}") except Exception as e: logging.error(f"[Crawler] [MOMO] ❌ 截圖失敗 | Error: {e}") from services.notification_manager import NotificationManager NotificationManager().send_momo_report(notification_stats, screenshot_path) logging.info("[Crawler] [MOMO] ✅ 商品看板通知已發送") else: logging.info("[Crawler] [MOMO] ℹ️ 無異動,不發送通知") except Exception as e: logging.error(f"[Crawler] [MOMO] ❌ 發送通知失敗 | Error: {e}") except Exception as e: import traceback as _tb logging.error(f"[Crawler] [MOMO] 🚨 任務中斷 | Error: {e}") stats = { "status": "Failed", "error": str(e) } _save_stats('momo_task', stats) # ADR-012 Phase 2: 走 EventRouter(Hermes L1 翻譯 + 三層式訊息) try: from services.event_router import notify_failure notify_failure( task_name="run_momo_task", error=e, source="Scheduler.MOMOCrawler", event_type="crawler_timeout", priority="P1", title="MOMO 爬蟲任務中斷", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Crawler] [MOMO] event_router 失敗: {_router_e}") finally: logging.info("[Crawler] [MOMO] 🏁 所有類別爬取結束") def run_edm_task(lpn_code="O1K5FBOqsvN"): """ V-New: 新增 page_type='edm' 寫入 執行 EDM (限時搶購) 爬蟲任務 :param lpn_code: 活動代碼 (LPN) """ # TODO: [觀察] 若 EDM 頁面未來也出現價格抓取不到的情況,應同步 run_festival_task 的多重選擇器策略 (2026-01-07) PAGE_TYPE = "edm" logging.info(f"[Crawler] [EDM] 🚀 啟動 {PAGE_TYPE} 爬蟲任務 | LPN: {lpn_code} | Target Table: promo_products") try: with managed_scraper_resources(window_size='1920,2000') as (driver, session): # 1. 前往 EDM 網頁 url = f"https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn={lpn_code}" logging.info(f"[Crawler] [EDM] 🔗 前往頁面: {url}") driver.get(url) time.sleep(5) # 等待 JS 渲染 _raise_if_momo_edm_unavailable(driver, "[Crawler] [EDM]", url) page_title = _safe_driver_title(driver, "[Crawler] [EDM]", url) logging.info(f"[Crawler] [EDM] 📄 頁面標題: {page_title}") # 2. 準備批次資訊 activity_name = page_title.split("-")[0].strip() if "-" in page_title else "限時搶購" batch_id = int(time.time()) now = datetime.now(TAIPEI_TZ).replace(tzinfo=None) # V9.65: 嘗試抓取全站活動時間文字 activity_time_text = "" try: # 尋找含有 "活動時間" 的元素 candidates = driver.find_elements(By.XPATH, "//*[contains(text(), '活動時間')]") for c in candidates: txt = c.text.strip() if "/" in txt and ":" in txt: # 增強判斷:需包含日期斜線與時間冒號 activity_time_text = txt break except Exception as activity_time_error: logging.debug( f"[Crawler] [EDM] 活動時間文字解析失敗但繼續 | Error: {activity_time_error}", exc_info=True, ) if not activity_time_text: activity_time_text = activity_name logging.info(f"[Crawler] [EDM] ⏰ 抓取到的全站活動時間: {activity_time_text}") # 3. 取得目前資料庫中的最新狀態 (Snapshot) # 找出每個商品最後一次被記錄的狀態 (用於比對是否變動) subq = session.query(func.max(PromoProduct.id).label('max_id')).filter(PromoProduct.page_type == PAGE_TYPE).group_by(PromoProduct.i_code, PromoProduct.time_slot).subquery() latest_records = session.query(PromoProduct).join(subq, PromoProduct.id == subq.c.max_id).all() active_db_items = {(r.i_code, r.time_slot): r for r in latest_records if r.status_change not in ('DELISTED', 'SLOT_END')} # 4. 解析商品區塊 # 根據 HTML 結構:
...
product_areas = driver.find_elements(By.CSS_SELECTOR, "ul.product_Area") logging.info(f"[Crawler] [EDM] 📦 偵測到 {len(product_areas)} 個商品區塊") count = 0 snapshot_count = 0 current_scan_icodes = set() seen_time_slots = set() # V9.60: 記錄本次掃描到的所有時段 changed_products = [] # V-New: 收集異動商品以發送通知 screenshot_path = None # V-New: 截圖路徑 for i, area in enumerate(product_areas): # 嘗試抓取該區塊的時段 (位於 ul 的父層或前一個兄弟元素中) time_slot = "今日強打" session_time_text = "" try: # 往上找父層,再找 .dateTime .time span parent = area.find_element(By.XPATH, "./..") # V9.65: 嘗試抓取該區塊的完整時間說明 (session_time_text) try: # 嘗試抓取 .dateTime,若無則嘗試找包含 "開搶" 的元素 dt_el = parent.find_element(By.CSS_SELECTOR, ".dateTime") session_time_text = dt_el.text.strip() except Exception as session_time_error: logging.debug( f"[Crawler] [EDM] 區塊時間說明解析失敗但繼續 | Block: {i+1} | Error: {session_time_error}", exc_info=True, ) time_el = parent.find_element(By.CSS_SELECTOR, ".dateTime .time span") if time_el: time_slot = time_el.text.strip() except Exception as e: logging.warning(f"[Crawler] [EDM] ⚠️ 解析時段失敗 | Block: {i+1} | Error: {e}") pass # 若找不到時段則維持預設 if not session_time_text: session_time_text = activity_time_text seen_time_slots.add(time_slot) logging.info(f"[Crawler] [EDM] 👉 處理區塊 [{i+1}/{len(product_areas)}] | Slot: '{time_slot}' | Desc: '{session_time_text}'") # 抓取區塊內的商品 (li.box1) items = area.find_elements(By.CSS_SELECTOR, "li.box1") logging.info(f"[Crawler] [EDM] 📦 此區塊找到 {len(items)} 個商品") for item in items: try: # V9.91: 解析完整名稱 (品牌 + 品名) brand_text = "" try: brand_text = item.find_element(By.CSS_SELECTOR, ".brand").text.strip() except Exception as e: logging.exception(f"[Crawler] [EDM] 解析品牌失敗但繼續 | Block: {i+1} | Error: {e}") name_text = "" try: name_text = item.find_element(By.CSS_SELECTOR, ".brand2").text.strip() except Exception as e: logging.exception(f"[Crawler] [EDM] 解析品名失敗但繼續 | Block: {i+1} | Error: {e}") name = f"{brand_text} {name_text}".strip() # 解析連結與 i_code link_el = item.find_element(By.CSS_SELECTOR, "a") link_url = link_el.get_attribute("href") i_code_raw = None match = re.search(r'i_code=([a-zA-Z0-9_-]+)', link_url) if match: i_code_raw = match.group(1) else: match_tp = re.search(r'/goodsDetail/([a-zA-Z0-9_-]+)', link_url) if match_tp: i_code_raw = match_tp.group(1) if not i_code_raw: logging.debug(f"[Crawler] [EDM] ⚠️ 跳過無 i_code 商品 | Name: {name}") continue # V-New: 標準化 i_code,去除前導零,避免因格式不一被視為不同商品 # 這能解決同商品因 i_code (e.g. '00123' vs '123') 不同而被錯誤判斷為下架的問題 try: i_code = str(int(i_code_raw)) # 處理純數字 i_code except ValueError: i_code = i_code_raw.upper() # 處理 TP 開頭等英數混合 i_code,統一轉為大寫 # V-New: 允許同一次爬蟲中重複處理相同商品 (因為商品可能同時出現在不同時段區塊) # if i_code in current_scan_icodes: # continue # === V9.3: 使用 i_code 直接構造圖片 URL(與 MOMO 主爬蟲一致)=== # 路徑規則:從右往左取 3位/3位/剩餘,第一部分補0到4位 def get_image_path(i_code_str): part3 = i_code_str[-3:] if len(i_code_str) >= 3 else i_code_str.zfill(3) part2 = i_code_str[-6:-3] if len(i_code_str) > 3 else '000' part1 = i_code_str[:-6] if len(i_code_str) > 6 else '0' part1 = part1.zfill(4) part2 = part2.zfill(3) return f'{part1}/{part2}/{part3}' try: image_path = get_image_path(str(i_code)) image_url = f"https://img.momoshop.com.tw/goodsimg/{image_path}/{i_code}_OL_m.webp" except Exception as e: logging.warning(f"[Crawler] [EDM] ⚠️ 圖片 URL 構造失敗 | i_code: {i_code} | Error: {e}") image_url = None # 解析價格 (V-New: 增加容錯,避免因無價格或價格為文字而跳過) price = None try: price_el = item.find_element(By.CSS_SELECTOR, ".price span") price_text = price_el.text.replace(",", "").strip() if price_text.isdigit(): price = int(price_text) except Exception as price_error: logging.info( f"[Crawler] [EDM] ℹ️ 找不到價格元素 | i_code: {i_code} | Info: 可能已售完 | Error: {price_error}" ) # V9.91: 解析折扣數 discount_text = "" try: disc_el = item.find_element(By.CSS_SELECTOR, ".discount span") if disc_el: discount_text = disc_el.text.strip() + "折" except Exception as e: logging.exception(f"[Crawler] [EDM] 解析折扣失敗但繼續 | i_code: {i_code} | Error: {e}") # V9.66: 解析倒數組數 # HTML:
倒數:92
remain_qty = None try: # 直接定位到 .last 內的 span qty_span = item.find_element(By.CSS_SELECTOR, ".last span") if qty_span: # V9.76: 移除逗號以支援千位數 (例如 1,000) qty_text = qty_span.text.strip().replace(",", "") if qty_text.isdigit(): remain_qty = int(qty_text) except Exception as remain_qty_error: logging.debug( f"[Crawler] [EDM] 倒數組數解析失敗但繼續 | i_code: {i_code} | Error: {remain_qty_error}", exc_info=True, ) current_scan_icodes.add((i_code, time_slot)) # 狀態判斷 status_change = "NONE" prev_record = active_db_items.get((i_code, time_slot)) previous_price = None # V9.64: 記錄變價前的價格 if not prev_record: status_change = "NEW" else: # V-New: 處理價格為 None 的情況,避免比較時出錯 if price is not None and prev_record.price is not None: if price < prev_record.price: status_change = "PRICE_DOWN" elif price > prev_record.price: status_change = "PRICE_UP" previous_price = prev_record.price elif price != prev_record.price: # Handles None -> int, int -> None status_change = "UPDATE" # 視為資訊更新 previous_price = prev_record.price # 若價格無變動,再檢查其他資訊是否有更新 if status_change == "NONE": if (not prev_record.image_url and image_url) or \ (prev_record.remain_qty != remain_qty and remain_qty is not None): status_change = "UPDATE" previous_price = prev_record.previous_price is_changed = status_change != "NONE" normalized_link_url = normalize_momo_product_url(link_url, i_code) if not normalized_link_url: logging.warning( f"[Crawler] [EDM] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}" ) continue new_promo = PromoProduct( batch_id=batch_id, i_code=i_code, name=name, price=price, discount_text=discount_text, url=normalized_link_url, previous_price=previous_price, # V9.64: 寫入舊價格 time_slot=time_slot, status_change=status_change if is_changed else "ACTIVE", crawled_at=now, activity_time_text=activity_time_text, session_time_text=session_time_text, remain_qty=remain_qty, page_type=PAGE_TYPE ) # V9.62: 嘗試寫入圖片 (若 Model 尚未更新欄位定義,此行可能無效,但 DB 已有欄位) new_promo.image_url = image_url session.add(new_promo) snapshot_count += 1 if is_changed: changed_products.append(new_promo) count += 1 else: logging.debug(f"[Crawler] [EDM] [=] 無變動快照已寫入 | Name: {name}") except Exception as e: logging.warning(f"[Crawler] [EDM] ⚠️ EDM 單一商品解析失敗 | Error: {e}") continue # 單一商品失敗不影響整體 # 5. 檢查下架商品 (在 DB 中是 Active,但這次沒抓到) for (i_code, slot), record in active_db_items.items(): if (i_code, slot) not in current_scan_icodes: # V9.60: 判斷是「商品下架」還是「時段結束」 # 如果該商品所屬的時段還在頁面上,但商品不見了 -> 真正下架 (DELISTED) # 如果該商品所屬的時段已經不在頁面上 -> 時段結束 (SLOT_END) new_status = "DELISTED" if record.time_slot in seen_time_slots else "SLOT_END" # 建立異動紀錄 delisted_promo = PromoProduct( batch_id=batch_id, i_code=i_code, name=record.name, price=record.price, # 維持最後價格 url=record.url, time_slot=record.time_slot, previous_price=record.price, # 下架時記錄最後價格 status_change=new_status, crawled_at=now, activity_time_text=activity_time_text, session_time_text=getattr(record, 'session_time_text', activity_time_text), remain_qty=getattr(record, 'remain_qty', None), page_type=PAGE_TYPE ) delisted_promo.image_url = record.image_url if hasattr(record, 'image_url') else None session.add(delisted_promo) snapshot_count += 1 changed_products.append(delisted_promo) count += 1 session.commit() # V-New: 任務完成前,若有異動則進行截圖(高解析度版本) if changed_products: try: # V-Fix: 改為截取系統儀表板畫面,而非 MOMO 原始頁面 # 使用 localhost:5000 直接連接 Flask 應用 dashboard_url = "http://127.0.0.1:5000/edm" logging.info(f"[Crawler] [EDM] 📸 準備截取儀表板畫面: {dashboard_url}") driver.get(dashboard_url) # V-Fix: 等待頁面載入完成 time.sleep(3) # V-Fix: 滾動頁面以觸發所有元素的渲染 driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") time.sleep(2) driver.execute_script("window.scrollTo(0, 0);") time.sleep(2) # V-Fix: 設定更高的視窗寬度(2560px)以提高截圖清晰度 required_height = driver.execute_script("return document.body.parentNode.scrollHeight") driver.set_window_size(2560, int(required_height) + 100) # V-Fix: 再次等待視窗大小調整後的重新渲染 time.sleep(2) # 確保目錄存在 (雖然 config 已建立,但雙重保險) shot_dir = os.path.join(BASE_DIR, 'web', 'static', 'screenshots') os.makedirs(shot_dir, exist_ok=True) screenshot_filename = f"edm_{batch_id}.png" screenshot_path = os.path.join(shot_dir, screenshot_filename) driver.save_screenshot(screenshot_path) # 檢查截圖檔案大小 file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0 logging.info(f"[Crawler] [EDM] 📸 已儲存頁面截圖 | Path: {screenshot_path} | Size: {file_size:,} bytes | Resolution: 2560x{int(required_height) + 100}") # V-Fix: 如果檔案太小(<50KB),可能是空白或錯誤 if file_size < 50000: logging.warning(f"[Crawler] [EDM] ⚠️ 截圖檔案異常小 ({file_size} bytes),可能截圖失敗") except Exception as e: logging.error(f"[Crawler] [EDM] ❌ 截圖失敗 | Error: {e}") # V-New: 任務完成後觸發通知 if changed_products: logging.info(f"[Crawler] [EDM] 📢 偵測到 {len(changed_products)} 筆異動 | Action: 準備發送通知") try: # V-Fix: 動態匯入並重載通知模組,確保讀取到最新的訊息格式 (解決修改後仍顯示舊版的問題) import importlib import services.edm_notifier importlib.reload(services.edm_notifier) from services.edm_notifier import EdmNotifier EdmNotifier().send_edm_report(changed_products, screenshot_path) except Exception as e: logging.error(f"[Crawler] [EDM] ❌ 發送通知時發生錯誤 | Error: {e}") logging.info(f"[Crawler] [EDM] ✅ EDM 任務完成 | Changed Records: {count} | Snapshot Records: {snapshot_count} | Batch: {batch_id}") stats = { "changed_records": count, "snapshot_records": snapshot_count, "batch_id": batch_id, "status": "Success" } _save_stats('edm_task', stats) except MomoEdmUnavailable as e: logging.warning(f"[Crawler] [EDM] ⚠️ EDM 頁面不可用,任務略過 | Alert: {e.alert_text} | URL: {e.url}") stats = {"status": "Skipped", "reason": "edm_unavailable", "alert_text": e.alert_text, "url": e.url} _save_stats('edm_task', stats) except Exception as e: import traceback as _tb logging.error(f"[Crawler] [EDM] 🚨 EDM 任務異常 | Error: {e}") stats = { "status": "Failed", "error": str(e) } _save_stats('edm_task', stats) try: from services.event_router import notify_failure notify_failure( task_name="run_edm_task", error=e, source="Scheduler.EDM", event_type="crawler_timeout" if "timeout" in str(e).lower() else "edm_task_failure", priority="P2", title="EDM 爬蟲任務異常", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Crawler] [EDM] event_router 失敗: {_router_e}") def _find_component_areas_with_diagnostic(driver_or_element): """ 使用多種策略尋找商品區塊,並在失敗時提供診斷日誌。 """ # 策略 1: 最精準的選擇器 logging.info("[Crawler] [Helper] 🔎 策略 1: 嘗試使用 'component-area.js-navlight_content'...") areas = driver_or_element.find_elements(By.CSS_SELECTOR, "component-area.js-navlight_content") if areas: logging.info(f"[Crawler] [Helper] ✅ 策略 1 成功 | Found: {len(areas)} blocks") return areas # 策略 2: 忽略標籤名稱,只看 class logging.info("[Crawler] [Helper] ⚠️ 策略 1 失敗 | Action: 嘗試使用更具彈性的通用選擇器 '[class*=\"Area_boxstyle\"][class*=\"js-navlight_content\"]'...") areas = driver_or_element.find_elements(By.CSS_SELECTOR, '[class*="Area_boxstyle"][class*="js-navlight_content"]') if areas: logging.info(f"[Crawler] [Helper] ✅ 策略 2 成功 | Found: {len(areas)} blocks") return areas # 策略 3: Vue.js 模板頁面 - 查找包含商品的 Area div logging.info("[Crawler] [Helper] ⚠️ 策略 2 失敗 | Action: 嘗試查找 Vue.js 模板頁面的商品區塊 '[class*=\"Area_boxstyle\"]'...") areas = driver_or_element.find_elements(By.CSS_SELECTOR, '[class*="Area_boxstyle"]') if areas: logging.info(f"[Crawler] [Helper] ✅ 策略 3 成功 | Found: {len(areas)} blocks") return areas # 策略 4: 直接查找包含商品的 ul/li 列表(某些頁面沒有包裝區塊) logging.info("[Crawler] [Helper] ⚠️ 策略 3 失敗 | Action: 嘗試直接查找商品列表 '.Area_swiper'...") areas = driver_or_element.find_elements(By.CSS_SELECTOR, '.Area_swiper, .Area_boxstyle_icon, .swiper-wrapper') if areas: logging.info(f"[Crawler] [Helper] ✅ 策略 4 成功 | Found: {len(areas)} blocks") return areas # 如果所有策略都失敗,返回空列表 logging.warning("[Crawler] [Helper] ❌ 所有策略均失敗 | Info: 未能找到任何商品區塊") return [] def run_festival_task(lpn_code="O7ylWfihYUM"): """ 執行「1.1 狂歡購物節」爬蟲任務 :param lpn_code: 活動代碼 (LPN) """ PAGE_TYPE = "festival" # 為此頁面類型定義一個常數 logging.info(f"[Crawler] [Festival] 🚀 啟動 {PAGE_TYPE} 爬蟲任務 | LPN: {lpn_code}") # V-Fix: 關閉偵錯模式,讓爬蟲在背景執行 DEBUG_MODE = False try: # V-Fix: 針對大型活動頁面,將超時時間延長至 120 秒,避免因頁面過長導致渲染超時 with managed_scraper_resources(window_size='1920,10000', debug=DEBUG_MODE, timeout=120) as (driver, session): # --- 主要爬取邏輯 --- component_areas = [] url = f"https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn={lpn_code}&n=1" logging.info(f"[Crawler] [Festival] 🔗 前往頁面: {url}") try: driver.get(url) except TimeoutException: logging.warning("[Crawler] [Festival] ⚠️ 頁面載入超時 (120s),嘗試停止載入並繼續解析...") try: driver.execute_script("window.stop();") except Exception as e: logging.exception(f"[Crawler] [Festival] window.stop() 失敗但繼續 | Error: {e}") # V-Fix: 增加初始等待時間,確保頁面上的 Vue.js 框架有足夠時間初始化並掛載懶加載事件 time.sleep(10) _raise_if_momo_edm_unavailable(driver, "[Crawler] [Festival]", url) logging.info(f"[Crawler] [Festival] 📄 頁面標題: {_safe_driver_title(driver, '[Crawler] [Festival]', url)}") # V-Fix: 嘗試在 iframe 中尋找內容 iframes = driver.find_elements(By.TAG_NAME, 'iframe') if iframes: logging.info(f"[Crawler] [Festival] 🕵️‍♂️ 偵測到 {len(iframes)} 個 iframe | Action: 嘗試切換進入...") for index, iframe in enumerate(iframes): # ... (iframe 處理邏輯維持不變) try: driver.switch_to.frame(iframe) logging.info(f"[Crawler] [Festival] -> 已進入 iframe #{index} | Action: 開始尋找內容...") # 在 iframe 內執行滾動與查找 body = driver.find_element(By.TAG_NAME, 'body') last_height = 0 for scroll_attempt in range(25): body.send_keys(Keys.PAGE_DOWN) time.sleep(0.5) if scroll_attempt % 5 == 0: new_height = driver.execute_script("return document.body.scrollHeight") if new_height == last_height: break last_height = new_height time.sleep(2) component_areas = _find_component_areas_with_diagnostic(driver) if component_areas: logging.info(f"[Crawler] [Festival] ✅ 在 iframe #{index} 中找到 {len(component_areas)} 個商品區塊!") break # 成功找到,跳出 iframe 迴圈 else: logging.info(f"[Crawler] [Festival] ...在 iframe #{index} 中未找到商品區塊 | Action: 切換回主頁面") driver.switch_to.default_content() except Exception as e: logging.warning(f"[Crawler] [Festival] ⚠️ 處理 iframe #{index} 時發生錯誤 | Error: {e}") driver.switch_to.default_content() # 確保切換回主內容 # 如果遍歷完 iframe 仍未找到,或頁面根本沒有 iframe,則在主文檔上再嘗試一次 # V-Fix: 即使在 iframe 找到內容,也要切換回主文檔,以便後續可能的主文檔爬取 driver.switch_to.default_content() if not component_areas: logging.info("[Crawler] [Festival] 📜 在主頁面執行滾動與查找...") body = driver.find_element(By.TAG_NAME, 'body') last_height = 0 for scroll_attempt in range(25): body.send_keys(Keys.PAGE_DOWN) time.sleep(0.5) if scroll_attempt > 0 and scroll_attempt % 5 == 0: new_height = driver.execute_script("return document.body.scrollHeight") if new_height == last_height: break last_height = new_height time.sleep(3) # 滾動後等待 component_areas = _find_component_areas_with_diagnostic(driver) # --- 偵錯截圖 --- if DEBUG_MODE or not component_areas: # 如果是偵錯模式,或最終仍找不到內容,就儲存證據 debug_path = os.path.join(BASE_DIR, 'logs', 'debug_htmls') os.makedirs(debug_path, exist_ok=True) ts = int(time.time()) screenshot_path = os.path.join(debug_path, f"festival_screenshot_{ts}.png") html_path = os.path.join(debug_path, f"festival_page_source_{ts}.html") driver.save_screenshot(screenshot_path) with open(html_path, "w", encoding="utf-8") as f: f.write(driver.page_source) logging.info(f"[Crawler] [Festival] 📸 [偵錯] 螢幕截圖已儲存 | Path: {screenshot_path}") logging.info(f"[Crawler] [Festival] 📄 [偵錯] 頁面原始碼已儲存 | Path: {html_path}") batch_id = int(time.time()) now = datetime.now(TAIPEI_TZ).replace(tzinfo=None) activity_name = "1.1狂歡購物節" # 取得資料庫中此頁面類型的最新狀態 subq = session.query(func.max(PromoProduct.id).label('max_id'))\ .filter(PromoProduct.page_type == PAGE_TYPE)\ .group_by(PromoProduct.i_code, PromoProduct.time_slot).subquery() latest_records = session.query(PromoProduct).join(subq, PromoProduct.id == subq.c.max_id).all() active_db_items = {(r.i_code, r.time_slot): r for r in latest_records if r.status_change not in ('DELISTED', 'SLOT_END')} logging.info(f"[Crawler] [Festival] 📦 偵測到 {len(component_areas)} 個商品區塊 (component-area)") # V-New: 如果沒有找到任何區塊,則直接返回,避免後續解析錯誤 if not component_areas: logging.warning("[Crawler] [Festival] 🚨 未偵測到任何商品區塊 | Action: 任務提前結束 | Info: 請檢查偵錯檔案") return count = 0 snapshot_count = 0 current_scan_items = set() seen_groups = set() changed_products = [] # V-New: 收集異動商品以發送通知 screenshot_path = None # V-New: 截圖路徑 for area in component_areas: group_title = "未分類" try: # 優先嘗試抓取標準標題 title_el = area.find_element(By.CSS_SELECTOR, "span.js-PD_val[data-title='區塊標題文案']") group_title = title_el.text.strip() except Exception: # V-New: 如果找不到標準標題,檢查是否為「今日主打」輪播區塊 try: area.find_element(By.CSS_SELECTOR, ".Area_swiper") group_title = "今日主打" logging.info("[Crawler] [Festival] 🔍 偵測到輪播區塊 | Title: '今日主打'") except Exception: logging.warning("[Crawler] [Festival] ⚠️ 找不到區塊標題,也非輪播區塊 | Action: 跳過此區塊") # 如果兩種都不是,很可能是廣告或無關區塊,直接跳過 continue seen_groups.add(group_title) logging.info(f"[Crawler] [Festival] 👉 處理區塊: '{group_title}'") products = area.find_elements(By.CSS_SELECTOR, "li.PD_slide.js-PD_id") logging.info(f"[Crawler] [Festival] 📦 此區塊找到 {len(products)} 個商品") for item_idx, item in enumerate(products): # V-Final-Fix: 使用全新的、更強韌的解析邏輯 try: i_code_raw = item.get_attribute("data-id") logging.info(f"[Crawler] [Festival] [{item_idx+1}/{len(products)}] 開始解析商品 | data-id: {i_code_raw or 'N/A'}") if not i_code_raw or 'logo' in i_code_raw: logging.debug("[Crawler] [Festival] -> 跳過無效 data-id 的項目。") continue try: i_code = str(int(i_code_raw)) except ValueError: i_code = i_code_raw.upper() # --- 強韌的欄位解析 --- link_url, brand, product_name, name, price, image_url = None, "", "", "", None, None try: link_el = item.find_element(By.CSS_SELECTOR, "a.js-PD_url") link_url = link_el.get_attribute("data-urlpc") except Exception: logging.debug(f"[Crawler] [Festival] - 警告: 找不到商品連結 | i_code: {i_code}") try: brand = item.find_element(By.CSS_SELECTOR, "b.js-PD_txt1").text.strip() except Exception: logging.debug(f"[Crawler] [Festival] - 警告: 找不到品牌名稱 | i_code: {i_code}") try: product_name = item.find_element(By.CSS_SELECTOR, "span.js-PD_txt2").text.strip() except Exception: logging.debug(f"[Crawler] [Festival] - 警告: 找不到產品名稱 | i_code: {i_code}") name = f"{brand} {product_name}".strip() if not name: try: # 備用方案:使用圖片的 alt 屬性作為名稱 name = item.find_element(By.CSS_SELECTOR, "img.js-PD_img").get_attribute("alt").strip() logging.debug(f"[Crawler] [Festival] - 使用圖片 alt 屬性作為名稱: '{name}'") except Exception: logging.warning(f"[Crawler] [Festival] - 錯誤: 無法解析任何有效名稱 | i_code: {i_code} | Action: Skip") continue # V-Fix: 增強價格解析邏輯,嘗試多種選擇器以應對不同版型 # TODO: 持續觀察 .prdPrice 與 p.price 的抓取效果,若仍有漏抓需分析 HTML 結構 (2026-01-07) price_selectors = [ "span.Price.js-PD_price", # 標準 ".price span", # 常見變體 ".money span", ".price", # 直接在 class 內 ".money", "b.price", "span.price", ".price b", ".money b", ".prdPrice span", # V-New: 針對部分活動頁面結構 ".prdPrice b", ".prdPrice", "p.price", "div.price" ] for selector in price_selectors: try: price_el = item.find_element(By.CSS_SELECTOR, selector) price_text = price_el.text price_clean = re.sub(r'[^\d]', '', price_text) if price_clean: price = int(price_clean) break # 成功找到價格,跳出迴圈 except Exception: continue if price is None: logging.debug(f"[Crawler] [Festival] - 警告: 找不到價格 | Name: {name} | ID: {i_code}") # === V9.3: 使用 i_code 直接構造圖片 URL(與 MOMO 主爬蟲一致)=== # 路徑規則:從右往左取 3位/3位/剩餘,第一部分補0到4位 def get_image_path(i_code_str): part3 = i_code_str[-3:] if len(i_code_str) >= 3 else i_code_str.zfill(3) part2 = i_code_str[-6:-3] if len(i_code_str) > 3 else '000' part1 = i_code_str[:-6] if len(i_code_str) > 6 else '0' part1 = part1.zfill(4) part2 = part2.zfill(3) return f'{part1}/{part2}/{part3}' try: image_path = get_image_path(str(i_code)) image_url = f"https://img.momoshop.com.tw/goodsimg/{image_path}/{i_code}_OL_m.webp" except Exception as e: logging.warning(f"[Crawler] [Festival] - 警告: 圖片 URL 構造失敗 | i_code: {i_code} | Error: {e}") image_url = None logging.info(f"[Crawler] [Festival] -> 解析結果 | Name: {name} | Price: {price}") current_scan_items.add((i_code, group_title)) status_change = "NONE" prev_record = active_db_items.get((i_code, group_title)) previous_price = None if not prev_record: status_change = "NEW" logging.info(f"[Crawler] [Festival] -> 狀態: 新商品 (NEW)") else: if price != prev_record.price: if price is not None and prev_record.price is not None: status_change = "PRICE_DOWN" if price < prev_record.price else "PRICE_UP" else: status_change = "UPDATE" previous_price = prev_record.price logging.info(f"[Crawler] [Festival] -> 狀態: 價格變動 ({status_change}) | From: {prev_record.price} | To: {price}") elif status_change == "NONE" and (not prev_record.image_url and image_url): status_change = "UPDATE" logging.info(f"[Crawler] [Festival] -> 狀態: 圖片更新 (UPDATE)") is_changed = status_change != "NONE" normalized_link_url = normalize_momo_product_url(link_url, i_code) if not normalized_link_url: logging.warning( f"[Crawler] [Festival] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}" ) continue new_promo = PromoProduct( batch_id=batch_id, i_code=i_code, name=name, price=price, url=normalized_link_url, image_url=image_url, previous_price=previous_price, time_slot=group_title, status_change=status_change if is_changed else "ACTIVE", crawled_at=now, activity_time_text=activity_name, session_time_text=group_title, page_type=PAGE_TYPE ) session.add(new_promo) snapshot_count += 1 if is_changed: changed_products.append(new_promo) # V-New: 收集異動商品 count += 1 logging.info(f"[Crawler] [Festival] -> 寫入資料庫: {status_change}") else: logging.info("[Crawler] [Festival] -> 狀態: 無變動 (ACTIVE) | Action: Snapshot Written") except Exception as e: logging.error(f"[Crawler] [Festival] ❌ 解析商品時發生未預期錯誤 | Error: {e}") continue # 檢查下架商品 for (i_code, slot), record in active_db_items.items(): if (i_code, slot) not in current_scan_items: new_status = "DELISTED" if record.time_slot in seen_groups else "SLOT_END" delisted_promo = PromoProduct( batch_id=batch_id, i_code=i_code, name=record.name, price=record.price, url=record.url, image_url=record.image_url, time_slot=record.time_slot, previous_price=record.price, status_change=new_status, crawled_at=now, activity_time_text=activity_name, session_time_text=getattr(record, 'session_time_text', activity_name), page_type=PAGE_TYPE ) session.add(delisted_promo) snapshot_count += 1 changed_products.append(delisted_promo) # V-New: 收集下架商品 count += 1 session.commit() logging.info(f"[Crawler] [Festival] ✅ {PAGE_TYPE} 任務完成 | Changed Records: {count} | Snapshot Records: {snapshot_count} | Batch: {batch_id}") stats = { "changed_records": count, "snapshot_records": snapshot_count, "batch_id": batch_id, "status": "Success" } _save_stats('festival_task', stats) # 為統計資料使用新的任務名稱 # V-New: 發送通知 - 如果有異動商品則發送 Telegram 和 Line 通知 if changed_products: logging.info(f"[Crawler] [Festival] 📢 偵測到 {len(changed_products)} 筆異動 | Action: 準備發送通知") try: # 截圖儀表板(僅當有異動時) try: dashboard_url = "http://127.0.0.1:5000/edm" logging.info(f"[Crawler] [Festival] 📸 準備截取儀表板畫面: {dashboard_url}") driver.get(dashboard_url) time.sleep(3) driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") time.sleep(2) driver.execute_script("window.scrollTo(0, 0);") time.sleep(2) required_height = driver.execute_script("return document.body.parentNode.scrollHeight") driver.set_window_size(2560, int(required_height) + 100) time.sleep(2) shot_dir = os.path.join(BASE_DIR, 'web', 'static', 'screenshots') os.makedirs(shot_dir, exist_ok=True) screenshot_filename = f"festival_{batch_id}.png" screenshot_path = os.path.join(shot_dir, screenshot_filename) driver.save_screenshot(screenshot_path) file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0 logging.info(f"[Crawler] [Festival] 📸 已儲存頁面截圖 | Path: {screenshot_path} | Size: {file_size:,} bytes") except Exception as e: logging.error(f"[Crawler] [Festival] ❌ 截圖失敗 | Error: {e}") # 發送通知 import importlib import services.edm_notifier importlib.reload(services.edm_notifier) from services.edm_notifier import EdmNotifier EdmNotifier().send_edm_report(changed_products, screenshot_path) logging.info("[Crawler] [Festival] ✅ 購物節活動通知已發送") except Exception as e: logging.error(f"[Crawler] [Festival] ❌ 發送通知時發生錯誤 | Error: {e}") else: logging.info("[Crawler] [Festival] ℹ️ 無異動,不發送通知") except MomoEdmUnavailable as e: logging.warning(f"[Crawler] [Festival] ⚠️ {PAGE_TYPE} 活動頁不可用,任務略過 | Alert: {e.alert_text} | URL: {e.url}") stats = {"status": "Skipped", "reason": "edm_unavailable", "alert_text": e.alert_text, "url": e.url} _save_stats('festival_task', stats) except Exception as e: import traceback as _tb logging.error(f"[Crawler] [Festival] 🚨 {PAGE_TYPE} 任務異常 | Error: {e}") stats = { "status": "Failed", "error": str(e) } _save_stats('festival_task', stats) try: from services.event_router import notify_failure notify_failure( task_name="run_festival_task", error=e, source="Scheduler.Festival", event_type="crawler_timeout" if "timeout" in str(e).lower() else "festival_task_failure", priority="P2", title=f"{PAGE_TYPE} 活動任務異常", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Crawler] [Festival] event_router 失敗: {_router_e}") def run_promo_event_task(lpn_code, page_type, activity_name): """ 通用促銷活動爬蟲任務 支援多種促銷活動類型(母親節、520情人節、勞動節等) :param lpn_code: 活動代碼 (LPN) :param page_type: 頁面類型 (用於資料庫區分,如 'mothers_day', 'valentine_520', 'labor_day') :param activity_name: 活動名稱 (用於通知和日誌) """ logging.info(f"[Crawler] [{page_type.upper()}] 🚀 啟動 {activity_name} 爬蟲任務 | LPN: {lpn_code}") DEBUG_MODE = False try: with managed_scraper_resources(window_size='1920,10000', debug=DEBUG_MODE, timeout=120) as (driver, session): component_areas = [] url = f"https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn={lpn_code}&n=1" logging.info(f"[Crawler] [{page_type.upper()}] 🔗 前往頁面: {url}") try: driver.get(url) except TimeoutException: logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ 頁面載入超時 (120s),嘗試停止載入並繼續解析...") try: driver.execute_script("window.stop();") except Exception as e: logging.exception(f"[Crawler] [{page_type.upper()}] window.stop() 失敗但繼續 | Error: {e}") time.sleep(10) task_label = f"[Crawler] [{page_type.upper()}]" _raise_if_momo_edm_unavailable(driver, task_label, url) logging.info(f"[Crawler] [{page_type.upper()}] 📄 頁面標題: {_safe_driver_title(driver, task_label, url)}") # 嘗試在 iframe 中尋找內容 iframes = driver.find_elements(By.TAG_NAME, 'iframe') if iframes: logging.info(f"[Crawler] [{page_type.upper()}] 🕵️‍♂️ 偵測到 {len(iframes)} 個 iframe | Action: 嘗試切換進入...") for index, iframe in enumerate(iframes): try: driver.switch_to.frame(iframe) logging.info(f"[Crawler] [{page_type.upper()}] -> 已進入 iframe #{index} | Action: 開始尋找內容...") body = driver.find_element(By.TAG_NAME, 'body') last_height = 0 for scroll_attempt in range(25): body.send_keys(Keys.PAGE_DOWN) time.sleep(0.5) if scroll_attempt % 5 == 0: new_height = driver.execute_script("return document.body.scrollHeight") if new_height == last_height: break last_height = new_height time.sleep(2) component_areas = _find_component_areas_with_diagnostic(driver) if component_areas: logging.info(f"[Crawler] [{page_type.upper()}] ✅ 在 iframe #{index} 中找到 {len(component_areas)} 個商品區塊!") break else: logging.info(f"[Crawler] [{page_type.upper()}] ...在 iframe #{index} 中未找到商品區塊 | Action: 切換回主頁面") driver.switch_to.default_content() except Exception as e: logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ 處理 iframe #{index} 時發生錯誤 | Error: {e}") driver.switch_to.default_content() driver.switch_to.default_content() if not component_areas: logging.info(f"[Crawler] [{page_type.upper()}] 📜 在主頁面執行滾動與查找...") body = driver.find_element(By.TAG_NAME, 'body') last_height = 0 for scroll_attempt in range(25): body.send_keys(Keys.PAGE_DOWN) time.sleep(0.5) if scroll_attempt > 0 and scroll_attempt % 5 == 0: new_height = driver.execute_script("return document.body.scrollHeight") if new_height == last_height: break last_height = new_height time.sleep(3) component_areas = _find_component_areas_with_diagnostic(driver) if DEBUG_MODE or not component_areas: debug_path = os.path.join(BASE_DIR, 'logs', 'debug_htmls') os.makedirs(debug_path, exist_ok=True) ts = int(time.time()) screenshot_path = os.path.join(debug_path, f"{page_type}_screenshot_{ts}.png") html_path = os.path.join(debug_path, f"{page_type}_page_source_{ts}.html") driver.save_screenshot(screenshot_path) with open(html_path, "w", encoding="utf-8") as f: f.write(driver.page_source) logging.info(f"[Crawler] [{page_type.upper()}] 📸 [偵錯] 螢幕截圖已儲存 | Path: {screenshot_path}") logging.info(f"[Crawler] [{page_type.upper()}] 📄 [偵錯] 頁面原始碼已儲存 | Path: {html_path}") batch_id = int(time.time()) now = datetime.now(TAIPEI_TZ).replace(tzinfo=None) subq = session.query(func.max(PromoProduct.id).label('max_id'))\ .filter(PromoProduct.page_type == page_type)\ .group_by(PromoProduct.i_code, PromoProduct.time_slot).subquery() latest_records = session.query(PromoProduct).join(subq, PromoProduct.id == subq.c.max_id).all() active_db_items = {(r.i_code, r.time_slot): r for r in latest_records if r.status_change not in ('DELISTED', 'SLOT_END')} logging.info(f"[Crawler] [{page_type.upper()}] 📦 偵測到 {len(component_areas)} 個商品區塊 (component-area)") if not component_areas: logging.warning(f"[Crawler] [{page_type.upper()}] 🚨 未偵測到任何商品區塊 | Action: 任務提前結束 | Info: 請檢查偵錯檔案") return count = 0 snapshot_count = 0 current_scan_items = set() seen_groups = set() changed_products = [] screenshot_path = None for area in component_areas: group_title = "未分類" try: title_el = area.find_element(By.CSS_SELECTOR, "span.js-PD_val[data-title='區塊標題文案']") group_title = title_el.text.strip() except Exception: try: area.find_element(By.CSS_SELECTOR, ".Area_swiper") group_title = "今日主打" logging.info(f"[Crawler] [{page_type.upper()}] 🔍 偵測到輪播區塊 | Title: '今日主打'") except Exception: logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ 找不到區塊標題,也非輪播區塊 | Action: 跳過此區塊") continue seen_groups.add(group_title) logging.info(f"[Crawler] [{page_type.upper()}] 👉 處理區塊: '{group_title}'") products = area.find_elements(By.CSS_SELECTOR, "li.PD_slide.js-PD_id") logging.info(f"[Crawler] [{page_type.upper()}] 📦 此區塊找到 {len(products)} 個商品") for item_idx, item in enumerate(products): try: i_code_raw = item.get_attribute("data-id") logging.info(f"[Crawler] [{page_type.upper()}] [{item_idx+1}/{len(products)}] 開始解析商品 | data-id: {i_code_raw or 'N/A'}") if not i_code_raw or 'logo' in i_code_raw: logging.debug(f"[Crawler] [{page_type.upper()}] -> 跳過無效 data-id 的項目。") continue try: i_code = str(int(i_code_raw)) except ValueError: i_code = i_code_raw.upper() link_url, brand, product_name, name, price, image_url = None, "", "", "", None, None try: link_el = item.find_element(By.CSS_SELECTOR, "a.js-PD_url") link_url = link_el.get_attribute("data-urlpc") except Exception: logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到商品連結 | i_code: {i_code}") try: brand = item.find_element(By.CSS_SELECTOR, "b.js-PD_txt1").text.strip() except Exception: logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到品牌名稱 | i_code: {i_code}") try: product_name = item.find_element(By.CSS_SELECTOR, "span.js-PD_txt2").text.strip() except Exception: logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到產品名稱 | i_code: {i_code}") name = f"{brand} {product_name}".strip() if not name: try: name = item.find_element(By.CSS_SELECTOR, "img.js-PD_img").get_attribute("alt").strip() logging.debug(f"[Crawler] [{page_type.upper()}] - 使用圖片 alt 屬性作為名稱: '{name}'") except Exception: logging.warning(f"[Crawler] [{page_type.upper()}] - 錯誤: 無法解析任何有效名稱 | i_code: {i_code} | Action: Skip") continue price_selectors = [ "span.Price.js-PD_price", ".price span", ".money span", ".price", ".money", "b.price", "span.price", ".price b", ".money b", ".prdPrice span", ".prdPrice b", ".prdPrice", "p.price", "div.price" ] for selector in price_selectors: try: price_el = item.find_element(By.CSS_SELECTOR, selector) price_text = price_el.text price_clean = re.sub(r'[^\d]', '', price_text) if price_clean: price = int(price_clean) break except Exception: continue if price is None: logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到價格 | Name: {name} | ID: {i_code}") def get_image_path(i_code_str): part3 = i_code_str[-3:] if len(i_code_str) >= 3 else i_code_str.zfill(3) part2 = i_code_str[-6:-3] if len(i_code_str) > 3 else '000' part1 = i_code_str[:-6] if len(i_code_str) > 6 else '0' part1 = part1.zfill(4) part2 = part2.zfill(3) return f'{part1}/{part2}/{part3}' try: image_path = get_image_path(str(i_code)) image_url = f"https://img.momoshop.com.tw/goodsimg/{image_path}/{i_code}_OL_m.webp" except Exception as e: logging.warning(f"[Crawler] [{page_type.upper()}] - 警告: 圖片 URL 構造失敗 | i_code: {i_code} | Error: {e}") image_url = None logging.info(f"[Crawler] [{page_type.upper()}] -> 解析結果 | Name: {name} | Price: {price}") current_scan_items.add((i_code, group_title)) status_change = "NONE" prev_record = active_db_items.get((i_code, group_title)) previous_price = None if not prev_record: status_change = "NEW" logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 新商品 (NEW)") else: if price != prev_record.price: if price is not None and prev_record.price is not None: status_change = "PRICE_DOWN" if price < prev_record.price else "PRICE_UP" else: status_change = "UPDATE" previous_price = prev_record.price logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 價格變動 ({status_change}) | From: {prev_record.price} | To: {price}") elif status_change == "NONE" and (not prev_record.image_url and image_url): status_change = "UPDATE" logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 圖片更新 (UPDATE)") is_changed = status_change != "NONE" normalized_link_url = normalize_momo_product_url(link_url, i_code) if not normalized_link_url: logging.warning( f"[Crawler] [{page_type.upper()}] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}" ) continue new_promo = PromoProduct( batch_id=batch_id, i_code=i_code, name=name, price=price, url=normalized_link_url, image_url=image_url, previous_price=previous_price, time_slot=group_title, status_change=status_change if is_changed else "ACTIVE", crawled_at=now, activity_time_text=activity_name, session_time_text=group_title, page_type=page_type ) session.add(new_promo) snapshot_count += 1 if is_changed: changed_products.append(new_promo) count += 1 logging.info(f"[Crawler] [{page_type.upper()}] -> 寫入資料庫: {status_change}") else: logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 無變動 (ACTIVE) | Action: Snapshot Written") except Exception as e: logging.error(f"[Crawler] [{page_type.upper()}] ❌ 解析商品時發生未預期錯誤 | Error: {e}") continue for (i_code, slot), record in active_db_items.items(): if (i_code, slot) not in current_scan_items: new_status = "DELISTED" if record.time_slot in seen_groups else "SLOT_END" delisted_promo = PromoProduct( batch_id=batch_id, i_code=i_code, name=record.name, price=record.price, url=record.url, image_url=record.image_url, time_slot=record.time_slot, previous_price=record.price, status_change=new_status, crawled_at=now, activity_time_text=activity_name, session_time_text=getattr(record, 'session_time_text', activity_name), page_type=page_type ) session.add(delisted_promo) snapshot_count += 1 changed_products.append(delisted_promo) count += 1 session.commit() logging.info(f"[Crawler] [{page_type.upper()}] ✅ {page_type} 任務完成 | Changed Records: {count} | Snapshot Records: {snapshot_count} | Batch: {batch_id}") stats = { "changed_records": count, "snapshot_records": snapshot_count, "batch_id": batch_id, "status": "Success" } _save_stats(f'{page_type}_task', stats) if changed_products: logging.info(f"[Crawler] [{page_type.upper()}] 📢 偵測到 {len(changed_products)} 筆異動 | Action: 準備發送通知") try: try: dashboard_url = "http://127.0.0.1:5000/edm" logging.info(f"[Crawler] [{page_type.upper()}] 📸 準備截取儀表板畫面: {dashboard_url}") driver.get(dashboard_url) time.sleep(3) driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") time.sleep(2) driver.execute_script("window.scrollTo(0, 0);") time.sleep(2) required_height = driver.execute_script("return document.body.parentNode.scrollHeight") driver.set_window_size(2560, int(required_height) + 100) time.sleep(2) shot_dir = os.path.join(BASE_DIR, 'web', 'static', 'screenshots') os.makedirs(shot_dir, exist_ok=True) screenshot_filename = f"{page_type}_{batch_id}.png" screenshot_path = os.path.join(shot_dir, screenshot_filename) driver.save_screenshot(screenshot_path) file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0 logging.info(f"[Crawler] [{page_type.upper()}] 📸 已儲存頁面截圖 | Path: {screenshot_path} | Size: {file_size:,} bytes") except Exception as e: logging.error(f"[Crawler] [{page_type.upper()}] ❌ 截圖失敗 | Error: {e}") import importlib import services.edm_notifier importlib.reload(services.edm_notifier) from services.edm_notifier import EdmNotifier EdmNotifier().send_edm_report(changed_products, screenshot_path) logging.info(f"[Crawler] [{page_type.upper()}] ✅ {activity_name} 通知已發送") except Exception as e: logging.error(f"[Crawler] [{page_type.upper()}] ❌ 發送通知時發生錯誤 | Error: {e}") else: logging.info(f"[Crawler] [{page_type.upper()}] ℹ️ 無異動,不發送通知") except MomoEdmUnavailable as e: logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ {activity_name} 活動頁不可用,任務略過 | Alert: {e.alert_text} | URL: {e.url}") stats = {"status": "Skipped", "reason": "edm_unavailable", "alert_text": e.alert_text, "url": e.url} _save_stats(f'{page_type}_task', stats) except Exception as e: import traceback as _tb logging.error(f"[Crawler] [{page_type.upper()}] 🚨 {page_type} 任務異常 | Error: {e}") stats = { "status": "Failed", "error": str(e) } _save_stats(f'{page_type}_task', stats) try: from services.event_router import notify_failure notify_failure( task_name="run_promo_event_task", error=e, source=f"Scheduler.{page_type}", event_type="crawler_timeout" if "timeout" in str(e).lower() else "promo_event_task_failure", priority="P2", title=f"{activity_name} 任務異常", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Crawler] [{page_type.upper()}] event_router 失敗: {_router_e}") def run_whitepage_check(): """ V-New: 檢查網頁服務是否變成白頁 每半小時執行一次,檢測正式環境網址 """ try: # 檢測的目標網址(正式環境) target_url = "https://mo.wooo.work/" logging.info(f"[Whitepage] [Check] 🔍 開始檢查網頁狀態 | URL: {target_url}") # 1. 發送 HTTP 請求 (內部健康檢查,verify=False 用於處理自簽憑證環境) try: response = requests.get(target_url, timeout=30, verify=False) # nosec B501 status_code = response.status_code content_length = len(response.text) logging.info(f"[Whitepage] [Check] 📊 HTTP 狀態碼: {status_code} | 內容長度: {content_length:,} bytes") # 2. 檢查 HTTP 狀態碼 if status_code != 200: error_msg = f"HTTP 狀態碼異常 ({status_code})" logging.error(f"[Whitepage] [Check] ❌ {error_msg}") NotificationManager().send_whitepage_alert(target_url, error_msg) _save_stats('whitepage_check', {"status": "Failed", "error": error_msg}) return # 3. 檢查內容長度(白頁通常很短) if content_length < 1000: error_msg = f"頁面內容過短 ({content_length} bytes),疑似白頁" logging.error(f"[Whitepage] [Check] ❌ {error_msg}") NotificationManager().send_whitepage_alert(target_url, error_msg) _save_stats('whitepage_check', {"status": "Failed", "error": error_msg}) return # 4. 檢查關鍵元素是否存在 content = response.text missing_markers = _missing_whitepage_markers(content) if missing_markers: error_msg = f"缺少關鍵元素:{', '.join(missing_markers)}" logging.error(f"[Whitepage] [Check] ❌ {error_msg}") logging.debug(f"[Whitepage] [Check] 📄 頁面內容片段: {content[:500]}...") NotificationManager().send_whitepage_alert(target_url, error_msg) _save_stats('whitepage_check', {"status": "Failed", "error": error_msg}) return # 5. 所有檢查通過 logging.info(f"[Whitepage] [Check] ✅ 網頁狀態正常") _save_stats('whitepage_check', { "status": "Success", "status_code": status_code, "content_length": content_length }) except requests.exceptions.Timeout: error_msg = "連線逾時 (30秒)" logging.error(f"[Whitepage] [Check] ❌ {error_msg}") NotificationManager().send_whitepage_alert(target_url, error_msg) _save_stats('whitepage_check', {"status": "Failed", "error": error_msg}) except requests.exceptions.ConnectionError as e: error_msg = f"無法連接伺服器:{str(e)}" logging.error(f"[Whitepage] [Check] ❌ {error_msg}") NotificationManager().send_whitepage_alert(target_url, error_msg) _save_stats('whitepage_check', {"status": "Failed", "error": error_msg}) except Exception as e: error_msg = f"未預期的錯誤:{str(e)}" logging.error(f"[Whitepage] [Check] ❌ {error_msg}") NotificationManager().send_whitepage_alert(target_url, error_msg) _save_stats('whitepage_check', {"status": "Failed", "error": error_msg}) except Exception as e: logging.error(f"[Whitepage] [Check] 🚨 白頁檢查任務異常 | Error: {e}") _save_stats('whitepage_check', {"status": "Failed", "error": str(e)}) try: from services.event_router import notify_failure notify_failure( task_name="run_whitepage_check", error=e, source="Scheduler.Whitepage", event_type="whitepage_check_failure", priority="P1", title="白頁檢查任務異常", dedup_ttl_sec=1800, ) except Exception as _router_e: logging.error(f"[Whitepage] [Check] event_router 失敗: {_router_e}") def start_scheduled_job(): """使用執行緒啟動任務,避免阻塞主程式""" thread = threading.Thread(target=run_momo_task) thread.start() def capture_page_screenshot(url, filename_prefix="screenshot", wait_time=5): """通用截圖函式 (供 app.py 呼叫截取儀表板) - 高解析度版本""" try: # V-Fix: 使用更大的視窗寬度以提高截圖清晰度 with managed_scraper_resources(window_size='2560,4000') as (driver, session): logging.info(f"[Screenshot] 📸 準備截取畫面: {url}") driver.get(url) time.sleep(wait_time / 2) # V-Fix: 滾動頁面以觸發所有元素的渲染 driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") time.sleep(1) driver.execute_script("window.scrollTo(0, 0);") time.sleep(wait_time / 2) # 設定視窗大小以進行滿版截圖 required_height = driver.execute_script("return document.body.parentNode.scrollHeight") driver.set_window_size(2560, int(required_height) + 100) # V-Fix: 等待視窗大小調整後的重新渲染 time.sleep(2) shot_dir = os.path.join(BASE_DIR, 'web', 'static', 'screenshots') os.makedirs(shot_dir, exist_ok=True) timestamp = int(time.time()) filename = f"{filename_prefix}_{timestamp}.png" screenshot_path = os.path.join(shot_dir, filename) driver.save_screenshot(screenshot_path) # 檢查截圖檔案大小 file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0 logging.info(f"[Screenshot] 📸 已儲存截圖 | Path: {screenshot_path} | Size: {file_size:,} bytes | Resolution: 2560x{int(required_height) + 100}") # V-Fix: 如果檔案太小(<50KB),可能是空白或錯誤 if file_size < 50000: logging.warning(f"[Screenshot] ⚠️ 截圖檔案異常小 ({file_size} bytes),可能截圖失敗") return screenshot_path except Exception as e: logging.error(f"[Screenshot] ❌ 截圖失敗 | Error: {e}") return None def verify_import_data_sync(expected_rows: int = None, date_range: dict = None) -> dict: """ 2026-01-30 新增:驗證匯入資料是否正確同步到兩個資料表 Args: expected_rows: 預期的匯入筆數 date_range: 預期的日期範圍 {'min': 'YYYY-MM-DD', 'max': 'YYYY-MM-DD'} Returns: dict: 驗證結果 """ from config import DATABASE_PATH from sqlalchemy import create_engine, text date_min = date_range.get('min') if date_range else None date_max = date_range.get('max') if date_range else None result = { 'success': True, 'daily_sales_snapshot': {'ok': False, 'rows': 0, 'date_min': None, 'date_max': None}, 'realtime_sales_monthly': {'ok': False, 'rows': 0, 'date_min': None, 'date_max': None}, 'scope': {'date_min': date_min, 'date_max': date_max}, 'errors': [] } if expected_rows is not None and expected_rows <= 0: result['success'] = False result['errors'].append("本次匯入筆數為 0,請檢查檔案是否實際匯入成功") return result if not date_min or not date_max: result['success'] = False result['errors'].append("缺少本次匯入日期範圍,無法驗證資料同步") return result try: engine = create_engine(DATABASE_PATH) with engine.connect() as conn: is_sqlite = engine.dialect.name == 'sqlite' if is_sqlite: snapshot_query = text(""" SELECT COUNT(*) as cnt, MIN(date(snapshot_date)) as date_min, MAX(date(snapshot_date)) as date_max FROM daily_sales_snapshot WHERE date(snapshot_date) BETWEEN date(:date_min) AND date(:date_max) """) monthly_query = text(""" SELECT COUNT(*) as cnt, MIN(date("日期")) as date_min, MAX(date("日期")) as date_max FROM realtime_sales_monthly WHERE date("日期") BETWEEN date(:date_min) AND date(:date_max) """) else: snapshot_query = text(""" SELECT COUNT(*) as cnt, MIN(snapshot_date::date)::text as date_min, MAX(snapshot_date::date)::text as date_max FROM daily_sales_snapshot WHERE snapshot_date::date BETWEEN :date_min AND :date_max """) monthly_query = text(""" SELECT COUNT(*) as cnt, MIN("日期"::date)::text as date_min, MAX("日期"::date)::text as date_max FROM realtime_sales_monthly WHERE "日期"::date BETWEEN :date_min AND :date_max """) # V-Fix: 只驗證本次匯入日期範圍。realtime_sales_monthly 保存歷史資料, # 不可再拿全表總筆數和 daily_sales_snapshot 做相等比較。 params = {'date_min': date_min, 'date_max': date_max} snapshot_result = conn.execute(snapshot_query, params).fetchone() result['daily_sales_snapshot']['rows'] = snapshot_result[0] result['daily_sales_snapshot']['date_min'] = snapshot_result[1] result['daily_sales_snapshot']['date_max'] = snapshot_result[2] monthly_result = conn.execute(monthly_query, params).fetchone() result['realtime_sales_monthly']['rows'] = monthly_result[0] result['realtime_sales_monthly']['date_min'] = monthly_result[1] result['realtime_sales_monthly']['date_max'] = monthly_result[2] # 驗證兩個表的資料是否一致 snapshot_rows = result['daily_sales_snapshot']['rows'] monthly_rows = result['realtime_sales_monthly']['rows'] # 檢查 1: 兩個表的筆數是否一致 if snapshot_rows != monthly_rows: result['errors'].append( f"資料筆數不一致: daily_sales_snapshot={snapshot_rows}, realtime_sales_monthly={monthly_rows}" ) result['success'] = False # 檢查 2: 本次匯入日期範圍是否一致 if ( result['daily_sales_snapshot']['date_min'] != result['realtime_sales_monthly']['date_min'] or result['daily_sales_snapshot']['date_max'] != result['realtime_sales_monthly']['date_max'] ): result['errors'].append( f"日期範圍不一致: daily_sales_snapshot=" f"{result['daily_sales_snapshot']['date_min']}~{result['daily_sales_snapshot']['date_max']}, " f"realtime_sales_monthly=" f"{result['realtime_sales_monthly']['date_min']}~{result['realtime_sales_monthly']['date_max']}" ) result['success'] = False # 檢查 3: 如果有預期的匯入筆數,驗證是否正確 if expected_rows is not None: if snapshot_rows < expected_rows: result['errors'].append( f"daily_sales_snapshot 筆數({snapshot_rows})少於預期({expected_rows})" ) result['success'] = False if monthly_rows < expected_rows: result['errors'].append( f"realtime_sales_monthly 筆數({monthly_rows})少於預期({expected_rows})" ) result['success'] = False # 檢查 4: 如果有預期的日期範圍,驗證是否正確 if result['daily_sales_snapshot']['date_min'] != date_min or result['daily_sales_snapshot']['date_max'] != date_max: result['errors'].append( f"daily_sales_snapshot 日期範圍({result['daily_sales_snapshot']['date_min']}~" f"{result['daily_sales_snapshot']['date_max']})與預期({date_min}~{date_max})不符" ) result['success'] = False if result['realtime_sales_monthly']['date_min'] != date_min or result['realtime_sales_monthly']['date_max'] != date_max: result['errors'].append( f"realtime_sales_monthly 日期範圍({result['realtime_sales_monthly']['date_min']}~" f"{result['realtime_sales_monthly']['date_max']})與預期({date_min}~{date_max})不符" ) result['success'] = False # 設定各表驗證結果 result['daily_sales_snapshot']['ok'] = snapshot_rows > 0 result['realtime_sales_monthly']['ok'] = monthly_rows > 0 and monthly_rows == snapshot_rows logging.info(f"[Scheduler] [Verify] 資料驗證完成: success={result['success']}, " f"range={date_min}~{date_max}, snapshot={snapshot_rows}筆, monthly={monthly_rows}筆") except Exception as e: result['success'] = False result['errors'].append(f"驗證時發生錯誤: {str(e)}") logging.error(f"[Scheduler] [Verify] 資料驗證失敗: {e}") return result def run_auto_import_task(): """ V-New: 自動從 Google Drive 匯入當日業績 每半小時檢查一次 Google Drive 是否有新的 Excel 檔案 """ # ADR-012 Phase 4: HITL 暫停檢查 notification_sent = False try: from services.agent_actions import is_task_paused if is_task_paused("run_auto_import_task"): logging.info("[Scheduler] [AutoImport] ⏸️ 任務被 HITL 暫停中,本次跳過") return except Exception as pause_check_error: logging.debug( f"[Scheduler] [AutoImport] HITL 暫停檢查失敗但繼續排程 | Error: {pause_check_error}", exc_info=True, ) try: from services.import_service import import_service from services.notification_manager import NotificationManager logging.info("[Scheduler] [AutoImport] 🚀 啟動 Google Drive 自動匯入任務") # 執行自動匯入 result = import_service.auto_import_from_drive() if result.get('success'): logging.info(f"[Scheduler] [AutoImport] ✅ 自動匯入完成 | Message: {result.get('message')}") stats = { "file_count": result.get('file_count', 0), "imported_count": result.get('imported_count', 0), "status": "Success" } # V-New: 發送 Telegram 和 Line 通知(僅當有匯入新檔案時) if result.get('file_count', 0) > 0: try: logging.info("[Scheduler] [AutoImport] 📢 準備發送自動匯入通知...") now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M') # 格式化日期範圍 date_range_str = "未知" date_range = result.get('date_range') if date_range: try: from datetime import datetime as dt date_min = dt.strptime(date_range['min'], '%Y-%m-%d') date_max = dt.strptime(date_range['max'], '%Y-%m-%d') date_min_str = date_min.strftime('%Y年%m月%d日') date_max_str = date_max.strftime('%Y年%m月%d日') if date_min_str == date_max_str: date_range_str = date_min_str else: date_range_str = f"{date_min_str}~{date_max_str}" except Exception: date_range_str = f"{date_range.get('min', '?')}~{date_range.get('max', '?')}" freshness_line = "" data_lag_days = result.get('data_lag_days') if data_lag_days is not None and data_lag_days > 0: freshness_line = f"⚠️ 最新資料落後:{data_lag_days} 天\n" # 組合通知訊息 message = ( f"📊 當日業績自動匯入通知 ({now_str})\n" f"{'='*30}\n" f"✅ 匯入狀態:成功\n" f"📁 處理檔案數:{result.get('file_count', 0)} 個\n" f"📝 共匯入記錄:{result.get('total_rows', 0)} 筆\n" f"📅 數據日期期間:{date_range_str}\n" f"{freshness_line}" f"{'='*30}\n" f"詳細資料請至系統查看" ) # 發送通知到所有頻道 notifier = NotificationManager() notifier._send_line_messages([message]) notifier._send_telegram_messages([message]) logging.info("[Scheduler] [AutoImport] ✅ 自動匯入通知已發送") # 2026-01-30 新增:匯入後驗證資料同步 logging.info("[Scheduler] [AutoImport] 🔍 開始驗證資料同步...") verify_result = verify_import_data_sync( expected_rows=result.get('total_rows'), date_range=result.get('date_range') ) if not verify_result['success']: # 資料驗證失敗,發送警告通知 error_details = '\n'.join([f" ⚠️ {e}" for e in verify_result['errors']]) verify_message = ( f"🚨 當日業績匯入驗證失敗 ({now_str})\n" f"{'='*30}\n" f"匯入通知顯示成功,但資料驗證發現問題:\n" f"{error_details}\n" f"{'='*30}\n" f"📊 daily_sales_snapshot: {verify_result['daily_sales_snapshot']['rows']} 筆\n" f"📈 realtime_sales_monthly: {verify_result['realtime_sales_monthly']['rows']} 筆\n" f"{'='*30}\n" f"請立即檢查資料同步狀態!" ) notifier._send_telegram_messages([verify_message]) logging.error(f"[Scheduler] [AutoImport] ❌ 資料驗證失敗: {verify_result['errors']}") else: logging.info("[Scheduler] [AutoImport] ✅ 資料驗證通過,兩個表已同步") except Exception as e: logging.error(f"[Scheduler] [AutoImport] ❌ 發送通知失敗 | Error: {e}") else: logging.info("[Scheduler] [AutoImport] ℹ️ 無新檔案,不發送通知") else: logging.error(f"[Scheduler] [AutoImport] ❌ 自動匯入失敗 | Message: {result.get('message')}") stats = { "status": "Failed", "error": result.get('message', 'Unknown error') } # V-New: 匯入失敗時也發送通知 try: logging.info("[Scheduler] [AutoImport] 📢 準備發送自動匯入失敗通知...") now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M') message = ( f"⚠️ 當日業績自動匯入失敗 ({now_str})\n" f"{'='*30}\n" f"❌ 匯入狀態:失敗\n" f"📌 錯誤訊息:{result.get('message', 'Unknown error')}\n" f"{'='*30}\n" f"請檢查 Google Drive 設定或手動匯入" ) notifier = NotificationManager() notifier._send_line_messages([message]) notifier._send_telegram_messages([message]) logging.info("[Scheduler] [AutoImport] ✅ 匯入失敗通知已發送") except Exception as e: logging.error(f"[Scheduler] [AutoImport] ❌ 發送失敗通知時發生錯誤 | Error: {e}") _save_stats('auto_import_task', stats) except Exception as e: import traceback as _tb logging.error(f"[Scheduler] [AutoImport] 🚨 自動匯入任務異常 | Error: {e}") stats = {"status": "Failed", "error": str(e)} _save_stats('auto_import_task', stats) # ADR-012 Phase 2: 改走 EventRouter(Hermes L1 翻譯 + 降級鏈) # LINE 通道保留(event_router 只處理 Telegram) try: from services.event_router import notify_failure notify_failure( task_name="run_auto_import_task", error=e, source="Scheduler.AutoImport", event_type="db_connection_error" if "translate host" in str(e).lower() or "operational" in str(e).lower() else "import_failure", priority="P1", title="當日業績自動匯入異常", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Scheduler] [AutoImport] event_router 失敗: {_router_e}") # LINE 通知保留(獨立通道,不經 event_router) # Only send if notification hasn't been sent already if not notification_sent: try: from services.notification_manager import NotificationManager now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M') message = f"Daily Sales Auto Import Exception ({now_str})\nSystem error: {str(e)[:200]}" NotificationManager()._send_line_messages([message]) notification_sent = True # Mark as sent to prevent duplicate except Exception as notify_error: logging.error(f"[Scheduler] [AutoImport] LINE notification failed | Error: {notify_error}") # ADR-013: AIOps 自動修復 — PlayBook 匹配 + KM 沉澱 try: from services.auto_heal_service import auto_heal_service auto_heal_service.handle_exception( task_name="run_auto_import_task", exception=e, traceback_str=_tb.format_exc(), ) except Exception as _heal_e: logging.error(f"[Scheduler] [AutoImport] auto_heal_service 失敗: {_heal_e}") def run_competitor_price_feeder_task(): """ 競品價格補給線排程任務(每 4 小時執行一次) 從 PChome 抓取競品價格,同步寫入 competitor_prices 最新快取與 competitor_price_history 歷史快照,供 HermesAnalystService 與前端歷史圖表使用。 與 AI Pipeline 完全解耦:本任務失敗不影響核心分析流程。 ADR-ICAIM-2026-04-17 新增 """ try: from config import DATABASE_PATH from sqlalchemy import create_engine from services.competitor_intel_repository import clear_competitor_intel_cache from services.competitor_price_feeder import CompetitorPriceFeeder now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M') logging.info(f"[Scheduler] [Feeder] 🚀 啟動競品價格抓取任務 | {now_str}") engine = create_engine(DATABASE_PATH) feeder = CompetitorPriceFeeder(engine=engine) result = feeder.run(source="pchome") stats = { "total_skus": result.total_skus, "matched": result.matched, "skipped_no_result": result.skipped_no_result, "skipped_low_score": result.skipped_low_score, "errors": result.errors, "duration_sec": result.duration_sec, "history_written": result.history_written, "status": "Success", } logging.info( f"[Scheduler] [Feeder] ✅ 完成 | " f"matched={result.matched}/{result.total_skus} " f"skip_no={result.skipped_no_result} " f"skip_low={result.skipped_low_score} " f"history_written={result.history_written} " f"errors={result.errors} " f"耗時={result.duration_sec}s" ) clear_competitor_intel_cache() _save_stats('competitor_price_feeder', stats) except Exception as e: import traceback as _tb logging.error(f"[Scheduler] [Feeder] 🚨 任務異常 | Error: {e}") _save_stats('competitor_price_feeder', {"status": "Failed", "error": str(e)}) try: from services.event_router import notify_failure notify_failure( task_name="run_competitor_price_feeder_task", error=e, source="Scheduler.Feeder", event_type="competitor_price_feeder_failure", priority="P2", title="競品價格補給任務異常", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Scheduler] [Feeder] event_router 失敗: {_router_e}") def run_pchome_match_backfill_task(): """ PChome 待比對商品補抓任務(每日執行) 優先處理尚未有有效 PChome 配對的高價 ACTIVE 商品,寫入 competitor_prices 與 competitor_price_history,完成後重算 product_pick 挑品清單。 """ try: from config import DATABASE_PATH from sqlalchemy import create_engine from services.ai_product_pick_agent import generate_product_pick_list from services.cache_manager import clear_dashboard_cache from services.competitor_identity_revalidator import revalidate_existing_competitor_identities from services.competitor_intel_repository import clear_competitor_intel_cache from services.competitor_price_feeder import CompetitorPriceFeeder now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M') logging.info(f"[Scheduler] [PChomeBackfill] 🚀 啟動待比對補抓任務 | {now_str}") engine = create_engine(DATABASE_PATH) revalidation_limit = int(os.getenv("PCHOME_BACKFILL_REVALIDATION_LIMIT", "1200")) expired_refresh_limit = int(os.getenv("PCHOME_BACKFILL_EXPIRED_REFRESH_LIMIT", "1200")) retryable_limit = int(os.getenv("PCHOME_BACKFILL_RETRYABLE_LIMIT", "240")) unmatched_limit = int(os.getenv("PCHOME_BACKFILL_UNMATCHED_LIMIT", "240")) revalidation_result = revalidate_existing_competitor_identities( engine, limit=revalidation_limit, dry_run=False, include_expired=True, write_attempts=True, ) feeder = CompetitorPriceFeeder(engine=engine) refresh_result = feeder.run_expired_identity_refresh(limit=expired_refresh_limit) retryable_result = feeder.run_retryable_candidate_revalidation(limit=retryable_limit, min_score=0.70) feeder_result = feeder.run_unmatched_priority(limit=unmatched_limit) pick_result = generate_product_pick_list(engine, limit=50) clear_dashboard_cache() clear_competitor_intel_cache() stats = { "total_skus": feeder_result.total_skus, "matched": feeder_result.matched, "skipped_no_result": feeder_result.skipped_no_result, "skipped_low_score": feeder_result.skipped_low_score, "errors": feeder_result.errors, "duration_sec": feeder_result.duration_sec, "history_written": feeder_result.history_written, "identity_revalidated_fresh": revalidation_result.promoted_fresh, "identity_revalidated_expired": revalidation_result.promoted_expired, "identity_revalidation_rejected_low": revalidation_result.rejected_low_score, "identity_revalidation_rejected_veto": revalidation_result.rejected_veto, "expired_identity_refresh_total": refresh_result.total_skus, "expired_identity_refresh_matched": refresh_result.matched, "expired_identity_refresh_no_result": refresh_result.skipped_no_result, "expired_identity_refresh_low_score": refresh_result.skipped_low_score, "expired_identity_refresh_errors": refresh_result.errors, "retryable_candidate_revalidation_total": retryable_result.total_skus, "retryable_candidate_revalidation_matched": retryable_result.matched, "retryable_candidate_revalidation_low_score": retryable_result.skipped_low_score, "retryable_candidate_revalidation_errors": retryable_result.errors, "pick_candidates": pick_result.candidates, "pick_written": pick_result.written, "status": "Success", } logging.info( f"[Scheduler] [PChomeBackfill] ✅ 完成 | " f"revalidated={revalidation_result.promoted_fresh}+{revalidation_result.promoted_expired} " f"refreshed={refresh_result.matched}/{refresh_result.total_skus} " f"retryable={retryable_result.matched}/{retryable_result.total_skus} " f"matched={feeder_result.matched}/{feeder_result.total_skus} " f"history_written={feeder_result.history_written} " f"pick_written={pick_result.written} " f"errors={feeder_result.errors + refresh_result.errors + retryable_result.errors} " f"耗時={feeder_result.duration_sec + refresh_result.duration_sec + retryable_result.duration_sec}s" ) _save_stats('pchome_match_backfill', stats) except Exception as e: import traceback as _tb logging.error(f"[Scheduler] [PChomeBackfill] 🚨 任務異常 | Error: {e}") _save_stats('pchome_match_backfill', {"status": "Failed", "error": str(e)}) try: from services.event_router import notify_failure notify_failure( task_name="run_pchome_match_backfill_task", error=e, source="Scheduler.PChomeBackfill", event_type="pchome_match_backfill_failure", priority="P2", title="PChome 待比對補抓任務異常", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Scheduler] [PChomeBackfill] event_router 失敗: {_router_e}") def run_icaim_analysis_task(): """ ICAIM 競價情報分析排程任務(每 6 小時執行一次) Hermes 3 分析師 → NemoTron 派發器 → Telegram 競價告警 依賴 competitor_prices 表(由 run_competitor_price_feeder_task 每 4h 更新) ADR-ICAIM-2026-04-17 新增 """ try: from config import DATABASE_PATH from sqlalchemy import create_engine from services.hermes_analyst_service import HermesAnalystService from services.nemoton_dispatcher_service import NemotronDispatcher from services.notification_manager import NotificationManager now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M') logging.info(f"[Scheduler] [ICAIM] 🚀 啟動競價情報分析 | {now_str}") engine = create_engine(DATABASE_PATH) # Step 1:Hermes 分析師 hermes_svc = HermesAnalystService(engine=engine) t0 = datetime.now(TAIPEI_TZ) result = hermes_svc.run() hermes_duration = round((datetime.now(TAIPEI_TZ) - t0).total_seconds(), 1) logging.info( f"[Scheduler] [ICAIM] Hermes 完成 | " f"candidates={result.total_candidates} threats={len(result.threats)} " f"耗時={hermes_duration}s" ) _save_stats('icaim_hermes', { "candidates": result.total_candidates, "threats": len(result.threats), "duration_sec": hermes_duration, "status": "Success" if result.success else "Failed", "error": result.error, }) if not result.success: logging.error(f"[Scheduler] [ICAIM] Hermes 失敗: {result.error}") try: from services.event_router import notify_failure notify_failure( task_name="run_icaim_analysis_task", error=RuntimeError(result.error or "Hermes analysis failed"), source="Scheduler.ICAIM", event_type="icaim_hermes_failure", priority="P1", title="ICAIM Hermes 分析失敗", payload={"duration_sec": hermes_duration, "candidates": result.total_candidates}, ) except Exception as _router_e: logging.error(f"[Scheduler] [ICAIM] Hermes failure event_router 失敗: {_router_e}") return if not result.threats: logging.info("[Scheduler] [ICAIM] 無威脅商品,跳過 NemoTron dispatch") # Operation Ollama-First v5.0 Phase 4:移除原 inline meta_analysis 觸發 # 原因:呼叫點 #16 月耗 ~2.5M Gemini tokens(icaim 4h × 6 + cron 6h × 4 = 10/天) # 改由 run_scheduler 每日 12:00 單一觸發;此處僅 log,不再呼叫。 return # Step 2:NemoTron 派發器 → Telegram hermes_stats = {"duration_sec": hermes_duration, "tokens": result.hermes_tokens} notifier = NotificationManager() dispatcher = NemotronDispatcher(notification_manager=notifier, engine=engine) dispatch_result = dispatcher.dispatch(result.threats, hermes_stats=hermes_stats) logging.info( f"[Scheduler] [ICAIM] ✅ 完成 | " f"dispatched={dispatch_result.get('dispatched', 0)} " f"alerts={dispatch_result.get('alerts', 0)}" ) _save_stats('icaim_dispatch', {**dispatch_result, "status": "Success"}) # Operation Ollama-First v5.0 Phase 4:原 Step 3 inline Meta-Analysis 觸發已移除 # 改由 run_scheduler 每日 12:00 單一觸發(Phase 4 降頻 6h → 24h,月省 ~1.875M tokens) except Exception as e: import traceback as _tb logging.error(f"[Scheduler] [ICAIM] 🚨 任務異常 | Error: {e}") _save_stats('icaim_analysis', {"status": "Failed", "error": str(e)}) try: from services.event_router import notify_failure notify_failure( task_name="run_icaim_analysis_task", error=e, source="Scheduler.ICAIM", event_type="icaim_analysis_failure", priority="P1", title="ICAIM 競價分析任務異常", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Scheduler] [ICAIM] event_router 失敗: {_router_e}") # ADR-013: AIOps 自動修復 try: from services.auto_heal_service import auto_heal_service auto_heal_service.handle_exception( task_name="run_icaim_analysis_task", exception=e, traceback_str=_tb.format_exc(), ) except Exception as _heal_e: logging.error(f"[Scheduler] [ICAIM] auto_heal_service 失敗: {_heal_e}") def run_weekly_strategy_task(): """ 執行每週 Gemini 策略師週報任務 (Phase 4 實作) 產出高階經營決策週報,並存入 ai_insights 中作知識沈澱,同時透過 Telegram 推送 """ logging.info("[Scheduler] [Strategy] 🚀 啟動 Gemini 策略師週報任務...") try: from services.openclaw_strategist_service import generate_weekly_strategy_report generate_weekly_strategy_report(force_tg_alert=True) logging.info("[Scheduler] [Strategy] ✅ Gemini 策略師週報任務完成") except Exception as e: import traceback as _tb logging.error(f"[Scheduler] [Strategy] 🚨 任務異常 | Error: {e}") _save_stats('weekly_strategy', {"status": "Failed", "error": str(e)}) try: from services.event_router import notify_failure notify_failure( task_name="run_weekly_strategy_task", error=e, source="Scheduler.Strategy", event_type="weekly_strategy_failure", priority="P2", title="每週策略週報任務異常", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Scheduler] [Strategy] event_router 失敗: {_router_e}") # ADR-013: AIOps 自動修復 try: from services.auto_heal_service import auto_heal_service auto_heal_service.handle_exception( task_name="run_weekly_strategy_task", exception=e, traceback_str=_tb.format_exc(), ) except Exception as _heal_e: logging.error(f"[Scheduler] [Strategy] auto_heal_service 失敗: {_heal_e}") def run_db_backup_task(): """ 每日凌晨 02:00 執行 pg_dump 備份 momo_analytics, 並清理超過 7 天的舊備份。完成後透過 Telegram 通知統帥。 失敗同樣發出告警,並沉澱到 ai_insights(供 RAG 查詢)。 """ logging.info("[Scheduler] [Backup] 🚀 啟動資料庫備份任務...") try: from services.db_backup_service import run_backup, cleanup_old_backups from services.notification_manager import NotificationManager result = run_backup() deleted_count = cleanup_old_backups() now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M') notifier = NotificationManager() if result["success"]: size_kb = result["file_size"] // 1024 msg = ( f"💾 資料庫備份完成 ({now_str})\n" f"{'='*30}\n" f"✅ 狀態:成功\n" f"📁 檔案:{result['filename']}\n" f"📦 大小:{size_kb} KB\n" f"⏱ 耗時:{result['duration']:.1f} 秒\n" f"🗑 清理舊備份:{deleted_count} 個" ) logging.info(f"[Scheduler] [Backup] ✅ 備份成功 | {result['filename']} ({size_kb}KB)") _save_stats('db_backup', {"status": "Success", "filename": result["filename"], "size_kb": size_kb}) # 沉澱到 ai_insights(RAG 可查詢備份歷史) try: from services.openclaw_learning_service import store_insight store_insight( insight_type='backup_status', content=f"資料庫備份成功:{result['filename']},大小 {size_kb}KB,耗時 {result['duration']:.1f}s", period=datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d'), metadata={"status": "success", "size_kb": size_kb, "deleted_old": deleted_count}, ai_model="scheduler", ) except Exception as insight_error: logging.warning( f"[Scheduler] [Backup] ⚠️ 備份成功 insight 寫入失敗但繼續通知 | Error: {insight_error}", exc_info=True, ) else: msg = ( f"🚨 資料庫備份失敗 ({now_str})\n" f"{'='*30}\n" f"❌ 狀態:失敗\n" f"🔍 原因:{result.get('error', '未知錯誤')}\n" f"⚠️ 請立即檢查 momo-db 容器狀態!" ) logging.error(f"[Scheduler] [Backup] ❌ 備份失敗: {result.get('error')}") _save_stats('db_backup', {"status": "Failed", "error": result.get("error")}) try: from services.event_router import notify_failure notify_failure( task_name="run_db_backup_task", error=RuntimeError(result.get("error") or "database backup failed"), source="Scheduler.DBBackup", event_type="db_backup_failure", priority="P1", title="資料庫備份失敗", payload={"filename": result.get("filename")}, dedup_ttl_sec=3600, ) except Exception as _router_e: logging.error(f"[Scheduler] [Backup] event_router 失敗: {_router_e}") try: from services.openclaw_learning_service import store_insight store_insight( insight_type='backup_status', content=f"資料庫備份失敗:{result.get('error', '未知')}", period=datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d'), metadata={"status": "failed", "error": result.get("error")}, ai_model="scheduler", ) except Exception as insight_error: logging.warning( f"[Scheduler] [Backup] ⚠️ 備份失敗 insight 寫入失敗但繼續通知 | Error: {insight_error}", exc_info=True, ) notifier._send_telegram_messages([msg]) except Exception as e: logging.error(f"[Scheduler] [Backup] 🚨 任務異常 | Error: {e}") _save_stats('db_backup', {"status": "Error", "error": str(e)}) try: from services.event_router import notify_failure notify_failure( task_name="run_db_backup_task", error=e, source="Scheduler.DBBackup", event_type="db_backup_failure", priority="P1", title="資料庫備份排程異常", dedup_ttl_sec=3600, ) except Exception as _router_e: logging.error(f"[Scheduler] [Backup] event_router 失敗: {_router_e}") try: from services.notification_manager import NotificationManager NotificationManager()._send_telegram_messages([ f"🚨 DB 備份排程異常\n錯誤:{e}" ]) except Exception as notify_error: logging.warning( f"[Scheduler] [Backup] ⚠️ 備份異常 Telegram 通知失敗 | Error: {notify_error}", exc_info=True, ) def run_backup_monitor_task(): """ 每 6 小時檢查最近一次備份是否在 25 小時內(允許一點偏差)。 若發現備份過期或從未備份,立即發出 Telegram 告警, 並透過 store_insight 讓 NemoTron/OpenClaw 可感知備份健康狀態。 """ logging.info("[Scheduler] [BackupMonitor] 🔍 檢查備份健康狀態...") try: from services.db_backup_service import get_latest_backup_info from services.notification_manager import NotificationManager info = get_latest_backup_info() now = datetime.now(TAIPEI_TZ) alert_needed = False alert_reason = "" if info["status"] == "no_backup" or info["filename"] is None: alert_needed = True alert_reason = "從未執行過備份,backup_log 與備份目錄均為空" elif info["status"] == "failed": alert_needed = True alert_reason = f"最近一次備份失敗:{info.get('error', '未知')}" else: created_at = info["created_at"] if created_at is not None: # 統一為 naive datetime 比較 if hasattr(created_at, 'tzinfo') and created_at.tzinfo is not None: created_at = created_at.replace(tzinfo=None) now_naive = now.replace(tzinfo=None) hours_ago = (now_naive - created_at).total_seconds() / 3600 if hours_ago > 25: alert_needed = True alert_reason = f"最近備份距今 {hours_ago:.1f} 小時(超過 25h 閾值),可能備份任務未執行" if alert_needed: now_str = now.strftime('%Y-%m-%d %H:%M') msg = ( f"⚠️ 資料庫備份異常告警 ({now_str})\n" f"{'='*30}\n" f"🔴 原因:{alert_reason}\n" f"📋 最新備份:{info.get('filename', '無')}\n" f"🕐 備份時間:{info.get('created_at', '無')}\n" f"💡 請確認 momo-scheduler 備份排程是否正常執行!" ) logging.warning(f"[Scheduler] [BackupMonitor] ⚠️ 備份告警: {alert_reason}") NotificationManager()._send_telegram_messages([msg]) try: from services.event_router import notify_failure notify_failure( task_name="run_backup_monitor_task", error=RuntimeError(alert_reason), source="Scheduler.BackupMonitor", event_type="backup_monitor_alert", priority="P2", title="資料庫備份健康告警", payload={"latest_file": info.get("filename"), "created_at": str(info.get("created_at"))}, dedup_ttl_sec=21600, ) except Exception as _router_e: logging.error(f"[Scheduler] [BackupMonitor] event_router 失敗: {_router_e}") try: from services.openclaw_learning_service import store_insight store_insight( insight_type='backup_status', content=f"備份監控告警:{alert_reason}", period=now.strftime('%Y-%m-%d'), metadata={"alert": True, "reason": alert_reason, "latest_file": info.get("filename")}, ai_model="scheduler", ) except Exception as insight_error: logging.warning( f"[Scheduler] [BackupMonitor] ⚠️ 備份監控 insight 寫入失敗但繼續 | Error: {insight_error}", exc_info=True, ) else: created_at = info.get("created_at") logging.info(f"[Scheduler] [BackupMonitor] ✅ 備份狀態正常 | 最新: {info.get('filename')} @ {created_at}") _save_stats('backup_monitor', { "status": "Alert" if alert_needed else "OK", "latest_file": info.get("filename"), "alert_reason": alert_reason if alert_needed else None, }) except Exception as e: logging.error(f"[Scheduler] [BackupMonitor] 🚨 監控任務異常 | Error: {e}") _save_stats('backup_monitor', {"status": "Error", "error": str(e)}) try: from services.event_router import notify_failure notify_failure( task_name="run_backup_monitor_task", error=e, source="Scheduler.BackupMonitor", event_type="backup_monitor_failure", priority="P2", title="資料庫備份監控任務異常", dedup_ttl_sec=3600, ) except Exception as _router_e: logging.error(f"[Scheduler] [BackupMonitor] event_router 失敗: {_router_e}") def run_openclaw_meta_analysis_task(): """每 6 小時 — OpenClaw Meta-Analysis(AI 系統效能自我審視 + 電商洞察快照)""" try: from services.openclaw_strategist_service import generate_meta_analysis_report report = generate_meta_analysis_report() logging.info(f"[Scheduler] [MetaAnalysis] ✅ 完成 | 長度={len(report)} 字元") _save_stats('meta_analysis', {"status": "OK", "length": len(report)}) except Exception as e: import traceback as _tb logging.error(f"[Scheduler] [MetaAnalysis] 🚨 Meta-Analysis 任務異常: {e}") _save_stats('meta_analysis', {"status": "Error", "error": str(e)}) try: from services.event_router import notify_failure notify_failure( task_name="run_openclaw_meta_analysis_task", error=e, source="Scheduler.MetaAnalysis", event_type="openclaw_report_failure", priority="P2", title="OpenClaw Meta-Analysis 任務異常", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Scheduler] [MetaAnalysis] event_router 失敗: {_router_e}") try: from services.auto_heal_service import auto_heal_service auto_heal_service.handle_exception( task_name="run_openclaw_meta_analysis_task", exception=e, traceback_str=_tb.format_exc(), ) except Exception as _heal_e: logging.error(f"[Scheduler] [MetaAnalysis] auto_heal_service 失敗: {_heal_e}") def run_dedup_batch_task(): """每日 03:00 — ai_insights 去重批次(同 SKU 同 type 同 period 保留最高品質)""" try: from services.openclaw_learning_service import run_dedup_batch result = run_dedup_batch() logging.info( f"[Scheduler] [Dedup] 去重完成 | 歸檔={result.get('archived', 0)} 筆" f" / 掃描={result.get('scanned', 0)} 筆" ) _save_stats('dedup_batch', result) except Exception as e: import traceback as _tb logging.error(f"[Scheduler] [Dedup] 去重批次異常: {e}") _save_stats('dedup_batch', {"status": "Error", "error": str(e)}) try: from services.event_router import notify_failure notify_failure( task_name="run_dedup_batch_task", error=e, source="Scheduler.Dedup", event_type="dedup_batch_failure", priority="P2", title="ai_insights 去重批次異常", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Scheduler] [Dedup] event_router 失敗: {_router_e}") def run_quality_rescore_task(): """每日 04:00 — ai_insights 品質分數時間衰減重算批次""" try: from services.openclaw_learning_service import run_quality_rescore_batch result = run_quality_rescore_batch() try: from services.openclaw_learning_service import enqueue_missing_insight_embeddings result["embedding_backfill"] = enqueue_missing_insight_embeddings(limit=200) except Exception as _embed_e: logging.warning(f"[Scheduler] [Rescore] embedding backfill 略過: {_embed_e}") logging.info( f"[Scheduler] [Rescore] 品質重算完成 | 更新={result.get('updated', 0)} 筆" f" | relearn 自動歸檔={result.get('relearn_reset', 0)} 筆" ) _save_stats('quality_rescore', result) except Exception as e: import traceback as _tb logging.error(f"[Scheduler] [Rescore] 品質分數重算異常: {e}") _save_stats('quality_rescore', {"status": "Error", "error": str(e)}) try: from services.event_router import notify_failure notify_failure( task_name="run_quality_rescore_task", error=e, source="Scheduler.Rescore", event_type="quality_rescore_failure", priority="P2", title="ai_insights 品質重算批次異常", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Scheduler] [Rescore] event_router 失敗: {_router_e}") def run_daily_report_task(): """每日 09:00 — OpenClaw 電商日報(業績快報 + 競品威脅 + 圖表推播)""" try: from services.openclaw_strategist_service import generate_daily_report result = generate_daily_report() logging.info( f"[Scheduler] [DailyReport] ✅ 完成 | period={result.get('period')} " f"charts={result.get('chart_count', 0)} actions={result.get('action_count', 0)}" ) _save_stats('daily_report', result) except Exception as e: import traceback as _tb logging.error(f"[Scheduler] [DailyReport] 🚨 日報任務異常: {e}") _save_stats('daily_report', {"status": "Error", "error": str(e)}) try: from services.event_router import notify_failure notify_failure( task_name="run_daily_report_task", error=e, source="Scheduler.DailyReport", event_type="openclaw_report_failure", priority="P2", title="OpenClaw 日報任務異常", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Scheduler] [DailyReport] event_router 失敗: {_router_e}") try: from services.auto_heal_service import auto_heal_service auto_heal_service.handle_exception( task_name="run_daily_report_task", exception=e, traceback_str=_tb.format_exc(), ) except Exception as _heal_e: logging.error(f"[Scheduler] [DailyReport] auto_heal_service 失敗: {_heal_e}") def run_monthly_report_task(): """每月1日 07:00 — OpenClaw 電商月報(月度全景洞察 + 多圖表推播)""" try: from services.openclaw_strategist_service import generate_monthly_report result = generate_monthly_report() logging.info( f"[Scheduler] [MonthlyReport] ✅ 完成 | period={result.get('period')} " f"charts={result.get('chart_count', 0)} actions={result.get('action_count', 0)}" ) _save_stats('monthly_report', result) except Exception as e: import traceback as _tb logging.error(f"[Scheduler] [MonthlyReport] 🚨 月報任務異常: {e}") _save_stats('monthly_report', {"status": "Error", "error": str(e)}) try: from services.event_router import notify_failure notify_failure( task_name="run_monthly_report_task", error=e, source="Scheduler.MonthlyReport", event_type="openclaw_report_failure", priority="P2", title="OpenClaw 月報任務異常", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Scheduler] [MonthlyReport] event_router 失敗: {_router_e}") try: from services.auto_heal_service import auto_heal_service auto_heal_service.handle_exception( task_name="run_monthly_report_task", exception=e, traceback_str=_tb.format_exc(), ) except Exception as _heal_e: logging.error(f"[Scheduler] [MonthlyReport] auto_heal_service 失敗: {_heal_e}") def run_ppt_auto_generation_task(schedule_kind=None): """依日/週/月/季/半年/年度節奏補齊 PPT 簡報。 22:00 的 ppt_vision_audit 只負責視覺審核;這個任務先依固定週期產出 已定義的簡報,並把每次嘗試寫入 ppt_generation_runs,成功檔案寫入 ppt_reports.cached_data。 """ try: from services.ppt_auto_generation_service import generate_scheduled_ppt_reports result = generate_scheduled_ppt_reports(schedule_kind=schedule_kind) logging.info( "[Scheduler] [PPTAutoGeneration] kind=%s status=%s ready=%s errors=%s", schedule_kind or "due", result.get("status"), result.get("ready", 0), result.get("errors", 0), ) stat_key = f"ppt_auto_generation_{schedule_kind}" if schedule_kind else "ppt_auto_generation" _save_stats(stat_key, result) except Exception as e: import traceback as _tb logging.error(f"[Scheduler] [PPTAutoGeneration] 🚨 自動簡報補齊異常: {e}") stat_key = f"ppt_auto_generation_{schedule_kind}" if schedule_kind else "ppt_auto_generation" _save_stats(stat_key, {"status": "Error", "error": str(e), "schedule_kind": schedule_kind}) try: from services.event_router import notify_failure notify_failure( task_name="run_ppt_auto_generation_task", error=e, source="Scheduler.PPTAutoGeneration", event_type="ppt_auto_generation_failure", priority="P2", title="PPT 自動簡報補齊異常", trace=_tb.format_exc(), ) except Exception as _router_e: logging.error(f"[Scheduler] [PPTAutoGeneration] event_router 失敗: {_router_e}") def run_ai_smoke_daily_summary_task(): """每日 AI 自動化 Smoke 摘要推播;先執行一次只讀 smoke 以刷新 sentinel。""" try: from services.ai_automation_smoke_service import collect_ai_automation_smoke, send_smoke_daily_summary smoke_result = collect_ai_automation_smoke(record_history=True, history_limit=20) result = send_smoke_daily_summary() logging.info( "[Scheduler] [AISmokeSummary] 完成 | smoke=%s send_status=%s sent=%s failed=%s", smoke_result.get("status"), result.get("status"), result.get("telegram", {}).get("sent", 0), result.get("telegram", {}).get("failed", 0), ) result["smoke"] = { "status": smoke_result.get("status"), "summary": smoke_result.get("summary"), } _save_stats('ai_smoke_daily_summary', result) except Exception as e: import traceback as _tb logging.error(f"[Scheduler] [AISmokeSummary] 🚨 摘要推播異常: {e}") _save_stats('ai_smoke_daily_summary', {"status": "Error", "error": str(e)}) try: from services.event_router import notify_failure notify_failure( task_name="run_ai_smoke_daily_summary_task", error=e, source="Scheduler.AISmokeSummary", event_type="ai_smoke_summary_failure", priority="P2", title="AI Smoke 每日摘要推播異常", trace=_tb.format_exc(), dedup_ttl_sec=3600, ) except Exception as _router_e: logging.error(f"[Scheduler] [AISmokeSummary] event_router 失敗: {_router_e}") if __name__ == "__main__": # 此檔案現在由 app.py 導入並由其主執行緒管理排程。 # 若需獨立測試,可在此處臨時加入調用程式碼。 logging.info("[Scheduler] [Main] 此為排程任務定義檔,請由主程式 app.py 執行。")