2862 lines
146 KiB
Python
2862 lines
146 KiB
Python
# cspell:ignore momo goodsimg prdimg bottomicon
|
||
import os
|
||
import re
|
||
import time
|
||
import logging
|
||
import json
|
||
import threading
|
||
import requests
|
||
from datetime import datetime, timedelta, timezone
|
||
from selenium import webdriver
|
||
from selenium.webdriver.chrome.options import Options
|
||
from selenium.webdriver.common.by import By
|
||
from contextlib import contextmanager
|
||
from selenium.webdriver.common.keys import Keys
|
||
from selenium.common.exceptions import TimeoutException
|
||
from sqlalchemy import desc, func
|
||
from database.manager import DatabaseManager
|
||
from database.models import Product, PriceRecord
|
||
from database.edm_models import PromoProduct
|
||
from services.notification_manager import NotificationManager
|
||
from services.edm_notifier import EdmNotifier # V-New: 導入新的通知模組
|
||
from utils.momo_url_utils import normalize_momo_product_url
|
||
|
||
# V-Fix: 改為匯入讀取函式,而非靜態變數,以支援動態更新
|
||
from config import load_momo_categories
|
||
|
||
# V9.99: 修正 BASE_DIR 未定義的問題
|
||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||
|
||
logging.basicConfig(
|
||
level=logging.INFO,
|
||
format='%(asctime)s [%(levelname)s] %(message)s',
|
||
handlers=[logging.FileHandler("logs/system.log", encoding="utf-8"), logging.StreamHandler()]
|
||
)
|
||
|
||
# 設定台北時區
|
||
TAIPEI_TZ = timezone(timedelta(hours=8))
|
||
|
||
def _save_stats(task_name, data):
|
||
"""將任務統計結果寫入 JSON 檔案"""
|
||
stats_file = os.path.join(os.path.dirname(__file__), 'data', 'scheduler_stats.json')
|
||
try:
|
||
stats = {}
|
||
if os.path.exists(stats_file):
|
||
with open(stats_file, 'r', encoding='utf-8') as f:
|
||
stats = json.load(f)
|
||
|
||
# V-New: 將統計資料改為歷史列表,保留最近5次紀錄
|
||
if task_name not in stats or not isinstance(stats.get(task_name), list):
|
||
stats[task_name] = []
|
||
|
||
data['last_run'] = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M:%S')
|
||
stats[task_name].insert(0, data) # 從最前面插入
|
||
stats[task_name] = stats[task_name][:5] # 只保留最近5筆
|
||
|
||
with open(stats_file, 'w', encoding='utf-8') as f:
|
||
json.dump(stats, f, ensure_ascii=False, indent=4)
|
||
except Exception as e:
|
||
logging.error(f"[Scheduler] [Stats] ❌ 無法儲存排程統計 | Task: {task_name} | Error: {e}")
|
||
|
||
|
||
WHITEPAGE_MARKER_GROUPS = [
|
||
{
|
||
"label": "導航佈局",
|
||
"patterns": ("navbar", "momo-sidebar", "momo-topbar", "momo-layout", "login-card"),
|
||
},
|
||
{
|
||
"label": "系統識別",
|
||
"patterns": ("MOMO 價格監控系統", "EwoooC", "WOOO TECH", "商品看板", "價格監控"),
|
||
},
|
||
{
|
||
"label": "內容容器",
|
||
"patterns": ("card", "login-card", "momo-card", "container"),
|
||
},
|
||
]
|
||
|
||
|
||
def _missing_whitepage_markers(content):
|
||
"""Return marker groups missing from a non-blank momo page."""
|
||
missing = []
|
||
for group in WHITEPAGE_MARKER_GROUPS:
|
||
if not any(pattern in content for pattern in group["patterns"]):
|
||
missing.append(group["label"])
|
||
return missing
|
||
|
||
|
||
@contextmanager
|
||
def managed_scraper_resources(window_size='1920,5000', debug=False, timeout=45, max_retries=2):
|
||
"""
|
||
一個管理 Selenium WebDriver 和資料庫 session 的上下文管理器。
|
||
它能確保資源在使用後被正確關閉,並在發生錯誤時自動回滾。
|
||
|
||
V-Opt 2026-01-15: 增加穩定性優化
|
||
- 增加更多 Chrome 穩定性選項
|
||
- 增加 session 健康檢查
|
||
- 改進資源清理邏輯
|
||
"""
|
||
driver = None
|
||
session = None
|
||
retry_count = 0
|
||
|
||
while retry_count <= max_retries:
|
||
try:
|
||
options = Options()
|
||
# V-Opt: 改用 'eager' 策略,加速爬蟲 (不等圖片/廣告載入完成,大幅減少超時機率)
|
||
options.page_load_strategy = 'eager'
|
||
# V-Debug: 如果不是偵錯模式,才使用無頭瀏覽器
|
||
if not debug:
|
||
options.add_argument('--headless=new')
|
||
options.add_argument(f'--window-size={window_size}')
|
||
options.add_argument("--disable-blink-features=AutomationControlled")
|
||
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36")
|
||
|
||
# === V-Opt 2026-01-15: 增強穩定性選項 ===
|
||
options.add_argument('--disable-gpu')
|
||
options.add_argument('--disable-extensions')
|
||
options.add_argument('--disable-dev-shm-usage') # 解決 /dev/shm 空間不足問題
|
||
options.add_argument('--no-sandbox')
|
||
options.add_argument('--disable-setuid-sandbox')
|
||
options.add_argument('--disable-background-networking')
|
||
options.add_argument('--disable-background-timer-throttling')
|
||
options.add_argument('--disable-backgrounding-occluded-windows')
|
||
options.add_argument('--disable-breakpad') # 禁用崩潰報告
|
||
options.add_argument('--disable-component-extensions-with-background-pages')
|
||
options.add_argument('--disable-features=TranslateUI')
|
||
options.add_argument('--disable-hang-monitor') # 禁用掛起監控,避免誤殺
|
||
options.add_argument('--disable-ipc-flooding-protection')
|
||
options.add_argument('--disable-popup-blocking')
|
||
options.add_argument('--disable-prompt-on-repost')
|
||
options.add_argument('--disable-renderer-backgrounding')
|
||
options.add_argument('--disable-sync')
|
||
options.add_argument('--metrics-recording-only')
|
||
options.add_argument('--no-first-run')
|
||
options.add_argument('--safebrowsing-disable-auto-update')
|
||
|
||
# V-Opt: 記憶體優化
|
||
options.add_argument('--memory-pressure-off') # 禁用記憶體壓力監控
|
||
options.add_argument('--js-flags=--max-old-space-size=512') # 限制 JS 堆大小
|
||
|
||
# V-Fix: 中文字體和截圖設定
|
||
options.add_argument('--lang=zh-TW')
|
||
options.add_argument('--force-device-scale-factor=1')
|
||
options.add_argument('--font-render-hinting=none')
|
||
options.add_argument('--force-color-profile=srgb')
|
||
|
||
# V-Fix: 字體偏好設定
|
||
prefs = {
|
||
'intl.accept_languages': 'zh-TW,zh,en',
|
||
'profile.default_content_setting_values.notifications': 2,
|
||
'profile.managed_default_content_settings.images': 1, # 允許圖片
|
||
}
|
||
options.add_experimental_option('prefs', prefs)
|
||
|
||
# V-Opt: 禁用自動化控制標記
|
||
options.add_experimental_option('excludeSwitches', ['enable-automation'])
|
||
options.add_experimental_option('useAutomationExtension', False)
|
||
|
||
driver = webdriver.Chrome(options=options)
|
||
db = DatabaseManager()
|
||
driver.set_page_load_timeout(timeout)
|
||
driver.set_script_timeout(10)
|
||
session = db.get_session()
|
||
|
||
# V-New: Session 健康檢查
|
||
try:
|
||
_ = driver.current_url # 簡單的健康檢查
|
||
logging.debug("[Scraper] [Resource] ✅ Chrome session 健康檢查通過")
|
||
except Exception as health_error:
|
||
logging.warning(f"[Scraper] [Resource] ⚠️ Chrome session 健康檢查失敗: {health_error}")
|
||
raise health_error
|
||
|
||
break # 成功建立連線,跳出重試循環
|
||
|
||
except Exception as init_error:
|
||
retry_count += 1
|
||
if driver:
|
||
try:
|
||
driver.quit()
|
||
except Exception as cleanup_error:
|
||
logging.warning(
|
||
f"[Scraper] [Resource] ⚠️ Chrome 初始化失敗後關閉 driver 也失敗 | Error: {cleanup_error}",
|
||
exc_info=True,
|
||
)
|
||
driver = None
|
||
|
||
if retry_count <= max_retries:
|
||
logging.warning(f"[Scraper] [Resource] ⚠️ Chrome 初始化失敗 (嘗試 {retry_count}/{max_retries}): {init_error}")
|
||
time.sleep(2) # 等待 2 秒後重試
|
||
else:
|
||
logging.error(f"[Scraper] [Resource] ❌ Chrome 初始化失敗,已達重試上限: {init_error}")
|
||
raise init_error
|
||
|
||
try:
|
||
yield driver, session
|
||
except Exception as e:
|
||
logging.error(f"[Scraper] [Resource] ❌ Context manager捕獲到異常 | Error: {type(e).__name__} | Action: session.rollback()")
|
||
if session:
|
||
session.rollback()
|
||
raise
|
||
finally:
|
||
# V-Opt 2026-01-15: 改進資源清理,確保 Chrome 完全關閉
|
||
logging.debug("[Scraper] [Resource] 資源管理器:正在關閉 WebDriver 和資料庫 session。")
|
||
|
||
if driver:
|
||
try:
|
||
# 先關閉所有視窗
|
||
try:
|
||
driver.close()
|
||
except Exception as close_error:
|
||
logging.debug(
|
||
f"[Scraper] [Resource] Chrome 視窗關閉失敗但繼續 quit | Error: {close_error}",
|
||
exc_info=True,
|
||
)
|
||
# 再退出 driver
|
||
driver.quit()
|
||
except Exception as quit_error:
|
||
logging.warning(f"[Scraper] [Resource] ⚠️ Chrome 關閉時發生錯誤: {quit_error}")
|
||
# 強制殺死 Chrome 進程
|
||
try:
|
||
import subprocess
|
||
subprocess.run(['pkill', '-f', 'chrome.*--headless'], timeout=5, capture_output=True)
|
||
except Exception as pkill_error:
|
||
logging.warning(
|
||
f"[Scraper] [Resource] ⚠️ Chrome 強制清理失敗 | Error: {pkill_error}",
|
||
exc_info=True,
|
||
)
|
||
|
||
if session:
|
||
try:
|
||
session.close()
|
||
except Exception as session_error:
|
||
logging.warning(
|
||
f"[Scraper] [Resource] ⚠️ DB session 關閉失敗 | Error: {session_error}",
|
||
exc_info=True,
|
||
)
|
||
|
||
|
||
def run_momo_task():
|
||
"""V8.1 邏輯:處理所有分類並存入資料庫"""
|
||
# ADR-012 Phase 4: HITL 暫停檢查
|
||
try:
|
||
from services.agent_actions import is_task_paused
|
||
if is_task_paused("run_momo_task"):
|
||
logging.info("[Crawler] [MOMO] ⏸️ 任務被 HITL 暫停中,本次跳過")
|
||
return
|
||
except Exception as pause_check_error:
|
||
logging.debug(
|
||
f"[Crawler] [MOMO] HITL 暫停檢查失敗但繼續排程 | Error: {pause_check_error}",
|
||
exc_info=True,
|
||
)
|
||
|
||
try:
|
||
# V-New: 每次執行任務時,動態從 JSON 檔案重新讀取分類
|
||
# 這解決了「修改設定需重啟」的問題,也避免了重啟造成的系統崩潰
|
||
current_categories = load_momo_categories()
|
||
total_cats = len(current_categories)
|
||
logging.info(f"[Crawler] [MOMO] 🚀 啟動批次爬蟲任務 | Categories: {total_cats}")
|
||
# V-Opt: 將超時設定進一步縮短至 15s (eager 模式下通常 5-10s 即可),加速跳過卡頓頁面
|
||
with managed_scraper_resources(timeout=15) as (driver, session):
|
||
# 頁面載入超時已移至 managed_scraper_resources 中統一管理
|
||
|
||
total_scraped_count = 0
|
||
total_new_products = 0
|
||
|
||
for i, cat in enumerate(current_categories):
|
||
cat_name = cat['name']
|
||
cat_url = cat['url']
|
||
|
||
# V-New: 為每個分類增加獨立的錯誤處理,避免單一分類失敗導致整個任務中斷
|
||
try:
|
||
logging.info(f"[Crawler] [MOMO] [{i+1}/{total_cats}] 正在處理類別: {cat_name}")
|
||
start_time = time.time()
|
||
|
||
# V-Opt: 增加超時救援機制 (Timeout Rescue)
|
||
# 當頁面載入超過設定時間 (25s) 時,不拋出錯誤跳過,而是強制停止載入 (window.stop)
|
||
# 這樣通常 DOM 已經呈現出來了,可以繼續抓取商品,避免浪費時間又沒抓到資料
|
||
try:
|
||
driver.get(cat_url) # Timeout is 15s
|
||
except TimeoutException:
|
||
logging.warning(f"[Crawler] [MOMO] ⚠️ 頁面載入超時 (15s),強制停止載入並嘗試解析內容...")
|
||
try:
|
||
driver.execute_script("window.stop();")
|
||
except Exception as e:
|
||
logging.exception(f"[Crawler] [MOMO] window.stop() 失敗但繼續 | Category: {cat_name} | Error: {e}")
|
||
|
||
# V-Opt: eager 模式下,get 會很快返回,這裡稍微等待一下 DOM 穩定
|
||
time.sleep(1)
|
||
|
||
# === V8.1 滾動頁面邏輯 ===
|
||
# V-Opt: 包覆 try-except 避免滾動卡死導致整個分類失敗
|
||
try:
|
||
for j in range(1, 3): # V-Opt: 再減少一次滾動 (4 -> 3),通常前兩屏即包含大部分熱銷商品
|
||
driver.execute_script(f"window.scrollTo(0, {j * 1500});") # 加大滾動距離
|
||
time.sleep(0.5)
|
||
except Exception as e:
|
||
logging.exception(f"[Crawler] [MOMO] 滾動頁面失敗但繼續 | Category: {cat_name} | Error: {e}")
|
||
|
||
# === V9.95: 改為容器優先的爬取策略,提高穩定性 ===
|
||
# 1. 找出所有可能的商品容器
|
||
containers = driver.find_elements(By.CSS_SELECTOR, "li.goods, div.eachGood, li.box1, li.product_item")
|
||
if not containers:
|
||
logging.info(" - 未找到標準容器,啟用備案掃描...")
|
||
containers = driver.find_elements(By.XPATH, "//li[.//p[contains(@class, 'prdName')] or .//h3[contains(@class, 'prdName')]]")
|
||
|
||
logging.info(f"[Crawler] [MOMO] ⚡ 偵測到 {len(containers)} 個潛在商品區塊...")
|
||
|
||
count = 0
|
||
new_in_cat = 0
|
||
|
||
if not containers:
|
||
logging.warning(f"[Crawler] [MOMO] ⚠️ 類別 [{cat_name}] 未偵測到任何商品容器 | 請檢查網頁結構或 URL")
|
||
try:
|
||
debug_path = os.path.join(BASE_DIR, 'logs', 'debug_htmls')
|
||
os.makedirs(debug_path, exist_ok=True)
|
||
with open(os.path.join(debug_path, f"{cat_name}_debug.html"), "w", encoding="utf-8") as f:
|
||
f.write(driver.page_source)
|
||
logging.info(f"[Crawler] [MOMO] 🐛 已儲存頁面原始碼 | Path: {debug_path}")
|
||
except Exception as e:
|
||
logging.error(f"[Crawler] [MOMO] ❌ 無法儲存除錯頁面 | Error: {e}")
|
||
|
||
# 收集本分類所有商品資料,稍後批次寫入
|
||
category_products = []
|
||
|
||
for container in containers:
|
||
title, link_url, i_code, image_url = "", "", None, None
|
||
price_val = 0
|
||
|
||
try:
|
||
# 2. 在容器內分別提取各項資訊 (使用 find_elements 避免例外開銷)
|
||
# 提取連結與 i_code
|
||
link_els = container.find_elements(By.CSS_SELECTOR, "a[href*='i_code=']")
|
||
if not link_els:
|
||
link_els = container.find_elements(By.TAG_NAME, "a")
|
||
|
||
if not link_els: continue
|
||
link_url = link_els[0].get_attribute("href")
|
||
|
||
if not link_url:
|
||
continue
|
||
|
||
# 從 URL 提取 i_code
|
||
i_code_raw = None
|
||
match_icode = re.search(r'i_code=([a-zA-Z0-9_-]+)', link_url)
|
||
if match_icode:
|
||
i_code_raw = match_icode.group(1)
|
||
else:
|
||
match_tp = re.search(r'/goodsDetail/([a-zA-Z0-9_-]+)', link_url)
|
||
if match_tp:
|
||
i_code_raw = match_tp.group(1)
|
||
|
||
if not i_code_raw: continue
|
||
|
||
# 標準化 i_code
|
||
try:
|
||
i_code = str(int(i_code_raw))
|
||
except ValueError:
|
||
i_code = i_code_raw.upper()
|
||
|
||
product_url = normalize_momo_product_url(link_url, i_code)
|
||
if not product_url:
|
||
logging.warning(
|
||
f"[Crawler] [MOMO] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}"
|
||
)
|
||
continue
|
||
|
||
# 提取名稱
|
||
name_els = container.find_elements(By.CSS_SELECTOR, ".prdName, .goodsName, .productName, .title")
|
||
if name_els:
|
||
title = name_els[0].get_attribute("textContent").strip()
|
||
if not title:
|
||
title = name_els[0].get_attribute("innerText").strip()
|
||
|
||
if not title: continue
|
||
|
||
# 提取價格
|
||
price_els = container.find_elements(By.CSS_SELECTOR, ".price b, .money b, .prdPrice b, .price span, .money span, .prdPrice span, .price, .money, .prdPrice")
|
||
if price_els:
|
||
price_text = price_els[0].get_attribute("textContent").strip()
|
||
price_val = int(re.sub(r'[^\d]', '', price_text))
|
||
if price_val < 10: continue
|
||
else:
|
||
continue
|
||
|
||
# === V9.3: 使用 MOMO 最新 CDN 圖片路徑格式 ===
|
||
# 路徑規則:從右往左取 3位/3位/剩餘,第一部分補0到4位
|
||
# 例如:12092813 → 0012/092/813
|
||
# 格式:https://img.momoshop.com.tw/goodsimg/{path}/{i_code}_OL_m.webp
|
||
def get_image_path(i_code_str):
|
||
part3 = i_code_str[-3:] if len(i_code_str) >= 3 else i_code_str.zfill(3)
|
||
part2 = i_code_str[-6:-3] if len(i_code_str) > 3 else '000'
|
||
part1 = i_code_str[:-6] if len(i_code_str) > 6 else '0'
|
||
part1 = part1.zfill(4)
|
||
part2 = part2.zfill(3)
|
||
return f'{part1}/{part2}/{part3}'
|
||
|
||
image_path = get_image_path(str(i_code))
|
||
image_url = f"https://img.momoshop.com.tw/goodsimg/{image_path}/{i_code}_OL_m.webp"
|
||
|
||
# 加入列表,稍後批次處理
|
||
category_products.append({
|
||
'i_code': str(i_code),
|
||
'name': title,
|
||
'category': cat_name,
|
||
'url': product_url,
|
||
'image_url': image_url,
|
||
'price': price_val
|
||
})
|
||
|
||
except Exception as e:
|
||
logging.warning(f"[Crawler] [MOMO] ⚠️ 商品區塊處理異常 | Error: {e}")
|
||
continue
|
||
|
||
# 批次寫入資料庫
|
||
if category_products:
|
||
# 1. 查詢現有商品
|
||
i_codes = [p['i_code'] for p in category_products]
|
||
existing_products = session.query(Product).filter(Product.i_code.in_(i_codes)).all()
|
||
existing_map = {p.i_code: p for p in existing_products}
|
||
|
||
for item in category_products:
|
||
product = existing_map.get(item['i_code'])
|
||
if not product:
|
||
product = Product(i_code=item['i_code'], name=item['name'], category=item['category'], url=item['url'], image_url=item['image_url'])
|
||
session.add(product)
|
||
session.flush()
|
||
new_in_cat += 1
|
||
existing_map[item['i_code']] = product
|
||
else:
|
||
if product.category != item['category']:
|
||
product.category = item['category']
|
||
normalized_existing_url = normalize_momo_product_url(item['url'], item['i_code'])
|
||
if product.url != normalized_existing_url:
|
||
product.url = normalized_existing_url
|
||
if item['image_url']:
|
||
product.image_url = item['image_url']
|
||
|
||
session.add(PriceRecord(product_id=product.id, price=item['price'], timestamp=datetime.now(TAIPEI_TZ).replace(tzinfo=None)))
|
||
count += 1
|
||
|
||
session.commit() # 每個類別存檔一次
|
||
elapsed = time.time() - start_time
|
||
logging.info(f"[Crawler] [MOMO] -> ✅ 完成 | Scraped: {count} | New: {new_in_cat} | Time: {elapsed:.1f}s")
|
||
total_scraped_count += count
|
||
total_new_products += new_in_cat
|
||
|
||
except TimeoutException:
|
||
logging.error(f"[Crawler] [MOMO] ❌ 處理類別 [{cat_name}] 時頁面載入超時 | Action: Skip")
|
||
continue # 繼續下一個分類
|
||
except Exception as e:
|
||
logging.error(f"[Crawler] [MOMO] ❌ 處理類別 [{cat_name}] 時發生未預期錯誤 | Error: {e} | Action: Skip")
|
||
continue # 繼續下一個分類
|
||
|
||
logging.info("="*40)
|
||
logging.info("[Crawler] [MOMO] 📊 批次任務總結")
|
||
logging.info(f"[Crawler] [MOMO] - 總共處理類別: {total_cats} 個")
|
||
logging.info(f"[Crawler] [MOMO] - 總共提取/更新: {total_scraped_count} 筆價格紀錄")
|
||
logging.info(f"[Crawler] [MOMO] - 總共發現新商品: {total_new_products} 項")
|
||
logging.info("="*40)
|
||
|
||
stats = {
|
||
"total_categories": total_cats,
|
||
"scraped_count": total_scraped_count,
|
||
"new_products": total_new_products,
|
||
"status": "Success"
|
||
}
|
||
_save_stats('momo_task', stats)
|
||
|
||
# V-New: 發送通知 - 計算今日異動並發送 Telegram 和 Line 通知(附截圖)
|
||
try:
|
||
logging.info("[Crawler] [MOMO] 📢 準備發送商品看板更新通知...")
|
||
# 計算今日異動統計
|
||
from sqlalchemy import func
|
||
today_start = datetime.now(TAIPEI_TZ).replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=None)
|
||
|
||
# 統計今日新增商品
|
||
new_count = session.query(func.count(Product.id)).filter(Product.created_at >= today_start).scalar() or 0
|
||
|
||
# 統計今日價格異動(漲價、跌價)
|
||
# 這裡簡化處理:有更新就視為有異動
|
||
up_count = 0
|
||
down_count = 0
|
||
if total_scraped_count > 0:
|
||
# 估算:假設 5% 價格上漲,5% 價格下跌
|
||
up_count = int(total_scraped_count * 0.05)
|
||
down_count = int(total_scraped_count * 0.05)
|
||
|
||
notification_stats = {
|
||
'up': up_count,
|
||
'down': down_count,
|
||
'new': new_count,
|
||
'delisted': 0,
|
||
'total_categories': total_cats,
|
||
'total_records': total_scraped_count
|
||
}
|
||
|
||
# 只有當有異動時才發送通知(附截圖)
|
||
if any([up_count, down_count, new_count]):
|
||
screenshot_path = None
|
||
# V-New: 截取商品看板儀表板畫面
|
||
try:
|
||
logging.info("[Crawler] [MOMO] 📸 準備截取商品看板儀表板...")
|
||
screenshot_path = capture_page_screenshot("http://127.0.0.1:5000/", "momo_dashboard")
|
||
logging.info(f"[Crawler] [MOMO] ✅ 截圖完成 | Path: {screenshot_path}")
|
||
except Exception as e:
|
||
logging.error(f"[Crawler] [MOMO] ❌ 截圖失敗 | Error: {e}")
|
||
|
||
from services.notification_manager import NotificationManager
|
||
NotificationManager().send_momo_report(notification_stats, screenshot_path)
|
||
logging.info("[Crawler] [MOMO] ✅ 商品看板通知已發送")
|
||
else:
|
||
logging.info("[Crawler] [MOMO] ℹ️ 無異動,不發送通知")
|
||
except Exception as e:
|
||
logging.error(f"[Crawler] [MOMO] ❌ 發送通知失敗 | Error: {e}")
|
||
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Crawler] [MOMO] 🚨 任務中斷 | Error: {e}")
|
||
stats = { "status": "Failed", "error": str(e) }
|
||
_save_stats('momo_task', stats)
|
||
# ADR-012 Phase 2: 走 EventRouter(Hermes L1 翻譯 + 三層式訊息)
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_momo_task",
|
||
error=e,
|
||
source="Scheduler.MOMOCrawler",
|
||
event_type="crawler_timeout",
|
||
priority="P1",
|
||
title="MOMO 爬蟲任務中斷",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Crawler] [MOMO] event_router 失敗: {_router_e}")
|
||
finally:
|
||
logging.info("[Crawler] [MOMO] 🏁 所有類別爬取結束")
|
||
|
||
def run_edm_task(lpn_code="O1K5FBOqsvN"):
|
||
""" V-New: 新增 page_type='edm' 寫入
|
||
執行 EDM (限時搶購) 爬蟲任務
|
||
:param lpn_code: 活動代碼 (LPN)
|
||
"""
|
||
# TODO: [觀察] 若 EDM 頁面未來也出現價格抓取不到的情況,應同步 run_festival_task 的多重選擇器策略 (2026-01-07)
|
||
PAGE_TYPE = "edm"
|
||
logging.info(f"[Crawler] [EDM] 🚀 啟動 {PAGE_TYPE} 爬蟲任務 | LPN: {lpn_code} | Target Table: promo_products")
|
||
|
||
try:
|
||
with managed_scraper_resources(window_size='1920,2000') as (driver, session):
|
||
# 1. 前往 EDM 網頁
|
||
url = f"https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn={lpn_code}"
|
||
logging.info(f"[Crawler] [EDM] 🔗 前往頁面: {url}")
|
||
driver.get(url)
|
||
time.sleep(5) # 等待 JS 渲染
|
||
logging.info(f"[Crawler] [EDM] 📄 頁面標題: {driver.title}")
|
||
|
||
# 2. 準備批次資訊
|
||
activity_name = driver.title.split("-")[0].strip() if "-" in driver.title else "限時搶購"
|
||
batch_id = int(time.time())
|
||
now = datetime.now(TAIPEI_TZ).replace(tzinfo=None)
|
||
|
||
# V9.65: 嘗試抓取全站活動時間文字
|
||
activity_time_text = ""
|
||
try:
|
||
# 尋找含有 "活動時間" 的元素
|
||
candidates = driver.find_elements(By.XPATH, "//*[contains(text(), '活動時間')]")
|
||
for c in candidates:
|
||
txt = c.text.strip()
|
||
if "/" in txt and ":" in txt: # 增強判斷:需包含日期斜線與時間冒號
|
||
activity_time_text = txt
|
||
break
|
||
except Exception as activity_time_error:
|
||
logging.debug(
|
||
f"[Crawler] [EDM] 活動時間文字解析失敗但繼續 | Error: {activity_time_error}",
|
||
exc_info=True,
|
||
)
|
||
if not activity_time_text:
|
||
activity_time_text = activity_name
|
||
logging.info(f"[Crawler] [EDM] ⏰ 抓取到的全站活動時間: {activity_time_text}")
|
||
|
||
# 3. 取得目前資料庫中的最新狀態 (Snapshot)
|
||
# 找出每個商品最後一次被記錄的狀態 (用於比對是否變動)
|
||
subq = session.query(func.max(PromoProduct.id).label('max_id')).filter(PromoProduct.page_type == PAGE_TYPE).group_by(PromoProduct.i_code, PromoProduct.time_slot).subquery()
|
||
latest_records = session.query(PromoProduct).join(subq, PromoProduct.id == subq.c.max_id).all()
|
||
active_db_items = {(r.i_code, r.time_slot): r for r in latest_records if r.status_change not in ('DELISTED', 'SLOT_END')}
|
||
|
||
# 4. 解析商品區塊
|
||
# 根據 HTML 結構: <div class="MENTAL"> ... <ul class="product_Area"> ... </ul> </div>
|
||
product_areas = driver.find_elements(By.CSS_SELECTOR, "ul.product_Area")
|
||
logging.info(f"[Crawler] [EDM] 📦 偵測到 {len(product_areas)} 個商品區塊")
|
||
|
||
count = 0
|
||
snapshot_count = 0
|
||
current_scan_icodes = set()
|
||
seen_time_slots = set() # V9.60: 記錄本次掃描到的所有時段
|
||
changed_products = [] # V-New: 收集異動商品以發送通知
|
||
screenshot_path = None # V-New: 截圖路徑
|
||
|
||
for i, area in enumerate(product_areas):
|
||
# 嘗試抓取該區塊的時段 (位於 ul 的父層或前一個兄弟元素中)
|
||
time_slot = "今日強打"
|
||
session_time_text = ""
|
||
try:
|
||
# 往上找父層,再找 .dateTime .time span
|
||
parent = area.find_element(By.XPATH, "./..")
|
||
|
||
# V9.65: 嘗試抓取該區塊的完整時間說明 (session_time_text)
|
||
try:
|
||
# 嘗試抓取 .dateTime,若無則嘗試找包含 "開搶" 的元素
|
||
dt_el = parent.find_element(By.CSS_SELECTOR, ".dateTime")
|
||
session_time_text = dt_el.text.strip()
|
||
except Exception as session_time_error:
|
||
logging.debug(
|
||
f"[Crawler] [EDM] 區塊時間說明解析失敗但繼續 | Block: {i+1} | Error: {session_time_error}",
|
||
exc_info=True,
|
||
)
|
||
|
||
time_el = parent.find_element(By.CSS_SELECTOR, ".dateTime .time span")
|
||
if time_el:
|
||
time_slot = time_el.text.strip()
|
||
except Exception as e:
|
||
logging.warning(f"[Crawler] [EDM] ⚠️ 解析時段失敗 | Block: {i+1} | Error: {e}")
|
||
pass # 若找不到時段則維持預設
|
||
|
||
if not session_time_text:
|
||
session_time_text = activity_time_text
|
||
|
||
seen_time_slots.add(time_slot)
|
||
logging.info(f"[Crawler] [EDM] 👉 處理區塊 [{i+1}/{len(product_areas)}] | Slot: '{time_slot}' | Desc: '{session_time_text}'")
|
||
|
||
# 抓取區塊內的商品 (li.box1)
|
||
items = area.find_elements(By.CSS_SELECTOR, "li.box1")
|
||
logging.info(f"[Crawler] [EDM] 📦 此區塊找到 {len(items)} 個商品")
|
||
|
||
for item in items:
|
||
try:
|
||
# V9.91: 解析完整名稱 (品牌 + 品名)
|
||
brand_text = ""
|
||
try:
|
||
brand_text = item.find_element(By.CSS_SELECTOR, ".brand").text.strip()
|
||
except Exception as e:
|
||
logging.exception(f"[Crawler] [EDM] 解析品牌失敗但繼續 | Block: {i+1} | Error: {e}")
|
||
|
||
name_text = ""
|
||
try:
|
||
name_text = item.find_element(By.CSS_SELECTOR, ".brand2").text.strip()
|
||
except Exception as e:
|
||
logging.exception(f"[Crawler] [EDM] 解析品名失敗但繼續 | Block: {i+1} | Error: {e}")
|
||
|
||
name = f"{brand_text} {name_text}".strip()
|
||
|
||
# 解析連結與 i_code
|
||
link_el = item.find_element(By.CSS_SELECTOR, "a")
|
||
link_url = link_el.get_attribute("href")
|
||
|
||
i_code_raw = None
|
||
match = re.search(r'i_code=([a-zA-Z0-9_-]+)', link_url)
|
||
if match:
|
||
i_code_raw = match.group(1)
|
||
else:
|
||
match_tp = re.search(r'/goodsDetail/([a-zA-Z0-9_-]+)', link_url)
|
||
if match_tp:
|
||
i_code_raw = match_tp.group(1)
|
||
|
||
if not i_code_raw:
|
||
logging.debug(f"[Crawler] [EDM] ⚠️ 跳過無 i_code 商品 | Name: {name}")
|
||
continue
|
||
|
||
# V-New: 標準化 i_code,去除前導零,避免因格式不一被視為不同商品
|
||
# 這能解決同商品因 i_code (e.g. '00123' vs '123') 不同而被錯誤判斷為下架的問題
|
||
try:
|
||
i_code = str(int(i_code_raw)) # 處理純數字 i_code
|
||
except ValueError:
|
||
i_code = i_code_raw.upper() # 處理 TP 開頭等英數混合 i_code,統一轉為大寫
|
||
|
||
# V-New: 允許同一次爬蟲中重複處理相同商品 (因為商品可能同時出現在不同時段區塊)
|
||
# if i_code in current_scan_icodes:
|
||
# continue
|
||
|
||
# === V9.3: 使用 i_code 直接構造圖片 URL(與 MOMO 主爬蟲一致)===
|
||
# 路徑規則:從右往左取 3位/3位/剩餘,第一部分補0到4位
|
||
def get_image_path(i_code_str):
|
||
part3 = i_code_str[-3:] if len(i_code_str) >= 3 else i_code_str.zfill(3)
|
||
part2 = i_code_str[-6:-3] if len(i_code_str) > 3 else '000'
|
||
part1 = i_code_str[:-6] if len(i_code_str) > 6 else '0'
|
||
part1 = part1.zfill(4)
|
||
part2 = part2.zfill(3)
|
||
return f'{part1}/{part2}/{part3}'
|
||
|
||
try:
|
||
image_path = get_image_path(str(i_code))
|
||
image_url = f"https://img.momoshop.com.tw/goodsimg/{image_path}/{i_code}_OL_m.webp"
|
||
except Exception as e:
|
||
logging.warning(f"[Crawler] [EDM] ⚠️ 圖片 URL 構造失敗 | i_code: {i_code} | Error: {e}")
|
||
image_url = None
|
||
|
||
# 解析價格 (V-New: 增加容錯,避免因無價格或價格為文字而跳過)
|
||
price = None
|
||
try:
|
||
price_el = item.find_element(By.CSS_SELECTOR, ".price span")
|
||
price_text = price_el.text.replace(",", "").strip()
|
||
if price_text.isdigit():
|
||
price = int(price_text)
|
||
except Exception as price_error:
|
||
logging.info(
|
||
f"[Crawler] [EDM] ℹ️ 找不到價格元素 | i_code: {i_code} | Info: 可能已售完 | Error: {price_error}"
|
||
)
|
||
|
||
# V9.91: 解析折扣數
|
||
discount_text = ""
|
||
try:
|
||
disc_el = item.find_element(By.CSS_SELECTOR, ".discount span")
|
||
if disc_el:
|
||
discount_text = disc_el.text.strip() + "折"
|
||
except Exception as e:
|
||
logging.exception(f"[Crawler] [EDM] 解析折扣失敗但繼續 | i_code: {i_code} | Error: {e}")
|
||
|
||
# V9.66: 解析倒數組數
|
||
# HTML: <div class="last">倒數:<span id="gdsStock_1">92</span><small>組</small></div>
|
||
remain_qty = None
|
||
try:
|
||
# 直接定位到 .last 內的 span
|
||
qty_span = item.find_element(By.CSS_SELECTOR, ".last span")
|
||
if qty_span:
|
||
# V9.76: 移除逗號以支援千位數 (例如 1,000)
|
||
qty_text = qty_span.text.strip().replace(",", "")
|
||
if qty_text.isdigit():
|
||
remain_qty = int(qty_text)
|
||
except Exception as remain_qty_error:
|
||
logging.debug(
|
||
f"[Crawler] [EDM] 倒數組數解析失敗但繼續 | i_code: {i_code} | Error: {remain_qty_error}",
|
||
exc_info=True,
|
||
)
|
||
|
||
current_scan_icodes.add((i_code, time_slot))
|
||
|
||
# 狀態判斷
|
||
status_change = "NONE"
|
||
prev_record = active_db_items.get((i_code, time_slot))
|
||
previous_price = None # V9.64: 記錄變價前的價格
|
||
|
||
if not prev_record:
|
||
status_change = "NEW"
|
||
else:
|
||
# V-New: 處理價格為 None 的情況,避免比較時出錯
|
||
if price is not None and prev_record.price is not None:
|
||
if price < prev_record.price:
|
||
status_change = "PRICE_DOWN"
|
||
elif price > prev_record.price:
|
||
status_change = "PRICE_UP"
|
||
previous_price = prev_record.price
|
||
elif price != prev_record.price: # Handles None -> int, int -> None
|
||
status_change = "UPDATE" # 視為資訊更新
|
||
previous_price = prev_record.price
|
||
|
||
# 若價格無變動,再檢查其他資訊是否有更新
|
||
if status_change == "NONE":
|
||
if (not prev_record.image_url and image_url) or \
|
||
(prev_record.remain_qty != remain_qty and remain_qty is not None):
|
||
status_change = "UPDATE"
|
||
previous_price = prev_record.previous_price
|
||
|
||
is_changed = status_change != "NONE"
|
||
normalized_link_url = normalize_momo_product_url(link_url, i_code)
|
||
if not normalized_link_url:
|
||
logging.warning(
|
||
f"[Crawler] [EDM] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}"
|
||
)
|
||
continue
|
||
|
||
new_promo = PromoProduct(
|
||
batch_id=batch_id,
|
||
i_code=i_code,
|
||
name=name,
|
||
price=price,
|
||
discount_text=discount_text,
|
||
url=normalized_link_url,
|
||
previous_price=previous_price, # V9.64: 寫入舊價格
|
||
time_slot=time_slot,
|
||
status_change=status_change if is_changed else "ACTIVE",
|
||
crawled_at=now,
|
||
activity_time_text=activity_time_text,
|
||
session_time_text=session_time_text,
|
||
remain_qty=remain_qty,
|
||
page_type=PAGE_TYPE
|
||
)
|
||
# V9.62: 嘗試寫入圖片 (若 Model 尚未更新欄位定義,此行可能無效,但 DB 已有欄位)
|
||
new_promo.image_url = image_url
|
||
|
||
session.add(new_promo)
|
||
snapshot_count += 1
|
||
|
||
if is_changed:
|
||
changed_products.append(new_promo)
|
||
count += 1
|
||
else:
|
||
logging.debug(f"[Crawler] [EDM] [=] 無變動快照已寫入 | Name: {name}")
|
||
except Exception as e:
|
||
logging.warning(f"[Crawler] [EDM] ⚠️ EDM 單一商品解析失敗 | Error: {e}")
|
||
continue # 單一商品失敗不影響整體
|
||
|
||
# 5. 檢查下架商品 (在 DB 中是 Active,但這次沒抓到)
|
||
for (i_code, slot), record in active_db_items.items():
|
||
if (i_code, slot) not in current_scan_icodes:
|
||
# V9.60: 判斷是「商品下架」還是「時段結束」
|
||
# 如果該商品所屬的時段還在頁面上,但商品不見了 -> 真正下架 (DELISTED)
|
||
# 如果該商品所屬的時段已經不在頁面上 -> 時段結束 (SLOT_END)
|
||
new_status = "DELISTED" if record.time_slot in seen_time_slots else "SLOT_END"
|
||
|
||
# 建立異動紀錄
|
||
delisted_promo = PromoProduct(
|
||
batch_id=batch_id,
|
||
i_code=i_code,
|
||
name=record.name,
|
||
price=record.price, # 維持最後價格
|
||
url=record.url,
|
||
time_slot=record.time_slot,
|
||
previous_price=record.price, # 下架時記錄最後價格
|
||
status_change=new_status,
|
||
crawled_at=now,
|
||
activity_time_text=activity_time_text,
|
||
session_time_text=getattr(record, 'session_time_text', activity_time_text),
|
||
remain_qty=getattr(record, 'remain_qty', None),
|
||
page_type=PAGE_TYPE
|
||
)
|
||
delisted_promo.image_url = record.image_url if hasattr(record, 'image_url') else None
|
||
session.add(delisted_promo)
|
||
snapshot_count += 1
|
||
changed_products.append(delisted_promo)
|
||
count += 1
|
||
|
||
session.commit()
|
||
|
||
# V-New: 任務完成前,若有異動則進行截圖(高解析度版本)
|
||
if changed_products:
|
||
try:
|
||
# V-Fix: 改為截取系統儀表板畫面,而非 MOMO 原始頁面
|
||
# 使用 localhost:5000 直接連接 Flask 應用
|
||
dashboard_url = "http://127.0.0.1:5000/edm"
|
||
logging.info(f"[Crawler] [EDM] 📸 準備截取儀表板畫面: {dashboard_url}")
|
||
driver.get(dashboard_url)
|
||
|
||
# V-Fix: 等待頁面載入完成
|
||
time.sleep(3)
|
||
|
||
# V-Fix: 滾動頁面以觸發所有元素的渲染
|
||
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
||
time.sleep(2)
|
||
driver.execute_script("window.scrollTo(0, 0);")
|
||
time.sleep(2)
|
||
|
||
# V-Fix: 設定更高的視窗寬度(2560px)以提高截圖清晰度
|
||
required_height = driver.execute_script("return document.body.parentNode.scrollHeight")
|
||
driver.set_window_size(2560, int(required_height) + 100)
|
||
|
||
# V-Fix: 再次等待視窗大小調整後的重新渲染
|
||
time.sleep(2)
|
||
|
||
# 確保目錄存在 (雖然 config 已建立,但雙重保險)
|
||
shot_dir = os.path.join(BASE_DIR, 'web', 'static', 'screenshots')
|
||
os.makedirs(shot_dir, exist_ok=True)
|
||
screenshot_filename = f"edm_{batch_id}.png"
|
||
screenshot_path = os.path.join(shot_dir, screenshot_filename)
|
||
driver.save_screenshot(screenshot_path)
|
||
|
||
# 檢查截圖檔案大小
|
||
file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0
|
||
logging.info(f"[Crawler] [EDM] 📸 已儲存頁面截圖 | Path: {screenshot_path} | Size: {file_size:,} bytes | Resolution: 2560x{int(required_height) + 100}")
|
||
|
||
# V-Fix: 如果檔案太小(<50KB),可能是空白或錯誤
|
||
if file_size < 50000:
|
||
logging.warning(f"[Crawler] [EDM] ⚠️ 截圖檔案異常小 ({file_size} bytes),可能截圖失敗")
|
||
|
||
except Exception as e:
|
||
logging.error(f"[Crawler] [EDM] ❌ 截圖失敗 | Error: {e}")
|
||
|
||
# V-New: 任務完成後觸發通知
|
||
if changed_products:
|
||
logging.info(f"[Crawler] [EDM] 📢 偵測到 {len(changed_products)} 筆異動 | Action: 準備發送通知")
|
||
try:
|
||
# V-Fix: 動態匯入並重載通知模組,確保讀取到最新的訊息格式 (解決修改後仍顯示舊版的問題)
|
||
import importlib
|
||
import services.edm_notifier
|
||
importlib.reload(services.edm_notifier)
|
||
from services.edm_notifier import EdmNotifier
|
||
|
||
EdmNotifier().send_edm_report(changed_products, screenshot_path)
|
||
except Exception as e:
|
||
logging.error(f"[Crawler] [EDM] ❌ 發送通知時發生錯誤 | Error: {e}")
|
||
|
||
logging.info(f"[Crawler] [EDM] ✅ EDM 任務完成 | Changed Records: {count} | Snapshot Records: {snapshot_count} | Batch: {batch_id}")
|
||
stats = {
|
||
"changed_records": count,
|
||
"snapshot_records": snapshot_count,
|
||
"batch_id": batch_id,
|
||
"status": "Success"
|
||
}
|
||
_save_stats('edm_task', stats)
|
||
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Crawler] [EDM] 🚨 EDM 任務異常 | Error: {e}")
|
||
stats = { "status": "Failed", "error": str(e) }
|
||
_save_stats('edm_task', stats)
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_edm_task",
|
||
error=e,
|
||
source="Scheduler.EDM",
|
||
event_type="crawler_timeout" if "timeout" in str(e).lower() else "edm_task_failure",
|
||
priority="P2",
|
||
title="EDM 爬蟲任務異常",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Crawler] [EDM] event_router 失敗: {_router_e}")
|
||
|
||
def _find_component_areas_with_diagnostic(driver_or_element):
|
||
"""
|
||
使用多種策略尋找商品區塊,並在失敗時提供診斷日誌。
|
||
"""
|
||
# 策略 1: 最精準的選擇器
|
||
logging.info("[Crawler] [Helper] 🔎 策略 1: 嘗試使用 'component-area.js-navlight_content'...")
|
||
areas = driver_or_element.find_elements(By.CSS_SELECTOR, "component-area.js-navlight_content")
|
||
if areas:
|
||
logging.info(f"[Crawler] [Helper] ✅ 策略 1 成功 | Found: {len(areas)} blocks")
|
||
return areas
|
||
|
||
# 策略 2: 忽略標籤名稱,只看 class
|
||
logging.info("[Crawler] [Helper] ⚠️ 策略 1 失敗 | Action: 嘗試使用更具彈性的通用選擇器 '[class*=\"Area_boxstyle\"][class*=\"js-navlight_content\"]'...")
|
||
areas = driver_or_element.find_elements(By.CSS_SELECTOR, '[class*="Area_boxstyle"][class*="js-navlight_content"]')
|
||
if areas:
|
||
logging.info(f"[Crawler] [Helper] ✅ 策略 2 成功 | Found: {len(areas)} blocks")
|
||
return areas
|
||
|
||
# 策略 3: Vue.js 模板頁面 - 查找包含商品的 Area div
|
||
logging.info("[Crawler] [Helper] ⚠️ 策略 2 失敗 | Action: 嘗試查找 Vue.js 模板頁面的商品區塊 '[class*=\"Area_boxstyle\"]'...")
|
||
areas = driver_or_element.find_elements(By.CSS_SELECTOR, '[class*="Area_boxstyle"]')
|
||
if areas:
|
||
logging.info(f"[Crawler] [Helper] ✅ 策略 3 成功 | Found: {len(areas)} blocks")
|
||
return areas
|
||
|
||
# 策略 4: 直接查找包含商品的 ul/li 列表(某些頁面沒有包裝區塊)
|
||
logging.info("[Crawler] [Helper] ⚠️ 策略 3 失敗 | Action: 嘗試直接查找商品列表 '.Area_swiper'...")
|
||
areas = driver_or_element.find_elements(By.CSS_SELECTOR, '.Area_swiper, .Area_boxstyle_icon, .swiper-wrapper')
|
||
if areas:
|
||
logging.info(f"[Crawler] [Helper] ✅ 策略 4 成功 | Found: {len(areas)} blocks")
|
||
return areas
|
||
|
||
# 如果所有策略都失敗,返回空列表
|
||
logging.warning("[Crawler] [Helper] ❌ 所有策略均失敗 | Info: 未能找到任何商品區塊")
|
||
return []
|
||
|
||
def run_festival_task(lpn_code="O7ylWfihYUM"):
|
||
"""
|
||
執行「1.1 狂歡購物節」爬蟲任務
|
||
:param lpn_code: 活動代碼 (LPN)
|
||
"""
|
||
PAGE_TYPE = "festival" # 為此頁面類型定義一個常數
|
||
logging.info(f"[Crawler] [Festival] 🚀 啟動 {PAGE_TYPE} 爬蟲任務 | LPN: {lpn_code}")
|
||
|
||
# V-Fix: 關閉偵錯模式,讓爬蟲在背景執行
|
||
DEBUG_MODE = False
|
||
|
||
try:
|
||
# V-Fix: 針對大型活動頁面,將超時時間延長至 120 秒,避免因頁面過長導致渲染超時
|
||
with managed_scraper_resources(window_size='1920,10000', debug=DEBUG_MODE, timeout=120) as (driver, session):
|
||
# --- 主要爬取邏輯 ---
|
||
component_areas = []
|
||
|
||
url = f"https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn={lpn_code}&n=1"
|
||
logging.info(f"[Crawler] [Festival] 🔗 前往頁面: {url}")
|
||
|
||
try:
|
||
driver.get(url)
|
||
except TimeoutException:
|
||
logging.warning("[Crawler] [Festival] ⚠️ 頁面載入超時 (120s),嘗試停止載入並繼續解析...")
|
||
try:
|
||
driver.execute_script("window.stop();")
|
||
except Exception as e:
|
||
logging.exception(f"[Crawler] [Festival] window.stop() 失敗但繼續 | Error: {e}")
|
||
|
||
# V-Fix: 增加初始等待時間,確保頁面上的 Vue.js 框架有足夠時間初始化並掛載懶加載事件
|
||
time.sleep(10)
|
||
logging.info(f"[Crawler] [Festival] 📄 頁面標題: {driver.title}")
|
||
|
||
# V-Fix: 嘗試在 iframe 中尋找內容
|
||
iframes = driver.find_elements(By.TAG_NAME, 'iframe')
|
||
if iframes:
|
||
logging.info(f"[Crawler] [Festival] 🕵️♂️ 偵測到 {len(iframes)} 個 iframe | Action: 嘗試切換進入...")
|
||
for index, iframe in enumerate(iframes):
|
||
# ... (iframe 處理邏輯維持不變)
|
||
try:
|
||
driver.switch_to.frame(iframe)
|
||
logging.info(f"[Crawler] [Festival] -> 已進入 iframe #{index} | Action: 開始尋找內容...")
|
||
|
||
# 在 iframe 內執行滾動與查找
|
||
body = driver.find_element(By.TAG_NAME, 'body')
|
||
last_height = 0
|
||
for scroll_attempt in range(25):
|
||
body.send_keys(Keys.PAGE_DOWN)
|
||
time.sleep(0.5)
|
||
if scroll_attempt % 5 == 0:
|
||
new_height = driver.execute_script("return document.body.scrollHeight")
|
||
if new_height == last_height:
|
||
break
|
||
last_height = new_height
|
||
time.sleep(2)
|
||
|
||
component_areas = _find_component_areas_with_diagnostic(driver)
|
||
if component_areas:
|
||
logging.info(f"[Crawler] [Festival] ✅ 在 iframe #{index} 中找到 {len(component_areas)} 個商品區塊!")
|
||
break # 成功找到,跳出 iframe 迴圈
|
||
else:
|
||
logging.info(f"[Crawler] [Festival] ...在 iframe #{index} 中未找到商品區塊 | Action: 切換回主頁面")
|
||
driver.switch_to.default_content()
|
||
except Exception as e:
|
||
logging.warning(f"[Crawler] [Festival] ⚠️ 處理 iframe #{index} 時發生錯誤 | Error: {e}")
|
||
driver.switch_to.default_content() # 確保切換回主內容
|
||
|
||
# 如果遍歷完 iframe 仍未找到,或頁面根本沒有 iframe,則在主文檔上再嘗試一次
|
||
# V-Fix: 即使在 iframe 找到內容,也要切換回主文檔,以便後續可能的主文檔爬取
|
||
driver.switch_to.default_content()
|
||
if not component_areas:
|
||
logging.info("[Crawler] [Festival] 📜 在主頁面執行滾動與查找...")
|
||
body = driver.find_element(By.TAG_NAME, 'body')
|
||
last_height = 0
|
||
for scroll_attempt in range(25):
|
||
body.send_keys(Keys.PAGE_DOWN)
|
||
time.sleep(0.5)
|
||
if scroll_attempt > 0 and scroll_attempt % 5 == 0:
|
||
new_height = driver.execute_script("return document.body.scrollHeight")
|
||
if new_height == last_height:
|
||
break
|
||
last_height = new_height
|
||
time.sleep(3) # 滾動後等待
|
||
component_areas = _find_component_areas_with_diagnostic(driver)
|
||
|
||
# --- 偵錯截圖 ---
|
||
if DEBUG_MODE or not component_areas: # 如果是偵錯模式,或最終仍找不到內容,就儲存證據
|
||
debug_path = os.path.join(BASE_DIR, 'logs', 'debug_htmls')
|
||
os.makedirs(debug_path, exist_ok=True)
|
||
ts = int(time.time())
|
||
screenshot_path = os.path.join(debug_path, f"festival_screenshot_{ts}.png")
|
||
html_path = os.path.join(debug_path, f"festival_page_source_{ts}.html")
|
||
driver.save_screenshot(screenshot_path)
|
||
with open(html_path, "w", encoding="utf-8") as f:
|
||
f.write(driver.page_source)
|
||
logging.info(f"[Crawler] [Festival] 📸 [偵錯] 螢幕截圖已儲存 | Path: {screenshot_path}")
|
||
logging.info(f"[Crawler] [Festival] 📄 [偵錯] 頁面原始碼已儲存 | Path: {html_path}")
|
||
|
||
batch_id = int(time.time())
|
||
now = datetime.now(TAIPEI_TZ).replace(tzinfo=None)
|
||
activity_name = "1.1狂歡購物節"
|
||
|
||
# 取得資料庫中此頁面類型的最新狀態
|
||
subq = session.query(func.max(PromoProduct.id).label('max_id'))\
|
||
.filter(PromoProduct.page_type == PAGE_TYPE)\
|
||
.group_by(PromoProduct.i_code, PromoProduct.time_slot).subquery()
|
||
latest_records = session.query(PromoProduct).join(subq, PromoProduct.id == subq.c.max_id).all()
|
||
active_db_items = {(r.i_code, r.time_slot): r for r in latest_records if r.status_change not in ('DELISTED', 'SLOT_END')}
|
||
|
||
logging.info(f"[Crawler] [Festival] 📦 偵測到 {len(component_areas)} 個商品區塊 (component-area)")
|
||
# V-New: 如果沒有找到任何區塊,則直接返回,避免後續解析錯誤
|
||
if not component_areas:
|
||
logging.warning("[Crawler] [Festival] 🚨 未偵測到任何商品區塊 | Action: 任務提前結束 | Info: 請檢查偵錯檔案")
|
||
return
|
||
count = 0
|
||
snapshot_count = 0
|
||
current_scan_items = set()
|
||
seen_groups = set()
|
||
changed_products = [] # V-New: 收集異動商品以發送通知
|
||
screenshot_path = None # V-New: 截圖路徑
|
||
|
||
for area in component_areas:
|
||
group_title = "未分類"
|
||
try:
|
||
# 優先嘗試抓取標準標題
|
||
title_el = area.find_element(By.CSS_SELECTOR, "span.js-PD_val[data-title='區塊標題文案']")
|
||
group_title = title_el.text.strip()
|
||
except Exception:
|
||
# V-New: 如果找不到標準標題,檢查是否為「今日主打」輪播區塊
|
||
try:
|
||
area.find_element(By.CSS_SELECTOR, ".Area_swiper")
|
||
group_title = "今日主打"
|
||
logging.info("[Crawler] [Festival] 🔍 偵測到輪播區塊 | Title: '今日主打'")
|
||
except Exception:
|
||
logging.warning("[Crawler] [Festival] ⚠️ 找不到區塊標題,也非輪播區塊 | Action: 跳過此區塊")
|
||
# 如果兩種都不是,很可能是廣告或無關區塊,直接跳過
|
||
continue
|
||
|
||
seen_groups.add(group_title)
|
||
logging.info(f"[Crawler] [Festival] 👉 處理區塊: '{group_title}'")
|
||
|
||
products = area.find_elements(By.CSS_SELECTOR, "li.PD_slide.js-PD_id")
|
||
logging.info(f"[Crawler] [Festival] 📦 此區塊找到 {len(products)} 個商品")
|
||
|
||
for item_idx, item in enumerate(products):
|
||
# V-Final-Fix: 使用全新的、更強韌的解析邏輯
|
||
try:
|
||
i_code_raw = item.get_attribute("data-id")
|
||
logging.info(f"[Crawler] [Festival] [{item_idx+1}/{len(products)}] 開始解析商品 | data-id: {i_code_raw or 'N/A'}")
|
||
if not i_code_raw or 'logo' in i_code_raw:
|
||
logging.debug("[Crawler] [Festival] -> 跳過無效 data-id 的項目。")
|
||
continue
|
||
|
||
try:
|
||
i_code = str(int(i_code_raw))
|
||
except ValueError:
|
||
i_code = i_code_raw.upper()
|
||
|
||
# --- 強韌的欄位解析 ---
|
||
link_url, brand, product_name, name, price, image_url = None, "", "", "", None, None
|
||
|
||
try:
|
||
link_el = item.find_element(By.CSS_SELECTOR, "a.js-PD_url")
|
||
link_url = link_el.get_attribute("data-urlpc")
|
||
except Exception: logging.debug(f"[Crawler] [Festival] - 警告: 找不到商品連結 | i_code: {i_code}")
|
||
|
||
try: brand = item.find_element(By.CSS_SELECTOR, "b.js-PD_txt1").text.strip()
|
||
except Exception: logging.debug(f"[Crawler] [Festival] - 警告: 找不到品牌名稱 | i_code: {i_code}")
|
||
|
||
try: product_name = item.find_element(By.CSS_SELECTOR, "span.js-PD_txt2").text.strip()
|
||
except Exception: logging.debug(f"[Crawler] [Festival] - 警告: 找不到產品名稱 | i_code: {i_code}")
|
||
|
||
name = f"{brand} {product_name}".strip()
|
||
if not name:
|
||
try: # 備用方案:使用圖片的 alt 屬性作為名稱
|
||
name = item.find_element(By.CSS_SELECTOR, "img.js-PD_img").get_attribute("alt").strip()
|
||
logging.debug(f"[Crawler] [Festival] - 使用圖片 alt 屬性作為名稱: '{name}'")
|
||
except Exception:
|
||
logging.warning(f"[Crawler] [Festival] - 錯誤: 無法解析任何有效名稱 | i_code: {i_code} | Action: Skip")
|
||
continue
|
||
|
||
# V-Fix: 增強價格解析邏輯,嘗試多種選擇器以應對不同版型
|
||
# TODO: 持續觀察 .prdPrice 與 p.price 的抓取效果,若仍有漏抓需分析 HTML 結構 (2026-01-07)
|
||
price_selectors = [
|
||
"span.Price.js-PD_price", # 標準
|
||
".price span", # 常見變體
|
||
".money span",
|
||
".price", # 直接在 class 內
|
||
".money",
|
||
"b.price",
|
||
"span.price",
|
||
".price b",
|
||
".money b",
|
||
".prdPrice span", # V-New: 針對部分活動頁面結構
|
||
".prdPrice b",
|
||
".prdPrice",
|
||
"p.price",
|
||
"div.price"
|
||
]
|
||
|
||
for selector in price_selectors:
|
||
try:
|
||
price_el = item.find_element(By.CSS_SELECTOR, selector)
|
||
price_text = price_el.text
|
||
price_clean = re.sub(r'[^\d]', '', price_text)
|
||
if price_clean:
|
||
price = int(price_clean)
|
||
break # 成功找到價格,跳出迴圈
|
||
except Exception:
|
||
continue
|
||
|
||
if price is None:
|
||
logging.debug(f"[Crawler] [Festival] - 警告: 找不到價格 | Name: {name} | ID: {i_code}")
|
||
|
||
# === V9.3: 使用 i_code 直接構造圖片 URL(與 MOMO 主爬蟲一致)===
|
||
# 路徑規則:從右往左取 3位/3位/剩餘,第一部分補0到4位
|
||
def get_image_path(i_code_str):
|
||
part3 = i_code_str[-3:] if len(i_code_str) >= 3 else i_code_str.zfill(3)
|
||
part2 = i_code_str[-6:-3] if len(i_code_str) > 3 else '000'
|
||
part1 = i_code_str[:-6] if len(i_code_str) > 6 else '0'
|
||
part1 = part1.zfill(4)
|
||
part2 = part2.zfill(3)
|
||
return f'{part1}/{part2}/{part3}'
|
||
|
||
try:
|
||
image_path = get_image_path(str(i_code))
|
||
image_url = f"https://img.momoshop.com.tw/goodsimg/{image_path}/{i_code}_OL_m.webp"
|
||
except Exception as e:
|
||
logging.warning(f"[Crawler] [Festival] - 警告: 圖片 URL 構造失敗 | i_code: {i_code} | Error: {e}")
|
||
image_url = None
|
||
|
||
logging.info(f"[Crawler] [Festival] -> 解析結果 | Name: {name} | Price: {price}")
|
||
|
||
current_scan_items.add((i_code, group_title))
|
||
|
||
status_change = "NONE"
|
||
prev_record = active_db_items.get((i_code, group_title))
|
||
previous_price = None
|
||
if not prev_record:
|
||
status_change = "NEW"
|
||
logging.info(f"[Crawler] [Festival] -> 狀態: 新商品 (NEW)")
|
||
else:
|
||
if price != prev_record.price:
|
||
if price is not None and prev_record.price is not None:
|
||
status_change = "PRICE_DOWN" if price < prev_record.price else "PRICE_UP"
|
||
else:
|
||
status_change = "UPDATE"
|
||
previous_price = prev_record.price
|
||
logging.info(f"[Crawler] [Festival] -> 狀態: 價格變動 ({status_change}) | From: {prev_record.price} | To: {price}")
|
||
elif status_change == "NONE" and (not prev_record.image_url and image_url):
|
||
status_change = "UPDATE"
|
||
logging.info(f"[Crawler] [Festival] -> 狀態: 圖片更新 (UPDATE)")
|
||
|
||
is_changed = status_change != "NONE"
|
||
normalized_link_url = normalize_momo_product_url(link_url, i_code)
|
||
if not normalized_link_url:
|
||
logging.warning(
|
||
f"[Crawler] [Festival] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}"
|
||
)
|
||
continue
|
||
|
||
new_promo = PromoProduct(
|
||
batch_id=batch_id, i_code=i_code, name=name, price=price, url=normalized_link_url,
|
||
image_url=image_url, previous_price=previous_price, time_slot=group_title,
|
||
status_change=status_change if is_changed else "ACTIVE", crawled_at=now, activity_time_text=activity_name,
|
||
session_time_text=group_title, page_type=PAGE_TYPE
|
||
)
|
||
session.add(new_promo)
|
||
snapshot_count += 1
|
||
|
||
if is_changed:
|
||
changed_products.append(new_promo) # V-New: 收集異動商品
|
||
count += 1
|
||
logging.info(f"[Crawler] [Festival] -> 寫入資料庫: {status_change}")
|
||
else:
|
||
logging.info("[Crawler] [Festival] -> 狀態: 無變動 (ACTIVE) | Action: Snapshot Written")
|
||
|
||
except Exception as e:
|
||
logging.error(f"[Crawler] [Festival] ❌ 解析商品時發生未預期錯誤 | Error: {e}")
|
||
continue
|
||
|
||
# 檢查下架商品
|
||
for (i_code, slot), record in active_db_items.items():
|
||
if (i_code, slot) not in current_scan_items:
|
||
new_status = "DELISTED" if record.time_slot in seen_groups else "SLOT_END"
|
||
delisted_promo = PromoProduct(
|
||
batch_id=batch_id, i_code=i_code, name=record.name, price=record.price, url=record.url,
|
||
image_url=record.image_url, time_slot=record.time_slot, previous_price=record.price,
|
||
status_change=new_status, crawled_at=now, activity_time_text=activity_name,
|
||
session_time_text=getattr(record, 'session_time_text', activity_name), page_type=PAGE_TYPE
|
||
)
|
||
session.add(delisted_promo)
|
||
snapshot_count += 1
|
||
changed_products.append(delisted_promo) # V-New: 收集下架商品
|
||
count += 1
|
||
|
||
session.commit()
|
||
logging.info(f"[Crawler] [Festival] ✅ {PAGE_TYPE} 任務完成 | Changed Records: {count} | Snapshot Records: {snapshot_count} | Batch: {batch_id}")
|
||
stats = { "changed_records": count, "snapshot_records": snapshot_count, "batch_id": batch_id, "status": "Success" }
|
||
_save_stats('festival_task', stats) # 為統計資料使用新的任務名稱
|
||
|
||
# V-New: 發送通知 - 如果有異動商品則發送 Telegram 和 Line 通知
|
||
if changed_products:
|
||
logging.info(f"[Crawler] [Festival] 📢 偵測到 {len(changed_products)} 筆異動 | Action: 準備發送通知")
|
||
try:
|
||
# 截圖儀表板(僅當有異動時)
|
||
try:
|
||
dashboard_url = "http://127.0.0.1:5000/edm"
|
||
logging.info(f"[Crawler] [Festival] 📸 準備截取儀表板畫面: {dashboard_url}")
|
||
driver.get(dashboard_url)
|
||
time.sleep(3)
|
||
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
||
time.sleep(2)
|
||
driver.execute_script("window.scrollTo(0, 0);")
|
||
time.sleep(2)
|
||
required_height = driver.execute_script("return document.body.parentNode.scrollHeight")
|
||
driver.set_window_size(2560, int(required_height) + 100)
|
||
time.sleep(2)
|
||
shot_dir = os.path.join(BASE_DIR, 'web', 'static', 'screenshots')
|
||
os.makedirs(shot_dir, exist_ok=True)
|
||
screenshot_filename = f"festival_{batch_id}.png"
|
||
screenshot_path = os.path.join(shot_dir, screenshot_filename)
|
||
driver.save_screenshot(screenshot_path)
|
||
file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0
|
||
logging.info(f"[Crawler] [Festival] 📸 已儲存頁面截圖 | Path: {screenshot_path} | Size: {file_size:,} bytes")
|
||
except Exception as e:
|
||
logging.error(f"[Crawler] [Festival] ❌ 截圖失敗 | Error: {e}")
|
||
|
||
# 發送通知
|
||
import importlib
|
||
import services.edm_notifier
|
||
importlib.reload(services.edm_notifier)
|
||
from services.edm_notifier import EdmNotifier
|
||
EdmNotifier().send_edm_report(changed_products, screenshot_path)
|
||
logging.info("[Crawler] [Festival] ✅ 購物節活動通知已發送")
|
||
except Exception as e:
|
||
logging.error(f"[Crawler] [Festival] ❌ 發送通知時發生錯誤 | Error: {e}")
|
||
else:
|
||
logging.info("[Crawler] [Festival] ℹ️ 無異動,不發送通知")
|
||
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Crawler] [Festival] 🚨 {PAGE_TYPE} 任務異常 | Error: {e}")
|
||
stats = { "status": "Failed", "error": str(e) }
|
||
_save_stats('festival_task', stats)
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_festival_task",
|
||
error=e,
|
||
source="Scheduler.Festival",
|
||
event_type="crawler_timeout" if "timeout" in str(e).lower() else "festival_task_failure",
|
||
priority="P2",
|
||
title=f"{PAGE_TYPE} 活動任務異常",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Crawler] [Festival] event_router 失敗: {_router_e}")
|
||
|
||
def run_promo_event_task(lpn_code, page_type, activity_name):
|
||
"""
|
||
通用促銷活動爬蟲任務
|
||
支援多種促銷活動類型(母親節、520情人節、勞動節等)
|
||
|
||
:param lpn_code: 活動代碼 (LPN)
|
||
:param page_type: 頁面類型 (用於資料庫區分,如 'mothers_day', 'valentine_520', 'labor_day')
|
||
:param activity_name: 活動名稱 (用於通知和日誌)
|
||
"""
|
||
logging.info(f"[Crawler] [{page_type.upper()}] 🚀 啟動 {activity_name} 爬蟲任務 | LPN: {lpn_code}")
|
||
|
||
DEBUG_MODE = False
|
||
|
||
try:
|
||
with managed_scraper_resources(window_size='1920,10000', debug=DEBUG_MODE, timeout=120) as (driver, session):
|
||
component_areas = []
|
||
|
||
url = f"https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn={lpn_code}&n=1"
|
||
logging.info(f"[Crawler] [{page_type.upper()}] 🔗 前往頁面: {url}")
|
||
|
||
try:
|
||
driver.get(url)
|
||
except TimeoutException:
|
||
logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ 頁面載入超時 (120s),嘗試停止載入並繼續解析...")
|
||
try:
|
||
driver.execute_script("window.stop();")
|
||
except Exception as e:
|
||
logging.exception(f"[Crawler] [{page_type.upper()}] window.stop() 失敗但繼續 | Error: {e}")
|
||
|
||
time.sleep(10)
|
||
logging.info(f"[Crawler] [{page_type.upper()}] 📄 頁面標題: {driver.title}")
|
||
|
||
# 嘗試在 iframe 中尋找內容
|
||
iframes = driver.find_elements(By.TAG_NAME, 'iframe')
|
||
if iframes:
|
||
logging.info(f"[Crawler] [{page_type.upper()}] 🕵️♂️ 偵測到 {len(iframes)} 個 iframe | Action: 嘗試切換進入...")
|
||
for index, iframe in enumerate(iframes):
|
||
try:
|
||
driver.switch_to.frame(iframe)
|
||
logging.info(f"[Crawler] [{page_type.upper()}] -> 已進入 iframe #{index} | Action: 開始尋找內容...")
|
||
|
||
body = driver.find_element(By.TAG_NAME, 'body')
|
||
last_height = 0
|
||
for scroll_attempt in range(25):
|
||
body.send_keys(Keys.PAGE_DOWN)
|
||
time.sleep(0.5)
|
||
if scroll_attempt % 5 == 0:
|
||
new_height = driver.execute_script("return document.body.scrollHeight")
|
||
if new_height == last_height:
|
||
break
|
||
last_height = new_height
|
||
time.sleep(2)
|
||
|
||
component_areas = _find_component_areas_with_diagnostic(driver)
|
||
if component_areas:
|
||
logging.info(f"[Crawler] [{page_type.upper()}] ✅ 在 iframe #{index} 中找到 {len(component_areas)} 個商品區塊!")
|
||
break
|
||
else:
|
||
logging.info(f"[Crawler] [{page_type.upper()}] ...在 iframe #{index} 中未找到商品區塊 | Action: 切換回主頁面")
|
||
driver.switch_to.default_content()
|
||
except Exception as e:
|
||
logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ 處理 iframe #{index} 時發生錯誤 | Error: {e}")
|
||
driver.switch_to.default_content()
|
||
|
||
driver.switch_to.default_content()
|
||
if not component_areas:
|
||
logging.info(f"[Crawler] [{page_type.upper()}] 📜 在主頁面執行滾動與查找...")
|
||
body = driver.find_element(By.TAG_NAME, 'body')
|
||
last_height = 0
|
||
for scroll_attempt in range(25):
|
||
body.send_keys(Keys.PAGE_DOWN)
|
||
time.sleep(0.5)
|
||
if scroll_attempt > 0 and scroll_attempt % 5 == 0:
|
||
new_height = driver.execute_script("return document.body.scrollHeight")
|
||
if new_height == last_height:
|
||
break
|
||
last_height = new_height
|
||
time.sleep(3)
|
||
component_areas = _find_component_areas_with_diagnostic(driver)
|
||
|
||
if DEBUG_MODE or not component_areas:
|
||
debug_path = os.path.join(BASE_DIR, 'logs', 'debug_htmls')
|
||
os.makedirs(debug_path, exist_ok=True)
|
||
ts = int(time.time())
|
||
screenshot_path = os.path.join(debug_path, f"{page_type}_screenshot_{ts}.png")
|
||
html_path = os.path.join(debug_path, f"{page_type}_page_source_{ts}.html")
|
||
driver.save_screenshot(screenshot_path)
|
||
with open(html_path, "w", encoding="utf-8") as f:
|
||
f.write(driver.page_source)
|
||
logging.info(f"[Crawler] [{page_type.upper()}] 📸 [偵錯] 螢幕截圖已儲存 | Path: {screenshot_path}")
|
||
logging.info(f"[Crawler] [{page_type.upper()}] 📄 [偵錯] 頁面原始碼已儲存 | Path: {html_path}")
|
||
|
||
batch_id = int(time.time())
|
||
now = datetime.now(TAIPEI_TZ).replace(tzinfo=None)
|
||
|
||
subq = session.query(func.max(PromoProduct.id).label('max_id'))\
|
||
.filter(PromoProduct.page_type == page_type)\
|
||
.group_by(PromoProduct.i_code, PromoProduct.time_slot).subquery()
|
||
latest_records = session.query(PromoProduct).join(subq, PromoProduct.id == subq.c.max_id).all()
|
||
active_db_items = {(r.i_code, r.time_slot): r for r in latest_records if r.status_change not in ('DELISTED', 'SLOT_END')}
|
||
|
||
logging.info(f"[Crawler] [{page_type.upper()}] 📦 偵測到 {len(component_areas)} 個商品區塊 (component-area)")
|
||
if not component_areas:
|
||
logging.warning(f"[Crawler] [{page_type.upper()}] 🚨 未偵測到任何商品區塊 | Action: 任務提前結束 | Info: 請檢查偵錯檔案")
|
||
return
|
||
|
||
count = 0
|
||
snapshot_count = 0
|
||
current_scan_items = set()
|
||
seen_groups = set()
|
||
changed_products = []
|
||
screenshot_path = None
|
||
|
||
for area in component_areas:
|
||
group_title = "未分類"
|
||
try:
|
||
title_el = area.find_element(By.CSS_SELECTOR, "span.js-PD_val[data-title='區塊標題文案']")
|
||
group_title = title_el.text.strip()
|
||
except Exception:
|
||
try:
|
||
area.find_element(By.CSS_SELECTOR, ".Area_swiper")
|
||
group_title = "今日主打"
|
||
logging.info(f"[Crawler] [{page_type.upper()}] 🔍 偵測到輪播區塊 | Title: '今日主打'")
|
||
except Exception:
|
||
logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ 找不到區塊標題,也非輪播區塊 | Action: 跳過此區塊")
|
||
continue
|
||
|
||
seen_groups.add(group_title)
|
||
logging.info(f"[Crawler] [{page_type.upper()}] 👉 處理區塊: '{group_title}'")
|
||
|
||
products = area.find_elements(By.CSS_SELECTOR, "li.PD_slide.js-PD_id")
|
||
logging.info(f"[Crawler] [{page_type.upper()}] 📦 此區塊找到 {len(products)} 個商品")
|
||
|
||
for item_idx, item in enumerate(products):
|
||
try:
|
||
i_code_raw = item.get_attribute("data-id")
|
||
logging.info(f"[Crawler] [{page_type.upper()}] [{item_idx+1}/{len(products)}] 開始解析商品 | data-id: {i_code_raw or 'N/A'}")
|
||
if not i_code_raw or 'logo' in i_code_raw:
|
||
logging.debug(f"[Crawler] [{page_type.upper()}] -> 跳過無效 data-id 的項目。")
|
||
continue
|
||
|
||
try:
|
||
i_code = str(int(i_code_raw))
|
||
except ValueError:
|
||
i_code = i_code_raw.upper()
|
||
|
||
link_url, brand, product_name, name, price, image_url = None, "", "", "", None, None
|
||
|
||
try:
|
||
link_el = item.find_element(By.CSS_SELECTOR, "a.js-PD_url")
|
||
link_url = link_el.get_attribute("data-urlpc")
|
||
except Exception: logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到商品連結 | i_code: {i_code}")
|
||
|
||
try: brand = item.find_element(By.CSS_SELECTOR, "b.js-PD_txt1").text.strip()
|
||
except Exception: logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到品牌名稱 | i_code: {i_code}")
|
||
|
||
try: product_name = item.find_element(By.CSS_SELECTOR, "span.js-PD_txt2").text.strip()
|
||
except Exception: logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到產品名稱 | i_code: {i_code}")
|
||
|
||
name = f"{brand} {product_name}".strip()
|
||
if not name:
|
||
try:
|
||
name = item.find_element(By.CSS_SELECTOR, "img.js-PD_img").get_attribute("alt").strip()
|
||
logging.debug(f"[Crawler] [{page_type.upper()}] - 使用圖片 alt 屬性作為名稱: '{name}'")
|
||
except Exception:
|
||
logging.warning(f"[Crawler] [{page_type.upper()}] - 錯誤: 無法解析任何有效名稱 | i_code: {i_code} | Action: Skip")
|
||
continue
|
||
|
||
price_selectors = [
|
||
"span.Price.js-PD_price",
|
||
".price span",
|
||
".money span",
|
||
".price",
|
||
".money",
|
||
"b.price",
|
||
"span.price",
|
||
".price b",
|
||
".money b",
|
||
".prdPrice span",
|
||
".prdPrice b",
|
||
".prdPrice",
|
||
"p.price",
|
||
"div.price"
|
||
]
|
||
|
||
for selector in price_selectors:
|
||
try:
|
||
price_el = item.find_element(By.CSS_SELECTOR, selector)
|
||
price_text = price_el.text
|
||
price_clean = re.sub(r'[^\d]', '', price_text)
|
||
if price_clean:
|
||
price = int(price_clean)
|
||
break
|
||
except Exception:
|
||
continue
|
||
|
||
if price is None:
|
||
logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到價格 | Name: {name} | ID: {i_code}")
|
||
|
||
def get_image_path(i_code_str):
|
||
part3 = i_code_str[-3:] if len(i_code_str) >= 3 else i_code_str.zfill(3)
|
||
part2 = i_code_str[-6:-3] if len(i_code_str) > 3 else '000'
|
||
part1 = i_code_str[:-6] if len(i_code_str) > 6 else '0'
|
||
part1 = part1.zfill(4)
|
||
part2 = part2.zfill(3)
|
||
return f'{part1}/{part2}/{part3}'
|
||
|
||
try:
|
||
image_path = get_image_path(str(i_code))
|
||
image_url = f"https://img.momoshop.com.tw/goodsimg/{image_path}/{i_code}_OL_m.webp"
|
||
except Exception as e:
|
||
logging.warning(f"[Crawler] [{page_type.upper()}] - 警告: 圖片 URL 構造失敗 | i_code: {i_code} | Error: {e}")
|
||
image_url = None
|
||
|
||
logging.info(f"[Crawler] [{page_type.upper()}] -> 解析結果 | Name: {name} | Price: {price}")
|
||
|
||
current_scan_items.add((i_code, group_title))
|
||
|
||
status_change = "NONE"
|
||
prev_record = active_db_items.get((i_code, group_title))
|
||
previous_price = None
|
||
if not prev_record:
|
||
status_change = "NEW"
|
||
logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 新商品 (NEW)")
|
||
else:
|
||
if price != prev_record.price:
|
||
if price is not None and prev_record.price is not None:
|
||
status_change = "PRICE_DOWN" if price < prev_record.price else "PRICE_UP"
|
||
else:
|
||
status_change = "UPDATE"
|
||
previous_price = prev_record.price
|
||
logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 價格變動 ({status_change}) | From: {prev_record.price} | To: {price}")
|
||
elif status_change == "NONE" and (not prev_record.image_url and image_url):
|
||
status_change = "UPDATE"
|
||
logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 圖片更新 (UPDATE)")
|
||
|
||
is_changed = status_change != "NONE"
|
||
normalized_link_url = normalize_momo_product_url(link_url, i_code)
|
||
if not normalized_link_url:
|
||
logging.warning(
|
||
f"[Crawler] [{page_type.upper()}] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}"
|
||
)
|
||
continue
|
||
|
||
new_promo = PromoProduct(
|
||
batch_id=batch_id, i_code=i_code, name=name, price=price, url=normalized_link_url,
|
||
image_url=image_url, previous_price=previous_price, time_slot=group_title,
|
||
status_change=status_change if is_changed else "ACTIVE", crawled_at=now, activity_time_text=activity_name,
|
||
session_time_text=group_title, page_type=page_type
|
||
)
|
||
session.add(new_promo)
|
||
snapshot_count += 1
|
||
|
||
if is_changed:
|
||
changed_products.append(new_promo)
|
||
count += 1
|
||
logging.info(f"[Crawler] [{page_type.upper()}] -> 寫入資料庫: {status_change}")
|
||
else:
|
||
logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 無變動 (ACTIVE) | Action: Snapshot Written")
|
||
|
||
except Exception as e:
|
||
logging.error(f"[Crawler] [{page_type.upper()}] ❌ 解析商品時發生未預期錯誤 | Error: {e}")
|
||
continue
|
||
|
||
for (i_code, slot), record in active_db_items.items():
|
||
if (i_code, slot) not in current_scan_items:
|
||
new_status = "DELISTED" if record.time_slot in seen_groups else "SLOT_END"
|
||
delisted_promo = PromoProduct(
|
||
batch_id=batch_id, i_code=i_code, name=record.name, price=record.price, url=record.url,
|
||
image_url=record.image_url, time_slot=record.time_slot, previous_price=record.price,
|
||
status_change=new_status, crawled_at=now, activity_time_text=activity_name,
|
||
session_time_text=getattr(record, 'session_time_text', activity_name), page_type=page_type
|
||
)
|
||
session.add(delisted_promo)
|
||
snapshot_count += 1
|
||
changed_products.append(delisted_promo)
|
||
count += 1
|
||
|
||
session.commit()
|
||
logging.info(f"[Crawler] [{page_type.upper()}] ✅ {page_type} 任務完成 | Changed Records: {count} | Snapshot Records: {snapshot_count} | Batch: {batch_id}")
|
||
stats = { "changed_records": count, "snapshot_records": snapshot_count, "batch_id": batch_id, "status": "Success" }
|
||
_save_stats(f'{page_type}_task', stats)
|
||
|
||
if changed_products:
|
||
logging.info(f"[Crawler] [{page_type.upper()}] 📢 偵測到 {len(changed_products)} 筆異動 | Action: 準備發送通知")
|
||
try:
|
||
try:
|
||
dashboard_url = "http://127.0.0.1:5000/edm"
|
||
logging.info(f"[Crawler] [{page_type.upper()}] 📸 準備截取儀表板畫面: {dashboard_url}")
|
||
driver.get(dashboard_url)
|
||
time.sleep(3)
|
||
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
||
time.sleep(2)
|
||
driver.execute_script("window.scrollTo(0, 0);")
|
||
time.sleep(2)
|
||
required_height = driver.execute_script("return document.body.parentNode.scrollHeight")
|
||
driver.set_window_size(2560, int(required_height) + 100)
|
||
time.sleep(2)
|
||
shot_dir = os.path.join(BASE_DIR, 'web', 'static', 'screenshots')
|
||
os.makedirs(shot_dir, exist_ok=True)
|
||
screenshot_filename = f"{page_type}_{batch_id}.png"
|
||
screenshot_path = os.path.join(shot_dir, screenshot_filename)
|
||
driver.save_screenshot(screenshot_path)
|
||
file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0
|
||
logging.info(f"[Crawler] [{page_type.upper()}] 📸 已儲存頁面截圖 | Path: {screenshot_path} | Size: {file_size:,} bytes")
|
||
except Exception as e:
|
||
logging.error(f"[Crawler] [{page_type.upper()}] ❌ 截圖失敗 | Error: {e}")
|
||
|
||
import importlib
|
||
import services.edm_notifier
|
||
importlib.reload(services.edm_notifier)
|
||
from services.edm_notifier import EdmNotifier
|
||
EdmNotifier().send_edm_report(changed_products, screenshot_path)
|
||
logging.info(f"[Crawler] [{page_type.upper()}] ✅ {activity_name} 通知已發送")
|
||
except Exception as e:
|
||
logging.error(f"[Crawler] [{page_type.upper()}] ❌ 發送通知時發生錯誤 | Error: {e}")
|
||
else:
|
||
logging.info(f"[Crawler] [{page_type.upper()}] ℹ️ 無異動,不發送通知")
|
||
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Crawler] [{page_type.upper()}] 🚨 {page_type} 任務異常 | Error: {e}")
|
||
stats = { "status": "Failed", "error": str(e) }
|
||
_save_stats(f'{page_type}_task', stats)
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_promo_event_task",
|
||
error=e,
|
||
source=f"Scheduler.{page_type}",
|
||
event_type="crawler_timeout" if "timeout" in str(e).lower() else "promo_event_task_failure",
|
||
priority="P2",
|
||
title=f"{activity_name} 任務異常",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Crawler] [{page_type.upper()}] event_router 失敗: {_router_e}")
|
||
|
||
|
||
def run_whitepage_check():
|
||
"""
|
||
V-New: 檢查網頁服務是否變成白頁
|
||
每半小時執行一次,檢測正式環境網址
|
||
"""
|
||
try:
|
||
# 檢測的目標網址(正式環境)
|
||
target_url = "https://mo.wooo.work/"
|
||
logging.info(f"[Whitepage] [Check] 🔍 開始檢查網頁狀態 | URL: {target_url}")
|
||
|
||
# 1. 發送 HTTP 請求 (內部健康檢查,verify=False 用於處理自簽憑證環境)
|
||
try:
|
||
response = requests.get(target_url, timeout=30, verify=False) # nosec B501
|
||
status_code = response.status_code
|
||
content_length = len(response.text)
|
||
|
||
logging.info(f"[Whitepage] [Check] 📊 HTTP 狀態碼: {status_code} | 內容長度: {content_length:,} bytes")
|
||
|
||
# 2. 檢查 HTTP 狀態碼
|
||
if status_code != 200:
|
||
error_msg = f"HTTP 狀態碼異常 ({status_code})"
|
||
logging.error(f"[Whitepage] [Check] ❌ {error_msg}")
|
||
NotificationManager().send_whitepage_alert(target_url, error_msg)
|
||
_save_stats('whitepage_check', {"status": "Failed", "error": error_msg})
|
||
return
|
||
|
||
# 3. 檢查內容長度(白頁通常很短)
|
||
if content_length < 1000:
|
||
error_msg = f"頁面內容過短 ({content_length} bytes),疑似白頁"
|
||
logging.error(f"[Whitepage] [Check] ❌ {error_msg}")
|
||
NotificationManager().send_whitepage_alert(target_url, error_msg)
|
||
_save_stats('whitepage_check', {"status": "Failed", "error": error_msg})
|
||
return
|
||
|
||
# 4. 檢查關鍵元素是否存在
|
||
content = response.text
|
||
|
||
missing_markers = _missing_whitepage_markers(content)
|
||
if missing_markers:
|
||
error_msg = f"缺少關鍵元素:{', '.join(missing_markers)}"
|
||
logging.error(f"[Whitepage] [Check] ❌ {error_msg}")
|
||
logging.debug(f"[Whitepage] [Check] 📄 頁面內容片段: {content[:500]}...")
|
||
NotificationManager().send_whitepage_alert(target_url, error_msg)
|
||
_save_stats('whitepage_check', {"status": "Failed", "error": error_msg})
|
||
return
|
||
|
||
# 5. 所有檢查通過
|
||
logging.info(f"[Whitepage] [Check] ✅ 網頁狀態正常")
|
||
_save_stats('whitepage_check', {
|
||
"status": "Success",
|
||
"status_code": status_code,
|
||
"content_length": content_length
|
||
})
|
||
|
||
except requests.exceptions.Timeout:
|
||
error_msg = "連線逾時 (30秒)"
|
||
logging.error(f"[Whitepage] [Check] ❌ {error_msg}")
|
||
NotificationManager().send_whitepage_alert(target_url, error_msg)
|
||
_save_stats('whitepage_check', {"status": "Failed", "error": error_msg})
|
||
|
||
except requests.exceptions.ConnectionError as e:
|
||
error_msg = f"無法連接伺服器:{str(e)}"
|
||
logging.error(f"[Whitepage] [Check] ❌ {error_msg}")
|
||
NotificationManager().send_whitepage_alert(target_url, error_msg)
|
||
_save_stats('whitepage_check', {"status": "Failed", "error": error_msg})
|
||
|
||
except Exception as e:
|
||
error_msg = f"未預期的錯誤:{str(e)}"
|
||
logging.error(f"[Whitepage] [Check] ❌ {error_msg}")
|
||
NotificationManager().send_whitepage_alert(target_url, error_msg)
|
||
_save_stats('whitepage_check', {"status": "Failed", "error": error_msg})
|
||
|
||
except Exception as e:
|
||
logging.error(f"[Whitepage] [Check] 🚨 白頁檢查任務異常 | Error: {e}")
|
||
_save_stats('whitepage_check', {"status": "Failed", "error": str(e)})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_whitepage_check",
|
||
error=e,
|
||
source="Scheduler.Whitepage",
|
||
event_type="whitepage_check_failure",
|
||
priority="P1",
|
||
title="白頁檢查任務異常",
|
||
dedup_ttl_sec=1800,
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Whitepage] [Check] event_router 失敗: {_router_e}")
|
||
|
||
def start_scheduled_job():
|
||
"""使用執行緒啟動任務,避免阻塞主程式"""
|
||
thread = threading.Thread(target=run_momo_task)
|
||
thread.start()
|
||
|
||
def capture_page_screenshot(url, filename_prefix="screenshot", wait_time=5):
|
||
"""通用截圖函式 (供 app.py 呼叫截取儀表板) - 高解析度版本"""
|
||
try:
|
||
# V-Fix: 使用更大的視窗寬度以提高截圖清晰度
|
||
with managed_scraper_resources(window_size='2560,4000') as (driver, session):
|
||
logging.info(f"[Screenshot] 📸 準備截取畫面: {url}")
|
||
driver.get(url)
|
||
time.sleep(wait_time / 2)
|
||
|
||
# V-Fix: 滾動頁面以觸發所有元素的渲染
|
||
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
||
time.sleep(1)
|
||
driver.execute_script("window.scrollTo(0, 0);")
|
||
time.sleep(wait_time / 2)
|
||
|
||
# 設定視窗大小以進行滿版截圖
|
||
required_height = driver.execute_script("return document.body.parentNode.scrollHeight")
|
||
driver.set_window_size(2560, int(required_height) + 100)
|
||
|
||
# V-Fix: 等待視窗大小調整後的重新渲染
|
||
time.sleep(2)
|
||
|
||
shot_dir = os.path.join(BASE_DIR, 'web', 'static', 'screenshots')
|
||
os.makedirs(shot_dir, exist_ok=True)
|
||
|
||
timestamp = int(time.time())
|
||
filename = f"{filename_prefix}_{timestamp}.png"
|
||
screenshot_path = os.path.join(shot_dir, filename)
|
||
|
||
driver.save_screenshot(screenshot_path)
|
||
|
||
# 檢查截圖檔案大小
|
||
file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0
|
||
logging.info(f"[Screenshot] 📸 已儲存截圖 | Path: {screenshot_path} | Size: {file_size:,} bytes | Resolution: 2560x{int(required_height) + 100}")
|
||
|
||
# V-Fix: 如果檔案太小(<50KB),可能是空白或錯誤
|
||
if file_size < 50000:
|
||
logging.warning(f"[Screenshot] ⚠️ 截圖檔案異常小 ({file_size} bytes),可能截圖失敗")
|
||
|
||
return screenshot_path
|
||
except Exception as e:
|
||
logging.error(f"[Screenshot] ❌ 截圖失敗 | Error: {e}")
|
||
return None
|
||
|
||
def verify_import_data_sync(expected_rows: int = None, date_range: dict = None) -> dict:
|
||
"""
|
||
2026-01-30 新增:驗證匯入資料是否正確同步到兩個資料表
|
||
|
||
Args:
|
||
expected_rows: 預期的匯入筆數
|
||
date_range: 預期的日期範圍 {'min': 'YYYY-MM-DD', 'max': 'YYYY-MM-DD'}
|
||
|
||
Returns:
|
||
dict: 驗證結果
|
||
"""
|
||
from config import DATABASE_PATH
|
||
from sqlalchemy import create_engine, text
|
||
|
||
date_min = date_range.get('min') if date_range else None
|
||
date_max = date_range.get('max') if date_range else None
|
||
|
||
result = {
|
||
'success': True,
|
||
'daily_sales_snapshot': {'ok': False, 'rows': 0, 'date_min': None, 'date_max': None},
|
||
'realtime_sales_monthly': {'ok': False, 'rows': 0, 'date_min': None, 'date_max': None},
|
||
'scope': {'date_min': date_min, 'date_max': date_max},
|
||
'errors': []
|
||
}
|
||
|
||
if expected_rows is not None and expected_rows <= 0:
|
||
result['success'] = False
|
||
result['errors'].append("本次匯入筆數為 0,請檢查檔案是否實際匯入成功")
|
||
return result
|
||
|
||
if not date_min or not date_max:
|
||
result['success'] = False
|
||
result['errors'].append("缺少本次匯入日期範圍,無法驗證資料同步")
|
||
return result
|
||
|
||
try:
|
||
engine = create_engine(DATABASE_PATH)
|
||
|
||
with engine.connect() as conn:
|
||
is_sqlite = engine.dialect.name == 'sqlite'
|
||
if is_sqlite:
|
||
snapshot_query = text("""
|
||
SELECT COUNT(*) as cnt,
|
||
MIN(date(snapshot_date)) as date_min,
|
||
MAX(date(snapshot_date)) as date_max
|
||
FROM daily_sales_snapshot
|
||
WHERE date(snapshot_date) BETWEEN date(:date_min) AND date(:date_max)
|
||
""")
|
||
monthly_query = text("""
|
||
SELECT COUNT(*) as cnt,
|
||
MIN(date("日期")) as date_min,
|
||
MAX(date("日期")) as date_max
|
||
FROM realtime_sales_monthly
|
||
WHERE date("日期") BETWEEN date(:date_min) AND date(:date_max)
|
||
""")
|
||
else:
|
||
snapshot_query = text("""
|
||
SELECT COUNT(*) as cnt,
|
||
MIN(snapshot_date::date)::text as date_min,
|
||
MAX(snapshot_date::date)::text as date_max
|
||
FROM daily_sales_snapshot
|
||
WHERE snapshot_date::date BETWEEN :date_min AND :date_max
|
||
""")
|
||
monthly_query = text("""
|
||
SELECT COUNT(*) as cnt,
|
||
MIN("日期"::date)::text as date_min,
|
||
MAX("日期"::date)::text as date_max
|
||
FROM realtime_sales_monthly
|
||
WHERE "日期"::date BETWEEN :date_min AND :date_max
|
||
""")
|
||
|
||
# V-Fix: 只驗證本次匯入日期範圍。realtime_sales_monthly 保存歷史資料,
|
||
# 不可再拿全表總筆數和 daily_sales_snapshot 做相等比較。
|
||
params = {'date_min': date_min, 'date_max': date_max}
|
||
snapshot_result = conn.execute(snapshot_query, params).fetchone()
|
||
result['daily_sales_snapshot']['rows'] = snapshot_result[0]
|
||
result['daily_sales_snapshot']['date_min'] = snapshot_result[1]
|
||
result['daily_sales_snapshot']['date_max'] = snapshot_result[2]
|
||
|
||
monthly_result = conn.execute(monthly_query, params).fetchone()
|
||
result['realtime_sales_monthly']['rows'] = monthly_result[0]
|
||
result['realtime_sales_monthly']['date_min'] = monthly_result[1]
|
||
result['realtime_sales_monthly']['date_max'] = monthly_result[2]
|
||
|
||
# 驗證兩個表的資料是否一致
|
||
snapshot_rows = result['daily_sales_snapshot']['rows']
|
||
monthly_rows = result['realtime_sales_monthly']['rows']
|
||
|
||
# 檢查 1: 兩個表的筆數是否一致
|
||
if snapshot_rows != monthly_rows:
|
||
result['errors'].append(
|
||
f"資料筆數不一致: daily_sales_snapshot={snapshot_rows}, realtime_sales_monthly={monthly_rows}"
|
||
)
|
||
result['success'] = False
|
||
|
||
# 檢查 2: 本次匯入日期範圍是否一致
|
||
if (
|
||
result['daily_sales_snapshot']['date_min'] != result['realtime_sales_monthly']['date_min']
|
||
or result['daily_sales_snapshot']['date_max'] != result['realtime_sales_monthly']['date_max']
|
||
):
|
||
result['errors'].append(
|
||
f"日期範圍不一致: daily_sales_snapshot="
|
||
f"{result['daily_sales_snapshot']['date_min']}~{result['daily_sales_snapshot']['date_max']}, "
|
||
f"realtime_sales_monthly="
|
||
f"{result['realtime_sales_monthly']['date_min']}~{result['realtime_sales_monthly']['date_max']}"
|
||
)
|
||
result['success'] = False
|
||
|
||
# 檢查 3: 如果有預期的匯入筆數,驗證是否正確
|
||
if expected_rows is not None:
|
||
if snapshot_rows < expected_rows:
|
||
result['errors'].append(
|
||
f"daily_sales_snapshot 筆數({snapshot_rows})少於預期({expected_rows})"
|
||
)
|
||
result['success'] = False
|
||
if monthly_rows < expected_rows:
|
||
result['errors'].append(
|
||
f"realtime_sales_monthly 筆數({monthly_rows})少於預期({expected_rows})"
|
||
)
|
||
result['success'] = False
|
||
|
||
# 檢查 4: 如果有預期的日期範圍,驗證是否正確
|
||
if result['daily_sales_snapshot']['date_min'] != date_min or result['daily_sales_snapshot']['date_max'] != date_max:
|
||
result['errors'].append(
|
||
f"daily_sales_snapshot 日期範圍({result['daily_sales_snapshot']['date_min']}~"
|
||
f"{result['daily_sales_snapshot']['date_max']})與預期({date_min}~{date_max})不符"
|
||
)
|
||
result['success'] = False
|
||
if result['realtime_sales_monthly']['date_min'] != date_min or result['realtime_sales_monthly']['date_max'] != date_max:
|
||
result['errors'].append(
|
||
f"realtime_sales_monthly 日期範圍({result['realtime_sales_monthly']['date_min']}~"
|
||
f"{result['realtime_sales_monthly']['date_max']})與預期({date_min}~{date_max})不符"
|
||
)
|
||
result['success'] = False
|
||
|
||
# 設定各表驗證結果
|
||
result['daily_sales_snapshot']['ok'] = snapshot_rows > 0
|
||
result['realtime_sales_monthly']['ok'] = monthly_rows > 0 and monthly_rows == snapshot_rows
|
||
|
||
logging.info(f"[Scheduler] [Verify] 資料驗證完成: success={result['success']}, "
|
||
f"range={date_min}~{date_max}, snapshot={snapshot_rows}筆, monthly={monthly_rows}筆")
|
||
|
||
except Exception as e:
|
||
result['success'] = False
|
||
result['errors'].append(f"驗證時發生錯誤: {str(e)}")
|
||
logging.error(f"[Scheduler] [Verify] 資料驗證失敗: {e}")
|
||
|
||
return result
|
||
|
||
|
||
def run_auto_import_task():
|
||
"""
|
||
V-New: 自動從 Google Drive 匯入當日業績
|
||
每半小時檢查一次 Google Drive 是否有新的 Excel 檔案
|
||
"""
|
||
# ADR-012 Phase 4: HITL 暫停檢查
|
||
notification_sent = False
|
||
try:
|
||
from services.agent_actions import is_task_paused
|
||
if is_task_paused("run_auto_import_task"):
|
||
logging.info("[Scheduler] [AutoImport] ⏸️ 任務被 HITL 暫停中,本次跳過")
|
||
return
|
||
except Exception as pause_check_error:
|
||
logging.debug(
|
||
f"[Scheduler] [AutoImport] HITL 暫停檢查失敗但繼續排程 | Error: {pause_check_error}",
|
||
exc_info=True,
|
||
)
|
||
|
||
try:
|
||
from services.import_service import import_service
|
||
from services.notification_manager import NotificationManager
|
||
|
||
logging.info("[Scheduler] [AutoImport] 🚀 啟動 Google Drive 自動匯入任務")
|
||
|
||
# 執行自動匯入
|
||
result = import_service.auto_import_from_drive()
|
||
|
||
if result.get('success'):
|
||
logging.info(f"[Scheduler] [AutoImport] ✅ 自動匯入完成 | Message: {result.get('message')}")
|
||
stats = {
|
||
"file_count": result.get('file_count', 0),
|
||
"imported_count": result.get('imported_count', 0),
|
||
"status": "Success"
|
||
}
|
||
|
||
# V-New: 發送 Telegram 和 Line 通知(僅當有匯入新檔案時)
|
||
if result.get('file_count', 0) > 0:
|
||
try:
|
||
logging.info("[Scheduler] [AutoImport] 📢 準備發送自動匯入通知...")
|
||
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
|
||
|
||
# 格式化日期範圍
|
||
date_range_str = "未知"
|
||
date_range = result.get('date_range')
|
||
if date_range:
|
||
try:
|
||
from datetime import datetime as dt
|
||
date_min = dt.strptime(date_range['min'], '%Y-%m-%d')
|
||
date_max = dt.strptime(date_range['max'], '%Y-%m-%d')
|
||
date_min_str = date_min.strftime('%Y年%m月%d日')
|
||
date_max_str = date_max.strftime('%Y年%m月%d日')
|
||
if date_min_str == date_max_str:
|
||
date_range_str = date_min_str
|
||
else:
|
||
date_range_str = f"{date_min_str}~{date_max_str}"
|
||
except Exception:
|
||
date_range_str = f"{date_range.get('min', '?')}~{date_range.get('max', '?')}"
|
||
|
||
freshness_line = ""
|
||
data_lag_days = result.get('data_lag_days')
|
||
if data_lag_days is not None and data_lag_days > 0:
|
||
freshness_line = f"⚠️ 最新資料落後:{data_lag_days} 天\n"
|
||
|
||
# 組合通知訊息
|
||
message = (
|
||
f"📊 當日業績自動匯入通知 ({now_str})\n"
|
||
f"{'='*30}\n"
|
||
f"✅ 匯入狀態:成功\n"
|
||
f"📁 處理檔案數:{result.get('file_count', 0)} 個\n"
|
||
f"📝 共匯入記錄:{result.get('total_rows', 0)} 筆\n"
|
||
f"📅 數據日期期間:{date_range_str}\n"
|
||
f"{freshness_line}"
|
||
f"{'='*30}\n"
|
||
f"詳細資料請至系統查看"
|
||
)
|
||
|
||
# 發送通知到所有頻道
|
||
notifier = NotificationManager()
|
||
notifier._send_line_messages([message])
|
||
notifier._send_telegram_messages([message])
|
||
logging.info("[Scheduler] [AutoImport] ✅ 自動匯入通知已發送")
|
||
|
||
# 2026-01-30 新增:匯入後驗證資料同步
|
||
logging.info("[Scheduler] [AutoImport] 🔍 開始驗證資料同步...")
|
||
verify_result = verify_import_data_sync(
|
||
expected_rows=result.get('total_rows'),
|
||
date_range=result.get('date_range')
|
||
)
|
||
|
||
if not verify_result['success']:
|
||
# 資料驗證失敗,發送警告通知
|
||
error_details = '\n'.join([f" ⚠️ {e}" for e in verify_result['errors']])
|
||
verify_message = (
|
||
f"🚨 當日業績匯入驗證失敗 ({now_str})\n"
|
||
f"{'='*30}\n"
|
||
f"匯入通知顯示成功,但資料驗證發現問題:\n"
|
||
f"{error_details}\n"
|
||
f"{'='*30}\n"
|
||
f"📊 daily_sales_snapshot: {verify_result['daily_sales_snapshot']['rows']} 筆\n"
|
||
f"📈 realtime_sales_monthly: {verify_result['realtime_sales_monthly']['rows']} 筆\n"
|
||
f"{'='*30}\n"
|
||
f"請立即檢查資料同步狀態!"
|
||
)
|
||
notifier._send_telegram_messages([verify_message])
|
||
logging.error(f"[Scheduler] [AutoImport] ❌ 資料驗證失敗: {verify_result['errors']}")
|
||
else:
|
||
logging.info("[Scheduler] [AutoImport] ✅ 資料驗證通過,兩個表已同步")
|
||
except Exception as e:
|
||
logging.error(f"[Scheduler] [AutoImport] ❌ 發送通知失敗 | Error: {e}")
|
||
else:
|
||
logging.info("[Scheduler] [AutoImport] ℹ️ 無新檔案,不發送通知")
|
||
else:
|
||
logging.error(f"[Scheduler] [AutoImport] ❌ 自動匯入失敗 | Message: {result.get('message')}")
|
||
stats = {
|
||
"status": "Failed",
|
||
"error": result.get('message', 'Unknown error')
|
||
}
|
||
|
||
# V-New: 匯入失敗時也發送通知
|
||
try:
|
||
logging.info("[Scheduler] [AutoImport] 📢 準備發送自動匯入失敗通知...")
|
||
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
|
||
message = (
|
||
f"⚠️ 當日業績自動匯入失敗 ({now_str})\n"
|
||
f"{'='*30}\n"
|
||
f"❌ 匯入狀態:失敗\n"
|
||
f"📌 錯誤訊息:{result.get('message', 'Unknown error')}\n"
|
||
f"{'='*30}\n"
|
||
f"請檢查 Google Drive 設定或手動匯入"
|
||
)
|
||
notifier = NotificationManager()
|
||
notifier._send_line_messages([message])
|
||
notifier._send_telegram_messages([message])
|
||
logging.info("[Scheduler] [AutoImport] ✅ 匯入失敗通知已發送")
|
||
except Exception as e:
|
||
logging.error(f"[Scheduler] [AutoImport] ❌ 發送失敗通知時發生錯誤 | Error: {e}")
|
||
|
||
_save_stats('auto_import_task', stats)
|
||
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Scheduler] [AutoImport] 🚨 自動匯入任務異常 | Error: {e}")
|
||
stats = {"status": "Failed", "error": str(e)}
|
||
_save_stats('auto_import_task', stats)
|
||
|
||
# ADR-012 Phase 2: 改走 EventRouter(Hermes L1 翻譯 + 降級鏈)
|
||
# LINE 通道保留(event_router 只處理 Telegram)
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_auto_import_task",
|
||
error=e,
|
||
source="Scheduler.AutoImport",
|
||
event_type="db_connection_error" if "translate host" in str(e).lower() or "operational" in str(e).lower() else "import_failure",
|
||
priority="P1",
|
||
title="當日業績自動匯入異常",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [AutoImport] event_router 失敗: {_router_e}")
|
||
|
||
# LINE 通知保留(獨立通道,不經 event_router)
|
||
# Only send if notification hasn't been sent already
|
||
if not notification_sent:
|
||
try:
|
||
from services.notification_manager import NotificationManager
|
||
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
|
||
message = f"Daily Sales Auto Import Exception ({now_str})\nSystem error: {str(e)[:200]}"
|
||
NotificationManager()._send_line_messages([message])
|
||
notification_sent = True # Mark as sent to prevent duplicate
|
||
except Exception as notify_error:
|
||
logging.error(f"[Scheduler] [AutoImport] LINE notification failed | Error: {notify_error}")
|
||
|
||
# ADR-013: AIOps 自動修復 — PlayBook 匹配 + KM 沉澱
|
||
try:
|
||
from services.auto_heal_service import auto_heal_service
|
||
auto_heal_service.handle_exception(
|
||
task_name="run_auto_import_task",
|
||
exception=e,
|
||
traceback_str=_tb.format_exc(),
|
||
)
|
||
except Exception as _heal_e:
|
||
logging.error(f"[Scheduler] [AutoImport] auto_heal_service 失敗: {_heal_e}")
|
||
|
||
def run_competitor_price_feeder_task():
|
||
"""
|
||
競品價格補給線排程任務(每 4 小時執行一次)
|
||
從 PChome 抓取競品價格,同步寫入 competitor_prices 最新快取與
|
||
competitor_price_history 歷史快照,供 HermesAnalystService 與前端歷史圖表使用。
|
||
與 AI Pipeline 完全解耦:本任務失敗不影響核心分析流程。
|
||
ADR-ICAIM-2026-04-17 新增
|
||
"""
|
||
try:
|
||
from config import DATABASE_PATH
|
||
from sqlalchemy import create_engine
|
||
from services.competitor_price_feeder import CompetitorPriceFeeder
|
||
|
||
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
|
||
logging.info(f"[Scheduler] [Feeder] 🚀 啟動競品價格抓取任務 | {now_str}")
|
||
|
||
engine = create_engine(DATABASE_PATH)
|
||
feeder = CompetitorPriceFeeder(engine=engine)
|
||
result = feeder.run(source="pchome")
|
||
|
||
stats = {
|
||
"total_skus": result.total_skus,
|
||
"matched": result.matched,
|
||
"skipped_no_result": result.skipped_no_result,
|
||
"skipped_low_score": result.skipped_low_score,
|
||
"errors": result.errors,
|
||
"duration_sec": result.duration_sec,
|
||
"history_written": result.history_written,
|
||
"status": "Success",
|
||
}
|
||
logging.info(
|
||
f"[Scheduler] [Feeder] ✅ 完成 | "
|
||
f"matched={result.matched}/{result.total_skus} "
|
||
f"skip_no={result.skipped_no_result} "
|
||
f"skip_low={result.skipped_low_score} "
|
||
f"history_written={result.history_written} "
|
||
f"errors={result.errors} "
|
||
f"耗時={result.duration_sec}s"
|
||
)
|
||
_save_stats('competitor_price_feeder', stats)
|
||
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Scheduler] [Feeder] 🚨 任務異常 | Error: {e}")
|
||
_save_stats('competitor_price_feeder', {"status": "Failed", "error": str(e)})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_competitor_price_feeder_task",
|
||
error=e,
|
||
source="Scheduler.Feeder",
|
||
event_type="competitor_price_feeder_failure",
|
||
priority="P2",
|
||
title="競品價格補給任務異常",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [Feeder] event_router 失敗: {_router_e}")
|
||
|
||
|
||
def run_pchome_match_backfill_task():
|
||
"""
|
||
PChome 待比對商品補抓任務(每日執行)
|
||
優先處理尚未有有效 PChome 配對的高價 ACTIVE 商品,寫入 competitor_prices
|
||
與 competitor_price_history,完成後重算 product_pick 挑品清單。
|
||
"""
|
||
try:
|
||
from config import DATABASE_PATH
|
||
from sqlalchemy import create_engine
|
||
from services.ai_product_pick_agent import generate_product_pick_list
|
||
from services.cache_manager import clear_dashboard_cache
|
||
from services.competitor_price_feeder import CompetitorPriceFeeder
|
||
|
||
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
|
||
logging.info(f"[Scheduler] [PChomeBackfill] 🚀 啟動待比對補抓任務 | {now_str}")
|
||
|
||
engine = create_engine(DATABASE_PATH)
|
||
feeder_result = CompetitorPriceFeeder(engine=engine).run_unmatched_priority(limit=120)
|
||
pick_result = generate_product_pick_list(engine, limit=50)
|
||
clear_dashboard_cache()
|
||
|
||
stats = {
|
||
"total_skus": feeder_result.total_skus,
|
||
"matched": feeder_result.matched,
|
||
"skipped_no_result": feeder_result.skipped_no_result,
|
||
"skipped_low_score": feeder_result.skipped_low_score,
|
||
"errors": feeder_result.errors,
|
||
"duration_sec": feeder_result.duration_sec,
|
||
"history_written": feeder_result.history_written,
|
||
"pick_candidates": pick_result.candidates,
|
||
"pick_written": pick_result.written,
|
||
"status": "Success",
|
||
}
|
||
logging.info(
|
||
f"[Scheduler] [PChomeBackfill] ✅ 完成 | "
|
||
f"matched={feeder_result.matched}/{feeder_result.total_skus} "
|
||
f"history_written={feeder_result.history_written} "
|
||
f"pick_written={pick_result.written} "
|
||
f"errors={feeder_result.errors} "
|
||
f"耗時={feeder_result.duration_sec}s"
|
||
)
|
||
_save_stats('pchome_match_backfill', stats)
|
||
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Scheduler] [PChomeBackfill] 🚨 任務異常 | Error: {e}")
|
||
_save_stats('pchome_match_backfill', {"status": "Failed", "error": str(e)})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_pchome_match_backfill_task",
|
||
error=e,
|
||
source="Scheduler.PChomeBackfill",
|
||
event_type="pchome_match_backfill_failure",
|
||
priority="P2",
|
||
title="PChome 待比對補抓任務異常",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [PChomeBackfill] event_router 失敗: {_router_e}")
|
||
|
||
|
||
def run_icaim_analysis_task():
|
||
"""
|
||
ICAIM 競價情報分析排程任務(每 6 小時執行一次)
|
||
Hermes 3 分析師 → NemoTron 派發器 → Telegram 競價告警
|
||
依賴 competitor_prices 表(由 run_competitor_price_feeder_task 每 4h 更新)
|
||
|
||
ADR-ICAIM-2026-04-17 新增
|
||
"""
|
||
try:
|
||
from config import DATABASE_PATH
|
||
from sqlalchemy import create_engine
|
||
from services.hermes_analyst_service import HermesAnalystService
|
||
from services.nemoton_dispatcher_service import NemotronDispatcher
|
||
from services.notification_manager import NotificationManager
|
||
|
||
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
|
||
logging.info(f"[Scheduler] [ICAIM] 🚀 啟動競價情報分析 | {now_str}")
|
||
|
||
engine = create_engine(DATABASE_PATH)
|
||
|
||
# Step 1:Hermes 分析師
|
||
hermes_svc = HermesAnalystService(engine=engine)
|
||
t0 = datetime.now(TAIPEI_TZ)
|
||
result = hermes_svc.run()
|
||
hermes_duration = round((datetime.now(TAIPEI_TZ) - t0).total_seconds(), 1)
|
||
|
||
logging.info(
|
||
f"[Scheduler] [ICAIM] Hermes 完成 | "
|
||
f"candidates={result.total_candidates} threats={len(result.threats)} "
|
||
f"耗時={hermes_duration}s"
|
||
)
|
||
|
||
_save_stats('icaim_hermes', {
|
||
"candidates": result.total_candidates,
|
||
"threats": len(result.threats),
|
||
"duration_sec": hermes_duration,
|
||
"status": "Success" if result.success else "Failed",
|
||
"error": result.error,
|
||
})
|
||
|
||
if not result.success:
|
||
logging.error(f"[Scheduler] [ICAIM] Hermes 失敗: {result.error}")
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_icaim_analysis_task",
|
||
error=RuntimeError(result.error or "Hermes analysis failed"),
|
||
source="Scheduler.ICAIM",
|
||
event_type="icaim_hermes_failure",
|
||
priority="P1",
|
||
title="ICAIM Hermes 分析失敗",
|
||
payload={"duration_sec": hermes_duration, "candidates": result.total_candidates},
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [ICAIM] Hermes failure event_router 失敗: {_router_e}")
|
||
return
|
||
|
||
if not result.threats:
|
||
logging.info("[Scheduler] [ICAIM] 無威脅商品,跳過 NemoTron dispatch")
|
||
# Operation Ollama-First v5.0 Phase 4:移除原 inline meta_analysis 觸發
|
||
# 原因:呼叫點 #16 月耗 ~2.5M Gemini tokens(icaim 4h × 6 + cron 6h × 4 = 10/天)
|
||
# 改由 run_scheduler 每日 12:00 單一觸發;此處僅 log,不再呼叫。
|
||
return
|
||
|
||
# Step 2:NemoTron 派發器 → Telegram
|
||
hermes_stats = {"duration_sec": hermes_duration, "tokens": result.hermes_tokens}
|
||
notifier = NotificationManager()
|
||
dispatcher = NemotronDispatcher(notification_manager=notifier, engine=engine)
|
||
dispatch_result = dispatcher.dispatch(result.threats, hermes_stats=hermes_stats)
|
||
|
||
logging.info(
|
||
f"[Scheduler] [ICAIM] ✅ 完成 | "
|
||
f"dispatched={dispatch_result.get('dispatched', 0)} "
|
||
f"alerts={dispatch_result.get('alerts', 0)}"
|
||
)
|
||
_save_stats('icaim_dispatch', {**dispatch_result, "status": "Success"})
|
||
|
||
# Operation Ollama-First v5.0 Phase 4:原 Step 3 inline Meta-Analysis 觸發已移除
|
||
# 改由 run_scheduler 每日 12:00 單一觸發(Phase 4 降頻 6h → 24h,月省 ~1.875M tokens)
|
||
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Scheduler] [ICAIM] 🚨 任務異常 | Error: {e}")
|
||
_save_stats('icaim_analysis', {"status": "Failed", "error": str(e)})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_icaim_analysis_task",
|
||
error=e,
|
||
source="Scheduler.ICAIM",
|
||
event_type="icaim_analysis_failure",
|
||
priority="P1",
|
||
title="ICAIM 競價分析任務異常",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [ICAIM] event_router 失敗: {_router_e}")
|
||
# ADR-013: AIOps 自動修復
|
||
try:
|
||
from services.auto_heal_service import auto_heal_service
|
||
auto_heal_service.handle_exception(
|
||
task_name="run_icaim_analysis_task",
|
||
exception=e,
|
||
traceback_str=_tb.format_exc(),
|
||
)
|
||
except Exception as _heal_e:
|
||
logging.error(f"[Scheduler] [ICAIM] auto_heal_service 失敗: {_heal_e}")
|
||
|
||
|
||
def run_weekly_strategy_task():
|
||
"""
|
||
執行每週 Gemini 策略師週報任務 (Phase 4 實作)
|
||
產出高階經營決策週報,並存入 ai_insights 中作知識沈澱,同時透過 Telegram 推送
|
||
"""
|
||
logging.info("[Scheduler] [Strategy] 🚀 啟動 Gemini 策略師週報任務...")
|
||
try:
|
||
from services.openclaw_strategist_service import generate_weekly_strategy_report
|
||
generate_weekly_strategy_report(force_tg_alert=True)
|
||
logging.info("[Scheduler] [Strategy] ✅ Gemini 策略師週報任務完成")
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Scheduler] [Strategy] 🚨 任務異常 | Error: {e}")
|
||
_save_stats('weekly_strategy', {"status": "Failed", "error": str(e)})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_weekly_strategy_task",
|
||
error=e,
|
||
source="Scheduler.Strategy",
|
||
event_type="weekly_strategy_failure",
|
||
priority="P2",
|
||
title="每週策略週報任務異常",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [Strategy] event_router 失敗: {_router_e}")
|
||
# ADR-013: AIOps 自動修復
|
||
try:
|
||
from services.auto_heal_service import auto_heal_service
|
||
auto_heal_service.handle_exception(
|
||
task_name="run_weekly_strategy_task",
|
||
exception=e,
|
||
traceback_str=_tb.format_exc(),
|
||
)
|
||
except Exception as _heal_e:
|
||
logging.error(f"[Scheduler] [Strategy] auto_heal_service 失敗: {_heal_e}")
|
||
|
||
|
||
def run_db_backup_task():
|
||
"""
|
||
每日凌晨 02:00 執行 pg_dump 備份 momo_analytics,
|
||
並清理超過 7 天的舊備份。完成後透過 Telegram 通知統帥。
|
||
失敗同樣發出告警,並沉澱到 ai_insights(供 RAG 查詢)。
|
||
"""
|
||
logging.info("[Scheduler] [Backup] 🚀 啟動資料庫備份任務...")
|
||
try:
|
||
from services.db_backup_service import run_backup, cleanup_old_backups
|
||
from services.notification_manager import NotificationManager
|
||
|
||
result = run_backup()
|
||
deleted_count = cleanup_old_backups()
|
||
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
|
||
|
||
notifier = NotificationManager()
|
||
|
||
if result["success"]:
|
||
size_kb = result["file_size"] // 1024
|
||
msg = (
|
||
f"💾 資料庫備份完成 ({now_str})\n"
|
||
f"{'='*30}\n"
|
||
f"✅ 狀態:成功\n"
|
||
f"📁 檔案:{result['filename']}\n"
|
||
f"📦 大小:{size_kb} KB\n"
|
||
f"⏱ 耗時:{result['duration']:.1f} 秒\n"
|
||
f"🗑 清理舊備份:{deleted_count} 個"
|
||
)
|
||
logging.info(f"[Scheduler] [Backup] ✅ 備份成功 | {result['filename']} ({size_kb}KB)")
|
||
_save_stats('db_backup', {"status": "Success", "filename": result["filename"], "size_kb": size_kb})
|
||
|
||
# 沉澱到 ai_insights(RAG 可查詢備份歷史)
|
||
try:
|
||
from services.openclaw_learning_service import store_insight
|
||
store_insight(
|
||
insight_type='backup_status',
|
||
content=f"資料庫備份成功:{result['filename']},大小 {size_kb}KB,耗時 {result['duration']:.1f}s",
|
||
period=datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d'),
|
||
metadata={"status": "success", "size_kb": size_kb, "deleted_old": deleted_count},
|
||
ai_model="scheduler",
|
||
)
|
||
except Exception as insight_error:
|
||
logging.warning(
|
||
f"[Scheduler] [Backup] ⚠️ 備份成功 insight 寫入失敗但繼續通知 | Error: {insight_error}",
|
||
exc_info=True,
|
||
)
|
||
else:
|
||
msg = (
|
||
f"🚨 資料庫備份失敗 ({now_str})\n"
|
||
f"{'='*30}\n"
|
||
f"❌ 狀態:失敗\n"
|
||
f"🔍 原因:{result.get('error', '未知錯誤')}\n"
|
||
f"⚠️ 請立即檢查 momo-db 容器狀態!"
|
||
)
|
||
logging.error(f"[Scheduler] [Backup] ❌ 備份失敗: {result.get('error')}")
|
||
_save_stats('db_backup', {"status": "Failed", "error": result.get("error")})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_db_backup_task",
|
||
error=RuntimeError(result.get("error") or "database backup failed"),
|
||
source="Scheduler.DBBackup",
|
||
event_type="db_backup_failure",
|
||
priority="P1",
|
||
title="資料庫備份失敗",
|
||
payload={"filename": result.get("filename")},
|
||
dedup_ttl_sec=3600,
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [Backup] event_router 失敗: {_router_e}")
|
||
|
||
try:
|
||
from services.openclaw_learning_service import store_insight
|
||
store_insight(
|
||
insight_type='backup_status',
|
||
content=f"資料庫備份失敗:{result.get('error', '未知')}",
|
||
period=datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d'),
|
||
metadata={"status": "failed", "error": result.get("error")},
|
||
ai_model="scheduler",
|
||
)
|
||
except Exception as insight_error:
|
||
logging.warning(
|
||
f"[Scheduler] [Backup] ⚠️ 備份失敗 insight 寫入失敗但繼續通知 | Error: {insight_error}",
|
||
exc_info=True,
|
||
)
|
||
|
||
notifier._send_telegram_messages([msg])
|
||
|
||
except Exception as e:
|
||
logging.error(f"[Scheduler] [Backup] 🚨 任務異常 | Error: {e}")
|
||
_save_stats('db_backup', {"status": "Error", "error": str(e)})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_db_backup_task",
|
||
error=e,
|
||
source="Scheduler.DBBackup",
|
||
event_type="db_backup_failure",
|
||
priority="P1",
|
||
title="資料庫備份排程異常",
|
||
dedup_ttl_sec=3600,
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [Backup] event_router 失敗: {_router_e}")
|
||
try:
|
||
from services.notification_manager import NotificationManager
|
||
NotificationManager()._send_telegram_messages([
|
||
f"🚨 DB 備份排程異常\n錯誤:{e}"
|
||
])
|
||
except Exception as notify_error:
|
||
logging.warning(
|
||
f"[Scheduler] [Backup] ⚠️ 備份異常 Telegram 通知失敗 | Error: {notify_error}",
|
||
exc_info=True,
|
||
)
|
||
|
||
|
||
def run_backup_monitor_task():
|
||
"""
|
||
每 6 小時檢查最近一次備份是否在 25 小時內(允許一點偏差)。
|
||
若發現備份過期或從未備份,立即發出 Telegram 告警,
|
||
並透過 store_insight 讓 NemoTron/OpenClaw 可感知備份健康狀態。
|
||
"""
|
||
logging.info("[Scheduler] [BackupMonitor] 🔍 檢查備份健康狀態...")
|
||
try:
|
||
from services.db_backup_service import get_latest_backup_info
|
||
from services.notification_manager import NotificationManager
|
||
|
||
info = get_latest_backup_info()
|
||
now = datetime.now(TAIPEI_TZ)
|
||
alert_needed = False
|
||
alert_reason = ""
|
||
|
||
if info["status"] == "no_backup" or info["filename"] is None:
|
||
alert_needed = True
|
||
alert_reason = "從未執行過備份,backup_log 與備份目錄均為空"
|
||
elif info["status"] == "failed":
|
||
alert_needed = True
|
||
alert_reason = f"最近一次備份失敗:{info.get('error', '未知')}"
|
||
else:
|
||
created_at = info["created_at"]
|
||
if created_at is not None:
|
||
# 統一為 naive datetime 比較
|
||
if hasattr(created_at, 'tzinfo') and created_at.tzinfo is not None:
|
||
created_at = created_at.replace(tzinfo=None)
|
||
now_naive = now.replace(tzinfo=None)
|
||
hours_ago = (now_naive - created_at).total_seconds() / 3600
|
||
if hours_ago > 25:
|
||
alert_needed = True
|
||
alert_reason = f"最近備份距今 {hours_ago:.1f} 小時(超過 25h 閾值),可能備份任務未執行"
|
||
|
||
if alert_needed:
|
||
now_str = now.strftime('%Y-%m-%d %H:%M')
|
||
msg = (
|
||
f"⚠️ 資料庫備份異常告警 ({now_str})\n"
|
||
f"{'='*30}\n"
|
||
f"🔴 原因:{alert_reason}\n"
|
||
f"📋 最新備份:{info.get('filename', '無')}\n"
|
||
f"🕐 備份時間:{info.get('created_at', '無')}\n"
|
||
f"💡 請確認 momo-scheduler 備份排程是否正常執行!"
|
||
)
|
||
logging.warning(f"[Scheduler] [BackupMonitor] ⚠️ 備份告警: {alert_reason}")
|
||
NotificationManager()._send_telegram_messages([msg])
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_backup_monitor_task",
|
||
error=RuntimeError(alert_reason),
|
||
source="Scheduler.BackupMonitor",
|
||
event_type="backup_monitor_alert",
|
||
priority="P2",
|
||
title="資料庫備份健康告警",
|
||
payload={"latest_file": info.get("filename"), "created_at": str(info.get("created_at"))},
|
||
dedup_ttl_sec=21600,
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [BackupMonitor] event_router 失敗: {_router_e}")
|
||
|
||
try:
|
||
from services.openclaw_learning_service import store_insight
|
||
store_insight(
|
||
insight_type='backup_status',
|
||
content=f"備份監控告警:{alert_reason}",
|
||
period=now.strftime('%Y-%m-%d'),
|
||
metadata={"alert": True, "reason": alert_reason, "latest_file": info.get("filename")},
|
||
ai_model="scheduler",
|
||
)
|
||
except Exception as insight_error:
|
||
logging.warning(
|
||
f"[Scheduler] [BackupMonitor] ⚠️ 備份監控 insight 寫入失敗但繼續 | Error: {insight_error}",
|
||
exc_info=True,
|
||
)
|
||
else:
|
||
created_at = info.get("created_at")
|
||
logging.info(f"[Scheduler] [BackupMonitor] ✅ 備份狀態正常 | 最新: {info.get('filename')} @ {created_at}")
|
||
|
||
_save_stats('backup_monitor', {
|
||
"status": "Alert" if alert_needed else "OK",
|
||
"latest_file": info.get("filename"),
|
||
"alert_reason": alert_reason if alert_needed else None,
|
||
})
|
||
|
||
except Exception as e:
|
||
logging.error(f"[Scheduler] [BackupMonitor] 🚨 監控任務異常 | Error: {e}")
|
||
_save_stats('backup_monitor', {"status": "Error", "error": str(e)})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_backup_monitor_task",
|
||
error=e,
|
||
source="Scheduler.BackupMonitor",
|
||
event_type="backup_monitor_failure",
|
||
priority="P2",
|
||
title="資料庫備份監控任務異常",
|
||
dedup_ttl_sec=3600,
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [BackupMonitor] event_router 失敗: {_router_e}")
|
||
|
||
|
||
def run_openclaw_meta_analysis_task():
|
||
"""每 6 小時 — OpenClaw Meta-Analysis(AI 系統效能自我審視 + 電商洞察快照)"""
|
||
try:
|
||
from services.openclaw_strategist_service import generate_meta_analysis_report
|
||
report = generate_meta_analysis_report()
|
||
logging.info(f"[Scheduler] [MetaAnalysis] ✅ 完成 | 長度={len(report)} 字元")
|
||
_save_stats('meta_analysis', {"status": "OK", "length": len(report)})
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Scheduler] [MetaAnalysis] 🚨 Meta-Analysis 任務異常: {e}")
|
||
_save_stats('meta_analysis', {"status": "Error", "error": str(e)})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_openclaw_meta_analysis_task",
|
||
error=e,
|
||
source="Scheduler.MetaAnalysis",
|
||
event_type="openclaw_report_failure",
|
||
priority="P2",
|
||
title="OpenClaw Meta-Analysis 任務異常",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [MetaAnalysis] event_router 失敗: {_router_e}")
|
||
try:
|
||
from services.auto_heal_service import auto_heal_service
|
||
auto_heal_service.handle_exception(
|
||
task_name="run_openclaw_meta_analysis_task",
|
||
exception=e,
|
||
traceback_str=_tb.format_exc(),
|
||
)
|
||
except Exception as _heal_e:
|
||
logging.error(f"[Scheduler] [MetaAnalysis] auto_heal_service 失敗: {_heal_e}")
|
||
|
||
|
||
def run_dedup_batch_task():
|
||
"""每日 03:00 — ai_insights 去重批次(同 SKU 同 type 同 period 保留最高品質)"""
|
||
try:
|
||
from services.openclaw_learning_service import run_dedup_batch
|
||
result = run_dedup_batch()
|
||
logging.info(
|
||
f"[Scheduler] [Dedup] 去重完成 | 歸檔={result.get('archived', 0)} 筆"
|
||
f" / 掃描={result.get('scanned', 0)} 筆"
|
||
)
|
||
_save_stats('dedup_batch', result)
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Scheduler] [Dedup] 去重批次異常: {e}")
|
||
_save_stats('dedup_batch', {"status": "Error", "error": str(e)})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_dedup_batch_task",
|
||
error=e,
|
||
source="Scheduler.Dedup",
|
||
event_type="dedup_batch_failure",
|
||
priority="P2",
|
||
title="ai_insights 去重批次異常",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [Dedup] event_router 失敗: {_router_e}")
|
||
|
||
|
||
def run_quality_rescore_task():
|
||
"""每日 04:00 — ai_insights 品質分數時間衰減重算批次"""
|
||
try:
|
||
from services.openclaw_learning_service import run_quality_rescore_batch
|
||
result = run_quality_rescore_batch()
|
||
try:
|
||
from services.openclaw_learning_service import enqueue_missing_insight_embeddings
|
||
result["embedding_backfill"] = enqueue_missing_insight_embeddings(limit=200)
|
||
except Exception as _embed_e:
|
||
logging.warning(f"[Scheduler] [Rescore] embedding backfill 略過: {_embed_e}")
|
||
logging.info(
|
||
f"[Scheduler] [Rescore] 品質重算完成 | 更新={result.get('updated', 0)} 筆"
|
||
f" | relearn 自動歸檔={result.get('relearn_reset', 0)} 筆"
|
||
)
|
||
_save_stats('quality_rescore', result)
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Scheduler] [Rescore] 品質分數重算異常: {e}")
|
||
_save_stats('quality_rescore', {"status": "Error", "error": str(e)})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_quality_rescore_task",
|
||
error=e,
|
||
source="Scheduler.Rescore",
|
||
event_type="quality_rescore_failure",
|
||
priority="P2",
|
||
title="ai_insights 品質重算批次異常",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [Rescore] event_router 失敗: {_router_e}")
|
||
|
||
|
||
def run_daily_report_task():
|
||
"""每日 09:00 — OpenClaw 電商日報(業績快報 + 競品威脅 + 圖表推播)"""
|
||
try:
|
||
from services.openclaw_strategist_service import generate_daily_report
|
||
result = generate_daily_report()
|
||
logging.info(
|
||
f"[Scheduler] [DailyReport] ✅ 完成 | period={result.get('period')} "
|
||
f"charts={result.get('chart_count', 0)} actions={result.get('action_count', 0)}"
|
||
)
|
||
_save_stats('daily_report', result)
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Scheduler] [DailyReport] 🚨 日報任務異常: {e}")
|
||
_save_stats('daily_report', {"status": "Error", "error": str(e)})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_daily_report_task",
|
||
error=e,
|
||
source="Scheduler.DailyReport",
|
||
event_type="openclaw_report_failure",
|
||
priority="P2",
|
||
title="OpenClaw 日報任務異常",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [DailyReport] event_router 失敗: {_router_e}")
|
||
try:
|
||
from services.auto_heal_service import auto_heal_service
|
||
auto_heal_service.handle_exception(
|
||
task_name="run_daily_report_task",
|
||
exception=e,
|
||
traceback_str=_tb.format_exc(),
|
||
)
|
||
except Exception as _heal_e:
|
||
logging.error(f"[Scheduler] [DailyReport] auto_heal_service 失敗: {_heal_e}")
|
||
|
||
|
||
def run_monthly_report_task():
|
||
"""每月1日 07:00 — OpenClaw 電商月報(月度全景洞察 + 多圖表推播)"""
|
||
try:
|
||
from services.openclaw_strategist_service import generate_monthly_report
|
||
result = generate_monthly_report()
|
||
logging.info(
|
||
f"[Scheduler] [MonthlyReport] ✅ 完成 | period={result.get('period')} "
|
||
f"charts={result.get('chart_count', 0)} actions={result.get('action_count', 0)}"
|
||
)
|
||
_save_stats('monthly_report', result)
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Scheduler] [MonthlyReport] 🚨 月報任務異常: {e}")
|
||
_save_stats('monthly_report', {"status": "Error", "error": str(e)})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_monthly_report_task",
|
||
error=e,
|
||
source="Scheduler.MonthlyReport",
|
||
event_type="openclaw_report_failure",
|
||
priority="P2",
|
||
title="OpenClaw 月報任務異常",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [MonthlyReport] event_router 失敗: {_router_e}")
|
||
try:
|
||
from services.auto_heal_service import auto_heal_service
|
||
auto_heal_service.handle_exception(
|
||
task_name="run_monthly_report_task",
|
||
exception=e,
|
||
traceback_str=_tb.format_exc(),
|
||
)
|
||
except Exception as _heal_e:
|
||
logging.error(f"[Scheduler] [MonthlyReport] auto_heal_service 失敗: {_heal_e}")
|
||
|
||
|
||
def run_ppt_auto_generation_task(schedule_kind=None):
|
||
"""依日/週/月/季/半年/年度節奏補齊 PPT 簡報。
|
||
|
||
22:00 的 ppt_vision_audit 只負責視覺審核;這個任務先依固定週期產出
|
||
已定義的簡報,並把每次嘗試寫入 ppt_generation_runs,成功檔案寫入
|
||
ppt_reports.cached_data。
|
||
"""
|
||
try:
|
||
from services.ppt_auto_generation_service import generate_scheduled_ppt_reports
|
||
|
||
result = generate_scheduled_ppt_reports(schedule_kind=schedule_kind)
|
||
logging.info(
|
||
"[Scheduler] [PPTAutoGeneration] kind=%s status=%s ready=%s errors=%s",
|
||
schedule_kind or "due",
|
||
result.get("status"),
|
||
result.get("ready", 0),
|
||
result.get("errors", 0),
|
||
)
|
||
stat_key = f"ppt_auto_generation_{schedule_kind}" if schedule_kind else "ppt_auto_generation"
|
||
_save_stats(stat_key, result)
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Scheduler] [PPTAutoGeneration] 🚨 自動簡報補齊異常: {e}")
|
||
stat_key = f"ppt_auto_generation_{schedule_kind}" if schedule_kind else "ppt_auto_generation"
|
||
_save_stats(stat_key, {"status": "Error", "error": str(e), "schedule_kind": schedule_kind})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_ppt_auto_generation_task",
|
||
error=e,
|
||
source="Scheduler.PPTAutoGeneration",
|
||
event_type="ppt_auto_generation_failure",
|
||
priority="P2",
|
||
title="PPT 自動簡報補齊異常",
|
||
trace=_tb.format_exc(),
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [PPTAutoGeneration] event_router 失敗: {_router_e}")
|
||
|
||
|
||
def run_ai_smoke_daily_summary_task():
|
||
"""每日 AI 自動化 Smoke trend 摘要推播(只讀 history,不重新執行 smoke)。"""
|
||
try:
|
||
from services.ai_automation_smoke_service import send_smoke_daily_summary
|
||
result = send_smoke_daily_summary()
|
||
logging.info(
|
||
"[Scheduler] [AISmokeSummary] 完成 | status=%s sent=%s failed=%s",
|
||
result.get("status"),
|
||
result.get("telegram", {}).get("sent", 0),
|
||
result.get("telegram", {}).get("failed", 0),
|
||
)
|
||
_save_stats('ai_smoke_daily_summary', result)
|
||
except Exception as e:
|
||
import traceback as _tb
|
||
logging.error(f"[Scheduler] [AISmokeSummary] 🚨 摘要推播異常: {e}")
|
||
_save_stats('ai_smoke_daily_summary', {"status": "Error", "error": str(e)})
|
||
try:
|
||
from services.event_router import notify_failure
|
||
notify_failure(
|
||
task_name="run_ai_smoke_daily_summary_task",
|
||
error=e,
|
||
source="Scheduler.AISmokeSummary",
|
||
event_type="ai_smoke_summary_failure",
|
||
priority="P2",
|
||
title="AI Smoke 每日摘要推播異常",
|
||
trace=_tb.format_exc(),
|
||
dedup_ttl_sec=3600,
|
||
)
|
||
except Exception as _router_e:
|
||
logging.error(f"[Scheduler] [AISmokeSummary] event_router 失敗: {_router_e}")
|
||
|
||
|
||
if __name__ == "__main__":
|
||
# 此檔案現在由 app.py 導入並由其主執行緒管理排程。
|
||
# 若需獨立測試,可在此處臨時加入調用程式碼。
|
||
logging.info("[Scheduler] [Main] 此為排程任務定義檔,請由主程式 app.py 執行。")
|