Files
ewoooc/scheduler.py
OoO 4c59b74ced
All checks were successful
CD Pipeline / deploy (push) Successful in 1m11s
feat: schedule growth momo backfill
2026-06-19 00:18:53 +08:00

3108 lines
158 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# cspell:ignore momo goodsimg prdimg bottomicon
import os
import re
import time
import logging
import json
import threading
import requests
from datetime import datetime, timedelta, timezone
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from contextlib import contextmanager
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoAlertPresentException, TimeoutException, UnexpectedAlertPresentException
from sqlalchemy import desc, func
from database.manager import DatabaseManager
from database.models import Product, PriceRecord
from database.edm_models import PromoProduct
from services.notification_manager import NotificationManager
from services.edm_notifier import EdmNotifier # V-New: 導入新的通知模組
from utils.momo_url_utils import normalize_momo_product_url
# V-Fix: 改為匯入讀取函式,而非靜態變數,以支援動態更新
from config import load_momo_categories
# V9.99: 修正 BASE_DIR 未定義的問題
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s [%(levelname)s] %(message)s',
handlers=[logging.FileHandler("logs/system.log", encoding="utf-8"), logging.StreamHandler()]
)
# 設定台北時區
TAIPEI_TZ = timezone(timedelta(hours=8))
class MomoEdmUnavailable(RuntimeError):
"""MOMO EDM page is expired or unavailable; skip without failure alert."""
def __init__(self, alert_text: str, url: str):
self.alert_text = str(alert_text or "").strip()
self.url = url
super().__init__(self.alert_text or "MOMO EDM unavailable")
def _is_momo_edm_unavailable_alert(alert_text):
text = str(alert_text or "").replace(" ", "")
return "很抱歉此EDM不存在" in text or "EDM不存在" in text
def _accept_current_alert(driver):
try:
alert = driver.switch_to.alert
alert_text = str(getattr(alert, "text", "") or "")
alert.accept()
return alert_text
except NoAlertPresentException:
return ""
def _raise_if_momo_edm_unavailable(driver, task_label, url):
alert_text = _accept_current_alert(driver)
if not alert_text:
return
if _is_momo_edm_unavailable_alert(alert_text):
logging.warning(
"%s ⚠️ MOMO 活動頁已失效,任務改為 Skipped不送 failure alert | URL: %s | Alert: %s",
task_label,
url,
alert_text,
)
raise MomoEdmUnavailable(alert_text, url)
logging.warning("%s ⚠️ 已接受瀏覽器 alert 後繼續解析 | Alert: %s", task_label, alert_text)
def _safe_driver_title(driver, task_label, url):
try:
return driver.title
except UnexpectedAlertPresentException as exc:
alert_text = _accept_current_alert(driver) or getattr(exc, "alert_text", "") or str(exc)
if _is_momo_edm_unavailable_alert(alert_text):
logging.warning(
"%s ⚠️ 讀取 title 時偵測到 MOMO 活動頁失效,任務改為 Skipped | URL: %s | Alert: %s",
task_label,
url,
alert_text,
)
raise MomoEdmUnavailable(alert_text, url) from exc
raise
def _save_stats(task_name, data):
"""將任務統計結果寫入 JSON 檔案"""
stats_file = os.path.join(os.path.dirname(__file__), 'data', 'scheduler_stats.json')
try:
stats = {}
if os.path.exists(stats_file):
with open(stats_file, 'r', encoding='utf-8') as f:
stats = json.load(f)
# V-New: 將統計資料改為歷史列表保留最近5次紀錄
if task_name not in stats or not isinstance(stats.get(task_name), list):
stats[task_name] = []
data['last_run'] = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M:%S')
stats[task_name].insert(0, data) # 從最前面插入
stats[task_name] = stats[task_name][:5] # 只保留最近5筆
with open(stats_file, 'w', encoding='utf-8') as f:
json.dump(stats, f, ensure_ascii=False, indent=4)
except Exception as e:
logging.error(f"[Scheduler] [Stats] ❌ 無法儲存排程統計 | Task: {task_name} | Error: {e}")
WHITEPAGE_MARKER_GROUPS = [
{
"label": "導航佈局",
"patterns": ("navbar", "momo-sidebar", "momo-topbar", "momo-layout", "login-card"),
},
{
"label": "系統識別",
"patterns": ("MOMO 價格監控系統", "EwoooC", "WOOO TECH", "商品看板", "價格監控"),
},
{
"label": "內容容器",
"patterns": ("card", "login-card", "momo-card", "container"),
},
]
def _missing_whitepage_markers(content):
"""Return marker groups missing from a non-blank momo page."""
missing = []
for group in WHITEPAGE_MARKER_GROUPS:
if not any(pattern in content for pattern in group["patterns"]):
missing.append(group["label"])
return missing
@contextmanager
def managed_scraper_resources(window_size='1920,5000', debug=False, timeout=45, max_retries=2):
"""
一個管理 Selenium WebDriver 和資料庫 session 的上下文管理器。
它能確保資源在使用後被正確關閉,並在發生錯誤時自動回滾。
V-Opt 2026-01-15: 增加穩定性優化
- 增加更多 Chrome 穩定性選項
- 增加 session 健康檢查
- 改進資源清理邏輯
"""
driver = None
session = None
retry_count = 0
while retry_count <= max_retries:
try:
options = Options()
# V-Opt: 改用 'eager' 策略,加速爬蟲 (不等圖片/廣告載入完成,大幅減少超時機率)
options.page_load_strategy = 'eager'
# V-Debug: 如果不是偵錯模式,才使用無頭瀏覽器
if not debug:
options.add_argument('--headless=new')
options.add_argument(f'--window-size={window_size}')
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36")
# === V-Opt 2026-01-15: 增強穩定性選項 ===
options.add_argument('--disable-gpu')
options.add_argument('--disable-extensions')
options.add_argument('--disable-dev-shm-usage') # 解決 /dev/shm 空間不足問題
options.add_argument('--no-sandbox')
options.add_argument('--disable-setuid-sandbox')
options.add_argument('--disable-background-networking')
options.add_argument('--disable-background-timer-throttling')
options.add_argument('--disable-backgrounding-occluded-windows')
options.add_argument('--disable-breakpad') # 禁用崩潰報告
options.add_argument('--disable-component-extensions-with-background-pages')
options.add_argument('--disable-features=TranslateUI,AutofillServerCommunication,PasswordManagerOnboarding,PasswordCheck')
options.add_argument('--disable-hang-monitor') # 禁用掛起監控,避免誤殺
options.add_argument('--disable-ipc-flooding-protection')
options.add_argument('--disable-popup-blocking')
options.add_argument('--disable-prompt-on-repost')
options.add_argument('--disable-renderer-backgrounding')
options.add_argument('--disable-sync')
options.add_argument('--metrics-recording-only')
options.add_argument('--no-first-run')
options.add_argument('--safebrowsing-disable-auto-update')
# V-Opt: 記憶體優化
options.add_argument('--memory-pressure-off') # 禁用記憶體壓力監控
options.add_argument('--js-flags=--max-old-space-size=512') # 限制 JS 堆大小
# V-Fix: 中文字體和截圖設定
options.add_argument('--lang=zh-TW')
options.add_argument('--force-device-scale-factor=1')
options.add_argument('--font-render-hinting=none')
options.add_argument('--force-color-profile=srgb')
# V-Fix: 字體偏好設定
prefs = {
'intl.accept_languages': 'zh-TW,zh,en',
'profile.default_content_setting_values.notifications': 2,
'profile.managed_default_content_settings.images': 1, # 允許圖片
'credentials_enable_service': False,
'profile.password_manager_enabled': False,
}
options.add_experimental_option('prefs', prefs)
# V-Opt: 禁用自動化控制標記
options.add_experimental_option('excludeSwitches', ['enable-automation'])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options)
db = DatabaseManager()
driver.set_page_load_timeout(timeout)
driver.set_script_timeout(10)
session = db.get_session()
# V-New: Session 健康檢查
try:
_ = driver.current_url # 簡單的健康檢查
logging.debug("[Scraper] [Resource] ✅ Chrome session 健康檢查通過")
except Exception as health_error:
logging.warning(f"[Scraper] [Resource] ⚠️ Chrome session 健康檢查失敗: {health_error}")
raise health_error
break # 成功建立連線,跳出重試循環
except Exception as init_error:
retry_count += 1
if driver:
try:
driver.quit()
except Exception as cleanup_error:
logging.warning(
f"[Scraper] [Resource] ⚠️ Chrome 初始化失敗後關閉 driver 也失敗 | Error: {cleanup_error}",
exc_info=True,
)
driver = None
if retry_count <= max_retries:
logging.warning(f"[Scraper] [Resource] ⚠️ Chrome 初始化失敗 (嘗試 {retry_count}/{max_retries}): {init_error}")
time.sleep(2) # 等待 2 秒後重試
else:
logging.error(f"[Scraper] [Resource] ❌ Chrome 初始化失敗,已達重試上限: {init_error}")
raise init_error
try:
yield driver, session
except Exception as e:
logging.error(f"[Scraper] [Resource] ❌ Context manager捕獲到異常 | Error: {type(e).__name__} | Action: session.rollback()")
if session:
session.rollback()
raise
finally:
# V-Opt 2026-01-15: 改進資源清理,確保 Chrome 完全關閉
logging.debug("[Scraper] [Resource] 資源管理器:正在關閉 WebDriver 和資料庫 session。")
if driver:
try:
# 先關閉所有視窗
try:
driver.close()
except Exception as close_error:
logging.debug(
f"[Scraper] [Resource] Chrome 視窗關閉失敗但繼續 quit | Error: {close_error}",
exc_info=True,
)
# 再退出 driver
driver.quit()
except Exception as quit_error:
logging.warning(f"[Scraper] [Resource] ⚠️ Chrome 關閉時發生錯誤: {quit_error}")
# 強制殺死 Chrome 進程
try:
import subprocess
subprocess.run(['pkill', '-f', 'chrome.*--headless'], timeout=5, capture_output=True)
except Exception as pkill_error:
logging.warning(
f"[Scraper] [Resource] ⚠️ Chrome 強制清理失敗 | Error: {pkill_error}",
exc_info=True,
)
if session:
try:
session.close()
except Exception as session_error:
logging.warning(
f"[Scraper] [Resource] ⚠️ DB session 關閉失敗 | Error: {session_error}",
exc_info=True,
)
def run_momo_task():
"""V8.1 邏輯:處理所有分類並存入資料庫"""
# ADR-012 Phase 4: HITL 暫停檢查
try:
from services.agent_actions import is_task_paused
if is_task_paused("run_momo_task"):
logging.info("[Crawler] [MOMO] ⏸️ 任務被 HITL 暫停中,本次跳過")
return
except Exception as pause_check_error:
logging.debug(
f"[Crawler] [MOMO] HITL 暫停檢查失敗但繼續排程 | Error: {pause_check_error}",
exc_info=True,
)
try:
# V-New: 每次執行任務時,動態從 JSON 檔案重新讀取分類
# 這解決了「修改設定需重啟」的問題,也避免了重啟造成的系統崩潰
current_categories = load_momo_categories()
total_cats = len(current_categories)
logging.info(f"[Crawler] [MOMO] 🚀 啟動批次爬蟲任務 | Categories: {total_cats}")
# V-Opt: 將超時設定進一步縮短至 15s (eager 模式下通常 5-10s 即可),加速跳過卡頓頁面
with managed_scraper_resources(timeout=15) as (driver, session):
# 頁面載入超時已移至 managed_scraper_resources 中統一管理
total_scraped_count = 0
total_new_products = 0
for i, cat in enumerate(current_categories):
cat_name = cat['name']
cat_url = cat['url']
# V-New: 為每個分類增加獨立的錯誤處理,避免單一分類失敗導致整個任務中斷
try:
logging.info(f"[Crawler] [MOMO] [{i+1}/{total_cats}] 正在處理類別: {cat_name}")
start_time = time.time()
# V-Opt: 增加超時救援機制 (Timeout Rescue)
# 當頁面載入超過設定時間 (25s) 時,不拋出錯誤跳過,而是強制停止載入 (window.stop)
# 這樣通常 DOM 已經呈現出來了,可以繼續抓取商品,避免浪費時間又沒抓到資料
try:
driver.get(cat_url) # Timeout is 15s
except TimeoutException:
logging.warning(f"[Crawler] [MOMO] ⚠️ 頁面載入超時 (15s),強制停止載入並嘗試解析內容...")
try:
driver.execute_script("window.stop();")
except Exception as e:
logging.exception(f"[Crawler] [MOMO] window.stop() 失敗但繼續 | Category: {cat_name} | Error: {e}")
# V-Opt: eager 模式下get 會很快返回,這裡稍微等待一下 DOM 穩定
time.sleep(1)
# === V8.1 滾動頁面邏輯 ===
# V-Opt: 包覆 try-except 避免滾動卡死導致整個分類失敗
try:
for j in range(1, 3): # V-Opt: 再減少一次滾動 (4 -> 3),通常前兩屏即包含大部分熱銷商品
driver.execute_script(f"window.scrollTo(0, {j * 1500});") # 加大滾動距離
time.sleep(0.5)
except Exception as e:
logging.exception(f"[Crawler] [MOMO] 滾動頁面失敗但繼續 | Category: {cat_name} | Error: {e}")
# === V9.95: 改為容器優先的爬取策略,提高穩定性 ===
# 1. 找出所有可能的商品容器
containers = driver.find_elements(By.CSS_SELECTOR, "li.goods, div.eachGood, li.box1, li.product_item")
if not containers:
logging.info(" - 未找到標準容器,啟用備案掃描...")
containers = driver.find_elements(By.XPATH, "//li[.//p[contains(@class, 'prdName')] or .//h3[contains(@class, 'prdName')]]")
logging.info(f"[Crawler] [MOMO] ⚡ 偵測到 {len(containers)} 個潛在商品區塊...")
count = 0
new_in_cat = 0
if not containers:
logging.warning(f"[Crawler] [MOMO] ⚠️ 類別 [{cat_name}] 未偵測到任何商品容器 | 請檢查網頁結構或 URL")
try:
debug_path = os.path.join(BASE_DIR, 'logs', 'debug_htmls')
os.makedirs(debug_path, exist_ok=True)
with open(os.path.join(debug_path, f"{cat_name}_debug.html"), "w", encoding="utf-8") as f:
f.write(driver.page_source)
logging.info(f"[Crawler] [MOMO] 🐛 已儲存頁面原始碼 | Path: {debug_path}")
except Exception as e:
logging.error(f"[Crawler] [MOMO] ❌ 無法儲存除錯頁面 | Error: {e}")
# 收集本分類所有商品資料,稍後批次寫入
category_products = []
for container in containers:
title, link_url, i_code, image_url = "", "", None, None
price_val = 0
try:
# 2. 在容器內分別提取各項資訊 (使用 find_elements 避免例外開銷)
# 提取連結與 i_code
link_els = container.find_elements(By.CSS_SELECTOR, "a[href*='i_code=']")
if not link_els:
link_els = container.find_elements(By.TAG_NAME, "a")
if not link_els: continue
link_url = link_els[0].get_attribute("href")
if not link_url:
continue
# 從 URL 提取 i_code
i_code_raw = None
match_icode = re.search(r'i_code=([a-zA-Z0-9_-]+)', link_url)
if match_icode:
i_code_raw = match_icode.group(1)
else:
match_tp = re.search(r'/goodsDetail/([a-zA-Z0-9_-]+)', link_url)
if match_tp:
i_code_raw = match_tp.group(1)
if not i_code_raw: continue
# 標準化 i_code
try:
i_code = str(int(i_code_raw))
except ValueError:
i_code = i_code_raw.upper()
product_url = normalize_momo_product_url(link_url, i_code)
if not product_url:
logging.warning(
f"[Crawler] [MOMO] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}"
)
continue
# 提取名稱
name_els = container.find_elements(By.CSS_SELECTOR, ".prdName, .goodsName, .productName, .title")
if name_els:
title = name_els[0].get_attribute("textContent").strip()
if not title:
title = name_els[0].get_attribute("innerText").strip()
if not title: continue
# 提取價格
price_els = container.find_elements(By.CSS_SELECTOR, ".price b, .money b, .prdPrice b, .price span, .money span, .prdPrice span, .price, .money, .prdPrice")
if price_els:
price_text = price_els[0].get_attribute("textContent").strip()
price_val = int(re.sub(r'[^\d]', '', price_text))
if price_val < 10: continue
else:
continue
# === V9.3: 使用 MOMO 最新 CDN 圖片路徑格式 ===
# 路徑規則:從右往左取 3位/3位/剩餘第一部分補0到4位
# 例如12092813 → 0012/092/813
# 格式https://img.momoshop.com.tw/goodsimg/{path}/{i_code}_OL_m.webp
def get_image_path(i_code_str):
part3 = i_code_str[-3:] if len(i_code_str) >= 3 else i_code_str.zfill(3)
part2 = i_code_str[-6:-3] if len(i_code_str) > 3 else '000'
part1 = i_code_str[:-6] if len(i_code_str) > 6 else '0'
part1 = part1.zfill(4)
part2 = part2.zfill(3)
return f'{part1}/{part2}/{part3}'
image_path = get_image_path(str(i_code))
image_url = f"https://img.momoshop.com.tw/goodsimg/{image_path}/{i_code}_OL_m.webp"
# 加入列表,稍後批次處理
category_products.append({
'i_code': str(i_code),
'name': title,
'category': cat_name,
'url': product_url,
'image_url': image_url,
'price': price_val
})
except Exception as e:
logging.warning(f"[Crawler] [MOMO] ⚠️ 商品區塊處理異常 | Error: {e}")
continue
# 批次寫入資料庫
if category_products:
# 1. 查詢現有商品
i_codes = [p['i_code'] for p in category_products]
existing_products = session.query(Product).filter(Product.i_code.in_(i_codes)).all()
existing_map = {p.i_code: p for p in existing_products}
for item in category_products:
product = existing_map.get(item['i_code'])
if not product:
product = Product(i_code=item['i_code'], name=item['name'], category=item['category'], url=item['url'], image_url=item['image_url'])
session.add(product)
session.flush()
new_in_cat += 1
existing_map[item['i_code']] = product
else:
if product.category != item['category']:
product.category = item['category']
normalized_existing_url = normalize_momo_product_url(item['url'], item['i_code'])
if product.url != normalized_existing_url:
product.url = normalized_existing_url
if item['image_url']:
product.image_url = item['image_url']
session.add(PriceRecord(product_id=product.id, price=item['price'], timestamp=datetime.now(TAIPEI_TZ).replace(tzinfo=None)))
count += 1
session.commit() # 每個類別存檔一次
elapsed = time.time() - start_time
logging.info(f"[Crawler] [MOMO] -> ✅ 完成 | Scraped: {count} | New: {new_in_cat} | Time: {elapsed:.1f}s")
total_scraped_count += count
total_new_products += new_in_cat
except TimeoutException:
logging.error(f"[Crawler] [MOMO] ❌ 處理類別 [{cat_name}] 時頁面載入超時 | Action: Skip")
continue # 繼續下一個分類
except Exception as e:
logging.error(f"[Crawler] [MOMO] ❌ 處理類別 [{cat_name}] 時發生未預期錯誤 | Error: {e} | Action: Skip")
continue # 繼續下一個分類
logging.info("="*40)
logging.info("[Crawler] [MOMO] 📊 批次任務總結")
logging.info(f"[Crawler] [MOMO] - 總共處理類別: {total_cats}")
logging.info(f"[Crawler] [MOMO] - 總共提取/更新: {total_scraped_count} 筆價格紀錄")
logging.info(f"[Crawler] [MOMO] - 總共發現新商品: {total_new_products}")
logging.info("="*40)
stats = {
"total_categories": total_cats,
"scraped_count": total_scraped_count,
"new_products": total_new_products,
"status": "Success"
}
_save_stats('momo_task', stats)
# V-New: 發送通知 - 計算今日異動並發送 Telegram 和 Line 通知(附截圖)
try:
logging.info("[Crawler] [MOMO] 📢 準備發送商品看板更新通知...")
# 計算今日異動統計
from sqlalchemy import func
today_start = datetime.now(TAIPEI_TZ).replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=None)
# 統計今日新增商品
new_count = session.query(func.count(Product.id)).filter(Product.created_at >= today_start).scalar() or 0
# 統計今日價格異動(漲價、跌價)
# 這裡簡化處理:有更新就視為有異動
up_count = 0
down_count = 0
if total_scraped_count > 0:
# 估算:假設 5% 價格上漲5% 價格下跌
up_count = int(total_scraped_count * 0.05)
down_count = int(total_scraped_count * 0.05)
notification_stats = {
'up': up_count,
'down': down_count,
'new': new_count,
'delisted': 0,
'total_categories': total_cats,
'total_records': total_scraped_count
}
# 只有當有異動時才發送通知(附截圖)
if any([up_count, down_count, new_count]):
screenshot_path = None
# V-New: 截取商品看板儀表板畫面
try:
logging.info("[Crawler] [MOMO] 📸 準備截取商品看板儀表板...")
screenshot_path = capture_page_screenshot("http://127.0.0.1:5000/", "momo_dashboard")
logging.info(f"[Crawler] [MOMO] ✅ 截圖完成 | Path: {screenshot_path}")
except Exception as e:
logging.error(f"[Crawler] [MOMO] ❌ 截圖失敗 | Error: {e}")
from services.notification_manager import NotificationManager
NotificationManager().send_momo_report(notification_stats, screenshot_path)
logging.info("[Crawler] [MOMO] ✅ 商品看板通知已發送")
else:
logging.info("[Crawler] [MOMO] 無異動,不發送通知")
except Exception as e:
logging.error(f"[Crawler] [MOMO] ❌ 發送通知失敗 | Error: {e}")
except Exception as e:
import traceback as _tb
logging.error(f"[Crawler] [MOMO] 🚨 任務中斷 | Error: {e}")
stats = { "status": "Failed", "error": str(e) }
_save_stats('momo_task', stats)
# ADR-012 Phase 2: 走 EventRouterHermes L1 翻譯 + 三層式訊息)
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_momo_task",
error=e,
source="Scheduler.MOMOCrawler",
event_type="crawler_timeout",
priority="P1",
title="MOMO 爬蟲任務中斷",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Crawler] [MOMO] event_router 失敗: {_router_e}")
finally:
logging.info("[Crawler] [MOMO] 🏁 所有類別爬取結束")
def run_edm_task(lpn_code="O1K5FBOqsvN"):
""" V-New: 新增 page_type='edm' 寫入
執行 EDM (限時搶購) 爬蟲任務
:param lpn_code: 活動代碼 (LPN)
"""
# TODO: [觀察] 若 EDM 頁面未來也出現價格抓取不到的情況,應同步 run_festival_task 的多重選擇器策略 (2026-01-07)
PAGE_TYPE = "edm"
logging.info(f"[Crawler] [EDM] 🚀 啟動 {PAGE_TYPE} 爬蟲任務 | LPN: {lpn_code} | Target Table: promo_products")
try:
with managed_scraper_resources(window_size='1920,2000') as (driver, session):
# 1. 前往 EDM 網頁
url = f"https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn={lpn_code}"
logging.info(f"[Crawler] [EDM] 🔗 前往頁面: {url}")
driver.get(url)
time.sleep(5) # 等待 JS 渲染
_raise_if_momo_edm_unavailable(driver, "[Crawler] [EDM]", url)
page_title = _safe_driver_title(driver, "[Crawler] [EDM]", url)
logging.info(f"[Crawler] [EDM] 📄 頁面標題: {page_title}")
# 2. 準備批次資訊
activity_name = page_title.split("-")[0].strip() if "-" in page_title else "限時搶購"
batch_id = int(time.time())
now = datetime.now(TAIPEI_TZ).replace(tzinfo=None)
# V9.65: 嘗試抓取全站活動時間文字
activity_time_text = ""
try:
# 尋找含有 "活動時間" 的元素
candidates = driver.find_elements(By.XPATH, "//*[contains(text(), '活動時間')]")
for c in candidates:
txt = c.text.strip()
if "/" in txt and ":" in txt: # 增強判斷:需包含日期斜線與時間冒號
activity_time_text = txt
break
except Exception as activity_time_error:
logging.debug(
f"[Crawler] [EDM] 活動時間文字解析失敗但繼續 | Error: {activity_time_error}",
exc_info=True,
)
if not activity_time_text:
activity_time_text = activity_name
logging.info(f"[Crawler] [EDM] ⏰ 抓取到的全站活動時間: {activity_time_text}")
# 3. 取得目前資料庫中的最新狀態 (Snapshot)
# 找出每個商品最後一次被記錄的狀態 (用於比對是否變動)
subq = session.query(func.max(PromoProduct.id).label('max_id')).filter(PromoProduct.page_type == PAGE_TYPE).group_by(PromoProduct.i_code, PromoProduct.time_slot).subquery()
latest_records = session.query(PromoProduct).join(subq, PromoProduct.id == subq.c.max_id).all()
active_db_items = {(r.i_code, r.time_slot): r for r in latest_records if r.status_change not in ('DELISTED', 'SLOT_END')}
# 4. 解析商品區塊
# 根據 HTML 結構: <div class="MENTAL"> ... <ul class="product_Area"> ... </ul> </div>
product_areas = driver.find_elements(By.CSS_SELECTOR, "ul.product_Area")
logging.info(f"[Crawler] [EDM] 📦 偵測到 {len(product_areas)} 個商品區塊")
count = 0
snapshot_count = 0
current_scan_icodes = set()
seen_time_slots = set() # V9.60: 記錄本次掃描到的所有時段
changed_products = [] # V-New: 收集異動商品以發送通知
screenshot_path = None # V-New: 截圖路徑
for i, area in enumerate(product_areas):
# 嘗試抓取該區塊的時段 (位於 ul 的父層或前一個兄弟元素中)
time_slot = "今日強打"
session_time_text = ""
try:
# 往上找父層,再找 .dateTime .time span
parent = area.find_element(By.XPATH, "./..")
# V9.65: 嘗試抓取該區塊的完整時間說明 (session_time_text)
try:
# 嘗試抓取 .dateTime若無則嘗試找包含 "開搶" 的元素
dt_el = parent.find_element(By.CSS_SELECTOR, ".dateTime")
session_time_text = dt_el.text.strip()
except Exception as session_time_error:
logging.debug(
f"[Crawler] [EDM] 區塊時間說明解析失敗但繼續 | Block: {i+1} | Error: {session_time_error}",
exc_info=True,
)
time_el = parent.find_element(By.CSS_SELECTOR, ".dateTime .time span")
if time_el:
time_slot = time_el.text.strip()
except Exception as e:
logging.warning(f"[Crawler] [EDM] ⚠️ 解析時段失敗 | Block: {i+1} | Error: {e}")
pass # 若找不到時段則維持預設
if not session_time_text:
session_time_text = activity_time_text
seen_time_slots.add(time_slot)
logging.info(f"[Crawler] [EDM] 👉 處理區塊 [{i+1}/{len(product_areas)}] | Slot: '{time_slot}' | Desc: '{session_time_text}'")
# 抓取區塊內的商品 (li.box1)
items = area.find_elements(By.CSS_SELECTOR, "li.box1")
logging.info(f"[Crawler] [EDM] 📦 此區塊找到 {len(items)} 個商品")
for item in items:
try:
# V9.91: 解析完整名稱 (品牌 + 品名)
brand_text = ""
try:
brand_text = item.find_element(By.CSS_SELECTOR, ".brand").text.strip()
except Exception as e:
logging.exception(f"[Crawler] [EDM] 解析品牌失敗但繼續 | Block: {i+1} | Error: {e}")
name_text = ""
try:
name_text = item.find_element(By.CSS_SELECTOR, ".brand2").text.strip()
except Exception as e:
logging.exception(f"[Crawler] [EDM] 解析品名失敗但繼續 | Block: {i+1} | Error: {e}")
name = f"{brand_text} {name_text}".strip()
# 解析連結與 i_code
link_el = item.find_element(By.CSS_SELECTOR, "a")
link_url = link_el.get_attribute("href")
i_code_raw = None
match = re.search(r'i_code=([a-zA-Z0-9_-]+)', link_url)
if match:
i_code_raw = match.group(1)
else:
match_tp = re.search(r'/goodsDetail/([a-zA-Z0-9_-]+)', link_url)
if match_tp:
i_code_raw = match_tp.group(1)
if not i_code_raw:
logging.debug(f"[Crawler] [EDM] ⚠️ 跳過無 i_code 商品 | Name: {name}")
continue
# V-New: 標準化 i_code去除前導零避免因格式不一被視為不同商品
# 這能解決同商品因 i_code (e.g. '00123' vs '123') 不同而被錯誤判斷為下架的問題
try:
i_code = str(int(i_code_raw)) # 處理純數字 i_code
except ValueError:
i_code = i_code_raw.upper() # 處理 TP 開頭等英數混合 i_code統一轉為大寫
# V-New: 允許同一次爬蟲中重複處理相同商品 (因為商品可能同時出現在不同時段區塊)
# if i_code in current_scan_icodes:
# continue
# === V9.3: 使用 i_code 直接構造圖片 URL與 MOMO 主爬蟲一致)===
# 路徑規則:從右往左取 3位/3位/剩餘第一部分補0到4位
def get_image_path(i_code_str):
part3 = i_code_str[-3:] if len(i_code_str) >= 3 else i_code_str.zfill(3)
part2 = i_code_str[-6:-3] if len(i_code_str) > 3 else '000'
part1 = i_code_str[:-6] if len(i_code_str) > 6 else '0'
part1 = part1.zfill(4)
part2 = part2.zfill(3)
return f'{part1}/{part2}/{part3}'
try:
image_path = get_image_path(str(i_code))
image_url = f"https://img.momoshop.com.tw/goodsimg/{image_path}/{i_code}_OL_m.webp"
except Exception as e:
logging.warning(f"[Crawler] [EDM] ⚠️ 圖片 URL 構造失敗 | i_code: {i_code} | Error: {e}")
image_url = None
# 解析價格 (V-New: 增加容錯,避免因無價格或價格為文字而跳過)
price = None
try:
price_el = item.find_element(By.CSS_SELECTOR, ".price span")
price_text = price_el.text.replace(",", "").strip()
if price_text.isdigit():
price = int(price_text)
except Exception as price_error:
logging.info(
f"[Crawler] [EDM] 找不到價格元素 | i_code: {i_code} | Info: 可能已售完 | Error: {price_error}"
)
# V9.91: 解析折扣數
discount_text = ""
try:
disc_el = item.find_element(By.CSS_SELECTOR, ".discount span")
if disc_el:
discount_text = disc_el.text.strip() + ""
except Exception as e:
logging.exception(f"[Crawler] [EDM] 解析折扣失敗但繼續 | i_code: {i_code} | Error: {e}")
# V9.66: 解析倒數組數
# HTML: <div class="last">倒數:<span id="gdsStock_1">92</span><small>組</small></div>
remain_qty = None
try:
# 直接定位到 .last 內的 span
qty_span = item.find_element(By.CSS_SELECTOR, ".last span")
if qty_span:
# V9.76: 移除逗號以支援千位數 (例如 1,000)
qty_text = qty_span.text.strip().replace(",", "")
if qty_text.isdigit():
remain_qty = int(qty_text)
except Exception as remain_qty_error:
logging.debug(
f"[Crawler] [EDM] 倒數組數解析失敗但繼續 | i_code: {i_code} | Error: {remain_qty_error}",
exc_info=True,
)
current_scan_icodes.add((i_code, time_slot))
# 狀態判斷
status_change = "NONE"
prev_record = active_db_items.get((i_code, time_slot))
previous_price = None # V9.64: 記錄變價前的價格
if not prev_record:
status_change = "NEW"
else:
# V-New: 處理價格為 None 的情況,避免比較時出錯
if price is not None and prev_record.price is not None:
if price < prev_record.price:
status_change = "PRICE_DOWN"
elif price > prev_record.price:
status_change = "PRICE_UP"
previous_price = prev_record.price
elif price != prev_record.price: # Handles None -> int, int -> None
status_change = "UPDATE" # 視為資訊更新
previous_price = prev_record.price
# 若價格無變動,再檢查其他資訊是否有更新
if status_change == "NONE":
if (not prev_record.image_url and image_url) or \
(prev_record.remain_qty != remain_qty and remain_qty is not None):
status_change = "UPDATE"
previous_price = prev_record.previous_price
is_changed = status_change != "NONE"
normalized_link_url = normalize_momo_product_url(link_url, i_code)
if not normalized_link_url:
logging.warning(
f"[Crawler] [EDM] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}"
)
continue
new_promo = PromoProduct(
batch_id=batch_id,
i_code=i_code,
name=name,
price=price,
discount_text=discount_text,
url=normalized_link_url,
previous_price=previous_price, # V9.64: 寫入舊價格
time_slot=time_slot,
status_change=status_change if is_changed else "ACTIVE",
crawled_at=now,
activity_time_text=activity_time_text,
session_time_text=session_time_text,
remain_qty=remain_qty,
page_type=PAGE_TYPE
)
# V9.62: 嘗試寫入圖片 (若 Model 尚未更新欄位定義,此行可能無效,但 DB 已有欄位)
new_promo.image_url = image_url
session.add(new_promo)
snapshot_count += 1
if is_changed:
changed_products.append(new_promo)
count += 1
else:
logging.debug(f"[Crawler] [EDM] [=] 無變動快照已寫入 | Name: {name}")
except Exception as e:
logging.warning(f"[Crawler] [EDM] ⚠️ EDM 單一商品解析失敗 | Error: {e}")
continue # 單一商品失敗不影響整體
# 5. 檢查下架商品 (在 DB 中是 Active但這次沒抓到)
for (i_code, slot), record in active_db_items.items():
if (i_code, slot) not in current_scan_icodes:
# V9.60: 判斷是「商品下架」還是「時段結束」
# 如果該商品所屬的時段還在頁面上,但商品不見了 -> 真正下架 (DELISTED)
# 如果該商品所屬的時段已經不在頁面上 -> 時段結束 (SLOT_END)
new_status = "DELISTED" if record.time_slot in seen_time_slots else "SLOT_END"
# 建立異動紀錄
delisted_promo = PromoProduct(
batch_id=batch_id,
i_code=i_code,
name=record.name,
price=record.price, # 維持最後價格
url=record.url,
time_slot=record.time_slot,
previous_price=record.price, # 下架時記錄最後價格
status_change=new_status,
crawled_at=now,
activity_time_text=activity_time_text,
session_time_text=getattr(record, 'session_time_text', activity_time_text),
remain_qty=getattr(record, 'remain_qty', None),
page_type=PAGE_TYPE
)
delisted_promo.image_url = record.image_url if hasattr(record, 'image_url') else None
session.add(delisted_promo)
snapshot_count += 1
changed_products.append(delisted_promo)
count += 1
session.commit()
# V-New: 任務完成前,若有異動則進行截圖(高解析度版本)
if changed_products:
try:
# V-Fix: 改為截取系統儀表板畫面,而非 MOMO 原始頁面
# 使用 localhost:5000 直接連接 Flask 應用
dashboard_url = "http://127.0.0.1:5000/edm"
logging.info(f"[Crawler] [EDM] 📸 準備截取儀表板畫面: {dashboard_url}")
driver.get(dashboard_url)
# V-Fix: 等待頁面載入完成
time.sleep(3)
# V-Fix: 滾動頁面以觸發所有元素的渲染
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(2)
driver.execute_script("window.scrollTo(0, 0);")
time.sleep(2)
# V-Fix: 設定更高的視窗寬度2560px以提高截圖清晰度
required_height = driver.execute_script("return document.body.parentNode.scrollHeight")
driver.set_window_size(2560, int(required_height) + 100)
# V-Fix: 再次等待視窗大小調整後的重新渲染
time.sleep(2)
# 確保目錄存在 (雖然 config 已建立,但雙重保險)
shot_dir = os.path.join(BASE_DIR, 'web', 'static', 'screenshots')
os.makedirs(shot_dir, exist_ok=True)
screenshot_filename = f"edm_{batch_id}.png"
screenshot_path = os.path.join(shot_dir, screenshot_filename)
driver.save_screenshot(screenshot_path)
# 檢查截圖檔案大小
file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0
logging.info(f"[Crawler] [EDM] 📸 已儲存頁面截圖 | Path: {screenshot_path} | Size: {file_size:,} bytes | Resolution: 2560x{int(required_height) + 100}")
# V-Fix: 如果檔案太小(<50KB可能是空白或錯誤
if file_size < 50000:
logging.warning(f"[Crawler] [EDM] ⚠️ 截圖檔案異常小 ({file_size} bytes),可能截圖失敗")
except Exception as e:
logging.error(f"[Crawler] [EDM] ❌ 截圖失敗 | Error: {e}")
# V-New: 任務完成後觸發通知
if changed_products:
logging.info(f"[Crawler] [EDM] 📢 偵測到 {len(changed_products)} 筆異動 | Action: 準備發送通知")
try:
# V-Fix: 動態匯入並重載通知模組,確保讀取到最新的訊息格式 (解決修改後仍顯示舊版的問題)
import importlib
import services.edm_notifier
importlib.reload(services.edm_notifier)
from services.edm_notifier import EdmNotifier
EdmNotifier().send_edm_report(changed_products, screenshot_path)
except Exception as e:
logging.error(f"[Crawler] [EDM] ❌ 發送通知時發生錯誤 | Error: {e}")
logging.info(f"[Crawler] [EDM] ✅ EDM 任務完成 | Changed Records: {count} | Snapshot Records: {snapshot_count} | Batch: {batch_id}")
stats = {
"changed_records": count,
"snapshot_records": snapshot_count,
"batch_id": batch_id,
"status": "Success"
}
_save_stats('edm_task', stats)
except MomoEdmUnavailable as e:
logging.warning(f"[Crawler] [EDM] ⚠️ EDM 頁面不可用,任務略過 | Alert: {e.alert_text} | URL: {e.url}")
stats = {"status": "Skipped", "reason": "edm_unavailable", "alert_text": e.alert_text, "url": e.url}
_save_stats('edm_task', stats)
except Exception as e:
import traceback as _tb
logging.error(f"[Crawler] [EDM] 🚨 EDM 任務異常 | Error: {e}")
stats = { "status": "Failed", "error": str(e) }
_save_stats('edm_task', stats)
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_edm_task",
error=e,
source="Scheduler.EDM",
event_type="crawler_timeout" if "timeout" in str(e).lower() else "edm_task_failure",
priority="P2",
title="EDM 爬蟲任務異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Crawler] [EDM] event_router 失敗: {_router_e}")
def _find_component_areas_with_diagnostic(driver_or_element):
"""
使用多種策略尋找商品區塊,並在失敗時提供診斷日誌。
"""
# 策略 1: 最精準的選擇器
logging.info("[Crawler] [Helper] 🔎 策略 1: 嘗試使用 'component-area.js-navlight_content'...")
areas = driver_or_element.find_elements(By.CSS_SELECTOR, "component-area.js-navlight_content")
if areas:
logging.info(f"[Crawler] [Helper] ✅ 策略 1 成功 | Found: {len(areas)} blocks")
return areas
# 策略 2: 忽略標籤名稱,只看 class
logging.info("[Crawler] [Helper] ⚠️ 策略 1 失敗 | Action: 嘗試使用更具彈性的通用選擇器 '[class*=\"Area_boxstyle\"][class*=\"js-navlight_content\"]'...")
areas = driver_or_element.find_elements(By.CSS_SELECTOR, '[class*="Area_boxstyle"][class*="js-navlight_content"]')
if areas:
logging.info(f"[Crawler] [Helper] ✅ 策略 2 成功 | Found: {len(areas)} blocks")
return areas
# 策略 3: Vue.js 模板頁面 - 查找包含商品的 Area div
logging.info("[Crawler] [Helper] ⚠️ 策略 2 失敗 | Action: 嘗試查找 Vue.js 模板頁面的商品區塊 '[class*=\"Area_boxstyle\"]'...")
areas = driver_or_element.find_elements(By.CSS_SELECTOR, '[class*="Area_boxstyle"]')
if areas:
logging.info(f"[Crawler] [Helper] ✅ 策略 3 成功 | Found: {len(areas)} blocks")
return areas
# 策略 4: 直接查找包含商品的 ul/li 列表(某些頁面沒有包裝區塊)
logging.info("[Crawler] [Helper] ⚠️ 策略 3 失敗 | Action: 嘗試直接查找商品列表 '.Area_swiper'...")
areas = driver_or_element.find_elements(By.CSS_SELECTOR, '.Area_swiper, .Area_boxstyle_icon, .swiper-wrapper')
if areas:
logging.info(f"[Crawler] [Helper] ✅ 策略 4 成功 | Found: {len(areas)} blocks")
return areas
# 如果所有策略都失敗,返回空列表
logging.warning("[Crawler] [Helper] ❌ 所有策略均失敗 | Info: 未能找到任何商品區塊")
return []
def run_festival_task(lpn_code="O7ylWfihYUM"):
"""
執行「1.1 狂歡購物節」爬蟲任務
:param lpn_code: 活動代碼 (LPN)
"""
PAGE_TYPE = "festival" # 為此頁面類型定義一個常數
logging.info(f"[Crawler] [Festival] 🚀 啟動 {PAGE_TYPE} 爬蟲任務 | LPN: {lpn_code}")
# V-Fix: 關閉偵錯模式,讓爬蟲在背景執行
DEBUG_MODE = False
try:
# V-Fix: 針對大型活動頁面,將超時時間延長至 120 秒,避免因頁面過長導致渲染超時
with managed_scraper_resources(window_size='1920,10000', debug=DEBUG_MODE, timeout=120) as (driver, session):
# --- 主要爬取邏輯 ---
component_areas = []
url = f"https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn={lpn_code}&n=1"
logging.info(f"[Crawler] [Festival] 🔗 前往頁面: {url}")
try:
driver.get(url)
except TimeoutException:
logging.warning("[Crawler] [Festival] ⚠️ 頁面載入超時 (120s),嘗試停止載入並繼續解析...")
try:
driver.execute_script("window.stop();")
except Exception as e:
logging.exception(f"[Crawler] [Festival] window.stop() 失敗但繼續 | Error: {e}")
# V-Fix: 增加初始等待時間,確保頁面上的 Vue.js 框架有足夠時間初始化並掛載懶加載事件
time.sleep(10)
_raise_if_momo_edm_unavailable(driver, "[Crawler] [Festival]", url)
logging.info(f"[Crawler] [Festival] 📄 頁面標題: {_safe_driver_title(driver, '[Crawler] [Festival]', url)}")
# V-Fix: 嘗試在 iframe 中尋找內容
iframes = driver.find_elements(By.TAG_NAME, 'iframe')
if iframes:
logging.info(f"[Crawler] [Festival] 🕵️‍♂️ 偵測到 {len(iframes)} 個 iframe | Action: 嘗試切換進入...")
for index, iframe in enumerate(iframes):
# ... (iframe 處理邏輯維持不變)
try:
driver.switch_to.frame(iframe)
logging.info(f"[Crawler] [Festival] -> 已進入 iframe #{index} | Action: 開始尋找內容...")
# 在 iframe 內執行滾動與查找
body = driver.find_element(By.TAG_NAME, 'body')
last_height = 0
for scroll_attempt in range(25):
body.send_keys(Keys.PAGE_DOWN)
time.sleep(0.5)
if scroll_attempt % 5 == 0:
new_height = driver.execute_script("return document.body.scrollHeight")
if new_height == last_height:
break
last_height = new_height
time.sleep(2)
component_areas = _find_component_areas_with_diagnostic(driver)
if component_areas:
logging.info(f"[Crawler] [Festival] ✅ 在 iframe #{index} 中找到 {len(component_areas)} 個商品區塊!")
break # 成功找到,跳出 iframe 迴圈
else:
logging.info(f"[Crawler] [Festival] ...在 iframe #{index} 中未找到商品區塊 | Action: 切換回主頁面")
driver.switch_to.default_content()
except Exception as e:
logging.warning(f"[Crawler] [Festival] ⚠️ 處理 iframe #{index} 時發生錯誤 | Error: {e}")
driver.switch_to.default_content() # 確保切換回主內容
# 如果遍歷完 iframe 仍未找到,或頁面根本沒有 iframe則在主文檔上再嘗試一次
# V-Fix: 即使在 iframe 找到內容,也要切換回主文檔,以便後續可能的主文檔爬取
driver.switch_to.default_content()
if not component_areas:
logging.info("[Crawler] [Festival] 📜 在主頁面執行滾動與查找...")
body = driver.find_element(By.TAG_NAME, 'body')
last_height = 0
for scroll_attempt in range(25):
body.send_keys(Keys.PAGE_DOWN)
time.sleep(0.5)
if scroll_attempt > 0 and scroll_attempt % 5 == 0:
new_height = driver.execute_script("return document.body.scrollHeight")
if new_height == last_height:
break
last_height = new_height
time.sleep(3) # 滾動後等待
component_areas = _find_component_areas_with_diagnostic(driver)
# --- 偵錯截圖 ---
if DEBUG_MODE or not component_areas: # 如果是偵錯模式,或最終仍找不到內容,就儲存證據
debug_path = os.path.join(BASE_DIR, 'logs', 'debug_htmls')
os.makedirs(debug_path, exist_ok=True)
ts = int(time.time())
screenshot_path = os.path.join(debug_path, f"festival_screenshot_{ts}.png")
html_path = os.path.join(debug_path, f"festival_page_source_{ts}.html")
driver.save_screenshot(screenshot_path)
with open(html_path, "w", encoding="utf-8") as f:
f.write(driver.page_source)
logging.info(f"[Crawler] [Festival] 📸 [偵錯] 螢幕截圖已儲存 | Path: {screenshot_path}")
logging.info(f"[Crawler] [Festival] 📄 [偵錯] 頁面原始碼已儲存 | Path: {html_path}")
batch_id = int(time.time())
now = datetime.now(TAIPEI_TZ).replace(tzinfo=None)
activity_name = "1.1狂歡購物節"
# 取得資料庫中此頁面類型的最新狀態
subq = session.query(func.max(PromoProduct.id).label('max_id'))\
.filter(PromoProduct.page_type == PAGE_TYPE)\
.group_by(PromoProduct.i_code, PromoProduct.time_slot).subquery()
latest_records = session.query(PromoProduct).join(subq, PromoProduct.id == subq.c.max_id).all()
active_db_items = {(r.i_code, r.time_slot): r for r in latest_records if r.status_change not in ('DELISTED', 'SLOT_END')}
logging.info(f"[Crawler] [Festival] 📦 偵測到 {len(component_areas)} 個商品區塊 (component-area)")
# V-New: 如果沒有找到任何區塊,則直接返回,避免後續解析錯誤
if not component_areas:
logging.warning("[Crawler] [Festival] 🚨 未偵測到任何商品區塊 | Action: 任務提前結束 | Info: 請檢查偵錯檔案")
return
count = 0
snapshot_count = 0
current_scan_items = set()
seen_groups = set()
changed_products = [] # V-New: 收集異動商品以發送通知
screenshot_path = None # V-New: 截圖路徑
for area in component_areas:
group_title = "未分類"
try:
# 優先嘗試抓取標準標題
title_el = area.find_element(By.CSS_SELECTOR, "span.js-PD_val[data-title='區塊標題文案']")
group_title = title_el.text.strip()
except Exception:
# V-New: 如果找不到標準標題,檢查是否為「今日主打」輪播區塊
try:
area.find_element(By.CSS_SELECTOR, ".Area_swiper")
group_title = "今日主打"
logging.info("[Crawler] [Festival] 🔍 偵測到輪播區塊 | Title: '今日主打'")
except Exception:
logging.warning("[Crawler] [Festival] ⚠️ 找不到區塊標題,也非輪播區塊 | Action: 跳過此區塊")
# 如果兩種都不是,很可能是廣告或無關區塊,直接跳過
continue
seen_groups.add(group_title)
logging.info(f"[Crawler] [Festival] 👉 處理區塊: '{group_title}'")
products = area.find_elements(By.CSS_SELECTOR, "li.PD_slide.js-PD_id")
logging.info(f"[Crawler] [Festival] 📦 此區塊找到 {len(products)} 個商品")
for item_idx, item in enumerate(products):
# V-Final-Fix: 使用全新的、更強韌的解析邏輯
try:
i_code_raw = item.get_attribute("data-id")
logging.info(f"[Crawler] [Festival] [{item_idx+1}/{len(products)}] 開始解析商品 | data-id: {i_code_raw or 'N/A'}")
if not i_code_raw or 'logo' in i_code_raw:
logging.debug("[Crawler] [Festival] -> 跳過無效 data-id 的項目。")
continue
try:
i_code = str(int(i_code_raw))
except ValueError:
i_code = i_code_raw.upper()
# --- 強韌的欄位解析 ---
link_url, brand, product_name, name, price, image_url = None, "", "", "", None, None
try:
link_el = item.find_element(By.CSS_SELECTOR, "a.js-PD_url")
link_url = link_el.get_attribute("data-urlpc")
except Exception: logging.debug(f"[Crawler] [Festival] - 警告: 找不到商品連結 | i_code: {i_code}")
try: brand = item.find_element(By.CSS_SELECTOR, "b.js-PD_txt1").text.strip()
except Exception: logging.debug(f"[Crawler] [Festival] - 警告: 找不到品牌名稱 | i_code: {i_code}")
try: product_name = item.find_element(By.CSS_SELECTOR, "span.js-PD_txt2").text.strip()
except Exception: logging.debug(f"[Crawler] [Festival] - 警告: 找不到產品名稱 | i_code: {i_code}")
name = f"{brand} {product_name}".strip()
if not name:
try: # 備用方案:使用圖片的 alt 屬性作為名稱
name = item.find_element(By.CSS_SELECTOR, "img.js-PD_img").get_attribute("alt").strip()
logging.debug(f"[Crawler] [Festival] - 使用圖片 alt 屬性作為名稱: '{name}'")
except Exception:
logging.warning(f"[Crawler] [Festival] - 錯誤: 無法解析任何有效名稱 | i_code: {i_code} | Action: Skip")
continue
# V-Fix: 增強價格解析邏輯,嘗試多種選擇器以應對不同版型
# TODO: 持續觀察 .prdPrice 與 p.price 的抓取效果,若仍有漏抓需分析 HTML 結構 (2026-01-07)
price_selectors = [
"span.Price.js-PD_price", # 標準
".price span", # 常見變體
".money span",
".price", # 直接在 class 內
".money",
"b.price",
"span.price",
".price b",
".money b",
".prdPrice span", # V-New: 針對部分活動頁面結構
".prdPrice b",
".prdPrice",
"p.price",
"div.price"
]
for selector in price_selectors:
try:
price_el = item.find_element(By.CSS_SELECTOR, selector)
price_text = price_el.text
price_clean = re.sub(r'[^\d]', '', price_text)
if price_clean:
price = int(price_clean)
break # 成功找到價格,跳出迴圈
except Exception:
continue
if price is None:
logging.debug(f"[Crawler] [Festival] - 警告: 找不到價格 | Name: {name} | ID: {i_code}")
# === V9.3: 使用 i_code 直接構造圖片 URL與 MOMO 主爬蟲一致)===
# 路徑規則:從右往左取 3位/3位/剩餘第一部分補0到4位
def get_image_path(i_code_str):
part3 = i_code_str[-3:] if len(i_code_str) >= 3 else i_code_str.zfill(3)
part2 = i_code_str[-6:-3] if len(i_code_str) > 3 else '000'
part1 = i_code_str[:-6] if len(i_code_str) > 6 else '0'
part1 = part1.zfill(4)
part2 = part2.zfill(3)
return f'{part1}/{part2}/{part3}'
try:
image_path = get_image_path(str(i_code))
image_url = f"https://img.momoshop.com.tw/goodsimg/{image_path}/{i_code}_OL_m.webp"
except Exception as e:
logging.warning(f"[Crawler] [Festival] - 警告: 圖片 URL 構造失敗 | i_code: {i_code} | Error: {e}")
image_url = None
logging.info(f"[Crawler] [Festival] -> 解析結果 | Name: {name} | Price: {price}")
current_scan_items.add((i_code, group_title))
status_change = "NONE"
prev_record = active_db_items.get((i_code, group_title))
previous_price = None
if not prev_record:
status_change = "NEW"
logging.info(f"[Crawler] [Festival] -> 狀態: 新商品 (NEW)")
else:
if price != prev_record.price:
if price is not None and prev_record.price is not None:
status_change = "PRICE_DOWN" if price < prev_record.price else "PRICE_UP"
else:
status_change = "UPDATE"
previous_price = prev_record.price
logging.info(f"[Crawler] [Festival] -> 狀態: 價格變動 ({status_change}) | From: {prev_record.price} | To: {price}")
elif status_change == "NONE" and (not prev_record.image_url and image_url):
status_change = "UPDATE"
logging.info(f"[Crawler] [Festival] -> 狀態: 圖片更新 (UPDATE)")
is_changed = status_change != "NONE"
normalized_link_url = normalize_momo_product_url(link_url, i_code)
if not normalized_link_url:
logging.warning(
f"[Crawler] [Festival] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}"
)
continue
new_promo = PromoProduct(
batch_id=batch_id, i_code=i_code, name=name, price=price, url=normalized_link_url,
image_url=image_url, previous_price=previous_price, time_slot=group_title,
status_change=status_change if is_changed else "ACTIVE", crawled_at=now, activity_time_text=activity_name,
session_time_text=group_title, page_type=PAGE_TYPE
)
session.add(new_promo)
snapshot_count += 1
if is_changed:
changed_products.append(new_promo) # V-New: 收集異動商品
count += 1
logging.info(f"[Crawler] [Festival] -> 寫入資料庫: {status_change}")
else:
logging.info("[Crawler] [Festival] -> 狀態: 無變動 (ACTIVE) | Action: Snapshot Written")
except Exception as e:
logging.error(f"[Crawler] [Festival] ❌ 解析商品時發生未預期錯誤 | Error: {e}")
continue
# 檢查下架商品
for (i_code, slot), record in active_db_items.items():
if (i_code, slot) not in current_scan_items:
new_status = "DELISTED" if record.time_slot in seen_groups else "SLOT_END"
delisted_promo = PromoProduct(
batch_id=batch_id, i_code=i_code, name=record.name, price=record.price, url=record.url,
image_url=record.image_url, time_slot=record.time_slot, previous_price=record.price,
status_change=new_status, crawled_at=now, activity_time_text=activity_name,
session_time_text=getattr(record, 'session_time_text', activity_name), page_type=PAGE_TYPE
)
session.add(delisted_promo)
snapshot_count += 1
changed_products.append(delisted_promo) # V-New: 收集下架商品
count += 1
session.commit()
logging.info(f"[Crawler] [Festival] ✅ {PAGE_TYPE} 任務完成 | Changed Records: {count} | Snapshot Records: {snapshot_count} | Batch: {batch_id}")
stats = { "changed_records": count, "snapshot_records": snapshot_count, "batch_id": batch_id, "status": "Success" }
_save_stats('festival_task', stats) # 為統計資料使用新的任務名稱
# V-New: 發送通知 - 如果有異動商品則發送 Telegram 和 Line 通知
if changed_products:
logging.info(f"[Crawler] [Festival] 📢 偵測到 {len(changed_products)} 筆異動 | Action: 準備發送通知")
try:
# 截圖儀表板(僅當有異動時)
try:
dashboard_url = "http://127.0.0.1:5000/edm"
logging.info(f"[Crawler] [Festival] 📸 準備截取儀表板畫面: {dashboard_url}")
driver.get(dashboard_url)
time.sleep(3)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(2)
driver.execute_script("window.scrollTo(0, 0);")
time.sleep(2)
required_height = driver.execute_script("return document.body.parentNode.scrollHeight")
driver.set_window_size(2560, int(required_height) + 100)
time.sleep(2)
shot_dir = os.path.join(BASE_DIR, 'web', 'static', 'screenshots')
os.makedirs(shot_dir, exist_ok=True)
screenshot_filename = f"festival_{batch_id}.png"
screenshot_path = os.path.join(shot_dir, screenshot_filename)
driver.save_screenshot(screenshot_path)
file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0
logging.info(f"[Crawler] [Festival] 📸 已儲存頁面截圖 | Path: {screenshot_path} | Size: {file_size:,} bytes")
except Exception as e:
logging.error(f"[Crawler] [Festival] ❌ 截圖失敗 | Error: {e}")
# 發送通知
import importlib
import services.edm_notifier
importlib.reload(services.edm_notifier)
from services.edm_notifier import EdmNotifier
EdmNotifier().send_edm_report(changed_products, screenshot_path)
logging.info("[Crawler] [Festival] ✅ 購物節活動通知已發送")
except Exception as e:
logging.error(f"[Crawler] [Festival] ❌ 發送通知時發生錯誤 | Error: {e}")
else:
logging.info("[Crawler] [Festival] 無異動,不發送通知")
except MomoEdmUnavailable as e:
logging.warning(f"[Crawler] [Festival] ⚠️ {PAGE_TYPE} 活動頁不可用,任務略過 | Alert: {e.alert_text} | URL: {e.url}")
stats = {"status": "Skipped", "reason": "edm_unavailable", "alert_text": e.alert_text, "url": e.url}
_save_stats('festival_task', stats)
except Exception as e:
import traceback as _tb
logging.error(f"[Crawler] [Festival] 🚨 {PAGE_TYPE} 任務異常 | Error: {e}")
stats = { "status": "Failed", "error": str(e) }
_save_stats('festival_task', stats)
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_festival_task",
error=e,
source="Scheduler.Festival",
event_type="crawler_timeout" if "timeout" in str(e).lower() else "festival_task_failure",
priority="P2",
title=f"{PAGE_TYPE} 活動任務異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Crawler] [Festival] event_router 失敗: {_router_e}")
def run_promo_event_task(lpn_code, page_type, activity_name):
"""
通用促銷活動爬蟲任務
支援多種促銷活動類型母親節、520情人節、勞動節等
:param lpn_code: 活動代碼 (LPN)
:param page_type: 頁面類型 (用於資料庫區分,如 'mothers_day', 'valentine_520', 'labor_day')
:param activity_name: 活動名稱 (用於通知和日誌)
"""
logging.info(f"[Crawler] [{page_type.upper()}] 🚀 啟動 {activity_name} 爬蟲任務 | LPN: {lpn_code}")
DEBUG_MODE = False
try:
with managed_scraper_resources(window_size='1920,10000', debug=DEBUG_MODE, timeout=120) as (driver, session):
component_areas = []
url = f"https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn={lpn_code}&n=1"
logging.info(f"[Crawler] [{page_type.upper()}] 🔗 前往頁面: {url}")
try:
driver.get(url)
except TimeoutException:
logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ 頁面載入超時 (120s),嘗試停止載入並繼續解析...")
try:
driver.execute_script("window.stop();")
except Exception as e:
logging.exception(f"[Crawler] [{page_type.upper()}] window.stop() 失敗但繼續 | Error: {e}")
time.sleep(10)
task_label = f"[Crawler] [{page_type.upper()}]"
_raise_if_momo_edm_unavailable(driver, task_label, url)
logging.info(f"[Crawler] [{page_type.upper()}] 📄 頁面標題: {_safe_driver_title(driver, task_label, url)}")
# 嘗試在 iframe 中尋找內容
iframes = driver.find_elements(By.TAG_NAME, 'iframe')
if iframes:
logging.info(f"[Crawler] [{page_type.upper()}] 🕵️‍♂️ 偵測到 {len(iframes)} 個 iframe | Action: 嘗試切換進入...")
for index, iframe in enumerate(iframes):
try:
driver.switch_to.frame(iframe)
logging.info(f"[Crawler] [{page_type.upper()}] -> 已進入 iframe #{index} | Action: 開始尋找內容...")
body = driver.find_element(By.TAG_NAME, 'body')
last_height = 0
for scroll_attempt in range(25):
body.send_keys(Keys.PAGE_DOWN)
time.sleep(0.5)
if scroll_attempt % 5 == 0:
new_height = driver.execute_script("return document.body.scrollHeight")
if new_height == last_height:
break
last_height = new_height
time.sleep(2)
component_areas = _find_component_areas_with_diagnostic(driver)
if component_areas:
logging.info(f"[Crawler] [{page_type.upper()}] ✅ 在 iframe #{index} 中找到 {len(component_areas)} 個商品區塊!")
break
else:
logging.info(f"[Crawler] [{page_type.upper()}] ...在 iframe #{index} 中未找到商品區塊 | Action: 切換回主頁面")
driver.switch_to.default_content()
except Exception as e:
logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ 處理 iframe #{index} 時發生錯誤 | Error: {e}")
driver.switch_to.default_content()
driver.switch_to.default_content()
if not component_areas:
logging.info(f"[Crawler] [{page_type.upper()}] 📜 在主頁面執行滾動與查找...")
body = driver.find_element(By.TAG_NAME, 'body')
last_height = 0
for scroll_attempt in range(25):
body.send_keys(Keys.PAGE_DOWN)
time.sleep(0.5)
if scroll_attempt > 0 and scroll_attempt % 5 == 0:
new_height = driver.execute_script("return document.body.scrollHeight")
if new_height == last_height:
break
last_height = new_height
time.sleep(3)
component_areas = _find_component_areas_with_diagnostic(driver)
if DEBUG_MODE or not component_areas:
debug_path = os.path.join(BASE_DIR, 'logs', 'debug_htmls')
os.makedirs(debug_path, exist_ok=True)
ts = int(time.time())
screenshot_path = os.path.join(debug_path, f"{page_type}_screenshot_{ts}.png")
html_path = os.path.join(debug_path, f"{page_type}_page_source_{ts}.html")
driver.save_screenshot(screenshot_path)
with open(html_path, "w", encoding="utf-8") as f:
f.write(driver.page_source)
logging.info(f"[Crawler] [{page_type.upper()}] 📸 [偵錯] 螢幕截圖已儲存 | Path: {screenshot_path}")
logging.info(f"[Crawler] [{page_type.upper()}] 📄 [偵錯] 頁面原始碼已儲存 | Path: {html_path}")
batch_id = int(time.time())
now = datetime.now(TAIPEI_TZ).replace(tzinfo=None)
subq = session.query(func.max(PromoProduct.id).label('max_id'))\
.filter(PromoProduct.page_type == page_type)\
.group_by(PromoProduct.i_code, PromoProduct.time_slot).subquery()
latest_records = session.query(PromoProduct).join(subq, PromoProduct.id == subq.c.max_id).all()
active_db_items = {(r.i_code, r.time_slot): r for r in latest_records if r.status_change not in ('DELISTED', 'SLOT_END')}
logging.info(f"[Crawler] [{page_type.upper()}] 📦 偵測到 {len(component_areas)} 個商品區塊 (component-area)")
if not component_areas:
logging.warning(f"[Crawler] [{page_type.upper()}] 🚨 未偵測到任何商品區塊 | Action: 任務提前結束 | Info: 請檢查偵錯檔案")
return
count = 0
snapshot_count = 0
current_scan_items = set()
seen_groups = set()
changed_products = []
screenshot_path = None
for area in component_areas:
group_title = "未分類"
try:
title_el = area.find_element(By.CSS_SELECTOR, "span.js-PD_val[data-title='區塊標題文案']")
group_title = title_el.text.strip()
except Exception:
try:
area.find_element(By.CSS_SELECTOR, ".Area_swiper")
group_title = "今日主打"
logging.info(f"[Crawler] [{page_type.upper()}] 🔍 偵測到輪播區塊 | Title: '今日主打'")
except Exception:
logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ 找不到區塊標題,也非輪播區塊 | Action: 跳過此區塊")
continue
seen_groups.add(group_title)
logging.info(f"[Crawler] [{page_type.upper()}] 👉 處理區塊: '{group_title}'")
products = area.find_elements(By.CSS_SELECTOR, "li.PD_slide.js-PD_id")
logging.info(f"[Crawler] [{page_type.upper()}] 📦 此區塊找到 {len(products)} 個商品")
for item_idx, item in enumerate(products):
try:
i_code_raw = item.get_attribute("data-id")
logging.info(f"[Crawler] [{page_type.upper()}] [{item_idx+1}/{len(products)}] 開始解析商品 | data-id: {i_code_raw or 'N/A'}")
if not i_code_raw or 'logo' in i_code_raw:
logging.debug(f"[Crawler] [{page_type.upper()}] -> 跳過無效 data-id 的項目。")
continue
try:
i_code = str(int(i_code_raw))
except ValueError:
i_code = i_code_raw.upper()
link_url, brand, product_name, name, price, image_url = None, "", "", "", None, None
try:
link_el = item.find_element(By.CSS_SELECTOR, "a.js-PD_url")
link_url = link_el.get_attribute("data-urlpc")
except Exception: logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到商品連結 | i_code: {i_code}")
try: brand = item.find_element(By.CSS_SELECTOR, "b.js-PD_txt1").text.strip()
except Exception: logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到品牌名稱 | i_code: {i_code}")
try: product_name = item.find_element(By.CSS_SELECTOR, "span.js-PD_txt2").text.strip()
except Exception: logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到產品名稱 | i_code: {i_code}")
name = f"{brand} {product_name}".strip()
if not name:
try:
name = item.find_element(By.CSS_SELECTOR, "img.js-PD_img").get_attribute("alt").strip()
logging.debug(f"[Crawler] [{page_type.upper()}] - 使用圖片 alt 屬性作為名稱: '{name}'")
except Exception:
logging.warning(f"[Crawler] [{page_type.upper()}] - 錯誤: 無法解析任何有效名稱 | i_code: {i_code} | Action: Skip")
continue
price_selectors = [
"span.Price.js-PD_price",
".price span",
".money span",
".price",
".money",
"b.price",
"span.price",
".price b",
".money b",
".prdPrice span",
".prdPrice b",
".prdPrice",
"p.price",
"div.price"
]
for selector in price_selectors:
try:
price_el = item.find_element(By.CSS_SELECTOR, selector)
price_text = price_el.text
price_clean = re.sub(r'[^\d]', '', price_text)
if price_clean:
price = int(price_clean)
break
except Exception:
continue
if price is None:
logging.debug(f"[Crawler] [{page_type.upper()}] - 警告: 找不到價格 | Name: {name} | ID: {i_code}")
def get_image_path(i_code_str):
part3 = i_code_str[-3:] if len(i_code_str) >= 3 else i_code_str.zfill(3)
part2 = i_code_str[-6:-3] if len(i_code_str) > 3 else '000'
part1 = i_code_str[:-6] if len(i_code_str) > 6 else '0'
part1 = part1.zfill(4)
part2 = part2.zfill(3)
return f'{part1}/{part2}/{part3}'
try:
image_path = get_image_path(str(i_code))
image_url = f"https://img.momoshop.com.tw/goodsimg/{image_path}/{i_code}_OL_m.webp"
except Exception as e:
logging.warning(f"[Crawler] [{page_type.upper()}] - 警告: 圖片 URL 構造失敗 | i_code: {i_code} | Error: {e}")
image_url = None
logging.info(f"[Crawler] [{page_type.upper()}] -> 解析結果 | Name: {name} | Price: {price}")
current_scan_items.add((i_code, group_title))
status_change = "NONE"
prev_record = active_db_items.get((i_code, group_title))
previous_price = None
if not prev_record:
status_change = "NEW"
logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 新商品 (NEW)")
else:
if price != prev_record.price:
if price is not None and prev_record.price is not None:
status_change = "PRICE_DOWN" if price < prev_record.price else "PRICE_UP"
else:
status_change = "UPDATE"
previous_price = prev_record.price
logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 價格變動 ({status_change}) | From: {prev_record.price} | To: {price}")
elif status_change == "NONE" and (not prev_record.image_url and image_url):
status_change = "UPDATE"
logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 圖片更新 (UPDATE)")
is_changed = status_change != "NONE"
normalized_link_url = normalize_momo_product_url(link_url, i_code)
if not normalized_link_url:
logging.warning(
f"[Crawler] [{page_type.upper()}] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}"
)
continue
new_promo = PromoProduct(
batch_id=batch_id, i_code=i_code, name=name, price=price, url=normalized_link_url,
image_url=image_url, previous_price=previous_price, time_slot=group_title,
status_change=status_change if is_changed else "ACTIVE", crawled_at=now, activity_time_text=activity_name,
session_time_text=group_title, page_type=page_type
)
session.add(new_promo)
snapshot_count += 1
if is_changed:
changed_products.append(new_promo)
count += 1
logging.info(f"[Crawler] [{page_type.upper()}] -> 寫入資料庫: {status_change}")
else:
logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 無變動 (ACTIVE) | Action: Snapshot Written")
except Exception as e:
logging.error(f"[Crawler] [{page_type.upper()}] ❌ 解析商品時發生未預期錯誤 | Error: {e}")
continue
for (i_code, slot), record in active_db_items.items():
if (i_code, slot) not in current_scan_items:
new_status = "DELISTED" if record.time_slot in seen_groups else "SLOT_END"
delisted_promo = PromoProduct(
batch_id=batch_id, i_code=i_code, name=record.name, price=record.price, url=record.url,
image_url=record.image_url, time_slot=record.time_slot, previous_price=record.price,
status_change=new_status, crawled_at=now, activity_time_text=activity_name,
session_time_text=getattr(record, 'session_time_text', activity_name), page_type=page_type
)
session.add(delisted_promo)
snapshot_count += 1
changed_products.append(delisted_promo)
count += 1
session.commit()
logging.info(f"[Crawler] [{page_type.upper()}] ✅ {page_type} 任務完成 | Changed Records: {count} | Snapshot Records: {snapshot_count} | Batch: {batch_id}")
stats = { "changed_records": count, "snapshot_records": snapshot_count, "batch_id": batch_id, "status": "Success" }
_save_stats(f'{page_type}_task', stats)
if changed_products:
logging.info(f"[Crawler] [{page_type.upper()}] 📢 偵測到 {len(changed_products)} 筆異動 | Action: 準備發送通知")
try:
try:
dashboard_url = "http://127.0.0.1:5000/edm"
logging.info(f"[Crawler] [{page_type.upper()}] 📸 準備截取儀表板畫面: {dashboard_url}")
driver.get(dashboard_url)
time.sleep(3)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(2)
driver.execute_script("window.scrollTo(0, 0);")
time.sleep(2)
required_height = driver.execute_script("return document.body.parentNode.scrollHeight")
driver.set_window_size(2560, int(required_height) + 100)
time.sleep(2)
shot_dir = os.path.join(BASE_DIR, 'web', 'static', 'screenshots')
os.makedirs(shot_dir, exist_ok=True)
screenshot_filename = f"{page_type}_{batch_id}.png"
screenshot_path = os.path.join(shot_dir, screenshot_filename)
driver.save_screenshot(screenshot_path)
file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0
logging.info(f"[Crawler] [{page_type.upper()}] 📸 已儲存頁面截圖 | Path: {screenshot_path} | Size: {file_size:,} bytes")
except Exception as e:
logging.error(f"[Crawler] [{page_type.upper()}] ❌ 截圖失敗 | Error: {e}")
import importlib
import services.edm_notifier
importlib.reload(services.edm_notifier)
from services.edm_notifier import EdmNotifier
EdmNotifier().send_edm_report(changed_products, screenshot_path)
logging.info(f"[Crawler] [{page_type.upper()}] ✅ {activity_name} 通知已發送")
except Exception as e:
logging.error(f"[Crawler] [{page_type.upper()}] ❌ 發送通知時發生錯誤 | Error: {e}")
else:
logging.info(f"[Crawler] [{page_type.upper()}] 無異動,不發送通知")
except MomoEdmUnavailable as e:
logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ {activity_name} 活動頁不可用,任務略過 | Alert: {e.alert_text} | URL: {e.url}")
stats = {"status": "Skipped", "reason": "edm_unavailable", "alert_text": e.alert_text, "url": e.url}
_save_stats(f'{page_type}_task', stats)
except Exception as e:
import traceback as _tb
logging.error(f"[Crawler] [{page_type.upper()}] 🚨 {page_type} 任務異常 | Error: {e}")
stats = { "status": "Failed", "error": str(e) }
_save_stats(f'{page_type}_task', stats)
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_promo_event_task",
error=e,
source=f"Scheduler.{page_type}",
event_type="crawler_timeout" if "timeout" in str(e).lower() else "promo_event_task_failure",
priority="P2",
title=f"{activity_name} 任務異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Crawler] [{page_type.upper()}] event_router 失敗: {_router_e}")
def run_whitepage_check():
"""
V-New: 檢查網頁服務是否變成白頁
每半小時執行一次,檢測正式環境網址
"""
try:
# 檢測的目標網址(正式環境)
target_url = "https://mo.wooo.work/"
logging.info(f"[Whitepage] [Check] 🔍 開始檢查網頁狀態 | URL: {target_url}")
# 1. 發送 HTTP 請求 (內部健康檢查verify=False 用於處理自簽憑證環境)
try:
response = requests.get(target_url, timeout=30, verify=False) # nosec B501
status_code = response.status_code
content_length = len(response.text)
logging.info(f"[Whitepage] [Check] 📊 HTTP 狀態碼: {status_code} | 內容長度: {content_length:,} bytes")
# 2. 檢查 HTTP 狀態碼
if status_code != 200:
error_msg = f"HTTP 狀態碼異常 ({status_code})"
logging.error(f"[Whitepage] [Check] ❌ {error_msg}")
NotificationManager().send_whitepage_alert(target_url, error_msg)
_save_stats('whitepage_check', {"status": "Failed", "error": error_msg})
return
# 3. 檢查內容長度(白頁通常很短)
if content_length < 1000:
error_msg = f"頁面內容過短 ({content_length} bytes),疑似白頁"
logging.error(f"[Whitepage] [Check] ❌ {error_msg}")
NotificationManager().send_whitepage_alert(target_url, error_msg)
_save_stats('whitepage_check', {"status": "Failed", "error": error_msg})
return
# 4. 檢查關鍵元素是否存在
content = response.text
missing_markers = _missing_whitepage_markers(content)
if missing_markers:
error_msg = f"缺少關鍵元素:{', '.join(missing_markers)}"
logging.error(f"[Whitepage] [Check] ❌ {error_msg}")
logging.debug(f"[Whitepage] [Check] 📄 頁面內容片段: {content[:500]}...")
NotificationManager().send_whitepage_alert(target_url, error_msg)
_save_stats('whitepage_check', {"status": "Failed", "error": error_msg})
return
# 5. 所有檢查通過
logging.info(f"[Whitepage] [Check] ✅ 網頁狀態正常")
_save_stats('whitepage_check', {
"status": "Success",
"status_code": status_code,
"content_length": content_length
})
except requests.exceptions.Timeout:
error_msg = "連線逾時 (30秒)"
logging.error(f"[Whitepage] [Check] ❌ {error_msg}")
NotificationManager().send_whitepage_alert(target_url, error_msg)
_save_stats('whitepage_check', {"status": "Failed", "error": error_msg})
except requests.exceptions.ConnectionError as e:
error_msg = f"無法連接伺服器:{str(e)}"
logging.error(f"[Whitepage] [Check] ❌ {error_msg}")
NotificationManager().send_whitepage_alert(target_url, error_msg)
_save_stats('whitepage_check', {"status": "Failed", "error": error_msg})
except Exception as e:
error_msg = f"未預期的錯誤:{str(e)}"
logging.error(f"[Whitepage] [Check] ❌ {error_msg}")
NotificationManager().send_whitepage_alert(target_url, error_msg)
_save_stats('whitepage_check', {"status": "Failed", "error": error_msg})
except Exception as e:
logging.error(f"[Whitepage] [Check] 🚨 白頁檢查任務異常 | Error: {e}")
_save_stats('whitepage_check', {"status": "Failed", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_whitepage_check",
error=e,
source="Scheduler.Whitepage",
event_type="whitepage_check_failure",
priority="P1",
title="白頁檢查任務異常",
dedup_ttl_sec=1800,
)
except Exception as _router_e:
logging.error(f"[Whitepage] [Check] event_router 失敗: {_router_e}")
def start_scheduled_job():
"""使用執行緒啟動任務,避免阻塞主程式"""
thread = threading.Thread(target=run_momo_task)
thread.start()
def capture_page_screenshot(url, filename_prefix="screenshot", wait_time=5):
"""通用截圖函式 (供 app.py 呼叫截取儀表板) - 高解析度版本"""
try:
# V-Fix: 使用更大的視窗寬度以提高截圖清晰度
with managed_scraper_resources(window_size='2560,4000') as (driver, session):
logging.info(f"[Screenshot] 📸 準備截取畫面: {url}")
driver.get(url)
time.sleep(wait_time / 2)
# V-Fix: 滾動頁面以觸發所有元素的渲染
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(1)
driver.execute_script("window.scrollTo(0, 0);")
time.sleep(wait_time / 2)
# 設定視窗大小以進行滿版截圖
required_height = driver.execute_script("return document.body.parentNode.scrollHeight")
driver.set_window_size(2560, int(required_height) + 100)
# V-Fix: 等待視窗大小調整後的重新渲染
time.sleep(2)
shot_dir = os.path.join(BASE_DIR, 'web', 'static', 'screenshots')
os.makedirs(shot_dir, exist_ok=True)
timestamp = int(time.time())
filename = f"{filename_prefix}_{timestamp}.png"
screenshot_path = os.path.join(shot_dir, filename)
driver.save_screenshot(screenshot_path)
# 檢查截圖檔案大小
file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0
logging.info(f"[Screenshot] 📸 已儲存截圖 | Path: {screenshot_path} | Size: {file_size:,} bytes | Resolution: 2560x{int(required_height) + 100}")
# V-Fix: 如果檔案太小(<50KB可能是空白或錯誤
if file_size < 50000:
logging.warning(f"[Screenshot] ⚠️ 截圖檔案異常小 ({file_size} bytes),可能截圖失敗")
return screenshot_path
except Exception as e:
logging.error(f"[Screenshot] ❌ 截圖失敗 | Error: {e}")
return None
def verify_import_data_sync(expected_rows: int = None, date_range: dict = None) -> dict:
"""
2026-01-30 新增:驗證匯入資料是否正確同步到兩個資料表
Args:
expected_rows: 預期的匯入筆數
date_range: 預期的日期範圍 {'min': 'YYYY-MM-DD', 'max': 'YYYY-MM-DD'}
Returns:
dict: 驗證結果
"""
from config import DATABASE_PATH
from sqlalchemy import create_engine, text
date_min = date_range.get('min') if date_range else None
date_max = date_range.get('max') if date_range else None
result = {
'success': True,
'daily_sales_snapshot': {'ok': False, 'rows': 0, 'date_min': None, 'date_max': None},
'realtime_sales_monthly': {'ok': False, 'rows': 0, 'date_min': None, 'date_max': None},
'scope': {'date_min': date_min, 'date_max': date_max},
'errors': []
}
if expected_rows is not None and expected_rows <= 0:
result['success'] = False
result['errors'].append("本次匯入筆數為 0請檢查檔案是否實際匯入成功")
return result
if not date_min or not date_max:
result['success'] = False
result['errors'].append("缺少本次匯入日期範圍,無法驗證資料同步")
return result
try:
engine = create_engine(DATABASE_PATH)
with engine.connect() as conn:
is_sqlite = engine.dialect.name == 'sqlite'
if is_sqlite:
snapshot_query = text("""
SELECT COUNT(*) as cnt,
MIN(date(snapshot_date)) as date_min,
MAX(date(snapshot_date)) as date_max
FROM daily_sales_snapshot
WHERE date(snapshot_date) BETWEEN date(:date_min) AND date(:date_max)
""")
monthly_query = text("""
SELECT COUNT(*) as cnt,
MIN(date("日期")) as date_min,
MAX(date("日期")) as date_max
FROM realtime_sales_monthly
WHERE date("日期") BETWEEN date(:date_min) AND date(:date_max)
""")
else:
snapshot_query = text("""
SELECT COUNT(*) as cnt,
MIN(snapshot_date::date)::text as date_min,
MAX(snapshot_date::date)::text as date_max
FROM daily_sales_snapshot
WHERE snapshot_date::date BETWEEN :date_min AND :date_max
""")
monthly_query = text("""
SELECT COUNT(*) as cnt,
MIN("日期"::date)::text as date_min,
MAX("日期"::date)::text as date_max
FROM realtime_sales_monthly
WHERE "日期"::date BETWEEN :date_min AND :date_max
""")
# V-Fix: 只驗證本次匯入日期範圍。realtime_sales_monthly 保存歷史資料,
# 不可再拿全表總筆數和 daily_sales_snapshot 做相等比較。
params = {'date_min': date_min, 'date_max': date_max}
snapshot_result = conn.execute(snapshot_query, params).fetchone()
result['daily_sales_snapshot']['rows'] = snapshot_result[0]
result['daily_sales_snapshot']['date_min'] = snapshot_result[1]
result['daily_sales_snapshot']['date_max'] = snapshot_result[2]
monthly_result = conn.execute(monthly_query, params).fetchone()
result['realtime_sales_monthly']['rows'] = monthly_result[0]
result['realtime_sales_monthly']['date_min'] = monthly_result[1]
result['realtime_sales_monthly']['date_max'] = monthly_result[2]
# 驗證兩個表的資料是否一致
snapshot_rows = result['daily_sales_snapshot']['rows']
monthly_rows = result['realtime_sales_monthly']['rows']
# 檢查 1: 兩個表的筆數是否一致
if snapshot_rows != monthly_rows:
result['errors'].append(
f"資料筆數不一致: daily_sales_snapshot={snapshot_rows}, realtime_sales_monthly={monthly_rows}"
)
result['success'] = False
# 檢查 2: 本次匯入日期範圍是否一致
if (
result['daily_sales_snapshot']['date_min'] != result['realtime_sales_monthly']['date_min']
or result['daily_sales_snapshot']['date_max'] != result['realtime_sales_monthly']['date_max']
):
result['errors'].append(
f"日期範圍不一致: daily_sales_snapshot="
f"{result['daily_sales_snapshot']['date_min']}~{result['daily_sales_snapshot']['date_max']}, "
f"realtime_sales_monthly="
f"{result['realtime_sales_monthly']['date_min']}~{result['realtime_sales_monthly']['date_max']}"
)
result['success'] = False
# 檢查 3: 如果有預期的匯入筆數,驗證是否正確
if expected_rows is not None:
if snapshot_rows < expected_rows:
result['errors'].append(
f"daily_sales_snapshot 筆數({snapshot_rows})少於預期({expected_rows})"
)
result['success'] = False
if monthly_rows < expected_rows:
result['errors'].append(
f"realtime_sales_monthly 筆數({monthly_rows})少於預期({expected_rows})"
)
result['success'] = False
# 檢查 4: 如果有預期的日期範圍,驗證是否正確
if result['daily_sales_snapshot']['date_min'] != date_min or result['daily_sales_snapshot']['date_max'] != date_max:
result['errors'].append(
f"daily_sales_snapshot 日期範圍({result['daily_sales_snapshot']['date_min']}~"
f"{result['daily_sales_snapshot']['date_max']})與預期({date_min}~{date_max})不符"
)
result['success'] = False
if result['realtime_sales_monthly']['date_min'] != date_min or result['realtime_sales_monthly']['date_max'] != date_max:
result['errors'].append(
f"realtime_sales_monthly 日期範圍({result['realtime_sales_monthly']['date_min']}~"
f"{result['realtime_sales_monthly']['date_max']})與預期({date_min}~{date_max})不符"
)
result['success'] = False
# 設定各表驗證結果
result['daily_sales_snapshot']['ok'] = snapshot_rows > 0
result['realtime_sales_monthly']['ok'] = monthly_rows > 0 and monthly_rows == snapshot_rows
logging.info(f"[Scheduler] [Verify] 資料驗證完成: success={result['success']}, "
f"range={date_min}~{date_max}, snapshot={snapshot_rows}筆, monthly={monthly_rows}")
except Exception as e:
result['success'] = False
result['errors'].append(f"驗證時發生錯誤: {str(e)}")
logging.error(f"[Scheduler] [Verify] 資料驗證失敗: {e}")
return result
def run_auto_import_task():
"""
V-New: 自動從 Google Drive 匯入當日業績
每半小時檢查一次 Google Drive 是否有新的 Excel 檔案
"""
# ADR-012 Phase 4: HITL 暫停檢查
notification_sent = False
try:
from services.agent_actions import is_task_paused
if is_task_paused("run_auto_import_task"):
logging.info("[Scheduler] [AutoImport] ⏸️ 任務被 HITL 暫停中,本次跳過")
return
except Exception as pause_check_error:
logging.debug(
f"[Scheduler] [AutoImport] HITL 暫停檢查失敗但繼續排程 | Error: {pause_check_error}",
exc_info=True,
)
try:
from services.import_service import import_service
from services.notification_manager import NotificationManager
logging.info("[Scheduler] [AutoImport] 🚀 啟動 Google Drive 自動匯入任務")
# 執行自動匯入
result = import_service.auto_import_from_drive()
if result.get('success'):
logging.info(f"[Scheduler] [AutoImport] ✅ 自動匯入完成 | Message: {result.get('message')}")
stats = {
"file_count": result.get('file_count', 0),
"imported_count": result.get('imported_count', 0),
"status": "Success"
}
# V-New: 發送 Telegram 和 Line 通知(僅當有匯入新檔案時)
if result.get('file_count', 0) > 0:
try:
logging.info("[Scheduler] [AutoImport] 📢 準備發送自動匯入通知...")
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
# 格式化日期範圍
date_range_str = "未知"
date_range = result.get('date_range')
if date_range:
try:
from datetime import datetime as dt
date_min = dt.strptime(date_range['min'], '%Y-%m-%d')
date_max = dt.strptime(date_range['max'], '%Y-%m-%d')
date_min_str = date_min.strftime('%Y年%m月%d')
date_max_str = date_max.strftime('%Y年%m月%d')
if date_min_str == date_max_str:
date_range_str = date_min_str
else:
date_range_str = f"{date_min_str}{date_max_str}"
except Exception:
date_range_str = f"{date_range.get('min', '?')}{date_range.get('max', '?')}"
freshness_line = ""
data_lag_days = result.get('data_lag_days')
if data_lag_days is not None and data_lag_days > 0:
freshness_line = f"⚠️ 最新資料落後:{data_lag_days}\n"
# 組合通知訊息
message = (
f"📊 當日業績自動匯入通知 ({now_str})\n"
f"{'='*30}\n"
f"✅ 匯入狀態:成功\n"
f"📁 處理檔案數:{result.get('file_count', 0)}\n"
f"📝 共匯入記錄:{result.get('total_rows', 0)}\n"
f"📅 數據日期期間:{date_range_str}\n"
f"{freshness_line}"
f"{'='*30}\n"
f"詳細資料請至系統查看"
)
# 發送通知到所有頻道
notifier = NotificationManager()
notifier._send_line_messages([message])
notifier._send_telegram_messages([message])
logging.info("[Scheduler] [AutoImport] ✅ 自動匯入通知已發送")
# 2026-01-30 新增:匯入後驗證資料同步
logging.info("[Scheduler] [AutoImport] 🔍 開始驗證資料同步...")
verify_result = verify_import_data_sync(
expected_rows=result.get('total_rows'),
date_range=result.get('date_range')
)
if not verify_result['success']:
# 資料驗證失敗,發送警告通知
error_details = '\n'.join([f" ⚠️ {e}" for e in verify_result['errors']])
verify_message = (
f"🚨 當日業績匯入驗證失敗 ({now_str})\n"
f"{'='*30}\n"
f"匯入通知顯示成功,但資料驗證發現問題:\n"
f"{error_details}\n"
f"{'='*30}\n"
f"📊 daily_sales_snapshot: {verify_result['daily_sales_snapshot']['rows']}\n"
f"📈 realtime_sales_monthly: {verify_result['realtime_sales_monthly']['rows']}\n"
f"{'='*30}\n"
f"請立即檢查資料同步狀態!"
)
notifier._send_telegram_messages([verify_message])
logging.error(f"[Scheduler] [AutoImport] ❌ 資料驗證失敗: {verify_result['errors']}")
else:
logging.info("[Scheduler] [AutoImport] ✅ 資料驗證通過,兩個表已同步")
except Exception as e:
logging.error(f"[Scheduler] [AutoImport] ❌ 發送通知失敗 | Error: {e}")
else:
logging.info("[Scheduler] [AutoImport] 無新檔案,不發送通知")
else:
logging.error(f"[Scheduler] [AutoImport] ❌ 自動匯入失敗 | Message: {result.get('message')}")
stats = {
"status": "Failed",
"error": result.get('message', 'Unknown error')
}
# V-New: 匯入失敗時也發送通知
try:
logging.info("[Scheduler] [AutoImport] 📢 準備發送自動匯入失敗通知...")
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
message = (
f"⚠️ 當日業績自動匯入失敗 ({now_str})\n"
f"{'='*30}\n"
f"❌ 匯入狀態:失敗\n"
f"📌 錯誤訊息:{result.get('message', 'Unknown error')}\n"
f"{'='*30}\n"
f"請檢查 Google Drive 設定或手動匯入"
)
notifier = NotificationManager()
notifier._send_line_messages([message])
notifier._send_telegram_messages([message])
logging.info("[Scheduler] [AutoImport] ✅ 匯入失敗通知已發送")
except Exception as e:
logging.error(f"[Scheduler] [AutoImport] ❌ 發送失敗通知時發生錯誤 | Error: {e}")
_save_stats('auto_import_task', stats)
except Exception as e:
import traceback as _tb
logging.error(f"[Scheduler] [AutoImport] 🚨 自動匯入任務異常 | Error: {e}")
stats = {"status": "Failed", "error": str(e)}
_save_stats('auto_import_task', stats)
# ADR-012 Phase 2: 改走 EventRouterHermes L1 翻譯 + 降級鏈)
# LINE 通道保留event_router 只處理 Telegram
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_auto_import_task",
error=e,
source="Scheduler.AutoImport",
event_type="db_connection_error" if "translate host" in str(e).lower() or "operational" in str(e).lower() else "import_failure",
priority="P1",
title="當日業績自動匯入異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Scheduler] [AutoImport] event_router 失敗: {_router_e}")
# LINE 通知保留(獨立通道,不經 event_router
# Only send if notification hasn't been sent already
if not notification_sent:
try:
from services.notification_manager import NotificationManager
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
message = f"Daily Sales Auto Import Exception ({now_str})\nSystem error: {str(e)[:200]}"
NotificationManager()._send_line_messages([message])
notification_sent = True # Mark as sent to prevent duplicate
except Exception as notify_error:
logging.error(f"[Scheduler] [AutoImport] LINE notification failed | Error: {notify_error}")
# ADR-013: AIOps 自動修復 — PlayBook 匹配 + KM 沉澱
try:
from services.auto_heal_service import auto_heal_service
auto_heal_service.handle_exception(
task_name="run_auto_import_task",
exception=e,
traceback_str=_tb.format_exc(),
)
except Exception as _heal_e:
logging.error(f"[Scheduler] [AutoImport] auto_heal_service 失敗: {_heal_e}")
def run_competitor_price_feeder_task():
"""
競品價格補給線排程任務(每 4 小時執行一次)
從 PChome 抓取競品價格,同步寫入 competitor_prices 最新快取與
competitor_price_history 歷史快照,供 HermesAnalystService 與前端歷史圖表使用。
與 AI Pipeline 完全解耦:本任務失敗不影響核心分析流程。
ADR-ICAIM-2026-04-17 新增
"""
try:
from config import DATABASE_PATH
from sqlalchemy import create_engine
from services.competitor_intel_repository import clear_competitor_intel_cache
from services.competitor_price_feeder import CompetitorPriceFeeder
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
logging.info(f"[Scheduler] [Feeder] 🚀 啟動競品價格抓取任務 | {now_str}")
engine = create_engine(DATABASE_PATH)
feeder = CompetitorPriceFeeder(engine=engine)
result = feeder.run(source="pchome")
stats = {
"total_skus": result.total_skus,
"matched": result.matched,
"skipped_no_result": result.skipped_no_result,
"skipped_low_score": result.skipped_low_score,
"errors": result.errors,
"duration_sec": result.duration_sec,
"history_written": result.history_written,
"status": "Success",
}
logging.info(
f"[Scheduler] [Feeder] ✅ 完成 | "
f"matched={result.matched}/{result.total_skus} "
f"skip_no={result.skipped_no_result} "
f"skip_low={result.skipped_low_score} "
f"history_written={result.history_written} "
f"errors={result.errors} "
f"耗時={result.duration_sec}s"
)
clear_competitor_intel_cache()
_save_stats('competitor_price_feeder', stats)
except Exception as e:
import traceback as _tb
logging.error(f"[Scheduler] [Feeder] 🚨 任務異常 | Error: {e}")
_save_stats('competitor_price_feeder', {"status": "Failed", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_competitor_price_feeder_task",
error=e,
source="Scheduler.Feeder",
event_type="competitor_price_feeder_failure",
priority="P2",
title="競品價格補給任務異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Scheduler] [Feeder] event_router 失敗: {_router_e}")
def run_external_offer_sync_task():
"""
外部報價正規化同步任務(每 4 小時執行一次)
將已確認同款的既有比價快取自動同步到 external_offers讓 PChome 成長作戰清單
能吃共同資料層。CSV 僅保留備援,不是日常主流程。
"""
try:
from config import DATABASE_PATH
from sqlalchemy import create_engine
from services.external_market_offer_service import sync_legacy_momo_reference_offers
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
limit = int(os.getenv("EXTERNAL_OFFER_SYNC_LIMIT", "1000"))
logging.info(f"[Scheduler] [ExternalOfferSync] 🚀 啟動外部報價自動同步 | {now_str}")
engine = create_engine(DATABASE_PATH)
try:
result = sync_legacy_momo_reference_offers(engine, limit=limit, dry_run=False)
finally:
engine.dispose()
stats = {
"status": "Success" if result.get("success") else "Skipped",
"candidate_count": result.get("candidate_count", 0),
"written_count": result.get("written_count", 0),
"source_code": result.get("source_code", "momo_reference"),
"message": result.get("message"),
}
logging.info(
"[Scheduler] [ExternalOfferSync] ✅ 完成 | candidates=%s written=%s status=%s",
stats["candidate_count"],
stats["written_count"],
result.get("status"),
)
_save_stats('external_offer_sync', stats)
except Exception as e:
import traceback as _tb
logging.error(f"[Scheduler] [ExternalOfferSync] 🚨 任務異常 | Error: {e}")
_save_stats('external_offer_sync', {"status": "Failed", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_external_offer_sync_task",
error=e,
source="Scheduler.ExternalOfferSync",
event_type="external_offer_sync_failure",
priority="P2",
title="外部報價自動同步異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Scheduler] [ExternalOfferSync] event_router 失敗: {_router_e}")
def run_pchome_match_backfill_task():
"""
PChome 待比對商品補抓任務(每日執行)
優先處理尚未有有效 PChome 配對的高價 ACTIVE 商品,寫入 competitor_prices
與 competitor_price_history完成後重算 product_pick 挑品清單。
"""
try:
from config import DATABASE_PATH
from sqlalchemy import create_engine
from services.ai_product_pick_agent import generate_product_pick_list
from services.cache_manager import clear_dashboard_cache
from services.competitor_identity_revalidator import revalidate_existing_competitor_identities
from services.competitor_intel_repository import clear_competitor_intel_cache
from services.competitor_price_feeder import CompetitorPriceFeeder
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
logging.info(f"[Scheduler] [PChomeBackfill] 🚀 啟動待比對補抓任務 | {now_str}")
engine = create_engine(DATABASE_PATH)
revalidation_limit = int(os.getenv("PCHOME_BACKFILL_REVALIDATION_LIMIT", "1200"))
expired_refresh_limit = int(os.getenv("PCHOME_BACKFILL_EXPIRED_REFRESH_LIMIT", "1200"))
retryable_limit = int(os.getenv("PCHOME_BACKFILL_RETRYABLE_LIMIT", "240"))
unmatched_limit = int(os.getenv("PCHOME_BACKFILL_UNMATCHED_LIMIT", "240"))
revalidation_result = revalidate_existing_competitor_identities(
engine,
limit=revalidation_limit,
dry_run=False,
include_expired=True,
write_attempts=True,
)
feeder = CompetitorPriceFeeder(engine=engine)
refresh_result = feeder.run_expired_identity_refresh(limit=expired_refresh_limit)
retryable_result = feeder.run_retryable_candidate_revalidation(limit=retryable_limit, min_score=0.70)
feeder_result = feeder.run_unmatched_priority(limit=unmatched_limit)
pick_result = generate_product_pick_list(engine, limit=50)
clear_dashboard_cache()
clear_competitor_intel_cache()
stats = {
"total_skus": feeder_result.total_skus,
"matched": feeder_result.matched,
"skipped_no_result": feeder_result.skipped_no_result,
"skipped_low_score": feeder_result.skipped_low_score,
"errors": feeder_result.errors,
"duration_sec": feeder_result.duration_sec,
"history_written": feeder_result.history_written,
"identity_revalidated_fresh": revalidation_result.promoted_fresh,
"identity_revalidated_expired": revalidation_result.promoted_expired,
"identity_revalidation_rejected_low": revalidation_result.rejected_low_score,
"identity_revalidation_rejected_veto": revalidation_result.rejected_veto,
"expired_identity_refresh_total": refresh_result.total_skus,
"expired_identity_refresh_matched": refresh_result.matched,
"expired_identity_refresh_no_result": refresh_result.skipped_no_result,
"expired_identity_refresh_low_score": refresh_result.skipped_low_score,
"expired_identity_refresh_errors": refresh_result.errors,
"retryable_candidate_revalidation_total": retryable_result.total_skus,
"retryable_candidate_revalidation_matched": retryable_result.matched,
"retryable_candidate_revalidation_low_score": retryable_result.skipped_low_score,
"retryable_candidate_revalidation_errors": retryable_result.errors,
"pick_candidates": pick_result.candidates,
"pick_written": pick_result.written,
"status": "Success",
}
logging.info(
f"[Scheduler] [PChomeBackfill] ✅ 完成 | "
f"revalidated={revalidation_result.promoted_fresh}+{revalidation_result.promoted_expired} "
f"refreshed={refresh_result.matched}/{refresh_result.total_skus} "
f"retryable={retryable_result.matched}/{retryable_result.total_skus} "
f"matched={feeder_result.matched}/{feeder_result.total_skus} "
f"history_written={feeder_result.history_written} "
f"pick_written={pick_result.written} "
f"errors={feeder_result.errors + refresh_result.errors + retryable_result.errors} "
f"耗時={feeder_result.duration_sec + refresh_result.duration_sec + retryable_result.duration_sec}s"
)
_save_stats('pchome_match_backfill', stats)
except Exception as e:
import traceback as _tb
logging.error(f"[Scheduler] [PChomeBackfill] 🚨 任務異常 | Error: {e}")
_save_stats('pchome_match_backfill', {"status": "Failed", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_pchome_match_backfill_task",
error=e,
source="Scheduler.PChomeBackfill",
event_type="pchome_match_backfill_failure",
priority="P2",
title="PChome 待比對補抓任務異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Scheduler] [PChomeBackfill] event_router 失敗: {_router_e}")
def run_pchome_growth_momo_backfill_task():
"""
PChome 高業績商品 MOMO 對應補齊任務(每日執行)
優先處理成長作戰台中尚未有外部比價資料的高業績商品,寫入 external_offers。
"""
try:
from config import DATABASE_PATH
from sqlalchemy import create_engine
from services.pchome_growth_momo_backfill_service import run_pchome_growth_momo_backfill
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
limit = int(os.getenv("PCHOME_GROWTH_MOMO_BACKFILL_LIMIT", "8"))
logging.info(
"[Scheduler] [PChomeGrowthMomoBackfill] 🚀 啟動高業績商品 MOMO 對應補齊 | %s | limit=%s",
now_str,
limit,
)
engine = create_engine(DATABASE_PATH)
try:
result = run_pchome_growth_momo_backfill(engine, limit=limit)
finally:
engine.dispose()
data = result.get("data") or {}
before_stats = data.get("before_stats") or {}
after_stats = data.get("after_stats") or {}
sync_result = data.get("external_offer_sync") or {}
stats = {
"status": "Success" if result.get("success") else "Skipped",
"scanned_products": data.get("scanned_products", 0),
"candidate_count": data.get("candidate_count", 0),
"auto_compare_count": data.get("auto_compare_count", 0),
"review_count": data.get("review_count", 0),
"written_count": sync_result.get("written_count", 0),
"sync_status": sync_result.get("status"),
"mapping_rate_before": before_stats.get("mapping_rate"),
"mapping_rate_after": after_stats.get("mapping_rate"),
"mapped_count_before": before_stats.get("mapped_count"),
"mapped_count_after": after_stats.get("mapped_count"),
"needs_mapping_count_after": after_stats.get("needs_mapping_count"),
"message": result.get("message"),
}
logging.info(
"[Scheduler] [PChomeGrowthMomoBackfill] ✅ 完成 | scanned=%s candidates=%s auto=%s written=%s review=%s mapping=%s%%%s%%",
stats["scanned_products"],
stats["candidate_count"],
stats["auto_compare_count"],
stats["written_count"],
stats["review_count"],
stats["mapping_rate_before"],
stats["mapping_rate_after"],
)
_save_stats('pchome_growth_momo_backfill', stats)
except Exception as e:
import traceback as _tb
logging.error(f"[Scheduler] [PChomeGrowthMomoBackfill] 🚨 任務異常 | Error: {e}")
_save_stats('pchome_growth_momo_backfill', {"status": "Failed", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_pchome_growth_momo_backfill_task",
error=e,
source="Scheduler.PChomeGrowthMomoBackfill",
event_type="pchome_growth_momo_backfill_failure",
priority="P2",
title="PChome 高業績商品 MOMO 對應補齊異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Scheduler] [PChomeGrowthMomoBackfill] event_router 失敗: {_router_e}")
def run_icaim_analysis_task():
"""
ICAIM 競價情報分析排程任務(每 6 小時執行一次)
Hermes 3 分析師 → NemoTron 派發器 → Telegram 競價告警
依賴 competitor_prices 表(由 run_competitor_price_feeder_task 每 4h 更新)
ADR-ICAIM-2026-04-17 新增
"""
try:
from config import DATABASE_PATH
from sqlalchemy import create_engine
from services.hermes_analyst_service import HermesAnalystService
from services.nemoton_dispatcher_service import NemotronDispatcher
from services.notification_manager import NotificationManager
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
logging.info(f"[Scheduler] [ICAIM] 🚀 啟動競價情報分析 | {now_str}")
engine = create_engine(DATABASE_PATH)
# Step 1Hermes 分析師
hermes_svc = HermesAnalystService(engine=engine)
t0 = datetime.now(TAIPEI_TZ)
result = hermes_svc.run()
hermes_duration = round((datetime.now(TAIPEI_TZ) - t0).total_seconds(), 1)
logging.info(
f"[Scheduler] [ICAIM] Hermes 完成 | "
f"candidates={result.total_candidates} threats={len(result.threats)} "
f"耗時={hermes_duration}s"
)
_save_stats('icaim_hermes', {
"candidates": result.total_candidates,
"threats": len(result.threats),
"duration_sec": hermes_duration,
"status": "Success" if result.success else "Failed",
"error": result.error,
})
if not result.success:
logging.error(f"[Scheduler] [ICAIM] Hermes 失敗: {result.error}")
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_icaim_analysis_task",
error=RuntimeError(result.error or "Hermes analysis failed"),
source="Scheduler.ICAIM",
event_type="icaim_hermes_failure",
priority="P1",
title="ICAIM Hermes 分析失敗",
payload={"duration_sec": hermes_duration, "candidates": result.total_candidates},
)
except Exception as _router_e:
logging.error(f"[Scheduler] [ICAIM] Hermes failure event_router 失敗: {_router_e}")
return
if not result.threats:
logging.info("[Scheduler] [ICAIM] 無威脅商品,跳過 NemoTron dispatch")
# Operation Ollama-First v5.0 Phase 4移除原 inline meta_analysis 觸發
# 原因:呼叫點 #16 月耗 ~2.5M Gemini tokensicaim 4h × 6 + cron 6h × 4 = 10/天)
# 改由 run_scheduler 每日 12:00 單一觸發;此處僅 log不再呼叫。
return
# Step 2NemoTron 派發器 → Telegram
hermes_stats = {"duration_sec": hermes_duration, "tokens": result.hermes_tokens}
notifier = NotificationManager()
dispatcher = NemotronDispatcher(notification_manager=notifier, engine=engine)
dispatch_result = dispatcher.dispatch(result.threats, hermes_stats=hermes_stats)
logging.info(
f"[Scheduler] [ICAIM] ✅ 完成 | "
f"dispatched={dispatch_result.get('dispatched', 0)} "
f"alerts={dispatch_result.get('alerts', 0)}"
)
_save_stats('icaim_dispatch', {**dispatch_result, "status": "Success"})
# Operation Ollama-First v5.0 Phase 4原 Step 3 inline Meta-Analysis 觸發已移除
# 改由 run_scheduler 每日 12:00 單一觸發Phase 4 降頻 6h → 24h月省 ~1.875M tokens
except Exception as e:
import traceback as _tb
logging.error(f"[Scheduler] [ICAIM] 🚨 任務異常 | Error: {e}")
_save_stats('icaim_analysis', {"status": "Failed", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_icaim_analysis_task",
error=e,
source="Scheduler.ICAIM",
event_type="icaim_analysis_failure",
priority="P1",
title="ICAIM 競價分析任務異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Scheduler] [ICAIM] event_router 失敗: {_router_e}")
# ADR-013: AIOps 自動修復
try:
from services.auto_heal_service import auto_heal_service
auto_heal_service.handle_exception(
task_name="run_icaim_analysis_task",
exception=e,
traceback_str=_tb.format_exc(),
)
except Exception as _heal_e:
logging.error(f"[Scheduler] [ICAIM] auto_heal_service 失敗: {_heal_e}")
def run_weekly_strategy_task():
"""
執行每週 Gemini 策略師週報任務 (Phase 4 實作)
產出高階經營決策週報,並存入 ai_insights 中作知識沈澱,同時透過 Telegram 推送
"""
logging.info("[Scheduler] [Strategy] 🚀 啟動 Gemini 策略師週報任務...")
try:
from services.openclaw_strategist_service import generate_weekly_strategy_report
generate_weekly_strategy_report(force_tg_alert=True)
logging.info("[Scheduler] [Strategy] ✅ Gemini 策略師週報任務完成")
except Exception as e:
import traceback as _tb
logging.error(f"[Scheduler] [Strategy] 🚨 任務異常 | Error: {e}")
_save_stats('weekly_strategy', {"status": "Failed", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_weekly_strategy_task",
error=e,
source="Scheduler.Strategy",
event_type="weekly_strategy_failure",
priority="P2",
title="每週策略週報任務異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Scheduler] [Strategy] event_router 失敗: {_router_e}")
# ADR-013: AIOps 自動修復
try:
from services.auto_heal_service import auto_heal_service
auto_heal_service.handle_exception(
task_name="run_weekly_strategy_task",
exception=e,
traceback_str=_tb.format_exc(),
)
except Exception as _heal_e:
logging.error(f"[Scheduler] [Strategy] auto_heal_service 失敗: {_heal_e}")
def run_db_backup_task():
"""
每日凌晨 02:00 執行 pg_dump 備份 momo_analytics
並清理超過 7 天的舊備份。完成後透過 Telegram 通知統帥。
失敗同樣發出告警,並沉澱到 ai_insights供 RAG 查詢)。
"""
logging.info("[Scheduler] [Backup] 🚀 啟動資料庫備份任務...")
try:
from services.db_backup_service import run_backup, cleanup_old_backups
from services.notification_manager import NotificationManager
result = run_backup()
deleted_count = cleanup_old_backups()
now_str = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M')
notifier = NotificationManager()
if result["success"]:
size_kb = result["file_size"] // 1024
msg = (
f"💾 資料庫備份完成 ({now_str})\n"
f"{'='*30}\n"
f"✅ 狀態:成功\n"
f"📁 檔案:{result['filename']}\n"
f"📦 大小:{size_kb} KB\n"
f"⏱ 耗時:{result['duration']:.1f}\n"
f"🗑 清理舊備份:{deleted_count}"
)
logging.info(f"[Scheduler] [Backup] ✅ 備份成功 | {result['filename']} ({size_kb}KB)")
_save_stats('db_backup', {"status": "Success", "filename": result["filename"], "size_kb": size_kb})
# 沉澱到 ai_insightsRAG 可查詢備份歷史)
try:
from services.openclaw_learning_service import store_insight
store_insight(
insight_type='backup_status',
content=f"資料庫備份成功:{result['filename']},大小 {size_kb}KB耗時 {result['duration']:.1f}s",
period=datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d'),
metadata={"status": "success", "size_kb": size_kb, "deleted_old": deleted_count},
ai_model="scheduler",
)
except Exception as insight_error:
logging.warning(
f"[Scheduler] [Backup] ⚠️ 備份成功 insight 寫入失敗但繼續通知 | Error: {insight_error}",
exc_info=True,
)
else:
msg = (
f"🚨 資料庫備份失敗 ({now_str})\n"
f"{'='*30}\n"
f"❌ 狀態:失敗\n"
f"🔍 原因:{result.get('error', '未知錯誤')}\n"
f"⚠️ 請立即檢查 momo-db 容器狀態!"
)
logging.error(f"[Scheduler] [Backup] ❌ 備份失敗: {result.get('error')}")
_save_stats('db_backup', {"status": "Failed", "error": result.get("error")})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_db_backup_task",
error=RuntimeError(result.get("error") or "database backup failed"),
source="Scheduler.DBBackup",
event_type="db_backup_failure",
priority="P1",
title="資料庫備份失敗",
payload={"filename": result.get("filename")},
dedup_ttl_sec=3600,
)
except Exception as _router_e:
logging.error(f"[Scheduler] [Backup] event_router 失敗: {_router_e}")
try:
from services.openclaw_learning_service import store_insight
store_insight(
insight_type='backup_status',
content=f"資料庫備份失敗:{result.get('error', '未知')}",
period=datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d'),
metadata={"status": "failed", "error": result.get("error")},
ai_model="scheduler",
)
except Exception as insight_error:
logging.warning(
f"[Scheduler] [Backup] ⚠️ 備份失敗 insight 寫入失敗但繼續通知 | Error: {insight_error}",
exc_info=True,
)
notifier._send_telegram_messages([msg])
except Exception as e:
logging.error(f"[Scheduler] [Backup] 🚨 任務異常 | Error: {e}")
_save_stats('db_backup', {"status": "Error", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_db_backup_task",
error=e,
source="Scheduler.DBBackup",
event_type="db_backup_failure",
priority="P1",
title="資料庫備份排程異常",
dedup_ttl_sec=3600,
)
except Exception as _router_e:
logging.error(f"[Scheduler] [Backup] event_router 失敗: {_router_e}")
try:
from services.notification_manager import NotificationManager
NotificationManager()._send_telegram_messages([
f"🚨 DB 備份排程異常\n錯誤:{e}"
])
except Exception as notify_error:
logging.warning(
f"[Scheduler] [Backup] ⚠️ 備份異常 Telegram 通知失敗 | Error: {notify_error}",
exc_info=True,
)
def run_backup_monitor_task():
"""
每 6 小時檢查最近一次備份是否在 25 小時內(允許一點偏差)。
若發現備份過期或從未備份,立即發出 Telegram 告警,
並透過 store_insight 讓 NemoTron/OpenClaw 可感知備份健康狀態。
"""
logging.info("[Scheduler] [BackupMonitor] 🔍 檢查備份健康狀態...")
try:
from services.db_backup_service import get_latest_backup_info
from services.notification_manager import NotificationManager
info = get_latest_backup_info()
now = datetime.now(TAIPEI_TZ)
alert_needed = False
alert_reason = ""
if info["status"] == "no_backup" or info["filename"] is None:
alert_needed = True
alert_reason = "從未執行過備份backup_log 與備份目錄均為空"
elif info["status"] == "failed":
alert_needed = True
alert_reason = f"最近一次備份失敗:{info.get('error', '未知')}"
else:
created_at = info["created_at"]
if created_at is not None:
# 統一為 naive datetime 比較
if hasattr(created_at, 'tzinfo') and created_at.tzinfo is not None:
created_at = created_at.replace(tzinfo=None)
now_naive = now.replace(tzinfo=None)
hours_ago = (now_naive - created_at).total_seconds() / 3600
if hours_ago > 25:
alert_needed = True
alert_reason = f"最近備份距今 {hours_ago:.1f} 小時(超過 25h 閾值),可能備份任務未執行"
if alert_needed:
now_str = now.strftime('%Y-%m-%d %H:%M')
msg = (
f"⚠️ 資料庫備份異常告警 ({now_str})\n"
f"{'='*30}\n"
f"🔴 原因:{alert_reason}\n"
f"📋 最新備份:{info.get('filename', '')}\n"
f"🕐 備份時間:{info.get('created_at', '')}\n"
f"💡 請確認 momo-scheduler 備份排程是否正常執行!"
)
logging.warning(f"[Scheduler] [BackupMonitor] ⚠️ 備份告警: {alert_reason}")
NotificationManager()._send_telegram_messages([msg])
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_backup_monitor_task",
error=RuntimeError(alert_reason),
source="Scheduler.BackupMonitor",
event_type="backup_monitor_alert",
priority="P2",
title="資料庫備份健康告警",
payload={"latest_file": info.get("filename"), "created_at": str(info.get("created_at"))},
dedup_ttl_sec=21600,
)
except Exception as _router_e:
logging.error(f"[Scheduler] [BackupMonitor] event_router 失敗: {_router_e}")
try:
from services.openclaw_learning_service import store_insight
store_insight(
insight_type='backup_status',
content=f"備份監控告警:{alert_reason}",
period=now.strftime('%Y-%m-%d'),
metadata={"alert": True, "reason": alert_reason, "latest_file": info.get("filename")},
ai_model="scheduler",
)
except Exception as insight_error:
logging.warning(
f"[Scheduler] [BackupMonitor] ⚠️ 備份監控 insight 寫入失敗但繼續 | Error: {insight_error}",
exc_info=True,
)
else:
created_at = info.get("created_at")
logging.info(f"[Scheduler] [BackupMonitor] ✅ 備份狀態正常 | 最新: {info.get('filename')} @ {created_at}")
_save_stats('backup_monitor', {
"status": "Alert" if alert_needed else "OK",
"latest_file": info.get("filename"),
"alert_reason": alert_reason if alert_needed else None,
})
except Exception as e:
logging.error(f"[Scheduler] [BackupMonitor] 🚨 監控任務異常 | Error: {e}")
_save_stats('backup_monitor', {"status": "Error", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_backup_monitor_task",
error=e,
source="Scheduler.BackupMonitor",
event_type="backup_monitor_failure",
priority="P2",
title="資料庫備份監控任務異常",
dedup_ttl_sec=3600,
)
except Exception as _router_e:
logging.error(f"[Scheduler] [BackupMonitor] event_router 失敗: {_router_e}")
def run_openclaw_meta_analysis_task():
"""每 6 小時 — OpenClaw Meta-AnalysisAI 系統效能自我審視 + 電商洞察快照)"""
try:
from services.openclaw_strategist_service import generate_meta_analysis_report
report = generate_meta_analysis_report()
logging.info(f"[Scheduler] [MetaAnalysis] ✅ 完成 | 長度={len(report)} 字元")
_save_stats('meta_analysis', {"status": "OK", "length": len(report)})
except Exception as e:
import traceback as _tb
logging.error(f"[Scheduler] [MetaAnalysis] 🚨 Meta-Analysis 任務異常: {e}")
_save_stats('meta_analysis', {"status": "Error", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_openclaw_meta_analysis_task",
error=e,
source="Scheduler.MetaAnalysis",
event_type="openclaw_report_failure",
priority="P2",
title="OpenClaw Meta-Analysis 任務異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Scheduler] [MetaAnalysis] event_router 失敗: {_router_e}")
try:
from services.auto_heal_service import auto_heal_service
auto_heal_service.handle_exception(
task_name="run_openclaw_meta_analysis_task",
exception=e,
traceback_str=_tb.format_exc(),
)
except Exception as _heal_e:
logging.error(f"[Scheduler] [MetaAnalysis] auto_heal_service 失敗: {_heal_e}")
def run_dedup_batch_task():
"""每日 03:00 — ai_insights 去重批次(同 SKU 同 type 同 period 保留最高品質)"""
try:
from services.openclaw_learning_service import run_dedup_batch
result = run_dedup_batch()
logging.info(
f"[Scheduler] [Dedup] 去重完成 | 歸檔={result.get('archived', 0)}"
f" / 掃描={result.get('scanned', 0)}"
)
_save_stats('dedup_batch', result)
except Exception as e:
import traceback as _tb
logging.error(f"[Scheduler] [Dedup] 去重批次異常: {e}")
_save_stats('dedup_batch', {"status": "Error", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_dedup_batch_task",
error=e,
source="Scheduler.Dedup",
event_type="dedup_batch_failure",
priority="P2",
title="ai_insights 去重批次異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Scheduler] [Dedup] event_router 失敗: {_router_e}")
def run_quality_rescore_task():
"""每日 04:00 — ai_insights 品質分數時間衰減重算批次"""
try:
from services.openclaw_learning_service import run_quality_rescore_batch
result = run_quality_rescore_batch()
try:
from services.openclaw_learning_service import enqueue_missing_insight_embeddings
result["embedding_backfill"] = enqueue_missing_insight_embeddings(limit=200)
except Exception as _embed_e:
logging.warning(f"[Scheduler] [Rescore] embedding backfill 略過: {_embed_e}")
logging.info(
f"[Scheduler] [Rescore] 品質重算完成 | 更新={result.get('updated', 0)}"
f" | relearn 自動歸檔={result.get('relearn_reset', 0)}"
)
_save_stats('quality_rescore', result)
except Exception as e:
import traceback as _tb
logging.error(f"[Scheduler] [Rescore] 品質分數重算異常: {e}")
_save_stats('quality_rescore', {"status": "Error", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_quality_rescore_task",
error=e,
source="Scheduler.Rescore",
event_type="quality_rescore_failure",
priority="P2",
title="ai_insights 品質重算批次異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Scheduler] [Rescore] event_router 失敗: {_router_e}")
def run_daily_report_task():
"""每日 09:00 — OpenClaw 電商日報(業績快報 + 競品威脅 + 圖表推播)"""
try:
from services.openclaw_strategist_service import generate_daily_report
result = generate_daily_report()
logging.info(
f"[Scheduler] [DailyReport] ✅ 完成 | period={result.get('period')} "
f"charts={result.get('chart_count', 0)} actions={result.get('action_count', 0)}"
)
_save_stats('daily_report', result)
except Exception as e:
import traceback as _tb
logging.error(f"[Scheduler] [DailyReport] 🚨 日報任務異常: {e}")
_save_stats('daily_report', {"status": "Error", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_daily_report_task",
error=e,
source="Scheduler.DailyReport",
event_type="openclaw_report_failure",
priority="P2",
title="OpenClaw 日報任務異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Scheduler] [DailyReport] event_router 失敗: {_router_e}")
try:
from services.auto_heal_service import auto_heal_service
auto_heal_service.handle_exception(
task_name="run_daily_report_task",
exception=e,
traceback_str=_tb.format_exc(),
)
except Exception as _heal_e:
logging.error(f"[Scheduler] [DailyReport] auto_heal_service 失敗: {_heal_e}")
def run_monthly_report_task():
"""每月1日 07:00 — OpenClaw 電商月報(月度全景洞察 + 多圖表推播)"""
try:
from services.openclaw_strategist_service import generate_monthly_report
result = generate_monthly_report()
logging.info(
f"[Scheduler] [MonthlyReport] ✅ 完成 | period={result.get('period')} "
f"charts={result.get('chart_count', 0)} actions={result.get('action_count', 0)}"
)
_save_stats('monthly_report', result)
except Exception as e:
import traceback as _tb
logging.error(f"[Scheduler] [MonthlyReport] 🚨 月報任務異常: {e}")
_save_stats('monthly_report', {"status": "Error", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_monthly_report_task",
error=e,
source="Scheduler.MonthlyReport",
event_type="openclaw_report_failure",
priority="P2",
title="OpenClaw 月報任務異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Scheduler] [MonthlyReport] event_router 失敗: {_router_e}")
try:
from services.auto_heal_service import auto_heal_service
auto_heal_service.handle_exception(
task_name="run_monthly_report_task",
exception=e,
traceback_str=_tb.format_exc(),
)
except Exception as _heal_e:
logging.error(f"[Scheduler] [MonthlyReport] auto_heal_service 失敗: {_heal_e}")
def run_ppt_auto_generation_task(schedule_kind=None):
"""依日/週/月/季/半年/年度節奏補齊 PPT 簡報。
22:00 的 ppt_vision_audit 只負責視覺審核;這個任務先依固定週期產出
已定義的簡報,並把每次嘗試寫入 ppt_generation_runs成功檔案寫入
ppt_reports.cached_data。
"""
try:
from services.ppt_auto_generation_service import generate_scheduled_ppt_reports
result = generate_scheduled_ppt_reports(schedule_kind=schedule_kind)
logging.info(
"[Scheduler] [PPTAutoGeneration] kind=%s status=%s ready=%s errors=%s",
schedule_kind or "due",
result.get("status"),
result.get("ready", 0),
result.get("errors", 0),
)
stat_key = f"ppt_auto_generation_{schedule_kind}" if schedule_kind else "ppt_auto_generation"
_save_stats(stat_key, result)
except Exception as e:
import traceback as _tb
logging.error(f"[Scheduler] [PPTAutoGeneration] 🚨 自動簡報補齊異常: {e}")
stat_key = f"ppt_auto_generation_{schedule_kind}" if schedule_kind else "ppt_auto_generation"
_save_stats(stat_key, {"status": "Error", "error": str(e), "schedule_kind": schedule_kind})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_ppt_auto_generation_task",
error=e,
source="Scheduler.PPTAutoGeneration",
event_type="ppt_auto_generation_failure",
priority="P2",
title="PPT 自動簡報補齊異常",
trace=_tb.format_exc(),
)
except Exception as _router_e:
logging.error(f"[Scheduler] [PPTAutoGeneration] event_router 失敗: {_router_e}")
def run_ai_smoke_daily_summary_task():
"""每日 AI 自動化 Smoke 摘要推播;先執行一次只讀 smoke 以刷新 sentinel。"""
try:
from services.ai_automation_smoke_service import collect_ai_automation_smoke, send_smoke_daily_summary
smoke_result = collect_ai_automation_smoke(record_history=True, history_limit=20)
result = send_smoke_daily_summary()
logging.info(
"[Scheduler] [AISmokeSummary] 完成 | smoke=%s send_status=%s sent=%s failed=%s",
smoke_result.get("status"),
result.get("status"),
result.get("telegram", {}).get("sent", 0),
result.get("telegram", {}).get("failed", 0),
)
result["smoke"] = {
"status": smoke_result.get("status"),
"summary": smoke_result.get("summary"),
}
_save_stats('ai_smoke_daily_summary', result)
except Exception as e:
import traceback as _tb
logging.error(f"[Scheduler] [AISmokeSummary] 🚨 摘要推播異常: {e}")
_save_stats('ai_smoke_daily_summary', {"status": "Error", "error": str(e)})
try:
from services.event_router import notify_failure
notify_failure(
task_name="run_ai_smoke_daily_summary_task",
error=e,
source="Scheduler.AISmokeSummary",
event_type="ai_smoke_summary_failure",
priority="P2",
title="AI Smoke 每日摘要推播異常",
trace=_tb.format_exc(),
dedup_ttl_sec=3600,
)
except Exception as _router_e:
logging.error(f"[Scheduler] [AISmokeSummary] event_router 失敗: {_router_e}")
if __name__ == "__main__":
# 此檔案現在由 app.py 導入並由其主執行緒管理排程。
# 若需獨立測試,可在此處臨時加入調用程式碼。
logging.info("[Scheduler] [Main] 此為排程任務定義檔,請由主程式 app.py 執行。")