From 3ec5fa9cb5e516825f5590b9b3cca871d6aad510 Mon Sep 17 00:00:00 2001 From: OoO Date: Thu, 21 May 2026 15:47:34 +0800 Subject: [PATCH] =?UTF-8?q?=E7=95=A5=E9=81=8E=E5=A4=B1=E6=95=88=20EDM=20?= =?UTF-8?q?=E6=B4=BB=E5=8B=95=E9=A0=81=E5=91=8A=E8=AD=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 2 +- .../code_modularization_inventory_20260430.md | 5 +- docs/memory/history_logs.md | 1 + scheduler.py | 83 +++++++++++++++-- services/marketplace_product_matcher.py | 17 +++- tests/test_marketplace_product_matcher.py | 14 +++ tests/test_scheduler_edm_unavailable.py | 88 +++++++++++++++++++ 7 files changed, 200 insertions(+), 10 deletions(-) create mode 100644 tests/test_scheduler_edm_unavailable.py diff --git a/config.py b/config.py index 40e6f15..0f12533 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.373" +SYSTEM_VERSION = "V10.374" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/code_modularization_inventory_20260430.md b/docs/memory/code_modularization_inventory_20260430.md index efc89f5..fe5d96b 100644 --- a/docs/memory/code_modularization_inventory_20260430.md +++ b/docs/memory/code_modularization_inventory_20260430.md @@ -38,6 +38,7 @@ - 2026-05-21 追記:同步專業比價分級連動合併後的 `services/competitor_intel_repository.py` 與 `services/nemoton_dispatcher_service.py` 行數;此處只更新 inventory,不變更比價或告警行為。 - 2026-05-21 追記:同步 PChome/LUDEYA 商品線名稱漂移比對更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 - 2026-05-21 追記:同步 MAC/Yuskin/AHC 名稱漂移與 bundle equivalent matcher 更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 +- 2026-05-21 追記:同步 EDM 失效頁 alert guard 與 REJURAN 唇膏寬價差 exact-identity matcher 更新後的 `scheduler.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 ## 達到或超過 800 行檔案清單 @@ -46,7 +47,7 @@ | 9225 | `routes/openclaw_bot_routes.py` | P0 巨型 Blueprint | route / bot command service / report service / scheduler hook;禁止再新增市場情報入口 | | 5499 | `services/ppt_generator.py` | P0 報表生成巨型 service | deck orchestration / slide builders / chart builders / report type registry | | 3186 | `routes/sales_routes.py` | P0 巨型 Blueprint | page routes / API routes / chart query service / calendar service;分析頁新增功能先抽 `services/sales/` | -| 2821 | `scheduler.py` | P0 排程總管 | task registry / crawler jobs / report jobs / notification jobs;市場情報只能透過獨立 job module 掛入 | +| 2973 | `scheduler.py` | P0 排程總管 | task registry / crawler jobs / report jobs / notification jobs;市場情報只能透過獨立 job module 掛入 | | 2731 | `services/openclaw_strategist_service.py` | P0 OpenClaw service | prompt builders / report composer / strategy rules | | 3681 | `routes/admin_observability_routes.py` | P0 觀測台巨型 Blueprint | `services/observability_query_service.py` / `services/observability_action_service.py` / route glue | | 1796 | `routes/ai_routes.py` | P1 AI Blueprint | route glue / AI orchestration service / prompt builders | @@ -64,7 +65,7 @@ | 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders | | 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers | | 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting | -| 2279 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization | +| 2292 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization | | 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service | | 961 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing | | 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy | diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index 7e470bb..960e9ca 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化 +- **V10.374 EDM 失效頁告警止血**: `scheduler.py` 新增 MOMO EDM alert guard,`run_edm_task` / `run_festival_task` / `run_promo_event_task` 遇到「很抱歉此EDM不存在」時會接受 browser alert、寫入 `Skipped / edm_unavailable` stats,且不再送 EventRouter failure,避免 festival / mothers_day 過期活動頁重新累積 Telegram queue;同版整合 REJURAN 麗駐蘭唇膏同款在價格比過寬時的 exact-identity 價格懲罰豁免。 - **V10.373 PChome 同款名稱漂移整合**: 整合並修正 concurrent matcher work,新增 MAC/M.A.C 品牌 alias、Yuskin 經典乳霜 4入/4盒組同數量 bundle equivalent、AHC 瞬效 B5 玻尿酸關鍵字重排 anchor;修復 `_count_score()` 縮排破壞與 unreachable code,讓新增測試可穩定通過。 - **V10.372 Smoke 與 EventRouter queue 修復**: 修正 AI automation smoke 對 NemoTron fallback 的 class 判斷,改接受實際存在的 `NemotronDispatcher._hermes_rule_fallback`,避免 Hermes fallback 正常卻被誤報 critical;EventRouter 失敗佇列回放改為重建短版 HTML-safe 訊息,escape 標題/摘要/trace/error 並限制長度,避免舊 Selenium stacktrace 的 `` 造成 Telegram HTTP 400 反覆卡住;同版整合 LUDEYA 蜂王玫瑰商品線在 MOMO/PChome 名稱漂移時的 identity anchor alias。 - **V10.371 品牌缺失同款放行**: marketplace matcher 新增 `brandless_exact_identity` 加分,只限「一側有品牌、一側缺品牌」但 shared identity anchor 夠長、規格/序列/中文名相似度都高且無 hard veto 的案例;覆蓋小米有品小浪智能感應自動噴香機,讓 PChome 標題省略品牌時仍可進入同款告警候選。 diff --git a/scheduler.py b/scheduler.py index c64e6b0..8c8c3a2 100644 --- a/scheduler.py +++ b/scheduler.py @@ -12,7 +12,7 @@ from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.by import By from contextlib import contextmanager from selenium.webdriver.common.keys import Keys -from selenium.common.exceptions import TimeoutException +from selenium.common.exceptions import NoAlertPresentException, TimeoutException, UnexpectedAlertPresentException from sqlalchemy import desc, func from database.manager import DatabaseManager from database.models import Product, PriceRecord @@ -36,6 +36,62 @@ logging.basicConfig( # 設定台北時區 TAIPEI_TZ = timezone(timedelta(hours=8)) + +class MomoEdmUnavailable(RuntimeError): + """MOMO EDM page is expired or unavailable; skip without failure alert.""" + + def __init__(self, alert_text: str, url: str): + self.alert_text = str(alert_text or "").strip() + self.url = url + super().__init__(self.alert_text or "MOMO EDM unavailable") + + +def _is_momo_edm_unavailable_alert(alert_text): + text = str(alert_text or "").replace(" ", "") + return "很抱歉此EDM不存在" in text or "EDM不存在" in text + + +def _accept_current_alert(driver): + try: + alert = driver.switch_to.alert + alert_text = str(getattr(alert, "text", "") or "") + alert.accept() + return alert_text + except NoAlertPresentException: + return "" + + +def _raise_if_momo_edm_unavailable(driver, task_label, url): + alert_text = _accept_current_alert(driver) + if not alert_text: + return + if _is_momo_edm_unavailable_alert(alert_text): + logging.warning( + "%s ⚠️ MOMO 活動頁已失效,任務改為 Skipped,不送 failure alert | URL: %s | Alert: %s", + task_label, + url, + alert_text, + ) + raise MomoEdmUnavailable(alert_text, url) + logging.warning("%s ⚠️ 已接受瀏覽器 alert 後繼續解析 | Alert: %s", task_label, alert_text) + + +def _safe_driver_title(driver, task_label, url): + try: + return driver.title + except UnexpectedAlertPresentException as exc: + alert_text = _accept_current_alert(driver) or getattr(exc, "alert_text", "") or str(exc) + if _is_momo_edm_unavailable_alert(alert_text): + logging.warning( + "%s ⚠️ 讀取 title 時偵測到 MOMO 活動頁失效,任務改為 Skipped | URL: %s | Alert: %s", + task_label, + url, + alert_text, + ) + raise MomoEdmUnavailable(alert_text, url) from exc + raise + + def _save_stats(task_name, data): """將任務統計結果寫入 JSON 檔案""" stats_file = os.path.join(os.path.dirname(__file__), 'data', 'scheduler_stats.json') @@ -552,10 +608,12 @@ def run_edm_task(lpn_code="O1K5FBOqsvN"): logging.info(f"[Crawler] [EDM] 🔗 前往頁面: {url}") driver.get(url) time.sleep(5) # 等待 JS 渲染 - logging.info(f"[Crawler] [EDM] 📄 頁面標題: {driver.title}") + _raise_if_momo_edm_unavailable(driver, "[Crawler] [EDM]", url) + page_title = _safe_driver_title(driver, "[Crawler] [EDM]", url) + logging.info(f"[Crawler] [EDM] 📄 頁面標題: {page_title}") # 2. 準備批次資訊 - activity_name = driver.title.split("-")[0].strip() if "-" in driver.title else "限時搶購" + activity_name = page_title.split("-")[0].strip() if "-" in page_title else "限時搶購" batch_id = int(time.time()) now = datetime.now(TAIPEI_TZ).replace(tzinfo=None) @@ -897,6 +955,10 @@ def run_edm_task(lpn_code="O1K5FBOqsvN"): } _save_stats('edm_task', stats) + except MomoEdmUnavailable as e: + logging.warning(f"[Crawler] [EDM] ⚠️ EDM 頁面不可用,任務略過 | Alert: {e.alert_text} | URL: {e.url}") + stats = {"status": "Skipped", "reason": "edm_unavailable", "alert_text": e.alert_text, "url": e.url} + _save_stats('edm_task', stats) except Exception as e: import traceback as _tb logging.error(f"[Crawler] [EDM] 🚨 EDM 任務異常 | Error: {e}") @@ -983,7 +1045,8 @@ def run_festival_task(lpn_code="O7ylWfihYUM"): # V-Fix: 增加初始等待時間,確保頁面上的 Vue.js 框架有足夠時間初始化並掛載懶加載事件 time.sleep(10) - logging.info(f"[Crawler] [Festival] 📄 頁面標題: {driver.title}") + _raise_if_momo_edm_unavailable(driver, "[Crawler] [Festival]", url) + logging.info(f"[Crawler] [Festival] 📄 頁面標題: {_safe_driver_title(driver, '[Crawler] [Festival]', url)}") # V-Fix: 嘗試在 iframe 中尋找內容 iframes = driver.find_elements(By.TAG_NAME, 'iframe') @@ -1292,6 +1355,10 @@ def run_festival_task(lpn_code="O7ylWfihYUM"): else: logging.info("[Crawler] [Festival] ℹ️ 無異動,不發送通知") + except MomoEdmUnavailable as e: + logging.warning(f"[Crawler] [Festival] ⚠️ {PAGE_TYPE} 活動頁不可用,任務略過 | Alert: {e.alert_text} | URL: {e.url}") + stats = {"status": "Skipped", "reason": "edm_unavailable", "alert_text": e.alert_text, "url": e.url} + _save_stats('festival_task', stats) except Exception as e: import traceback as _tb logging.error(f"[Crawler] [Festival] 🚨 {PAGE_TYPE} 任務異常 | Error: {e}") @@ -1341,7 +1408,9 @@ def run_promo_event_task(lpn_code, page_type, activity_name): logging.exception(f"[Crawler] [{page_type.upper()}] window.stop() 失敗但繼續 | Error: {e}") time.sleep(10) - logging.info(f"[Crawler] [{page_type.upper()}] 📄 頁面標題: {driver.title}") + task_label = f"[Crawler] [{page_type.upper()}]" + _raise_if_momo_edm_unavailable(driver, task_label, url) + logging.info(f"[Crawler] [{page_type.upper()}] 📄 頁面標題: {_safe_driver_title(driver, task_label, url)}") # 嘗試在 iframe 中尋找內容 iframes = driver.find_elements(By.TAG_NAME, 'iframe') @@ -1630,6 +1699,10 @@ def run_promo_event_task(lpn_code, page_type, activity_name): else: logging.info(f"[Crawler] [{page_type.upper()}] ℹ️ 無異動,不發送通知") + except MomoEdmUnavailable as e: + logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ {activity_name} 活動頁不可用,任務略過 | Alert: {e.alert_text} | URL: {e.url}") + stats = {"status": "Skipped", "reason": "edm_unavailable", "alert_text": e.alert_text, "url": e.url} + _save_stats(f'{page_type}_task', stats) except Exception as e: import traceback as _tb logging.error(f"[Crawler] [{page_type.upper()}] 🚨 {page_type} 任務異常 | Error: {e}") diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index 07655ad..8861660 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -1595,6 +1595,16 @@ def score_marketplace_match( and token_score >= 0.68 and sequence_score >= 0.72 ) + allow_wide_price_penalty_suppression = ( + shared_anchor + and len(shared_anchor.replace(" ", "")) >= 5 + and brand_score >= 0.95 + and not hard_veto + and type_score >= 0.55 + and spec_score >= 0.99 + and token_score >= 0.50 + and sequence_score >= 0.55 + ) if (ratio < 0.3 or ratio > 3.2) and token_score < 0.78: if allow_price_penalty_suppression: reasons.append("price_penalty_suppressed_exact_identity") @@ -1602,8 +1612,11 @@ def score_marketplace_match( price_penalty = 0.12 reasons.append("price_ratio_extreme") elif (ratio < 0.48 or ratio > 2.2) and token_score < 0.68: - price_penalty = 0.06 - reasons.append("price_ratio_wide") + if allow_wide_price_penalty_suppression: + reasons.append("price_penalty_suppressed_wide_exact_identity") + else: + price_penalty = 0.06 + reasons.append("price_ratio_wide") except (TypeError, ValueError, ZeroDivisionError): price_penalty = 0.0 diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index 543fac1..69fa3eb 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -470,6 +470,20 @@ def test_marketplace_matcher_promotes_ludeya_line_with_platform_name_drift(): assert "shared_identity_anchor" in diagnostics.reasons or "shared_identity_anchor_no_spec" in diagnostics.reasons +def test_marketplace_matcher_suppresses_wide_price_penalty_for_exact_lip_product(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【REJURAN 麗珠蘭】官方直營|REJURAN 麗駐蘭唇膏 3.7g|台灣總代理 動物性 PDRN", + "REJURAN 麗駐蘭唇膏 3.7g", + momo_price=169, + competitor_price=380, + ) + + assert diagnostics.score >= 0.76 + assert "price_penalty_suppressed_wide_exact_identity" in diagnostics.reasons + + def test_marketplace_matcher_promotes_mac_brand_alias_and_exact_compact_name(): from services.marketplace_product_matcher import score_marketplace_match diff --git a/tests/test_scheduler_edm_unavailable.py b/tests/test_scheduler_edm_unavailable.py new file mode 100644 index 0000000..0d0770e --- /dev/null +++ b/tests/test_scheduler_edm_unavailable.py @@ -0,0 +1,88 @@ +from contextlib import contextmanager + + +class _FakeAlert: + def __init__(self, driver, text="很抱歉此EDM不存在"): + self._driver = driver + self.text = text + + def accept(self): + self._driver.alert_accepted = True + + +class _FakeSwitchTo: + def __init__(self, driver): + self._driver = driver + + @property + def alert(self): + import scheduler + + if self._driver.alert_accepted: + raise scheduler.NoAlertPresentException() + return _FakeAlert(self._driver) + + +class _ExpiredEdmDriver: + def __init__(self): + self.alert_accepted = False + self.loaded_url = None + self.switch_to = _FakeSwitchTo(self) + + def get(self, url): + self.loaded_url = url + + @property + def title(self): + raise AssertionError("expired EDM alert should be handled before reading title") + + +def _install_expired_edm_driver(monkeypatch): + import scheduler + + driver = _ExpiredEdmDriver() + + @contextmanager + def fake_resources(*_args, **_kwargs): + yield driver, object() + + stats = [] + monkeypatch.setattr(scheduler, "managed_scraper_resources", fake_resources) + monkeypatch.setattr(scheduler.time, "sleep", lambda *_args, **_kwargs: None) + monkeypatch.setattr(scheduler, "_save_stats", lambda task, data: stats.append((task, data))) + return scheduler, driver, stats + + +def test_promo_event_expired_edm_alert_skips_without_failure(monkeypatch): + scheduler, driver, stats = _install_expired_edm_driver(monkeypatch) + + scheduler.run_promo_event_task("expired-lpn", "mothers_day", "母親節超值限時購") + + assert driver.alert_accepted is True + assert stats == [( + "mothers_day_task", + { + "status": "Skipped", + "reason": "edm_unavailable", + "alert_text": "很抱歉此EDM不存在", + "url": "https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn=expired-lpn&n=1", + }, + )] + + +def test_festival_expired_edm_alert_skips_without_failure(monkeypatch): + scheduler, driver, stats = _install_expired_edm_driver(monkeypatch) + + scheduler.run_festival_task(lpn_code="expired-lpn") + + assert driver.alert_accepted is True + assert stats == [( + "festival_task", + { + "status": "Skipped", + "reason": "edm_unavailable", + "alert_text": "很抱歉此EDM不存在", + "url": "https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn=expired-lpn&n=1", + }, + )] +