This commit is contained in:
@@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
||||
# ==========================================
|
||||
# 系統版本與路徑
|
||||
# ==========================================
|
||||
SYSTEM_VERSION = "V10.373"
|
||||
SYSTEM_VERSION = "V10.374"
|
||||
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
||||
public_url = PUBLIC_URL # 用於模板顯示
|
||||
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
- 2026-05-21 追記:同步專業比價分級連動合併後的 `services/competitor_intel_repository.py` 與 `services/nemoton_dispatcher_service.py` 行數;此處只更新 inventory,不變更比價或告警行為。
|
||||
- 2026-05-21 追記:同步 PChome/LUDEYA 商品線名稱漂移比對更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
- 2026-05-21 追記:同步 MAC/Yuskin/AHC 名稱漂移與 bundle equivalent matcher 更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
- 2026-05-21 追記:同步 EDM 失效頁 alert guard 與 REJURAN 唇膏寬價差 exact-identity matcher 更新後的 `scheduler.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
|
||||
## 達到或超過 800 行檔案清單
|
||||
|
||||
@@ -46,7 +47,7 @@
|
||||
| 9225 | `routes/openclaw_bot_routes.py` | P0 巨型 Blueprint | route / bot command service / report service / scheduler hook;禁止再新增市場情報入口 |
|
||||
| 5499 | `services/ppt_generator.py` | P0 報表生成巨型 service | deck orchestration / slide builders / chart builders / report type registry |
|
||||
| 3186 | `routes/sales_routes.py` | P0 巨型 Blueprint | page routes / API routes / chart query service / calendar service;分析頁新增功能先抽 `services/sales/` |
|
||||
| 2821 | `scheduler.py` | P0 排程總管 | task registry / crawler jobs / report jobs / notification jobs;市場情報只能透過獨立 job module 掛入 |
|
||||
| 2973 | `scheduler.py` | P0 排程總管 | task registry / crawler jobs / report jobs / notification jobs;市場情報只能透過獨立 job module 掛入 |
|
||||
| 2731 | `services/openclaw_strategist_service.py` | P0 OpenClaw service | prompt builders / report composer / strategy rules |
|
||||
| 3681 | `routes/admin_observability_routes.py` | P0 觀測台巨型 Blueprint | `services/observability_query_service.py` / `services/observability_action_service.py` / route glue |
|
||||
| 1796 | `routes/ai_routes.py` | P1 AI Blueprint | route glue / AI orchestration service / prompt builders |
|
||||
@@ -64,7 +65,7 @@
|
||||
| 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders |
|
||||
| 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers |
|
||||
| 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting |
|
||||
| 2279 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
|
||||
| 2292 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
|
||||
| 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service |
|
||||
| 961 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing |
|
||||
| 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy |
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
## 📅 詳細更新日誌 (考古存檔)
|
||||
|
||||
### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化
|
||||
- **V10.374 EDM 失效頁告警止血**: `scheduler.py` 新增 MOMO EDM alert guard,`run_edm_task` / `run_festival_task` / `run_promo_event_task` 遇到「很抱歉此EDM不存在」時會接受 browser alert、寫入 `Skipped / edm_unavailable` stats,且不再送 EventRouter failure,避免 festival / mothers_day 過期活動頁重新累積 Telegram queue;同版整合 REJURAN 麗駐蘭唇膏同款在價格比過寬時的 exact-identity 價格懲罰豁免。
|
||||
- **V10.373 PChome 同款名稱漂移整合**: 整合並修正 concurrent matcher work,新增 MAC/M.A.C 品牌 alias、Yuskin 經典乳霜 4入/4盒組同數量 bundle equivalent、AHC 瞬效 B5 玻尿酸關鍵字重排 anchor;修復 `_count_score()` 縮排破壞與 unreachable code,讓新增測試可穩定通過。
|
||||
- **V10.372 Smoke 與 EventRouter queue 修復**: 修正 AI automation smoke 對 NemoTron fallback 的 class 判斷,改接受實際存在的 `NemotronDispatcher._hermes_rule_fallback`,避免 Hermes fallback 正常卻被誤報 critical;EventRouter 失敗佇列回放改為重建短版 HTML-safe 訊息,escape 標題/摘要/trace/error 並限制長度,避免舊 Selenium stacktrace 的 `<unknown>` 造成 Telegram HTTP 400 反覆卡住;同版整合 LUDEYA 蜂王玫瑰商品線在 MOMO/PChome 名稱漂移時的 identity anchor alias。
|
||||
- **V10.371 品牌缺失同款放行**: marketplace matcher 新增 `brandless_exact_identity` 加分,只限「一側有品牌、一側缺品牌」但 shared identity anchor 夠長、規格/序列/中文名相似度都高且無 hard veto 的案例;覆蓋小米有品小浪智能感應自動噴香機,讓 PChome 標題省略品牌時仍可進入同款告警候選。
|
||||
|
||||
83
scheduler.py
83
scheduler.py
@@ -12,7 +12,7 @@ from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.common.by import By
|
||||
from contextlib import contextmanager
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.common.exceptions import TimeoutException
|
||||
from selenium.common.exceptions import NoAlertPresentException, TimeoutException, UnexpectedAlertPresentException
|
||||
from sqlalchemy import desc, func
|
||||
from database.manager import DatabaseManager
|
||||
from database.models import Product, PriceRecord
|
||||
@@ -36,6 +36,62 @@ logging.basicConfig(
|
||||
# 設定台北時區
|
||||
TAIPEI_TZ = timezone(timedelta(hours=8))
|
||||
|
||||
|
||||
class MomoEdmUnavailable(RuntimeError):
    """Signal that a MOMO EDM page is expired or unavailable.

    Attributes are set from the constructor arguments:
        alert_text: the alert text coerced to ``str`` and stripped; empty
            string when the input was falsy.
        url: the URL exactly as passed in.

    The exception message is the stripped alert text, or the fixed fallback
    "MOMO EDM unavailable" when that text is empty.
    """

    def __init__(self, alert_text: str, url: str):
        cleaned = str(alert_text or "").strip()
        message = cleaned if cleaned else "MOMO EDM unavailable"
        super().__init__(message)
        self.url = url
        self.alert_text = cleaned
|
||||
|
||||
|
||||
def _is_momo_edm_unavailable_alert(alert_text):
    """Return True when an alert text says the MOMO EDM page does not exist.

    The text is compared after removing every whitespace character (ASCII
    spaces, tabs, newlines, full-width spaces) and case-folding, so layout
    or letter-case differences in the alert do not defeat the match.

    Args:
        alert_text: Alert text to inspect. ``None`` and other falsy values
            are treated as an empty string.

    Returns:
        bool: True if the normalised text contains "EDM不存在".
    """
    # str.split() with no separator splits on any Unicode whitespace, which
    # also covers the ideographic space (U+3000) that plain replace(" ", "")
    # leaves behind.
    normalized = "".join(str(alert_text or "").split()).casefold()
    # "很抱歉此EDM不存在" always contains this shorter marker, so a single
    # substring check covers both phrasings.
    return "edm不存在" in normalized
|
||||
|
||||
|
||||
def _accept_current_alert(driver):
    """Accept the alert exposed by ``driver.switch_to.alert`` and return its text.

    Args:
        driver: Object exposing ``switch_to.alert``; the alert must provide
            ``accept()`` and may provide a ``text`` attribute.

    Returns:
        str: The alert's text (empty string when the text is missing or
        falsy), or an empty string when ``NoAlertPresentException`` is raised
        while reading or accepting the alert.
    """
    try:
        pending = driver.switch_to.alert
        raw_text = getattr(pending, "text", "")
        message = str(raw_text or "")
        pending.accept()
    except NoAlertPresentException:
        return ""
    return message
|
||||
|
||||
|
||||
def _raise_if_momo_edm_unavailable(driver, task_label, url):
    """Accept any open alert and abort the task if it reports a missing EDM.

    Args:
        driver: Browser driver passed through to ``_accept_current_alert``.
        task_label: Prefix used in the log messages.
        url: Page URL, logged and attached to the raised exception.

    Raises:
        MomoEdmUnavailable: When the accepted alert text matches
            ``_is_momo_edm_unavailable_alert``.
    """
    dismissed_text = _accept_current_alert(driver)

    # No alert text to act on: nothing to log, nothing to raise.
    if not dismissed_text:
        return

    # Any other alert has already been accepted; note it and let the caller go on.
    if not _is_momo_edm_unavailable_alert(dismissed_text):
        logging.warning("%s ⚠️ 已接受瀏覽器 alert 後繼續解析 | Alert: %s", task_label, dismissed_text)
        return

    logging.warning(
        "%s ⚠️ MOMO 活動頁已失效,任務改為 Skipped,不送 failure alert | URL: %s | Alert: %s",
        task_label,
        url,
        dismissed_text,
    )
    raise MomoEdmUnavailable(dismissed_text, url)
|
||||
|
||||
|
||||
def _safe_driver_title(driver, task_label, url):
    """Read ``driver.title``, translating a missing-EDM alert into ``MomoEdmUnavailable``.

    Args:
        driver: Browser driver whose ``title`` is read.
        task_label: Prefix used in the log message.
        url: Page URL, logged and attached to the raised exception.

    Returns:
        The value of ``driver.title``.

    Raises:
        MomoEdmUnavailable: When reading the title raises
            ``UnexpectedAlertPresentException`` and the alert text matches
            ``_is_momo_edm_unavailable_alert``.
        UnexpectedAlertPresentException: Re-raised unchanged for any other alert.
    """
    try:
        return driver.title
    except UnexpectedAlertPresentException as exc:
        # Prefer the text of a still-open alert, then the text carried by the
        # exception, then the exception's string form.
        alert_text = _accept_current_alert(driver) or getattr(exc, "alert_text", "") or str(exc)
        if not _is_momo_edm_unavailable_alert(alert_text):
            raise
        logging.warning(
            "%s ⚠️ 讀取 title 時偵測到 MOMO 活動頁失效,任務改為 Skipped | URL: %s | Alert: %s",
            task_label,
            url,
            alert_text,
        )
        raise MomoEdmUnavailable(alert_text, url) from exc
|
||||
|
||||
|
||||
def _save_stats(task_name, data):
|
||||
"""將任務統計結果寫入 JSON 檔案"""
|
||||
stats_file = os.path.join(os.path.dirname(__file__), 'data', 'scheduler_stats.json')
|
||||
@@ -552,10 +608,12 @@ def run_edm_task(lpn_code="O1K5FBOqsvN"):
|
||||
logging.info(f"[Crawler] [EDM] 🔗 前往頁面: {url}")
|
||||
driver.get(url)
|
||||
time.sleep(5) # 等待 JS 渲染
|
||||
logging.info(f"[Crawler] [EDM] 📄 頁面標題: {driver.title}")
|
||||
_raise_if_momo_edm_unavailable(driver, "[Crawler] [EDM]", url)
|
||||
page_title = _safe_driver_title(driver, "[Crawler] [EDM]", url)
|
||||
logging.info(f"[Crawler] [EDM] 📄 頁面標題: {page_title}")
|
||||
|
||||
# 2. 準備批次資訊
|
||||
activity_name = driver.title.split("-")[0].strip() if "-" in driver.title else "限時搶購"
|
||||
activity_name = page_title.split("-")[0].strip() if "-" in page_title else "限時搶購"
|
||||
batch_id = int(time.time())
|
||||
now = datetime.now(TAIPEI_TZ).replace(tzinfo=None)
|
||||
|
||||
@@ -897,6 +955,10 @@ def run_edm_task(lpn_code="O1K5FBOqsvN"):
|
||||
}
|
||||
_save_stats('edm_task', stats)
|
||||
|
||||
except MomoEdmUnavailable as e:
|
||||
logging.warning(f"[Crawler] [EDM] ⚠️ EDM 頁面不可用,任務略過 | Alert: {e.alert_text} | URL: {e.url}")
|
||||
stats = {"status": "Skipped", "reason": "edm_unavailable", "alert_text": e.alert_text, "url": e.url}
|
||||
_save_stats('edm_task', stats)
|
||||
except Exception as e:
|
||||
import traceback as _tb
|
||||
logging.error(f"[Crawler] [EDM] 🚨 EDM 任務異常 | Error: {e}")
|
||||
@@ -983,7 +1045,8 @@ def run_festival_task(lpn_code="O7ylWfihYUM"):
|
||||
|
||||
# V-Fix: 增加初始等待時間,確保頁面上的 Vue.js 框架有足夠時間初始化並掛載懶加載事件
|
||||
time.sleep(10)
|
||||
logging.info(f"[Crawler] [Festival] 📄 頁面標題: {driver.title}")
|
||||
_raise_if_momo_edm_unavailable(driver, "[Crawler] [Festival]", url)
|
||||
logging.info(f"[Crawler] [Festival] 📄 頁面標題: {_safe_driver_title(driver, '[Crawler] [Festival]', url)}")
|
||||
|
||||
# V-Fix: 嘗試在 iframe 中尋找內容
|
||||
iframes = driver.find_elements(By.TAG_NAME, 'iframe')
|
||||
@@ -1292,6 +1355,10 @@ def run_festival_task(lpn_code="O7ylWfihYUM"):
|
||||
else:
|
||||
logging.info("[Crawler] [Festival] ℹ️ 無異動,不發送通知")
|
||||
|
||||
except MomoEdmUnavailable as e:
|
||||
logging.warning(f"[Crawler] [Festival] ⚠️ {PAGE_TYPE} 活動頁不可用,任務略過 | Alert: {e.alert_text} | URL: {e.url}")
|
||||
stats = {"status": "Skipped", "reason": "edm_unavailable", "alert_text": e.alert_text, "url": e.url}
|
||||
_save_stats('festival_task', stats)
|
||||
except Exception as e:
|
||||
import traceback as _tb
|
||||
logging.error(f"[Crawler] [Festival] 🚨 {PAGE_TYPE} 任務異常 | Error: {e}")
|
||||
@@ -1341,7 +1408,9 @@ def run_promo_event_task(lpn_code, page_type, activity_name):
|
||||
logging.exception(f"[Crawler] [{page_type.upper()}] window.stop() 失敗但繼續 | Error: {e}")
|
||||
|
||||
time.sleep(10)
|
||||
logging.info(f"[Crawler] [{page_type.upper()}] 📄 頁面標題: {driver.title}")
|
||||
task_label = f"[Crawler] [{page_type.upper()}]"
|
||||
_raise_if_momo_edm_unavailable(driver, task_label, url)
|
||||
logging.info(f"[Crawler] [{page_type.upper()}] 📄 頁面標題: {_safe_driver_title(driver, task_label, url)}")
|
||||
|
||||
# 嘗試在 iframe 中尋找內容
|
||||
iframes = driver.find_elements(By.TAG_NAME, 'iframe')
|
||||
@@ -1630,6 +1699,10 @@ def run_promo_event_task(lpn_code, page_type, activity_name):
|
||||
else:
|
||||
logging.info(f"[Crawler] [{page_type.upper()}] ℹ️ 無異動,不發送通知")
|
||||
|
||||
except MomoEdmUnavailable as e:
|
||||
logging.warning(f"[Crawler] [{page_type.upper()}] ⚠️ {activity_name} 活動頁不可用,任務略過 | Alert: {e.alert_text} | URL: {e.url}")
|
||||
stats = {"status": "Skipped", "reason": "edm_unavailable", "alert_text": e.alert_text, "url": e.url}
|
||||
_save_stats(f'{page_type}_task', stats)
|
||||
except Exception as e:
|
||||
import traceback as _tb
|
||||
logging.error(f"[Crawler] [{page_type.upper()}] 🚨 {page_type} 任務異常 | Error: {e}")
|
||||
|
||||
@@ -1595,6 +1595,16 @@ def score_marketplace_match(
|
||||
and token_score >= 0.68
|
||||
and sequence_score >= 0.72
|
||||
)
|
||||
allow_wide_price_penalty_suppression = (
|
||||
shared_anchor
|
||||
and len(shared_anchor.replace(" ", "")) >= 5
|
||||
and brand_score >= 0.95
|
||||
and not hard_veto
|
||||
and type_score >= 0.55
|
||||
and spec_score >= 0.99
|
||||
and token_score >= 0.50
|
||||
and sequence_score >= 0.55
|
||||
)
|
||||
if (ratio < 0.3 or ratio > 3.2) and token_score < 0.78:
|
||||
if allow_price_penalty_suppression:
|
||||
reasons.append("price_penalty_suppressed_exact_identity")
|
||||
@@ -1602,8 +1612,11 @@ def score_marketplace_match(
|
||||
price_penalty = 0.12
|
||||
reasons.append("price_ratio_extreme")
|
||||
elif (ratio < 0.48 or ratio > 2.2) and token_score < 0.68:
|
||||
price_penalty = 0.06
|
||||
reasons.append("price_ratio_wide")
|
||||
if allow_wide_price_penalty_suppression:
|
||||
reasons.append("price_penalty_suppressed_wide_exact_identity")
|
||||
else:
|
||||
price_penalty = 0.06
|
||||
reasons.append("price_ratio_wide")
|
||||
except (TypeError, ValueError, ZeroDivisionError):
|
||||
price_penalty = 0.0
|
||||
|
||||
|
||||
@@ -470,6 +470,20 @@ def test_marketplace_matcher_promotes_ludeya_line_with_platform_name_drift():
|
||||
assert "shared_identity_anchor" in diagnostics.reasons or "shared_identity_anchor_no_spec" in diagnostics.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_suppresses_wide_price_penalty_for_exact_lip_product():
    """The same REJURAN lip product at 169 vs 380 still scores >= 0.76.

    Also checks that the matcher reports the
    ``price_penalty_suppressed_wide_exact_identity`` reason.
    """
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【REJURAN 麗珠蘭】官方直營|REJURAN 麗駐蘭唇膏 3.7g|台灣總代理 動物性 PDRN"
    competitor_title = "REJURAN 麗駐蘭唇膏 3.7g"

    result = score_marketplace_match(
        momo_title,
        competitor_title,
        momo_price=169,
        competitor_price=380,
    )

    assert result.score >= 0.76
    assert "price_penalty_suppressed_wide_exact_identity" in result.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_mac_brand_alias_and_exact_compact_name():
|
||||
from services.marketplace_product_matcher import score_marketplace_match
|
||||
|
||||
|
||||
88
tests/test_scheduler_edm_unavailable.py
Normal file
88
tests/test_scheduler_edm_unavailable.py
Normal file
@@ -0,0 +1,88 @@
|
||||
from contextlib import contextmanager
|
||||
|
||||
|
||||
class _FakeAlert:
    """Stand-in for a browser alert that records acceptance on its driver."""

    def __init__(self, driver, text="很抱歉此EDM不存在"):
        self.text = text
        self._driver = driver

    def accept(self):
        """Mark the owning fake driver's alert as accepted."""
        setattr(self._driver, "alert_accepted", True)
|
||||
|
||||
|
||||
class _FakeSwitchTo:
    """Stand-in for ``driver.switch_to`` exposing a single pending alert."""

    def __init__(self, driver):
        self._driver = driver

    @property
    def alert(self):
        """Return a fresh ``_FakeAlert`` until the driver's alert is accepted.

        Once ``alert_accepted`` is set on the driver, raises the
        ``NoAlertPresentException`` exposed by the ``scheduler`` module.
        """
        import scheduler

        if not self._driver.alert_accepted:
            return _FakeAlert(self._driver)
        raise scheduler.NoAlertPresentException()
|
||||
|
||||
|
||||
class _ExpiredEdmDriver:
    """Fake browser driver with a pending alert and an unreadable title."""

    def __init__(self):
        self.loaded_url = None
        self.alert_accepted = False
        self.switch_to = _FakeSwitchTo(self)

    def get(self, url):
        """Remember the requested URL instead of navigating."""
        self.loaded_url = url

    @property
    def title(self):
        """Fail the test if the title is read on this driver."""
        raise AssertionError("expired EDM alert should be handled before reading title")
|
||||
|
||||
|
||||
def _install_expired_edm_driver(monkeypatch):
    """Patch ``scheduler`` so tasks run against an ``_ExpiredEdmDriver``.

    Replaces ``managed_scraper_resources`` with a context manager yielding the
    fake driver and a placeholder object, turns ``scheduler.time.sleep`` into a
    no-op, and redirects ``_save_stats`` into a list.

    Returns:
        tuple: ``(scheduler module, fake driver, list of (task, data) tuples)``.
    """
    import scheduler

    fake_driver = _ExpiredEdmDriver()
    recorded = []

    @contextmanager
    def fake_resources(*_args, **_kwargs):
        yield fake_driver, object()

    def _no_sleep(*_args, **_kwargs):
        return None

    def _record_stats(task, data):
        recorded.append((task, data))

    monkeypatch.setattr(scheduler, "managed_scraper_resources", fake_resources)
    monkeypatch.setattr(scheduler.time, "sleep", _no_sleep)
    monkeypatch.setattr(scheduler, "_save_stats", _record_stats)
    return scheduler, fake_driver, recorded
|
||||
|
||||
|
||||
def test_promo_event_expired_edm_alert_skips_without_failure(monkeypatch):
    """An expired promo page yields exactly one Skipped / edm_unavailable stats entry."""
    scheduler, driver, stats = _install_expired_edm_driver(monkeypatch)
    expected_entry = (
        "mothers_day_task",
        {
            "status": "Skipped",
            "reason": "edm_unavailable",
            "alert_text": "很抱歉此EDM不存在",
            "url": "https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn=expired-lpn&n=1",
        },
    )

    scheduler.run_promo_event_task("expired-lpn", "mothers_day", "母親節超值限時購")

    assert driver.alert_accepted is True
    assert stats == [expected_entry]
|
||||
|
||||
|
||||
def test_festival_expired_edm_alert_skips_without_failure(monkeypatch):
    """An expired festival page yields exactly one Skipped / edm_unavailable stats entry."""
    scheduler, driver, stats = _install_expired_edm_driver(monkeypatch)
    expected_entry = (
        "festival_task",
        {
            "status": "Skipped",
            "reason": "edm_unavailable",
            "alert_text": "很抱歉此EDM不存在",
            "url": "https://www.momoshop.com.tw/edm/cmmedm.jsp?lpn=expired-lpn&n=1",
        },
    )

    scheduler.run_festival_task(lpn_code="expired-lpn")

    assert driver.alert_accepted is True
    assert stats == [expected_entry]
|
||||
|
||||
Reference in New Issue
Block a user