diff --git a/config.py b/config.py index 0f12533..29fe856 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.374" +SYSTEM_VERSION = "V10.375" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/code_modularization_inventory_20260430.md b/docs/memory/code_modularization_inventory_20260430.md index fe5d96b..51f3aa3 100644 --- a/docs/memory/code_modularization_inventory_20260430.md +++ b/docs/memory/code_modularization_inventory_20260430.md @@ -39,6 +39,7 @@ - 2026-05-21 追記:同步 PChome/LUDEYA 商品線名稱漂移比對更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 - 2026-05-21 追記:同步 MAC/Yuskin/AHC 名稱漂移與 bundle equivalent matcher 更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 - 2026-05-21 追記:同步 EDM 失效頁 alert guard 與 REJURAN 唇膏寬價差 exact-identity matcher 更新後的 `scheduler.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 +- 2026-05-21 追記:同步過期 EDM / seasonal promo crawler 排程改為 opt-in、NIVEA/OPI 搜尋 noise 與 identity anchor 補強後的 `run_scheduler.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 ## 達到或超過 800 行檔案清單 @@ -58,14 +59,14 @@ | 1237 | `app.py` | P1 bootstrap | 保持只做 app setup;繼續往 app_factory / extension setup 抽;Phase 42 只做 metadata table name 對齊 | | 1800 | `services/elephant_alpha_autonomous_engine.py` | P1 ElephantAlpha engine | HITL / executor / planning policy | | 970 | `routes/cicd_routes.py` | P2 CI/CD Blueprint | route glue / CI query service / deployment action service | -| 1017 | `run_scheduler.py` | P2 scheduler entrypoint | observability jobs / token report jobs / task registration 分離 | +| 1124 | `run_scheduler.py` | P2 scheduler entrypoint | observability jobs / token report jobs / task registration 分離 | | 916 | `services/ppt_auto_generation_service.py` | P2 PPT 自動產線 service | schedule resolver / generation queue / missing report planner | | 966 | `services/trend_crawler.py` | P2 crawler service | source adapters / parser / persistence | | 942 | `services/learning_pipeline.py` | P2 RAG learning pipeline | distiller / promotion gate / persistence / telemetry | | 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders | | 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers | | 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting | -| 2292 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization | +| 2323 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization | | 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service | | 961 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing | | 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy | diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index 960e9ca..8e0fc88 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化 +- **V10.375 過期活動爬蟲排程 opt-in**: `run_scheduler.py` 將固定 LPN 的 `edm_task` / `festival_task` 改為 `MOMO_ENABLE_LEGACY_EDM_SCHEDULE=true` 才註冊,季節活動 `mothers_day_2026` / `valentine_520_2026` / `labor_day_2026` 改為 `MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE=true` 才註冊;`services/data/crawler_config.json` 同步暫停已失效的 mothers_day LPN,避免 scheduler 定時打過期 MOMO 活動頁造成 Selenium browser loop 與無效負載。手動 API / CLI 指定 LPN 仍保留;同版整合 NIVEA/OPI 等比價搜尋 noise 與 identity anchor 補強。 - **V10.374 EDM 失效頁告警止血**: `scheduler.py` 新增 MOMO EDM alert guard,`run_edm_task` / `run_festival_task` / `run_promo_event_task` 遇到「很抱歉此EDM不存在」時會接受 browser alert、寫入 `Skipped / edm_unavailable` stats,且不再送 EventRouter failure,避免 festival / mothers_day 過期活動頁重新累積 Telegram queue;同版整合 REJURAN 麗駐蘭唇膏同款在價格比過寬時的 exact-identity 價格懲罰豁免。 - **V10.373 PChome 同款名稱漂移整合**: 整合並修正 concurrent matcher work,新增 MAC/M.A.C 品牌 alias、Yuskin 經典乳霜 4入/4盒組同數量 bundle equivalent、AHC 瞬效 B5 玻尿酸關鍵字重排 anchor;修復 `_count_score()` 縮排破壞與 unreachable code,讓新增測試可穩定通過。 - **V10.372 Smoke 與 EventRouter queue 修復**: 修正 AI automation smoke 對 NemoTron fallback 的 class 判斷,改接受實際存在的 `NemotronDispatcher._hermes_rule_fallback`,避免 Hermes fallback 正常卻被誤報 critical;EventRouter 失敗佇列回放改為重建短版 HTML-safe 訊息,escape 標題/摘要/trace/error 並限制長度,避免舊 Selenium stacktrace 的 `` 造成 Telegram HTTP 400 反覆卡住;同版整合 LUDEYA 蜂王玫瑰商品線在 MOMO/PChome 名稱漂移時的 identity anchor alias。 diff --git a/run_scheduler.py b/run_scheduler.py index 4f44e03..61f5387 100644 --- a/run_scheduler.py +++ b/run_scheduler.py @@ -55,6 +55,23 @@ logger = logging.getLogger(__name__) _AI_CALLS_ERROR_SPIKE_LAST_PUSH_TS = 0.0 +def _env_flag(name: str, default: bool = False) -> bool: + raw = os.getenv(name) + if raw is None: + return default + return str(raw).strip().lower() in {"1", "true", "yes", "on"} + + +def _legacy_edm_schedule_enabled() -> bool: + """Legacy fixed-LPN EDM/Festival crawlers are opt-in to avoid stale campaign browser loops.""" + return _env_flag("MOMO_ENABLE_LEGACY_EDM_SCHEDULE", False) + + +def _seasonal_promo_schedule_enabled() -> bool: + """Seasonal promo crawlers are opt-in; expired LPNs should not keep opening MOMO pages.""" + return _env_flag("MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE", False) + + def _notify_scheduler_failure( task_name: str, error: Exception, @@ -98,11 +115,17 @@ def _register_schedules(): schedule.every(1).hours.do(run_momo_task) logger.info("📅 每 1 小時:momo_task") - schedule.every(1).hours.do(run_edm_task) - logger.info("📅 每 1 小時:edm_task") + if _legacy_edm_schedule_enabled(): + schedule.every(1).hours.do(run_edm_task) + logger.info("📅 每 1 小時:edm_task") - schedule.every(1).hours.do(run_festival_task) - logger.info("📅 每 1 小時:festival_task") + schedule.every(1).hours.do(run_festival_task) + logger.info("📅 每 1 小時:festival_task") + else: + logger.info( + "⏸️ legacy EDM/festival crawler schedules disabled " + "(set MOMO_ENABLE_LEGACY_EDM_SCHEDULE=true to enable)" + ) # 動態註冊促銷活動爬蟲(根據配置) from services.crawler_config_loader import get_enabled_crawlers @@ -114,19 +137,26 @@ def _register_schedules(): 'labor_day_2026': {'lpn': '', 'page_type': 'labor_day', 'name': '勞動節購物優惠'} } - for crawler_key, config in enabled_crawlers.items(): - if crawler_key in promo_event_configs: - event_config = promo_event_configs[crawler_key] - lpn_code = config.get('lpn_code', '') - if lpn_code: - schedule_hours = config.get('schedule_hours', 4) - schedule.every(schedule_hours).hours.do( - lambda lpn=lpn_code, pt=event_config['page_type'], an=event_config['name']: - run_promo_event_task(lpn, pt, an) - ) - logger.info(f"📅 每 {schedule_hours} 小時:{event_config['name']} ({event_config['page_type']})") - else: - logger.warning(f"⚠️ {event_config['name']} 未配置 LPN 代碼,跳過排程") + if not _seasonal_promo_schedule_enabled(): + if any(crawler_key in promo_event_configs for crawler_key in enabled_crawlers): + logger.info( + "⏸️ seasonal promo crawler schedules disabled " + "(set MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE=true to enable)" + ) + else: + for crawler_key, config in enabled_crawlers.items(): + if crawler_key in promo_event_configs: + event_config = promo_event_configs[crawler_key] + lpn_code = config.get('lpn_code', '') + if lpn_code: + schedule_hours = config.get('schedule_hours', 4) + schedule.every(schedule_hours).hours.do( + lambda lpn=lpn_code, pt=event_config['page_type'], an=event_config['name']: + run_promo_event_task(lpn, pt, an) + ) + logger.info(f"📅 每 {schedule_hours} 小時:{event_config['name']} ({event_config['page_type']})") + else: + logger.warning(f"⚠️ {event_config['name']} 未配置 LPN 代碼,跳過排程") schedule.every(4).hours.do(run_competitor_price_feeder_task) logger.info("📅 每 4 小時:competitor_price_feeder") diff --git a/services/data/crawler_config.json b/services/data/crawler_config.json index 003cf2f..d4ce621 100644 --- a/services/data/crawler_config.json +++ b/services/data/crawler_config.json @@ -16,12 +16,15 @@ "name": "1.1 狂歡購物節爬蟲" }, "mothers_day_2026": { - "enabled": true, + "enabled": false, "schedule_hours": 4, "lpn_code": "O7ylWdZJHj8", "activity_name": "母親節超值限時購", "page_type": "mothers_day", - "name": "2026 母親節促銷爬蟲" + "name": "2026 母親節促銷爬蟲", + "status": "paused", + "pause_reason": "活動頁已回傳「很抱歉此EDM不存在」,改為手動指定 LPN 或啟用 MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE 後再排程", + "paused_date": "2026-05-21" }, "valentine_520_2026": { "enabled": false, diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index 8861660..387ed5c 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -232,6 +232,20 @@ SEARCH_NOISE_PHRASES = ( "多色任選", "多色可選", "多色", + "德國妮維雅", + "無印止汗滾珠", + "眉彩刷", + "眉餅盒分開販售", + "極細筆芯", + "防水抗暈", + "兒童化妝品", + "無毒防曬霜", + "天然彩妝", + "內贈芳香劑", + "衛浴精油擴香瓶棒組", + "衛浴精油擴香瓶", + "三色選一", + "贈複方", ) SEARCH_NOISE_TOKENS = { @@ -290,6 +304,15 @@ SEARCH_NOISE_TOKENS = { } SEARCH_IDENTITY_ANCHORS = ( + "智能光感應無線自動除臭芳香噴霧機", + "usb精油薰香機", + "超音波水氧機", + "類光繚指甲油", + "多效提亮防曬霜", + "速描眼線膠筆", + "經典旋轉眉筆", + "3d造型眉彩餅補充芯", + "止汗爽身乳液", "持久植物香氛精油", "口袋雙色修容打亮盤", "經典乳霜", @@ -496,6 +519,7 @@ BRAND_ALIAS_OVERRIDES = { "za": ("za",), "xiaomi": ("小米有品", "小米", "xiaomi"), "mac": ("m.a.c", "mac", "m a c"), + "opi": ("o.p.i", "opi", "o p i"), } PRODUCT_TYPES = { @@ -1927,6 +1951,8 @@ def _extract_anchor_phrases(token: str) -> list[str]: return [] phrases: list[str] = [] + if "經典旋轉眉筆" in cleaned: + phrases.append("經典旋轉眉筆") if "悠斯晶" in normalized and "經典乳霜" in normalized: phrases.append("悠斯晶經典乳霜") if "經典乳霜" in normalized: @@ -2206,6 +2232,8 @@ def build_search_terms(name: str, max_terms: int = 3) -> list[str]: return "romand" if {"im", "meme"} <= identity.brand_tokens: return "im meme" + if {"recipe", "box"} <= identity.brand_tokens: + return "recipe box" chinese = sorted( (token for token in identity.brand_tokens if re.search(r"[\u4e00-\u9fff]", token)), key=lambda token: (-len(token), token), @@ -2233,6 +2261,9 @@ def build_search_terms(name: str, max_terms: int = 3) -> list[str]: brand_part = primary_brand_phrase() spec_part = " ".join(_search_spec_terms(identity)) core_phrases = _ranked_search_core_phrases(identity, limit=4) + full_name_anchor_phrases = _extract_anchor_phrases(name) + if full_name_anchor_phrases: + core_phrases = list(dict.fromkeys(full_name_anchor_phrases + core_phrases)) core_short = " ".join(core_phrases[:2]) core_primary = core_phrases[0] if core_phrases else "" product_type_aliases = set(PRODUCT_TYPES.get(identity.product_type or "", ())) diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index 69fa3eb..31b81e5 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -266,6 +266,25 @@ def test_marketplace_matcher_promotes_nivea_deodorant_spray_identity(): assert diagnostics.hard_veto is False +def test_marketplace_matcher_promotes_nivea_deodorant_lotion_noise_variants(): + from services.marketplace_product_matcher import build_search_terms, score_marketplace_match + + momo_name = "【NIVEA 妮維雅】德國妮維雅 止汗爽身乳液50ml(無印止汗滾珠)" + diagnostics = score_marketplace_match( + momo_name, + "NIVEA 妮維雅 止汗爽身乳液 50ml", + momo_price=149, + competitor_price=169, + ) + terms = build_search_terms(momo_name, max_terms=5) + + assert diagnostics.score >= 0.76 + assert diagnostics.hard_veto is False + assert terms[0] == "妮維雅 止汗爽身乳液 50ml" + assert "德國妮維雅" not in " ".join(terms[:3]) + assert "無印止汗滾珠" not in " ".join(terms[:3]) + + def test_marketplace_matcher_promotes_packaging_variant_for_same_nars_powder(): from services.marketplace_product_matcher import score_marketplace_match @@ -1099,7 +1118,7 @@ def test_marketplace_search_terms_prioritize_precise_primer_identity_phrase(): assert "校色" not in " ".join(shu_terms[:3]) assert any("水凝光透 妝前防護乳" in term for term in meme_terms[:4]) assert "好氣色" not in " ".join(meme_terms[:3]) - assert eaoron_terms[0] == "eaoron 素顏霜 50ml" + assert eaoron_terms[0] == "eaoron 經典素顏霜 50ml" assert "懶人霜" not in " ".join(eaoron_terms[:3]) @@ -1119,7 +1138,7 @@ def test_marketplace_search_terms_prefer_exact_identity_for_nail_foam_and_foot_m max_terms=5, ) - assert opi_terms[0] == "ist31 閃耀保色護甲油 15ml" + assert opi_terms[0] == "opi 閃耀保色護甲油 15ml" assert "小銀蓋" not in " ".join(opi_terms[:3]) assert arau_terms[0] == "愛樂寶 溫和洗手慕斯 300ml" assert "溫和不乾澀" not in " ".join(arau_terms[:3]) @@ -1182,14 +1201,39 @@ def test_marketplace_search_terms_prioritize_exact_identity_for_low_score_fronti "【PERIPERA】雙頭旋轉極細眉筆 0.05g(09灰褐棕)", max_terms=5, ) + za_classic_terms = build_search_terms( + "【Za】官方直營 經典旋轉眉筆(色號任選)", + max_terms=5, + ) + kate_refill_terms = build_search_terms( + "【KATE 凱婷】3D造型眉彩餅補充芯(眉彩刷、眉餅盒分開販售)", + max_terms=5, + ) + peripera_liner_terms = build_search_terms( + "【peripera官方直營】速描眼線膠筆_多色任選(極細筆芯 防水抗暈)", + max_terms=5, + ) + recipe_box_sunscreen_terms = build_search_terms( + "【Recipe Box】Recipe Box多效提亮防曬霜(兒童化妝品/無毒防曬霜/天然彩妝/防曬/提亮)", + max_terms=5, + ) + opi_terms = build_search_terms( + "【O.P.I】紅蘋果 類光繚指甲油-ISLN25(小銀蓋/如膠似漆2.0系列指彩/美甲彩繪/官方直營)", + max_terms=5, + ) assert ludeya_terms[0] == "ludeya 蜂王玫瑰瑰泌霜 60ml" assert "兩入組" not in " ".join(ludeya_terms[:3]) assert estee_terms[0] == "雅詩蘭黛 微分子肌底原生露 200ml" assert "櫻花輕盈版" not in " ".join(estee_terms[:3]) - assert za_palette_terms[0] == "za 立體持色眉彩盤 3.4g" + assert za_palette_terms[0] == "za 3d立體持色眉彩盤 3.4g" assert za_pencil_terms[0] == "za 細芯睛彩雙頭眉筆 0.1g" assert peripera_terms[0] == "peripera 雙頭旋轉極細眉筆 09 0.05g" + assert za_classic_terms[0] == "za 經典旋轉眉筆" + assert kate_refill_terms[0] == "凱婷 3d造型眉彩餅補充芯" + assert peripera_liner_terms[0] == "peripera 速描眼線膠筆" + assert recipe_box_sunscreen_terms[0] == "recipe box 多效提亮防曬霜" + assert opi_terms[0] == "opi 類光繚指甲油 isln25" def test_batch_compare_top_uses_latest_momo_price_not_revenue(monkeypatch): diff --git a/tests/test_run_scheduler_embed_consistency.py b/tests/test_run_scheduler_embed_consistency.py index 97d35a6..0d1e9e1 100644 --- a/tests/test_run_scheduler_embed_consistency.py +++ b/tests/test_run_scheduler_embed_consistency.py @@ -163,6 +163,26 @@ def test_roi_ai_smoke_and_daily_report_schedules_stay_staggered(): assert "schedule.every(6).hours.do(run_action_plan_hygiene_task)" in source +def test_legacy_edm_and_seasonal_promo_schedules_are_opt_in(monkeypatch): + run_scheduler = _load_run_scheduler(monkeypatch) + source = inspect.getsource(run_scheduler._register_schedules) + + monkeypatch.delenv("MOMO_ENABLE_LEGACY_EDM_SCHEDULE", raising=False) + monkeypatch.delenv("MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE", raising=False) + assert run_scheduler._legacy_edm_schedule_enabled() is False + assert run_scheduler._seasonal_promo_schedule_enabled() is False + + monkeypatch.setenv("MOMO_ENABLE_LEGACY_EDM_SCHEDULE", "true") + monkeypatch.setenv("MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE", "1") + assert run_scheduler._legacy_edm_schedule_enabled() is True + assert run_scheduler._seasonal_promo_schedule_enabled() is True + + assert "if _legacy_edm_schedule_enabled():" in source + assert "if not _seasonal_promo_schedule_enabled():" in source + assert "MOMO_ENABLE_LEGACY_EDM_SCHEDULE" in Path("run_scheduler.py").read_text() + assert "MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE" in Path("run_scheduler.py").read_text() + + def test_ai_smoke_daily_summary_refreshes_smoke_before_push(monkeypatch): run_scheduler = _load_run_scheduler(monkeypatch) source = inspect.getsource(run_scheduler.run_ai_smoke_daily_summary_task)