This commit is contained in:
@@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
||||
# ==========================================
|
||||
# 系統版本與路徑
|
||||
# ==========================================
|
||||
SYSTEM_VERSION = "V10.374"
|
||||
SYSTEM_VERSION = "V10.375"
|
||||
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
||||
public_url = PUBLIC_URL # 用於模板顯示
|
||||
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
- 2026-05-21 追記:同步 PChome/LUDEYA 商品線名稱漂移比對更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
- 2026-05-21 追記:同步 MAC/Yuskin/AHC 名稱漂移與 bundle equivalent matcher 更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
- 2026-05-21 追記:同步 EDM 失效頁 alert guard 與 REJURAN 唇膏寬價差 exact-identity matcher 更新後的 `scheduler.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
- 2026-05-21 追記:同步過期 EDM / seasonal promo crawler 排程改為 opt-in、NIVEA/OPI 搜尋 noise 與 identity anchor 補強後的 `run_scheduler.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
|
||||
## 達到或超過 800 行檔案清單
|
||||
|
||||
@@ -58,14 +59,14 @@
|
||||
| 1237 | `app.py` | P1 bootstrap | 保持只做 app setup;繼續往 app_factory / extension setup 抽;Phase 42 只做 metadata table name 對齊 |
|
||||
| 1800 | `services/elephant_alpha_autonomous_engine.py` | P1 ElephantAlpha engine | HITL / executor / planning policy |
|
||||
| 970 | `routes/cicd_routes.py` | P2 CI/CD Blueprint | route glue / CI query service / deployment action service |
|
||||
| 1017 | `run_scheduler.py` | P2 scheduler entrypoint | observability jobs / token report jobs / task registration 分離 |
|
||||
| 1124 | `run_scheduler.py` | P2 scheduler entrypoint | observability jobs / token report jobs / task registration 分離 |
|
||||
| 916 | `services/ppt_auto_generation_service.py` | P2 PPT 自動產線 service | schedule resolver / generation queue / missing report planner |
|
||||
| 966 | `services/trend_crawler.py` | P2 crawler service | source adapters / parser / persistence |
|
||||
| 942 | `services/learning_pipeline.py` | P2 RAG learning pipeline | distiller / promotion gate / persistence / telemetry |
|
||||
| 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders |
|
||||
| 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers |
|
||||
| 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting |
|
||||
| 2292 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
|
||||
| 2323 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
|
||||
| 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service |
|
||||
| 961 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing |
|
||||
| 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy |
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
## 📅 詳細更新日誌 (考古存檔)
|
||||
|
||||
### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化
|
||||
- **V10.375 過期活動爬蟲排程 opt-in**: `run_scheduler.py` 將固定 LPN 的 `edm_task` / `festival_task` 改為 `MOMO_ENABLE_LEGACY_EDM_SCHEDULE=true` 才註冊,季節活動 `mothers_day_2026` / `valentine_520_2026` / `labor_day_2026` 改為 `MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE=true` 才註冊;`services/data/crawler_config.json` 同步暫停已失效的 mothers_day LPN,避免 scheduler 定時打過期 MOMO 活動頁造成 Selenium browser loop 與無效負載。手動 API / CLI 指定 LPN 仍保留;同版整合 NIVEA/OPI 等比價搜尋 noise 與 identity anchor 補強。
|
||||
- **V10.374 EDM 失效頁告警止血**: `scheduler.py` 新增 MOMO EDM alert guard,`run_edm_task` / `run_festival_task` / `run_promo_event_task` 遇到「很抱歉此EDM不存在」時會接受 browser alert、寫入 `Skipped / edm_unavailable` stats,且不再送 EventRouter failure,避免 festival / mothers_day 過期活動頁重新累積 Telegram queue;同版整合 REJURAN 麗駐蘭唇膏同款在價格比過寬時的 exact-identity 價格懲罰豁免。
|
||||
- **V10.373 PChome 同款名稱漂移整合**: 整合並修正 concurrent matcher work,新增 MAC/M.A.C 品牌 alias、Yuskin 經典乳霜 4入/4盒組同數量 bundle equivalent、AHC 瞬效 B5 玻尿酸關鍵字重排 anchor;修復 `_count_score()` 縮排破壞與 unreachable code,讓新增測試可穩定通過。
|
||||
- **V10.372 Smoke 與 EventRouter queue 修復**: 修正 AI automation smoke 對 NemoTron fallback 的 class 判斷,改接受實際存在的 `NemotronDispatcher._hermes_rule_fallback`,避免 Hermes fallback 正常卻被誤報 critical;EventRouter 失敗佇列回放改為重建短版 HTML-safe 訊息,escape 標題/摘要/trace/error 並限制長度,避免舊 Selenium stacktrace 的 `<unknown>` 造成 Telegram HTTP 400 反覆卡住;同版整合 LUDEYA 蜂王玫瑰商品線在 MOMO/PChome 名稱漂移時的 identity anchor alias。
|
||||
|
||||
@@ -55,6 +55,23 @@ logger = logging.getLogger(__name__)
|
||||
_AI_CALLS_ERROR_SPIKE_LAST_PUSH_TS = 0.0
|
||||
|
||||
|
||||
def _env_flag(name: str, default: bool = False) -> bool:
    """Read environment variable ``name`` as an on/off switch.

    Returns ``default`` untouched when the variable is not set. Otherwise the
    value is trimmed and lower-cased, and only ``1``, ``true``, ``yes`` or
    ``on`` count as enabled; any other text (including an empty string) is
    treated as disabled.
    """
    value = os.getenv(name)
    if value is not None:
        normalized = str(value).strip().lower()
        return normalized in ("1", "true", "yes", "on")
    return default
|
||||
|
||||
|
||||
def _legacy_edm_schedule_enabled() -> bool:
    """Tell whether the legacy fixed-LPN EDM/Festival crawler schedules are on.

    They are opt-in (to avoid stale campaign browser loops): the result is the
    MOMO_ENABLE_LEGACY_EDM_SCHEDULE environment flag as interpreted by
    ``_env_flag``, defaulting to disabled.
    """
    enabled = _env_flag("MOMO_ENABLE_LEGACY_EDM_SCHEDULE", False)
    return enabled
|
||||
|
||||
|
||||
def _seasonal_promo_schedule_enabled() -> bool:
    """Tell whether the seasonal promo crawler schedules are on.

    They are opt-in so that expired LPNs do not keep opening MOMO pages: the
    result is the MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE environment flag as
    interpreted by ``_env_flag``, defaulting to disabled.
    """
    enabled = _env_flag("MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE", False)
    return enabled
|
||||
|
||||
|
||||
def _notify_scheduler_failure(
|
||||
task_name: str,
|
||||
error: Exception,
|
||||
@@ -98,11 +115,17 @@ def _register_schedules():
|
||||
schedule.every(1).hours.do(run_momo_task)
|
||||
logger.info("📅 每 1 小時:momo_task")
|
||||
|
||||
schedule.every(1).hours.do(run_edm_task)
|
||||
logger.info("📅 每 1 小時:edm_task")
|
||||
if _legacy_edm_schedule_enabled():
|
||||
schedule.every(1).hours.do(run_edm_task)
|
||||
logger.info("📅 每 1 小時:edm_task")
|
||||
|
||||
schedule.every(1).hours.do(run_festival_task)
|
||||
logger.info("📅 每 1 小時:festival_task")
|
||||
schedule.every(1).hours.do(run_festival_task)
|
||||
logger.info("📅 每 1 小時:festival_task")
|
||||
else:
|
||||
logger.info(
|
||||
"⏸️ legacy EDM/festival crawler schedules disabled "
|
||||
"(set MOMO_ENABLE_LEGACY_EDM_SCHEDULE=true to enable)"
|
||||
)
|
||||
|
||||
# 動態註冊促銷活動爬蟲(根據配置)
|
||||
from services.crawler_config_loader import get_enabled_crawlers
|
||||
@@ -114,19 +137,26 @@ def _register_schedules():
|
||||
'labor_day_2026': {'lpn': '', 'page_type': 'labor_day', 'name': '勞動節購物優惠'}
|
||||
}
|
||||
|
||||
for crawler_key, config in enabled_crawlers.items():
|
||||
if crawler_key in promo_event_configs:
|
||||
event_config = promo_event_configs[crawler_key]
|
||||
lpn_code = config.get('lpn_code', '')
|
||||
if lpn_code:
|
||||
schedule_hours = config.get('schedule_hours', 4)
|
||||
schedule.every(schedule_hours).hours.do(
|
||||
lambda lpn=lpn_code, pt=event_config['page_type'], an=event_config['name']:
|
||||
run_promo_event_task(lpn, pt, an)
|
||||
)
|
||||
logger.info(f"📅 每 {schedule_hours} 小時:{event_config['name']} ({event_config['page_type']})")
|
||||
else:
|
||||
logger.warning(f"⚠️ {event_config['name']} 未配置 LPN 代碼,跳過排程")
|
||||
if not _seasonal_promo_schedule_enabled():
|
||||
if any(crawler_key in promo_event_configs for crawler_key in enabled_crawlers):
|
||||
logger.info(
|
||||
"⏸️ seasonal promo crawler schedules disabled "
|
||||
"(set MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE=true to enable)"
|
||||
)
|
||||
else:
|
||||
for crawler_key, config in enabled_crawlers.items():
|
||||
if crawler_key in promo_event_configs:
|
||||
event_config = promo_event_configs[crawler_key]
|
||||
lpn_code = config.get('lpn_code', '')
|
||||
if lpn_code:
|
||||
schedule_hours = config.get('schedule_hours', 4)
|
||||
schedule.every(schedule_hours).hours.do(
|
||||
lambda lpn=lpn_code, pt=event_config['page_type'], an=event_config['name']:
|
||||
run_promo_event_task(lpn, pt, an)
|
||||
)
|
||||
logger.info(f"📅 每 {schedule_hours} 小時:{event_config['name']} ({event_config['page_type']})")
|
||||
else:
|
||||
logger.warning(f"⚠️ {event_config['name']} 未配置 LPN 代碼,跳過排程")
|
||||
|
||||
schedule.every(4).hours.do(run_competitor_price_feeder_task)
|
||||
logger.info("📅 每 4 小時:competitor_price_feeder")
|
||||
|
||||
@@ -16,12 +16,15 @@
|
||||
"name": "1.1 狂歡購物節爬蟲"
|
||||
},
|
||||
"mothers_day_2026": {
|
||||
"enabled": true,
|
||||
"enabled": false,
|
||||
"schedule_hours": 4,
|
||||
"lpn_code": "O7ylWdZJHj8",
|
||||
"activity_name": "母親節超值限時購",
|
||||
"page_type": "mothers_day",
|
||||
"name": "2026 母親節促銷爬蟲"
|
||||
"name": "2026 母親節促銷爬蟲",
|
||||
"status": "paused",
|
||||
"pause_reason": "活動頁已回傳「很抱歉此EDM不存在」,改為手動指定 LPN 或啟用 MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE 後再排程",
|
||||
"paused_date": "2026-05-21"
|
||||
},
|
||||
"valentine_520_2026": {
|
||||
"enabled": false,
|
||||
|
||||
@@ -232,6 +232,20 @@ SEARCH_NOISE_PHRASES = (
|
||||
"多色任選",
|
||||
"多色可選",
|
||||
"多色",
|
||||
"德國妮維雅",
|
||||
"無印止汗滾珠",
|
||||
"眉彩刷",
|
||||
"眉餅盒分開販售",
|
||||
"極細筆芯",
|
||||
"防水抗暈",
|
||||
"兒童化妝品",
|
||||
"無毒防曬霜",
|
||||
"天然彩妝",
|
||||
"內贈芳香劑",
|
||||
"衛浴精油擴香瓶棒組",
|
||||
"衛浴精油擴香瓶",
|
||||
"三色選一",
|
||||
"贈複方",
|
||||
)
|
||||
|
||||
SEARCH_NOISE_TOKENS = {
|
||||
@@ -290,6 +304,15 @@ SEARCH_NOISE_TOKENS = {
|
||||
}
|
||||
|
||||
SEARCH_IDENTITY_ANCHORS = (
|
||||
"智能光感應無線自動除臭芳香噴霧機",
|
||||
"usb精油薰香機",
|
||||
"超音波水氧機",
|
||||
"類光繚指甲油",
|
||||
"多效提亮防曬霜",
|
||||
"速描眼線膠筆",
|
||||
"經典旋轉眉筆",
|
||||
"3d造型眉彩餅補充芯",
|
||||
"止汗爽身乳液",
|
||||
"持久植物香氛精油",
|
||||
"口袋雙色修容打亮盤",
|
||||
"經典乳霜",
|
||||
@@ -496,6 +519,7 @@ BRAND_ALIAS_OVERRIDES = {
|
||||
"za": ("za",),
|
||||
"xiaomi": ("小米有品", "小米", "xiaomi"),
|
||||
"mac": ("m.a.c", "mac", "m a c"),
|
||||
"opi": ("o.p.i", "opi", "o p i"),
|
||||
}
|
||||
|
||||
PRODUCT_TYPES = {
|
||||
@@ -1927,6 +1951,8 @@ def _extract_anchor_phrases(token: str) -> list[str]:
|
||||
return []
|
||||
|
||||
phrases: list[str] = []
|
||||
if "經典旋轉眉筆" in cleaned:
|
||||
phrases.append("經典旋轉眉筆")
|
||||
if "悠斯晶" in normalized and "經典乳霜" in normalized:
|
||||
phrases.append("悠斯晶經典乳霜")
|
||||
if "經典乳霜" in normalized:
|
||||
@@ -2206,6 +2232,8 @@ def build_search_terms(name: str, max_terms: int = 3) -> list[str]:
|
||||
return "romand"
|
||||
if {"im", "meme"} <= identity.brand_tokens:
|
||||
return "im meme"
|
||||
if {"recipe", "box"} <= identity.brand_tokens:
|
||||
return "recipe box"
|
||||
chinese = sorted(
|
||||
(token for token in identity.brand_tokens if re.search(r"[\u4e00-\u9fff]", token)),
|
||||
key=lambda token: (-len(token), token),
|
||||
@@ -2233,6 +2261,9 @@ def build_search_terms(name: str, max_terms: int = 3) -> list[str]:
|
||||
brand_part = primary_brand_phrase()
|
||||
spec_part = " ".join(_search_spec_terms(identity))
|
||||
core_phrases = _ranked_search_core_phrases(identity, limit=4)
|
||||
full_name_anchor_phrases = _extract_anchor_phrases(name)
|
||||
if full_name_anchor_phrases:
|
||||
core_phrases = list(dict.fromkeys(full_name_anchor_phrases + core_phrases))
|
||||
core_short = " ".join(core_phrases[:2])
|
||||
core_primary = core_phrases[0] if core_phrases else ""
|
||||
product_type_aliases = set(PRODUCT_TYPES.get(identity.product_type or "", ()))
|
||||
|
||||
@@ -266,6 +266,25 @@ def test_marketplace_matcher_promotes_nivea_deodorant_spray_identity():
|
||||
assert diagnostics.hard_veto is False
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_nivea_deodorant_lotion_noise_variants():
    """A NIVEA lotion title carrying noise phrases still matches and yields clean search terms."""
    from services.marketplace_product_matcher import build_search_terms, score_marketplace_match

    momo_title = "【NIVEA 妮維雅】德國妮維雅 止汗爽身乳液50ml(無印止汗滾珠)"
    competitor_title = "NIVEA 妮維雅 止汗爽身乳液 50ml"

    result = score_marketplace_match(
        momo_title,
        competitor_title,
        momo_price=149,
        competitor_price=169,
    )
    search_terms = build_search_terms(momo_title, max_terms=5)
    # Only the first three generated terms are checked for leftover noise.
    leading_terms = " ".join(search_terms[:3])

    assert result.score >= 0.76
    assert result.hard_veto is False
    assert search_terms[0] == "妮維雅 止汗爽身乳液 50ml"
    for noise_phrase in ("德國妮維雅", "無印止汗滾珠"):
        assert noise_phrase not in leading_terms
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_packaging_variant_for_same_nars_powder():
|
||||
from services.marketplace_product_matcher import score_marketplace_match
|
||||
|
||||
@@ -1099,7 +1118,7 @@ def test_marketplace_search_terms_prioritize_precise_primer_identity_phrase():
|
||||
assert "校色" not in " ".join(shu_terms[:3])
|
||||
assert any("水凝光透 妝前防護乳" in term for term in meme_terms[:4])
|
||||
assert "好氣色" not in " ".join(meme_terms[:3])
|
||||
assert eaoron_terms[0] == "eaoron 素顏霜 50ml"
|
||||
assert eaoron_terms[0] == "eaoron 經典素顏霜 50ml"
|
||||
assert "懶人霜" not in " ".join(eaoron_terms[:3])
|
||||
|
||||
|
||||
@@ -1119,7 +1138,7 @@ def test_marketplace_search_terms_prefer_exact_identity_for_nail_foam_and_foot_m
|
||||
max_terms=5,
|
||||
)
|
||||
|
||||
assert opi_terms[0] == "ist31 閃耀保色護甲油 15ml"
|
||||
assert opi_terms[0] == "opi 閃耀保色護甲油 15ml"
|
||||
assert "小銀蓋" not in " ".join(opi_terms[:3])
|
||||
assert arau_terms[0] == "愛樂寶 溫和洗手慕斯 300ml"
|
||||
assert "溫和不乾澀" not in " ".join(arau_terms[:3])
|
||||
@@ -1182,14 +1201,39 @@ def test_marketplace_search_terms_prioritize_exact_identity_for_low_score_fronti
|
||||
"【PERIPERA】雙頭旋轉極細眉筆 0.05g(09灰褐棕)",
|
||||
max_terms=5,
|
||||
)
|
||||
za_classic_terms = build_search_terms(
|
||||
"【Za】官方直營 經典旋轉眉筆(色號任選)",
|
||||
max_terms=5,
|
||||
)
|
||||
kate_refill_terms = build_search_terms(
|
||||
"【KATE 凱婷】3D造型眉彩餅補充芯(眉彩刷、眉餅盒分開販售)",
|
||||
max_terms=5,
|
||||
)
|
||||
peripera_liner_terms = build_search_terms(
|
||||
"【peripera官方直營】速描眼線膠筆_多色任選(極細筆芯 防水抗暈)",
|
||||
max_terms=5,
|
||||
)
|
||||
recipe_box_sunscreen_terms = build_search_terms(
|
||||
"【Recipe Box】Recipe Box多效提亮防曬霜(兒童化妝品/無毒防曬霜/天然彩妝/防曬/提亮)",
|
||||
max_terms=5,
|
||||
)
|
||||
opi_terms = build_search_terms(
|
||||
"【O.P.I】紅蘋果 類光繚指甲油-ISLN25(小銀蓋/如膠似漆2.0系列指彩/美甲彩繪/官方直營)",
|
||||
max_terms=5,
|
||||
)
|
||||
|
||||
assert ludeya_terms[0] == "ludeya 蜂王玫瑰瑰泌霜 60ml"
|
||||
assert "兩入組" not in " ".join(ludeya_terms[:3])
|
||||
assert estee_terms[0] == "雅詩蘭黛 微分子肌底原生露 200ml"
|
||||
assert "櫻花輕盈版" not in " ".join(estee_terms[:3])
|
||||
assert za_palette_terms[0] == "za 立體持色眉彩盤 3.4g"
|
||||
assert za_palette_terms[0] == "za 3d立體持色眉彩盤 3.4g"
|
||||
assert za_pencil_terms[0] == "za 細芯睛彩雙頭眉筆 0.1g"
|
||||
assert peripera_terms[0] == "peripera 雙頭旋轉極細眉筆 09 0.05g"
|
||||
assert za_classic_terms[0] == "za 經典旋轉眉筆"
|
||||
assert kate_refill_terms[0] == "凱婷 3d造型眉彩餅補充芯"
|
||||
assert peripera_liner_terms[0] == "peripera 速描眼線膠筆"
|
||||
assert recipe_box_sunscreen_terms[0] == "recipe box 多效提亮防曬霜"
|
||||
assert opi_terms[0] == "opi 類光繚指甲油 isln25"
|
||||
|
||||
|
||||
def test_batch_compare_top_uses_latest_momo_price_not_revenue(monkeypatch):
|
||||
|
||||
@@ -163,6 +163,26 @@ def test_roi_ai_smoke_and_daily_report_schedules_stay_staggered():
|
||||
assert "schedule.every(6).hours.do(run_action_plan_hygiene_task)" in source
|
||||
|
||||
|
||||
def test_legacy_edm_and_seasonal_promo_schedules_are_opt_in(monkeypatch):
    """Legacy EDM and seasonal promo schedules stay off unless their env flags are set.

    Checks the two flag helpers with the variables unset and set, then checks
    that ``_register_schedules`` gates on those helpers and that the flag
    names appear in ``run_scheduler.py``.
    """
    run_scheduler = _load_run_scheduler(monkeypatch)
    source = inspect.getsource(run_scheduler._register_schedules)

    # With both variables absent, each helper must report disabled.
    monkeypatch.delenv("MOMO_ENABLE_LEGACY_EDM_SCHEDULE", raising=False)
    monkeypatch.delenv("MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE", raising=False)
    assert run_scheduler._legacy_edm_schedule_enabled() is False
    assert run_scheduler._seasonal_promo_schedule_enabled() is False

    # Two different truthy spellings ("true" and "1") must both enable.
    monkeypatch.setenv("MOMO_ENABLE_LEGACY_EDM_SCHEDULE", "true")
    monkeypatch.setenv("MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE", "1")
    assert run_scheduler._legacy_edm_schedule_enabled() is True
    assert run_scheduler._seasonal_promo_schedule_enabled() is True

    assert "if _legacy_edm_schedule_enabled():" in source
    assert "if not _seasonal_promo_schedule_enabled():" in source

    # Read the file once and decode explicitly as UTF-8: the scheduler source
    # carries non-ASCII log text, so a locale-default decode can fail.
    module_text = Path("run_scheduler.py").read_text(encoding="utf-8")
    assert "MOMO_ENABLE_LEGACY_EDM_SCHEDULE" in module_text
    assert "MOMO_ENABLE_SEASONAL_PROMO_SCHEDULE" in module_text
|
||||
|
||||
|
||||
def test_ai_smoke_daily_summary_refreshes_smoke_before_push(monkeypatch):
|
||||
run_scheduler = _load_run_scheduler(monkeypatch)
|
||||
source = inspect.getsource(run_scheduler.run_ai_smoke_daily_summary_task)
|
||||
|
||||
Reference in New Issue
Block a user