From 6d4b18878788db703a30090cf6c899bcaf6d92bd Mon Sep 17 00:00:00 2001 From: OoO Date: Sun, 24 May 2026 10:32:42 +0800 Subject: [PATCH] V10.391 handle catalog variant listings --- config.py | 2 +- docs/memory/history_logs.md | 1 + services/competitor_price_feeder.py | 18 +++++- services/marketplace_product_matcher.py | 64 +++++++++++++++++++ ...t_competitor_match_attempts_persistence.py | 64 +++++++++++++++++++ tests/test_marketplace_product_matcher.py | 29 +++++++++ 6 files changed, 174 insertions(+), 4 deletions(-) diff --git a/config.py b/config.py index a661e93..f78b709 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.390" +SYSTEM_VERSION = "V10.391" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index 8200892..c8bfba1 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化 +- **V10.391 多款任選 catalog listing 防錯配**: marketplace matcher 新增 `catalog_variant_listing_alignment`,當 MOMO/PChome 雙方都是多款/多色/多香味任選 listing,且商品線、規格與類型一致時,可放行香氛擴香罐、香氛蠟燭等 catalog 型同款;同時把 Relove 菸鹼醯胺 vs 胺基酸私密清潔凝露列為變體衝突,並讓 competitor feeder 不再只因 `strong_exact_spec_match` 就把低分候選視為 recoverable,避免只同規格但品線不同的商品回寫正式比價。 - **V10.390 PChome 近門檻商品比對規則**: marketplace matcher 補 17 組近門檻真同款召回與錯配防線,包含 OBgE 防曬棒、ARTMIS 私密清潔慕斯、Seche Vite 快乾亮油、TAICEND 屁屁噴、femfresh / VIGILL 私密清潔、Solone 眼部飾底乳、HYDSTO 車載香薰、小米 S101 刮鬍刀、PRAMY 定妝噴霧、I'M MEME 修容打亮棒、檜山坊滾珠精油、ARM&HAMMER 體香膏、Brush Baby WildOnes 電動牙刷與 Palmer's 按摩乳;同時把香氛/私密慕斯/定妝噴霧 finish 差異列為 variant-sensitive,避免不同香味、蔓越莓 vs 金縷梅、柔焦霧面 vs 水光亮面被誤推成直接價格告警。 - **V10.388 精華乳 / 精華霜變體防錯配**: marketplace matcher 新增精華類 formulation conflict guard,當共享 identity anchor 只到「精華」但一側是「精華乳」、另一側是「精華霜 / 精華液」時會標記 `variant_descriptor_conflict` 並壓低同款分數,避免自白肌等同品牌相近品線被錯推成 PChome/MOMO 可直接價格告警。Competitor feeder 同步會用最新 matcher 重新驗舊配對;若舊 `identity_v2` 已被現行 matcher 判成低分或 veto,允許新的高信心候選替換,避免歷史錯配卡住正式 `competitor_prices`。 - **V10.387 EA 比價 HITL 告警證據排版**: Elephant Alpha 的 DB evidence 與 Hermes pre-fetch action 現在會把 PChome/MOMO 同款證據帶進 Telegram:`match_type`、`price_basis`、`alert_tier` 與 `match_score` 會獨立成「證據」行,讓人工審核能分辨高信心同款、總價可比、單位價覆核與身份覆核,不再只看到乾巴巴的 `MOMO vs PChome` 長句。同版 marketplace matcher 補 Relove「私密潔淨凝露」identity anchor 與聯名款搜尋噪音,避免 PLAY BOY / 小虎等活動詞壓過真同款名稱。 diff --git a/services/competitor_price_feeder.py b/services/competitor_price_feeder.py index dc3ca19..987f5bf 100644 --- a/services/competitor_price_feeder.py +++ b/services/competitor_price_feeder.py @@ -55,7 +55,6 @@ BROWSE_SH_MAX_EXECUTIONS_PER_RUN = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_MAX_PE BROWSE_SH_OUTPUT_PREVIEW_CHARS = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_OUTPUT_PREVIEW_CHARS", "1200")) RECOVERABLE_DIAGNOSTIC_REASONS = { "strong_product_line_match", - "strong_exact_spec_match", "shared_identity_anchor", "shared_identity_anchor_no_spec", "shared_identity_anchor_packaging_variant", @@ -93,6 +92,15 @@ def _has_recoverable_identity_signal(diagnostics) -> bool: ) +def _is_multi_variant_listing_name(name: str) -> bool: + return bool( + re.search( + r"(多款任選|多款可選|多色任選|多色可選|多種香味|多種香氣|香味任選|香味可選|味道可選)", + name or "", + ) + ) + + def _classify_low_score_attempt(score: float, diagnostics) -> str: if getattr(diagnostics, "hard_veto", False): return "identity_veto" @@ -117,10 +125,11 @@ def _has_variant_selection_gap( source_identity = parse_product_identity(momo_name) source_options = set(_explicit_variant_option_tokens(source_identity)) + source_multi_listing = _is_multi_variant_listing_name(momo_name) if re.search(r"任選\s*[一二兩三四五六七八九十0-9]+\s*款", momo_name): source_options -= {str(value) for value in range(1, 11)} source_options -= {f"{value:02d}" for value in range(1, 11)} - if source_options: + if source_options and not source_multi_listing: return False threshold = max(best_score - 0.02, RECOVERABLE_LOW_SCORE_FLOOR) @@ -128,7 +137,10 @@ def _has_variant_selection_gap( for product, score, diagnostics in ranked_matches[:5]: if getattr(diagnostics, "hard_veto", False) or score < threshold: continue - candidate_identity = parse_product_identity(getattr(product, "name", "") or "") + candidate_name = getattr(product, "name", "") or "" + if source_multi_listing != _is_multi_variant_listing_name(candidate_name): + return True + candidate_identity = parse_product_identity(candidate_name) options = _explicit_variant_option_tokens(candidate_identity) if len(options) >= 2: return True diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index 7b140de..88dce66 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -454,6 +454,9 @@ VARIANT_SENSITIVE_KEYWORDS = { "車用香氛", "香味", "私密清潔慕斯", + "私密清潔凝露", + "私密潔淨凝露", + "私密淨白清潔凝露", "定妝噴霧", "妝前防護乳", "妝前乳", @@ -492,6 +495,8 @@ VARIANT_OPTION_COLOR_WORDS = { "金縷梅", "柔焦霧面", "水光亮面", + "菸鹼醯胺", + "胺基酸", "黑色", "棕色", "咖啡色", @@ -534,6 +539,20 @@ VARIANT_DESCRIPTOR_NOISE_KEYWORDS = { "盒組", } +MULTI_VARIANT_LISTING_PHRASES = ( + "多款任選", + "多款可選", + "多色任選", + "多色可選", + "多種香味", + "多種香氣", + "香味任選", + "香味可選", + "味道可選", + "任選", + "可選", +) + SEARCH_AMBIGUOUS_PRODUCT_TERMS = { "保護膜", "保護貼", @@ -2009,6 +2028,18 @@ def score_marketplace_match( ): score += 0.04 reasons.append("shared_model_token_brush_baby_wildones") + if ( + _has_catalog_variant_listing_alignment(left, right) + and brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and spec_score >= 0.85 + and type_score >= 0.95 + and sequence_score >= 0.50 + and not variant_descriptor_conflict + ): + score += 0.06 + reasons.append("catalog_variant_listing_alignment") if ( shared_anchor and len(shared_anchor.replace(" ", "")) >= 5 @@ -2390,6 +2421,37 @@ def _has_brush_baby_wildones_toothbrush_alignment(left: ProductIdentity, right: ) +def _is_relove_private_cleanser_line(left: ProductIdentity, right: ProductIdentity) -> bool: + return ( + "relove" in (left.brand_tokens | right.brand_tokens) + and "私密" in left.searchable_name + and "私密" in right.searchable_name + and "凝露" in left.searchable_name + and "凝露" in right.searchable_name + ) + + +def _is_multi_variant_catalog_listing(identity: ProductIdentity) -> bool: + text = identity.normalized_name + return any(phrase in text for phrase in MULTI_VARIANT_LISTING_PHRASES) + + +def _has_catalog_variant_listing_alignment(left: ProductIdentity, right: ProductIdentity) -> bool: + if not (_is_multi_variant_catalog_listing(left) and _is_multi_variant_catalog_listing(right)): + return False + if left.product_type != right.product_type or left.product_type not in {"精油", "護唇膏"}: + return False + if not _has_overlapping_base_spec(left, right): + return False + shared_core = left.core_tokens & right.core_tokens + if shared_core: + return True + left_text = left.searchable_name + right_text = right.searchable_name + catalog_terms = ("香氛擴香罐", "香氛蠟燭", "蠟燭", "擴香罐", "修護唇膏") + return any(term in left_text and term in right_text for term in catalog_terms) + + def _is_variant_sensitive_identity( left: ProductIdentity, right: ProductIdentity, @@ -2412,6 +2474,8 @@ def _has_variant_descriptor_conflict(left: ProductIdentity, right: ProductIdenti return False if _has_brush_baby_wildones_toothbrush_alignment(left, right): return False + if _is_relove_private_cleanser_line(left, right): + return False if ( shared_anchor and shared_anchor not in SEARCH_BROAD_ANCHORS diff --git a/tests/test_competitor_match_attempts_persistence.py b/tests/test_competitor_match_attempts_persistence.py index 7e7451f..9507954 100644 --- a/tests/test_competitor_match_attempts_persistence.py +++ b/tests/test_competitor_match_attempts_persistence.py @@ -666,6 +666,70 @@ def test_competitor_feeder_marks_weak_identity_as_true_low_confidence(monkeypatc assert attempts[0]["attempt_status"] == "true_low_confidence" +def test_competitor_feeder_does_not_treat_spec_only_match_as_recoverable(monkeypatch): + from services.competitor_price_feeder import CompetitorPriceFeeder + from services.pchome_crawler import PChomeProduct + + product = PChomeProduct( + product_id="DDAB01-SPEC", + name="LANCOME 蘭蔻 超極限肌因精華露150ml 專櫃公司貨", + price=3200, + original_price=3600, + discount=11, + image_url="", + product_url="https://24h.pchome.com.tw/prod/DDAB01-SPEC", + stock=20, + store="24h", + rating=4.6, + review_count=12, + is_on_sale=True, + crawled_at=datetime.now(), + ) + + class FakeCrawler: + def __init__(self, *_args, **_kwargs): + pass + + def search_products(self, *_args, **_kwargs): + return True, "ok", [product] + + def fake_score(*_args, **_kwargs): + return SimpleNamespace( + score=0.748, + brand_score=1.0, + token_score=0.42, + spec_score=1.0, + sequence_score=0.49, + type_score=0.55, + price_penalty=0.0, + hard_veto=False, + reasons=("strong_exact_spec_match",), + comparison_mode="exact_identity", + tags=["identity_v2", "comparison_exact_identity", "brand_match"], + ) + + monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) + monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) + feeder = CompetitorPriceFeeder(engine=object()) + attempts = [] + monkeypatch.setattr( + feeder, + "_record_match_attempt", + lambda *args, **kwargs: attempts.append(kwargs), + ) + + result = feeder._run_sku_items([{ + "sku": "LAN001", + "name": "【LANCOME 蘭蔻】官方直營 超極光活粹晶露150ml", + "product_id": 10, + "momo_price": 3200, + }]) + + assert result.matched == 0 + assert result.skipped_low_score == 1 + assert attempts[0]["attempt_status"] == "true_low_confidence" + + def test_competitor_feeder_downgrades_variant_selection_gap_from_recoverable(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder from services.pchome_crawler import PChomeProduct diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index 8b7fde6..34de37e 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -774,6 +774,10 @@ def test_marketplace_matcher_rejects_fragrance_formula_and_finish_variant_mismat "【PRAMY 柏瑞美】磁吸控油定妝噴霧 100ML(柔焦霧面)", "【柏瑞美PRAMY】 磁吸控油定粧噴霧 水光亮面", ), + ( + "【Relove】8%菸鹼醯胺私密淨白清潔凝露120ml(私密清潔 私密美白 涼感潔淨 PH3.8弱酸呵護)", + "RELOVE胺基酸私密清潔凝露120ml", + ), ] for momo_name, competitor_name in cases: @@ -784,6 +788,31 @@ def test_marketplace_matcher_rejects_fragrance_formula_and_finish_variant_mismat assert "variant_option_conflict" in diagnostics.reasons +def test_marketplace_matcher_promotes_multi_variant_catalog_listings(): + from services.marketplace_product_matcher import score_marketplace_match + + cases = [ + ( + "【日本John’s Blend】香氛擴香罐85g(車用/任選/白麝香/黑麝香/茉莉/櫻花/繡球花/魔髮奇緣/青檸羅勒)", + "日本John’s Blend 車用香氛擴香罐85g(多款可選)", + ), + ( + "【COCODOR】香氛蠟燭170g(多款任選/官方直營)", + "COCODOR Premium Jar Candle 香氛精油蠟燭170g(多種香味任選)", + ), + ( + "【COCODOR】香氛蠟燭95g(多款任選/官方直營)", + "COCODOR Premium Jar Candle 香氛精油蠟燭95g(多種香味任選)", + ), + ] + + for momo_name, competitor_name in cases: + diagnostics = score_marketplace_match(momo_name, competitor_name) + assert diagnostics.score >= 0.76 + assert diagnostics.hard_veto is False + assert "catalog_variant_listing_alignment" in diagnostics.reasons + + def test_marketplace_matcher_rejects_refill_core_vs_case_only_pack(): from services.marketplace_product_matcher import score_marketplace_match