From af48452fd6e03e347de254992886653ab838342d Mon Sep 17 00:00:00 2001 From: OoO Date: Sun, 24 May 2026 10:54:03 +0800 Subject: [PATCH] V10.393 refine bundle plus matching --- config.py | 2 +- docs/memory/history_logs.md | 1 + services/marketplace_product_matcher.py | 65 +++++++++++++++++++++-- tests/test_marketplace_product_matcher.py | 54 +++++++++++++++++++ 4 files changed, 116 insertions(+), 6 deletions(-) diff --git a/config.py b/config.py index 59f3ade..16d6749 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.392" +SYSTEM_VERSION = "V10.393" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index cb35985..f80ab36 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化 +- **V10.393 組合包 `+` 判定修正與 catalog 補強**: marketplace matcher 的組合包件數判定會先排除 `SPF50+`、`PA++++` 等防曬係數加號,以及 `NTT80+AL414` 這類純型號碼串,避免把防曬品與 OPI 套組的規格/型號加號誤判成多一個商品組件;CeraVe 三件組 vs 兩件組仍維持 `multi_component_count_conflict` hard veto。同版收緊品牌 alias 判定,避免只有品牌名就觸發商品線加成,並補 Baan 貝恩嬰兒修護唇膏「原味/草莓」catalog listing 放行。 - **V10.392 組合包件數防錯配**: marketplace matcher 新增 `multi_component_count_conflict`,當 MOMO 與 PChome 都是 `+`/`+` 組合包但組件數不同時直接進 `not_comparable`,避免三件組被拿去跟兩件組做總價告警;同步把該原因加入 evidence flags,讓告警與審核畫面可以清楚顯示「組合包件數不同」。 - **V10.391 多款任選 catalog listing 防錯配**: marketplace matcher 新增 `catalog_variant_listing_alignment`,當 MOMO/PChome 雙方都是多款/多色/多香味任選 listing,且商品線、規格與類型一致時,可放行香氛擴香罐、香氛蠟燭等 catalog 型同款;同時把 Relove 菸鹼醯胺 vs 胺基酸私密清潔凝露列為變體衝突,並讓 competitor feeder 不再只因 `strong_exact_spec_match` 就把低分候選視為 recoverable,避免只同規格但品線不同的商品回寫正式比價。 - **V10.390 PChome 近門檻商品比對規則**: marketplace matcher 補 17 組近門檻真同款召回與錯配防線,包含 OBgE 防曬棒、ARTMIS 私密清潔慕斯、Seche Vite 快乾亮油、TAICEND 屁屁噴、femfresh / VIGILL 私密清潔、Solone 眼部飾底乳、HYDSTO 車載香薰、小米 S101 刮鬍刀、PRAMY 定妝噴霧、I'M MEME 修容打亮棒、檜山坊滾珠精油、ARM&HAMMER 體香膏、Brush Baby WildOnes 電動牙刷與 Palmer's 按摩乳;同時把香氛/私密慕斯/定妝噴霧 finish 差異列為 variant-sensitive,避免不同香味、蔓越莓 vs 金縷梅、柔焦霧面 vs 水光亮面被誤推成直接價格告警。 diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index 759273b..a368e4b 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -826,6 +826,19 @@ def _extract_model_tokens(text: str) -> set[str]: return tokens +def _brand_alias_present(text: str, alias_norm: str, text_tokens: set[str]) -> bool: + if not alias_norm: + return False + if re.search(r"[\u4e00-\u9fff]", alias_norm): + return alias_norm in text + if " " not in alias_norm and alias_norm in text_tokens: + return True + if re.fullmatch(r"[a-z0-9][a-z0-9\s'&.-]*", alias_norm): + pattern = r"(? set[str]: tokens: set[str] = set() try: @@ -843,9 +856,10 @@ def _known_brand_tokens(text: str) -> set[str]: for alias in aliases: alias_map[alias.lower()] = canonical + text_tokens = _tokenize(text) for alias, canonical in alias_map.items(): alias_norm = normalize_product_text(alias) - if alias_norm and alias_norm in text: + if _brand_alias_present(text, alias_norm, text_tokens): tokens.add(canonical) tokens.update( token for token in _tokenize(alias_norm) @@ -1233,7 +1247,7 @@ def _has_bundle_offer(identity: ProductIdentity) -> bool: def _has_multi_component(identity: ProductIdentity) -> bool: - text = identity.normalized_name + text = _component_separator_text(identity) return bool( "+" in text or "+" in text @@ -1241,8 +1255,21 @@ def _has_multi_component(identity: ProductIdentity) -> bool: ) -def _multi_component_count(identity: ProductIdentity) -> int: +def _component_separator_text(identity: ProductIdentity) -> str: text = identity.normalized_name + text = re.sub(r"spf\s*(\d+)\s*[++]+", r"spf\1", text, flags=re.I) + text = re.sub(r"pa\s*[++]{1,5}", "pa", text, flags=re.I) + text = re.sub( + r"\b[a-z]{1,6}\d{1,6}\s*[++]\s*[a-z]{1,6}\d{1,6}\b", + lambda match: re.sub(r"[++]", " ", match.group(0)), + text, + flags=re.I, + ) + return text + + +def _multi_component_count(identity: ProductIdentity) -> int: + text = _component_separator_text(identity) if not ("+" in text or "+" in text): return 1 parts = [ @@ -1478,7 +1505,7 @@ def _has_strong_product_line_signal( token_score: float, chinese_name_score: float, ) -> bool: - shared_core = left.core_tokens & right.core_tokens + shared_core = (left.core_tokens & right.core_tokens) - left.brand_tokens - right.brand_tokens shared_latin_or_model = { token for token in shared_core if re.fullmatch(r"[a-z][a-z0-9-]{3,}", token) @@ -1486,7 +1513,7 @@ def _has_strong_product_line_signal( } if shared_latin_or_model and token_score >= 0.50: return True - return token_score >= 0.56 and chinese_name_score >= 0.45 + return bool(shared_core) and token_score >= 0.56 and chinese_name_score >= 0.45 def _has_safe_exact_spec_signal( @@ -2062,6 +2089,18 @@ def score_marketplace_match( ): score += 0.06 reasons.append("catalog_variant_listing_alignment") + if ( + _has_baan_baby_lip_catalog_alignment(left, right) + and brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and type_score >= 0.95 + and token_score >= 0.70 + and sequence_score >= 0.45 + and not variant_descriptor_conflict + ): + score += 0.05 + reasons.append("catalog_variant_listing_alignment_baan_lip") if ( shared_anchor and len(shared_anchor.replace(" ", "")) >= 5 @@ -2443,6 +2482,20 @@ def _has_brush_baby_wildones_toothbrush_alignment(left: ProductIdentity, right: ) +def _has_baan_baby_lip_catalog_alignment(left: ProductIdentity, right: ProductIdentity) -> bool: + brand_tokens = {"baan", "貝恩"} + left_options = _explicit_variant_option_tokens(left) + right_options = _explicit_variant_option_tokens(right) + return ( + bool(left.brand_tokens & brand_tokens) + and bool(right.brand_tokens & brand_tokens) + and "嬰兒修護唇膏" in left.searchable_name + and "嬰兒修護唇膏" in right.searchable_name + and {"原味", "草莓"} <= left_options + and {"原味", "草莓"} <= right_options + ) + + def _is_relove_private_cleanser_line(left: ProductIdentity, right: ProductIdentity) -> bool: return ( "relove" in (left.brand_tokens | right.brand_tokens) @@ -2496,6 +2549,8 @@ def _has_variant_descriptor_conflict(left: ProductIdentity, right: ProductIdenti return False if _has_brush_baby_wildones_toothbrush_alignment(left, right): return False + if _has_baan_baby_lip_catalog_alignment(left, right): + return False if _is_relove_private_cleanser_line(left, right): return False if ( diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index 495fd5d..0c83f60 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -797,6 +797,35 @@ def test_marketplace_matcher_rejects_fragrance_formula_and_finish_variant_mismat assert expected_reason in diagnostics.reasons +def test_marketplace_matcher_ignores_sunscreen_and_model_plus_markers_for_bundle_count(): + from services.marketplace_product_matcher import score_marketplace_match + + cases = [ + ( + "【O.P.I】受損型硬甲油記得卸組-亮麗增強基礎護甲油15mL+專業去光水110mL-NTT80+AL414(禮物/官方直營)", + "OPI 護甲油記得卸組(亮麗增強基礎護甲油15mL+去光水110mL)", + ), + ( + "【Neutrogena 露得清】新品上市★水感透亮防曬乳SPF50+ PA++++(40mlx2入組)", + "露得清水感透亮防曬乳SPF50+ 40ml *2入", + ), + ( + "【AGE20】輕透光潤色素顏霜(SPF50+/PA++++/台灣總代理)", + "AGE20’S 輕透光潤色素顏霜SPF50+/PA++++", + ), + ( + "【Estee Lauder 雅詩蘭黛】粉持久完美鎖妝氣墊粉餅(SPF45/PA+++/一盒一蕊/IU同款)", + "【ESTEE LAUDER雅詩蘭黛】粉持久完美鎖妝氣墊粉餅SPF45/PA+++", + ), + ] + + for momo_name, competitor_name in cases: + diagnostics = score_marketplace_match(momo_name, competitor_name, momo_price=1000, competitor_price=900) + assert diagnostics.hard_veto is False + assert diagnostics.score >= 0.76 + assert "multi_component_count_conflict" not in diagnostics.reasons + + def test_marketplace_matcher_promotes_multi_variant_catalog_listings(): from services.marketplace_product_matcher import score_marketplace_match @@ -822,6 +851,31 @@ def test_marketplace_matcher_promotes_multi_variant_catalog_listings(): assert "catalog_variant_listing_alignment" in diagnostics.reasons +def test_marketplace_matcher_requires_non_brand_product_line_evidence(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【CEZANNE】柔潤腮紅", + "Cezanne", + ) + + assert diagnostics.score < 0.76 + assert "strong_product_line_match" not in diagnostics.reasons + + +def test_marketplace_matcher_promotes_baan_lip_catalog_with_same_options(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【Baan 貝恩】嬰兒修護唇膏(草莓/原味) 4.5g/個 (新東海藥局)", + "【貝恩】嬰兒修護唇膏(原味/草莓)", + ) + + assert diagnostics.score >= 0.76 + assert diagnostics.hard_veto is False + assert "catalog_variant_listing_alignment_baan_lip" in diagnostics.reasons + + def test_marketplace_matcher_rejects_refill_core_vs_case_only_pack(): from services.marketplace_product_matcher import score_marketplace_match