From 4622be344147530f42ce99b26189983d789a7bf9 Mon Sep 17 00:00:00 2001 From: OoO Date: Sun, 24 May 2026 12:05:30 +0800 Subject: [PATCH] V10.398 recover true low confidence matches --- config.py | 2 +- docs/memory/history_logs.md | 3 + services/marketplace_product_matcher.py | 88 +++++++++++++++++++++++ tests/test_marketplace_product_matcher.py | 55 +++++++++++++- 4 files changed, 146 insertions(+), 2 deletions(-) diff --git a/config.py b/config.py index c3f08bd..b3ce577 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.397" +SYSTEM_VERSION = "V10.398" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index 8403cab..0a478f2 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -12,6 +12,9 @@ ## 📅 詳細更新日誌 (考古存檔) +### 2026-05-24:PChome 近門檻身份回收第二輪 +- **V10.398 true low confidence 保守回收**: marketplace matcher 針對正式前段 `true_low_confidence` 補一輪 focused exact identity lines,讓 Baan 嬰兒修護唇膏、植村秀 3D 極細防水眼線膠筆、YSL 恆久完美透膚煙染腮紅、HH 私密植萃美白緊緻凝露、Lab52 學習刷牙漱口水、Benefit 經典菲菲染唇液、Herb24 晨霧純精油擴香儀、Pavaruni 40 香味 10ml 精油與 GATSBY 爆水擦澡濕巾等近門檻真同款可被回收;未放寬 `MIN_MATCH_SCORE`。同版保留 peripera 多色任選對單一色號、LUNASOL 頰彩對眼彩組、MUJI 細軸棉棒對黑色棉棒的低信心保護,並讓多組件套組即使達強身份證據也停在 `identity_review`,避免總價被誤當精準價格告警。 + ### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化 - **V10.397 離線 audit false negative 收斂**: marketplace matcher 針對 audit 剩餘 fresh bad 補三個保守修正:`200ml+200ml` 這類括號內規格串不再被計成額外組件;Kiehl’s `1號護唇膏` 的 `1` 視為商品線名稱而非色號;P.SHINE BEAUTY FOOT 雙面足部去硬皮磨砂棒以品牌 + 足部硬皮磨砂棒語意進 `identity_review`。同批補 focused exact identity lines,讓 Biodance、SAB、LUSH、Kanebo、ARTMIS、Nailmatic、小浪、YUNMI、AQUIESSE、資生堂等低分但強證據同款被正確拉回;高證據 exact 才可進 `price_alert_exact`,證據不足者仍進 `identity_review`。離線 audit fresh bad 從 9 降到 6,剩餘皆為多組件/套組差異。 - **V10.396 多選 catalog 對 generic count 
def _has_pavaruni_40_scent_oil_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both listings are the Pavaruni "40 scents" essential oil line.

    Every one of the following must hold:

    * ``"pavaruni"`` is a brand token shared by both identities;
    * ``"天然植物"`` occurs in at least one of the two searchable names;
    * ``"精油"`` occurs in both searchable names;
    * ``_has_shared_volume(left, right, 10)`` is truthy
      (presumably a shared 10 ml volume -- confirm against that helper);
    * each searchable name carries ``"40香味"`` or ``"40種香味"``.

    The checks run in the order listed and stop at the first failure.
    """
    name_a = left.searchable_name
    name_b = right.searchable_name
    scent_markers = ("40香味", "40種香味")

    # Brand gate first: both sides must carry the Pavaruni brand token.
    if "pavaruni" not in (left.brand_tokens & right.brand_tokens):
        return False
    # The "natural plant" wording only needs to appear on one side of the pair.
    if "天然植物" not in f"{name_a} {name_b}":
        return False
    # Both listings must describe an essential oil.
    if "精油" not in name_a or "精油" not in name_b:
        return False
    if not _has_shared_volume(left, right, 10):
        return False
    # Each side must advertise the 40-scent catalog in one of its two spellings.
    return all(
        any(marker in name for marker in scent_markers)
        for name in (name_a, name_b)
    )
(left.brand_tokens & right.brand_tokens) + and "晨霧純精油擴香儀ii" in left_text + and "晨霧純精油擴香儀ii" in right_text + and (("霧黑" in left_text and "黑色" in right_text) or ("霧黑" in right_text and "黑色" in left_text)) + ): + return "herb24_mist_diffuser_black" + if _has_pavaruni_40_scent_oil_alignment(left, right): + return "pavaruni_40_scent_oil" + if ( + "gatsby" in (left.brand_tokens & right.brand_tokens) + and "爆水擦澡濕巾" in left_text + and "爆水擦澡濕巾" in right_text + and "24張入" in left_text + and "24張入" in right_text + ): + return "gatsby_body_wipes_24" return "" @@ -2735,6 +2821,8 @@ def _has_variant_descriptor_conflict(left: ProductIdentity, right: ProductIdenti return False if _has_baan_baby_lip_catalog_alignment(left, right): return False + if _has_pavaruni_40_scent_oil_alignment(left, right): + return False if _is_relove_private_cleanser_line(left, right): return False if ( diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index d31db26..d79300a 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -1013,6 +1013,51 @@ def test_marketplace_matcher_promotes_focused_low_score_exact_identity_lines(): "《SHISEIDO 資生堂》新艷陽夏水離子熱防禦隔離露 50ml", "focused_exact_identity_shiseido_blue_sunscreen", ), + ( + "【Baan 貝恩】嬰兒修護唇膏4.5g", + "【貝恩】嬰兒修護唇膏(原味/草莓)", + "focused_exact_identity_baan_baby_lip_base_catalog", + ), + ( + "【植村秀】官方直營 3D極細防水眼線膠筆(Shu uemura)", + "《Shu Uemura 植村秀》3D極細防水眼線膠筆 0.08g-#深棕", + "focused_exact_identity_shu_3d_eyeliner", + ), + ( + "【YSL】官方直營 恆久完美透膚煙染腮紅(任選1款/新品上市)", + "【YSL聖羅蘭】恆久完美透膚煙染腮紅 6g ( #12/ #57/ #93)", + "focused_exact_identity_ysl_blush_catalog", + ), + ( + "【HH草本新淨界】私密植萃美白緊緻凝露30ml(私密美白 保濕緊緻 私密緊實)", + "HH 私密植萃美白緊緻凝露(30ml)", + "focused_exact_identity_hh_private_gel", + ), + ( + "【Lab52 齒妍堂】學習刷牙漱口水170g/瓶(食品級配方/牙菌斑顯示劑/口腔清潔居家檢測)", + "Lab52齒妍堂 汪汪隊立大功學習刷牙漱口水170g", + "focused_exact_identity_lab52_mouthwash", + ), + ( + "【benefit 貝玲妃】經典染唇液(唇頰兩用/持久不脫妝)", + 
"【Benefit】經典菲菲染唇液6ml(唇頰兩用/持久不脫妝)多色", + "focused_exact_identity_benefit_lip_tint", + ), + ( + "【草本24】Herb24 晨霧純精油擴香儀II_霧黑-無須用水(贈-紅桔 純質精油)(歐盟EMC證書 主機享半年保固)", + "【草本24。Herb24】晨霧純精油擴香儀II_黑色(贈:任選價值780元純質精油10ml)", + "focused_exact_identity_herb24_mist_diffuser_black", + ), + ( + "【Pavaruni】美國天然植物精油40香味10ml(香薰 擴香 萃取 薰香機 水氧機 薰衣草 檀香 玫瑰 雪松 白茶 茉莉)", + "【美國Pavaruni】天然植物香氛精油40種香味10ml 多款任選", + "focused_exact_identity_pavaruni_40_scent_oil", + ), + ( + "【GATSBY】爆水擦澡濕巾24張入(涼感乾洗澡)", + "GATSBY 爆水擦澡濕巾24張入(240g)", + "focused_exact_identity_gatsby_body_wipes_24", + ), ] for momo_name, competitor_name, expected_reason in cases: @@ -1033,8 +1078,16 @@ def test_marketplace_matcher_keeps_high_variant_low_score_lines_outside_focused_ "【Lactacyd 立朵舒】清新舒涼 私密潔浴露250ml(天然薄荷涼感 私密清潔 私密保養 乾癢修護)", "Lactacyd 立朵舒私密潔浴露-生理呵護250ml", ) + lunasol = score_marketplace_match( + "即期品【Kanebo 佳麗寶】LUNASOL 晶巧霓光/明艷頰彩(多色任選)", + "【Kanebo 佳麗寶】LUNASOL 晶巧霓光眼彩寵愛組", + ) + muji_swab = score_marketplace_match( + "【MUJI 無印良品】細軸棉棒/200支", + "【MUJI 無印良品】棉棒(黑色)/200支", + ) - for diagnostics in (lush, lactacyd): + for diagnostics in (lush, lactacyd, lunasol, muji_swab): assert diagnostics.score < 0.76 assert not any(reason.startswith("focused_exact_identity_") for reason in diagnostics.reasons)