From 8cc197aba2f89247a9624a89b0b31672102e90df Mon Sep 17 00:00:00 2001 From: OoO Date: Sun, 24 May 2026 11:43:04 +0800 Subject: [PATCH] V10.397 reduce competitor audit false negatives --- config.py | 2 +- docs/memory/history_logs.md | 1 + services/marketplace_product_matcher.py | 198 +++++++++++++++++++++- tests/test_marketplace_product_matcher.py | 132 ++++++++++++++- 4 files changed, 323 insertions(+), 10 deletions(-) diff --git a/config.py b/config.py index 982dc68..c3f08bd 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.396" +SYSTEM_VERSION = "V10.397" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index 8bf5067..8403cab 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化 +- **V10.397 離線 audit false negative 收斂**: marketplace matcher 針對 audit 剩餘 fresh bad 補三個保守修正:`200ml+200ml` 這類括號內規格串不再被計成額外組件;Kiehl’s `1號護唇膏` 的 `1` 視為商品線名稱而非色號;P.SHINE BEAUTY FOOT 雙面足部去硬皮磨砂棒以品牌 + 足部硬皮磨砂棒語意進 `identity_review`。同批補 focused exact identity lines,讓 Biodance、SAB、LUSH、Kanebo、ARTMIS、Nailmatic、小浪、YUNMI、AQUIESSE、資生堂等低分但強證據同款被正確拉回;高證據 exact 才可進 `price_alert_exact`,證據不足者仍進 `identity_review`。離線 audit fresh bad 從 9 降到 6,剩餘皆為多組件/套組差異。 - **V10.396 多選 catalog 對 generic count 組合放行**: marketplace matcher 對「多款任選 catalog listing」對上同數量 generic `N入組` 候選新增保守豁免:需品牌、品類、基礎規格與數量一致,且 generic 端沒有具名色款/香味選項,才不觸發 `variant_option_conflict`。John’s Blend 香氛擴香罐 85g 任選 3 入 vs PChome 3入組會進 `identity_review`,不直接價格告警。 - **V10.395 離線競品身份 audit 工具**: 新增 `scripts/audit_competitor_identity_jsonl.py`,可把 production DB 匯出的 competitor identity JSONL 在本機重跑 current matcher,輸出 accepted / veto / low-score / fresh bad 摘要與樣本;工具不連 DB、不寫 DB,用來取代在 188 app container 內全量重掃造成的 memory 壓力。 - **V10.394 多色 catalog / 入門組防錯配**: marketplace matcher 補「琥珀橙 / 干邑棕 / 賽車綠」等車用香氛色款詞,當 MOMO 是多色/多款 catalog listing、PChome 是單一色款候選時會保留 `variant_option_conflict` hard veto;同時把 `入門組` 納入套組詞,避免理膚寶水抗敏入門組被拿去跟單瓶乳液做總價比價。 diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index 2a19b7e..38b496f 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -342,6 +342,16 @@ SEARCH_IDENTITY_ANCHORS = ( "蜂王玫瑰外泌微臻霜", "微分子肌底原生露", "小浪智能感應自動噴香機", + "智能感應自動噴香機", + "深層全效面膜", + "私密防護舒緩噴霧", + "櫻之花身體噴霧", + "光透立體眼線筆", + "葳兒柔賦活凝膠", + "兒童指甲油", + "麗駐蘭修復舒緩面膜", + "濕度數顯智能加濕器", + "新艷陽夏水離子熱防禦隔離露", "3d立體持色眉彩盤", "細芯睛彩雙頭眉筆", "雙頭旋轉極細眉筆", @@ -588,6 +598,7 @@ BRAND_ALIAS_OVERRIDES = { "jo malone": ("jo malone",), "prada": ("prada", "普拉達"), "za": ("za",), + "小浪": ("小浪",), "xiaomi": ("小米有品", "小米", "xiaomi"), "mac": ("m.a.c", "mac", "m a c"), "opi": ("o.p.i", "opi", "o p i"), @@ -1263,6 +1274,13 @@ def _component_separator_text(identity: ProductIdentity) -> str: text = identity.normalized_name text = re.sub(r"spf\s*(\d+)\s*[++]+", r"spf\1", text, flags=re.I) text = re.sub(r"pa\s*[++]{1,5}", "pa", text, flags=re.I) + text = re.sub( + r"(\d+(?:\.\d+)?\s*(?:ml|g|mg|毫升|公克|毫克))\s*[++]\s*" + r"(\d+(?:\.\d+)?\s*(?:ml|g|mg|毫升|公克|毫克))", + r"\1 \2", + text, + flags=re.I, + ) text = re.sub( r"\b[a-z]{1,6}\d{1,6}\s*[++]\s*[a-z]{1,6}\d{1,6}\b", lambda match: re.sub(r"[++]", " ", match.group(0)), @@ -1751,6 +1769,17 @@ def score_marketplace_match( if variant_option_conflict: hard_veto = True + focused_exact_line_reason = _has_focused_low_score_exact_identity_line(left, right) + focused_exact_price_safe = ( + focused_exact_line_reason + and brand_score >= 0.95 + and not hard_veto + and spec_score >= 0.45 + and token_score >= 0.30 + and sequence_score >= 0.40 + and not variant_descriptor_conflict + ) + comparison_mode = "exact_identity" if _is_unit_comparable_candidate( left, @@ -1791,14 +1820,17 @@ def score_marketplace_match( and sequence_score >= 0.72 ) allow_wide_price_penalty_suppression = ( - shared_anchor - and len(shared_anchor.replace(" ", "")) >= 5 - and brand_score >= 0.95 - and not hard_veto - and type_score >= 0.55 - and spec_score >= 0.99 - and token_score >= 0.50 - and (sequence_score >= 0.55 or lip_care_exact_identity) + ( + shared_anchor + and len(shared_anchor.replace(" ", "")) >= 5 + and brand_score >= 0.95 + and not hard_veto + and type_score >= 0.55 + and spec_score >= 0.99 + and token_score >= 0.50 + and (sequence_score >= 0.55 or lip_care_exact_identity) + ) + or focused_exact_price_safe ) if (ratio < 0.3 or ratio > 3.2) and token_score < 0.78: if allow_price_penalty_suppression: @@ -1846,6 +1878,18 @@ def score_marketplace_match( ): score += 0.025 reasons.append("strong_exact_spec_match") + if ( + focused_exact_line_reason + and brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and spec_score >= 0.45 + and token_score >= 0.30 + and sequence_score >= 0.40 + and not variant_descriptor_conflict + ): + score += 0.16 + reasons.append(f"focused_exact_identity_{focused_exact_line_reason}") if ( shared_anchor and brand_score >= 0.95 @@ -2081,6 +2125,17 @@ def score_marketplace_match( ): score += 0.04 reasons.append("shared_model_token_brush_baby_wildones") + if ( + _has_pshine_beauty_foot_file_alignment(left, right) + and brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and token_score >= 0.60 + and sequence_score >= 0.78 + and not variant_descriptor_conflict + ): + score += 0.05 + reasons.append("shared_model_token_pshine_beauty_foot_file") if ( _has_catalog_variant_listing_alignment(left, right) and brand_score >= 0.95 @@ -2486,6 +2541,22 @@ def _has_brush_baby_wildones_toothbrush_alignment(left: ProductIdentity, right: ) +def _has_pshine_beauty_foot_file_alignment(left: ProductIdentity, right: ProductIdentity) -> bool: + brand_tokens = {"beauty", "shine", "foot"} + return ( + brand_tokens <= left.brand_tokens + and brand_tokens <= right.brand_tokens + and "雙面" in left.searchable_name + and "雙面" in right.searchable_name + and "足" in left.searchable_name + and "足" in right.searchable_name + and ("硬皮" in left.searchable_name or "去角質" in left.searchable_name) + and ("硬皮" in right.searchable_name or "去角質" in right.searchable_name) + and ("磨砂棒" in left.searchable_name or "足搓棒" in left.searchable_name) + and ("磨砂棒" in right.searchable_name or "足搓棒" in right.searchable_name) + ) + + def _has_baan_baby_lip_catalog_alignment(left: ProductIdentity, right: ProductIdentity) -> bool: brand_tokens = {"baan", "貝恩"} left_options = _explicit_variant_option_tokens(left) @@ -2500,6 +2571,115 @@ def _has_baan_baby_lip_catalog_alignment(left: ProductIdentity, right: ProductId ) +def _has_shared_count(left: ProductIdentity, right: ProductIdentity, count: int, unit: str) -> bool: + return (count, unit) in set(left.counts) and (count, unit) in set(right.counts) + + +def _has_shared_volume(left: ProductIdentity, right: ProductIdentity, volume_ml: float) -> bool: + return any(_close_number(value, volume_ml) for value in left.volumes_ml) and any( + _close_number(value, volume_ml) for value in right.volumes_ml + ) + + +def _has_focused_low_score_exact_identity_line(left: ProductIdentity, right: ProductIdentity) -> str: + left_text = left.searchable_name + right_text = right.searchable_name + pair_text = f"{left_text} {right_text}" + brand_tokens = left.brand_tokens | right.brand_tokens + + if ( + "biodance" in (left.brand_tokens & right.brand_tokens) + and "深層全效面膜" in left_text + and "深層全效面膜" in right_text + and "膠原蛋白" in pair_text + and _has_shared_count(left, right, 4, "片") + ): + return "biodance_deep_mask" + if ( + {"sab", "初淨肌"} & (left.brand_tokens & right.brand_tokens) + and "私密防護舒緩噴霧" in left_text + and "私密防護舒緩噴霧" in right_text + and _has_shared_volume(left, right, 30) + ): + return "sab_private_spray" + if ( + "lush" in (left.brand_tokens & right.brand_tokens) + and "櫻之花身體噴霧" in left_text + and "櫻之花身體噴霧" in right_text + and _has_shared_volume(left, right, 200) + ): + return "lush_sakura_body_spray" + if ( + {"kanebo", "佳麗寶"} & (left.brand_tokens & right.brand_tokens) + and "coffret" in left_text + and "coffret" in right_text + and "光透立體眼線筆" in left_text + and "光透立體眼線筆" in right_text + ): + return "kanebo_coffret_eyeliner" + if ( + "artmis" in brand_tokens + and "葳兒柔" in left_text + and "葳兒柔" in right_text + and "賦活凝膠" in left_text + and "賦活凝膠" in right_text + and _has_shared_volume(left, right, 35) + ): + return "artmis_virile_gel" + if ( + "nailmatic" in (left.brand_tokens & right.brand_tokens) + and "小精靈" in left_text + and "小精靈" in right_text + and "指甲油" in left_text + and "指甲油" in right_text + ): + return "nailmatic_casper_polish" + if ( + "小浪" in (left.brand_tokens & right.brand_tokens) + and "智能感應自動噴香機" in left_text + and "智能感應自動噴香機" in right_text + and "補充液" in left_text + and "補充液" in right_text + and _has_shared_count(left, right, 3, "入") + ): + return "xiaolang_spray_machine_refill_set" + if ( + {"yunmi", "j10"} <= (left.brand_tokens & right.brand_tokens) + and "濕度數顯智能加濕器" in left_text + and "濕度數顯智能加濕器" in right_text + ): + return "yunmi_j10_humidifier" + if ( + "aquiesse" in (left.brand_tokens & right.brand_tokens) + and "香氛蠟燭" in left_text + and "香氛蠟燭" in right_text + and "5oz" in left_text + and "5oz" in right_text + and _is_multi_variant_catalog_listing(left) + and _is_multi_variant_catalog_listing(right) + ): + return "aquiesse_5oz_candle_catalog" + if ( + {"rejuran", "麗珠蘭"} & (left.brand_tokens & right.brand_tokens) + and "麗駐蘭修復舒緩面膜" in left_text + and "麗駐蘭修復舒緩面膜" in right_text + and "5p" in left_text + and "5p" in right_text + ): + return "rejuran_repair_mask_5p" + if ( + {"shiseido", "資生堂"} & (left.brand_tokens & right.brand_tokens) + and "新艷陽" in left_text + and "新艷陽" in right_text + and "水離子熱防禦" in left_text + and "水離子熱防禦" in right_text + and "隔離露" in left_text + and "隔離露" in right_text + ): + return "shiseido_blue_sunscreen" + return "" + + def _is_relove_private_cleanser_line(left: ProductIdentity, right: ProductIdentity) -> bool: return ( "relove" in (left.brand_tokens | right.brand_tokens) @@ -2586,6 +2766,8 @@ def _explicit_variant_option_tokens(identity: ProductIdentity) -> set[str]: if value: options.add(value) for match in re.finditer(r"(?= 0.76 - assert "brandless_exact_identity" in diagnostics.reasons + assert ( + "brandless_exact_identity" in diagnostics.reasons + or "shared_identity_anchor_variant_safe" in diagnostics.reasons + ) def test_marketplace_matcher_promotes_ludeya_line_with_platform_name_drift(): @@ -826,6 +829,20 @@ def test_marketplace_matcher_ignores_sunscreen_and_model_plus_markers_for_bundle assert "multi_component_count_conflict" not in diagnostics.reasons +def test_marketplace_matcher_ignores_quantity_plus_markers_inside_component_specs(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【HH草本新淨界】私密植萃抗菌潔淨露+私密衣物抗菌手洗精(200ml+200ml)", + "HH私密植萃抗菌潔淨露(200ML)+私密衣物抗菌手洗精(200ML)【裡外兼顧組】", + ) + + assert diagnostics.score >= 0.76 + assert diagnostics.hard_veto is False + assert diagnostics.alert_tier == "identity_review" + assert "multi_component_count_conflict" not in diagnostics.reasons + + def test_marketplace_matcher_promotes_multi_variant_catalog_listings(): from services.marketplace_product_matcher import score_marketplace_match @@ -883,6 +900,34 @@ def test_marketplace_matcher_promotes_baan_lip_catalog_with_same_options(): assert "catalog_variant_listing_alignment_baan_lip" in diagnostics.reasons +def test_marketplace_matcher_keeps_kiehls_no1_lip_balm_as_product_line_not_color_number(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【契爾氏】官方直營 1號護唇膏15ml(Kiehl’s)", + "KIEHL’S 一號護唇膏 (原味) 15ml", + ) + + assert diagnostics.score >= 0.76 + assert diagnostics.hard_veto is False + assert diagnostics.alert_tier == "identity_review" + assert "variant_option_conflict" not in diagnostics.reasons + + +def test_marketplace_matcher_promotes_pshine_beauty_foot_file_identity_review(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【P.SHINE BEAUTY FOOT】日本製雙面足部去角質硬皮磨砂棒 足搓棒(日本進口)", + "P.SHINE BEAUTY FOOT 雙面足部去硬皮磨砂棒(日本製)", + ) + + assert diagnostics.score >= 0.76 + assert diagnostics.hard_veto is False + assert diagnostics.alert_tier == "identity_review" + assert "shared_model_token_pshine_beauty_foot_file" in diagnostics.reasons + + def test_marketplace_matcher_rejects_multi_color_catalog_against_single_color_candidate(): from services.marketplace_product_matcher import score_marketplace_match @@ -909,6 +954,91 @@ def test_marketplace_matcher_rejects_starter_set_against_single_lotion(): assert "bundle_offer_conflict" in diagnostics.reasons +def test_marketplace_matcher_promotes_focused_low_score_exact_identity_lines(): + from services.marketplace_product_matcher import score_marketplace_match + + cases = [ + ( + "【Biodance】深層保濕面膜/膠原蛋白深層全效面膜 4片入(補水面膜 韓國 保濕 小分子玻尿酸)", + "【Biodance】實感深層全效面膜 4片(多重保濕鎮靜/膠原蛋白)", + "focused_exact_identity_biodance_deep_mask", + ), + ( + "【SAB 初淨肌】私密防護舒緩噴霧30mlX1入(私密舒緩/私密保養/日本愛宕柿淨味/洋甘菊/保濕/私密肌專用)", + "初淨肌 SAB 私密防護舒緩噴霧 30ml*1入", + "focused_exact_identity_sab_private_spray", + ), + ( + "【LUSH 嵐舒】櫻之花身體噴霧 200ml(香氛噴霧/茉莉花/檸檬/含羞草/苦橙花/花香)", + "英國原裝LUSH 櫻之花身體噴霧200ml Sakura Body Spray", + "focused_exact_identity_lush_sakura_body_spray", + ), + ( + "【Kanebo 佳麗寶】COFFRET D’OR 光透立體眼線筆蕊/眼線液蕊(多色任選)", + "【Kanebo 佳麗寶】COFFRET D’OR光透立體眼線筆(蕊)0.11g", + "focused_exact_identity_kanebo_coffret_eyeliner", + ), + ( + "【Play&Joy 官方直營】ARTMIS 葳兒柔私密賦活凝膠35ml(私密精華 體感升級 靈芝外泌體 費洛蒙香氛 親密互動)", + "ARTMIS葳兒柔賦活凝膠35ml", + "focused_exact_identity_artmis_virile_gel", + ), + ( + "【Nailmatic】兒童水漾亮彩指甲油 - 小精靈(兒童專用指甲油)", + "nailmatic 兒童指甲油(Casper小精靈)", + "focused_exact_identity_nailmatic_casper_polish", + ), + ( + "【小浪】智能感應自動噴香機+補充液3入組(三種噴香模式 霧化噴香 芳香除臭)", + "小浪智能感應自動噴香機+補充液3入組", + "focused_exact_identity_xiaolang_spray_machine_refill_set", + ), + ( + "【YUNMI】J10濕度數顯智能加濕器 雙噴頭納米霧化水氧機 香氛機 噴霧器 空氣清淨機 增濕器", + "YUNMI J10濕度數顯智能加濕器", + "focused_exact_identity_yunmi_j10_humidifier", + ), + ( + "【AQUIESSE】美國大豆蠟香氛蠟燭 5oz/141g(多款任選.清新茶香木質果香)", + "【AQUIESSE】香氛蠟燭 5oz 多款任選", + "focused_exact_identity_aquiesse_5oz_candle_catalog", + ), + ( + "【REJURAN 麗珠蘭】官方直營|麗駐蘭修復舒緩面膜5P(補水 保濕)|台灣總代理 動物性 PDRN", + "REJURAN 麗駐蘭修復舒緩面膜5P", + "focused_exact_identity_rejuran_repair_mask_5p", + ), + ( + "【SHISEIDO 資生堂國際櫃】新艷陽•夏 水離子熱防禦UV隔離露 SPF50(防曬/防曬乳/清爽/不黏膩)", + "《SHISEIDO 資生堂》新艷陽夏水離子熱防禦隔離露 50ml", + "focused_exact_identity_shiseido_blue_sunscreen", + ), + ] + + for momo_name, competitor_name, expected_reason in cases: + diagnostics = score_marketplace_match(momo_name, competitor_name) + assert diagnostics.score >= 0.76, (momo_name, diagnostics) + assert diagnostics.hard_veto is False + assert expected_reason in diagnostics.reasons + + +def test_marketplace_matcher_keeps_high_variant_low_score_lines_outside_focused_promotion(): + from services.marketplace_product_matcher import score_marketplace_match + + lush = score_marketplace_match( + "【LUSH 嵐舒】Sticky Dates 椰棗布丁身體噴霧 200ml(香氛噴霧/安息香/檀木/甜蜜香氣)", + "英國原裝LUSH 睡公主身體噴霧200ml Sleepy Body Spray", + ) + lactacyd = score_marketplace_match( + "【Lactacyd 立朵舒】清新舒涼 私密潔浴露250ml(天然薄荷涼感 私密清潔 私密保養 乾癢修護)", + "Lactacyd 立朵舒私密潔浴露-生理呵護250ml", + ) + + for diagnostics in (lush, lactacyd): + assert diagnostics.score < 0.76 + assert not any(reason.startswith("focused_exact_identity_") for reason in diagnostics.reasons) + + def test_marketplace_matcher_rejects_refill_core_vs_case_only_pack(): from services.marketplace_product_matcher import score_marketplace_match