From d2dea022eadae1866e2444fdd5cf90fc862547a5 Mon Sep 17 00:00:00 2001 From: OoO Date: Sun, 24 May 2026 10:22:59 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=20PChome=20=E8=BF=91?= =?UTF-8?q?=E9=96=80=E6=AA=BB=E5=95=86=E5=93=81=E6=AF=94=E5=B0=8D=E8=A6=8F?= =?UTF-8?q?=E5=89=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 2 +- services/marketplace_product_matcher.py | 194 +++++++++++++++++++++- tests/test_marketplace_product_matcher.py | 127 ++++++++++++++ 3 files changed, 315 insertions(+), 8 deletions(-) diff --git a/config.py b/config.py index 82835fc..a661e93 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.388" +SYSTEM_VERSION = "V10.390" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index 79cf3ec..7b140de 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -313,6 +313,16 @@ SEARCH_NOISE_TOKENS = { } SEARCH_IDENTITY_ANCHORS = ( + "控油清爽防曬棒", + "蔓越莓私密清潔慕斯", + "金縷梅私密清潔慕斯", + "光采奪目眼部飾底乳", + "男性私密沐浴露", + "私密沐浴露", + "hydsto 車載香薰", + "電動刮鬍刀 s101", + "磁吸控油定妝噴霧", + "修容打亮棒", "私密潔淨凝露", "柔霧裸唇膏", "潤浸保濕清爽身體乳液", @@ -436,6 +446,15 @@ SEARCH_BROAD_ANCHORS = { } VARIANT_SENSITIVE_KEYWORDS = { + "香氛蠟燭", + "芬香蠟燭", + "蠟燭", + "擴香", + "融蠟燈", + "車用香氛", + "香味", + "私密清潔慕斯", + "定妝噴霧", "妝前防護乳", "妝前乳", "素顏霜", @@ -453,9 +472,26 @@ VARIANT_SENSITIVE_KEYWORDS = { "蜜粉餅", "粉底棒", "遮瑕棒", + "修容打亮棒", } VARIANT_OPTION_COLOR_WORDS = { + "茉莉花", + "梔子花", + "白麝香", + "黑麝香", + "青檸羅勒", + "炭木香", + "無花果", + "清甜柚香", + "杏仁牛奶", + "完熟白桃", + "原味", + "草莓", + "蔓越莓", + "金縷梅", + "柔焦霧面", + "水光亮面", "黑色", "棕色", "咖啡色", @@ -544,7 +580,13 @@ PRODUCT_TYPES = { "私密噴霧": ("私密噴霧", "抑菌噴霧", "醒肌抑菌噴霧"), "私密凝露": ("凝露", "激淨凝露", "緊實凝露", "亮白凝露"), "護甲油": ("護甲油", "亮油", "top coat"), + "定妝噴霧": ("定妝噴霧", "setting spray"), + "修容打亮棒": ("修容打亮棒", "修容棒", "打亮棒"), + "刮鬍刀": ("刮鬍刀", "刮胡刀", "shaver", "razor"), + "體香膏": ("體香膏", "體香劑", "deodorant"), + "電動牙刷": ("電動牙刷", "聲波電動牙刷", "electric toothbrush"), "洗手慕斯": ("洗手慕斯", "洗手泡泡", "hand wash foam"), + "私密慕斯": ("私密清潔慕斯", "私密防護慕絲", "私密慕斯"), "足膜": ("足膜", "足部膜", "足部去角質"), "妝前乳": ("妝前乳", "妝前防護乳", "妝前隔離", "primer"), "素顏霜": ("素顏霜", "tone up cream"), @@ -557,7 +599,7 @@ PRODUCT_TYPES = { "粉底棒": ("粉底棒", "foundation stick"), "精華": ("精華", "精華液", "essence", "serum", "安瓶"), "化妝水": ("化妝水", "機能水", "toner", "lotion"), - "乳液": ("乳液", "emulsion", "milk"), + "乳液": ("乳液", "按摩乳", "emulsion", "milk"), "面霜": ("面霜", "乳霜", "霜", "cream"), "防曬": ("防曬", "spf", "uv", "sunscreen"), "洗面乳": ("洗面乳", "洗顏", "潔面", "cleanser", "foam"), @@ -728,6 +770,7 @@ def normalize_product_text(value: str) -> str: ) text = text.replace("×", "x").replace("*", "x").replace("*", "x") text = text.replace("/", "/").replace("&", "&") + text = text.replace("粧", "妝") text = re.sub(r"[\u3000\r\n\t]+", " ", text) text = text.lower() text = re.sub(r"[??]+", " ", text) @@ -1850,6 +1893,34 @@ def score_marketplace_match( ): score += 0.10 reasons.append("shared_identity_anchor_lactacyd_wash") + if ( + "私密潔膚露" in shared_anchor + and {"femfresh", "芳芯"} & (left.brand_tokens | right.brand_tokens) + and brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and type_score >= 0.95 + and spec_score >= 0.85 + and token_score >= 0.30 + and sequence_score >= 0.45 + and not variant_descriptor_conflict + ): + score += 0.06 + reasons.append("shared_identity_anchor_femfresh_wash") + if ( + "私密沐浴露" in shared_anchor + and {"vigill", "婦潔"} & (left.brand_tokens | right.brand_tokens) + and brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and type_score >= 0.95 + and spec_score >= 0.70 + and token_score >= 0.45 + and sequence_score >= 0.55 + and not variant_descriptor_conflict + ): + score += 0.06 + reasons.append("shared_identity_anchor_vigill_private_wash") if ( "私密潔淨凝露" in shared_anchor and {"relove"} <= (left.brand_tokens | right.brand_tokens) @@ -1892,6 +1963,52 @@ def score_marketplace_match( ): score += 0.12 reasons.append("shared_identity_anchor_karadium_eye_stick") + if ( + _has_seche_vite_top_coat_alignment(left, right) + and brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and type_score >= 0.95 + and token_score >= 0.70 + and sequence_score >= 0.70 + and not variant_descriptor_conflict + ): + score += 0.04 + reasons.append("shared_identity_anchor_seche_vite_top_coat") + if ( + _has_xiaomi_s101_shaver_alignment(left, right) + and brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and token_score >= 0.60 + and not variant_descriptor_conflict + ): + score += 0.04 + reasons.append("shared_model_token_xiaomi_s101_shaver") + if ( + _has_hinoki_roller_oil_alignment(left, right) + and brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and type_score >= 0.95 + and spec_score >= 0.85 + and sequence_score >= 0.50 + and not variant_descriptor_conflict + ): + score += 0.04 + reasons.append("shared_identity_anchor_hinoki_roller_oil") + if ( + _has_brush_baby_wildones_toothbrush_alignment(left, right) + and brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and type_score >= 0.95 + and token_score >= 0.78 + and sequence_score >= 0.90 + and not variant_descriptor_conflict + ): + score += 0.04 + reasons.append("shared_model_token_brush_baby_wildones") if ( shared_anchor and len(shared_anchor.replace(" ", "")) >= 5 @@ -2108,14 +2225,15 @@ def _extract_anchor_phrases(token: str) -> list[str]: if "瞬效" in cleaned and "b5" in cleaned and "玻尿酸" in cleaned and "精華" in cleaned: phrases.append("瞬效b5玻尿酸精華") for anchor in SEARCH_IDENTITY_ANCHORS: - if anchor not in cleaned: + anchor_phrase = _clean_search_phrase(anchor) + if not anchor_phrase or anchor_phrase not in cleaned: continue - if re.search(r"[\u4e00-\u9fff]", anchor): - prefix_width = 0 if len(anchor) >= 5 else (4 if len(anchor) >= 3 else 6) - match = re.search(rf"([\u4e00-\u9fff]{{0,{prefix_width}}}{re.escape(anchor)})", cleaned) - phrase = match.group(1) if match else anchor + if re.search(r"[\u4e00-\u9fff]", anchor_phrase): + prefix_width = 0 if len(anchor_phrase) >= 5 else (4 if len(anchor_phrase) >= 3 else 6) + match = re.search(rf"([\u4e00-\u9fff]{{0,{prefix_width}}}{re.escape(anchor_phrase)})", cleaned) + phrase = match.group(1) if match else anchor_phrase else: - phrase = anchor + phrase = anchor_phrase phrase = _clean_search_phrase(phrase) if phrase.startswith("款") and len(phrase) > 2: phrase = phrase[1:] @@ -2214,6 +2332,64 @@ def _has_serum_formulation_conflict(left: ProductIdentity, right: ProductIdentit return bool(left_hit and right_hit and left_hit != right_hit) +def _has_taicend_baby_spray_equivalence(left: ProductIdentity, right: ProductIdentity) -> bool: + brand_tokens = {"taicend", "泰陞"} + return ( + bool(left.brand_tokens & brand_tokens) + and bool(right.brand_tokens & brand_tokens) + and "屁屁噴" in left.searchable_name + and "屁屁噴" in right.searchable_name + and _has_overlapping_base_spec(left, right) + ) + + +def _has_seche_vite_top_coat_alignment(left: ProductIdentity, right: ProductIdentity) -> bool: + brand_tokens = {"seche", "vite"} + return ( + brand_tokens <= (left.brand_tokens | right.brand_tokens) + and bool(left.brand_tokens & brand_tokens) + and bool(right.brand_tokens & brand_tokens) + and "快乾亮油" in left.searchable_name + and "快乾亮油" in right.searchable_name + ) + + +def _has_xiaomi_s101_shaver_alignment(left: ProductIdentity, right: ProductIdentity) -> bool: + brand_tokens = {"xiaomi", "小米", "小米有品"} + return ( + bool(left.brand_tokens & brand_tokens) + and bool(right.brand_tokens & brand_tokens) + and "s101" in left.searchable_name + and "s101" in right.searchable_name + and "電動刮鬍刀" in left.searchable_name + and "電動刮鬍刀" in right.searchable_name + ) + + +def _has_hinoki_roller_oil_alignment(left: ProductIdentity, right: ProductIdentity) -> bool: + return ( + "檜山坊" in left.brand_tokens + and "檜山坊" in right.brand_tokens + and "檜木精油" in left.searchable_name + and "檜木精油" in right.searchable_name + and "滾珠瓶" in left.searchable_name + and "滾珠瓶" in right.searchable_name + and _has_overlapping_base_spec(left, right) + ) + + +def _has_brush_baby_wildones_toothbrush_alignment(left: ProductIdentity, right: ProductIdentity) -> bool: + brand_tokens = {"brush", "baby", "wildones"} + return ( + brand_tokens <= left.brand_tokens + and brand_tokens <= right.brand_tokens + and "電動牙刷" in left.searchable_name + and "電動牙刷" in right.searchable_name + and "0-10y" in left.searchable_name + and "0-10y" in right.searchable_name + ) + + def _is_variant_sensitive_identity( left: ProductIdentity, right: ProductIdentity, @@ -2232,6 +2408,10 @@ def _is_variant_sensitive_identity( def _has_variant_descriptor_conflict(left: ProductIdentity, right: ProductIdentity, shared_anchor: str) -> bool: if _has_serum_formulation_conflict(left, right, shared_anchor): return True + if _has_taicend_baby_spray_equivalence(left, right): + return False + if _has_brush_baby_wildones_toothbrush_alignment(left, right): + return False if ( shared_anchor and shared_anchor not in SEARCH_BROAD_ANCHORS diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index b4eb794..8b7fde6 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -657,6 +657,133 @@ def test_marketplace_matcher_promotes_kate_bare_lip_line_with_series_copy(): assert "shared_identity_anchor_kate_bare_lip" in diagnostics.reasons +def test_marketplace_matcher_promotes_next_recoverable_exact_identity_cohorts(): + from services.marketplace_product_matcher import score_marketplace_match + + cases = [ + ( + "【OBgE】【官方公司貨】控油清爽防曬棒 18g (清爽 輕薄 控油 防曬棒 補擦不卡粉 戶外通勤 油肌友善 李多慧)", + "OBgE/控油清爽防曬棒18g", + "shared_identity_anchor", + ), + ( + "【Play&Joy 官方直營】ARTMIS 蔓越莓私密清潔慕斯 250ml(私密清潔 胺基酸配方 綿密泡沫 粉嫩如春 私密保養)", + "ARTMIS 蔓越莓私密清潔慕斯 250ml", + "shared_identity_anchor", + ), + ( + "【Play&Joy 官方直營】ARTMIS 金縷梅私密清潔慕斯 250ml(私密清潔 胺基酸配方 綿密泡沫 粉嫩如春 私密保養)", + "ARTMIS 金縷梅私密清潔慕斯 250ml", + "shared_identity_anchor", + ), + ( + "美國 Seche Vite 快乾亮油", + "美國Seche Vite指甲快乾亮油14ml", + "shared_identity_anchor_seche_vite_top_coat", + ), + ( + "【TAICEND 泰陞】寶貝液體保護膜 屁屁噴(100ml/1入組)", + "TAICEND泰陞 寶貝液體護膜100ml 屁屁噴 屁屁膏", + "strong_exact_spec_match", + ), + ( + "【femfresh 芳芯】私密潔膚露 250ml 長效清新 純淨植萃 舒緩敏感肌膚 私密處清潔 私密處護理|繁華中西藥局|", + "【femfresh芳芯 官方直營】私密潔膚露250ml (任選)", + "shared_identity_anchor_femfresh_wash", + ), + ( + "【VIGILL 婦潔】男性私密沐浴露220ml(男性私密清潔 一瓶洗全身)", + "【VIGILL 婦潔】日常潔淨 私密沐浴露220ml", + "shared_identity_anchor_vigill_private_wash", + ), + ( + "【Solone】光采奪目眼部飾底乳(眼部打底 眼影打底)", + "Solone 光采奪目眼部飾底乳 2.8g", + "shared_identity_anchor_packaging_variant", + ), + ( + "【小米有品】HYDSTO 車載香薰(全車淨化/持久清香/車用香水/香薰)", + "小米有品 HYDSTO 車載香薰", + "shared_identity_anchor_packaging_variant", + ), + ( + "【小米】電動刮鬍刀 S101(米家電動刮鬍刀 小米刮鬍刀 電動刮鬍刀 米家刮鬍刀)", + "【小米 Xiaomi】 小米電動刮鬍刀 S101", + "shared_model_token_xiaomi_s101_shaver", + ), + ( + "【PRAMY 柏瑞美】磁吸控油定妝噴霧 100ML(柔焦霧面)", + "【柏瑞美PRAMY】 磁吸控油定粧噴霧 柔焦霧面", + "shared_identity_anchor", + ), + ( + "【PRAMY 柏瑞美】磁吸控油定妝噴霧 100ML(水光亮面)", + "【柏瑞美PRAMY】 磁吸控油定粧噴霧 水光亮面", + "shared_identity_anchor", + ), + ( + "【i’m meme】韓國 Multi Stick Dual 雙頭修容打亮棒 3.3g (修容 打亮 修容棒 打亮筆 修容筆)", + "【I’M MEME】我愛小臉修容打亮棒 3.3g", + "shared_identity_anchor", + ), + ( + "【檜山坊】檜木精油滾珠瓶5ml 兩入(療癒 放鬆 穩定心神)", + "【檜山坊】台灣原生檜木精油5ml滾珠瓶兩入組", + "shared_identity_anchor_hinoki_roller_oil", + ), + ( + "【ARM&HAMMER 鐵鎚】小蘇打配方體香膏(71g)", + "Arm & Hammer 小蘇打體香膏 2.5oz /71g 長效防護 植物萃取 溫合無鋁", + "strong_exact_spec_match", + ), + ( + "【Brush Baby】WildOnes 充電式兒童聲波電動牙刷 0-10Y(多款可選)", + "Brush Baby WildOnes 充電式兒童電動牙刷(0-10Y)-多款可選", + "shared_model_token_brush_baby_wildones", + ), + ( + "【PALMER’S 帕瑪氏】新撫紋按摩乳250ml(新配方效果全新升級)", + "Palmer s 可可脂撫紋按摩乳液 250ml", + "strong_exact_spec_match", + ), + ] + + for momo_name, competitor_name, expected_reason in cases: + diagnostics = score_marketplace_match(momo_name, competitor_name) + assert diagnostics.score >= 0.76, (momo_name, diagnostics) + assert diagnostics.hard_veto is False + if expected_reason == "shared_identity_anchor": + assert any(reason.startswith("shared_identity_anchor") for reason in diagnostics.reasons) + else: + assert expected_reason in diagnostics.reasons + + +def test_marketplace_matcher_rejects_fragrance_formula_and_finish_variant_mismatch(): + from services.marketplace_product_matcher import score_marketplace_match + + cases = [ + ( + "【MUJI 無印良品】芬香蠟燭.茉莉花香味/85g", + "芬香蠟燭.梔子花香味/85g【MUJI 無印良品】", + ), + ( + "【Play&Joy 官方直營】ARTMIS 蔓越莓私密清潔慕斯 250ml", + "ARTMIS 金縷梅私密清潔慕斯 250ml", + ), + ( + "【PRAMY 柏瑞美】磁吸控油定妝噴霧 100ML(柔焦霧面)", + "【柏瑞美PRAMY】 磁吸控油定粧噴霧 水光亮面", + ), + ] + + for momo_name, competitor_name in cases: + diagnostics = score_marketplace_match(momo_name, competitor_name) + assert diagnostics.hard_veto is True + assert diagnostics.comparison_mode == "not_comparable" + assert diagnostics.score < 0.76 + assert "variant_option_conflict" in diagnostics.reasons + + def test_marketplace_matcher_rejects_refill_core_vs_case_only_pack(): from services.marketplace_product_matcher import score_marketplace_match