# ewoooc/services/marketplace_product_matcher.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""跨電商商品身份比對工具。

這裡處理「是否為同一個商品」；價格只作為 sanity check，不能主導配對。
"""

from __future__ import annotations

import re
import unicodedata
from dataclasses import dataclass, field
from difflib import SequenceMatcher
from typing import Iterable, Optional


# Marketing / platform phrases that carry no product identity.
# _strip_noise() replaces every occurrence of each phrase (lower-cased, longest
# phrase first) with a space before tokenisation, so entries must be written in
# the same form normalize_product_text() produces (lower-case, NFKC).
# Matching is plain substring replacement, so short entries such as "送" or
# "官方" are removed wherever they appear, including inside longer words.
NOISE_PHRASES = (
    "momo",
    "pchome",
    "24h",
    "官方直營",
    "官方",
    "公司貨",
    "台灣公司貨",
    "專櫃公司貨",
    "正貨",
    "原廠",
    "限時",
    "特惠",
    "優惠",
    "超值",
    "加贈",
    "贈品",
    "送禮",
    "送",
    "買一送一",
    "買1送1",
    "限定版",
    "璀璨奢金限定版",
    "單入任選",
    "單入",
    "全肌防護",
    "經典防護王",
    "賦活美學",
    "弱酸性",
    "植萃複方",
    "溫和潤澤護理",
    "ph值平衡",
    "淨味沐浴乳",
    "香氛凝膠",
    "絲絨甜點新色",
    "鎖吻棒",
    "水光持色",
    "贈精油一瓶",
    "贈送薰衣草精油",
    "超聲波大噴霧",
    "天然陶瓷",
    "女大電視節目推薦",
    "醫師好辣推薦",
    "嬰兒界萬用霜",
    "48h抑味爽身",
    "10度c順降",
    "vit b5",
    "vitb5",
    "任選",
    "即期品",
    "福利品",
    "預購",
    "免運",
    "熱銷",
    "人氣",
    "必買",
    "推薦",
    "新品",
    "升級版",
    "經典",
    "獨家",
    "囤貨組",
    "超值組",
    "優惠組",
    "分享包",
    "組合",
    "多款可選",
    "多款任選",
    "任選多款",
    "多色可選",
    "色號可選",
    "平行輸入",
    "大容量",
    "附燈泡",
    "贈燈泡",
    "定時",
    "調節亮度",
    "可調光",
    "聖誕禮物",
    "聖誕節禮物",
    "懶人霜",
    "打造素顏女神",
    "第三代經典版白",
)

# Tokens that never count as identity evidence. They are compared against whole
# tokens (not substrings): _known_brand_tokens() and _leading_brand_tokens()
# drop them from brand candidates, and parse_product_identity() drops them from
# core tokens. The set mixes promo words, count/measure units, and country names.
GENERIC_TOKENS = {
    "官方",
    "直營",
    "公司貨",
    "專櫃",
    "正貨",
    "原廠",
    "限時",
    "特惠",
    "優惠",
    "超值",
    "加贈",
    "贈品",
    "送禮",
    "即期品",
    "新品",
    "升級版",
    "經典",
    "人氣",
    "熱銷",
    "必買",
    "推薦",
    "組",
    "入",
    "瓶",
    "盒",
    "包",
    "片",
    "支",
    "條",
    "件",
    "ml",
    "g",
    "mg",
    "la",
    "paris",
    "多款",
    "可選",
    "任選",
    "平行輸入",
    "大容量",
    "日本",
    "韓國",
    "澳洲",
    "法國",
    "英國",
    "美國",
}

# Phrases treated as noise when building search queries.
# NOTE(review): not referenced in the visible part of this file; presumably
# stripped from a product name before it is sent to a marketplace search —
# confirm against the consumer further down the module.
SEARCH_NOISE_PHRASES = (
    "新品上市",
    "全新上市",
    "國際航空版",
    "超取免運",
    "任選一款",
    "任選1款",
    "任選一色",
    "任選1色",
    "多款任選",
    "多款可選",
    "色號可選",
    "香味可選",
    "口味可選",
    "送精美紙袋",
    "精美紙袋",
    "交換禮物",
    "聖誕禮物",
    "限定版",
    "璀璨奢金限定版",
    "單入任選",
    "全肌防護",
    "經典防護王",
    "賦活美學",
    "弱酸性",
    "植萃複方",
    "溫和潤澤護理",
    "ph值平衡",
    "淨味沐浴乳",
    "香氛凝膠",
    "絲絨甜點新色",
    "鎖吻棒",
    "水光持色",
    "贈精油一瓶",
    "贈送薰衣草精油",
    "超聲波大噴霧",
    "天然陶瓷",
    "女大電視節目推薦",
    "醫師好辣推薦",
    "嬰兒界萬用霜",
    "48h抑味爽身",
    "10度c順降",
    "vit b5",
    "vitb5",
    "母親節",
    "父親節",
    "情人節",
    "外出清潔",
    "卸除髒汙",
    "卸除防曬",
    "卸防曬",
    "防水眼線",
    "寶寶牙刷",
    "紗布牙刷",
    "調節亮度",
    "韓國彩妝",
    "水光感",
    "官方直營",
    "官方",
    "經典款",
    "校色",
    "控油",
    "好氣色",
    "懶人霜",
    "打造素顏女神",
    "我愛修膚",
    "第三代經典版白",
    "溫和不乾澀",
    "寶寶共和國",
    "任選三款",
    "三款",
    "枚入",
    "類光繚指甲油專用亮油",
    "小銀蓋",
    "如膠似漆",
    "第三代",
    "經典版",
    "櫻花輕盈版",
    "兩入組",
    "超值兩入組",
    "任選色號",
    "多色任選",
    "多色可選",
    "多色",
    "德國妮維雅",
    "無印止汗滾珠",
    "眉彩刷",
    "眉餅盒分開販售",
    "極細筆芯",
    "防水抗暈",
    "兒童化妝品",
    "無毒防曬霜",
    "天然彩妝",
    "內贈芳香劑",
    "衛浴精油擴香瓶棒組",
    "衛浴精油擴香瓶",
    "三色選一",
    "贈複方",
)

# Whole tokens treated as noise when building search queries.
# NOTE(review): not referenced in the visible part of this file; presumably the
# token-level counterpart of SEARCH_NOISE_PHRASES — confirm against the consumer.
SEARCH_NOISE_TOKENS = {
    "一款",
    "1款",
    "一色",
    "1色",
    "上市",
    "全新",
    "新品",
    "香味",
    "口味",
    "味道",
    "顏色",
    "色號",
    "紙袋",
    "禮物",
    "清潔",
    "髒汙",
    "防曬",
    "彩妝",
    "水光感",
    "超取",
    "免運",
    "航空版",
    "國際版",
    "附燈泡",
    "定時",
    "眼妝",
    "滅菌",
    "保濕",
    "抗老",
    "超品日",
    "經典款",
    "款",
    "pdrn",
    "校色",
    "控油",
    "好氣色",
    "懶人霜",
    "我愛修膚",
    "第三代",
    "經典版",
    "版白",
    "限量聯名款",
    "play",
    "boy",
    "小虎",
    "啾啾妹",
    "煎妮花",
    "涼感潔淨",
    "私密處清潔",
    "溫和不乾澀",
    "寶寶共和國",
    "三款",
    "枚入",
    "小銀蓋",
    "如膠似漆",
    "美甲",
    "3d",
    "多色",
    "提亮",
    "兩入組",
    "櫻花輕盈版",
}

# Product-name fragments that pin down what a listing is, ordered roughly from
# very specific product lines down to broad category words at the end.
# NOTE(review): not referenced in the visible part of this file. The tuple order
# looks deliberate (specific before generic), so a consumer probably takes the
# first anchor found — confirm before reordering or appending entries.
SEARCH_IDENTITY_ANCHORS = (
    "控油清爽防曬棒",
    "蔓越莓私密清潔慕斯",
    "金縷梅私密清潔慕斯",
    "光采奪目眼部飾底乳",
    "男性私密沐浴露",
    "私密沐浴露",
    "hydsto 車載香薰",
    "電動刮鬍刀 s101",
    "磁吸控油定妝噴霧",
    "修容打亮棒",
    "私密潔淨凝露",
    "柔霧裸唇膏",
    "潤浸保濕清爽身體乳液",
    "閃亮珍珠眼影棒",
    "智能光感應無線自動除臭芳香噴霧機",
    "usb精油薰香機",
    "超音波水氧機",
    "類光繚指甲油",
    "多效提亮防曬霜",
    "速描眼線膠筆",
    "經典旋轉眉筆",
    "3d造型眉彩餅補充芯",
    "止汗爽身乳液",
    "慕之幼爽身潤膚乳",
    "精油芬香護手霜",
    "持久植物香氛精油",
    "口袋雙色修容打亮盤",
    "經典乳霜",
    "蜂王玫瑰外泌微臻霜",
    "微分子肌底原生露",
    "小浪智能感應自動噴香機",
    "智能感應自動噴香機",
    "深層全效面膜",
    "私密防護舒緩噴霧",
    "櫻之花身體噴霧",
    "光透立體眼線筆",
    "葳兒柔賦活凝膠",
    "兒童指甲油",
    "麗駐蘭修復舒緩面膜",
    "濕度數顯智能加濕器",
    "新艷陽夏水離子熱防禦隔離露",
    "3d極細防水眼線膠筆",
    "恆久完美透膚煙染腮紅",
    "私密植萃美白緊緻凝露",
    "學習刷牙漱口水",
    "羅馬柱智慧居家車用香氛機",
    "經典菲菲染唇液",
    "染唇液",
    "晨霧純精油擴香儀",
    "天然植物香氛精油",
    "爆水擦澡濕巾",
    "嬰兒潤膚乳",
    "可撕式水性兒童指甲油",
    "aroma lava 解憂放鬆緩緩燈",
    "經典款香氛蠟燭暖燈",
    "我愛超磁妝定妝噴霧",
    "全天候超完美定妝噴霧",
    "怪獸級持色唇膏",
    "焦糖楓葉香氛擴香花禮盒",
    "香氛蠟燭20種香味",
    "tokyo車用夾式消臭芳香劑",
    "北歐簡樸融蠟燈桌面氣氛夜燈",
    "大地有機植萃護膚油",
    "3d立體持色眉彩盤",
    "細芯睛彩雙頭眉筆",
    "雙頭旋轉極細眉筆",
    "武士刀眉筆",
    "自動武士刀眉筆筆蕊",
    "無極限保濕防曬妝前乳",
    "水凝光透 妝前防護乳",
    "水凝光透妝前防護乳",
    "經典素顏霜",
    "閃耀保色護甲油",
    "溫和洗手慕斯",
    "足足稱奇足膜",
    "時尚潮流美甲片",
    "止汗爽身噴霧",
    "止汗爽身乳膏pro",
    "零粉感超持久粉底棒",
    "超持久水光鎖吻唇釉",
    "裸光蜜粉餅",
    "私密潔膚露",
    "私密肌潔膚露",
    "男性私密醒肌抑菌噴霧",
    "男性私密激淨凝露",
    "私密抑菌噴霧",
    "天然陶瓷精油香薰機",
    "裸光幻閃亮采餅",
    "絕對持久定妝噴霧",
    "兒童防曬氣墊粉餅",
    "勝過眼皮十色眼影盤",
    "提提亮膚打亮液",
    "甜甜嫩頰腮紅液",
    "自動武士刀眉筆",
    "超進化光感輕潤遮瑕棒",
    "4合1微臻全能氣墊粉餅",
    "唯我玫瑰裸光潤唇膏",
    "晨曦冷香儀",
    "舒恬良修護霜",
    "頂級濃潤柔霜潤唇膏",
    "絕對完美永生玫瑰逆齡乳霜",
    "永生玫瑰逆齡乳霜",
    "永生玫瑰霜",
    "玫瑰精露",
    "玫瑰霜",
    "青春敷面膜",
    "長效潤膚霜",
    "小黑瓶",
    "私密處護潔露",
    "私密護潔露",
    "口腔清潔棒",
    "含氟防蛀修護牙膏",
    "自然遮瑕素顏霜",
    "超持久細滑眼線筆",
    "香氛融蠟燈",
    "水晶香氛能量寶盒禮盒組",
    "零粉感超持久柔焦蜜粉餅",
    "私密肌潔淨露",
    "私密潔浴露",
    "身體除毛器",
    "免用水潔淨液",
    "身體按摩精油",
    "按摩精油",
    "擴香補充瓶",
    "擴香瓶",
    "全面修復霜",
    "修復霜",
    "護膚膏",
    "屁屁噴",
    "身體乳",
    "緊實乳",
    "妝前防護乳",
    "妝前乳",
    "素顏霜",
    "潔膚露",
    "浴潔露",
    "潔淨液",
    "護甲油",
    "指甲油",
    "美甲片",
    "唇凍",
    "唇釉",
    "唇膏",
    "粉底棒",
    "遮瑕棒",
    "化妝水",
    "精華液",
    "精華",
    "面膜",
    "乳液",
    "乳霜",
    "面霜",
    "精油",
    "水氧機",
    "香氛機",
)

# Reason codes of hand-written ("focused") identity rules.
# NOTE(review): not referenced in the visible part of this file; the name
# suggests matches carrying these reasons are routed to manual review rather
# than auto-accepted — confirm against the rule code that emits these strings.
FOCUSED_IDENTITY_REVIEW_ONLY_REASONS = {
    "muji_aroma_hand_cream_brandless",
    "johnsons_baby_lotion_variant_catalog",
    "im_meme_fixx_cool_setting_spray",
    "so_natural_fixx_setting_spray_catalog",
    "kate_powder_case_catalog",
    "kate_monster_lipstick_catalog",
    "opi_gel_polish_series_catalog",
    "romand_juicy_lip_tint_2_catalog",
    "recipe_box_peelable_child_polish_catalog",
    "solone_longlasting_eyeliner",
    "shu_auto_hard_formula_refill_catalog",
    "summer_eve_full_skin_wash_2pack",
    "the_forest_maple_diffuser_flower_brandless",
}

# Reason codes of focused identity rules.
# NOTE(review): not referenced in the visible part of this file; the name
# suggests these rules are specific enough to skip the variant-review step —
# confirm against the consumer. Most entries also appear in
# FOCUSED_IDENTITY_TOTAL_PRICE_REASONS.
FOCUSED_IDENTITY_VARIANT_REVIEW_BYPASS_REASONS = {
    "artmis_cranberry_private_mousse_250ml",
    "artmis_witch_hazel_private_mousse_250ml",
    "arden_eight_hour_lip_spf15_3_7g_3pack",
    "baan_baby_lip_original_strawberry_catalog",
    "dhc_olive_lip_1_5g",
    "flortte_fruit_salad_eyeliner_0_5ml_catalog",
    "frudia_honey_blueberry_lip_10g",
    "hh_private_cleanser_laundry_wash_set",
    "kanebo_allie_bright_uv_milk_60g_catalog",
    "laroche_posay_lip_balm_4_7ml",
    "laroche_posay_repair_lip_balm_7_5ml",
    "lush_sakura_body_spray",
    "neutrogena_hand_cream_56g_scent_catalog",
    "natures_care_sheep_oil_exact_pack",
    "opi_gel_polish_exact_model",
    "sebamed_baby_lip_4_8g_2pack",
    "sebamed_ph38_private_wash_200ml_2pack",
    "so_natural_fixx_setting_spray_120ml_plain",
    "sk_ii_essence_330ml_2pack",
    "amiino_whitening_repair_cream_30ml",
    "tomoon_nail_clipper_luxury_size",
    "yes_cuticle_scissors_9cm",
    "yes_curved_scissors_9cm",
    "yes_foot_nail_scissors_10_5cm",
    "yes_nail_tool_exact_model_size",
    "cetaphil_long_lotion_237ml",
    "cetaphil_long_lotion_473ml",
    "cetaphil_long_moisturizing_cream_250g",
    "cetaphil_ad_repair_cream_227g",
    "clarins_double_serum_eye_20ml",
    "lab52_paw_patrol_baby_toothbrush_2pack",
    "derma_baby_wash_150ml",
    "derma_baby_wash_500ml",
    "physiogel_ai_ice_essence_200ml_2pack",
    "playjoy_powerman_male_care_30ml",
    "ts6_private_elastic_gel_40g",
    "beauty_foot_mask_exact_pack",
    "kameria_centella_foot_mask_17ml_2pc",
    "ts6_lubricant_100g_3pack",
    "ts6_peach_bright_gel_45g_3pack",
    "ts6_white_wash_peach_gel_kit",
    "ts6_cooling_clean_mousse_100g",
    "vaseline_baby_jelly_368g_3pack",
}

# Reason codes for focused rules that matched a listing with no brand in its name.
# NOTE(review): neither set is referenced in the visible part of this file; the
# names suggest the first forces review and the second still allows a
# total-price comparison — confirm against the consumer.
FOCUSED_IDENTITY_BRANDLESS_REVIEW_REASONS = {
    "herbacin_classic_hand_cream_20ml_brandless",
    "muji_aroma_hand_cream_brandless",
    "the_forest_maple_diffuser_flower_brandless",
}

# The single entry here is also a member of FOCUSED_IDENTITY_BRANDLESS_REVIEW_REASONS.
FOCUSED_IDENTITY_BRANDLESS_TOTAL_PRICE_REASONS = {
    "herbacin_classic_hand_cream_20ml_brandless",
}

# Reason codes of focused identity rules.
# NOTE(review): not referenced in the visible part of this file; the name
# suggests matches with these reasons are compared on total listing price
# (MatchDiagnostics.price_basis defaults to "total_price") — confirm against
# the consumer.
FOCUSED_IDENTITY_TOTAL_PRICE_REASONS = {
    "3w_clinic_collagen_foundation_50ml_2pack",
    "hanamisui_moisture_original_gel_1_7g_3pack",
    "hanamisui_inclear_private_gel_1_7g_3pack",
    "hanamisui_relax_lavender_gel_1_7g_3pack",
    "the_ordinary_caffeine_egcg_30ml",
    "herbacin_classic_hand_cream_20ml_brandless",
    "sab_private_spray",
    "st_clare_private_mousse_150ml_2pack",
    "st_clare_private_mousse_spray_set",
    "biopeutic_plus_aha_lotion_20_150ml",
    "taisu_baby_bath_shampoo_3pc",
    "arden_eight_hour_lip_spf15_3_7g_3pack",
    "flortte_fruit_salad_eyeliner_0_5ml_catalog",
    "hh_private_cleanser_laundry_wash_set",
    "kanebo_allie_bright_uv_milk_60g_catalog",
    "laroche_posay_repair_lip_balm_7_5ml",
    "neutrogena_hand_cream_56g_scent_catalog",
    "natures_care_sheep_oil_exact_pack",
    "opi_gel_polish_exact_model",
    "sebamed_ph38_private_wash_200ml_2pack",
    "sk_ii_essence_330ml_2pack",
    "amiino_whitening_repair_cream_30ml",
    "kussen_baby_butt_cream_50ml_3pack",
    "tomoon_nail_clipper_luxury_size",
    "yes_cuticle_scissors_9cm",
    "yes_curved_scissors_9cm",
    "yes_foot_nail_scissors_10_5cm",
    "yes_nail_tool_exact_model_size",
    "bone_diffuser_gift_3pack",
    "selection1990_half_dome_wax_lamp_white",
    "selection1990_bendable_wax_lamp_white",
    "canmake_tear_bag_palette",
    "gdesign_aroma_lava_lamp_2",
    "hooome_classic_white_wax_lamp_bulbs_giftbox",
    "herb24_mist_diffuser_black",
    "pavaruni_40_scent_oil",
    "pavaruni_20_scent_candle",
    "artmis_cranberry_private_mousse_250ml",
    "artmis_witch_hazel_private_mousse_250ml",
    "baan_baby_lip_original_strawberry_catalog",
    "dhc_olive_lip_1_5g",
    "derma_eco_skin_oil",
    "frudia_honey_blueberry_lip_10g",
    "laroche_posay_lip_balm_4_7ml",
    "lush_sakura_body_spray",
    "sebamed_baby_lip_4_8g_2pack",
    "so_natural_fixx_setting_spray_120ml_plain",
    "cetaphil_long_lotion_237ml",
    "cetaphil_long_lotion_473ml",
    "cetaphil_long_moisturizing_cream_250g",
    "cetaphil_ad_repair_cream_227g",
    "clarins_double_serum_eye_20ml",
    "lab52_paw_patrol_baby_toothbrush_2pack",
    "derma_baby_wash_150ml",
    "derma_baby_wash_500ml",
    "physiogel_ai_ice_essence_200ml_2pack",
    "playjoy_powerman_male_care_30ml",
    "ts6_private_elastic_gel_40g",
    "beauty_foot_mask_exact_pack",
    "kameria_centella_foot_mask_17ml_2pc",
    "ts6_lubricant_100g_3pack",
    "ts6_peach_bright_gel_45g_3pack",
    "ts6_white_wash_peach_gel_kit",
    "ts6_cooling_clean_mousse_100g",
    "vaseline_baby_jelly_368g_3pack",
    "nivea_creme_100ml",
    "schick_womens_sensitive_blade_3pack",
}

# Category-level anchors. Every entry also appears in SEARCH_IDENTITY_ANCHORS.
# NOTE(review): not referenced in the visible part of this file; presumably
# marks anchors too generic to identify a product on their own — confirm.
SEARCH_BROAD_ANCHORS = {
    "乳霜",
    "面霜",
    "面膜",
    "精華",
    "乳液",
    "精油",
    "香氛融蠟燈",
}

# Product-category keywords (candles, diffusers, make-up, sunscreen, ...).
# NOTE(review): not referenced in the visible part of this file; the name
# suggests that for these categories a scent/shade/variant difference means a
# different product — confirm against the consumer.
VARIANT_SENSITIVE_KEYWORDS = {
    "香氛蠟燭",
    "芬香蠟燭",
    "蠟燭",
    "擴香",
    "融蠟燈",
    "車用香氛",
    "香味",
    "私密清潔慕斯",
    "私密清潔凝露",
    "私密潔淨凝露",
    "私密淨白清潔凝露",
    "私密防護慕絲",
    "慕絲",
    "定妝噴霧",
    "妝前防護乳",
    "妝前乳",
    "素顏霜",
    "粉底",
    "美甲片",
    "指甲油",
    "指彩",
    "眼影盤",
    "唇釉",
    "唇膏",
    "唇凍",
    "潤唇膏",
    "眉筆",
    "眼線筆",
    "腮紅液",
    "打亮液",
    "蜜粉餅",
    "粉餅盒",
    "粉底棒",
    "遮瑕棒",
    "遮瑕蜜",
    "護手霜",
    "滋養霜",
    "修護乳",
    "修容打亮棒",
    "防曬",
    "防曬乳",
    "防曬霜",
    "防曬噴霧",
    "防曬棒",
}

# Scent, flavour, colour/shade and finish words that name a specific variant.
# NOTE(review): not referenced in the visible part of this file; presumably
# used to detect which variant a listing refers to — confirm against the
# consumer. Several entries are substrings of others (e.g. "白茶" / "白茶蘭花").
VARIANT_OPTION_COLOR_WORDS = {
    "茉莉花",
    "梔子花",
    "白茶蘭花",
    "白茶",
    "白麝香",
    "黑麝香",
    "清新花園",
    "寶貝粉香",
    "青檸羅勒",
    "炭木香",
    "無花果",
    "鼠尾草",
    "海鹽",
    "檸檬草",
    "茶樹",
    "英國梨",
    "小蒼蘭",
    "英國梨小蒼蘭",
    "櫻花",
    "繡球花",
    "魔髮奇緣",
    "清甜柚香",
    "杏仁牛奶",
    "杏仁",
    "薄荷",
    "橙花",
    "完熟白桃",
    "琥珀橙",
    "干邑棕",
    "賽車綠",
    "原味",
    "草莓",
    "蔓越莓",
    "金縷梅",
    "柔焦霧面",
    "水光亮面",
    "菸鹼醯胺",
    "胺基酸",
    "黑色",
    "棕色",
    "咖啡色",
    "灰色",
    "rose",
    "白色",
    "紅色",
    "粉色",
    "粉紅",
    "桃紅",
    "玫瑰",
    "玫瑰色",
    "珊瑚",
    "珊瑚色",
    "橘色",
    "橙色",
    "裸色",
    "奶茶色",
    "豆沙色",
    "紫色",
    "絕絕紫",
    "薰衣草",
    "藍色",
    "綠色",
    "膚色",
    "自然色",
    "明亮色",
    "透明色",
    "清爽型",
    "滋潤型",
    "橡棕",
    "暗灰",
    "灰棕",
    "淺玫粉",
    "極光之藍",
    "月光銀影",
}

# NOTE(review): none of the three constants below is referenced in the visible
# part of this file; the descriptions are inferred from names and contents and
# should be confirmed against their consumers.

# Descriptor fragments that look like variant text but do not name a variant.
VARIANT_DESCRIPTOR_NOISE_KEYWORDS = {
    "平輸航空版",
    "多色任選",
    "色號任選",
    "任選色號",
    "極細筆頭",
    "筆頭",
    "官方直營",
    "入組",
    "盒組",
}

# Phrases signalling that one listing covers several selectable variants.
# The two-character entries "任選" / "可選" are substrings of the longer ones.
MULTI_VARIANT_LISTING_PHRASES = (
    "多款任選",
    "多款可選",
    "多色任選",
    "多色可選",
    "多種香味",
    "多種香氣",
    "香味任選",
    "香味可選",
    "味道可選",
    "任選",
    "可選",
)

# Accessory terms (screen protectors / protective films).
SEARCH_AMBIGUOUS_PRODUCT_TERMS = {
    "保護膜",
    "保護貼",
}

# Canonical brand -> aliases, layered on top of the brand tables imported from
# services.price_comparison inside _known_brand_tokens(). Every alias (and the
# canonical key itself) is lower-cased and mapped to the canonical name there,
# overriding any imported mapping for the same alias. Aliases are passed through
# normalize_product_text() before being searched for in a product name.
BRAND_ALIAS_OVERRIDES = {
    "clarins": ("克蘭詩", "clarins"),
    "nars": ("nars",),
    "relove": ("relove",),
    "stadler form": ("stadler form", "stadlerform"),
    "cetaphil": ("舒特膚", "cetaphil"),
    "sisley": ("希思黎", "sisley"),
    "gennies": ("奇妮", "gennies"),
    "uruhimemomoko": ("潤姬桃子", "uruhimemomoko", "uruhime momoko"),
    "arau baby": ("arau baby", "arau", "愛樂寶", "saraya"),
    "sebamed": ("sebamed", "施巴"),
    "shu uemura": ("shu uemura", "shuuemura", "植村秀"),
    "johnsons": ("johnsons", "johnson's", "johnson", "嬌生"),
    "gillette": ("gillette", "吉列"),
    "schick": ("schick", "舒適牌"),
    "obge": ("obge",),
    "vaseline": ("vaseline", "凡士林"),
    "eaoron": ("eaoron",),
    "kameria": ("kameria", "凱蜜菈"),
    "cocodor": ("cocodor",),
    "peripera": ("peripera",),
    "solone": ("solone",),
    "im meme": ("im meme", "i'm meme", "i’m meme"),
    "dr.wu": ("dr.wu", "dr wu", "drwu", "達爾膚"),
    "febreze": ("febreze", "風倍清"),
    "jo malone": ("jo malone",),
    "prada": ("prada", "普拉達"),
    "za": ("za",),
    "hh": ("hh", "草本新淨界"),
    "小浪": ("小浪",),
    "xiaomi": ("小米有品", "小米", "xiaomi"),
    "mac": ("m.a.c", "mac", "m a c"),
    "opi": ("o.p.i", "opi", "o p i"),
    "curel": ("curel", "珂潤"),
    "karadium": ("karadium",),
    "st雞仔牌": ("日本雞仔牌st", "日本st雞仔牌", "st雞仔牌", "雞仔牌st", "雞仔牌"),
}

# Product type -> substrings that indicate it.
# _extract_product_type() walks this dict in definition order and returns the
# first type with any alias contained in the text, so ORDER MATTERS: specific
# types (e.g. "止汗噴霧", "私密凝露") must stay above the generic ones they
# overlap with (e.g. "精華", "面霜", "防曬"). Matching is plain substring
# containment, so short aliases such as "霜", "粉", "包", "uv" or "eye" match
# very broadly; they only act as late fallbacks because of their position.
PRODUCT_TYPES = {
    "止汗噴霧": ("止汗爽身噴霧", "爽身噴霧", "止汗噴霧"),
    "潔膚露": ("潔膚露", "浴潔露", "護潔露", "沐浴露", "wash", "私密潔浴露"),
    "私密噴霧": ("私密噴霧", "抑菌噴霧", "醒肌抑菌噴霧"),
    "私密凝露": ("凝露", "激淨凝露", "緊實凝露", "亮白凝露"),
    "護甲油": ("護甲油", "亮油", "top coat"),
    "定妝噴霧": ("定妝噴霧", "setting spray"),
    "修容打亮棒": ("修容打亮棒", "修容棒", "打亮棒"),
    "刮鬍刀": ("刮鬍刀", "刮胡刀", "shaver", "razor"),
    "體香膏": ("體香膏", "體香劑", "deodorant"),
    "電動牙刷": ("電動牙刷", "聲波電動牙刷", "electric toothbrush"),
    "洗手慕斯": ("洗手慕斯", "洗手泡泡", "hand wash foam"),
    "私密慕斯": ("私密清潔慕斯", "私密防護慕絲", "私密慕斯"),
    "足膜": ("足膜", "足部膜", "足部去角質"),
    "妝前乳": ("妝前乳", "妝前防護乳", "妝前隔離", "primer"),
    "素顏霜": ("素顏霜", "tone up cream"),
    "氣墊粉餅": ("氣墊粉餅", "cushion"),
    "眼影盤": ("眼影盤",),
    "打亮液": ("打亮液",),
    "腮紅液": ("腮紅液",),
    "護唇膏": ("護唇膏", "潤唇膏"),
    "唇釉": ("唇釉", "唇彩", "lip tint", "lip glaze"),
    "粉底棒": ("粉底棒", "foundation stick"),
    "精華": ("精華", "精華液", "essence", "serum", "安瓶"),
    "化妝水": ("化妝水", "機能水", "toner", "lotion"),
    "乳液": ("乳液", "按摩乳", "潤膚乳", "身體乳", "嬰兒乳液", "寶寶乳液", "emulsion", "milk"),
    "面霜": ("面霜", "乳霜", "霜", "cream"),
    "防曬": ("防曬", "spf", "uv", "sunscreen"),
    "洗面乳": ("洗面乳", "洗顏", "潔面", "cleanser", "foam"),
    "面膜": ("面膜", "mask"),
    "眼霜": ("眼霜", "眼部", "眼膜", "eye"),
    "卸妝": ("卸妝", "cleansing", "remover"),
    "粉底": ("粉底", "粉霜", "粉凝露", "foundation"),
    "蜜粉": ("蜜粉", "powder"),
    "精油": ("精油", "香氛", "擴香"),
    "保健": ("錠", "膠囊", "粉", "飲", "包", "健康食品"),
}

# All Chinese counting units recognised after a number ("3入", "2瓶", ...).
COUNT_UNITS = {"入", "組", "瓶", "支", "條", "盒", "包", "袋", "片", "顆", "粒", "錠", "枚", "件", "罐", "杯", "本", "刀把", "刀片", "刀頭", "蕊"}
# Regex alternation over the same units, used by _extract_specs(); the
# two-character units are listed first.
COUNT_UNIT_PATTERN = r"(?:刀把|刀片|刀頭|入|組|瓶|支|條|盒|包|袋|片|顆|粒|錠|枚|件|罐|杯|本|蕊)"
# Units counting individual pieces vs. units counting outer containers.
# _extract_specs() multiplies the largest piece count by the largest container
# count to estimate a total piece count. "刀把", "刀片", "刀頭", and "蕊"
# belong to neither set.
PIECE_UNITS = {"包", "袋", "片", "顆", "粒", "錠", "枚"}
CONTAINER_UNITS = {"入", "組", "盒", "罐", "杯", "本", "瓶", "支", "條", "件"}
# Units that _count_unit_family() maps to a shared family name; any unit not
# listed here is its own family.
COUNT_UNIT_FAMILIES = {
    "刀片": "blade",
    "刀頭": "blade",
    "蕊": "refill",
}
# English count units; _extract_specs() records every "<number> <unit>" hit
# as a count with unit "入".
ENGLISH_COUNT_UNIT_RE = r"(?:pcs?|pieces?|capsules?|caps?|tablets?|tabs?|packs?|sachets?|bottles?|boxes?)"
# Phrases describing bundle / set offers.
# NOTE(review): not referenced in the visible part of this file; presumably
# used to flag a listing as a bundle — confirm against the consumer.
BUNDLE_OFFER_PHRASES = (
    "囤貨組",
    "超值組",
    "特惠組",
    "優惠組",
    "優惠套組",
    "入門組",
    "禮盒組",
    "加大組",
    "加量組",
    "分享組",
    "明星組",
    "套組",
    "組合",
    "組合包",
    "雙件組",
    "二件組",
    "2件組",
    "家庭組",
    "多入組",
)
# If the leading 【...】 / [...] bracket of a product name contains any of these
# substrings, _leading_brand_tokens() treats the bracket as promo text and does
# not take brand tokens from it.
NON_BRAND_BRACKET_PHRASES = (
    "保濕組",
    "熱銷款",
    "限定",
    "特惠",
    "優惠",
    "超值",
    "囤貨",
    "組合",
    "套組",
    "禮盒",
    "分享",
    "雙件",
    "二件",
    "2件",
    "家庭",
    "多入",
    "任選",
    "買",
    "母親節",
)
# Single-character Chinese numerals (plus "兩" and "雙" for 2) mapped to ints.
# _extract_specs() only reads one numeral character in front of a unit, so
# compound numerals such as "十二" are not handled through this table.
CHINESE_COUNT = {
    "一": 1,
    "二": 2,
    "兩": 2,
    "雙": 2,
    "三": 3,
    "四": 4,
    "五": 5,
    "六": 6,
    "七": 7,
    "八": 8,
    "九": 9,
    "十": 10,
}


@dataclass(frozen=True)
class ProductIdentity:
    """Immutable parsed view of one product name, built by parse_product_identity()."""

    # The name as given by the caller ("" when the caller passed a falsy value).
    original_name: str
    # Output of normalize_product_text(): NFKC, lower-cased, brackets removed.
    normalized_name: str
    # normalized_name with NOISE_PHRASES stripped; the basis for tokenisation.
    searchable_name: str
    # Tokens of recognised brands, or leading-token guesses when none is known.
    brand_tokens: frozenset[str]
    # Key of PRODUCT_TYPES whose alias was found first, or None.
    product_type: Optional[str]
    # Every token extracted from searchable_name.
    tokens: frozenset[str]
    # tokens minus generic, purely numeric/spec and brand tokens, plus model codes.
    core_tokens: frozenset[str]
    # Sorted unique volumes in millilitres (litres are converted).
    volumes_ml: tuple[float, ...]
    # Sorted unique weights in grams (kilograms are converted).
    weights_g: tuple[float, ...]
    # Sorted unique dosages in milligrams (micrograms are converted).
    dosages_mg: tuple[float, ...]
    # Sorted unique (count, unit) pairs such as (3, "入").
    counts: tuple[tuple[int, str], ...]
    # Estimated total number of pieces, or None when no piece unit was found.
    total_piece_count: Optional[int]


@dataclass(frozen=True)
class MatchDiagnostics:
    """Score breakdown and classification for one compared pair of listings.

    The numeric fields hold the overall score and its components; the string
    fields carry classification labels, which ``tags`` flattens into a list.
    """

    score: float
    brand_score: float
    token_score: float
    spec_score: float
    sequence_score: float
    type_score: float
    price_penalty: float
    hard_veto: bool
    reasons: tuple[str, ...]
    comparison_mode: str = "exact_identity"
    match_type: str = "exact"
    price_basis: str = "total_price"
    alert_tier: str = "price_alert_exact"
    evidence_flags: tuple[str, ...] = ()
    identity_evidence: dict[str, object] = field(default_factory=dict)
    offer_evidence: dict[str, object] = field(default_factory=dict)

    @property
    def tags(self) -> list[str]:
        """Return the diagnostics as an ordered list of string tags.

        Order: the fixed ``identity_v2`` marker, one prefixed tag per non-empty
        classification label, threshold/veto markers, then one
        ``evidence_<flag>`` tag per evidence flag.
        """
        labelled_values = (
            ("comparison", self.comparison_mode),
            ("match_type", self.match_type),
            ("price_basis", self.price_basis),
            ("alert_tier", self.alert_tier),
        )
        markers = (
            ("brand_match", self.brand_score >= 0.95),
            ("spec_match", self.spec_score >= 0.85),
            ("identity_veto", self.hard_veto),
        )

        collected: list[str] = ["identity_v2"]
        # Empty labels are skipped entirely rather than emitted as "prefix_".
        collected.extend(f"{prefix}_{label}" for prefix, label in labelled_values if label)
        collected.extend(marker for marker, enabled in markers if enabled)
        collected.extend(f"evidence_{flag}" for flag in self.evidence_flags)
        return collected


@dataclass(frozen=True)
class UnitPriceComparison:
    """Result of comparing two offers on a per-unit price basis.

    Only ``comparable`` and ``reason`` are required; the quantity, price and
    gap fields default to ``None`` and the text fields to an empty string.
    """

    comparable: bool
    reason: str
    unit_label: str = ""
    momo_total_quantity: Optional[float] = None
    competitor_total_quantity: Optional[float] = None
    momo_unit_price: Optional[float] = None
    competitor_unit_price: Optional[float] = None
    unit_gap_amount: Optional[float] = None
    unit_gap_pct: Optional[float] = None
    summary: str = ""

    def as_dict(self) -> dict:
        """Return all fields as a plain dict, keyed by field name in declaration order."""
        exported_fields = (
            "comparable",
            "reason",
            "unit_label",
            "momo_total_quantity",
            "competitor_total_quantity",
            "momo_unit_price",
            "competitor_unit_price",
            "unit_gap_amount",
            "unit_gap_pct",
            "summary",
        )
        return {field_name: getattr(self, field_name) for field_name in exported_fields}


def normalize_product_text(value: str) -> str:
    """Return a canonical, lower-cased form of a product name.

    Applies NFKC folding, removes combining marks, unifies multiplication
    signs and a few look-alike characters, turns brackets and question marks
    into spaces, and collapses whitespace. A falsy *value* yields ``""``.
    """
    folded = unicodedata.normalize("NFKC", value or "")
    decomposed = unicodedata.normalize("NFKD", folded)
    text = "".join(filter(lambda ch: not unicodedata.combining(ch), decomposed))

    # Single-character substitutions; no output is itself a key, so one
    # translate pass is equivalent to applying them one after another.
    text = text.translate(
        str.maketrans(
            {
                "×": "x",
                "＊": "x",
                "*": "x",
                "／": "/",
                "＆": "&",
                "粧": "妝",
            }
        )
    )
    text = re.sub(r"[\u3000\r\n\t]+", " ", text).lower()

    # Punctuation that only decorates the name becomes a separator.
    for separator_pattern in (r"[?？]+", r"[【】\[\]{}「」『』]", r"[()（）]"):
        text = re.sub(separator_pattern, " ", text)
    return re.sub(r"\s+", " ", text).strip()


def _strip_noise(value: str) -> str:
    """Blank out every NOISE_PHRASES entry in *value* and collapse whitespace.

    Longer phrases are removed first so that a short phrase cannot split a
    longer one that contains it.
    """
    longest_first = sorted(NOISE_PHRASES, key=len, reverse=True)
    cleaned = value
    for noise in longest_first:
        cleaned = cleaned.replace(noise.lower(), " ")
    return re.sub(r"\s+", " ", cleaned).strip()


def _tokenize(value: str) -> list[str]:
    raw_tokens = re.findall(r"[a-z0-9]+|[\u4e00-\u9fff]+", value)
    tokens: list[str] = []
    for token in raw_tokens:
        if len(token) <= 1 and not token.isdigit():
            continue
        tokens.append(token)
    return tokens


def _extract_model_tokens(text: str) -> set[str]:
    """Collect model-code-like tokens (letters followed by 2+ digits) from *text*.

    Each hit is lower-cased and stripped of non-alphanumerics. Spec-like codes
    (SPF/PA/UV) and results shorter than four characters are discarded.
    """
    model_pattern = r"(?<![a-z0-9])([a-z]{1,4}-?[a-z]{0,3}\d{2,}[a-z0-9-]*)(?![a-z0-9])"
    models: set[str] = set()
    for candidate in re.findall(model_pattern, text, re.I):
        squashed = re.sub(r"[^a-z0-9]", "", candidate.lower())
        if _is_spec_like_latin_token(squashed):
            continue
        has_letter = re.search(r"[a-z]", squashed)
        has_digit = re.search(r"\d", squashed)
        if len(squashed) >= 4 and has_letter and has_digit:
            models.add(squashed)
    return models


def _is_spec_like_latin_token(token: str) -> bool:
    return bool(
        re.fullmatch(r"spf\d{1,3}[a-z]?", token)
        or re.fullmatch(r"pa\d*", token)
        or token in {"uva", "uvb", "uv", "spf"}
    )


def _brand_alias_present(text: str, alias_norm: str, text_tokens: set[str]) -> bool:
    if not alias_norm:
        return False
    if re.search(r"[\u4e00-\u9fff]", alias_norm):
        return alias_norm in text
    if " " not in alias_norm and alias_norm in text_tokens:
        return True
    if re.fullmatch(r"[a-z0-9][a-z0-9\s'&.-]*", alias_norm):
        pattern = r"(?<![a-z0-9])" + re.escape(alias_norm).replace(r"\ ", r"\s+") + r"(?![a-z0-9])"
        return bool(re.search(pattern, text))
    return alias_norm in text


def _known_brand_tokens(text: str) -> set[str]:
    """Return brand tokens for every known brand whose alias appears in *text*.

    Brand tables are imported from ``services.price_comparison`` when that is
    possible (empty tables otherwise) and extended with
    BRAND_ALIAS_OVERRIDES. For each brand found, the result holds its
    canonical name plus the tokens of the matching alias and of all aliases
    grouped under the same brand. One- and two-letter Latin tokens and
    GENERIC_TOKENS are left out.
    """
    try:
        from services.price_comparison import BRAND_ALIASES, BRAND_NORMALIZE_MAP
    except Exception:
        BRAND_ALIASES = {}
        BRAND_NORMALIZE_MAP = {}

    canonical_by_alias = dict(BRAND_NORMALIZE_MAP)
    aliases_by_brand = {brand: list(names) for brand, names in BRAND_ALIASES.items()}
    # Overrides are applied last so they win over imported mappings.
    for brand, override_names in BRAND_ALIAS_OVERRIDES.items():
        aliases_by_brand.setdefault(brand, []).extend(override_names)
        canonical_by_alias[brand.lower()] = brand
        canonical_by_alias.update((name.lower(), brand) for name in override_names)

    def brand_words(alias_text: str) -> list[str]:
        # Very short Latin fragments ("dr", "wu", "la") are too ambiguous to keep.
        return [word for word in _tokenize(alias_text) if not re.fullmatch(r"[a-z]{1,2}", word)]

    name_tokens = _tokenize(text)
    found: set[str] = set()
    for alias, brand in canonical_by_alias.items():
        normalized_alias = normalize_product_text(alias)
        if not _brand_alias_present(text, normalized_alias, name_tokens):
            continue
        found.add(brand)
        found.update(brand_words(normalized_alias))
        for sibling_alias in aliases_by_brand.get(brand, []):
            found.update(brand_words(normalize_product_text(sibling_alias)))

    return {token for token in found if token and token not in GENERIC_TOKENS}


def _leading_brand_tokens(original: str, normalized: str) -> set[str]:
    """Guess brand tokens from the start of a product name.

    Sources, all merged into one set:
    * tokens inside a leading 【...】 / [...] bracket of *original*, unless the
      bracket contains a NON_BRAND_BRACKET_PHRASES entry;
    * the first token of the first 48 characters of *normalized*, when it is
      2-6 CJK characters and not generic;
    * every Latin token of 3+ characters in that same prefix that is not a
      spec-like code.
    """
    guesses: set[str] = set()

    bracketed = re.match(r"\s*[【\[]([^】\]]{2,40})[】\]]", original or "")
    if bracketed:
        inner = normalize_product_text(bracketed.group(1))
        if all(phrase not in inner for phrase in NON_BRAND_BRACKET_PHRASES):
            guesses.update(
                token for token in _tokenize(_strip_noise(inner)) if token not in GENERIC_TOKENS
            )

    head_tokens = _tokenize(normalized[:48])
    if head_tokens:
        opener = head_tokens[0]
        if opener not in GENERIC_TOKENS and re.fullmatch(r"[\u4e00-\u9fff]{2,6}", opener):
            guesses.add(opener)
    guesses.update(
        token
        for token in head_tokens
        if re.fullmatch(r"[a-z][a-z0-9\-']{2,}", token) and not _is_spec_like_latin_token(token)
    )
    return guesses


def _extract_product_type(text: str) -> Optional[str]:
    """Return the first PRODUCT_TYPES key with an alias contained in *text*.

    Types are tried in the dict's definition order; ``None`` when none matches.
    """
    matching_types = (
        type_name
        for type_name, type_aliases in PRODUCT_TYPES.items()
        if any(alias.lower() in text for alias in type_aliases)
    )
    return next(matching_types, None)


def _convert_volume(value: str, unit: str) -> Optional[tuple[str, float]]:
    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    unit = unit.lower()
    if unit in {"ml", "毫升"}:
        return ("ml", number)
    if unit == "l":
        return ("ml", number * 1000)
    if unit in {"g", "公克"}:
        return ("g", number)
    if unit == "kg":
        return ("g", number * 1000)
    if unit in {"mg", "毫克"}:
        return ("mg", number)
    if unit in {"mcg", "μg", "ug", "微克"}:
        return ("mg", number / 1000)
    return None


def _count_unit_family(unit: str) -> str:
    """Return the family name for *unit*, or *unit* itself when it has none."""
    if unit in COUNT_UNIT_FAMILIES:
        return COUNT_UNIT_FAMILIES[unit]
    return unit


def _extract_specs(
    text: str,
) -> tuple[tuple[float, ...], tuple[float, ...], tuple[float, ...], tuple[tuple[int, str], ...], Optional[int]]:
    """Extract measurement and pack-count specifications from a product name.

    Args:
        text: Product name; parse_product_identity() passes the normalized
            (lower-cased) name.

    Returns:
        A 5-tuple ``(volumes_ml, weights_g, dosages_mg, counts,
        total_piece_count)``. The first three are sorted tuples of unique
        amounts in base units; ``counts`` is a sorted tuple of unique
        ``(count, unit)`` pairs; ``total_piece_count`` is an estimated number
        of individual pieces, or ``None`` when no piece unit was found.
    """
    volumes_ml: list[float] = []
    weights_g: list[float] = []
    dosages_mg: list[float] = []
    # "<number><unit>" measurements; the unit is not required to end at a word
    # boundary, so a number followed by a word starting with "l" or "g" matches too.
    for match in re.finditer(r"(\d+(?:\.\d+)?)\s*(ml|毫升|l|g|公克|kg|mg|毫克|mcg|μg|ug|微克)", text, re.I):
        converted = _convert_volume(match.group(1), match.group(2))
        if not converted:
            continue
        unit, number = converted
        if unit == "ml":
            volumes_ml.append(number)
        elif unit == "g":
            weights_g.append(number)
        else:
            dosages_mg.append(number)

    counts: list[tuple[int, str]] = []
    # Arabic digits followed by a Chinese count unit, e.g. "3入".
    for match in re.finditer(rf"(\d+)\s*({COUNT_UNIT_PATTERN})", text):
        counts.append((int(match.group(1)), match.group(2)))
    # A single Chinese numeral followed by a count unit, e.g. "兩瓶".
    for match in re.finditer(rf"([一二兩雙三四五六七八九十])\s*({COUNT_UNIT_PATTERN})", text):
        counts.append((CHINESE_COUNT[match.group(1)], match.group(2)))
    # Multiplier notation "x3" / "乘3", optionally followed by a count unit.
    for match in re.finditer(rf"(?:x|乘)\s*(\d+)\s*({COUNT_UNIT_PATTERN})?", text, re.I):
        unit = match.group(2) or "入"
        if not match.group(2):
            # Without an explicit unit the "x" may just be part of a word or a
            # model name, so look at what surrounds the match.
            prefix = text[max(0, match.start() - 4):match.start()].strip().lower()
            suffix = text[match.end():match.end() + 1]
            spec_prefixed = bool(re.search(r"(?:ml|毫升|g|公克|kg|mg|oz)$", prefix))
            # Preceded by a Latin letter that is not the end of a measurement unit.
            if re.search(r"[a-z]$", prefix) and not spec_prefixed:
                continue
            # Immediately followed by a CJK character and not preceded by a measurement.
            if suffix and re.match(r"[\u4e00-\u9fff]", suffix) and not spec_prefixed:
                continue
        counts.append((int(match.group(1)), unit))
    # English units ("2 pcs", "3 bottles") are all recorded with unit "入".
    for match in re.finditer(rf"(\d+)\s*{ENGLISH_COUNT_UNIT_RE}", text, re.I):
        counts.append((int(match.group(1)), "入"))
    # "buy N get M free" counts as N + M items.
    # NOTE(review): _count_text_value is defined outside the visible part of this
    # file; presumably it converts a digit string or Chinese numeral to an int
    # and returns a falsy value on failure — confirm.
    buy_get = re.search(r"買\s*(\d+|[一二兩雙三四五六七八九十])\s*送\s*(\d+|[一二兩雙三四五六七八九十])", text)
    if buy_get:
        total_count = (_count_text_value(buy_get.group(1)) or 0) + (_count_text_value(buy_get.group(2)) or 0)
        if total_count > 1:
            counts.append((total_count, "入"))
    # Explicit buy-one-get-one spellings; duplicates are removed further down.
    if "買一送一" in text or "買1送1" in text:
        counts.append((2, "入"))

    total_piece_count = None
    # An explicit "共 N <piece unit>" total takes precedence over any estimate.
    explicit_total = re.search(r"共\s*(\d+)\s*([包袋片顆粒錠枚])", text)
    if explicit_total:
        total_piece_count = int(explicit_total.group(1))
    else:
        piece_counts = [count for count, unit in counts if unit in PIECE_UNITS]
        container_counts = [count for count, unit in counts if unit in CONTAINER_UNITS]
        # Estimate: largest pieces-per-container times largest container count.
        if piece_counts and container_counts:
            total_piece_count = max(piece_counts) * max(container_counts)
        elif piece_counts:
            total_piece_count = max(piece_counts)

    unique_counts = tuple(sorted(set(counts)))
    return (
        tuple(sorted(set(volumes_ml))),
        tuple(sorted(set(weights_g))),
        tuple(sorted(set(dosages_mg))),
        unique_counts,
        total_piece_count,
    )


def parse_product_identity(name: str) -> ProductIdentity:
    """Parse a raw product name into a ProductIdentity.

    Tokens, product type and brand are derived from the noise-stripped name;
    measurement and count specs are read from the normalized name, so promo
    phrases removed as noise can still contribute counts.
    """
    normalized = normalize_product_text(name)
    searchable = _strip_noise(normalized)
    all_tokens = set(_tokenize(searchable))
    product_type = _extract_product_type(searchable)
    # Fall back to leading-token guesses only when no known brand is present.
    brands = _known_brand_tokens(searchable) or _leading_brand_tokens(name, normalized)

    def is_descriptive(token: str) -> bool:
        # Generic words, bare numbers and "<number><unit>" tokens say nothing
        # about which product this is.
        if token in GENERIC_TOKENS or token.isdigit():
            return False
        return re.fullmatch(r"\d+(ml|g|kg|l|mg|mcg|ug)?", token) is None

    core = {token for token in all_tokens if is_descriptive(token)} - brands
    core |= _extract_model_tokens(searchable)

    volumes_ml, weights_g, dosages_mg, counts, total_piece_count = _extract_specs(normalized)
    return ProductIdentity(
        original_name=name or "",
        normalized_name=normalized,
        searchable_name=searchable,
        brand_tokens=frozenset(brands),
        product_type=product_type,
        tokens=frozenset(all_tokens),
        core_tokens=frozenset(core),
        volumes_ml=volumes_ml,
        weights_g=weights_g,
        dosages_mg=dosages_mg,
        counts=counts,
        total_piece_count=total_piece_count,
    )


def _weighted_token_score(left: ProductIdentity, right: ProductIdentity) -> float:
    def expand_tokens(identity: ProductIdentity) -> set[str]:
        tokens = set(identity.brand_tokens | identity.core_tokens)
        for token in identity.core_tokens:
            chinese = "".join(char for char in token if "\u4e00" <= char <= "\u9fff")
            if len(chinese) >= 3:
                tokens.update(f"zh:{chinese[i:i + 2]}" for i in range(len(chinese) - 1))
        return tokens

    left_tokens = expand_tokens(left)
    right_tokens = expand_tokens(right)
    if not left_tokens or not right_tokens:
        return SequenceMatcher(None, left.searchable_name, right.searchable_name).ratio() * 0.6

    def weight(token: str) -> float:
        if token in left.brand_tokens or token in right.brand_tokens:
            return 1.4
        if token.startswith("zh:"):
            return 0.55
        if re.search(r"\d", token):
            return 1.2
        if len(token) >= 4:
            return 1.25
        return 1.0

    overlap = left_tokens & right_tokens
    overlap_weight = sum(weight(token) for token in overlap)
    total_weight = sum(weight(token) for token in left_tokens) + sum(weight(token) for token in right_tokens)
    dice = (2 * overlap_weight / total_weight) if total_weight else 0
    sequence = SequenceMatcher(None, " ".join(sorted(left_tokens)), " ".join(sorted(right_tokens))).ratio()
    return min(1.0, dice * 0.72 + sequence * 0.28)


def _brand_score(left: ProductIdentity, right: ProductIdentity) -> tuple[float, bool, str | None]:
    if not left.brand_tokens or not right.brand_tokens:
        return 0.55, False, None
    if left.brand_tokens & right.brand_tokens:
        return 1.0, False, None
    maquillage_anchor = "心機星魅蜜光圈潤唇膏"
    left_has_shiseido = bool({"shiseido", "資生堂"} & left.brand_tokens)
    right_has_shiseido = bool({"shiseido", "資生堂"} & right.brand_tokens)
    left_has_maquillage = bool({"maquillage", "心機彩妝"} & left.brand_tokens)
    right_has_maquillage = bool({"maquillage", "心機彩妝"} & right.brand_tokens)
    if (
        maquillage_anchor in left.normalized_name
        and maquillage_anchor in right.normalized_name
        and ((left_has_shiseido and right_has_maquillage) or (right_has_shiseido and left_has_maquillage))
    ):
        return 1.0, False, None
    return 0.0, True, "brand_conflict"


def _close_number(left: float, right: float, tolerance: float = 0.04) -> bool:
    """Return True when two numbers differ by at most ``tolerance`` relatively.

    The gap is divided by the larger magnitude of the two values, floored at
    1.0, so values below 1 are effectively compared on an absolute scale.
    """
    scale = max(abs(left), abs(right), 1.0)
    relative_gap = abs(left - right) / scale
    return relative_gap <= tolerance


def _spec_component(left_values: Iterable[float], right_values: Iterable[float]) -> tuple[float, bool]:
    """Compare two collections of numeric spec values.

    Values are de-duplicated and sorted before comparison.

    Returns:
        ``(score, conflict)``:

        * ``(0.55, False)`` when neither side has a value.
        * ``(0.45, False)`` when only one side has a value.
        * ``(1.0, False)`` when every value can be paired with a close value on
          the other side (see ``_close_number``).
        * ``(0.0, True)`` otherwise, including when the sides hold a different
          number of distinct values and at least one side has several.
    """
    lefts = sorted(set(left_values))
    rights = sorted(set(right_values))
    if not lefts and not rights:
        return 0.55, False
    if not lefts or not rights:
        return 0.45, False

    # Exactly one distinct value on each side: a plain tolerance comparison.
    if len(lefts) == 1 and len(rights) == 1:
        if _close_number(lefts[0], rights[0]):
            return 1.0, False
        return 0.0, True

    if len(lefts) != len(rights):
        return 0.0, True

    # Pair each left value with the first still-unused close right value.
    remaining = list(rights)
    for value in lefts:
        for position, candidate in enumerate(remaining):
            if _close_number(value, candidate):
                del remaining[position]
                break
        else:
            return 0.0, True
    return 1.0, False


def _has_hard_count_unit_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Detect the same count number attached to a piece unit on one side and a container unit on the other.

    Counts are grouped by their numeric value; units are mapped through
    ``_count_unit_family``. A count value shared by both sides is a conflict
    when the two sides share no unit family for it and one side's families
    intersect ``PIECE_UNITS`` while the other side's intersect
    ``CONTAINER_UNITS``. Returns False when either side has no counts.
    """
    if not left.counts or not right.counts:
        return False

    def families_by_count(identity: ProductIdentity) -> dict[int, set[str]]:
        grouped: dict[int, set[str]] = {}
        for count, unit in identity.counts:
            grouped.setdefault(count, set()).add(_count_unit_family(unit))
        return grouped

    left_by_count = families_by_count(left)
    right_by_count = families_by_count(right)

    for count in left_by_count.keys() & right_by_count.keys():
        left_families = left_by_count[count]
        right_families = right_by_count[count]
        if left_families & right_families:
            continue
        if left_families & PIECE_UNITS and right_families & CONTAINER_UNITS:
            return True
        if right_families & PIECE_UNITS and left_families & CONTAINER_UNITS:
            return True
    return False


def _allow_catalog_count_omission(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Allow a one-sided piece count for specific Dashing Diva nail-tip lines.

    Returns True only when all of the following hold:

    * exactly one side carries counts;
    * both sides share the brand tokens ``dashing`` and ``diva`` (case-insensitive);
    * the combined searchable names mention ``美甲片`` and one of the
      whitelisted line names;
    * the counted side's ``total_piece_count`` is at least 20.
    """
    if bool(left.counts) == bool(right.counts):
        return False

    def lowered(tokens) -> set[str]:
        return {token.lower() for token in tokens}

    common_brands = lowered(left.brand_tokens) & lowered(right.brand_tokens)
    if not {"dashing", "diva"}.issubset(common_brands):
        return False

    searchable_pair = f"{left.searchable_name} {right.searchable_name}"
    if "美甲片" not in searchable_pair:
        return False

    counted, omitted = (left, right) if left.counts else (right, left)
    if omitted.counts:
        return False
    if (counted.total_piece_count or 0) < 20:
        return False

    for anchor in ("時尚潮流美甲片", "頂級璀燦美甲片", "薄型經典美甲片"):
        if anchor in searchable_pair:
            return True
    return False


def _count_score(left: ProductIdentity, right: ProductIdentity) -> tuple[float, bool]:
    """Score agreement between the pack/piece counts of two identities.

    The rules are evaluated in order and the first one that applies wins.

    Returns:
        ``(score, conflict)``. Every ``0.0`` score is returned together with
        ``conflict=True``; every other score is returned with ``False``.
    """
    left_counts = [count for count, _unit in left.counts]
    right_counts = [count for count, _unit in right.counts]
    # Count values grouped by unit family (as mapped by _count_unit_family).
    left_by_unit: dict[str, set[int]] = {}
    right_by_unit: dict[str, set[int]] = {}
    for count, unit in left.counts:
        left_by_unit.setdefault(_count_unit_family(unit), set()).add(count)
    for count, unit in right.counts:
        right_by_unit.setdefault(_count_unit_family(unit), set()).add(count)

    # 1) Both sides expose a total piece count: equal totals are a full match;
    #    a larger/smaller ratio of 1.5 or more is a conflict; anything in
    #    between is a weak 0.45.
    if left.total_piece_count and right.total_piece_count:
        if left.total_piece_count == right.total_piece_count:
            return 1.0, False
        ratio = max(left.total_piece_count, right.total_piece_count) / max(min(left.total_piece_count, right.total_piece_count), 1)
        return (0.0, True) if ratio >= 1.5 else (0.45, False)

    # 2) A unit family present on both sides must carry the same set of counts.
    for unit in set(left_by_unit) & set(right_by_unit):
        if left_by_unit[unit] != right_by_unit[unit]:
            return 0.0, True
    # 3) Both sides have counts but no comparable totals.
    if left.counts and right.counts:
        # An identical (count, unit) pair on both sides.
        if set(left.counts) & set(right.counts):
            return 0.85, False
        if _has_hard_count_unit_conflict(left, right):
            return 0.0, True
        if left_counts and right_counts:
            # Compare the largest count on each side.
            ratio = max(max(left_counts), max(right_counts)) / max(min(max(left_counts), max(right_counts)), 1)
            if ratio >= 1.5:
                return 0.0, True
        return 0.35, False
    # 4) At most one side has counts.
    if _allow_catalog_count_omission(left, right):
        return 0.55, False
    # A one-sided count greater than 1 is treated as a conflict.
    if (left_counts and max(left_counts) > 1) or (right_counts and max(right_counts) > 1):
        return 0.0, True
    return 0.5, False


def _has_exact_count_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Return True when both sides have counts and the same multiset of count numbers.

    Units are ignored; only the numeric counts are compared.
    """
    if not (left.counts and right.counts):
        return False

    def numbers(identity: ProductIdentity) -> list[int]:
        return sorted(count for count, _unit in identity.counts)

    return numbers(left) == numbers(right)


def _has_pack_quantity_difference(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Return True when both sides state counts and those counts disagree.

    Returns False when either side has no counts or the count numbers align
    exactly. When both totals are known, the totals decide. Otherwise only
    counts whose unit (or unit family) is in ``COUNT_UNITS`` are compared, per
    unit family shared by both sides.
    """
    if not left.counts or not right.counts or _has_exact_count_alignment(left, right):
        return False

    left_total = left.total_piece_count
    right_total = right.total_piece_count
    if left_total and right_total:
        return left_total != right_total

    def pack_counts_by_family(identity: ProductIdentity) -> dict[str, set[int]]:
        grouped: dict[str, set[int]] = {}
        for count, unit in identity.counts:
            family = _count_unit_family(unit)
            if family in COUNT_UNITS or unit in COUNT_UNITS:
                grouped.setdefault(family, set()).add(count)
        return grouped

    left_packs = pack_counts_by_family(left)
    right_packs = pack_counts_by_family(right)
    return any(
        left_packs[family] != right_packs[family]
        for family in left_packs.keys() & right_packs.keys()
    )


# Groups of named components that can appear together in one listing title.
# Each tuple is passed as ``terms`` to ``_named_component_quantity_map``, and
# ``_has_named_component_quantity_conflict`` compares the per-name quantities
# of two listings within each group.
NAMED_COMPONENT_QUANTITY_GROUPS = (
    ("嬰兒沐浴精", "嬰幼童洗髮精"),
    ("魅惑麋香", "湛藍海洋", "花妍巧語", "絲絨玫瑰"),
)


def _named_component_quantity_map(identity: ProductIdentity, terms: Iterable[str]) -> dict[str, int]:
    """Map each named component found in the title to its quantity.

    Args:
        identity: Parsed listing; ``searchable_name`` and ``counts`` are read.
        terms: Component names to look for in the searchable name.

    Returns:
        ``{term: quantity}`` when at least two terms are present and either
        every present term is followed (within 28 characters) by an explicit
        ``x N`` / ``乘 N`` multiplier, or no term has one but the largest pack
        count equals the number of present terms and the title contains a
        joining separator (each term then counts as 1). An empty dict in all
        other cases.
    """
    name = identity.searchable_name
    found = tuple(term for term in terms if term in name)
    if len(found) < 2:
        return {}

    multiplier_after_term = re.compile(
        r"(?:\d+(?:\.\d+)?\s*(?:ml|g|mg|毫升|公克|毫克))?\s*(?:x|乘)\s*(\d+)",
        flags=re.I,
    )
    explicit: dict[str, int] = {}
    for term in found:
        # Only the first occurrence of the term is inspected.
        window_start = name.find(term) + len(term)
        hit = multiplier_after_term.search(name[window_start:window_start + 28])
        if hit is not None:
            explicit[term] = int(hit.group(1))

    if len(explicit) == len(found):
        return explicit

    pack_counts = [
        count
        for count, unit in identity.counts
        if _count_unit_family(unit) in COUNT_UNITS or unit in COUNT_UNITS
    ]
    implied_one_each = (
        not explicit
        and pack_counts
        and max(pack_counts) == len(found)
        and re.search(r"[+＋/／、]", name)
    )
    if implied_one_each:
        return {term: 1 for term in found}
    return {}


def _has_named_component_quantity_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Detect bundles whose named components match but whose quantities differ.

    For each group in ``NAMED_COMPONENT_QUANTITY_GROUPS``, the per-component
    quantities of both sides are compared; a group counts only when at least
    two components are resolved on both sides. Such listings must not be
    treated as the same price target.
    """
    for terms in NAMED_COMPONENT_QUANTITY_GROUPS:
        left_quantities = _named_component_quantity_map(left, terms)
        right_quantities = _named_component_quantity_map(right, terms)
        common = left_quantities.keys() & right_quantities.keys()
        if len(common) >= 2 and any(left_quantities[term] != right_quantities[term] for term in common):
            return True
    return False


def _spec_score(left: ProductIdentity, right: ProductIdentity) -> tuple[float, bool, tuple[str, ...]]:
    """Combine volume, weight, dosage and count agreement into one spec score.

    Returns:
        ``(score, has_conflict, conflict_names)``. The score is the mean of the
        component scores for every dimension that at least one side specifies;
        ``(0.55, False, ())`` when neither side specifies anything.
    """
    volume = _spec_component(left.volumes_ml, right.volumes_ml)
    weight = _spec_component(left.weights_g, right.weights_g)
    dosage = _spec_component(left.dosages_mg, right.dosages_mg)
    count = _count_score(left, right)

    # (conflict label, (score, conflict), dimension is stated by either side)
    dimensions = (
        ("volume_conflict", volume, left.volumes_ml or right.volumes_ml),
        ("weight_conflict", weight, left.weights_g or right.weights_g),
        ("dosage_conflict", dosage, left.dosages_mg or right.dosages_mg),
        ("count_conflict", count, left.counts or right.counts),
    )

    stated_scores = [result[0] for _label, result, stated in dimensions if stated]
    if not stated_scores:
        return 0.55, False, ()

    mean_score = sum(stated_scores) / len(stated_scores)
    conflict_names = tuple(label for label, result, _stated in dimensions if result[1])
    return mean_score, bool(conflict_names), conflict_names


def _has_bundle_offer(identity: ProductIdentity) -> bool:
    """Return True when the normalized name advertises a buy-N-get-M style offer.

    Matches ``買<N>送<M>`` written with Arabic or Chinese numerals, the literal
    ``買一送一``, or any phrase listed in ``BUNDLE_OFFER_PHRASES``.
    """
    text = identity.normalized_name
    if re.search(r"買\s*\d+\s*送\s*\d+", text):
        return True
    if re.search(r"買\s*[一二兩雙三四五六七八九十]\s*送\s*[一二兩雙三四五六七八九十]", text):
        return True
    if "買一送一" in text:
        return True
    return any(phrase in text for phrase in BUNDLE_OFFER_PHRASES)


def _has_multi_component(identity: ProductIdentity) -> bool:
    """Return True when the title looks like several components sold together.

    Works on the text returned by ``_component_separator_text`` and checks for
    a remaining ``+`` / ``＋`` separator or a ``<spec> x <N>`` repetition.
    """
    text = _component_separator_text(identity)
    if "+" in text or "＋" in text:
        return True
    repeated_spec = re.search(r"\d+\s*(?:ml|g|mg|毫升|公克|毫克)\s*x\s*\d+", text, re.I)
    return repeated_spec is not None


def _component_separator_text(identity: ProductIdentity) -> str:
    """Return the normalized name with non-separator plus signs removed.

    The following ``+`` / ``＋`` uses are rewritten, in this order, so that a
    remaining plus sign can be read as a component separator:

    1. ``spf<N>+`` becomes ``spf<N>``.
    2. ``pa+`` … ``pa+++++`` becomes ``pa``.
    3. ``<spec>+<spec>`` (two sized quantities) is joined with a space.
    4. ``<code>+<code>`` (letters followed by digits on both sides) is joined
       with a space.
    """
    def unplus(match: re.Match) -> str:
        return re.sub(r"[+＋]", " ", match.group(0))

    rewrites = (
        (r"spf\s*(\d+)\s*[+＋]+", r"spf\1"),
        (r"pa\s*[+＋]{1,5}", "pa"),
        (
            r"(\d+(?:\.\d+)?\s*(?:ml|g|mg|毫升|公克|毫克))\s*[+＋]\s*"
            r"(\d+(?:\.\d+)?\s*(?:ml|g|mg|毫升|公克|毫克))",
            r"\1 \2",
        ),
        (r"\b[a-z]{1,6}\d{1,6}\s*[+＋]\s*[a-z]{1,6}\d{1,6}\b", unplus),
    )

    cleaned = identity.normalized_name
    for pattern, replacement in rewrites:
        cleaned = re.sub(pattern, replacement, cleaned, flags=re.I)
    return cleaned


def _multi_component_count(identity: ProductIdentity) -> int:
    """Count the components separated by ``+`` / ``＋`` in the title.

    Segments that are empty or consist only of whitespace, digits and hyphens
    are ignored. The result is never below 1.
    """
    text = _component_separator_text(identity)
    if "+" not in text and "＋" not in text:
        return 1
    segments = (chunk.strip() for chunk in re.split(r"[+＋]", text))
    named_segments = [
        segment
        for segment in segments
        if segment and not re.fullmatch(r"[\s\d-]+", segment)
    ]
    return max(len(named_segments), 1)


def _repeated_single_spec_count(identity: ProductIdentity) -> Optional[int]:
    """Return N for a title with exactly one ``<spec> x N`` repetition and N > 1.

    Returns None when there is no such repetition, more than one, or when the
    multiplier is 1 or less.
    """
    text = _component_separator_text(identity)
    multipliers = re.findall(
        r"\d+(?:\.\d+)?\s*(?:ml|g|mg|毫升|公克|毫克)\s*x\s*(\d+)",
        text,
        flags=re.I,
    )
    if len(multipliers) != 1:
        return None
    (raw_multiplier,) = multipliers
    try:
        repeat = int(raw_multiplier)
    except (TypeError, ValueError):
        return None
    if repeat > 1:
        return repeat
    return None


def _refill_piece_count(identity: ProductIdentity) -> Optional[int]:
    """Return the largest count whose unit family is ``"refill"``, or None if there is none."""
    return max(
        (count for count, unit in identity.counts if _count_unit_family(unit) == "refill"),
        default=None,
    )


def _has_cushion_refill_pack_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Align cushion compact refill wording such as `一盒兩蕊` with `15g x2`.

    Both sides must have product type ``氣墊粉餅``. One side's refill count must
    equal the other side's single ``<spec> x N`` multiplier, and the refill side
    must not also state a box/set/pack count above 1. Either orientation counts.
    """
    if not (left.product_type == "氣墊粉餅" and right.product_type == "氣墊粉餅"):
        return False

    def aligned(refill_side: ProductIdentity, spec_side: ProductIdentity) -> bool:
        refills = _refill_piece_count(refill_side)
        repeats = _repeated_single_spec_count(spec_side)
        if not refills or not repeats or refills != repeats:
            return False
        has_multi_box = any(
            unit in {"盒", "組", "入"} and count > 1
            for count, unit in refill_side.counts
        )
        return not has_multi_box

    if aligned(left, right):
        return True
    return aligned(right, left)


def _has_paulas_choice_body_lotion_210ml_2pack_alignment(
    left: ProductIdentity,
    right: ProductIdentity,
) -> bool:
    """Align `210ml x2` with `210ml二入` wording for one specific body lotion.

    Requires a Paula's Choice brand token on at least one side, a shared 210
    volume (per ``_has_shared_volume``), both searchable names mentioning
    ``水楊酸`` and ``身體乳``, and a two-pack indication on both sides.
    """
    combined_brands = left.brand_tokens | right.brand_tokens
    if not ({"寶拉珍選", "paulas", "choice"} & combined_brands):
        return False
    if not _has_shared_volume(left, right, 210):
        return False
    for item in (left, right):
        name = item.searchable_name
        if not ("水楊酸" in name and "身體乳" in name):
            return False

    def has_two_pack(identity: ProductIdentity) -> bool:
        worded = re.search(
            r"(?:x\s*2|2\s*入|二\s*入|兩\s*入|雙\s*入|雙入組|二入組|兩入組)",
            identity.searchable_name,
            re.I,
        )
        return bool(worded or (2, "入") in identity.counts)

    return has_two_pack(left) and has_two_pack(right)


def _has_nivea_creme_100ml_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Recognize the Nivea creme travel size on both sides.

    Requires a Nivea brand token on at least one side, a shared 100 volume
    (per ``_has_shared_volume``), and both searchable names containing
    ``妮維雅霜`` and ``隨身版``.
    """
    combined_brands = left.brand_tokens | right.brand_tokens
    if not ({"nivea", "妮維雅"} & combined_brands):
        return False
    if not _has_shared_volume(left, right, 100):
        return False
    for item in (left, right):
        name = item.searchable_name
        if not ("妮維雅霜" in name and "隨身版" in name):
            return False
    return True


def _has_cetaphil_moisturizer_type_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Treat Cetaphil lotion/cream wording variants as one type on exact named lines only.

    Requires a Cetaphil brand token shared by both sides and product types
    that are exactly one ``乳液`` and one ``面霜``. The first named line found in
    both searchable names decides the result via ``_has_shared_weight`` with
    that line's expected weight.
    """
    shared_brands = left.brand_tokens & right.brand_tokens
    if not ({"cetaphil", "舒特膚"} & shared_brands):
        return False
    if {left.product_type, right.product_type} != {"乳液", "面霜"}:
        return False

    # (line name, expected weight passed to _has_shared_weight)
    named_lines = (
        ("長效潤膚霜", 250),
        ("益膚康修護舒敏乳霜", 227),
    )
    for line_name, expected_weight in named_lines:
        if line_name in left.searchable_name and line_name in right.searchable_name:
            return _has_shared_weight(left, right, expected_weight)
    return False


def _has_refill_pack(identity: ProductIdentity) -> bool:
    """Return True when the normalized name contains a refill/replacement-core keyword."""
    refill_keywords = (
        "補充瓶",
        "補充包",
        "補充芯",
        "補充蕊",
        "替換蕊",
        "替換芯",
        "refill",
    )
    text = identity.normalized_name
    return any(keyword in text for keyword in refill_keywords)


def _has_accessory_case(identity: ProductIdentity) -> bool:
    """Return True when the normalized name contains one of the case/empty-box keywords."""
    case_keywords = ("眉彩餅盒", "盒一入款", "盒三入款", "盒單入", "空盒")
    text = identity.normalized_name
    return any(keyword in text for keyword in case_keywords)


def _spec_mention_count(identity: ProductIdentity) -> int:
    """Count the ``<number><unit>`` size mentions in the normalized name.

    Recognized units are the volume, weight and dosage units listed in the
    pattern below; matching is case-insensitive.
    """
    size_mention = re.compile(
        r"\d+(?:\.\d+)?\s*(?:ml|毫升|l|g|公克|kg|mg|毫克|mcg|μg|ug|微克)",
        re.I,
    )
    return sum(1 for _match in size_mention.finditer(identity.normalized_name))


def _count_text_value(value: str) -> Optional[int]:
    """Convert a count written in digits or as a Chinese numeral to an int.

    Args:
        value: The numeral text, e.g. ``"12"`` or ``"二"``.

    Returns:
        The integer value, or None when ``value`` is neither a decimal number
        nor a key of ``CHINESE_COUNT``.
    """
    # str.isdecimal() rather than str.isdigit(): isdigit() also accepts
    # characters such as "²" or "①" that int() cannot parse, which would raise
    # ValueError instead of falling through to the lookup.
    if value.isdecimal():
        return int(value)
    return CHINESE_COUNT.get(value)


def _pack_multiplier(identity: ProductIdentity) -> int:
    """Estimate how many units the listing contains.

    Checked in order, first hit wins:

    1. ``買N送M`` (Arabic or Chinese numerals): ``N + M`` when above 1.
    2. The literal ``買一送一`` / ``買1送1``: 2.
    3. ``N件組``: ``N`` when above 1.
    4. The largest count above 1 whose unit is in ``COUNT_UNITS``.

    Returns 1 when none applies.
    """
    text = identity.normalized_name

    offer = re.search(r"買\s*(\d+|[一二兩雙三四五六七八九十])\s*送\s*(\d+|[一二兩雙三四五六七八九十])", text)
    if offer is not None:
        bought = _count_text_value(offer.group(1)) or 0
        free = _count_text_value(offer.group(2)) or 0
        if bought + free > 1:
            return bought + free

    if any(marker in text for marker in ("買一送一", "買1送1")):
        return 2

    piece_set = re.search(r"(\d+|[一二兩雙三四五六七八九十])\s*件\s*組", text)
    if piece_set is not None:
        pieces = _count_text_value(piece_set.group(1)) or 0
        if pieces > 1:
            return pieces

    return max(
        (count for count, unit in identity.counts if unit in COUNT_UNITS and count > 1),
        default=1,
    )


def _has_overlapping_base_spec(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Return True when both sides state one and the same base size.

    Volumes are checked first; weights are consulted only when neither side
    states a volume. For the dimension that is used, each side must have
    exactly one distinct value and the two values must be close (see
    ``_close_number``). Returns False when neither dimension is stated.
    """
    for dimension in ("volumes_ml", "weights_g"):
        left_sizes = sorted(set(getattr(left, dimension)))
        right_sizes = sorted(set(getattr(right, dimension)))
        if not left_sizes and not right_sizes:
            continue
        # The first stated dimension decides; a missing or multi-valued side fails.
        if len(left_sizes) != 1 or len(right_sizes) != 1:
            return False
        return _close_number(left_sizes[0], right_sizes[0])
    return False


def _single_unit_total(identity: ProductIdentity) -> tuple[Optional[str], Optional[float], str]:
    """Compute the total quantity of a listing in a single unit.

    Returns:
        ``(unit, total, reason)``. On success ``reason`` is ``"ok"`` and
        ``unit`` is ``"ml"``, ``"g"`` or ``"入"``. Otherwise unit and total are
        None and ``reason`` is ``"mixed_volume_weight"``,
        ``"multi_spec_component"`` or ``"missing_single_unit"``.
    """
    distinct_volumes = sorted(set(identity.volumes_ml))
    distinct_weights = sorted(set(identity.weights_g))

    if distinct_volumes and distinct_weights:
        return None, None, "mixed_volume_weight"
    if len(distinct_volumes) > 1 or len(distinct_weights) > 1:
        return None, None, "multi_spec_component"

    if distinct_volumes:
        return "ml", distinct_volumes[0] * _pack_multiplier(identity), "ok"

    if distinct_weights:
        # For weights the total piece count, when known, takes precedence over
        # the pack multiplier.
        packs = identity.total_piece_count or _pack_multiplier(identity)
        return "g", distinct_weights[0] * packs, "ok"

    pieces = identity.total_piece_count
    if pieces:
        return "入", float(pieces), "ok"
    return None, None, "missing_single_unit"


def build_unit_price_comparison(
    momo_name: str,
    competitor_name: str,
    momo_price: Optional[float],
    competitor_price: Optional[float],
) -> dict:
    """Build deterministic unit-price evidence for unit-comparable candidates.

    Args:
        momo_name: MOMO listing title.
        competitor_name: Competitor listing title.
        momo_price: MOMO listing price.
        competitor_price: Competitor listing price.

    Returns:
        ``UnitPriceComparison(...).as_dict()``. When the pair is not in the
        ``unit_comparable`` lane, totals cannot be derived in one shared unit,
        or a price is missing, non-numeric or not positive, the result is
        non-comparable and carries the reason. Otherwise it carries per-unit
        prices, their gap and a one-line summary.
    """
    def rejected(reason: str) -> dict:
        return UnitPriceComparison(False, reason).as_dict()

    diagnostics = score_marketplace_match(
        momo_name,
        competitor_name,
        momo_price=momo_price,
        competitor_price=competitor_price,
    )
    if diagnostics.comparison_mode != "unit_comparable":
        return rejected(diagnostics.comparison_mode)

    momo_identity = parse_product_identity(momo_name)
    competitor_identity = parse_product_identity(competitor_name)
    momo_unit, momo_total, momo_reason = _single_unit_total(momo_identity)
    competitor_unit, competitor_total, competitor_reason = _single_unit_total(competitor_identity)
    if not (momo_reason == "ok" and competitor_reason == "ok"):
        return rejected(f"{momo_reason}:{competitor_reason}")
    if momo_unit != competitor_unit or not momo_total or not competitor_total:
        return rejected("unit_mismatch")

    try:
        momo_amount = float(momo_price or 0)
        competitor_amount = float(competitor_price or 0)
    except (TypeError, ValueError):
        return rejected("invalid_price")
    if momo_amount <= 0 or competitor_amount <= 0:
        return rejected("invalid_price")

    momo_per_unit = momo_amount / momo_total
    competitor_per_unit = competitor_amount / competitor_total
    gap_amount = momo_per_unit - competitor_per_unit
    if competitor_per_unit:
        gap_pct = gap_amount / competitor_per_unit * 100
    else:
        gap_pct = 0
    summary = (
        f"MOMO ${momo_per_unit:.2f}/{momo_unit} vs "
        f"PChome ${competitor_per_unit:.2f}/{momo_unit} "
        f"({gap_pct:+.1f}%)"
    )
    return UnitPriceComparison(
        comparable=True,
        reason="unit_comparable",
        unit_label=momo_unit,
        momo_total_quantity=round(momo_total, 3),
        competitor_total_quantity=round(competitor_total, 3),
        momo_unit_price=round(momo_per_unit, 4),
        competitor_unit_price=round(competitor_per_unit, 4),
        unit_gap_amount=round(gap_amount, 4),
        unit_gap_pct=round(gap_pct, 2),
        summary=summary,
    ).as_dict()


def _is_unit_comparable_candidate(
    left: ProductIdentity,
    right: ProductIdentity,
    token_score: float,
    chinese_name_score: float,
    brand_conflict: bool,
    type_score: float,
    reasons: Iterable[str],
) -> bool:
    """Identify the same core product sold in different pack sizes.

    These pairs are not safe exact matches. They may only enter a normalized
    unit-price review lane; otherwise a bundle price could be compared with a
    single-item price.

    A pair qualifies when a pack-difference reason is present, there is no
    brand, refill-pack or product-type conflict, both sides share one base
    size, and the names are similar enough.
    """
    # NOTE(review): other reason lists in this file spell this reason
    # "multi_component_count_conflict"; confirm "component_count_conflict" is
    # the name actually emitted.
    pack_difference_reasons = {
        "bundle_offer_conflict",
        "multi_component_conflict",
        "count_conflict",
        "component_count_conflict",
        "pack_quantity_difference",
    }
    reason_set = set(reasons)

    if reason_set.isdisjoint(pack_difference_reasons):
        return False
    if brand_conflict or "brand_conflict" in reason_set:
        return False
    if "refill_pack_conflict" in reason_set:
        return False
    if type_score == 0.0 or "type_conflict" in reason_set:
        return False
    if not _has_overlapping_base_spec(left, right):
        return False

    names_too_different = token_score < 0.45 and chinese_name_score < 0.28
    if names_too_different:
        return False
    # A product-line conflict is tolerated only with a high token score.
    if "product_line_conflict" in reason_set and token_score < 0.72:
        return False
    return True


def _chinese_bigram_score(left: ProductIdentity, right: ProductIdentity) -> float:
    """Dice similarity of the Chinese character bigrams in two searchable names.

    Brand tokens, Latin letters/digits, non-CJK characters and a fixed list of
    filler words are removed before bigrams are taken. Returns 0.55 when
    either side yields no bigram.
    """
    filler_phrases = (
        "官方", "直營", "公司貨", "專櫃", "正貨", "原廠", "限定", "獨家",
        "期間", "超值", "特惠", "優惠", "新品", "經典", "人氣", "熱銷",
        "必買", "推薦", "任選", "禮盒", "母親節", "超品日", "多款",
        "組", "入", "瓶", "盒", "包", "片", "支", "條",
    )

    def signature(identity: ProductIdentity) -> set[str]:
        stripped = identity.searchable_name
        # Longest brand tokens first, so a shorter token cannot split a longer one.
        for brand in sorted(identity.brand_tokens, key=len, reverse=True):
            stripped = stripped.replace(brand, " ")
        stripped = re.sub(r"[a-z0-9]+", " ", stripped)
        han_only = "".join(char for char in stripped if "\u4e00" <= char <= "\u9fff")
        for phrase in filler_phrases:
            han_only = han_only.replace(phrase, "")
        return {first + second for first, second in zip(han_only, han_only[1:])}

    left_bigrams = signature(left)
    right_bigrams = signature(right)
    if not left_bigrams or not right_bigrams:
        return 0.55
    shared = left_bigrams & right_bigrams
    return 2 * len(shared) / (len(left_bigrams) + len(right_bigrams))


def _has_strong_product_line_signal(
    left: ProductIdentity,
    right: ProductIdentity,
    token_score: float,
    chinese_name_score: float,
) -> bool:
    """Return True when shared non-brand core tokens strongly suggest the same product line.

    Either a shared Latin word / model-like token together with
    ``token_score >= 0.50``, or any shared core token together with
    ``token_score >= 0.56`` and ``chinese_name_score >= 0.45``.
    """
    shared_core = (left.core_tokens & right.core_tokens) - left.brand_tokens - right.brand_tokens

    def is_latin_or_model(token: str) -> bool:
        if re.fullmatch(r"[a-z][a-z0-9-]{3,}", token):
            return True
        return re.fullmatch(r"[a-z]{2,}-?\d+[a-z0-9-]*", token) is not None

    if token_score >= 0.50 and any(is_latin_or_model(token) for token in shared_core):
        return True
    return bool(shared_core) and token_score >= 0.56 and chinese_name_score >= 0.45


def _has_safe_exact_spec_signal(
    left: ProductIdentity,
    right: ProductIdentity,
    token_score: float,
    sequence_score: float,
    type_score: float,
) -> bool:
    """Return True when a single shared base size safely supports an exact match.

    Requires ``type_score >= 0.55``, at most one size mention in each title, a
    shared base size (``_has_overlapping_base_spec``), and either
    ``token_score >= 0.42`` or ``sequence_score >= 0.50``.
    """
    if type_score < 0.55:
        return False
    for identity in (left, right):
        if _spec_mention_count(identity) > 1:
            return False
    if not _has_overlapping_base_spec(left, right):
        return False
    names_similar = token_score >= 0.42 or sequence_score >= 0.50
    return names_similar


def _model_line_tokens(identity: ProductIdentity) -> set[str]:
    """Collect core tokens that name a model or product line.

    Tokens in ``GENERIC_TOKENS`` and tokens flagged by
    ``_is_spec_like_latin_token`` are skipped. From the rest, Latin tokens of
    three or more characters are kept as-is, and for tokens containing
    ``<name>系列`` the ``<name>`` part is kept unless it is generic.
    """
    collected: set[str] = set()
    for token in identity.core_tokens:
        if token in GENERIC_TOKENS or _is_spec_like_latin_token(token):
            continue
        if re.fullmatch(r"[a-z][a-z0-9-]{2,}", token):
            collected.add(token)
        series_names = (
            match.group(1)
            for match in re.finditer(r"([\u4e00-\u9fff]{2,})(?:系列)", token)
        )
        collected.update(name for name in series_names if name not in GENERIC_TOKENS)
    return collected


def _has_model_line_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Return True when both sides have model/line tokens and none is shared."""
    left_lines = _model_line_tokens(left)
    right_lines = _model_line_tokens(right)
    return bool(left_lines) and bool(right_lines) and left_lines.isdisjoint(right_lines)


def _nail_polish_model_codes(identity: ProductIdentity) -> set[str]:
    """Extract nail-polish model codes from the original and searchable names.

    Returns an empty set unless the searchable name mentions a nail product
    term. Recognized codes are ``a10…`` and ``isl…`` style codes; the
    separators ``.``, ``_`` and ``-`` are stripped from each code.
    """
    nail_terms = ("指甲油", "指彩", "美甲")
    if all(term not in identity.searchable_name for term in nail_terms):
        return set()

    code_patterns = (
        r"(?<![a-z0-9])a10[\._-]?\d{2,3}(?:[\._-]?\d{2,3})?(?![a-z0-9])",
        r"(?<![a-z0-9])isl[a-z0-9]*\d{2,3}(?![a-z0-9])",
    )
    haystack = f"{identity.original_name} {identity.searchable_name}".lower()
    return {
        re.sub(r"[\._-]+", "", match.group(0))
        for pattern in code_patterns
        for match in re.finditer(pattern, haystack)
    }


def _has_nail_polish_model_code_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Return True when both nail products carry model codes and none is shared.

    Returns False when neither searchable name mentions a nail product term,
    or when either side has no model code.
    """
    nail_terms = ("指甲油", "指彩", "美甲")
    pair_text = f"{left.searchable_name} {right.searchable_name}"
    if all(term not in pair_text for term in nail_terms):
        return False
    left_codes = _nail_polish_model_codes(left)
    right_codes = _nail_polish_model_codes(right)
    if not left_codes or not right_codes:
        return False
    return left_codes.isdisjoint(right_codes)


def _dedupe_tuple(values: Iterable[str]) -> tuple[str, ...]:
    """Return the truthy values as a tuple, keeping first occurrences in order."""
    # dict keys keep insertion order, so the first occurrence of each value wins.
    return tuple(dict.fromkeys(value for value in values if value))


def _build_evidence_flags(
    *,
    brand_score: float,
    token_score: float,
    spec_score: float,
    sequence_score: float,
    type_score: float,
    shared_anchor: str,
    shared_models: set[str],
    reasons: Iterable[str],
    catalog_count_omission: bool,
) -> tuple[str, ...]:
    """Summarize the match evidence as an ordered tuple of flag names.

    Positive flags come first, in a fixed order, one for each component score
    that reaches its threshold and for each of the anchor / model / omission
    signals that is set. They are followed by every reportable reason found in
    ``reasons``, in the fixed order below. Duplicates are removed.
    """
    # (score, minimum value, flag emitted when the minimum is reached)
    score_flags = (
        (brand_score, 0.95, "brand"),
        (spec_score, 0.85, "spec"),
        (token_score, 0.72, "tokens"),
        (sequence_score, 0.70, "name_sequence"),
        (type_score, 0.95, "product_type"),
    )
    flags: list[str] = [label for score, minimum, label in score_flags if score >= minimum]

    signal_flags = (
        (shared_anchor, "identity_anchor"),
        (shared_models, "model_token"),
        (catalog_count_omission, "catalog_count_omission"),
    )
    flags.extend(label for signal, label in signal_flags if signal)

    reportable_reasons = (
        "unit_comparable",
        "variant_selection_review",
        "variant_option_conflict",
        "variant_descriptor_conflict",
        "pack_quantity_difference",
        "count_conflict",
        "bundle_offer_conflict",
        "multi_component_conflict",
        "multi_component_count_conflict",
        "accessory_case_conflict",
        "refill_pack_conflict",
        "price_ratio_extreme",
        "price_ratio_wide",
    )
    present_reasons = set(reasons)
    flags.extend(reason for reason in reportable_reasons if reason in present_reasons)
    return _dedupe_tuple(flags)


def _number_values(values: Iterable[float]) -> list[float | int]:
    """Normalize numeric values for a payload.

    Values that cannot be converted to float are skipped. Whole numbers are
    returned as ints, everything else rounded to three decimals. A falsy
    ``values`` yields an empty list.
    """
    normalized: list[float | int] = []
    for raw in values or ():
        try:
            as_float = float(raw)
        except (TypeError, ValueError):
            continue
        if as_float.is_integer():
            normalized.append(int(as_float))
        else:
            normalized.append(round(as_float, 3))
    return normalized


def _count_values(values: Iterable[tuple[int, str]]) -> list[str]:
    """Render distinct ``(count, unit)`` pairs as sorted ``"<count><unit>"`` strings."""
    distinct_pairs = set(values or ())
    return [f"{count}{unit}" for count, unit in sorted(distinct_pairs)]


def _identity_spec_payload(identity: ProductIdentity) -> dict[str, object]:
    """Return the parsed specs of one identity as a plain dictionary."""
    payload: dict[str, object] = {}
    for field_name in ("volumes_ml", "weights_g", "dosages_mg"):
        payload[field_name] = _number_values(getattr(identity, field_name))
    payload["counts"] = _count_values(identity.counts)
    payload["total_piece_count"] = identity.total_piece_count
    return payload


def _spec_mismatch_payload(left: ProductIdentity, right: ProductIdentity) -> list[dict[str, object]]:
    """List the spec dimensions on which the two identities disagree.

    ``left`` is reported under ``"momo"`` and ``right`` under
    ``"competitor"``. A dimension is reported when both sides state values and
    none is equal, or when only one side states a value; the latter entries
    have a ``單側缺漏`` label suffix and ``needs_review: True``.
    """
    dimensions = (
        ("volume_ml", "容量", _number_values(left.volumes_ml), _number_values(right.volumes_ml)),
        ("weight_g", "重量", _number_values(left.weights_g), _number_values(right.weights_g)),
        ("dosage_mg", "劑量", _number_values(left.dosages_mg), _number_values(right.dosages_mg)),
        ("count", "入數/件數", _count_values(left.counts), _count_values(right.counts)),
    )
    report: list[dict[str, object]] = []
    for field_name, label, momo_values, competitor_values in dimensions:
        both_stated = bool(momo_values) and bool(competitor_values)
        if both_stated and set(momo_values).isdisjoint(set(competitor_values)):
            entry: dict[str, object] = {
                "field": field_name,
                "label": label,
                "momo": momo_values,
                "competitor": competitor_values,
            }
        elif bool(momo_values) != bool(competitor_values):
            entry = {
                "field": field_name,
                "label": f"{label}單側缺漏",
                "momo": momo_values,
                "competitor": competitor_values,
                "needs_review": True,
            }
        else:
            continue
        report.append(entry)
    return report


def _identity_evidence_payload(
    left: ProductIdentity,
    right: ProductIdentity,
    *,
    brand_score: float,
    token_score: float,
    spec_score: float,
    sequence_score: float,
    type_score: float,
    hard_veto: bool,
    comparison_mode: str,
    match_type: str,
    price_basis: str,
    alert_tier: str,
    shared_anchor: str,
    shared_models: set[str],
    reasons: Iterable[str],
    catalog_count_omission: bool,
) -> dict[str, object]:
    """Assemble the ``identity_evidence_v1`` diagnostics dictionary.

    ``left`` is reported under ``"momo"`` and ``right`` under
    ``"competitor"``. Component scores are rounded to three decimals, token
    collections are sorted, and shared core tokens are capped at 20 entries.
    ``conflict_reasons`` holds every reason containing ``"conflict"`` plus the
    review-worthy reasons listed below.
    """
    review_reasons = {
        "variant_selection_review",
        "catalog_count_omission",
        "pack_quantity_difference",
        "unit_comparable",
    }
    flagged_reasons = sorted(
        reason
        for reason in set(reasons or ())
        if "conflict" in reason or reason in review_reasons
    )

    common_brands = sorted(left.brand_tokens & right.brand_tokens)
    non_brand_core = (left.core_tokens & right.core_tokens) - left.brand_tokens - right.brand_tokens
    common_core = sorted(non_brand_core)[:20]
    same_type = bool(
        left.product_type
        and right.product_type
        and left.product_type == right.product_type
    )

    lane = {
        "comparison_mode": comparison_mode,
        "match_type": match_type,
        "price_basis": price_basis,
        "alert_tier": alert_tier,
    }
    confidence_components = {
        "brand_score": round(brand_score, 3),
        "token_score": round(token_score, 3),
        "spec_score": round(spec_score, 3),
        "sequence_score": round(sequence_score, 3),
        "type_score": round(type_score, 3),
    }
    brand = {
        "momo": sorted(left.brand_tokens),
        "competitor": sorted(right.brand_tokens),
        "shared": common_brands,
    }
    product_type = {
        "momo": left.product_type or "",
        "competitor": right.product_type or "",
        "matched": same_type,
    }
    specs = {
        "momo": _identity_spec_payload(left),
        "competitor": _identity_spec_payload(right),
        "mismatches": _spec_mismatch_payload(left, right),
    }
    variant_guardrails = {
        "hard_veto": bool(hard_veto),
        "conflict_reasons": flagged_reasons,
        "catalog_count_omission": bool(catalog_count_omission),
    }
    return {
        "version": "identity_evidence_v1",
        "lane": lane,
        "confidence_components": confidence_components,
        "brand": brand,
        "product_type": product_type,
        "identity_anchor": shared_anchor or "",
        "shared_model_tokens": sorted(shared_models),
        "shared_core_tokens": common_core,
        "specs": specs,
        "variant_guardrails": variant_guardrails,
    }


def _offer_evidence_payload(
    momo_price: Optional[float],
    competitor_price: Optional[float],
    *,
    price_penalty: float,
    price_basis: str,
    alert_tier: str,
) -> dict[str, object]:
    """Assemble the ``offer_evidence_v1`` price diagnostics dictionary.

    Args:
        momo_price: MOMO price; None or a non-numeric value is omitted.
        competitor_price: Competitor price; None or a non-numeric value is omitted.
        price_penalty: Penalty applied for the price relation; rounded to 3 decimals.
        price_basis: Copied into the payload unchanged.
        alert_tier: Copied into the payload unchanged.

    Returns:
        A dict that always contains the version, basis, tier, penalty and
        ``price_is_identity_evidence: False``. ``momo_price`` and
        ``competitor_price`` are added for each price that parses;
        ``gap_amount`` and ``gap_pct`` are added when both parse and the
        competitor price is positive.
    """
    def to_float(value: object) -> Optional[float]:
        if value is None:
            return None
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    payload: dict[str, object] = {
        "version": "offer_evidence_v1",
        "price_basis": price_basis,
        "alert_tier": alert_tier,
        "price_is_identity_evidence": False,
        "price_penalty": round(price_penalty, 3),
    }
    # Parse each side on its own so that a malformed price on one side does
    # not discard a valid price on the other.
    momo_value = to_float(momo_price)
    competitor_value = to_float(competitor_price)
    if momo_value is not None:
        payload["momo_price"] = round(momo_value, 2)
    if competitor_value is not None:
        payload["competitor_price"] = round(competitor_value, 2)
    if momo_value is not None and competitor_value and competitor_value > 0:
        gap = momo_value - competitor_value
        payload["gap_amount"] = round(gap, 2)
        # The percentage denominator is floored at 1.
        payload["gap_pct"] = round(gap / max(competitor_value, 1) * 100, 2)
    return payload


def _has_safe_multi_component_exact_total_price(
    left: ProductIdentity,
    right: ProductIdentity,
    *,
    brand_score: float,
    token_score: float,
    spec_score: float,
    sequence_score: float,
    type_score: float,
    hard_veto: bool,
    variant_descriptor_conflict: bool,
    reasons: Iterable[str],
) -> bool:
    """Decide whether two multi-component listings may be compared on total price.

    Returns ``True`` only when both sides are multi-component, no veto or
    disqualifying reason is present, brand and spec scores are high, and the
    names agree closely enough for the sets to be treated as the same
    components rather than mixed bundles.
    """
    # Vetoes and variant descriptor conflicts rule the pair out immediately.
    if hard_veto or variant_descriptor_conflict:
        return False
    # Both listings have to be multi-component sets.
    if not _has_multi_component(left) or not _has_multi_component(right):
        return False

    flags = set(reasons)
    disqualifying = (
        "variant_selection_review",
        "variant_option_conflict",
        "variant_descriptor_conflict",
        "pack_quantity_difference",
        "count_conflict",
        "bundle_offer_conflict",
        "multi_component_conflict",
        "multi_component_count_conflict",
        "commercial_condition_gap",
        "refill_pack_conflict",
        "unit_comparable",
        "price_ratio_extreme",
        "price_ratio_wide",
    )
    if not flags.isdisjoint(disqualifying):
        return False
    if brand_score < 0.95 or spec_score < 0.85:
        return False

    counts_align = _has_exact_count_alignment(left, right)
    base_spec_overlaps = _has_overlapping_base_spec(left, right)

    # Path 1: aligned counts with close names, backed either by an overlapping
    # base spec or by a near-identical name plus a strong product-line match.
    if counts_align and type_score >= 0.55 and token_score >= 0.80 and sequence_score >= 0.75:
        near_identical_line = (
            token_score >= 0.90
            and sequence_score >= 0.90
            and "strong_product_line_match" in flags
        )
        if base_spec_overlaps or near_identical_line:
            return True

    # Path 2: requires a near-certain product type agreement.
    if type_score < 0.95:
        return False

    # Aligned counts tolerate looser name similarity than unaligned ones.
    floor = 0.50 if counts_align else 0.62
    names_close = token_score >= floor and sequence_score >= floor
    if not names_close:
        return False
    if counts_align:
        return True
    # Without count alignment an additional corroborating reason is needed.
    corroborating = {"strong_exact_spec_match", "shared_model_token", "spec_name_alignment"}
    return bool(flags & corroborating)


def _classify_match_quality(
    *,
    score: float,
    brand_score: float,
    token_score: float,
    spec_score: float,
    sequence_score: float,
    type_score: float,
    hard_veto: bool,
    comparison_mode: str,
    reasons: Iterable[str],
    shared_anchor: str,
    shared_models: set[str],
    catalog_count_omission: bool,
    multi_component_pair: bool,
) -> tuple[str, str, str]:
    """Map raw matcher scores into operator-facing price comparison lanes."""
    reason_set = set(reasons)
    if comparison_mode == "unit_comparable":
        return "same_product_different_pack", "unit_price", "unit_price_review"

    if hard_veto or comparison_mode == "not_comparable":
        variant_conflict = bool(reason_set & {"variant_option_conflict", "variant_descriptor_conflict"})
        same_line_signal = bool(shared_anchor and brand_score >= 0.95 and type_score >= 0.55)
        if variant_conflict and same_line_signal:
            return "same_line_variant", "manual_review", "suppress"
        return "no_match", "none", "suppress"

    direct_spec_evidence = spec_score >= 0.85 or bool(shared_models)
    focused_total_price_safe = "focused_exact_total_price_safe" in reason_set
    strong_identity_evidence = (
        (
            brand_score >= 0.95
            and type_score >= 0.55
            and score >= 0.86
            and (direct_spec_evidence or (shared_anchor and token_score >= 0.62 and sequence_score >= 0.58))
        )
        or (
            focused_total_price_safe
            and type_score >= 0.55
            and score >= 0.86
        )
    )
    if strong_identity_evidence and not catalog_count_omission:
        if focused_total_price_safe and "variant_selection_review" not in reason_set:
            return "exact", "total_price", "price_alert_exact"
        safe_multi_component_total_price = "safe_multi_component_exact_total_price" in reason_set
        if "variant_selection_review" in reason_set:
            return "exact", "manual_review", "identity_review"
        if multi_component_pair and not safe_multi_component_total_price:
            return "exact", "manual_review", "identity_review"
        return "exact", "total_price", "price_alert_exact"

    if score >= 0.76:
        if catalog_count_omission:
            return "same_product_different_pack", "manual_review", "unit_price_review"
        return "comparable", "manual_review", "identity_review"

    return "no_match", "none", "suppress"


def score_marketplace_match(
    momo_name: str,
    competitor_name: str,
    momo_price: Optional[float] = None,
    competitor_price: Optional[float] = None,
) -> MatchDiagnostics:
    left = parse_product_identity(momo_name)
    right = parse_product_identity(competitor_name)

    brand_score, brand_conflict, brand_reason = _brand_score(left, right)
    token_score = _weighted_token_score(left, right)
    spec_score, spec_conflict, spec_reasons = _spec_score(left, right)
    sequence_score = SequenceMatcher(None, left.searchable_name, right.searchable_name).ratio()
    chinese_name_score = _chinese_bigram_score(left, right)
    nivea_creme_100ml_alignment = _has_nivea_creme_100ml_alignment(left, right)
    cetaphil_moisturizer_type_alignment = _has_cetaphil_moisturizer_type_alignment(left, right)
    type_aligned = (
        left.product_type == right.product_type
        or nivea_creme_100ml_alignment
        or cetaphil_moisturizer_type_alignment
    )
    if left.product_type and right.product_type:
        type_score = 1.0 if type_aligned else 0.0
    else:
        type_score = 0.55

    reasons = []
    if brand_reason:
        reasons.append(brand_reason)
    reasons.extend(spec_reasons)
    if left.product_type and right.product_type and left.product_type != right.product_type and not type_aligned:
        reasons.append("type_conflict")
    if nivea_creme_100ml_alignment:
        reasons.append("nivea_creme_100ml_type_alignment")
    if cetaphil_moisturizer_type_alignment:
        reasons.append("cetaphil_moisturizer_type_alignment")
    model_line_conflict = _has_model_line_conflict(left, right)
    if model_line_conflict:
        reasons.append("model_line_conflict")
    nail_polish_model_code_conflict = _has_nail_polish_model_code_conflict(left, right)
    if nail_polish_model_code_conflict:
        reasons.append("nail_polish_model_code_conflict")
    bundle_offer_conflict = (
        _has_bundle_offer(left) != _has_bundle_offer(right)
        and not (
            left.total_piece_count
            and right.total_piece_count
            and left.total_piece_count == right.total_piece_count
        )
    )
    if bundle_offer_conflict:
        reasons.append("bundle_offer_conflict")
    cushion_refill_pack_alignment = _has_cushion_refill_pack_alignment(left, right)
    paulas_choice_body_lotion_2pack_alignment = _has_paulas_choice_body_lotion_210ml_2pack_alignment(left, right)
    if (
        _has_multi_component(left) != _has_multi_component(right)
        and not cushion_refill_pack_alignment
        and not paulas_choice_body_lotion_2pack_alignment
    ):
        reasons.append("multi_component_conflict")
    if cushion_refill_pack_alignment:
        reasons.append("cushion_refill_pack_alignment")
    if paulas_choice_body_lotion_2pack_alignment:
        reasons.append("paulas_choice_body_lotion_210ml_2pack_alignment")
    multi_component_count_conflict = (
        _has_multi_component(left)
        and _has_multi_component(right)
        and _multi_component_count(left) != _multi_component_count(right)
    )
    if multi_component_count_conflict:
        reasons.append("multi_component_count_conflict")
    if _has_refill_pack(left) != _has_refill_pack(right):
        reasons.append("refill_pack_conflict")
    accessory_case_conflict = _has_accessory_case(left) != _has_accessory_case(right)
    if accessory_case_conflict:
        reasons.append("accessory_case_conflict")
    left_spec_mentions = _spec_mention_count(left)
    right_spec_mentions = _spec_mention_count(right)
    if left_spec_mentions and right_spec_mentions and left_spec_mentions != right_spec_mentions:
        reasons.append("component_count_conflict")
    if chinese_name_score < 0.16:
        reasons.append("product_line_conflict")
    shared_anchor = _shared_identity_anchor(left, right)
    catalog_count_omission = _allow_catalog_count_omission(left, right)
    if catalog_count_omission:
        reasons.append("catalog_count_omission")
    if _has_pack_quantity_difference(left, right):
        reasons.append("pack_quantity_difference")
    named_component_quantity_conflict = _has_named_component_quantity_conflict(left, right)
    if named_component_quantity_conflict:
        reasons.append("named_component_quantity_conflict")
    variant_descriptor_conflict = _has_variant_descriptor_conflict(left, right, shared_anchor)
    sun_protection_line_conflict = (
        variant_descriptor_conflict
        and left.product_type == right.product_type == "防曬"
        and not shared_anchor
    )
    if sun_protection_line_conflict:
        reasons.append("variant_descriptor_conflict")
        reasons.append("sun_protection_line_conflict")
    variant_option_conflict = _has_explicit_variant_option_conflict(left, right, shared_anchor)
    if variant_option_conflict:
        reasons.append("variant_option_conflict")
    saugella_variant_conflict = _has_saugella_private_wash_variant_conflict(left, right)
    if saugella_variant_conflict:
        reasons.append("saugella_variant_conflict")
    lactacyd_variant_conflict = _has_lactacyd_private_wash_variant_conflict(left, right)
    if lactacyd_variant_conflict:
        reasons.append("lactacyd_variant_conflict")
    makeup_usage_conflict = _has_makeup_usage_conflict(left, right)
    if makeup_usage_conflict:
        reasons.append("makeup_usage_conflict")
    makeup_finish_conflict = _has_makeup_finish_conflict(left, right)
    if makeup_finish_conflict:
        reasons.append("makeup_finish_conflict")
    sun_protection_spf_conflict = _has_sun_protection_spf_conflict(left, right)
    if sun_protection_spf_conflict:
        reasons.append("spf_value_conflict")
    makeup_spray_line_conflict = _has_makeup_spray_line_conflict(left, right)
    if makeup_spray_line_conflict:
        reasons.append("makeup_spray_line_conflict")
    romand_lip_line_conflict = _has_romand_lip_line_conflict(left, right)
    if romand_lip_line_conflict:
        reasons.append("romand_lip_line_conflict")
    nail_tool_function_conflict = _has_nail_tool_function_conflict(left, right)
    if nail_tool_function_conflict:
        reasons.append("nail_tool_function_conflict")
    schick_razor_line_conflict = _has_schick_razor_line_conflict(left, right)
    if schick_razor_line_conflict:
        reasons.append("schick_razor_line_conflict")
    lancome_line_conflict = _has_lancome_ultra_line_conflict(left, right)
    if lancome_line_conflict:
        reasons.append("lancome_line_conflict")
    dr_hsieh_line_conflict = _has_dr_hsieh_labsmart_serum_line_conflict(left, right)
    if dr_hsieh_line_conflict:
        reasons.append("dr_hsieh_labsmart_line_conflict")
    cotton_swab_variant_conflict = _has_cotton_swab_variant_conflict(left, right)
    if cotton_swab_variant_conflict:
        reasons.append("cotton_swab_variant_conflict")
    kanebo_milano_type_conflict = _has_kanebo_milano_powder_perfume_conflict(left, right)
    if kanebo_milano_type_conflict:
        reasons.append("kanebo_milano_type_conflict")
    hoi_candle_line_conflict = _has_hoi_candle_line_conflict(left, right)
    if hoi_candle_line_conflict:
        reasons.append("hoi_candle_line_conflict")
    aroma_scent_variant_conflict = _has_aroma_scent_variant_conflict(left, right)
    if aroma_scent_variant_conflict:
        reasons.append("aroma_scent_variant_conflict")
    unknown_scent_variant_conflict = _has_unknown_scent_variant_conflict(left, right)
    if unknown_scent_variant_conflict:
        reasons.append("unknown_scent_variant_conflict")
    nail_polish_color_name_conflict = _has_nail_polish_color_name_conflict(left, right)
    if nail_polish_color_name_conflict:
        reasons.append("nail_polish_color_name_conflict")
    ingredient_line_conflict = _has_core_ingredient_line_conflict(left, right)
    if ingredient_line_conflict:
        reasons.append("core_ingredient_line_conflict")
    clarins_body_oil_line_conflict = _has_clarins_body_oil_line_conflict(left, right)
    if clarins_body_oil_line_conflict:
        reasons.append("clarins_body_oil_line_conflict")
    branded_powder_line_conflict = _has_branded_powder_line_conflict(left, right)
    if branded_powder_line_conflict:
        reasons.append("branded_powder_line_conflict")
    cleanser_lotion_line_conflict = _has_cleanser_lotion_line_conflict(left, right)
    if cleanser_lotion_line_conflict:
        reasons.append("cleanser_lotion_line_conflict")
    selection1990_wax_lamp_design_conflict = _has_selection1990_wax_lamp_design_conflict(left, right)
    if selection1990_wax_lamp_design_conflict:
        reasons.append("selection1990_wax_lamp_design_conflict")
    aroma_lamp_style_selection_gap = _has_aroma_lamp_style_selection_gap(left, right)
    if aroma_lamp_style_selection_gap:
        reasons.append("aroma_lamp_style_selection_gap")
    hooome_wax_lamp_design_gap = _has_hooome_wax_lamp_design_gap(left, right)
    if hooome_wax_lamp_design_gap:
        reasons.append("hooome_wax_lamp_design_gap")
    wax_lamp_size_letter_conflict = _has_wax_lamp_size_letter_conflict(left, right)
    if wax_lamp_size_letter_conflict:
        reasons.append("size_letter_variant_conflict")
    nitori_diffuser_model_conflict = _has_nitori_diffuser_model_conflict(left, right)
    if nitori_diffuser_model_conflict:
        reasons.append("nitori_diffuser_model_conflict")
    commercial_condition_gap = _has_commercial_condition_gap(left, right)
    if commercial_condition_gap:
        reasons.append("commercial_condition_gap")
    relove_private_cleanser_variant_gap = _has_relove_private_cleanser_variant_gap(left, right)
    if relove_private_cleanser_variant_gap:
        reasons.append("relove_private_cleanser_variant_gap")
    candle_catalog_selection_gap = _has_candle_catalog_selection_gap(left, right)
    if candle_catalog_selection_gap:
        reasons.append("candle_catalog_selection_gap")
    bath_additive_variant_gap = _has_bath_additive_variant_gap(left, right)
    if bath_additive_variant_gap:
        reasons.append("bath_additive_variant_gap")
    makeup_catalog_selection_gap = _has_makeup_catalog_selection_gap(left, right)
    if makeup_catalog_selection_gap:
        reasons.append("makeup_catalog_selection_gap")
    loreal_serum_variant_gap = _has_loreal_serum_variant_gap(left, right)
    if loreal_serum_variant_gap:
        reasons.append("loreal_serum_variant_gap")
    sebamed_shampoo_variant_catalog_gap = _has_sebamed_shampoo_variant_catalog_gap(left, right)
    if sebamed_shampoo_variant_catalog_gap:
        reasons.append("sebamed_shampoo_variant_catalog_gap")
    schick_2in1_model_gap = _has_schick_2in1_model_gap(left, right)
    if schick_2in1_model_gap:
        reasons.append("schick_2in1_model_gap")
    taicend_protection_form_gap = _has_taicend_protection_form_gap(left, right)
    if taicend_protection_form_gap:
        reasons.append("taicend_protection_form_gap")
    variant_selection_review = (
        _has_named_variant_selection_review(left, right, shared_anchor)
        or commercial_condition_gap
        or relove_private_cleanser_variant_gap
        or candle_catalog_selection_gap
        or bath_additive_variant_gap
        or aroma_lamp_style_selection_gap
        or hooome_wax_lamp_design_gap
        or makeup_catalog_selection_gap
        or loreal_serum_variant_gap
        or sebamed_shampoo_variant_catalog_gap
        or schick_2in1_model_gap
        or taicend_protection_form_gap
    )
    if variant_selection_review:
        reasons.append("variant_selection_review")

    hard_veto = brand_conflict or spec_conflict
    if bundle_offer_conflict:
        hard_veto = True
    if (
        _has_multi_component(left) != _has_multi_component(right)
        and not cushion_refill_pack_alignment
        and not paulas_choice_body_lotion_2pack_alignment
    ):
        hard_veto = True
    if multi_component_count_conflict:
        hard_veto = True
    if named_component_quantity_conflict:
        hard_veto = True
    if _has_refill_pack(left) != _has_refill_pack(right):
        hard_veto = True
    if accessory_case_conflict:
        hard_veto = True
    if model_line_conflict:
        hard_veto = True
    if nail_polish_model_code_conflict:
        hard_veto = True
    if left_spec_mentions and right_spec_mentions and left_spec_mentions != right_spec_mentions:
        hard_veto = True
    if chinese_name_score < 0.16 and token_score < 0.72:
        hard_veto = True
    if left.product_type and right.product_type and left.product_type != right.product_type and not type_aligned:
        hard_veto = True
    if sun_protection_line_conflict:
        hard_veto = True
    if variant_option_conflict:
        hard_veto = True
    if saugella_variant_conflict:
        hard_veto = True
    if lactacyd_variant_conflict:
        hard_veto = True
    if makeup_usage_conflict:
        hard_veto = True
    if makeup_finish_conflict:
        hard_veto = True
    if sun_protection_spf_conflict:
        hard_veto = True
    if makeup_spray_line_conflict:
        hard_veto = True
    if romand_lip_line_conflict:
        hard_veto = True
    if nail_tool_function_conflict:
        hard_veto = True
    if schick_razor_line_conflict:
        hard_veto = True
    if lancome_line_conflict:
        hard_veto = True
    if dr_hsieh_line_conflict:
        hard_veto = True
    if cotton_swab_variant_conflict:
        hard_veto = True
    if kanebo_milano_type_conflict:
        hard_veto = True
    if hoi_candle_line_conflict:
        hard_veto = True
    if aroma_scent_variant_conflict:
        hard_veto = True
    if unknown_scent_variant_conflict:
        hard_veto = True
    if nail_polish_color_name_conflict:
        hard_veto = True
    if ingredient_line_conflict:
        hard_veto = True
    if clarins_body_oil_line_conflict:
        hard_veto = True
    if branded_powder_line_conflict:
        hard_veto = True
    if cleanser_lotion_line_conflict:
        hard_veto = True
    if selection1990_wax_lamp_design_conflict:
        hard_veto = True
    if wax_lamp_size_letter_conflict:
        hard_veto = True
    if nitori_diffuser_model_conflict:
        hard_veto = True

    focused_exact_line_reason = _has_focused_low_score_exact_identity_line(left, right)
    if focused_exact_line_reason in FOCUSED_IDENTITY_REVIEW_ONLY_REASONS:
        reasons.append("variant_selection_review")
    if (
        focused_exact_line_reason in FOCUSED_IDENTITY_VARIANT_REVIEW_BYPASS_REASONS
        and not commercial_condition_gap
    ):
        reasons = [reason for reason in reasons if reason != "variant_selection_review"]
        variant_selection_review = False
    focused_exact_price_safe = (
        focused_exact_line_reason
        and brand_score >= 0.95
        and not hard_veto
        and spec_score >= 0.45
        and token_score >= 0.30
        and sequence_score >= 0.40
        and not variant_descriptor_conflict
    )
    focused_exact_review_boost_safe = (
        focused_exact_line_reason
        and not hard_veto
        and spec_score >= 0.45
        and token_score >= 0.30
        and sequence_score >= 0.40
        and not variant_descriptor_conflict
        and (
            brand_score >= 0.95
            or (
                focused_exact_line_reason in FOCUSED_IDENTITY_BRANDLESS_REVIEW_REASONS
                and brand_score == 0.55
                and bool(left.brand_tokens) != bool(right.brand_tokens)
                and spec_score >= 0.85
                and token_score >= 0.55
                and sequence_score >= 0.50
            )
        )
    )
    focused_total_price_brand_safe = (
        brand_score >= 0.95
        or (
            focused_exact_line_reason in FOCUSED_IDENTITY_BRANDLESS_TOTAL_PRICE_REASONS
            and brand_score == 0.55
            and bool(left.brand_tokens) != bool(right.brand_tokens)
            and spec_score >= 0.85
            and token_score >= 0.70
            and sequence_score >= 0.55
        )
    )
    focused_exact_total_price_safe = (
        focused_exact_line_reason in FOCUSED_IDENTITY_TOTAL_PRICE_REASONS
        and focused_total_price_brand_safe
        and not hard_veto
        and spec_score >= 0.45
        and token_score >= 0.30
        and sequence_score >= 0.40
        and (
            not variant_descriptor_conflict
            or focused_exact_line_reason == "hanamisui_inclear_private_gel_1_7g_3pack"
        )
        and "variant_selection_review" not in reasons
        and "commercial_condition_gap" not in reasons
    )
    if focused_exact_total_price_safe:
        reasons.append("focused_exact_total_price_safe")
        reasons.append(f"focused_exact_identity_{focused_exact_line_reason}")

    comparison_mode = "exact_identity"
    if _is_unit_comparable_candidate(
        left,
        right,
        token_score,
        chinese_name_score,
        brand_conflict,
        type_score,
        reasons,
    ):
        comparison_mode = "unit_comparable"
        reasons.append("unit_comparable")
    elif hard_veto:
        comparison_mode = "not_comparable"

    price_penalty = 0.0
    try:
        if momo_price and competitor_price:
            ratio = float(competitor_price) / max(float(momo_price), 1.0)
            lip_care_exact_identity = (
                shared_anchor
                and "唇膏" in shared_anchor
                and brand_score >= 0.95
                and not hard_veto
                and spec_score >= 0.99
                and token_score >= 0.50
                and sequence_score >= 0.50
                and not variant_descriptor_conflict
            )
            allow_price_penalty_suppression = (
                shared_anchor
                and len(shared_anchor.replace(" ", "")) >= 7
                and brand_score >= 0.95
                and not hard_veto
                and type_score >= 0.55
                and spec_score >= 0.99
                and token_score >= 0.68
                and sequence_score >= 0.72
            )
            allow_wide_price_penalty_suppression = (
                (
                    shared_anchor
                    and len(shared_anchor.replace(" ", "")) >= 5
                    and brand_score >= 0.95
                    and not hard_veto
                    and type_score >= 0.55
                    and spec_score >= 0.99
                    and token_score >= 0.50
                    and (sequence_score >= 0.55 or lip_care_exact_identity)
                )
                or focused_exact_price_safe
            )
            if (ratio < 0.3 or ratio > 3.2) and token_score < 0.78:
                if allow_price_penalty_suppression:
                    reasons.append("price_penalty_suppressed_exact_identity")
                else:
                    price_penalty = 0.12
                    reasons.append("price_ratio_extreme")
            elif (ratio < 0.48 or ratio > 2.2) and token_score < 0.68:
                if allow_wide_price_penalty_suppression:
                    reasons.append("price_penalty_suppressed_wide_exact_identity")
                else:
                    price_penalty = 0.06
                    reasons.append("price_ratio_wide")
    except (TypeError, ValueError, ZeroDivisionError):
        price_penalty = 0.0

    score = (
        brand_score * 0.20
        + token_score * 0.36
        + spec_score * 0.25
        + sequence_score * 0.12
        + type_score * 0.07
        - price_penalty
    )

    if token_score >= 0.72 and spec_score >= 0.82 and not brand_conflict:
        score += 0.08

    if (
        brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.55
        and spec_score >= 0.55
        and not variant_descriptor_conflict
        and _has_strong_product_line_signal(left, right, token_score, chinese_name_score)
    ):
        score += 0.07
        reasons.append("strong_product_line_match")
    if (
        brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and _has_safe_exact_spec_signal(left, right, token_score, sequence_score, type_score)
    ):
        score += 0.025
        reasons.append("strong_exact_spec_match")
    if (
        cushion_refill_pack_alignment
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and token_score >= 0.65
        and sequence_score >= 0.65
        and not variant_descriptor_conflict
    ):
        score += 0.04
        reasons.append("cushion_refill_pack_alignment_score")
    if (
        focused_exact_review_boost_safe
        and price_penalty == 0
    ):
        score += 0.16
        reasons.append(f"focused_exact_identity_{focused_exact_line_reason}")
    if (
        shared_anchor
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and spec_score >= 0.85
        and (token_score >= 0.43 or sequence_score >= 0.58)
    ):
        score += 0.08
        reasons.append("shared_identity_anchor")
    if (
        shared_anchor
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and spec_score >= 0.55
        and token_score >= 0.70
        and sequence_score >= 0.62
        and not variant_descriptor_conflict
    ):
        score += 0.03
        reasons.append("shared_identity_anchor_no_spec")
    if (
        shared_anchor
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.55
        and spec_score >= 0.45
        and token_score >= 0.56
        and sequence_score >= 0.60
        and not variant_descriptor_conflict
    ):
        score += 0.02
        reasons.append("shared_identity_anchor_packaging_variant")
    if (
        shared_anchor
        and len(shared_anchor.replace(" ", "")) >= 8
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and spec_score >= 0.45
        and token_score >= 0.60
        and sequence_score >= 0.68
        and not variant_descriptor_conflict
    ):
        score += 0.03
        reasons.append("shared_identity_anchor_marketing_variant")
    if (
        shared_anchor
        and len(shared_anchor.replace(" ", "")) >= 5
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.55
        and spec_score >= 0.45
        and token_score >= 0.88
        and not variant_descriptor_conflict
    ):
        score += 0.02
        reasons.append("shared_identity_anchor_core_line")
    if (
        shared_anchor
        and len(shared_anchor.replace(" ", "")) >= 6
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.55
        and spec_score >= 0.45
        and token_score >= 0.86
        and sequence_score >= 0.75
        and not variant_descriptor_conflict
    ):
        score += 0.07
        reasons.append("shared_identity_anchor_exact_line")
    if (
        "無印乾爽止汗爽身乳液" in shared_anchor
        and {"nivea", "妮維雅"} & (left.brand_tokens | right.brand_tokens)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and spec_score >= 0.45
        and token_score >= 0.55
        and sequence_score >= 0.62
        and not variant_descriptor_conflict
    ):
        score += 0.08
        reasons.append("shared_identity_anchor_nivea_dry_lotion")
    if (
        "多效提亮防曬霜" in shared_anchor
        and {"recipe", "box"} <= (left.brand_tokens | right.brand_tokens)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and spec_score >= 0.55
        and token_score >= 0.54
        and sequence_score >= 0.50
        and not variant_descriptor_conflict
    ):
        score += 0.09
        reasons.append("shared_identity_anchor_recipe_box_line")
    if (
        "私密潔浴露" in shared_anchor
        and {"lactacyd", "立朵舒"} & (left.brand_tokens | right.brand_tokens)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and spec_score >= 0.70
        and token_score >= 0.35
        and sequence_score >= 0.50
        and not variant_descriptor_conflict
    ):
        score += 0.10
        reasons.append("shared_identity_anchor_lactacyd_wash")
    if (
        "私密潔膚露" in shared_anchor
        and {"femfresh", "芳芯"} & (left.brand_tokens | right.brand_tokens)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and spec_score >= 0.85
        and token_score >= 0.30
        and sequence_score >= 0.45
        and not variant_descriptor_conflict
    ):
        score += 0.06
        reasons.append("shared_identity_anchor_femfresh_wash")
    if (
        "私密沐浴露" in shared_anchor
        and {"vigill", "婦潔"} & (left.brand_tokens | right.brand_tokens)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and spec_score >= 0.70
        and token_score >= 0.45
        and sequence_score >= 0.55
        and not variant_descriptor_conflict
    ):
        score += 0.06
        reasons.append("shared_identity_anchor_vigill_private_wash")
    if (
        "私密潔淨凝露" in shared_anchor
        and {"relove"} <= (left.brand_tokens | right.brand_tokens)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and spec_score >= 0.85
        and token_score >= 0.30
        and sequence_score >= 0.40
        and not variant_descriptor_conflict
    ):
        score += 0.11
        reasons.append("shared_identity_anchor_relove_cleanser")
    if (
        "柔霧裸唇膏" in shared_anchor
        and {"kate", "凱婷"} & (left.brand_tokens | right.brand_tokens)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.55
        and spec_score >= 0.45
        and token_score >= 0.50
        and sequence_score >= 0.50
        and not variant_descriptor_conflict
    ):
        score += 0.15
        reasons.append("shared_identity_anchor_kate_bare_lip")
    if (
        "閃亮珍珠眼影棒" in shared_anchor
        and {"karadium"} <= (left.brand_tokens | right.brand_tokens)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.55
        and spec_score >= 0.55
        and token_score >= 0.50
        and sequence_score >= 0.60
        and not variant_descriptor_conflict
    ):
        score += 0.12
        reasons.append("shared_identity_anchor_karadium_eye_stick")
    if (
        _has_seche_vite_top_coat_alignment(left, right)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and token_score >= 0.70
        and sequence_score >= 0.70
        and not variant_descriptor_conflict
    ):
        score += 0.04
        reasons.append("shared_identity_anchor_seche_vite_top_coat")
    if (
        _has_xiaomi_s101_shaver_alignment(left, right)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and token_score >= 0.60
        and not variant_descriptor_conflict
    ):
        score += 0.04
        reasons.append("shared_model_token_xiaomi_s101_shaver")
    if (
        _has_hinoki_roller_oil_alignment(left, right)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and spec_score >= 0.85
        and sequence_score >= 0.50
        and not variant_descriptor_conflict
    ):
        score += 0.04
        reasons.append("shared_identity_anchor_hinoki_roller_oil")
    if (
        _has_brush_baby_wildones_toothbrush_alignment(left, right)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and token_score >= 0.78
        and sequence_score >= 0.90
        and not variant_descriptor_conflict
    ):
        score += 0.04
        reasons.append("shared_model_token_brush_baby_wildones")
    if (
        _has_pshine_beauty_foot_file_alignment(left, right)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and token_score >= 0.60
        and sequence_score >= 0.78
        and not variant_descriptor_conflict
    ):
        score += 0.05
        reasons.append("shared_model_token_pshine_beauty_foot_file")
    if (
        _has_catalog_variant_listing_alignment(left, right)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and spec_score >= 0.85
        and type_score >= 0.95
        and sequence_score >= 0.50
        and not variant_descriptor_conflict
    ):
        score += 0.06
        reasons.append("catalog_variant_listing_alignment")
    if (
        _has_baan_baby_lip_catalog_alignment(left, right)
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and token_score >= 0.70
        and sequence_score >= 0.45
        and not variant_descriptor_conflict
    ):
        score += 0.05
        reasons.append("catalog_variant_listing_alignment_baan_lip")
    if (
        shared_anchor
        and len(shared_anchor.replace(" ", "")) >= 5
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.55
        and spec_score >= 0.45
        and token_score >= 0.74
        and sequence_score >= 0.60
        and _shared_variant_descriptors(left, right)
        and not variant_descriptor_conflict
    ):
        score += 0.05
        reasons.append("shared_variant_descriptor_alignment")
    if (
        shared_anchor
        and len(shared_anchor.replace(" ", "")) >= 8
        and not hard_veto
        and price_penalty == 0
        and brand_score == 0.55
        and bool(left.brand_tokens) != bool(right.brand_tokens)
        and type_score >= 0.55
        and spec_score >= 0.55
        and token_score >= 0.80
        and sequence_score >= 0.80
        and chinese_name_score >= 0.42
        and not variant_descriptor_conflict
    ):
        score += 0.09
        reasons.append("brandless_exact_identity")
    if (
        shared_anchor
        and len(shared_anchor.replace(" ", "")) >= 6
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and spec_score >= 0.85
        and token_score >= 0.30
        and sequence_score >= 0.50
        and not variant_descriptor_conflict
    ):
        score += 0.06
        reasons.append("shared_identity_anchor_reordered_line")
    if (
        shared_anchor
        and len(shared_anchor.replace(" ", "")) >= 4
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.95
        and spec_score >= 0.65
        and token_score >= 0.50
        and sequence_score >= 0.50
        and _has_exact_count_alignment(left, right)
        and not variant_descriptor_conflict
    ):
        score += 0.05
        reasons.append("shared_identity_anchor_bundle_equivalent")
    if (
        shared_anchor
        and len(shared_anchor.replace(" ", "")) >= 6
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and type_score >= 0.55
        and spec_score >= 0.45
        and token_score >= 0.58
        and sequence_score >= 0.50
        and not variant_descriptor_conflict
    ):
        score += 0.025
        reasons.append("shared_identity_anchor_variant_safe")
    if (
        brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and spec_score >= 0.99
        and token_score >= 0.44
        and sequence_score >= 0.60
        and type_score >= 0.55
    ):
        score += 0.025
        reasons.append("spec_name_alignment")
    shared_models = _shared_model_tokens(left, right)
    if (
        shared_models
        and brand_score >= 0.95
        and not hard_veto
        and price_penalty == 0
        and token_score >= 0.50
        and sequence_score >= 0.62
    ):
        score += 0.04
        reasons.append("shared_model_token")
    if variant_descriptor_conflict and spec_score < 0.85:
        score -= 0.05
        reasons.append("variant_descriptor_conflict")
    if (
        brand_score >= 0.95
        and not hard_veto
        and not reasons
        and price_penalty == 0
        and type_score >= 0.95
        and token_score >= 0.82
        and spec_score >= 0.40
        and chinese_name_score >= 0.65
    ):
        score += 0.04
        reasons.append("strong_component_line_match")
    if hard_veto:
        score = min(score, 0.74 if comparison_mode == "unit_comparable" else 0.32)
    score = max(0.0, min(1.0, score))
    if _has_safe_multi_component_exact_total_price(
        left,
        right,
        brand_score=brand_score,
        token_score=token_score,
        spec_score=spec_score,
        sequence_score=sequence_score,
        type_score=type_score,
        hard_veto=hard_veto,
        variant_descriptor_conflict=variant_descriptor_conflict,
        reasons=reasons,
    ):
        reasons.append("safe_multi_component_exact_total_price")
    reason_tuple = _dedupe_tuple(reasons)
    match_type, price_basis, alert_tier = _classify_match_quality(
        score=score,
        brand_score=brand_score,
        token_score=token_score,
        spec_score=spec_score,
        sequence_score=sequence_score,
        type_score=type_score,
        hard_veto=hard_veto,
        comparison_mode=comparison_mode,
        reasons=reason_tuple,
        shared_anchor=shared_anchor,
        shared_models=shared_models,
        catalog_count_omission=catalog_count_omission,
        multi_component_pair=_has_multi_component(left) and _has_multi_component(right),
    )
    evidence_flags = _build_evidence_flags(
        brand_score=brand_score,
        token_score=token_score,
        spec_score=spec_score,
        sequence_score=sequence_score,
        type_score=type_score,
        shared_anchor=shared_anchor,
        shared_models=shared_models,
        reasons=reason_tuple,
        catalog_count_omission=catalog_count_omission,
    )
    identity_evidence = _identity_evidence_payload(
        left,
        right,
        brand_score=brand_score,
        token_score=token_score,
        spec_score=spec_score,
        sequence_score=sequence_score,
        type_score=type_score,
        hard_veto=hard_veto,
        comparison_mode=comparison_mode,
        match_type=match_type,
        price_basis=price_basis,
        alert_tier=alert_tier,
        shared_anchor=shared_anchor,
        shared_models=shared_models,
        reasons=reason_tuple,
        catalog_count_omission=catalog_count_omission,
    )
    offer_evidence = _offer_evidence_payload(
        momo_price,
        competitor_price,
        price_penalty=price_penalty,
        price_basis=price_basis,
        alert_tier=alert_tier,
    )

    return MatchDiagnostics(
        score=round(score, 3),
        brand_score=round(brand_score, 3),
        token_score=round(token_score, 3),
        spec_score=round(spec_score, 3),
        sequence_score=round(sequence_score, 3),
        type_score=round(type_score, 3),
        price_penalty=round(price_penalty, 3),
        hard_veto=hard_veto,
        reasons=reason_tuple,
        comparison_mode=comparison_mode,
        match_type=match_type,
        price_basis=price_basis,
        alert_tier=alert_tier,
        evidence_flags=evidence_flags,
        identity_evidence=identity_evidence,
        offer_evidence=offer_evidence,
    )


def _clean_search_phrase(value: str) -> str:
    """Strip search noise from ``value`` and return a space-separated phrase.

    The text is normalized with ``normalize_product_text``, every entry of
    ``SEARCH_NOISE_PHRASES`` is blanked out, punctuation is collapsed to
    spaces (decimal points between digits survive), and whole tokens listed
    in ``SEARCH_NOISE_TOKENS`` or ``GENERIC_TOKENS`` are dropped.
    """
    cleaned = normalize_product_text(value)

    # Longest phrases are removed first; presumably so that a short phrase
    # cannot break up a longer phrase that contains it.
    for noise in sorted(SEARCH_NOISE_PHRASES, key=len, reverse=True):
        cleaned = cleaned.replace(noise.lower(), " ")

    # Shield "1.5"-style decimal points behind a word-character placeholder
    # so the punctuation sweep below leaves them alone, then restore them.
    cleaned = re.sub(r"(?<=\d)\.(?=\d)", "DECIMALPOINT", cleaned)
    cleaned = re.sub(r"[^\w\u4e00-\u9fff]+", " ", cleaned)
    cleaned = cleaned.replace("DECIMALPOINT", ".").replace("decimalpoint", ".")

    kept_words = [
        word
        for word in cleaned.split()
        if not (word in SEARCH_NOISE_TOKENS or word in GENERIC_TOKENS)
    ]
    return re.sub(r"\s+", " ", " ".join(kept_words)).strip()


def _search_spec_terms(identity: ProductIdentity) -> list[str]:
    """Build short spec strings (such as ``500ml``) from ``identity``.

    Only the first entry of ``volumes_ml``, ``weights_g`` and ``dosages_mg``
    is used, in that order. A truthy ``total_piece_count`` is appended last
    with the ``包`` suffix.
    """
    measurements = (
        (identity.volumes_ml, "ml"),
        (identity.weights_g, "g"),
        (identity.dosages_mg, "mg"),
    )
    # The ``g`` format drops a trailing ``.0`` so 500.0 renders as "500".
    terms = [f"{values[0]:g}{unit}" for values, unit in measurements if values]

    piece_count = identity.total_piece_count
    if piece_count:
        terms.append(f"{piece_count}包")
    return terms


def _extract_anchor_phrases(token: str) -> list[str]:
    """Return the identity anchor phrases found in ``token``.

    Two sources are combined, in this order:

    1. A set of hand-written rules that emit a fixed canonical phrase when
       specific keywords co-occur in the cleaned (or normalized) text.
    2. Every entry of ``SEARCH_IDENTITY_ANCHORS`` whose cleaned form occurs
       in the cleaned text, optionally widened with a few preceding CJK
       characters.

    Args:
        token: Raw text to scan (a single token or a whole product name).

    Returns:
        Phrases in discovery order without duplicates; empty when nothing
        matches.
    """
    normalized = normalize_product_text(token)
    cleaned = _clean_search_phrase(token)
    if not cleaned:
        # Cleaning left nothing; "經典乳霜" is still looked up in the
        # normalized text (presumably because cleaning can strip it away).
        if "經典乳霜" in normalized:
            return ["經典乳霜"]
        return []

    phrases: list[str] = []
    # Hand-written rules: each emits one fixed phrase when its keywords are
    # present. Some test ``cleaned`` and others test ``normalized``.
    if "經典旋轉眉筆" in cleaned:
        phrases.append("經典旋轉眉筆")
    if "無印乾爽" in cleaned and "止汗爽身乳液" in cleaned:
        phrases.append("無印乾爽止汗爽身乳液")
    if "智能光感應" in cleaned and "無線自動除臭芳香噴霧機" in cleaned:
        phrases.append("智能光感應無線自動除臭芳香噴霧機")
    if "悠斯晶" in normalized and "經典乳霜" in normalized:
        phrases.append("悠斯晶經典乳霜")
    if "經典乳霜" in normalized:
        phrases.append("經典乳霜")
    # Any of three product-name keywords maps to the same canonical phrase.
    if "蜂王玫瑰" in cleaned and any(
        keyword in cleaned for keyword in ("外泌微臻霜", "微泌新生霜", "瑰泌霜")
    ):
        phrases.append("蜂王玫瑰瑰泌霜")
    if "瞬效" in cleaned and "b5" in cleaned and "玻尿酸" in cleaned and "精華" in cleaned:
        phrases.append("瞬效b5玻尿酸精華")
    if "慕之幼" in cleaned and "爽身潤膚乳" in cleaned:
        phrases.append("慕之幼爽身潤膚乳")
    # Generic anchors from the shared anchor table.
    for anchor in SEARCH_IDENTITY_ANCHORS:
        anchor_phrase = _clean_search_phrase(anchor)
        if not anchor_phrase or anchor_phrase not in cleaned:
            continue
        if re.search(r"[\u4e00-\u9fff]", anchor_phrase):
            # Anchors containing CJK text may absorb preceding CJK characters:
            # none for anchors of 5+ characters, up to 4 for anchors of 3-4
            # characters, up to 6 for shorter ones.
            prefix_width = 0 if len(anchor_phrase) >= 5 else (4 if len(anchor_phrase) >= 3 else 6)
            match = re.search(rf"([\u4e00-\u9fff]{{0,{prefix_width}}}{re.escape(anchor_phrase)})", cleaned)
            phrase = match.group(1) if match else anchor_phrase
        else:
            phrase = anchor_phrase
        # Re-clean because the widened phrase may have picked up noise.
        phrase = _clean_search_phrase(phrase)
        # Drop a leading "款" captured by the prefix widening.
        if phrase.startswith("款") and len(phrase) > 2:
            phrase = phrase[1:]
        # Skip a phrase that merely extends one already collected.
        if any(existing in phrase and existing != phrase for existing in phrases):
            continue
        if len(phrase) >= 2 and phrase not in phrases:
            phrases.append(phrase)
    return phrases


def _shared_identity_anchor(left: ProductIdentity, right: ProductIdentity) -> str:
    """Return the longest anchor phrase shared by both identities, or ``""``.

    Anchors are extracted from each identity's core tokens, normalized name
    and searchable name. Two anchors count as shared when they are equal
    ignoring spaces, or when one of at least five characters is contained in
    the other. Shared anchors shorter than five characters (ignoring spaces)
    or listed in ``SEARCH_BROAD_ANCHORS`` are discarded. Ties on length are
    broken by string order.
    """

    def _anchors_of(identity: ProductIdentity) -> set[str]:
        sources = [*identity.core_tokens, identity.normalized_name, identity.searchable_name]
        found: set[str] = set()
        for source in sources:
            found.update(_extract_anchor_phrases(source))
        return found

    def _compact(anchor: str) -> str:
        return anchor.replace(" ", "")

    left_anchors = _anchors_of(left)
    right_anchors = _anchors_of(right)

    candidates: set[str] = set()
    for mine in left_anchors:
        mine_key = _compact(mine)
        for theirs in right_anchors:
            theirs_key = _compact(theirs)
            if mine_key == theirs_key or (len(mine_key) >= 5 and mine_key in theirs_key):
                candidates.add(mine)
            elif len(theirs_key) >= 5 and theirs_key in mine_key:
                # The contained (shorter) anchor is the one both sides share.
                candidates.add(theirs)

    eligible = [
        anchor
        for anchor in candidates
        if len(_compact(anchor)) >= 5 and anchor not in SEARCH_BROAD_ANCHORS
    ]
    # Longest first, then string order; the key is unique per anchor.
    return min(eligible, key=lambda anchor: (-len(_compact(anchor)), anchor), default="")


def _shared_model_tokens(left: ProductIdentity, right: ProductIdentity) -> set[str]:
    """Return core tokens common to both identities that look like model codes.

    A token qualifies when it is at least four characters long, contains both
    an ASCII lowercase letter and a digit, and is not flagged by
    ``_is_spec_like_latin_token``.
    """
    models: set[str] = set()
    for token in left.core_tokens & right.core_tokens:
        if len(token) < 4:
            continue
        if not (re.search(r"[a-z]", token) and re.search(r"\d", token)):
            continue
        if _is_spec_like_latin_token(token):
            continue
        models.add(token)
    return models


def _variant_descriptors(identity: ProductIdentity) -> set[str]:
    """Collect the leftover variant wording of each core token.

    For every core token the anchor phrases are cut out, the remainder is
    cleaned and compacted (spaces removed), and the result is kept unless it
    is shorter than two characters, equals a brand, is a known noise token or
    broad anchor, contains a ``VARIANT_DESCRIPTOR_NOISE_KEYWORDS`` entry, or
    consists only of ASCII lowercase letters, digits and hyphens. A trailing
    ``款`` is removed from kept descriptors.
    """
    own_brands = {brand.replace(" ", "") for brand in identity.brand_tokens}
    found: set[str] = set()
    for token in identity.core_tokens:
        remainder = token
        # Longest anchors first so nested anchors do not leave fragments.
        anchors = sorted(_extract_anchor_phrases(token), key=len, reverse=True)
        for anchor in anchors:
            remainder = remainder.replace(anchor, " ")
        candidate = _clean_search_phrase(remainder).replace(" ", "")
        rejected = (
            len(candidate) < 2
            or candidate in own_brands
            or candidate in SEARCH_NOISE_TOKENS
            or candidate in SEARCH_BROAD_ANCHORS
            or any(keyword in candidate for keyword in VARIANT_DESCRIPTOR_NOISE_KEYWORDS)
            or re.fullmatch(r"[a-z0-9-]+", candidate)
        )
        if rejected:
            continue
        found.add(candidate.removesuffix("款"))
    found.discard("")
    return found


def _shared_variant_descriptors(left: ProductIdentity, right: ProductIdentity) -> set[str]:
    """Return variant descriptors that both identities have in common.

    Descriptors match when they are equal, or when one of at least two
    characters is a substring of the other; in the substring case the
    contained descriptor is the one reported.
    """
    left_pool = _variant_descriptors(left)
    right_pool = _variant_descriptors(right)
    overlap: set[str] = set()
    for mine in left_pool:
        for theirs in right_pool:
            if mine == theirs or (len(mine) >= 2 and mine in theirs):
                overlap.add(mine)
            elif len(theirs) >= 2 and theirs in mine:
                overlap.add(theirs)
    return overlap


def _has_serum_formulation_conflict(left: ProductIdentity, right: ProductIdentity, shared_anchor: str) -> bool:
    """Report whether two serum listings name different formulations.

    Only applies when ``shared_anchor`` contains ``精華``. Each normalized
    name is scanned for ``精華乳``, ``精華霜`` and ``精華液`` in that order;
    the first hit per side is compared. A side without any hit never
    conflicts.
    """
    if "精華" not in shared_anchor:
        return False

    def _first_formulation(name: str) -> Optional[str]:
        for formulation in ("精華乳", "精華霜", "精華液"):
            if formulation in name:
                return formulation
        return None

    left_form = _first_formulation(left.normalized_name)
    right_form = _first_formulation(right.normalized_name)
    if left_form is None or right_form is None:
        return False
    return left_form != right_form


def _has_saugella_private_wash_variant_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two Saugella listings name disjoint variants.

    Both searchable names must mention the brand (``saugella`` or
    ``賽吉兒``). A conflict needs at least one variant keyword on each side
    and no keyword in common.
    """
    brand_markers = ("saugella", "賽吉兒")
    variant_markers = ("日用", "日用型", "加強", "潤澤", "黃金女郎型")

    hits_per_side = []
    for text in (left.searchable_name, right.searchable_name):
        if not any(marker in text for marker in brand_markers):
            return False
        hits_per_side.append({marker for marker in variant_markers if marker in text})

    left_hits, right_hits = hits_per_side
    if not left_hits or not right_hits:
        return False
    return left_hits.isdisjoint(right_hits)


def _has_lactacyd_private_wash_variant_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two Lactacyd listings name disjoint variants.

    Both searchable names must mention the brand (``lactacyd`` or
    ``立朵舒``). A conflict needs at least one variant keyword on each side
    and no keyword in common.
    """

    def _is_lactacyd(text: str) -> bool:
        return "lactacyd" in text or "立朵舒" in text

    left_text, right_text = left.searchable_name, right.searchable_name
    if not _is_lactacyd(left_text) or not _is_lactacyd(right_text):
        return False

    variants = (
        "清新舒涼",
        "生理呵護",
        "滋潤緊緻",
        "加倍修護",
        "柔軟滋潤",
        "亮肌柔滑",
        "全日清爽",
    )
    left_variants = {name for name in variants if name in left_text}
    right_variants = {name for name in variants if name in right_text}
    if not (left_variants and right_variants):
        return False
    return left_variants.isdisjoint(right_variants)


def _has_makeup_usage_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether one listing is a cheek product and the other an eye product.

    Each searchable name is mapped to the set of usages it mentions
    (``cheek`` and/or ``eye``). A conflict is reported when both sides
    mention at least one usage and the two usage sets differ.

    Comparing the sets, rather than cross-checking individual flags, keeps a
    multi-use listing that names both cheek and eye terms from conflicting
    with an identical listing on the other marketplace. All other inputs
    give the same answer as the flag cross-check did.

    Args:
        left: First product identity; only ``searchable_name`` is read.
        right: Second product identity; only ``searchable_name`` is read.

    Returns:
        ``True`` when the usage wording of the two listings disagrees.
    """
    usage_terms = {
        "cheek": ("頰彩", "腮紅", "blush"),
        "eye": ("眼彩", "眼影", "eyeshadow"),
    }

    def _usages(text: str) -> set[str]:
        return {
            usage
            for usage, terms in usage_terms.items()
            if any(term in text for term in terms)
        }

    left_usages = _usages(left.searchable_name)
    right_usages = _usages(right.searchable_name)
    if not left_usages or not right_usages:
        # A side without any usage wording gives nothing to contradict.
        return False
    return left_usages != right_usages


def _has_makeup_finish_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report a matte-versus-satin mismatch between two MAC MACximal lipsticks.

    Requires ``mac`` in the brand tokens of both sides and both searchable
    names to contain ``macximal`` and ``唇膏``. A conflict is one side
    mentioning a matte term while the other mentions a satin term.

    NOTE(review): a listing that names both finishes conflicts with an
    identical copy of itself; confirm whether that is intended.
    """
    left_text, right_text = left.searchable_name, right.searchable_name
    if "mac" not in (left.brand_tokens & right.brand_tokens):
        return False
    for text in (left_text, right_text):
        if "macximal" not in text or "唇膏" not in text:
            return False

    def _mentions(text: str, terms: tuple) -> bool:
        return any(term in text for term in terms)

    matte = ("柔霧", "霧面", "matte")
    satin = ("緞光", "satin")
    matte_vs_satin = _mentions(left_text, matte) and _mentions(right_text, satin)
    satin_vs_matte = _mentions(left_text, satin) and _mentions(right_text, matte)
    return matte_vs_satin or satin_vs_matte


def _spf_values(identity: ProductIdentity) -> set[int]:
    """Return every SPF number written in the identity's normalized name.

    Matches ``spf`` (any letter case), optional whitespace, then one to
    three digits.
    """
    digit_runs = re.findall(r"spf\s*(\d{1,3})", identity.normalized_name, flags=re.I)
    return {int(digits) for digits in digit_runs}


def _has_sun_protection_spf_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two sun-care listings state non-overlapping SPF values.

    The check only runs when either searchable name contains ``防曬``,
    ``素顏霜`` or ``spf``. Both sides must state at least one SPF value for
    a conflict to be possible.
    """
    combined = f"{left.searchable_name} {right.searchable_name}"
    relevant = any(marker in combined for marker in ("防曬", "素顏霜", "spf"))
    if not relevant:
        return False

    left_values = _spf_values(left)
    right_values = _spf_values(right)
    if not left_values or not right_values:
        return False
    return left_values.isdisjoint(right_values)


def _makeup_spray_line_groups(identity: ProductIdentity) -> set[str]:
    """Classify a listing's searchable name into makeup-spray line groups.

    Returns the names of all groups for which at least one marker occurs in
    the name; the set is empty when no marker is present.
    """
    name = identity.searchable_name
    group_markers = (
        ("setting_spray", ("fix+", "定妝噴霧", "超持妝")),
        ("activating_water", ("活氧水", "激活版")),
        ("serum_variant", ("精華版",)),
        ("oil_control", ("控油", "黑特霧")),
    )
    return {
        group
        for group, markers in group_markers
        if any(marker in name for marker in markers)
    }


def _has_makeup_spray_line_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report a setting-spray versus activating-water mismatch.

    Runs only when either searchable name contains one of the spray gate
    terms. A conflict is one side classified as ``setting_spray`` while the
    other is classified as ``activating_water``.
    """
    combined = f"{left.searchable_name} {right.searchable_name}"
    gate_terms = ("定妝噴霧", "活氧水", "fix+", "超光肌", "超持妝")
    if not any(term in combined for term in gate_terms):
        return False

    left_groups = _makeup_spray_line_groups(left)
    right_groups = _makeup_spray_line_groups(right)
    if not (left_groups and right_groups):
        return False

    opposed = ("setting_spray", "activating_water")
    return any(
        mine in left_groups and theirs in right_groups
        for mine, theirs in (opposed, opposed[::-1])
    )


def _has_makeup_spray_variant_selection_gap(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report differing spray line groups that are not an outright conflict.

    Both sides must be classified into at least one spray group and must not
    be a ``_has_makeup_spray_line_conflict`` pair; the result is then
    whether their group sets differ.
    """
    left_groups = _makeup_spray_line_groups(left)
    right_groups = _makeup_spray_line_groups(right)
    if left_groups and right_groups and not _has_makeup_spray_line_conflict(left, right):
        return left_groups != right_groups
    return False


def _romand_lip_line_groups(identity: ProductIdentity) -> set[str]:
    """Classify a listing's searchable name into rom&nd lip product lines.

    Returns the names of all lines for which at least one marker occurs in
    the name; the set is empty when no marker is present.
    """
    name = identity.searchable_name
    line_markers = (
        ("juicy", ("果汁唇釉", "juicy")),
        ("zero_velvet", ("零絲絨", "zero velvet", "霧面唇釉")),
        ("glasting", ("果凍唇釉", "glasting", "唇凍")),
        ("water_gloss", ("水感唇釉",)),
    )
    return {
        line
        for line, markers in line_markers
        if any(marker in name for marker in markers)
    }


def _has_romand_lip_line_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two rom&nd lip listings belong to disjoint product lines.

    The pair is treated as rom&nd when either side's brand tokens contain
    ``rom`` or ``romand``, or either searchable name contains ``rom&nd`` or
    ``romand``. Both names must contain ``唇``. A conflict needs each side to
    fall into at least one lip line with no line in common.
    """
    left_name = left.searchable_name
    right_name = right.searchable_name
    combined = f"{left_name} {right_name}"

    brand_hit = {"rom", "romand"} & (left.brand_tokens | right.brand_tokens)
    text_hit = "rom&nd" in combined or "romand" in combined
    if not (brand_hit or text_hit):
        return False
    if not ("唇" in left_name and "唇" in right_name):
        return False

    left_lines = _romand_lip_line_groups(left)
    right_lines = _romand_lip_line_groups(right)
    if not left_lines or not right_lines:
        return False
    return left_lines.isdisjoint(right_lines)


def _has_nail_tool_function_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report a cleaning-stick versus cuticle-plane mismatch for ERBE nail tools.

    Requires ``erbe`` in the brand tokens of both sides and ``指甲`` in both
    searchable names. A conflict is one side naming a cleaning tool while
    the other names a plane tool.
    """
    left_text, right_text = left.searchable_name, right.searchable_name
    if "erbe" not in (left.brand_tokens & right.brand_tokens):
        return False
    if not ("指甲" in left_text and "指甲" in right_text):
        return False

    def _mentions(text: str, terms: tuple) -> bool:
        return any(term in text for term in terms)

    cleaning = ("清垢棒", "清潔棒")
    plane = ("指甲緣刨刀", "刨刀")
    if _mentions(left_text, cleaning) and _mentions(right_text, plane):
        return True
    return _mentions(left_text, plane) and _mentions(right_text, cleaning)


def _has_yes_nail_tool_exact_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two YES nail-tool listings describe the same exact item.

    Requires ``yes`` or ``德悅氏`` in the brand tokens of both sides and a
    nail-tool keyword in either searchable name. Alignment holds when both
    names share one of the known product lines together with its size (and
    one of its options, where the line defines any). Plain nail clippers
    fall back to a shared size plus a shared finish, provided neither name
    contains an excluded clipper keyword.
    """
    shared_brands = left.brand_tokens & right.brand_tokens
    if not ({"yes", "德悅氏"} & shared_brands):
        return False

    left_text = left.searchable_name
    right_text = right.searchable_name
    combined = f"{left_text} {right_text}"
    if not any(term in combined for term in ("指甲剪", "銼刀", "腳皮銼", "拋光棒")):
        return False

    def _in_both(term: str) -> bool:
        return term in left_text and term in right_text

    # (line name, accepted sizes, options of which one must be shared)
    known_lines = (
        ("指甲剪附除垢銼刀", ("8cm",), ("亮面", "霧面")),
        ("腳皮銼腳板", ("23.5cm",), ()),
        ("藍寶石銼刀", ("9cm",), ("可收納",)),
        ("指甲拋光棒", ("17.5cm",), ("三面",)),
    )
    for line, sizes, options in known_lines:
        line_and_size = _in_both(line) and any(map(_in_both, sizes))
        if line_and_size and (not options or any(map(_in_both, options))):
            return True

    # Fallback: plain clippers with none of the excluded keywords.
    if not _in_both("指甲剪"):
        return False
    excluded = ("附除垢", "腳", "硬皮", "鋒利窄弧型")
    if any(term in left_text or term in right_text for term in excluded):
        return False
    same_size = any(_in_both(size) for size in ("6cm", "8cm"))
    return same_size and any(_in_both(finish) for finish in ("亮面", "霧面", "不掉屑"))


def _has_schick_razor_line_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report a ``舒芙`` versus ``舒綺`` mismatch between Schick women's razors.

    Requires ``schick`` or ``舒適牌`` in the brand tokens of both sides, and
    both ``仕女`` and ``除毛刀`` somewhere in the combined searchable names.
    """
    left_text = left.searchable_name
    right_text = right.searchable_name
    shared_brands = left.brand_tokens & right.brand_tokens
    if not ({"schick", "舒適牌"} & shared_brands):
        return False

    combined = f"{left_text} {right_text}"
    if "除毛刀" not in combined or "仕女" not in combined:
        return False

    # Local flag names follow the original code's labels for the two lines.
    silk_vs_intuition = "舒芙" in left_text and "舒綺" in right_text
    intuition_vs_silk = "舒綺" in left_text and "舒芙" in right_text
    return silk_vs_intuition or intuition_vs_silk


def _has_lancome_ultra_line_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report a mismatch between two Lancôme product lines.

    Both searchable names must mention ``lancome`` or ``蘭蔻``. A conflict
    is one side naming a term from the first keyword group while the other
    names a term from the second group.
    """
    left_text, right_text = left.searchable_name, right.searchable_name

    def _is_lancome(text: str) -> bool:
        return "lancome" in text or "蘭蔻" in text

    if not (_is_lancome(left_text) and _is_lancome(right_text)):
        return False

    def _mentions(text: str, terms: tuple) -> bool:
        return any(term in text for term in terms)

    # Group labels follow the original local variable names.
    glow = ("超極光", "極光水", "晶露", "活粹晶露", "四重酸")
    genifique = ("超極限", "肌因", "小黑瓶", "賦活露", "肌因精華")
    if _mentions(left_text, glow) and _mentions(right_text, genifique):
        return True
    return _mentions(left_text, genifique) and _mentions(right_text, glow)


def _has_dr_hsieh_labsmart_serum_line_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report a LabSmart-line versus multi-repair serum mismatch.

    Both searchable names must contain ``dr`` or ``達特醫`` and both product
    types must be ``精華``. A conflict is one side naming a LabSmart term
    while the other names a repair-serum term and no LabSmart term.

    NOTE(review): the ``dr`` gate is a plain substring test, so any name
    containing those two letters passes it; confirm this is acceptable.
    """
    left_text = left.searchable_name
    right_text = right.searchable_name

    def _is_brand(text: str) -> bool:
        return "dr" in text or "達特醫" in text

    if not (_is_brand(left_text) and _is_brand(right_text)):
        return False
    if not (left.product_type == "精華" and right.product_type == "精華"):
        return False

    def _mentions(text: str, terms: tuple) -> bool:
        return any(term in text for term in terms)

    labsmart = ("labsmart", "hi tech", "hi-tech", "classic")
    repair = ("神經醯胺多重修復", "多重修復保濕精華", "多重修復保濕精華液")
    left_labsmart = _mentions(left_text, labsmart)
    right_labsmart = _mentions(right_text, labsmart)

    if left_labsmart and _mentions(right_text, repair) and not right_labsmart:
        return True
    return right_labsmart and _mentions(left_text, repair) and not left_labsmart


def _has_cotton_swab_variant_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two cotton-swab listings name disjoint variants.

    Both searchable names must contain ``棉棒``. A conflict needs at least
    one variant keyword (``細軸`` or ``黑色``) on each side and none shared.
    """
    names = (left.searchable_name, right.searchable_name)
    if any("棉棒" not in name for name in names):
        return False

    variants = ("細軸", "黑色")
    left_variants, right_variants = (
        {variant for variant in variants if variant in name} for name in names
    )
    if not (left_variants and right_variants):
        return False
    return left_variants.isdisjoint(right_variants)


def _has_kanebo_milano_powder_perfume_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report a powder-versus-fragrance mismatch in Kanebo Milano Collection.

    Requires ``kanebo`` or ``佳麗寶`` in the brand tokens of both sides and
    both searchable names to mention ``milano``, ``米蘭`` or ``collection``.
    A conflict is one side naming a powder term while the other names a
    fragrance term.
    """
    left_text = left.searchable_name
    right_text = right.searchable_name
    shared_brands = left.brand_tokens & right.brand_tokens
    if not ({"kanebo", "佳麗寶"} & shared_brands):
        return False

    def _mentions(text: str, terms: tuple) -> bool:
        return any(term in text for term in terms)

    collection = ("milano", "米蘭", "collection")
    if not (_mentions(left_text, collection) and _mentions(right_text, collection)):
        return False

    powder = ("蜜粉", "粉餅")
    fragrance = ("香水", "淡香精", "淡香水", "perfume")
    if _mentions(left_text, powder) and _mentions(right_text, fragrance):
        return True
    return _mentions(right_text, powder) and _mentions(left_text, fragrance)


def _has_hoi_candle_line_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report a mismatch between two HOI candle lines.

    Requires ``hoi`` in the brand tokens of both sides and ``蠟燭`` in both
    searchable names. A conflict is one side naming ``日京山風`` while the
    other names one of the lab-line terms.
    """
    left_text = left.searchable_name
    right_text = right.searchable_name
    if "hoi" not in (left.brand_tokens & right.brand_tokens):
        return False
    if not ("蠟燭" in left_text and "蠟燭" in right_text):
        return False

    def _mentions(text: str, terms: tuple) -> bool:
        return any(term in text for term in terms)

    day_mountain = ("日京山風",)
    lab = ("hoi!lab", "hoilab", "實驗室香氛", "經典篇")
    if _mentions(left_text, day_mountain) and _mentions(right_text, lab):
        return True
    return _mentions(right_text, day_mountain) and _mentions(left_text, lab)


def _has_aroma_scent_variant_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two aroma listings disagree on their scent variant.

    The check is skipped (``False``) when the combined searchable names
    mention a hand cream or a wax-warmer lamp, when they mention no aroma
    product term, or when either side is a multi-variant catalog listing.

    Otherwise:
    * if both sides carry explicit option tokens, they conflict when the
      token sets do not intersect;
    * else, they conflict when exactly one side carries any variant signal
      (option tokens or a known scent word), or when both sides name scent
      words and share none.
    """
    left_name = left.searchable_name
    right_name = right.searchable_name
    combined = f"{left_name} {right_name}"

    out_of_scope_terms = ("護手霜", "融蠟燈", "蠟燭暖燈")
    if any(term in combined for term in out_of_scope_terms):
        return False

    aroma_terms = (
        "香氛固體凝膠",
        "香氛凝膠",
        "空氣芳香劑",
        "車用香氛",
        "車用擴香",
        "擴香蕊",
        "擴香罐",
        "香薰蠟燭",
        "香氛蠟燭",
        "蠟燭",
        "滾珠精油",
        "香氛精油",
        "植物精油",
    )
    if not any(term in combined for term in aroma_terms):
        return False
    if _is_multi_variant_catalog_listing(left) or _is_multi_variant_catalog_listing(right):
        return False

    left_options = _explicit_variant_option_tokens(left)
    right_options = _explicit_variant_option_tokens(right)
    if left_options and right_options:
        # Explicit options on both sides decide the outcome by themselves.
        return not (left_options & right_options)

    scent_words = {
        "藤蔓果園",
        "清新花園",
        "白麝香",
        "黑麝香",
        "寶貝粉香",
        "青檸羅勒",
        "炭木香",
        "無花果",
        "白茶蘭花",
        "白茶",
        "檸檬草",
        "茶樹",
        "鼠尾草",
        "海鹽",
        "橙花",
        "薄荷",
        "杏仁",
        "薰衣草",
        "茉莉",
        "櫻花",
        "繡球花",
        "玫瑰",
        "雪松",
        "檀香",
    }
    left_scents = {word for word in scent_words if word in left_name}
    right_scents = {word for word in scent_words if word in right_name}

    left_has_variant = bool(left_options or left_scents)
    right_has_variant = bool(right_options or right_scents)
    if left_has_variant != right_has_variant:
        # Only one side pins down a variant.
        return True
    return bool(left_scents and right_scents and left_scents.isdisjoint(right_scents))


def _has_unknown_scent_variant_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report a scent mismatch inferred from leftover variant descriptors.

    Applies only to scented hand creams (``護手霜`` plus a scent keyword) or
    reed-diffuser bottles (``擴香瓶``), and is skipped for lamps, car or
    machine fragrance products, and multi-variant catalog listings.

    Each side's variant descriptors, minus those shared by both sides, are
    narrowed to entries of two to six characters that contain no generic
    product wording. A conflict is reported when both sides keep at least
    one such descriptor and no descriptor of one side contains, or is
    contained in, a descriptor of the other.
    """
    combined = f"{left.searchable_name} {right.searchable_name}"

    blocked_terms = (
        "暖燈", "融蠟燈", "融燭燈", "香氛燈",
        "香氛誘霜", "車用", "芳香劑", "香氛機", "擴香儀", "噴香機",
    )
    if any(term in combined for term in blocked_terms):
        return False

    scented_hand_cream = "護手霜" in combined and any(
        term in combined for term in ("芬香", "香味", "香氣", "精油")
    )
    if not scented_hand_cream and "擴香瓶" not in combined:
        return False
    if _is_multi_variant_catalog_listing(left) or _is_multi_variant_catalog_listing(right):
        return False

    left_all = _variant_descriptors(left)
    right_all = _variant_descriptors(right)
    if not (left_all and right_all):
        return False

    common = _shared_variant_descriptors(left, right)
    generic_wording = ("護手霜", "擴香瓶", "精油芬香", "經典擴香")

    def _distinctive(descriptors: set) -> set:
        return {
            descriptor
            for descriptor in descriptors - common
            if 2 <= len(descriptor) <= 6
            and not any(noise in descriptor for noise in generic_wording)
        }

    left_only = _distinctive(left_all)
    right_only = _distinctive(right_all)
    if not (left_only and right_only):
        return False

    # Equality is covered by the substring test.
    overlapping = any(
        mine in theirs or theirs in mine
        for mine in left_only
        for theirs in right_only
    )
    return not overlapping


def _has_nail_polish_color_name_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two nail-polish listings disagree on colour/model identity.

    Applies only when the pair mentions a nail-polish term and shares no model
    token. If both sides yield nail-polish model codes, the result is whether
    those code sets are disjoint. Otherwise the unshared variant descriptors
    are compared: a conflict is reported when both sides have some and no
    descriptor on one side is contained in (or equal to) one on the other.
    Multi-variant catalog listings are never flagged on the descriptor path.
    """
    combined = f"{left.searchable_name} {right.searchable_name}"
    polish_terms = ("指甲油", "指彩", "美甲彩繪")
    if all(term not in combined for term in polish_terms):
        return False
    if _shared_model_tokens(left, right):
        return False

    left_codes = _nail_polish_model_codes(left)
    right_codes = _nail_polish_model_codes(right)
    if left_codes and right_codes:
        return left_codes.isdisjoint(right_codes)

    if _is_multi_variant_catalog_listing(left) or _is_multi_variant_catalog_listing(right):
        return False
    left_all = _variant_descriptors(left)
    right_all = _variant_descriptors(right)
    if not (left_all and right_all):
        return False
    common = _shared_variant_descriptors(left, right)
    left_only = left_all - common
    right_only = right_all - common
    if not (left_only and right_only):
        return False
    related = any(
        mine in theirs or theirs in mine
        for mine in left_only
        for theirs in right_only
    )
    return not related


def _has_aroma_lamp_style_selection_gap(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two same-brand aroma lamps name different style sets.

    Requires a shared brand token among ``les`` / ``nez`` / ``香鼻子`` and a
    lamp term in *both* names. Each name is then mapped to the set of known
    styles whose alias it contains.

    Returns:
        True when the two style sets differ (including when only one side
        names a style); False when they are equal or both empty.
    """
    if not ({"les", "nez", "香鼻子"} & (left.brand_tokens & right.brand_tokens)):
        return False
    lamp_terms = ("融蠟燈", "融燭燈", "蠟燭暖燈", "香氛燈")
    for name in (left.searchable_name, right.searchable_name):
        if not any(term in name for term in lamp_terms):
            return False

    style_aliases = {
        "流金歲月": ("流金歲月",),
        "暮光琥珀": ("暮光琥珀",),
        "閃耀琥珀": ("閃耀琥珀",),
        "星夜": ("星夜款", "星夜"),
        "流光玫瑰金": ("流光玫瑰金", "玫瑰金"),
        "土耳其風": ("土耳其風",),
        "手工拼貼玻璃": ("手工拼貼玻璃",),
        "手工玻璃": ("手工玻璃",),
        "北歐": ("北歐",),
        "水晶燈": ("水晶燈",),
    }

    def styles_in(name: str) -> set[str]:
        found = set()
        for style, aliases in style_aliases.items():
            if any(alias in name for alias in aliases):
                found.add(style)
        return found

    # Unequal sets always leave a style specific to one side, so inequality
    # alone decides the gap.
    return styles_in(left.searchable_name) != styles_in(right.searchable_name)


def _has_core_ingredient_line_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two oil/cream listings name disjoint core ingredients.

    Applies only when the combined names mention one of the oil, balm or
    cream category terms. Each name is mapped to the ingredient groups whose
    keyword it contains.

    Returns:
        True when both sides name at least one ingredient group and share none.
    """
    combined = f"{left.searchable_name} {right.searchable_name}"
    category_terms = ("油膏", "護膚油", "身體油", "精油", "基礎油", "按摩油", "甜杏仁油", "酪梨油", "霜", "乳霜")
    if all(term not in combined for term in category_terms):
        return False

    ingredient_groups = {
        "coconut_oil": ("椰子油", "coconut"),
        "shea_butter": ("乳木果油", "shea"),
        "sweet_almond_oil": ("甜杏仁油", "sweet almond"),
        "apricot_kernel_oil": ("杏桃核仁油", "杏核仁油", "apricot kernel"),
        "avocado_oil": ("酪梨油", "avocado"),
    }

    def groups_in(name: str) -> set[str]:
        return {
            group
            for group, keywords in ingredient_groups.items()
            if any(keyword in name for keyword in keywords)
        }

    left_groups = groups_in(left.searchable_name)
    right_groups = groups_in(right.searchable_name)
    if not left_groups or not right_groups:
        return False
    return left_groups.isdisjoint(right_groups)


def _has_clarins_body_oil_line_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two Clarins body-oil listings belong to different lines.

    Requires a shared ``clarins`` / ``克蘭詩`` brand token and a body-oil term
    somewhere in the combined names.

    Returns:
        True when each name matches at least one of the two known lines and
        the matched lines do not overlap.
    """
    shared_brand = left.brand_tokens & right.brand_tokens
    if not ({"clarins", "克蘭詩"} & shared_brand):
        return False
    combined = f"{left.searchable_name} {right.searchable_name}"
    oil_terms = ("護理油", "身體油", "美體油", "調和護理油")
    if all(term not in combined for term in oil_terms):
        return False

    line_groups = {
        "contour_lightweight": ("輕盈美體", "美體護理油", "contour"),
        "tonic_body": ("身體調和", "調和護理油", "孕期身體調和", "tonic"),
    }

    def lines_in(name: str) -> set[str]:
        return {
            line
            for line, keywords in line_groups.items()
            if any(keyword in name for keyword in keywords)
        }

    left_lines = lines_in(left.searchable_name)
    right_lines = lines_in(right.searchable_name)
    if not (left_lines and right_lines):
        return False
    return left_lines.isdisjoint(right_lines)


def _has_branded_powder_line_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two 港香蘭 body-powder listings name different product lines.

    Both sides must carry the ``港香蘭`` brand token and mention ``爽身粉``.

    Returns:
        True when each name contains at least one known line name and the two
        sides have none in common.
    """
    if "港香蘭" not in (left.brand_tokens & right.brand_tokens):
        return False
    names = (left.searchable_name, right.searchable_name)
    if any("爽身粉" not in name for name in names):
        return False
    named_lines = ("漢本", "艾魔菈")
    left_lines, right_lines = (
        {line for line in named_lines if line in name} for name in names
    )
    if not left_lines or not right_lines:
        return False
    return left_lines.isdisjoint(right_lines)


def _has_cleanser_lotion_line_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether one listing is a cleanser while the other is a lotion.

    Only evaluated for pairs that share a brand token and for which
    ``_has_overlapping_base_spec`` is truthy.

    Returns:
        True when one name contains a cleanser term and the other name
        contains a lotion term (in either direction).
    """
    if not (left.brand_tokens & right.brand_tokens):
        return False
    if not _has_overlapping_base_spec(left, right):
        return False
    cleanser_terms = ("潔膚露", "潔膚", "潔淨露", "潔面", "洗面乳", "cleanser")
    lotion_terms = ("修護乳", "乳液", "身體乳", "潤膚乳", "lotion")

    def mentions(identity: ProductIdentity, terms: tuple[str, ...]) -> bool:
        name = identity.searchable_name
        return any(term in name for term in terms)

    if mentions(left, cleanser_terms) and mentions(right, lotion_terms):
        return True
    return mentions(right, cleanser_terms) and mentions(left, lotion_terms)


def _selection1990_wax_lamp_design_groups(identity: ProductIdentity) -> set[str]:
    """Map a listing name to the wax-lamp design groups whose phrase it contains.

    Returns:
        A set drawn from ``half_dome``, ``bendable``, ``wood_base`` and
        ``nordic``; empty when no design phrase is present.
    """
    name = identity.searchable_name
    design_phrases = (
        ("half_dome", ("現代簡約半圓罩融燭燈", "半圓罩")),
        ("bendable", ("歐式可彎融燭燈", "可彎融燭燈")),
        ("wood_base", ("韓風原木底座融燭燈", "原木底座融燭燈")),
        ("nordic", ("北歐簡樸融蠟燈", "北歐簡樸")),
    )
    return {
        group
        for group, phrases in design_phrases
        if any(phrase in name for phrase in phrases)
    }


def _has_selection1990_wax_lamp_design_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two ``1990`` + ``選物`` wax lamps belong to different design groups.

    Both brand tokens must be shared by the two listings and the combined
    names must mention a wax-lamp term.

    Returns:
        True when each side resolves to at least one design group and the
        groups are disjoint.
    """
    shared_brand = left.brand_tokens & right.brand_tokens
    if not {"1990", "選物"}.issubset(shared_brand):
        return False
    combined = f"{left.searchable_name} {right.searchable_name}"
    if all(term not in combined for term in ("融燭燈", "蠟燭暖燈", "融蠟燈")):
        return False
    left_designs = _selection1990_wax_lamp_design_groups(left)
    right_designs = _selection1990_wax_lamp_design_groups(right)
    if not left_designs or not right_designs:
        return False
    return left_designs.isdisjoint(right_designs)


def _has_hooome_wax_lamp_design_gap(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two ``hooome`` candle-warmer lamps name different design keywords.

    Requires the ``hooome`` brand token on both sides and a lamp term in the
    combined names.

    Returns:
        True when the sets of design/material keywords found in the two names
        differ (including when only one side names any).
    """
    if "hooome" not in (left.brand_tokens & right.brand_tokens):
        return False
    combined = f"{left.searchable_name} {right.searchable_name}"
    if all(term not in combined for term in ("香氛蠟燭暖燈", "蠟燭暖燈", "融蠟燈")):
        return False
    concrete_design_terms = ("大理石", "雲石", "原木", "半圓罩", "陶瓷", "玻璃", "水晶", "金屬", "鐵藝")

    def designs_in(name: str) -> set[str]:
        return {term for term in concrete_design_terms if term in name}

    # Two unequal sets cannot both be empty, so inequality alone is sufficient.
    return designs_in(left.searchable_name) != designs_in(right.searchable_name)


def _standalone_size_letter_tokens(identity: ProductIdentity) -> set[str]:
    """Collect lower-cased ``s`` / ``m`` / ``l`` letters that stand alone in the name.

    A letter counts only when it is not directly preceded or followed by an
    ASCII letter or digit, so the letters inside ``ml`` or ``size`` are ignored.
    """
    standalone_letter = r"(?<![a-z0-9])([sml])(?![a-z0-9])"
    found = re.findall(standalone_letter, identity.searchable_name, re.I)
    return {letter.lower() for letter in found}


def _has_wax_lamp_size_letter_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two candle-warmer lamps state different standalone size letters.

    Returns:
        True when the combined names mention a lamp term, both sides expose at
        least one standalone size letter, and they share none.
    """
    combined = f"{left.searchable_name} {right.searchable_name}"
    if all(term not in combined for term in ("香氛蠟燭暖燈", "蠟燭暖燈", "融蠟燈")):
        return False
    left_letters = _standalone_size_letter_tokens(left)
    right_letters = _standalone_size_letter_tokens(right)
    if not left_letters or not right_letters:
        return False
    return left_letters.isdisjoint(right_letters)


def _has_nitori_diffuser_model_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two Nitori aroma sprayers carry non-overlapping model tokens.

    Requires a shared ``nitori`` / ``宜得利家居`` brand token and ``香氛噴霧器``
    in both names. Model tokens for each side are the union of
    ``_extract_model_tokens``, standalone 3-5 digit numbers, and letter+digit
    codes that ``_is_spec_like_latin_token`` does not classify as a spec.

    Returns:
        True when both sides have model tokens and none is shared.
    """
    shared_brand = left.brand_tokens & right.brand_tokens
    if not ({"nitori", "宜得利家居"} & shared_brand):
        return False
    if any("香氛噴霧器" not in identity.searchable_name for identity in (left, right)):
        return False

    def collect_models(identity: ProductIdentity) -> set[str]:
        name = identity.searchable_name
        digit_models = set(re.findall(r"(?<![a-z0-9])\d{3,5}(?![a-z0-9])", name))
        code_models = set()
        for found in re.finditer(r"(?<![a-z0-9])([a-z]{1,4}\d{2,}[a-z0-9-]*)(?![a-z0-9])", name, re.I):
            code = found.group(1).lower()
            if not _is_spec_like_latin_token(code):
                code_models.add(code)
        return _extract_model_tokens(name) | digit_models | code_models

    left_models = collect_models(left)
    right_models = collect_models(right)
    return bool(left_models and right_models and not (left_models & right_models))


def _makeup_shade_tokens(identity: ProductIdentity) -> set[str]:
    """Collect shade identifiers for a makeup listing.

    Starts from ``_explicit_variant_option_tokens`` and adds:

    * the number and the (underscore-joined) English shade word of patterns
      such as ``21 rosy ivory``;
    * short alphanumeric codes (optional letter, 1-3 digits, optional letter)
      that directly precede at least two CJK characters, unless the code is
      digits followed by a measurement unit.
    """
    name = identity.searchable_name
    shades = set(_explicit_variant_option_tokens(identity))

    numbered_shade = (
        r"(?<![a-z0-9])(?:#|no\.?|色號|號色)?\s*(\d{1,3})\s+"
        r"(rosy ivory|ivory|beige|sand|fair|light|medium|porcelain|rose)(?![a-z0-9])"
    )
    for number, shade_word in re.findall(numbered_shade, name, re.I):
        shades.update((number.lower(), shade_word.lower().replace(" ", "_")))

    code_before_cjk = r"(?<![a-z0-9])([a-z]?\d{1,3}[a-z]?)(?=\s*[\u4e00-\u9fff]{2,})"
    for code in re.findall(code_before_cjk, name, re.I):
        cleaned = re.sub(r"[^a-z0-9]", "", code.lower())
        # Skip empty results and quantities such as "5g" that are sizes, not shades.
        if not cleaned or re.fullmatch(r"\d+(?:g|m|l|ml|mg)", cleaned):
            continue
        shades.add(cleaned)
    return shades


def _has_makeup_shade_selection_gap(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether exactly one of two makeup listings specifies a shade.

    Only evaluated when the combined names mention one of the shade-sensitive
    makeup categories listed below.
    """
    combined = f"{left.searchable_name} {right.searchable_name}"
    shade_sensitive_terms = (
        "氣墊粉霜", "粉底", "粉霜", "蜜粉", "唇釉", "唇膏", "唇蜜", "染眉膏", "眉筆", "眉膏", "眉彩", "眼線", "遮瑕",
    )
    if all(term not in combined for term in shade_sensitive_terms):
        return False
    left_has_shade = bool(_makeup_shade_tokens(left))
    right_has_shade = bool(_makeup_shade_tokens(right))
    return left_has_shade != right_has_shade


def _commercial_condition_terms(identity: ProductIdentity) -> set[str]:
    """Classify sale-condition wording found in the listing's normalized name.

    Returns:
        A set drawn from ``expiry_sensitive``, ``box_damage`` and
        ``clearance_condition``; empty when no such keyword is present.
    """
    name = identity.normalized_name
    condition_keywords = (
        ("expiry_sensitive", ("即期品", "臨期", "短效", "短效期", "效期", "保存期限", "有效期限")),
        ("box_damage", ("盒損", "盒損品", "外盒損", "外盒瑕疵")),
        ("clearance_condition", ("福利品", "瑕疵品")),
    )
    return {
        label
        for label, keywords in condition_keywords
        if any(keyword in name for keyword in keywords)
    }


def _has_commercial_condition_gap(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether the two listings carry different sale-condition labels.

    Returns False when both label sets are equal, including both empty.
    """
    # Unequal sets imply at least one is non-empty, so inequality is sufficient.
    return _commercial_condition_terms(left) != _commercial_condition_terms(right)


def _has_relove_private_cleanser_variant_gap(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether exactly one listing mentions a brightening keyword.

    Only evaluated when ``_is_relove_cleanser_gel_like`` accepts the pair.
    """
    if not _is_relove_cleanser_gel_like(left, right):
        return False
    brightening_terms = ("傳明酸", "淨白", "美白", "亮白", "菸鹼醯胺", "niacinamide")
    left_flag, right_flag = (
        any(term in identity.searchable_name for term in brightening_terms)
        for identity in (left, right)
    )
    return left_flag != right_flag


def _has_makeup_catalog_selection_gap(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether a shade-sensitive makeup pair lacks an agreed shade.

    Applies when the combined names mention a shade-sensitive makeup category
    and at least one side is a catalog or delimited-variant listing.

    Returns:
        False when both sides have shade tokens that
        ``_variant_options_overlap`` accepts; True otherwise.
    """
    combined = f"{left.searchable_name} {right.searchable_name}"
    sensitive_terms = (
        "遮瑕蜜",
        "遮瑕",
        "粉底",
        "粉霜",
        "氣墊",
        "蜜粉",
        "腮紅",
        "眼線",
        "眉筆",
        "染眉膏",
        "唇膏",
        "唇釉",
        "唇蜜",
    )
    if all(term not in combined for term in sensitive_terms):
        return False
    either_is_catalog = (
        _is_catalog_or_delimited_variant_listing(left)
        or _is_catalog_or_delimited_variant_listing(right)
    )
    if not either_is_catalog:
        return False
    left_shades = _makeup_shade_tokens(left)
    right_shades = _makeup_shade_tokens(right)
    shades_agree = bool(
        left_shades and right_shades and _variant_options_overlap(left_shades, right_shades)
    )
    return not shades_agree


def _is_candle_scent_catalog_listing(identity: ProductIdentity) -> bool:
    """Tell whether a candle listing offers a choice among several scents.

    True when the listing is a multi-variant catalog listing, or when its name
    contains a count followed by ``種`` and a scent word (e.g. ``20種香味``).
    """
    if _is_multi_variant_catalog_listing(identity):
        return True
    scent_count = re.search(r"\d+\s*種(?:香味|香氣|味道)", identity.searchable_name)
    return scent_count is not None


def _has_candle_catalog_selection_gap(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether exactly one of two candle listings is a scent catalog.

    Applies when the combined names mention a candle term and none of the
    candle-lamp terms.
    """
    combined = f"{left.searchable_name} {right.searchable_name}"
    if all(term not in combined for term in ("香氛蠟燭", "大豆蠟燭", "蠟燭")):
        return False
    if any(lamp in combined for lamp in ("融蠟燈", "融燭燈", "蠟燭燈")):
        return False
    left_is_catalog = _is_candle_scent_catalog_listing(left)
    right_is_catalog = _is_candle_scent_catalog_listing(right)
    return left_is_catalog != right_is_catalog


def _has_loreal_serum_variant_gap(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two L'Oreal hyaluronic-line listings differ in variant.

    Applies when either side carries a ``loreal`` / ``巴黎萊雅`` brand token,
    the combined names contain ``玻尿酸瞬效保濕``, and at least one side names a
    known variant.

    Returns:
        True when the variant sets differ, or when exactly one side is a
        catalog or delimited-variant listing.
    """
    if not ({"loreal", "巴黎萊雅"} & (left.brand_tokens | right.brand_tokens)):
        return False
    if "玻尿酸瞬效保濕" not in f"{left.searchable_name} {right.searchable_name}":
        return False
    variant_terms = ("啵啵精華水", "液態紫熨斗", "水光精華", "修護晶露", "保濕水光")
    left_variants = {term for term in variant_terms if term in left.searchable_name}
    right_variants = {term for term in variant_terms if term in right.searchable_name}
    if not left_variants and not right_variants:
        return False
    if left_variants != right_variants:
        return True
    return _is_catalog_or_delimited_variant_listing(left) != _is_catalog_or_delimited_variant_listing(right)


def _has_sebamed_shampoo_variant_catalog_gap(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two Sebamed shampoo listings differ in variant or catalog form.

    Applies when either side carries a ``sebamed`` / ``施巴`` brand token and
    both names contain ``洗髮乳``.

    Returns:
        True when exactly one side is a catalog or delimited-variant listing,
        or when the variant keywords found in the two names differ.
    """
    if not ({"sebamed", "施巴"} & (left.brand_tokens | right.brand_tokens)):
        return False
    if any("洗髮乳" not in identity.searchable_name for identity in (left, right)):
        return False
    variant_terms = ("溫和", "油性抗屑", "抗屑", "乾性", "敏感")
    left_variants = {term for term in variant_terms if term in left.searchable_name}
    right_variants = {term for term in variant_terms if term in right.searchable_name}
    if _is_catalog_or_delimited_variant_listing(left) != _is_catalog_or_delimited_variant_listing(right):
        return True
    # Unequal sets imply at least one is non-empty.
    return left_variants != right_variants


def _has_schick_2in1_model_gap(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether exactly one of two Schick razor listings is a "2-in-1" model.

    Requires a shared ``schick`` / ``舒適牌`` brand token and both ``舒綺`` and
    ``美型刀`` somewhere in the combined names.
    """
    if not ({"schick", "舒適牌"} & (left.brand_tokens & right.brand_tokens)):
        return False
    combined = f"{left.searchable_name} {right.searchable_name}"
    if not ("舒綺" in combined and "美型刀" in combined):
        return False
    # NOTE(review): the connector group is optional, so a bare "21" (or "2 1")
    # also matches — confirm that is intended for normalized names.
    two_in_one = re.compile(r"2\s*(?:-?in-?|合)?\s*1", re.I)
    left_flag = two_in_one.search(left.searchable_name) is not None
    right_flag = two_in_one.search(right.searchable_name) is not None
    return left_flag != right_flag


def _has_taicend_protection_form_gap(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two Taicend skin-protection listings name different product forms.

    Requires a shared ``taicend`` / ``泰陞`` brand token and a protective
    film/spray term in the combined names. Pairs where both names contain
    ``屁屁噴`` are never flagged.

    Returns:
        True when the sets of form phrases found in the two names differ.
    """
    if not ({"taicend", "泰陞"} & (left.brand_tokens & right.brand_tokens)):
        return False
    combined = f"{left.searchable_name} {right.searchable_name}"
    if not any(term in combined for term in ("保護膜", "保護噴霧", "液態皮膚保護膜")):
        return False
    if all("屁屁噴" in identity.searchable_name for identity in (left, right)):
        return False
    form_terms = ("寶貝液體保護膜", "液態皮膚保護膜", "皮膚保護噴霧", "保護噴霧")

    def forms_in(name: str) -> set[str]:
        return {term for term in form_terms if term in name}

    # Unequal sets imply at least one is non-empty.
    return forms_in(left.searchable_name) != forms_in(right.searchable_name)


def _has_catalog_specific_variant_selection_gap(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether exactly one listing is a catalog/delimited-variant listing.

    Only evaluated when the combined names mention one of the product
    categories listed below.
    """
    variant_sensitive_terms = (
        "身體去角質",
        "美體主張",
        "私密潔浴露",
        "私密潔浴",
        "私密防護慕絲",
        "私密慕絲",
        "慕絲",
        "嬰兒潤膚乳",
        "定妝噴霧",
        "染眉膏",
        "眼線膠筆",
        "粉餅盒",
        "遮瑕蜜",
        "護手霜",
        "護唇膏",
        "護唇棒",
        "唇釉",
        "唇膏",
        "蜜粉",
        "防曬素顏霜",
        "車用香氛",
        "車用擴香",
        "車用擴香蕊",
        "香氛擴香罐",
        "擴香瓶",
        "擴香罐",
        "擴香蕊",
        "水性指甲油",
        "指甲油",
        "足膜",
        "泡澡入浴劑",
        "入浴劑",
        "融蠟小夜燈",
        "融蠟燈",
        "滋養霜",
    )
    combined = f"{left.searchable_name} {right.searchable_name}"
    if all(term not in combined for term in variant_sensitive_terms):
        return False
    left_is_catalog = _is_catalog_or_delimited_variant_listing(left)
    right_is_catalog = _is_catalog_or_delimited_variant_listing(right)
    return left_is_catalog != right_is_catalog


def _has_bath_additive_variant_gap(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two same-brand bath additives name different variants.

    Applies when the combined names mention a bath-additive term and the two
    listings share at least one brand token.

    Returns:
        True when each name contains at least one variant keyword and the two
        sides have none in common.
    """
    combined = f"{left.searchable_name} {right.searchable_name}"
    if all(term not in combined for term in ("入浴劑", "泡澡錠", "泡澡包", "泡澡")):
        return False
    if not (left.brand_tokens & right.brand_tokens):
        return False
    variant_terms = ("馨香", "懷舊", "橘盒", "綠盒", "粉盒", "藍盒")
    left_variants, right_variants = (
        {term for term in variant_terms if term in identity.searchable_name}
        for identity in (left, right)
    )
    if not left_variants or not right_variants:
        return False
    return left_variants.isdisjoint(right_variants)


def _has_taicend_baby_spray_equivalence(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both listings are the Taicend ``屁屁噴`` spray with overlapping base spec.

    Each side must carry a ``taicend`` / ``泰陞`` brand token and mention
    ``屁屁噴``; ``_has_overlapping_base_spec`` must also accept the pair.
    """
    taicend_tokens = {"taicend", "泰陞"}
    for identity in (left, right):
        if not (identity.brand_tokens & taicend_tokens):
            return False
    for identity in (left, right):
        if "屁屁噴" not in identity.searchable_name:
            return False
    return _has_overlapping_base_spec(left, right)


def _has_seche_vite_top_coat_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both listings are the Seche Vite ``快乾亮油`` top coat.

    Between them the two listings must carry both ``seche`` and ``vite`` brand
    tokens, each side must carry at least one of them, and both names must
    contain ``快乾亮油``.
    """
    required = {"seche", "vite"}
    if not required <= (left.brand_tokens | right.brand_tokens):
        return False
    sides = (left, right)
    if not all(identity.brand_tokens & required for identity in sides):
        return False
    return all("快乾亮油" in identity.searchable_name for identity in sides)


def _has_xiaomi_s101_shaver_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both listings are the Xiaomi ``s101`` electric shaver.

    Each side must carry a Xiaomi brand token and its name must contain both
    ``s101`` and ``電動刮鬍刀``.
    """
    xiaomi_tokens = {"xiaomi", "小米", "小米有品"}
    sides = (left, right)
    if not all(identity.brand_tokens & xiaomi_tokens for identity in sides):
        return False
    return all(
        keyword in identity.searchable_name
        for keyword in ("s101", "電動刮鬍刀")
        for identity in sides
    )


def _has_hinoki_roller_oil_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both listings are the 檜山坊 hinoki essential-oil roller bottle.

    Each side must carry the ``檜山坊`` brand token and mention both
    ``檜木精油`` and ``滾珠瓶``; ``_has_overlapping_base_spec`` must also accept
    the pair.
    """
    sides = (left, right)
    if any("檜山坊" not in identity.brand_tokens for identity in sides):
        return False
    for keyword in ("檜木精油", "滾珠瓶"):
        if any(keyword not in identity.searchable_name for identity in sides):
            return False
    return _has_overlapping_base_spec(left, right)


def _has_brush_baby_wildones_toothbrush_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both listings are the Brush-Baby WildOnes ``0-10y`` electric toothbrush.

    Each side must carry all of the ``brush``, ``baby`` and ``wildones`` brand
    tokens, and each name must contain ``電動牙刷`` and ``0-10y``.
    """
    required_brand = {"brush", "baby", "wildones"}
    sides = (left, right)
    if not all(required_brand <= identity.brand_tokens for identity in sides):
        return False
    return all(
        keyword in identity.searchable_name
        for keyword in ("電動牙刷", "0-10y")
        for identity in sides
    )


def _has_pshine_beauty_foot_file_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both listings are the double-sided Beauty Foot foot file.

    Each side must carry the ``beauty``, ``shine`` and ``foot`` brand tokens,
    and each name must contain ``雙面``, ``足``, one of ``硬皮`` / ``去角質``,
    and one of ``磨砂棒`` / ``足搓棒``.
    """
    required_brand = {"beauty", "shine", "foot"}
    # Every inner tuple is an "any of" group; all groups must match each name.
    keyword_groups = (
        ("雙面",),
        ("足",),
        ("硬皮", "去角質"),
        ("磨砂棒", "足搓棒"),
    )
    sides = (left, right)
    if not all(required_brand <= identity.brand_tokens for identity in sides):
        return False
    return all(
        any(keyword in identity.searchable_name for keyword in group)
        for group in keyword_groups
        for identity in sides
    )


def _has_baan_baby_lip_catalog_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both listings are the Baan baby lip balm offering both flavours.

    Each side must carry a ``baan`` / ``貝恩`` brand token, mention
    ``嬰兒修護唇膏``, and list both ``原味`` and ``草莓`` among its explicit
    variant option tokens.
    """
    baan_tokens = {"baan", "貝恩"}
    required_flavours = {"原味", "草莓"}
    left_options = _explicit_variant_option_tokens(left)
    right_options = _explicit_variant_option_tokens(right)
    sides = (left, right)
    if not all(identity.brand_tokens & baan_tokens for identity in sides):
        return False
    if any("嬰兒修護唇膏" not in identity.searchable_name for identity in sides):
        return False
    return required_flavours <= left_options and required_flavours <= right_options


def _has_recipe_box_child_sunscreen_cushion_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both listings are the Recipe Box children's sunscreen cushion.

    The ``recipe`` and ``box`` brand tokens must both appear across the two
    listings (on either side), and both names must contain ``兒童防曬氣墊粉餅``.
    """
    combined_brand = left.brand_tokens | right.brand_tokens
    if not {"recipe", "box"}.issubset(combined_brand):
        return False
    return all(
        "兒童防曬氣墊粉餅" in identity.searchable_name
        for identity in (left, right)
    )


def _has_pavaruni_40_scent_oil_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both listings are the Pavaruni 40-scent essential oil.

    Requires the shared ``pavaruni`` brand token, ``天然植物`` somewhere in the
    combined names, ``精油`` and a 40-scent phrase in each name, and
    ``_has_shared_volume(left, right, 10)``.
    """
    names = (left.searchable_name, right.searchable_name)
    if "pavaruni" not in (left.brand_tokens & right.brand_tokens):
        return False
    if "天然植物" not in f"{names[0]} {names[1]}":
        return False
    if any("精油" not in name for name in names):
        return False
    if not _has_shared_volume(left, right, 10):
        return False
    return all("40香味" in name or "40種香味" in name for name in names)


def _has_pavaruni_20_scent_candle_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both listings are the Pavaruni 20-scent scented candle.

    Requires the shared ``pavaruni`` brand token, ``香氛蠟燭`` and a 20-scent
    phrase in each name, and ``_has_shared_weight(left, right, 450)``.
    """
    names = (left.searchable_name, right.searchable_name)
    if "pavaruni" not in (left.brand_tokens & right.brand_tokens):
        return False
    if any("香氛蠟燭" not in name for name in names):
        return False
    if not _has_shared_weight(left, right, 450):
        return False
    return all("20香味" in name or "20種香味" in name for name in names)


def _has_laundrin_tokyo_car_freshener_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both listings are the single-pack Laundrin Tokyo car air freshener.

    Requires a shared ``laundrin`` / ``朗德林`` brand token; ``tokyo``, ``車用``
    and ``芳香劑`` in each name; and a shared ``(1, "入")`` count.

    Returns:
        A real ``bool`` in every case.
    """
    left_text = left.searchable_name
    right_text = right.searchable_name
    return (
        # bool() matters: as the first operand of the ``and`` chain, an empty
        # intersection would otherwise be returned as-is (an empty set) instead
        # of False, contradicting the ``-> bool`` annotation.
        bool({"laundrin", "朗德林"} & (left.brand_tokens & right.brand_tokens))
        and "tokyo" in left_text
        and "tokyo" in right_text
        and "車用" in left_text
        and "車用" in right_text
        and "芳香劑" in left_text
        and "芳香劑" in right_text
        and _has_shared_count(left, right, 1, "入")
    )


def _has_shared_count(left: ProductIdentity, right: ProductIdentity, count: int, unit: str) -> bool:
    """Tell whether both listings include the exact ``(count, unit)`` pair in ``counts``."""
    wanted = (count, unit)
    for identity in (left, right):
        if wanted not in set(identity.counts):
            return False
    return True


def _has_shared_volume(left: ProductIdentity, right: ProductIdentity, volume_ml: float) -> bool:
    """Tell whether each listing has a ``volumes_ml`` value that ``_close_number`` matches to ``volume_ml``."""
    for identity in (left, right):
        if not any(_close_number(value, volume_ml) for value in identity.volumes_ml):
            return False
    return True


def _has_shared_weight(left: ProductIdentity, right: ProductIdentity, weight_g: float) -> bool:
    """Tell whether each listing has a ``weights_g`` value that ``_close_number`` matches to ``weight_g``."""
    for identity in (left, right):
        if not any(_close_number(value, weight_g) for value in identity.weights_g):
            return False
    return True


def _has_focused_low_score_exact_identity_line(left: ProductIdentity, right: ProductIdentity) -> str:
    left_text = left.searchable_name
    right_text = right.searchable_name
    pair_text = f"{left_text} {right_text}"
    left_raw = left.original_name.lower()
    right_raw = right.original_name.lower()
    brand_tokens = left.brand_tokens | right.brand_tokens

    if (
        "biodance" in (left.brand_tokens & right.brand_tokens)
        and "深層全效面膜" in left_text
        and "深層全效面膜" in right_text
        and "膠原蛋白" in pair_text
        and _has_shared_count(left, right, 4, "片")
    ):
        return "biodance_deep_mask"
    if (
        {"muji", "無印良品"} & brand_tokens
        and "精油芬香護手霜" in left_text
        and "精油芬香護手霜" in right_text
        and _has_shared_weight(left, right, 50)
        and bool(left.brand_tokens) != bool(right.brand_tokens)
    ):
        return "muji_aroma_hand_cream_brandless"
    if (
        {"herbacin", "德國小甘菊"} & brand_tokens
        and "小甘菊" in left_text
        and "小甘菊" in right_text
        and "護手霜" in left_text
        and "護手霜" in right_text
        and _has_shared_volume(left, right, 20)
        and bool(left.brand_tokens) != bool(right.brand_tokens)
    ):
        return "herbacin_classic_hand_cream_20ml_brandless"
    if (
        {"sab", "初淨肌"} & (left.brand_tokens & right.brand_tokens)
        and "私密防護舒緩噴霧" in left_text
        and "私密防護舒緩噴霧" in right_text
        and _has_shared_volume(left, right, 30)
    ):
        return "sab_private_spray"
    if (
        "lush" in (left.brand_tokens & right.brand_tokens)
        and "櫻之花身體噴霧" in left_text
        and "櫻之花身體噴霧" in right_text
        and _has_shared_volume(left, right, 200)
    ):
        return "lush_sakura_body_spray"
    if (
        {"kanebo", "佳麗寶"} & (left.brand_tokens & right.brand_tokens)
        and "coffret" in left_text
        and "coffret" in right_text
        and "光透立體眼線筆" in left_text
        and "光透立體眼線筆" in right_text
    ):
        return "kanebo_coffret_eyeliner"
    if (
        "artmis" in brand_tokens
        and "葳兒柔" in left_text
        and "葳兒柔" in right_text
        and "賦活凝膠" in left_text
        and "賦活凝膠" in right_text
        and _has_shared_volume(left, right, 35)
    ):
        return "artmis_virile_gel"
    if (
        "artmis" in brand_tokens
        and "私密清潔慕斯" in left_text
        and "私密清潔慕斯" in right_text
        and "金縷梅" in left_text
        and "金縷梅" in right_text
        and _has_shared_volume(left, right, 250)
    ):
        return "artmis_witch_hazel_private_mousse_250ml"
    if (
        "artmis" in brand_tokens
        and "私密清潔慕斯" in left_text
        and "私密清潔慕斯" in right_text
        and "蔓越莓" in left_text
        and "蔓越莓" in right_text
        and _has_shared_volume(left, right, 250)
    ):
        return "artmis_cranberry_private_mousse_250ml"
    if (
        "powerman" in pair_text
        and "男性私密養護液" in left_text
        and "男性私密養護液" in right_text
        and _has_shared_volume(left, right, 30)
    ):
        return "playjoy_powerman_male_care_30ml"
    if (
        {"physiogel", "潔美淨"} & (left.brand_tokens & right.brand_tokens)
        and "ai冰鎮精華露" in left_text
        and "ai冰鎮精華露" in right_text
        and _has_shared_volume(left, right, 200)
        and _has_exact_count_alignment(left, right)
    ):
        return "physiogel_ai_ice_essence_200ml_2pack"
    if (
        {"ts6", "護一生"} & (left.brand_tokens & right.brand_tokens)
        and "緊彈水嫩凝膠" in left_text
        and "緊彈水嫩凝膠" in right_text
        and _has_shared_weight(left, right, 40)
    ):
        return "ts6_private_elastic_gel_40g"
    if (
        {"ts6", "護一生"} & (left.brand_tokens & right.brand_tokens)
        and "超美" in left_text
        and "超美" in right_text
        and "香氛誘霜" in left_text
        and "香氛誘霜" in right_text
        and (120.0 in set(left.weights_g) or 120.0 in set(left.volumes_ml))
        and (120.0 in set(right.weights_g) or 120.0 in set(right.volumes_ml))
    ):
        return "ts6_private_white_fragrance_cream_120"
    if (
        {"ts6", "護一生"} & (left.brand_tokens & right.brand_tokens)
        and "淨白植感慕斯" in left_text
        and "淨白植感慕斯" in right_text
        and _has_shared_weight(left, right, 180)
        and _has_exact_count_alignment(left, right)
    ):
        return "ts6_white_mousse_180g_3pack"
    if (
        {"ts6", "護一生"} & (left.brand_tokens & right.brand_tokens)
        and "沁涼潔淨慕斯" in left_text
        and "沁涼潔淨慕斯" in right_text
        and _has_shared_weight(left, right, 100)
        and _has_exact_count_alignment(left, right)
    ):
        return "ts6_cooling_clean_mousse_100g"
    if (
        {"ts6", "護一生"} & (left.brand_tokens & right.brand_tokens)
        and "蜜愛潤滑液" in left_text
        and "蜜愛潤滑液" in right_text
        and _has_shared_weight(left, right, 100)
        and _has_exact_count_alignment(left, right)
    ):
        return "ts6_lubricant_100g_3pack"
    if (
        {"ts6", "護一生"} & (left.brand_tokens & right.brand_tokens)
        and "蜜桃煥白凝膠" in left_text
        and "蜜桃煥白凝膠" in right_text
        and _has_shared_weight(left, right, 45)
        and _has_exact_count_alignment(left, right)
    ):
        return "ts6_peach_bright_gel_45g_3pack"
    if (
        {"ts6", "護一生"} & (left.brand_tokens & right.brand_tokens)
        and "極淨白私密潔膚露" in left_text
        and "極淨白私密潔膚露" in right_text
        and "蜜桃煥白凝膠" in left_text
        and "蜜桃煥白凝膠" in right_text
        and _has_shared_weight(left, right, 250)
        and _has_shared_weight(left, right, 45)
    ):
        return "ts6_white_wash_peach_gel_kit"
    if (
        {"beauty", "foot"} <= (left.brand_tokens & right.brand_tokens)
        and "足膜" in left_text
        and "足膜" in right_text
        and any(_has_shared_volume(left, right, volume) for volume in (25, 30))
        and _has_exact_count_alignment(left, right)
        and not _is_multi_variant_catalog_listing(left)
        and not _is_multi_variant_catalog_listing(right)
    ):
        return "beauty_foot_mask_exact_pack"
    if (
        {"kameria", "凱蜜菈"} & (left.brand_tokens & right.brand_tokens)
        and "足足稱奇" in left_text
        and "足足稱奇" in right_text
        and "積雪草" in left_text
        and "積雪草" in right_text
        and "足膜" in left_text
        and "足膜" in right_text
        and _has_shared_volume(left, right, 17)
        and _has_exact_count_alignment(left, right)
        and not _is_multi_variant_catalog_listing(left)
        and not _is_multi_variant_catalog_listing(right)
    ):
        return "kameria_centella_foot_mask_17ml_2pc"
    if (
        {"vaseline", "凡士林"} & (left.brand_tokens & right.brand_tokens)
        and "嬰兒高純修護凝膠" in left_text
        and "嬰兒高純修護凝膠" in right_text
        and _has_shared_weight(left, right, 368)
        and _has_exact_count_alignment(left, right)
    ):
        return "vaseline_baby_jelly_368g_3pack"
    if (
        {"derma", "丹麥德瑪"} & (left.brand_tokens & right.brand_tokens)
        and "寶寶" in left_text
        and "寶寶" in right_text
        and "洗髮沐浴露" in left_text
        and "洗髮沐浴露" in right_text
        and _has_shared_volume(left, right, 150)
    ):
        return "derma_baby_wash_150ml"
    if (
        {"derma", "丹麥德瑪"} & (left.brand_tokens & right.brand_tokens)
        and "寶寶" in left_text
        and "寶寶" in right_text
        and "洗髮沐浴露" in left_text
        and "洗髮沐浴露" in right_text
        and _has_shared_volume(left, right, 500)
    ):
        return "derma_baby_wash_500ml"
    if (
        {"clarins", "克蘭詩"} & (left.brand_tokens & right.brand_tokens)
        and "黃金亮眼萃" in left_text
        and "黃金亮眼萃" in right_text
        and _has_shared_volume(left, right, 20)
    ):
        return "clarins_double_serum_eye_20ml"
    if (
        {"cetaphil", "舒特膚"} & (left.brand_tokens & right.brand_tokens)
        and "長效潤膚乳" in left_text
        and "長效潤膚乳" in right_text
        and _has_shared_volume(left, right, 237)
    ):
        return "cetaphil_long_lotion_237ml"
    if (
        {"cetaphil", "舒特膚"} & (left.brand_tokens & right.brand_tokens)
        and "長效潤膚乳" in left_text
        and "長效潤膚乳" in right_text
        and _has_shared_volume(left, right, 473)
    ):
        return "cetaphil_long_lotion_473ml"
    if (
        {"cetaphil", "舒特膚"} & (left.brand_tokens & right.brand_tokens)
        and "長效潤膚霜" in left_text
        and "長效潤膚霜" in right_text
        and _has_shared_weight(left, right, 250)
    ):
        return "cetaphil_long_moisturizing_cream_250g"
    if (
        {"cetaphil", "舒特膚"} & (left.brand_tokens & right.brand_tokens)
        and "益膚康修護舒敏乳霜" in left_text
        and "益膚康修護舒敏乳霜" in right_text
        and _has_shared_weight(left, right, 227)
    ):
        return "cetaphil_ad_repair_cream_227g"
    if (
        {"nivea", "妮維雅"} & (left.brand_tokens & right.brand_tokens)
        and "妮維雅霜" in left_text
        and "妮維雅霜" in right_text
        and "隨身版" in left_text
        and "隨身版" in right_text
        and _has_shared_volume(left, right, 100)
    ):
        return "nivea_creme_100ml"
    if (
        "nailmatic" in (left.brand_tokens & right.brand_tokens)
        and "小精靈" in left_text
        and "小精靈" in right_text
        and "指甲油" in left_text
        and "指甲油" in right_text
    ):
        return "nailmatic_casper_polish"
    if (
        "小浪" in (left.brand_tokens & right.brand_tokens)
        and "智能感應自動噴香機" in left_text
        and "智能感應自動噴香機" in right_text
        and "補充液" in left_text
        and "補充液" in right_text
        and _has_shared_count(left, right, 3, "入")
    ):
        return "xiaolang_spray_machine_refill_set"
    if (
        {"yunmi", "j10"} <= (left.brand_tokens & right.brand_tokens)
        and "濕度數顯智能加濕器" in left_text
        and "濕度數顯智能加濕器" in right_text
    ):
        return "yunmi_j10_humidifier"
    if (
        "aquiesse" in (left.brand_tokens & right.brand_tokens)
        and "香氛蠟燭" in left_text
        and "香氛蠟燭" in right_text
        and "5oz" in left_text
        and "5oz" in right_text
        and _is_multi_variant_catalog_listing(left)
        and _is_multi_variant_catalog_listing(right)
    ):
        return "aquiesse_5oz_candle_catalog"
    if (
        {"rejuran", "麗珠蘭"} & (left.brand_tokens & right.brand_tokens)
        and "麗駐蘭修復舒緩面膜" in left_text
        and "麗駐蘭修復舒緩面膜" in right_text
        and "5p" in left_text
        and "5p" in right_text
    ):
        return "rejuran_repair_mask_5p"
    if (
        {"shiseido", "資生堂"} & (left.brand_tokens & right.brand_tokens)
        and "新艷陽" in left_text
        and "新艷陽" in right_text
        and "水離子熱防禦" in left_text
        and "水離子熱防禦" in right_text
        and "隔離露" in left_text
        and "隔離露" in right_text
    ):
        return "shiseido_blue_sunscreen"
    if (
        "dhc" in pair_text
        and "純欖護唇膏" in left_text
        and "純欖護唇膏" in right_text
        and _has_shared_weight(left, right, 1.5)
    ):
        return "dhc_olive_lip_1_5g"
    if (
        "frudia" in pair_text
        and "蜂蜜藍莓" in left_text
        and "蜂蜜藍莓" in right_text
        and "護唇膏" in left_text
        and "護唇膏" in right_text
        and _has_shared_weight(left, right, 10)
    ):
        return "frudia_honey_blueberry_lip_10g"
    if (
        {"sebamed", "施巴"} & brand_tokens
        and "嬰兒護唇膏" in left_text
        and "嬰兒護唇膏" in right_text
        and _has_shared_weight(left, right, 4.8)
        and _has_exact_count_alignment(left, right)
    ):
        return "sebamed_baby_lip_4_8g_2pack"
    if (
        "理膚寶水" in pair_text
        and "滋養修護潤唇膏" in left_text
        and "滋養修護潤唇膏" in right_text
        and _has_shared_volume(left, right, 4.7)
    ):
        return "laroche_posay_lip_balm_4_7ml"
    if (
        {"baan", "貝恩"} & (left.brand_tokens & right.brand_tokens)
        and "嬰兒修護唇膏" in left_text
        and "嬰兒修護唇膏" in right_text
        and left.product_type == right.product_type == "護唇膏"
        and "原味" in left_text
        and "原味" in right_text
        and "草莓" in left_text
        and "草莓" in right_text
    ):
        return "baan_baby_lip_original_strawberry_catalog"
    if (
        {"baan", "貝恩"} & (left.brand_tokens & right.brand_tokens)
        and "嬰兒修護唇膏" in left_text
        and "嬰兒修護唇膏" in right_text
        and left.product_type == right.product_type == "護唇膏"
    ):
        return "baan_baby_lip_base_catalog"
    if (
        {"shu uemura", "植村秀"} & (left.brand_tokens & right.brand_tokens)
        and "3d極細防水眼線膠筆" in left_text
        and "3d極細防水眼線膠筆" in right_text
    ):
        return "shu_3d_eyeliner"
    if (
        {"ysl", "聖羅蘭"} & (left.brand_tokens & right.brand_tokens)
        and "恆久完美透膚煙染腮紅" in left_text
        and "恆久完美透膚煙染腮紅" in right_text
    ):
        return "ysl_blush_catalog"
    if (
        {"hh", "草本新淨界"} & (left.brand_tokens & right.brand_tokens)
        and "私密植萃美白緊緻凝露" in left_text
        and "私密植萃美白緊緻凝露" in right_text
        and _has_shared_volume(left, right, 30)
    ):
        return "hh_private_gel"
    if (
        {"lab52", "齒妍堂"} & (left.brand_tokens & right.brand_tokens)
        and "學習刷牙漱口水" in left_text
        and "學習刷牙漱口水" in right_text
        and _has_overlapping_base_spec(left, right)
    ):
        return "lab52_mouthwash"
    if (
        {"lab52", "齒妍堂"} & (left.brand_tokens | right.brand_tokens)
        and "牙刷" in left_text
        and "牙刷" in right_text
        and any(term in left_text for term in ("嬰幼兒", "幼兒", "汪汪隊"))
        and any(term in right_text for term in ("嬰幼兒", "幼兒", "汪汪隊"))
        and _has_shared_count(left, right, 2, "入")
    ):
        return "lab52_paw_patrol_baby_toothbrush_2pack"
    if (
        "benefit" in (left.brand_tokens & right.brand_tokens)
        and "染唇液" in left_text
        and "染唇液" in right_text
        and "唇頰兩用" in pair_text
    ):
        return "benefit_lip_tint"
    if (
        {"schick", "舒適牌"} & (left.brand_tokens & right.brand_tokens)
        and "舒綺" in left_text
        and "舒綺" in right_text
        and "除毛刀片" in left_text
        and "除毛刀片" in right_text
        and "敏感肌" in left_text
        and "敏感肌" in right_text
        and _has_shared_count(left, right, 3, "入")
    ):
        return "schick_womens_sensitive_blade_3pack"
    if (
        {"herb24", "草本"} & (left.brand_tokens & right.brand_tokens)
        and "晨霧純精油擴香儀ii" in left_text
        and "晨霧純精油擴香儀ii" in right_text
        and (("霧黑" in left_text and "黑色" in right_text) or ("霧黑" in right_text and "黑色" in left_text))
    ):
        return "herb24_mist_diffuser_black"
    if _has_pavaruni_40_scent_oil_alignment(left, right):
        return "pavaruni_40_scent_oil"
    if _has_pavaruni_20_scent_candle_alignment(left, right):
        return "pavaruni_20_scent_candle"
    if _has_laundrin_tokyo_car_freshener_alignment(left, right):
        return "laundrin_tokyo_car_freshener"
    if (
        "好物良品" in (left.brand_tokens & right.brand_tokens)
        and "北歐簡樸融蠟燈桌面氣氛夜燈" in left_text
        and "北歐簡樸融蠟燈桌面氣氛夜燈" in right_text
    ):
        return "goodgoods_nordic_wax_lamp"
    if (
        {"derma", "丹麥德瑪"} & (left.brand_tokens & right.brand_tokens)
        and "有機植萃" in left_text
        and "有機植萃" in right_text
        and "護膚油" in left_text
        and "護膚油" in right_text
        and _has_shared_volume(left, right, 150)
    ):
        return "derma_eco_skin_oil"
    if (
        {"derma", "丹麥德瑪"} & (left.brand_tokens & right.brand_tokens)
        and "大地" in left_text
        and "大地" in right_text
        and "植萃" in left_text
        and "植萃" in right_text
        and "護膚油" in left_text
        and "護膚油" in right_text
        and _has_exact_count_alignment(left, right)
    ):
        return "derma_eco_skin_oil_2pack_review"
    if (
        {"修護保養"} & (left.brand_tokens & right.brand_tokens)
        and "蝸牛特潤修護面膜" in left_text
        and "蝸牛特潤修護面膜" in right_text
        and _has_shared_count(left, right, 6, "片")
    ):
        return "w_repair_snail_mask_6pcs_review"
    if (
        {"yuskin", "悠斯晶"} & (left.brand_tokens & right.brand_tokens)
        and "乳霜" in left_text
        and "乳霜" in right_text
        and _has_shared_weight(left, right, 30)
        and _has_exact_count_alignment(left, right)
    ):
        return "yuskin_classic_cream_30g_6pack"
    if (
        {"johnson", "johnsons", "嬌生"} & (left.brand_tokens & right.brand_tokens)
        and "嬰兒" in left_text
        and "嬰兒" in right_text
        and "潤膚乳" in left_text
        and "潤膚乳" in right_text
        and _has_shared_volume(left, right, 500)
        and (not left.counts or not right.counts or _has_exact_count_alignment(left, right))
        and any(option in left_text and option in right_text for option in ("牛奶", "純淨", "甜夢", "溫和", "棉柔"))
    ):
        return "johnsons_baby_lotion_variant_catalog"
    if (
        {"im meme", "meme"} & (left.brand_tokens & right.brand_tokens)
        and "我愛超磁妝定妝噴霧" in left_text
        and "我愛超磁妝定妝噴霧" in right_text
        and "涼感" in left_text
        and "涼感" in right_text
    ):
        return "im_meme_fixx_cool_setting_spray"
    if (
        {"so", "natural", "fixx"} <= (left.brand_tokens & right.brand_tokens)
        and "全天候超完美定妝噴霧" in left_text
        and "全天候超完美定妝噴霧" in right_text
        and _has_shared_volume(left, right, 120)
        and not any(term in pair_text for term in ("經典款", "光澤款", "霧面款", "夏日款", "涼感", "一般"))
    ):
        return "so_natural_fixx_setting_spray_120ml_plain"
    if (
        {"so", "natural", "fixx"} <= (left.brand_tokens & right.brand_tokens)
        and "全天候超完美定妝噴霧" in left_text
        and "全天候超完美定妝噴霧" in right_text
    ):
        return "so_natural_fixx_setting_spray_catalog"
    if (
        {"kate", "凱婷"} & (left.brand_tokens & right.brand_tokens)
        and "粉餅盒" in left_text
        and "粉餅盒" in right_text
    ):
        return "kate_powder_case_catalog"
    if (
        {"kate", "凱婷"} & (left.brand_tokens & right.brand_tokens)
        and "怪獸級持色唇膏" in left_text
        and "怪獸級持色唇膏" in right_text
    ):
        return "kate_monster_lipstick_catalog"
    if (
        "opi" in (left.brand_tokens & right.brand_tokens)
        and _shared_model_tokens(left, right)
        and "類光繚" in left_text
        and "類光繚" in right_text
        and ("指甲油" in left_text or "指彩" in left_text)
        and ("指甲油" in right_text or "指彩" in right_text)
        and ("如膠似漆" in left_text or "如膠似漆" in right_text)
    ):
        return "opi_gel_polish_exact_model"
    if (
        "opi" in (left.brand_tokens & right.brand_tokens)
        and "類光繚指甲油" in left_text
        and "類光繚指甲油" in right_text
        and any(series in left_text and series in right_text for series in ("白日夢遊", "驕傲果凍"))
    ):
        return "opi_gel_polish_series_catalog"
    if (
        ("rom" in (left.brand_tokens & right.brand_tokens) or "romand" in (left.brand_tokens & right.brand_tokens))
        and "果汁唇釉" in left_text
        and "果汁唇釉" in right_text
        and "2.0" in left_text
        and "2.0" in right_text
    ):
        return "romand_juicy_lip_tint_2_catalog"
    if (
        "solone" in (left.brand_tokens & right.brand_tokens)
        and "持久眼線筆" in left_text
        and "持久眼線筆" in right_text
    ):
        return "solone_longlasting_eyeliner"
    if (
        {"shu uemura", "植村秀"} & (left.brand_tokens & right.brand_tokens)
        and "自動武士刀眉筆" in left_text
        and "自動武士刀眉筆" in right_text
        and "筆蕊" in left_text
        and "筆蕊" in right_text
    ):
        return "shu_auto_hard_formula_refill_catalog"
    if (
        {"summer", "eve", "舒摩兒"} & (left.brand_tokens & right.brand_tokens)
        and "浴潔露" in left_text
        and "浴潔露" in right_text
        and "全肌防護" in left_raw
        and "全肌防護" in right_raw
        and _has_exact_count_alignment(left, right)
    ):
        return "summer_eve_full_skin_wash_2pack"
    if (
        "焦糖楓葉香氛擴香花禮盒" in left_text
        and "焦糖楓葉香氛擴香花禮盒" in right_text
        and _has_shared_volume(left, right, 30)
        and bool(left.brand_tokens) != bool(right.brand_tokens)
    ):
        return "the_forest_maple_diffuser_flower_brandless"
    if (
        "gatsby" in (left.brand_tokens & right.brand_tokens)
        and "爆水擦澡濕巾" in left_text
        and "爆水擦澡濕巾" in right_text
        and "24張入" in left_text
        and "24張入" in right_text
    ):
        return "gatsby_body_wipes_24"
    if (
        {"3w", "clinic"} <= (left.brand_tokens & right.brand_tokens)
        and "膠原蛋白粉底液" in left_text
        and "膠原蛋白粉底液" in right_text
        and _has_shared_volume(left, right, 50)
        and _has_exact_count_alignment(left, right)
    ):
        return "3w_clinic_collagen_foundation_50ml_2pack"
    if (
        "花美水" in (left.brand_tokens & right.brand_tokens)
        and "moisture" in (left.brand_tokens & right.brand_tokens)
        and "保濕修護" in left_text
        and "保濕修護" in right_text
        and "精華凝膠" in left_text
        and "精華凝膠" in right_text
        and ("原黃金" in left_text and "原黃金" in right_text)
        and _has_shared_weight(left, right, 1.7)
        and _has_exact_count_alignment(left, right)
    ):
        return "hanamisui_moisture_original_gel_1_7g_3pack"
    if (
        "花美水" in (left.brand_tokens & right.brand_tokens)
        and "inclear" in (left.brand_tokens & right.brand_tokens)
        and ("櫻克麗兒" in left_text and "櫻克麗兒" in right_text)
        and ("私密淨化凝膠" in left_text and "私密淨化凝膠" in right_text)
        and _has_shared_weight(left, right, 1.7)
        and _has_exact_count_alignment(left, right)
    ):
        return "hanamisui_inclear_private_gel_1_7g_3pack"
    if (
        "花美水" in (left.brand_tokens & right.brand_tokens)
        and "relax" in left_raw
        and "relax" in right_raw
        and "薰衣草" in left_text
        and "薰衣草" in right_text
        and "潤滑凝膠" in left_text
        and "潤滑凝膠" in right_text
        and _has_shared_weight(left, right, 1.7)
        and _has_exact_count_alignment(left, right)
    ):
        return "hanamisui_relax_lavender_gel_1_7g_3pack"
    if (
        ("聖克萊爾" in left_text and "聖克萊爾" in right_text)
        and "私密呼呼溫和潔淨慕斯" in left_text
        and "私密呼呼溫和潔淨慕斯" in right_text
        and _has_shared_volume(left, right, 150)
        and _has_exact_count_alignment(left, right)
    ):
        return "st_clare_private_mousse_150ml_2pack"
    if (
        ("聖克萊爾" in left_text and "聖克萊爾" in right_text)
        and "私密呼呼溫和潔淨慕斯" in left_text
        and "私密呼呼溫和潔淨慕斯" in right_text
        and "私密呼呼舒緩護理噴霧" in left_text
        and "私密呼呼舒緩護理噴霧" in right_text
        and _has_shared_volume(left, right, 150)
        and _has_shared_volume(left, right, 50)
    ):
        return "st_clare_private_mousse_spray_set"
    if (
        ("biopeutic" in (left.brand_tokens & right.brand_tokens) or ("葆療美" in left_text and "葆療美" in right_text))
        and "果酸煥膚水凝乳" in left_text
        and "果酸煥膚水凝乳" in right_text
        and "20%" in left_raw
        and "20%" in right_raw
        and _has_shared_volume(left, right, 150)
    ):
        return "biopeutic_plus_aha_lotion_20_150ml"
    if (
        "台塑生醫" in left_text
        and "台塑生醫" in right_text
        and "嬰兒沐浴洗髮" in left_text
        and "嬰兒沐浴洗髮" in right_text
        and "3件組" in left_text
        and "3件組" in right_text
        and "嬰兒沐浴精" in left_text
        and "嬰兒沐浴精" in right_text
        and "嬰幼童洗髮精" in left_text
        and "嬰幼童洗髮精" in right_text
    ):
        return "taisu_baby_bath_shampoo_3pc"
    if (
        "雅頓" in left_text
        and "雅頓" in right_text
        and "八小時潤澤護唇膏" in left_text
        and "八小時潤澤護唇膏" in right_text
        and "spf15" in left_raw
        and "spf15" in right_raw
        and _has_shared_weight(left, right, 3.7)
        and _has_exact_count_alignment(left, right)
    ):
        return "arden_eight_hour_lip_spf15_3_7g_3pack"
    if (
        "理膚寶水" in left_text
        and "理膚寶水" in right_text
        and "全面修復潤唇膏" in left_text
        and "全面修復潤唇膏" in right_text
        and _has_shared_volume(left, right, 7.5)
    ):
        return "laroche_posay_repair_lip_balm_7_5ml"
    if (
        {"flortte", "花洛莉亞"} & (left.brand_tokens & right.brand_tokens)
        and "水果沙拉系列彩色防水眼線液筆" in left_text
        and "水果沙拉系列彩色防水眼線液筆" in right_text
        and "色號" in left_text
        and "色號" in right_text
        and "任選" in left_raw
        and "任選" in right_raw
        and _has_shared_volume(left, right, 0.5)
    ):
        return "flortte_fruit_salad_eyeliner_0_5ml_catalog"
    if (
        {"neutrogena", "露得清"} & (left.brand_tokens & right.brand_tokens)
        and "護手霜" in left_text
        and "護手霜" in right_text
        and "無香" in left_text
        and "無香" in right_text
        and "有香" in left_text
        and "有香" in right_text
        and _has_shared_weight(left, right, 56)
    ):
        return "neutrogena_hand_cream_56g_scent_catalog"
    if (
        {"kanebo", "佳麗寶"} & (left.brand_tokens & right.brand_tokens)
        and "allie" in left_raw
        and "allie" in right_raw
        and "持采亮化uv防曬水凝乳" in left_text
        and "持采亮化uv防曬水凝乳" in right_text
        and "任選" in left_raw
        and "任選" in right_raw
        and _has_shared_weight(left, right, 60)
    ):
        return "kanebo_allie_bright_uv_milk_60g_catalog"
    if (
        "ordinary" in (left.brand_tokens & right.brand_tokens)
        and "咖啡因" in left_text
        and "咖啡因" in right_text
        and "egcg" in left_raw
        and "egcg" in right_raw
        and "兒茶眼部配方" in left_text
        and "兒茶眼部配方" in right_text
        and (
            _has_shared_volume(left, right, 30)
            or (30.0 in left.volumes_ml and not right.volumes_ml)
            or (30.0 in right.volumes_ml and not left.volumes_ml)
        )
    ):
        return "the_ordinary_caffeine_egcg_30ml"
    if (
        {"sk-ii", "skii", "sk2"} & (left.brand_tokens & right.brand_tokens)
        and "青春露" in left_text
        and "青春露" in right_text
        and _has_shared_volume(left, right, 330)
        and _has_shared_count(left, right, 2, "入")
    ):
        return "sk_ii_essence_330ml_2pack"
    if (
        {"amiino", "安美諾"} & (left.brand_tokens | right.brand_tokens)
        and "美白修護霜" in left_text
        and "美白修護霜" in right_text
        and _has_shared_volume(left, right, 30)
    ):
        return "amiino_whitening_repair_cream_30ml"
    if (
        {"natures", "care"} <= (left.brand_tokens & right.brand_tokens)
        and "綿羊油" in left_text
        and "綿羊油" in right_text
        and _has_exact_count_alignment(left, right)
        and (
            _has_shared_volume(left, right, 125)
            or (125.0 in left.volumes_ml and not right.volumes_ml and "125m" in right_text)
            or (125.0 in right.volumes_ml and not left.volumes_ml and "125m" in left_text)
        )
    ):
        return "natures_care_sheep_oil_exact_pack"
    if (
        "tomoon" in (left.brand_tokens & right.brand_tokens)
        and "德國奔月" in left_text
        and "德國奔月" in right_text
        and "豪華套裝組" in left_text
        and "豪華套裝組" in right_text
        and ("指甲剪" in left_text or "指甲刀" in left_text)
        and ("指甲剪" in right_text or "指甲刀" in right_text)
        and any(size in left_text and size in right_text for size in ("l號", "s號"))
    ):
        return "tomoon_nail_clipper_luxury_size"
    if (
        {"hh", "草本新淨界"} & (left.brand_tokens & right.brand_tokens)
        and "私密植萃抗菌潔淨露" in left_text
        and "私密植萃抗菌潔淨露" in right_text
        and "私密衣物抗菌手洗精" in left_text
        and "私密衣物抗菌手洗精" in right_text
        and _has_shared_volume(left, right, 200)
    ):
        return "hh_private_cleanser_laundry_wash_set"
    if (
        {"sebamed", "施巴"} & (left.brand_tokens & right.brand_tokens)
        and "護潔露" in left_text
        and "護潔露" in right_text
        and _has_shared_volume(left, right, 200)
        and _has_exact_count_alignment(left, right)
    ):
        return "sebamed_ph38_private_wash_200ml_2pack"
    if (
        {"yes", "德悅氏"} & (left.brand_tokens & right.brand_tokens)
        and "鋒利窄弧型剪刀" in left_text
        and "鋒利窄弧型剪刀" in right_text
        and "9cm" in left_text
        and "9cm" in right_text
    ):
        return "yes_curved_scissors_9cm"
    if (
        {"yes", "德悅氏"} & (left.brand_tokens & right.brand_tokens)
        and "腳指甲剪刀" in left_text
        and "腳指甲剪刀" in right_text
        and "10.5cm" in left_text
        and "10.5cm" in right_text
    ):
        return "yes_foot_nail_scissors_10_5cm"
    if (
        {"yes", "德悅氏"} & (left.brand_tokens & right.brand_tokens)
        and "極細指甲緣硬皮剪刀" in left_text
        and "極細指甲緣硬皮剪刀" in right_text
        and "9cm" in left_text
        and "9cm" in right_text
    ):
        return "yes_cuticle_scissors_9cm"
    if _has_yes_nail_tool_exact_alignment(left, right):
        return "yes_nail_tool_exact_model_size"
    if (
        {"kussen", "葵森"} & (left.brand_tokens & right.brand_tokens)
        and "寶寶益菌屁屁膏" in left_text
        and "寶寶益菌屁屁膏" in right_text
        and _has_shared_volume(left, right, 50)
        and _has_exact_count_alignment(left, right)
    ):
        return "kussen_baby_butt_cream_50ml_3pack"
    if (
        "bone" in (left.brand_tokens & right.brand_tokens)
        and "擴香禮盒三入組" in left_text
        and "擴香禮盒三入組" in right_text
        and all(component in left_text and component in right_text for component in ("原木麋鹿", "搖搖貓頭鷹", "薰衣草精油"))
        and _has_exact_count_alignment(left, right)
    ):
        return "bone_diffuser_gift_3pack"
    if (
        {"1990", "選物"} <= (left.brand_tokens & right.brand_tokens)
        and "現代簡約半圓罩融燭燈" in left_text
        and "現代簡約半圓罩融燭燈" in right_text
        and "白色款" in left_text
        and "白色款" in right_text
    ):
        return "selection1990_half_dome_wax_lamp_white"
    if (
        {"1990", "選物"} <= (left.brand_tokens & right.brand_tokens)
        and "歐式可彎融燭燈" in left_text
        and "歐式可彎融燭燈" in right_text
        and "白色款" in left_text
        and "白色款" in right_text
    ):
        return "selection1990_bendable_wax_lamp_white"
    if (
        "canmake" in (left.brand_tokens & right.brand_tokens)
        and "淚袋專用盤" in left_text
        and "淚袋專用盤" in right_text
        and "淚袋眼影盤" in left_text
        and "淚袋眼影盤" in right_text
    ):
        return "canmake_tear_bag_palette"
    if (
        {"recipe", "box"} <= brand_tokens
        and "可撕式水性兒童指甲油" in left_text
        and "可撕式水性兒童指甲油" in right_text
    ):
        return "recipe_box_peelable_child_polish_catalog"
    if (
        "gdesign" in (left.brand_tokens & right.brand_tokens)
        and "aroma" in left_text
        and "aroma" in right_text
        and "lava" in left_text
        and "lava" in right_text
        and "解憂放鬆緩緩燈2.0" in left_text
        and "解憂放鬆緩緩燈2.0" in right_text
        and "熔岩燈" in left_text
        and "熔岩燈" in right_text
        and "精油擴香" in left_text
        and "精油擴香" in right_text
    ):
        return "gdesign_aroma_lava_lamp_2"
    if (
        "hooome" in (left.brand_tokens & right.brand_tokens)
        and "白色" in left_text
        and "白色" in right_text
        and "香氛蠟燭暖燈" in left_text
        and "香氛蠟燭暖燈" in right_text
        and "兩顆燈泡" in left_text
        and "兩顆燈泡" in right_text
        and "禮盒" in left_text
        and "禮盒" in right_text
    ):
        return "hooome_classic_white_wax_lamp_bulbs_giftbox"
    return ""


def _is_relove_private_cleanser_line(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Return True when the pair matches the Relove "私密 … 凝露" naming pattern.

    The ``relove`` brand token only needs to be present on one of the two
    identities (the brand token sets are unioned), whereas both ``私密`` and
    ``凝露`` must occur in the searchable name of *each* side.
    """
    if "relove" not in (left.brand_tokens | right.brand_tokens):
        return False
    names = (left.searchable_name, right.searchable_name)
    # Every required term has to be present on both sides.
    return all(term in name for term in ("私密", "凝露") for name in names)


def _is_relove_cleanser_gel_like(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Return True when both names read like a Relove cleanser gel.

    At least one side must carry the ``relove`` brand token. Each searchable
    name must contain ``凝露`` together with at least one of the cleanser
    terms ``私密``, ``潔淨`` or ``清潔``.
    """
    cleanser_terms = ("私密", "潔淨", "清潔")

    def _is_cleanser_gel(name: str) -> bool:
        # A side qualifies when it names a gel and any cleanser wording.
        return "凝露" in name and any(term in name for term in cleanser_terms)

    return (
        "relove" in (left.brand_tokens | right.brand_tokens)
        and _is_cleanser_gel(left.searchable_name)
        and _is_cleanser_gel(right.searchable_name)
    )


def _is_multi_variant_catalog_listing(identity: ProductIdentity) -> bool:
    """Return True if the normalized name contains a multi-variant listing phrase.

    The phrases checked are the entries of ``MULTI_VARIANT_LISTING_PHRASES``;
    a plain substring match against ``identity.normalized_name`` is used.
    """
    normalized = identity.normalized_name
    for phrase in MULTI_VARIANT_LISTING_PHRASES:
        if phrase in normalized:
            return True
    return False


def _normalize_variant_option(value: str) -> set[str]:
    """Normalize a raw option string into a set holding at most one token.

    The value is lower-cased and every character outside ``a-z`` / ``0-9`` is
    removed. A falsy value, or one with nothing left after stripping, yields
    an empty set; otherwise the result is a single-element set.
    """
    lowered = value.lower() if value else ""
    token = re.sub(r"[^a-z0-9]", "", lowered)
    return {token} if token else set()


def _variant_option_compare_key(option: str) -> str:
    """Return the key used to compare variant options.

    All-digit options drop their leading zeros (an all-zero string collapses
    to ``"0"``), so ``"01"`` and ``"1"`` compare equal. Any other option is
    returned unchanged.
    """
    if not option.isdigit():
        return option
    trimmed = option.lstrip("0")
    return trimmed if trimmed else "0"


def _variant_options_overlap(left_options: set[str], right_options: set[str]) -> bool:
    if left_options & right_options:
        return True
    left_keys = {_variant_option_compare_key(option) for option in left_options}
    right_keys = {_variant_option_compare_key(option) for option in right_options}
    return bool(left_keys & right_keys)


def _is_catalog_or_delimited_variant_listing(identity: ProductIdentity) -> bool:
    """Return True when the listing appears to enumerate several variants.

    A listing qualifies when any of the following holds:

    * ``_is_multi_variant_catalog_listing`` accepts it;
    * its searchable name contains a code range such as ``01~05`` / ``a1-a9``;
    * its searchable name contains a list delimiter and either at least two
      explicit variant options were extracted, or the name mentions one of
      the product terms listed below.
    """
    if _is_multi_variant_catalog_listing(identity):
        return True

    name = identity.searchable_name
    range_pattern = (
        r"(?<![a-z0-9])([a-z]?\d{1,3}[a-z]?)\s*(?:~|～|至|-)\s*([a-z]?\d{1,3}[a-z]?)(?![a-z0-9])"
    )
    if re.search(range_pattern, name, re.I) is not None:
        return True

    option_count = len(_explicit_variant_option_tokens(identity))
    # Without a delimiter neither remaining rule can apply.
    if re.search(r"[/／、,，.．&＆]", name) is None:
        return False
    if option_count >= 2:
        return True
    delimiter_sensitive_terms = (
        "粉餅盒",
        "眼線膠筆",
        "眉筆",
        "唇膏",
        "唇釉",
        "遮瑕蜜",
        "車用擴香",
        "車用香氛",
    )
    return any(term in name for term in delimiter_sensitive_terms)


def _has_catalog_variant_listing_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Return True when two multi-variant catalog listings line up.

    All of these must hold: both sides are multi-variant catalog listings,
    both have the same product type and that type is ``精油`` or ``護唇膏``,
    and ``_has_overlapping_base_spec`` accepts the pair. Given that, the pair
    aligns if the sides share a core token or both searchable names contain
    the same catalog term.
    """
    both_catalog = _is_multi_variant_catalog_listing(left) and _is_multi_variant_catalog_listing(right)
    if not both_catalog:
        return False

    same_supported_type = (
        left.product_type == right.product_type
        and left.product_type in {"精油", "護唇膏"}
    )
    if not same_supported_type:
        return False
    if not _has_overlapping_base_spec(left, right):
        return False

    if left.core_tokens & right.core_tokens:
        return True

    # Fall back to a shared catalog wording when no core token is shared.
    for term in ("香氛擴香罐", "香氛蠟燭", "蠟燭", "擴香罐", "修護唇膏"):
        if term in left.searchable_name and term in right.searchable_name:
            return True
    return False


def _is_variant_sensitive_identity(
    left: ProductIdentity,
    right: ProductIdentity,
    shared_anchor: str,
) -> bool:
    """Return True if any relevant text contains a variant-sensitive keyword.

    The texts inspected are the shared anchor, both product types and both
    searchable names; empty or missing texts are skipped. Keywords come from
    ``VARIANT_SENSITIVE_KEYWORDS`` and are matched as substrings.
    """
    candidate_texts = (
        shared_anchor,
        left.product_type or "",
        right.product_type or "",
        left.searchable_name,
        right.searchable_name,
    )
    for text in candidate_texts:
        if not text:
            continue
        for keyword in VARIANT_SENSITIVE_KEYWORDS:
            if keyword in text:
                return True
    return False


def _has_variant_descriptor_conflict(left: ProductIdentity, right: ProductIdentity, shared_anchor: str) -> bool:
    """Return True when the two identities carry incompatible variant descriptors.

    Evaluation order matters:

    1. A serum formulation conflict is always reported as a conflict.
    2. Any of the product-specific alignment helpers below clears the pair.
    3. A non-empty shared anchor that is not in ``SEARCH_BROAD_ANCHORS``
       clears the pair unless the pair is variant sensitive.
    4. Shared model tokens clear the pair.
    5. Otherwise the descriptor sets from ``_variant_descriptors`` are
       compared: a conflict exists only if both are non-empty, they have no
       element in common, and no descriptor is a substring of a descriptor
       on the other side.
    """
    if _has_serum_formulation_conflict(left, right, shared_anchor):
        return True

    # Checked in this order; the first helper that accepts the pair wins.
    known_alignments = (
        _has_taicend_baby_spray_equivalence,
        _has_brush_baby_wildones_toothbrush_alignment,
        _has_baan_baby_lip_catalog_alignment,
        _has_recipe_box_child_sunscreen_cushion_alignment,
        _has_pavaruni_40_scent_oil_alignment,
        _has_pavaruni_20_scent_candle_alignment,
        _has_laundrin_tokyo_car_freshener_alignment,
        _is_relove_private_cleanser_line,
    )
    if any(is_aligned(left, right) for is_aligned in known_alignments):
        return False

    anchor_is_specific = bool(shared_anchor) and shared_anchor not in SEARCH_BROAD_ANCHORS
    if anchor_is_specific and not _is_variant_sensitive_identity(left, right, shared_anchor):
        return False

    if _shared_model_tokens(left, right):
        return False

    left_descriptors = _variant_descriptors(left)
    right_descriptors = _variant_descriptors(right)
    if not (left_descriptors and right_descriptors):
        return False
    if left_descriptors & right_descriptors:
        return False

    # A descriptor contained in one from the other side counts as compatible.
    return not any(
        mine in theirs or theirs in mine
        for mine in left_descriptors
        for theirs in right_descriptors
    )


def _explicit_variant_option_tokens(identity: ProductIdentity) -> set[str]:
    """Collect explicit variant option tokens from the searchable name.

    Tokens come from four sources, all matched case-insensitively where a
    regex is involved:

    * both endpoints of a code range (``01~05``, ``a1-a9``, ``1至3`` ...);
    * a code following ``#``, ``no`` / ``no.``, ``色號`` or ``號色``;
    * a short digit run directly followed by a CJK character, unless the
      next four characters are ``號護唇膏`` or ``號護脣膏``;
    * every entry of ``VARIANT_OPTION_COLOR_WORDS`` found in the name.

    Regex-derived tokens are passed through ``_normalize_variant_option``.
    """
    name = identity.searchable_name
    found: set[str] = set()

    range_pattern = (
        r"(?<![a-z0-9])([a-z]?\d{1,3}[a-z]?)\s*(?:~|～|至|-)\s*([a-z]?\d{1,3}[a-z]?)(?![a-z0-9])"
    )
    for range_match in re.finditer(range_pattern, name, re.I):
        found |= _normalize_variant_option(range_match.group(1))
        found |= _normalize_variant_option(range_match.group(2))

    marker_pattern = r"(?:#|no\.?|色號|號色)\s*([a-z]?\d{1,3}[a-z]?)(?![a-z0-9])"
    for marker_match in re.finditer(marker_pattern, name, re.I):
        found |= _normalize_variant_option(marker_match.group(1))

    digit_pattern = r"(?<![a-z0-9])((?:0?\d){1,2})(?=[\u4e00-\u9fff])"
    excluded_suffixes = {"號護唇膏", "號護脣膏"}
    for digit_match in re.finditer(digit_pattern, name, re.I):
        tail_start = digit_match.end(1)
        # Skip digit runs whose following text is one of the excluded suffixes.
        if name[tail_start:tail_start + 4] not in excluded_suffixes:
            found |= _normalize_variant_option(digit_match.group(1))

    found.update(word for word in VARIANT_OPTION_COLOR_WORDS if word in name)
    return found


def _has_variant_option_selection_gap(identity: ProductIdentity, options: set[str]) -> bool:
    """Return True when a listing offers several named options to choose from.

    Purely numeric options are ignored. With fewer than two remaining
    (named) options the answer is always False. Otherwise the listing must
    either be a multi-variant catalog listing or contain one of the list
    delimiters ``/ ／ 、 , ，`` in its searchable name.
    """
    named = {option for option in options if not option.isdigit()}
    if len(named) < 2:
        return False
    if _is_multi_variant_catalog_listing(identity):
        return True
    return re.search(r"[/／、,，]", identity.searchable_name) is not None


def _has_catalog_options_against_generic_count_alignment(
    left: ProductIdentity,
    right: ProductIdentity,
    left_options: set[str],
    right_options: set[str],
) -> bool:
    """Return True when a catalog listing lines up with a generic count listing.

    The pair must pass ``_has_overlapping_base_spec`` and must not have two
    different non-empty product types. Each side is then tried in the
    "catalog" role against the other in the "generic" role; the pair aligns
    when, for either orientation:

    * the catalog side has at least two non-numeric options,
    * the generic side has no non-numeric options,
    * the two option sets share at least one all-digit option,
    * the catalog side is a multi-variant catalog listing, and
    * the generic side's searchable name contains ``組``.
    """
    if not _has_overlapping_base_spec(left, right):
        return False
    types_conflict = bool(
        left.product_type
        and right.product_type
        and left.product_type != right.product_type
    )
    if types_conflict:
        return False

    def _named(option_set: set[str]) -> set[str]:
        # Options that are not purely numeric.
        return {option for option in option_set if not option.isdigit()}

    orientations = (
        (left, right, left_options, right_options),
        (right, left, right_options, left_options),
    )
    for catalog_side, generic_side, catalog_opts, generic_opts in orientations:
        if len(_named(catalog_opts)) < 2:
            continue
        if _named(generic_opts):
            continue
        if not any(option.isdigit() for option in catalog_opts & generic_opts):
            continue
        if not _is_multi_variant_catalog_listing(catalog_side):
            continue
        if "組" in generic_side.searchable_name:
            return True
    return False


def _has_explicit_variant_option_conflict(
    left: ProductIdentity,
    right: ProductIdentity,
    shared_anchor: str,
) -> bool:
    """Return True when the explicit variant options of the two sides conflict.

    No conflict is reported unless the pair is variant sensitive, both sides
    expose explicit options, and the option sets differ.

    When the option sets do not overlap, a conflict exists unless some option
    is a substring of an option on the other side.

    When they do overlap, the pair is cleared if it is a catalog-vs-generic
    count alignment, or if neither side has strictly more options. Otherwise
    the side with more options (the "wider" side) decides: the pair is
    cleared when the combined names mention one of the listed makeup terms
    and the wider side is a catalog/delimited listing; failing that, a
    conflict is reported exactly when the wider side has a selection gap.
    """
    if not _is_variant_sensitive_identity(left, right, shared_anchor):
        return False

    left_options = _explicit_variant_option_tokens(left)
    right_options = _explicit_variant_option_tokens(right)
    if not (left_options and right_options) or left_options == right_options:
        return False

    if not _variant_options_overlap(left_options, right_options):
        # Disjoint options: tolerate the case where one option contains another.
        return not any(
            mine in theirs or theirs in mine
            for mine in left_options
            for theirs in right_options
        )

    if _has_catalog_options_against_generic_count_alignment(left, right, left_options, right_options):
        return False

    if len(left_options) > len(right_options):
        wider, wider_options = left, left_options
    elif len(right_options) > len(left_options):
        wider, wider_options = right, right_options
    else:
        # Equal option counts: neither side acts as the broader listing.
        return False

    pair_text = f"{left.searchable_name} {right.searchable_name}"
    makeup_terms = ("眉筆", "眼線膠筆", "唇膏", "唇釉", "粉餅盒", "遮瑕蜜")
    if any(term in pair_text for term in makeup_terms) and _is_catalog_or_delimited_variant_listing(wider):
        return False
    return bool(_has_variant_option_selection_gap(wider, wider_options))


def _has_named_variant_selection_review(
    left: ProductIdentity,
    right: ProductIdentity,
    shared_anchor: str,
) -> bool:
    """Decide whether a pair needs review because of named variant selection.

    Args:
        left: First product identity.
        right: Second product identity.
        shared_anchor: Anchor phrase shared by both identities.

    Returns:
        True when one of the selection-gap helpers fires, or when the
        explicit option tokens of the two sides indicate that one listing
        offers a choice of variants the other does not pin down.
    """
    pairwise_gap_checks = (
        _has_makeup_shade_selection_gap,
        _has_makeup_spray_variant_selection_gap,
        _has_catalog_specific_variant_selection_gap,
    )
    if any(check(left, right) for check in pairwise_gap_checks):
        return True

    tokens_left = _explicit_variant_option_tokens(left)
    tokens_right = _explicit_variant_option_tokens(right)

    if tokens_left and tokens_right:
        # Both sides list options: look for a catalog-style side that lists
        # more options than, and overlaps with, the other side.
        orientations = (
            (left, tokens_left, tokens_right),
            (right, tokens_right, tokens_left),
        )
        for listing, listing_tokens, narrower_tokens in orientations:
            if not _is_catalog_or_delimited_variant_listing(listing):
                continue
            if len(listing_tokens) <= len(narrower_tokens):
                continue
            if not _variant_options_overlap(listing_tokens, narrower_tokens):
                continue
            if _is_variant_sensitive_identity(left, right, shared_anchor):
                return True
        return False

    if not tokens_left and not tokens_right:
        return False

    # Exactly one side carries explicit option tokens from here on.
    if tokens_left:
        tokened, untokened, side_tokens = left, right, tokens_left
    else:
        tokened, untokened, side_tokens = right, left, tokens_right

    if (
        _is_variant_sensitive_identity(left, right, shared_anchor)
        and _is_catalog_or_delimited_variant_listing(untokened)
        and _explicit_variant_option_tokens(tokened)
    ):
        return True

    combined_names = f"{left.searchable_name} {right.searchable_name}"
    base_spec_product_terms = ("粉餅盒", "護手霜", "護唇膏", "護唇棒", "滋養霜", "眼線膠筆", "遮瑕蜜")
    if (
        _is_variant_sensitive_identity(left, right, shared_anchor)
        and _has_overlapping_base_spec(left, right)
        and _explicit_variant_option_tokens(tokened)
        and any(term in combined_names for term in base_spec_product_terms)
    ):
        return True

    # Purely numeric options are ignored; at least two named options are
    # required before the listing shape is inspected.
    non_numeric = {token for token in side_tokens if not token.isdigit()}
    if len(non_numeric) < 2:
        return False
    return _is_multi_variant_catalog_listing(tokened) or (
        re.search(r"[/／、,&＆]", tokened.searchable_name) is not None
    )


def _search_core_score(token: str, all_tokens: set[str]) -> tuple[int, int, str]:
    """Compute a sortable ranking key for a core token.

    Args:
        token: The token being scored.
        all_tokens: Every candidate token, used to detect whether another
            token yields anchor phrases.

    Returns:
        ``(score, -compact_length, cleaned)``. Tokens that clean to nothing
        score -999; noise, generic and pack-size tokens score -900.
    """
    normalized = _clean_search_phrase(token)
    if not normalized:
        return (-999, 0, normalized)

    squeezed = normalized.replace(" ", "")
    rejected = (
        squeezed in SEARCH_NOISE_TOKENS
        or squeezed in GENERIC_TOKENS
        # Quantity-times-count strings such as "30mlx2".
        or re.fullmatch(r"\d+(?:\.\d+)?(?:ml|g|mg|kg|l)x\d+", squeezed, re.I)
    )
    if rejected:
        return (-900, 0, normalized)

    width = len(squeezed)
    total = 0

    if re.search(r"[a-z][a-z0-9-]{2,}", normalized):
        total += 30
    if re.search(r"\d", normalized):
        total += 12

    anchor_phrases = _extract_anchor_phrases(normalized)
    if not anchor_phrases:
        # Without an anchor, shorter tokens get a larger bonus.
        total += max(0, 24 - width)
    else:
        lead_anchor = anchor_phrases[0]
        total += 90 + min(24, len(lead_anchor) * 3)
        if lead_anchor == squeezed:
            total += 8
        if squeezed in SEARCH_BROAD_ANCHORS:
            total -= 28

    if width <= 8:
        total += 14
    elif width >= 12:
        total -= 12

    another_token_has_anchor = False
    for other in all_tokens:
        if other != token and _extract_anchor_phrases(other):
            another_token_has_anchor = True
            break
    if another_token_has_anchor and any(
        term in squeezed for term in SEARCH_AMBIGUOUS_PRODUCT_TERMS
    ):
        total -= 80

    if any(noise in squeezed for noise in SEARCH_NOISE_TOKENS):
        total -= 18

    return (total, -width, normalized)


def _ranked_search_core_phrases(identity: ProductIdentity, limit: int = 4) -> list[str]:
    """Return the best-scoring core phrases of ``identity`` for search queries.

    Args:
        identity: Parsed product identity whose ``core_tokens`` are ranked.
        limit: Maximum number of phrases to return; non-positive values
            yield an empty list.

    Returns:
        Up to ``limit`` distinct phrases, in descending score order.
    """
    if limit <= 0:
        return []

    tokens = {token for token in identity.core_tokens if token not in GENERIC_TOKENS}
    # Score every token exactly once: each scoring scans all tokens, so
    # re-scoring in both the sort key and the loop doubled that work.
    scores = {token: _search_core_score(token, tokens) for token in tokens}
    ranked_tokens = sorted(tokens, key=scores.__getitem__, reverse=True)

    phrases: list[str] = []
    for token in ranked_tokens:
        if scores[token][0] < -100:
            # Sorted in descending order, so every remaining token is
            # rejected as well.
            break
        candidates = _extract_anchor_phrases(token) or [_clean_search_phrase(token)]
        for phrase in candidates:
            compact = phrase.replace(" ", "")
            if len(compact) < 2 or compact in SEARCH_NOISE_TOKENS:
                continue
            # Ambiguous product terms may only appear as the first phrase.
            if phrases and any(term in compact for term in SEARCH_AMBIGUOUS_PRODUCT_TERMS):
                continue
            if phrase not in phrases:
                phrases.append(phrase)
            if len(phrases) >= limit:
                return phrases
    return phrases


def _variant_primary_phrase(identity: ProductIdentity) -> str:
    """Pick the main variant phrase of ``identity`` for search queries.

    The 2-8 CJK characters following one of the listed nail-strip line names
    win when they clean to a non-noise phrase; otherwise the shortest variant
    descriptor (ties broken alphabetically) is used.

    Returns:
        The chosen phrase, or an empty string when nothing qualifies.
    """
    name_text = identity.searchable_name
    nail_line_names = ("時尚潮流美甲片", "頂級璀燦美甲片", "薄型經典美甲片", "足部時尚潮流美甲片")
    suffix_pattern = r"[-_ ]*([\u4e00-\u9fff]{2,8})"

    for line_name in nail_line_names:
        found = re.search(re.escape(line_name) + suffix_pattern, name_text)
        if found is None:
            continue
        candidate = _clean_search_phrase(found.group(1))
        squeezed = candidate.replace(" ", "")
        if squeezed and squeezed not in SEARCH_NOISE_TOKENS:
            return candidate

    return min(
        _variant_descriptors(identity),
        key=lambda descriptor: (len(descriptor), descriptor),
        default="",
    )


def build_search_terms(name: str, max_terms: int = 3) -> list[str]:
    """Build an ordered list of search queries for a product name.

    Candidate queries are assembled from the parsed identity (brand phrase,
    ranked core phrases, variant phrases and spec terms) and tried in a fixed
    priority order. Empty candidates and duplicates are skipped.

    Args:
        name: Raw product name to derive queries from.
        max_terms: Maximum number of queries to return; non-positive values
            yield an empty list.

    Returns:
        Up to ``max_terms`` distinct cleaned queries, each truncated to at
        most 42 characters.
    """
    if max_terms <= 0:
        return []

    identity = parse_product_identity(name)
    searchable = identity.searchable_name
    brand_tokens = identity.brand_tokens
    is_dashing_diva_nail_line = {"dashing", "diva"} <= brand_tokens and "美甲片" in searchable

    def joined(*parts: str) -> str:
        """Join the non-empty parts with single spaces."""
        return " ".join(part for part in parts if part)

    def latin_rank(token: str) -> tuple[bool, int, str]:
        """Sort key: tokens with a space or hyphen first, then longest, then alphabetical."""
        return (" " not in token and "-" not in token, -len(token), token)

    def primary_brand_phrase() -> str:
        """Choose the single brand phrase used in queries, or ``""``."""
        # Known multi-token brands are mapped to a fixed query phrase.
        for required_tokens, phrase in (
            ({"dashing", "diva"}, "dashing diva"),
            ({"rom", "nd"}, "romand"),
            ({"im", "meme"}, "im meme"),
            ({"recipe", "box"}, "recipe box"),
        ):
            if required_tokens <= brand_tokens:
                return phrase
        # Otherwise prefer the longest CJK brand token.
        chinese = sorted(
            (token for token in brand_tokens if re.search(r"[\u4e00-\u9fff]", token)),
            key=lambda token: (-len(token), token),
        )
        if chinese:
            return chinese[0]
        # Then Latin tokens of length >= 3, falling back to length >= 2.
        for min_length in (3, 2):
            latin = sorted(
                (
                    token for token in brand_tokens
                    if re.search(r"[a-z]", token)
                    and len(token) >= min_length
                    and token not in GENERIC_TOKENS
                ),
                key=latin_rank,
            )
            if latin:
                return latin[0]
        return ""

    brand_part = primary_brand_phrase()
    spec_terms = _search_spec_terms(identity)
    spec_part = " ".join(spec_terms)

    core_phrases = _ranked_search_core_phrases(identity, limit=4)
    full_name_anchor_phrases = _extract_anchor_phrases(name)
    if full_name_anchor_phrases:
        # Anchors found in the full name take precedence; order-preserving dedupe.
        core_phrases = list(dict.fromkeys(full_name_anchor_phrases + core_phrases))
    core_short = " ".join(core_phrases[:2])
    core_primary = core_phrases[0] if core_phrases else ""

    product_type_aliases = set(PRODUCT_TYPES.get(identity.product_type or "", ()))
    chinese_detail_phrases = [
        phrase
        for phrase in core_phrases[1:]
        if re.search(r"[\u4e00-\u9fff]", phrase)
        and phrase != core_primary
        and phrase != (identity.product_type or "")
        and phrase not in SEARCH_BROAD_ANCHORS
        and not any(phrase == alias or phrase in alias or alias in phrase for alias in product_type_aliases)
    ]
    modifier_with_primary = joined(
        chinese_detail_phrases[0] if chinese_detail_phrases else "",
        core_primary,
    )

    variant_primary = _variant_primary_phrase(identity)
    variant_options = sorted(
        (token for token in _explicit_variant_option_tokens(identity) if token != "0"),
        key=lambda token: (len(token), token),
    )
    variant_option_part = " ".join(variant_options[:2])

    # Secondary core phrases that look like model codes or Latin words.
    model_phrases = [
        phrase
        for phrase in core_phrases[1:]
        if re.fullmatch(r"[a-z]*\d+[a-z0-9-]*", phrase)
        or re.fullmatch(r"[a-z][a-z0-9-]{2,}", phrase)
    ]
    if "護甲油" in searchable:
        model_phrases = [
            phrase for phrase in model_phrases
            if phrase.lower() not in {"top", "coat"} and not re.fullmatch(r"ist\d+", phrase, re.I)
        ]
    primary_with_model = joined(core_primary, model_phrases[0] if model_phrases else "")

    variant_sensitive = any(keyword in searchable for keyword in VARIANT_SENSITIVE_KEYWORDS)
    # A spec term mixing letters and digits that is not a plain quantity.
    model_like_spec = any(
        re.search(r"[a-z]", term)
        and re.search(r"\d", term)
        and not re.fullmatch(r"\d+(?:\.\d+)?(?:ml|g|mg|kg|l)", term, re.I)
        for term in spec_terms
    )
    no_model_hint = not model_phrases and not model_like_spec
    prefer_variant_search = (
        variant_sensitive
        and bool(variant_primary)
        and no_model_hint
        and not variant_options
        and "護甲油" not in searchable
        and any(
            term in searchable
            for term in ("護手霜", "芬香", "香氛", "香味", "擴香", "精油", "指甲油", "指彩")
        )
    )

    variant_query = joined(brand_part, core_primary, variant_primary, spec_part)
    model_query = joined(brand_part, primary_with_model, spec_part)

    # Candidates in priority order; an empty string means "not applicable".
    candidates = (
        variant_query
        if is_dashing_diva_nail_line and variant_sensitive and variant_primary
        else "",
        joined(brand_part, core_primary, variant_option_part, spec_part)
        if variant_sensitive and variant_option_part and no_model_hint
        else "",
        variant_query if prefer_variant_search else "",
        model_query if primary_with_model and model_phrases else "",
        joined(brand_part, modifier_with_primary, spec_part)
        if modifier_with_primary and identity.product_type and identity.product_type in core_primary
        else "",
        joined(brand_part, core_primary, spec_part)
        if variant_sensitive and core_primary and not variant_options
        else "",
        variant_query
        if variant_sensitive and variant_primary and variant_options and no_model_hint
        else "",
        model_query,
        joined(brand_part, core_short, spec_part),
        joined(brand_part, core_short),
        joined(core_primary, spec_part),
        searchable,
    )

    terms: list[str] = []
    for value in candidates:
        cleaned = _clean_search_phrase(value)
        # Truncate before the duplicate check: comparing the untruncated text
        # against stored 42-character values let long candidates sharing the
        # same prefix through twice. Also drop a space left by the cut.
        term = cleaned[:42].rstrip() if cleaned else ""
        if term and term not in terms:
            terms.append(term)
        if len(terms) >= max_terms:
            break

    return terms