修正 PChome 近門檻商品比對規則
This commit is contained in:
@@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
||||
# ==========================================
|
||||
# 系統版本與路徑
|
||||
# ==========================================
|
||||
SYSTEM_VERSION = "V10.388"
|
||||
SYSTEM_VERSION = "V10.390"
|
||||
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
||||
public_url = PUBLIC_URL # 用於模板顯示
|
||||
|
||||
|
||||
@@ -313,6 +313,16 @@ SEARCH_NOISE_TOKENS = {
|
||||
}
|
||||
|
||||
SEARCH_IDENTITY_ANCHORS = (
|
||||
"控油清爽防曬棒",
|
||||
"蔓越莓私密清潔慕斯",
|
||||
"金縷梅私密清潔慕斯",
|
||||
"光采奪目眼部飾底乳",
|
||||
"男性私密沐浴露",
|
||||
"私密沐浴露",
|
||||
"hydsto 車載香薰",
|
||||
"電動刮鬍刀 s101",
|
||||
"磁吸控油定妝噴霧",
|
||||
"修容打亮棒",
|
||||
"私密潔淨凝露",
|
||||
"柔霧裸唇膏",
|
||||
"潤浸保濕清爽身體乳液",
|
||||
@@ -436,6 +446,15 @@ SEARCH_BROAD_ANCHORS = {
|
||||
}
|
||||
|
||||
VARIANT_SENSITIVE_KEYWORDS = {
|
||||
"香氛蠟燭",
|
||||
"芬香蠟燭",
|
||||
"蠟燭",
|
||||
"擴香",
|
||||
"融蠟燈",
|
||||
"車用香氛",
|
||||
"香味",
|
||||
"私密清潔慕斯",
|
||||
"定妝噴霧",
|
||||
"妝前防護乳",
|
||||
"妝前乳",
|
||||
"素顏霜",
|
||||
@@ -453,9 +472,26 @@ VARIANT_SENSITIVE_KEYWORDS = {
|
||||
"蜜粉餅",
|
||||
"粉底棒",
|
||||
"遮瑕棒",
|
||||
"修容打亮棒",
|
||||
}
|
||||
|
||||
VARIANT_OPTION_COLOR_WORDS = {
|
||||
"茉莉花",
|
||||
"梔子花",
|
||||
"白麝香",
|
||||
"黑麝香",
|
||||
"青檸羅勒",
|
||||
"炭木香",
|
||||
"無花果",
|
||||
"清甜柚香",
|
||||
"杏仁牛奶",
|
||||
"完熟白桃",
|
||||
"原味",
|
||||
"草莓",
|
||||
"蔓越莓",
|
||||
"金縷梅",
|
||||
"柔焦霧面",
|
||||
"水光亮面",
|
||||
"黑色",
|
||||
"棕色",
|
||||
"咖啡色",
|
||||
@@ -544,7 +580,13 @@ PRODUCT_TYPES = {
|
||||
"私密噴霧": ("私密噴霧", "抑菌噴霧", "醒肌抑菌噴霧"),
|
||||
"私密凝露": ("凝露", "激淨凝露", "緊實凝露", "亮白凝露"),
|
||||
"護甲油": ("護甲油", "亮油", "top coat"),
|
||||
"定妝噴霧": ("定妝噴霧", "setting spray"),
|
||||
"修容打亮棒": ("修容打亮棒", "修容棒", "打亮棒"),
|
||||
"刮鬍刀": ("刮鬍刀", "刮胡刀", "shaver", "razor"),
|
||||
"體香膏": ("體香膏", "體香劑", "deodorant"),
|
||||
"電動牙刷": ("電動牙刷", "聲波電動牙刷", "electric toothbrush"),
|
||||
"洗手慕斯": ("洗手慕斯", "洗手泡泡", "hand wash foam"),
|
||||
"私密慕斯": ("私密清潔慕斯", "私密防護慕絲", "私密慕斯"),
|
||||
"足膜": ("足膜", "足部膜", "足部去角質"),
|
||||
"妝前乳": ("妝前乳", "妝前防護乳", "妝前隔離", "primer"),
|
||||
"素顏霜": ("素顏霜", "tone up cream"),
|
||||
@@ -557,7 +599,7 @@ PRODUCT_TYPES = {
|
||||
"粉底棒": ("粉底棒", "foundation stick"),
|
||||
"精華": ("精華", "精華液", "essence", "serum", "安瓶"),
|
||||
"化妝水": ("化妝水", "機能水", "toner", "lotion"),
|
||||
"乳液": ("乳液", "emulsion", "milk"),
|
||||
"乳液": ("乳液", "按摩乳", "emulsion", "milk"),
|
||||
"面霜": ("面霜", "乳霜", "霜", "cream"),
|
||||
"防曬": ("防曬", "spf", "uv", "sunscreen"),
|
||||
"洗面乳": ("洗面乳", "洗顏", "潔面", "cleanser", "foam"),
|
||||
@@ -728,6 +770,7 @@ def normalize_product_text(value: str) -> str:
|
||||
)
|
||||
text = text.replace("×", "x").replace("*", "x").replace("*", "x")
|
||||
text = text.replace("/", "/").replace("&", "&")
|
||||
text = text.replace("粧", "妝")
|
||||
text = re.sub(r"[\u3000\r\n\t]+", " ", text)
|
||||
text = text.lower()
|
||||
text = re.sub(r"[??]+", " ", text)
|
||||
@@ -1850,6 +1893,34 @@ def score_marketplace_match(
|
||||
):
|
||||
score += 0.10
|
||||
reasons.append("shared_identity_anchor_lactacyd_wash")
|
||||
if (
|
||||
"私密潔膚露" in shared_anchor
|
||||
and {"femfresh", "芳芯"} & (left.brand_tokens | right.brand_tokens)
|
||||
and brand_score >= 0.95
|
||||
and not hard_veto
|
||||
and price_penalty == 0
|
||||
and type_score >= 0.95
|
||||
and spec_score >= 0.85
|
||||
and token_score >= 0.30
|
||||
and sequence_score >= 0.45
|
||||
and not variant_descriptor_conflict
|
||||
):
|
||||
score += 0.06
|
||||
reasons.append("shared_identity_anchor_femfresh_wash")
|
||||
if (
|
||||
"私密沐浴露" in shared_anchor
|
||||
and {"vigill", "婦潔"} & (left.brand_tokens | right.brand_tokens)
|
||||
and brand_score >= 0.95
|
||||
and not hard_veto
|
||||
and price_penalty == 0
|
||||
and type_score >= 0.95
|
||||
and spec_score >= 0.70
|
||||
and token_score >= 0.45
|
||||
and sequence_score >= 0.55
|
||||
and not variant_descriptor_conflict
|
||||
):
|
||||
score += 0.06
|
||||
reasons.append("shared_identity_anchor_vigill_private_wash")
|
||||
if (
|
||||
"私密潔淨凝露" in shared_anchor
|
||||
and {"relove"} <= (left.brand_tokens | right.brand_tokens)
|
||||
@@ -1892,6 +1963,52 @@ def score_marketplace_match(
|
||||
):
|
||||
score += 0.12
|
||||
reasons.append("shared_identity_anchor_karadium_eye_stick")
|
||||
if (
|
||||
_has_seche_vite_top_coat_alignment(left, right)
|
||||
and brand_score >= 0.95
|
||||
and not hard_veto
|
||||
and price_penalty == 0
|
||||
and type_score >= 0.95
|
||||
and token_score >= 0.70
|
||||
and sequence_score >= 0.70
|
||||
and not variant_descriptor_conflict
|
||||
):
|
||||
score += 0.04
|
||||
reasons.append("shared_identity_anchor_seche_vite_top_coat")
|
||||
if (
|
||||
_has_xiaomi_s101_shaver_alignment(left, right)
|
||||
and brand_score >= 0.95
|
||||
and not hard_veto
|
||||
and price_penalty == 0
|
||||
and token_score >= 0.60
|
||||
and not variant_descriptor_conflict
|
||||
):
|
||||
score += 0.04
|
||||
reasons.append("shared_model_token_xiaomi_s101_shaver")
|
||||
if (
|
||||
_has_hinoki_roller_oil_alignment(left, right)
|
||||
and brand_score >= 0.95
|
||||
and not hard_veto
|
||||
and price_penalty == 0
|
||||
and type_score >= 0.95
|
||||
and spec_score >= 0.85
|
||||
and sequence_score >= 0.50
|
||||
and not variant_descriptor_conflict
|
||||
):
|
||||
score += 0.04
|
||||
reasons.append("shared_identity_anchor_hinoki_roller_oil")
|
||||
if (
|
||||
_has_brush_baby_wildones_toothbrush_alignment(left, right)
|
||||
and brand_score >= 0.95
|
||||
and not hard_veto
|
||||
and price_penalty == 0
|
||||
and type_score >= 0.95
|
||||
and token_score >= 0.78
|
||||
and sequence_score >= 0.90
|
||||
and not variant_descriptor_conflict
|
||||
):
|
||||
score += 0.04
|
||||
reasons.append("shared_model_token_brush_baby_wildones")
|
||||
if (
|
||||
shared_anchor
|
||||
and len(shared_anchor.replace(" ", "")) >= 5
|
||||
@@ -2108,14 +2225,15 @@ def _extract_anchor_phrases(token: str) -> list[str]:
|
||||
if "瞬效" in cleaned and "b5" in cleaned and "玻尿酸" in cleaned and "精華" in cleaned:
|
||||
phrases.append("瞬效b5玻尿酸精華")
|
||||
for anchor in SEARCH_IDENTITY_ANCHORS:
|
||||
if anchor not in cleaned:
|
||||
anchor_phrase = _clean_search_phrase(anchor)
|
||||
if not anchor_phrase or anchor_phrase not in cleaned:
|
||||
continue
|
||||
if re.search(r"[\u4e00-\u9fff]", anchor):
|
||||
prefix_width = 0 if len(anchor) >= 5 else (4 if len(anchor) >= 3 else 6)
|
||||
match = re.search(rf"([\u4e00-\u9fff]{{0,{prefix_width}}}{re.escape(anchor)})", cleaned)
|
||||
phrase = match.group(1) if match else anchor
|
||||
if re.search(r"[\u4e00-\u9fff]", anchor_phrase):
|
||||
prefix_width = 0 if len(anchor_phrase) >= 5 else (4 if len(anchor_phrase) >= 3 else 6)
|
||||
match = re.search(rf"([\u4e00-\u9fff]{{0,{prefix_width}}}{re.escape(anchor_phrase)})", cleaned)
|
||||
phrase = match.group(1) if match else anchor_phrase
|
||||
else:
|
||||
phrase = anchor
|
||||
phrase = anchor_phrase
|
||||
phrase = _clean_search_phrase(phrase)
|
||||
if phrase.startswith("款") and len(phrase) > 2:
|
||||
phrase = phrase[1:]
|
||||
@@ -2214,6 +2332,64 @@ def _has_serum_formulation_conflict(left: ProductIdentity, right: ProductIdentit
|
||||
return bool(left_hit and right_hit and left_hit != right_hit)
|
||||
|
||||
|
||||
def _has_taicend_baby_spray_equivalence(left: ProductIdentity, right: ProductIdentity) -> bool:
|
||||
brand_tokens = {"taicend", "泰陞"}
|
||||
return (
|
||||
bool(left.brand_tokens & brand_tokens)
|
||||
and bool(right.brand_tokens & brand_tokens)
|
||||
and "屁屁噴" in left.searchable_name
|
||||
and "屁屁噴" in right.searchable_name
|
||||
and _has_overlapping_base_spec(left, right)
|
||||
)
|
||||
|
||||
|
||||
def _has_seche_vite_top_coat_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
|
||||
brand_tokens = {"seche", "vite"}
|
||||
return (
|
||||
brand_tokens <= (left.brand_tokens | right.brand_tokens)
|
||||
and bool(left.brand_tokens & brand_tokens)
|
||||
and bool(right.brand_tokens & brand_tokens)
|
||||
and "快乾亮油" in left.searchable_name
|
||||
and "快乾亮油" in right.searchable_name
|
||||
)
|
||||
|
||||
|
||||
def _has_xiaomi_s101_shaver_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
|
||||
brand_tokens = {"xiaomi", "小米", "小米有品"}
|
||||
return (
|
||||
bool(left.brand_tokens & brand_tokens)
|
||||
and bool(right.brand_tokens & brand_tokens)
|
||||
and "s101" in left.searchable_name
|
||||
and "s101" in right.searchable_name
|
||||
and "電動刮鬍刀" in left.searchable_name
|
||||
and "電動刮鬍刀" in right.searchable_name
|
||||
)
|
||||
|
||||
|
||||
def _has_hinoki_roller_oil_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
|
||||
return (
|
||||
"檜山坊" in left.brand_tokens
|
||||
and "檜山坊" in right.brand_tokens
|
||||
and "檜木精油" in left.searchable_name
|
||||
and "檜木精油" in right.searchable_name
|
||||
and "滾珠瓶" in left.searchable_name
|
||||
and "滾珠瓶" in right.searchable_name
|
||||
and _has_overlapping_base_spec(left, right)
|
||||
)
|
||||
|
||||
|
||||
def _has_brush_baby_wildones_toothbrush_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
|
||||
brand_tokens = {"brush", "baby", "wildones"}
|
||||
return (
|
||||
brand_tokens <= left.brand_tokens
|
||||
and brand_tokens <= right.brand_tokens
|
||||
and "電動牙刷" in left.searchable_name
|
||||
and "電動牙刷" in right.searchable_name
|
||||
and "0-10y" in left.searchable_name
|
||||
and "0-10y" in right.searchable_name
|
||||
)
|
||||
|
||||
|
||||
def _is_variant_sensitive_identity(
|
||||
left: ProductIdentity,
|
||||
right: ProductIdentity,
|
||||
@@ -2232,6 +2408,10 @@ def _is_variant_sensitive_identity(
|
||||
def _has_variant_descriptor_conflict(left: ProductIdentity, right: ProductIdentity, shared_anchor: str) -> bool:
|
||||
if _has_serum_formulation_conflict(left, right, shared_anchor):
|
||||
return True
|
||||
if _has_taicend_baby_spray_equivalence(left, right):
|
||||
return False
|
||||
if _has_brush_baby_wildones_toothbrush_alignment(left, right):
|
||||
return False
|
||||
if (
|
||||
shared_anchor
|
||||
and shared_anchor not in SEARCH_BROAD_ANCHORS
|
||||
|
||||
@@ -657,6 +657,133 @@ def test_marketplace_matcher_promotes_kate_bare_lip_line_with_series_copy():
|
||||
assert "shared_identity_anchor_kate_bare_lip" in diagnostics.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_next_recoverable_exact_identity_cohorts():
    """Each listing pair must score at least 0.76 with no hard veto.

    Every case is ``(momo_name, competitor_name, expected_reason)``. The bare
    value "shared_identity_anchor" accepts any reason starting with that
    prefix; any other value must appear verbatim in ``diagnostics.reasons``.
    """
    from services.marketplace_product_matcher import score_marketplace_match

    cases = [
        (
            "【OBgE】【官方公司貨】控油清爽防曬棒 18g (清爽 輕薄 控油 防曬棒 補擦不卡粉 戶外通勤 油肌友善 李多慧)",
            "OBgE/控油清爽防曬棒18g",
            "shared_identity_anchor",
        ),
        (
            "【Play&Joy 官方直營】ARTMIS 蔓越莓私密清潔慕斯 250ml(私密清潔 胺基酸配方 綿密泡沫 粉嫩如春 私密保養)",
            "ARTMIS 蔓越莓私密清潔慕斯 250ml",
            "shared_identity_anchor",
        ),
        (
            "【Play&Joy 官方直營】ARTMIS 金縷梅私密清潔慕斯 250ml(私密清潔 胺基酸配方 綿密泡沫 粉嫩如春 私密保養)",
            "ARTMIS 金縷梅私密清潔慕斯 250ml",
            "shared_identity_anchor",
        ),
        (
            "美國 Seche Vite 快乾亮油",
            "美國Seche Vite指甲快乾亮油14ml",
            "shared_identity_anchor_seche_vite_top_coat",
        ),
        (
            "【TAICEND 泰陞】寶貝液體保護膜 屁屁噴(100ml/1入組)",
            "TAICEND泰陞 寶貝液體護膜100ml 屁屁噴 屁屁膏",
            "strong_exact_spec_match",
        ),
        (
            "【femfresh 芳芯】私密潔膚露 250ml 長效清新 純淨植萃 舒緩敏感肌膚 私密處清潔 私密處護理|繁華中西藥局|",
            "【femfresh芳芯 官方直營】私密潔膚露250ml (任選)",
            "shared_identity_anchor_femfresh_wash",
        ),
        (
            "【VIGILL 婦潔】男性私密沐浴露220ml(男性私密清潔 一瓶洗全身)",
            "【VIGILL 婦潔】日常潔淨 私密沐浴露220ml",
            "shared_identity_anchor_vigill_private_wash",
        ),
        (
            "【Solone】光采奪目眼部飾底乳(眼部打底 眼影打底)",
            "Solone 光采奪目眼部飾底乳 2.8g",
            "shared_identity_anchor_packaging_variant",
        ),
        (
            "【小米有品】HYDSTO 車載香薰(全車淨化/持久清香/車用香水/香薰)",
            "小米有品 HYDSTO 車載香薰",
            "shared_identity_anchor_packaging_variant",
        ),
        (
            "【小米】電動刮鬍刀 S101(米家電動刮鬍刀 小米刮鬍刀 電動刮鬍刀 米家刮鬍刀)",
            "【小米 Xiaomi】 小米電動刮鬍刀 S101",
            "shared_model_token_xiaomi_s101_shaver",
        ),
        (
            "【PRAMY 柏瑞美】磁吸控油定妝噴霧 100ML(柔焦霧面)",
            "【柏瑞美PRAMY】 磁吸控油定粧噴霧 柔焦霧面",
            "shared_identity_anchor",
        ),
        (
            "【PRAMY 柏瑞美】磁吸控油定妝噴霧 100ML(水光亮面)",
            "【柏瑞美PRAMY】 磁吸控油定粧噴霧 水光亮面",
            "shared_identity_anchor",
        ),
        (
            "【i’m meme】韓國 Multi Stick Dual 雙頭修容打亮棒 3.3g (修容 打亮 修容棒 打亮筆 修容筆)",
            "【I’M MEME】我愛小臉修容打亮棒 3.3g",
            "shared_identity_anchor",
        ),
        (
            "【檜山坊】檜木精油滾珠瓶5ml 兩入(療癒 放鬆 穩定心神)",
            "【檜山坊】台灣原生檜木精油5ml滾珠瓶兩入組",
            "shared_identity_anchor_hinoki_roller_oil",
        ),
        (
            "【ARM&HAMMER 鐵鎚】小蘇打配方體香膏(71g)",
            "Arm & Hammer 小蘇打體香膏 2.5oz /71g 長效防護 植物萃取 溫合無鋁",
            "strong_exact_spec_match",
        ),
        (
            "【Brush Baby】WildOnes 充電式兒童聲波電動牙刷 0-10Y(多款可選)",
            "Brush Baby WildOnes 充電式兒童電動牙刷(0-10Y)-多款可選",
            "shared_model_token_brush_baby_wildones",
        ),
        (
            "【PALMER’S 帕瑪氏】新撫紋按摩乳250ml(新配方效果全新升級)",
            "Palmer s 可可脂撫紋按摩乳液 250ml",
            "strong_exact_spec_match",
        ),
    ]

    for momo_name, competitor_name, expected_reason in cases:
        diagnostics = score_marketplace_match(momo_name, competitor_name)
        # Attach the pair to every assertion so a failure names the case.
        failure_context = (momo_name, competitor_name, diagnostics)
        assert diagnostics.score >= 0.76, failure_context
        assert diagnostics.hard_veto is False, failure_context
        if expected_reason == "shared_identity_anchor":
            assert any(
                reason.startswith("shared_identity_anchor") for reason in diagnostics.reasons
            ), failure_context
        else:
            assert expected_reason in diagnostics.reasons, failure_context
|
||||
|
||||
|
||||
def test_marketplace_matcher_rejects_fragrance_formula_and_finish_variant_mismatch():
    """Listing pairs that differ only in variant option must be hard-vetoed.

    The three cases differ in scent (茉莉花 vs 梔子花), formula (蔓越莓 vs
    金縷梅) and finish (柔焦霧面 vs 水光亮面). Each must come back hard-vetoed,
    "not_comparable", scoring below 0.76, with "variant_option_conflict"
    among the reasons.
    """
    from services.marketplace_product_matcher import score_marketplace_match

    cases = [
        (
            "【MUJI 無印良品】芬香蠟燭.茉莉花香味/85g",
            "芬香蠟燭.梔子花香味/85g【MUJI 無印良品】",
        ),
        (
            "【Play&Joy 官方直營】ARTMIS 蔓越莓私密清潔慕斯 250ml",
            "ARTMIS 金縷梅私密清潔慕斯 250ml",
        ),
        (
            "【PRAMY 柏瑞美】磁吸控油定妝噴霧 100ML(柔焦霧面)",
            "【柏瑞美PRAMY】 磁吸控油定粧噴霧 水光亮面",
        ),
    ]

    for momo_name, competitor_name in cases:
        diagnostics = score_marketplace_match(momo_name, competitor_name)
        # Attach the pair to every assertion so a failure names the case.
        failure_context = (momo_name, competitor_name, diagnostics)
        assert diagnostics.hard_veto is True, failure_context
        assert diagnostics.comparison_mode == "not_comparable", failure_context
        assert diagnostics.score < 0.76, failure_context
        assert "variant_option_conflict" in diagnostics.reasons, failure_context
|
||||
|
||||
|
||||
def test_marketplace_matcher_rejects_refill_core_vs_case_only_pack():
|
||||
from services.marketplace_product_matcher import score_marketplace_match
|
||||
|
||||
|
||||
Reference in New Issue
Block a user