修正 PChome 近門檻商品比對規則

This commit is contained in:
OoO
2026-05-24 10:22:59 +08:00
committed by AiderHeal Bot
parent 4ae3a39970
commit d2dea022ea
3 changed files with 315 additions and 8 deletions

View File

@@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# 系統版本與路徑
# ==========================================
SYSTEM_VERSION = "V10.388"
SYSTEM_VERSION = "V10.390"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # 用於模板顯示

View File

@@ -313,6 +313,16 @@ SEARCH_NOISE_TOKENS = {
}
SEARCH_IDENTITY_ANCHORS = (
"控油清爽防曬棒",
"蔓越莓私密清潔慕斯",
"金縷梅私密清潔慕斯",
"光采奪目眼部飾底乳",
"男性私密沐浴露",
"私密沐浴露",
"hydsto 車載香薰",
"電動刮鬍刀 s101",
"磁吸控油定妝噴霧",
"修容打亮棒",
"私密潔淨凝露",
"柔霧裸唇膏",
"潤浸保濕清爽身體乳液",
@@ -436,6 +446,15 @@ SEARCH_BROAD_ANCHORS = {
}
VARIANT_SENSITIVE_KEYWORDS = {
"香氛蠟燭",
"芬香蠟燭",
"蠟燭",
"擴香",
"融蠟燈",
"車用香氛",
"香味",
"私密清潔慕斯",
"定妝噴霧",
"妝前防護乳",
"妝前乳",
"素顏霜",
@@ -453,9 +472,26 @@ VARIANT_SENSITIVE_KEYWORDS = {
"蜜粉餅",
"粉底棒",
"遮瑕棒",
"修容打亮棒",
}
VARIANT_OPTION_COLOR_WORDS = {
"茉莉花",
"梔子花",
"白麝香",
"黑麝香",
"青檸羅勒",
"炭木香",
"無花果",
"清甜柚香",
"杏仁牛奶",
"完熟白桃",
"原味",
"草莓",
"蔓越莓",
"金縷梅",
"柔焦霧面",
"水光亮面",
"黑色",
"棕色",
"咖啡色",
@@ -544,7 +580,13 @@ PRODUCT_TYPES = {
"私密噴霧": ("私密噴霧", "抑菌噴霧", "醒肌抑菌噴霧"),
"私密凝露": ("凝露", "激淨凝露", "緊實凝露", "亮白凝露"),
"護甲油": ("護甲油", "亮油", "top coat"),
"定妝噴霧": ("定妝噴霧", "setting spray"),
"修容打亮棒": ("修容打亮棒", "修容棒", "打亮棒"),
"刮鬍刀": ("刮鬍刀", "刮胡刀", "shaver", "razor"),
"體香膏": ("體香膏", "體香劑", "deodorant"),
"電動牙刷": ("電動牙刷", "聲波電動牙刷", "electric toothbrush"),
"洗手慕斯": ("洗手慕斯", "洗手泡泡", "hand wash foam"),
"私密慕斯": ("私密清潔慕斯", "私密防護慕絲", "私密慕斯"),
"足膜": ("足膜", "足部膜", "足部去角質"),
"妝前乳": ("妝前乳", "妝前防護乳", "妝前隔離", "primer"),
"素顏霜": ("素顏霜", "tone up cream"),
@@ -557,7 +599,7 @@ PRODUCT_TYPES = {
"粉底棒": ("粉底棒", "foundation stick"),
"精華": ("精華", "精華液", "essence", "serum", "安瓶"),
"化妝水": ("化妝水", "機能水", "toner", "lotion"),
"乳液": ("乳液", "emulsion", "milk"),
"乳液": ("乳液", "按摩乳", "emulsion", "milk"),
"面霜": ("面霜", "乳霜", "", "cream"),
"防曬": ("防曬", "spf", "uv", "sunscreen"),
"洗面乳": ("洗面乳", "洗顏", "潔面", "cleanser", "foam"),
@@ -728,6 +770,7 @@ def normalize_product_text(value: str) -> str:
)
text = text.replace("×", "x").replace("", "x").replace("*", "x")
text = text.replace("", "/").replace("", "&")
text = text.replace("", "")
text = re.sub(r"[\u3000\r\n\t]+", " ", text)
text = text.lower()
text = re.sub(r"[?]+", " ", text)
@@ -1850,6 +1893,34 @@ def score_marketplace_match(
):
score += 0.10
reasons.append("shared_identity_anchor_lactacyd_wash")
if (
"私密潔膚露" in shared_anchor
and {"femfresh", "芳芯"} & (left.brand_tokens | right.brand_tokens)
and brand_score >= 0.95
and not hard_veto
and price_penalty == 0
and type_score >= 0.95
and spec_score >= 0.85
and token_score >= 0.30
and sequence_score >= 0.45
and not variant_descriptor_conflict
):
score += 0.06
reasons.append("shared_identity_anchor_femfresh_wash")
if (
"私密沐浴露" in shared_anchor
and {"vigill", "婦潔"} & (left.brand_tokens | right.brand_tokens)
and brand_score >= 0.95
and not hard_veto
and price_penalty == 0
and type_score >= 0.95
and spec_score >= 0.70
and token_score >= 0.45
and sequence_score >= 0.55
and not variant_descriptor_conflict
):
score += 0.06
reasons.append("shared_identity_anchor_vigill_private_wash")
if (
"私密潔淨凝露" in shared_anchor
and {"relove"} <= (left.brand_tokens | right.brand_tokens)
@@ -1892,6 +1963,52 @@ def score_marketplace_match(
):
score += 0.12
reasons.append("shared_identity_anchor_karadium_eye_stick")
if (
_has_seche_vite_top_coat_alignment(left, right)
and brand_score >= 0.95
and not hard_veto
and price_penalty == 0
and type_score >= 0.95
and token_score >= 0.70
and sequence_score >= 0.70
and not variant_descriptor_conflict
):
score += 0.04
reasons.append("shared_identity_anchor_seche_vite_top_coat")
if (
_has_xiaomi_s101_shaver_alignment(left, right)
and brand_score >= 0.95
and not hard_veto
and price_penalty == 0
and token_score >= 0.60
and not variant_descriptor_conflict
):
score += 0.04
reasons.append("shared_model_token_xiaomi_s101_shaver")
if (
_has_hinoki_roller_oil_alignment(left, right)
and brand_score >= 0.95
and not hard_veto
and price_penalty == 0
and type_score >= 0.95
and spec_score >= 0.85
and sequence_score >= 0.50
and not variant_descriptor_conflict
):
score += 0.04
reasons.append("shared_identity_anchor_hinoki_roller_oil")
if (
_has_brush_baby_wildones_toothbrush_alignment(left, right)
and brand_score >= 0.95
and not hard_veto
and price_penalty == 0
and type_score >= 0.95
and token_score >= 0.78
and sequence_score >= 0.90
and not variant_descriptor_conflict
):
score += 0.04
reasons.append("shared_model_token_brush_baby_wildones")
if (
shared_anchor
and len(shared_anchor.replace(" ", "")) >= 5
@@ -2108,14 +2225,15 @@ def _extract_anchor_phrases(token: str) -> list[str]:
if "瞬效" in cleaned and "b5" in cleaned and "玻尿酸" in cleaned and "精華" in cleaned:
phrases.append("瞬效b5玻尿酸精華")
for anchor in SEARCH_IDENTITY_ANCHORS:
if anchor not in cleaned:
anchor_phrase = _clean_search_phrase(anchor)
if not anchor_phrase or anchor_phrase not in cleaned:
continue
if re.search(r"[\u4e00-\u9fff]", anchor):
prefix_width = 0 if len(anchor) >= 5 else (4 if len(anchor) >= 3 else 6)
match = re.search(rf"([\u4e00-\u9fff]{{0,{prefix_width}}}{re.escape(anchor)})", cleaned)
phrase = match.group(1) if match else anchor
if re.search(r"[\u4e00-\u9fff]", anchor_phrase):
prefix_width = 0 if len(anchor_phrase) >= 5 else (4 if len(anchor_phrase) >= 3 else 6)
match = re.search(rf"([\u4e00-\u9fff]{{0,{prefix_width}}}{re.escape(anchor_phrase)})", cleaned)
phrase = match.group(1) if match else anchor_phrase
else:
phrase = anchor
phrase = anchor_phrase
phrase = _clean_search_phrase(phrase)
if phrase.startswith("") and len(phrase) > 2:
phrase = phrase[1:]
@@ -2214,6 +2332,64 @@ def _has_serum_formulation_conflict(left: ProductIdentity, right: ProductIdentit
return bool(left_hit and right_hit and left_hit != right_hit)
def _has_taicend_baby_spray_equivalence(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both identities are the TAICEND "屁屁噴" spray with overlapping base spec.

    Checks are evaluated in this order and stop at the first failure:

    1. each side carries at least one of the brand tokens "taicend" / "泰陞";
    2. each side's ``searchable_name`` contains "屁屁噴";
    3. ``_has_overlapping_base_spec(left, right)``, whose result is returned as-is.
    """
    taicend_brands = {"taicend", "泰陞"}
    sides = (left, right)
    if not all(side.brand_tokens & taicend_brands for side in sides):
        return False
    if not all("屁屁噴" in side.searchable_name for side in sides):
        return False
    # The spec comparison is delegated to the shared helper and only runs
    # once brand and product keyword already agree on both sides.
    return _has_overlapping_base_spec(left, right)
def _has_seche_vite_top_coat_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both identities are the Seche Vite "快乾亮油" top coat.

    True only when all of the following hold:

    * "seche" and "vite" are both present in the union of the two sides'
      brand tokens;
    * each side individually carries at least one of those two tokens;
    * each side's ``searchable_name`` contains "快乾亮油".
    """
    seche_vite = {"seche", "vite"}
    # The full brand only has to be spelled out across the pair; a single
    # side may carry just one of the two words.
    if not seche_vite <= (left.brand_tokens | right.brand_tokens):
        return False
    sides = (left, right)
    if not all(side.brand_tokens & seche_vite for side in sides):
        return False
    return all("快乾亮油" in side.searchable_name for side in sides)
def _has_xiaomi_s101_shaver_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both identities are the Xiaomi S101 electric shaver.

    True only when each side carries at least one of the brand tokens
    "xiaomi" / "小米" / "小米有品" and each side's ``searchable_name`` contains
    both the model token "s101" and the product keyword "電動刮鬍刀".
    """
    xiaomi_brands = {"xiaomi", "小米", "小米有品"}
    sides = (left, right)
    if not all(side.brand_tokens & xiaomi_brands for side in sides):
        return False
    # Model token first, then product keyword; each is checked on the left
    # side before the right side.
    required_fragments = ("s101", "電動刮鬍刀")
    return all(
        fragment in side.searchable_name
        for fragment in required_fragments
        for side in sides
    )
def _has_hinoki_roller_oil_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both identities are the 檜山坊 檜木精油 roller bottle with overlapping base spec.

    Checks are evaluated in this order and stop at the first failure:

    1. "檜山坊" is a brand token on each side;
    2. each side's ``searchable_name`` contains "檜木精油", then "滾珠瓶";
    3. ``_has_overlapping_base_spec(left, right)``, whose result is returned as-is.
    """
    sides = (left, right)
    if not all("檜山坊" in side.brand_tokens for side in sides):
        return False
    name_fragments = ("檜木精油", "滾珠瓶")
    if not all(
        fragment in side.searchable_name
        for fragment in name_fragments
        for side in sides
    ):
        return False
    # The spec comparison is delegated to the shared helper and only runs
    # once brand and both product keywords already agree on both sides.
    return _has_overlapping_base_spec(left, right)
def _has_brush_baby_wildones_toothbrush_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both identities are the Brush Baby WildOnes 0-10Y electric toothbrush.

    True only when the brand tokens "brush", "baby" and "wildones" are all
    present on each side, and each side's ``searchable_name`` contains both
    "電動牙刷" and the age-range token "0-10y".
    """
    required_brand = {"brush", "baby", "wildones"}
    sides = (left, right)
    # Unlike a looser "any brand token" check, every one of the three tokens
    # must be present on each side individually.
    if not all(required_brand <= side.brand_tokens for side in sides):
        return False
    name_fragments = ("電動牙刷", "0-10y")
    return all(
        fragment in side.searchable_name
        for fragment in name_fragments
        for side in sides
    )
def _is_variant_sensitive_identity(
left: ProductIdentity,
right: ProductIdentity,
@@ -2232,6 +2408,10 @@ def _is_variant_sensitive_identity(
def _has_variant_descriptor_conflict(left: ProductIdentity, right: ProductIdentity, shared_anchor: str) -> bool:
if _has_serum_formulation_conflict(left, right, shared_anchor):
return True
if _has_taicend_baby_spray_equivalence(left, right):
return False
if _has_brush_baby_wildones_toothbrush_alignment(left, right):
return False
if (
shared_anchor
and shared_anchor not in SEARCH_BROAD_ANCHORS

View File

@@ -657,6 +657,133 @@ def test_marketplace_matcher_promotes_kate_bare_lip_line_with_series_copy():
assert "shared_identity_anchor_kate_bare_lip" in diagnostics.reasons
def test_marketplace_matcher_promotes_next_recoverable_exact_identity_cohorts():
    """Listing pairs for the same product must score at least 0.76 without a hard veto.

    Each case is ``(momo_name, competitor_name, expected_reason)``. When the
    expected reason is the bare string "shared_identity_anchor", any reason
    starting with that prefix is accepted; otherwise the exact reason must be
    present in ``diagnostics.reasons``.
    """
    from services.marketplace_product_matcher import score_marketplace_match

    cases = [
        (
            "【OBgE】【官方公司貨】控油清爽防曬棒 18g (清爽 輕薄 控油 防曬棒 補擦不卡粉 戶外通勤 油肌友善 李多慧)",
            "OBgE/控油清爽防曬棒18g",
            "shared_identity_anchor",
        ),
        (
            "【Play&Joy 官方直營】ARTMIS 蔓越莓私密清潔慕斯 250ml(私密清潔 胺基酸配方 綿密泡沫 粉嫩如春 私密保養)",
            "ARTMIS 蔓越莓私密清潔慕斯 250ml",
            "shared_identity_anchor",
        ),
        (
            "【Play&Joy 官方直營】ARTMIS 金縷梅私密清潔慕斯 250ml(私密清潔 胺基酸配方 綿密泡沫 粉嫩如春 私密保養)",
            "ARTMIS 金縷梅私密清潔慕斯 250ml",
            "shared_identity_anchor",
        ),
        (
            "美國 Seche Vite 快乾亮油",
            "美國Seche Vite指甲快乾亮油14ml",
            "shared_identity_anchor_seche_vite_top_coat",
        ),
        (
            "【TAICEND 泰陞】寶貝液體保護膜 屁屁噴(100ml/1入組)",
            "TAICEND泰陞 寶貝液體護膜100ml 屁屁噴 屁屁膏",
            "strong_exact_spec_match",
        ),
        (
            "【femfresh 芳芯】私密潔膚露 250ml 長效清新 純淨植萃 舒緩敏感肌膚 私密處清潔 私密處護理|繁華中西藥局|",
            "【femfresh芳芯 官方直營】私密潔膚露250ml (任選)",
            "shared_identity_anchor_femfresh_wash",
        ),
        (
            "【VIGILL 婦潔】男性私密沐浴露220ml(男性私密清潔 一瓶洗全身)",
            "【VIGILL 婦潔】日常潔淨 私密沐浴露220ml",
            "shared_identity_anchor_vigill_private_wash",
        ),
        (
            "【Solone】光采奪目眼部飾底乳(眼部打底 眼影打底)",
            "Solone 光采奪目眼部飾底乳 2.8g",
            "shared_identity_anchor_packaging_variant",
        ),
        (
            "【小米有品】HYDSTO 車載香薰(全車淨化/持久清香/車用香水/香薰)",
            "小米有品 HYDSTO 車載香薰",
            "shared_identity_anchor_packaging_variant",
        ),
        (
            "【小米】電動刮鬍刀 S101(米家電動刮鬍刀 小米刮鬍刀 電動刮鬍刀 米家刮鬍刀)",
            "【小米 Xiaomi】 小米電動刮鬍刀 S101",
            "shared_model_token_xiaomi_s101_shaver",
        ),
        (
            "【PRAMY 柏瑞美】磁吸控油定妝噴霧 100ML(柔焦霧面)",
            "【柏瑞美PRAMY】 磁吸控油定粧噴霧 柔焦霧面",
            "shared_identity_anchor",
        ),
        (
            "【PRAMY 柏瑞美】磁吸控油定妝噴霧 100ML(水光亮面)",
            "【柏瑞美PRAMY】 磁吸控油定粧噴霧 水光亮面",
            "shared_identity_anchor",
        ),
        (
            "【im meme】韓國 Multi Stick Dual 雙頭修容打亮棒 3.3g (修容 打亮 修容棒 打亮筆 修容筆)",
            "【IM MEME】我愛小臉修容打亮棒 3.3g",
            "shared_identity_anchor",
        ),
        (
            "【檜山坊】檜木精油滾珠瓶5ml 兩入(療癒 放鬆 穩定心神)",
            "【檜山坊】台灣原生檜木精油5ml滾珠瓶兩入組",
            "shared_identity_anchor_hinoki_roller_oil",
        ),
        (
            "【ARM&HAMMER 鐵鎚】小蘇打配方體香膏(71g)",
            "Arm & Hammer 小蘇打體香膏 2.5oz /71g 長效防護 植物萃取 溫合無鋁",
            "strong_exact_spec_match",
        ),
        (
            "【Brush Baby】WildOnes 充電式兒童聲波電動牙刷 0-10Y(多款可選)",
            "Brush Baby WildOnes 充電式兒童電動牙刷(0-10Y)-多款可選",
            "shared_model_token_brush_baby_wildones",
        ),
        (
            "【PALMERS 帕瑪氏】新撫紋按摩乳250ml(新配方效果全新升級)",
            "Palmer s 可可脂撫紋按摩乳液 250ml",
            "strong_exact_spec_match",
        ),
    ]
    for momo_name, competitor_name, expected_reason in cases:
        diagnostics = score_marketplace_match(momo_name, competitor_name)
        # Attached to every assertion so a failure names the offending pair
        # instead of only reporting a bare boolean mismatch.
        context = (momo_name, competitor_name, expected_reason, diagnostics)
        assert diagnostics.score >= 0.76, context
        assert diagnostics.hard_veto is False, context
        if expected_reason == "shared_identity_anchor":
            # The bare anchor name stands for the whole family of
            # "shared_identity_anchor*" reasons.
            assert any(
                reason.startswith("shared_identity_anchor")
                for reason in diagnostics.reasons
            ), context
        else:
            assert expected_reason in diagnostics.reasons, context
def test_marketplace_matcher_rejects_fragrance_formula_and_finish_variant_mismatch():
    """Listing pairs that differ only in scent, formula or finish must be hard-vetoed.

    Each case is ``(momo_name, competitor_name)``. The matcher is expected to
    flag a hard veto, report ``comparison_mode == "not_comparable"``, score
    below 0.76 and include the "variant_option_conflict" reason.
    """
    from services.marketplace_product_matcher import score_marketplace_match

    cases = [
        (
            "【MUJI 無印良品】芬香蠟燭.茉莉花香味/85g",
            "芬香蠟燭.梔子花香味/85g【MUJI 無印良品】",
        ),
        (
            "【Play&Joy 官方直營】ARTMIS 蔓越莓私密清潔慕斯 250ml",
            "ARTMIS 金縷梅私密清潔慕斯 250ml",
        ),
        (
            "【PRAMY 柏瑞美】磁吸控油定妝噴霧 100ML(柔焦霧面)",
            "【柏瑞美PRAMY】 磁吸控油定粧噴霧 水光亮面",
        ),
    ]
    for momo_name, competitor_name in cases:
        diagnostics = score_marketplace_match(momo_name, competitor_name)
        # Attached to every assertion so a failure names the offending pair
        # instead of only reporting a bare boolean mismatch.
        context = (momo_name, competitor_name, diagnostics)
        assert diagnostics.hard_veto is True, context
        assert diagnostics.comparison_mode == "not_comparable", context
        assert diagnostics.score < 0.76, context
        assert "variant_option_conflict" in diagnostics.reasons, context
def test_marketplace_matcher_rejects_refill_core_vs_case_only_pack():
from services.marketplace_product_matcher import score_marketplace_match