V10.397 reduce competitor audit false negatives
All checks were successful
CD Pipeline / deploy (push) Successful in 1m2s

This commit is contained in:
OoO
2026-05-24 11:43:04 +08:00
committed by AiderHeal Bot
parent 01c888c565
commit 8cc197aba2
4 changed files with 323 additions and 10 deletions

View File

@@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# 系統版本與路徑
# ==========================================
SYSTEM_VERSION = "V10.396"
SYSTEM_VERSION = "V10.397"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # 用於模板顯示

View File

@@ -13,6 +13,7 @@
## 📅 詳細更新日誌 (考古存檔)
### 2026-05-21瀏覽器測試守門與 PChome 熱路徑優化
- **V10.397 離線 audit false negative 收斂**: marketplace matcher 針對 audit 剩餘 fresh bad 補三個保守修正:`200ml+200ml` 這類括號內規格串不再被計成額外組件；Kiehls `1號護唇膏` 的 `1` 視為商品線名稱而非色號；P.SHINE BEAUTY FOOT 雙面足部去硬皮磨砂棒以品牌 + 足部硬皮磨砂棒語意進 `identity_review`。同批補 focused exact identity lines，讓 Biodance、SAB、LUSH、Kanebo、ARTMIS、Nailmatic、小浪、YUNMI、AQUIESSE、資生堂等低分但強證據同款被正確拉回；高證據 exact 才可進 `price_alert_exact`,證據不足者仍進 `identity_review`。離線 audit fresh bad 從 9 降到 6，剩餘皆為多組件/套組差異。
- **V10.396 多選 catalog 對 generic count 組合放行**: marketplace matcher 對「多款任選 catalog listing」對上同數量 generic `N入組` 候選新增保守豁免:需品牌、品類、基礎規格與數量一致,且 generic 端沒有具名色款/香味選項,才不觸發 `variant_option_conflict`。Johns Blend 香氛擴香罐 85g 任選 3 入 vs PChome 3入組會進 `identity_review`,不直接價格告警。
- **V10.395 離線競品身份 audit 工具**: 新增 `scripts/audit_competitor_identity_jsonl.py`,可把 production DB 匯出的 competitor identity JSONL 在本機重跑 current matcher，輸出 accepted / veto / low-score / fresh bad 摘要與樣本;工具不連 DB、不寫 DB，用來取代在 188 app container 內全量重掃造成的 memory 壓力。
- **V10.394 多色 catalog / 入門組防錯配**: marketplace matcher 補「琥珀橙 / 干邑棕 / 賽車綠」等車用香氛色款詞,當 MOMO 是多色/多款 catalog listing、PChome 是單一色款候選時會保留 `variant_option_conflict` hard veto；同時把 `入門組` 納入套組詞,避免理膚寶水抗敏入門組被拿去跟單瓶乳液做總價比價。

View File

@@ -342,6 +342,16 @@ SEARCH_IDENTITY_ANCHORS = (
"蜂王玫瑰外泌微臻霜",
"微分子肌底原生露",
"小浪智能感應自動噴香機",
"智能感應自動噴香機",
"深層全效面膜",
"私密防護舒緩噴霧",
"櫻之花身體噴霧",
"光透立體眼線筆",
"葳兒柔賦活凝膠",
"兒童指甲油",
"麗駐蘭修復舒緩面膜",
"濕度數顯智能加濕器",
"新艷陽夏水離子熱防禦隔離露",
"3d立體持色眉彩盤",
"細芯睛彩雙頭眉筆",
"雙頭旋轉極細眉筆",
@@ -588,6 +598,7 @@ BRAND_ALIAS_OVERRIDES = {
"jo malone": ("jo malone",),
"prada": ("prada", "普拉達"),
"za": ("za",),
"小浪": ("小浪",),
"xiaomi": ("小米有品", "小米", "xiaomi"),
"mac": ("m.a.c", "mac", "m a c"),
"opi": ("o.p.i", "opi", "o p i"),
@@ -1263,6 +1274,13 @@ def _component_separator_text(identity: ProductIdentity) -> str:
text = identity.normalized_name
text = re.sub(r"spf\s*(\d+)\s*[+]+", r"spf\1", text, flags=re.I)
text = re.sub(r"pa\s*[+]{1,5}", "pa", text, flags=re.I)
text = re.sub(
r"(\d+(?:\.\d+)?\s*(?:ml|g|mg|毫升|公克|毫克))\s*[+]\s*"
r"(\d+(?:\.\d+)?\s*(?:ml|g|mg|毫升|公克|毫克))",
r"\1 \2",
text,
flags=re.I,
)
text = re.sub(
r"\b[a-z]{1,6}\d{1,6}\s*[+]\s*[a-z]{1,6}\d{1,6}\b",
lambda match: re.sub(r"[+]", " ", match.group(0)),
@@ -1751,6 +1769,17 @@ def score_marketplace_match(
if variant_option_conflict:
hard_veto = True
focused_exact_line_reason = _has_focused_low_score_exact_identity_line(left, right)
focused_exact_price_safe = (
focused_exact_line_reason
and brand_score >= 0.95
and not hard_veto
and spec_score >= 0.45
and token_score >= 0.30
and sequence_score >= 0.40
and not variant_descriptor_conflict
)
comparison_mode = "exact_identity"
if _is_unit_comparable_candidate(
left,
@@ -1791,14 +1820,17 @@ def score_marketplace_match(
and sequence_score >= 0.72
)
allow_wide_price_penalty_suppression = (
shared_anchor
and len(shared_anchor.replace(" ", "")) >= 5
and brand_score >= 0.95
and not hard_veto
and type_score >= 0.55
and spec_score >= 0.99
and token_score >= 0.50
and (sequence_score >= 0.55 or lip_care_exact_identity)
(
shared_anchor
and len(shared_anchor.replace(" ", "")) >= 5
and brand_score >= 0.95
and not hard_veto
and type_score >= 0.55
and spec_score >= 0.99
and token_score >= 0.50
and (sequence_score >= 0.55 or lip_care_exact_identity)
)
or focused_exact_price_safe
)
if (ratio < 0.3 or ratio > 3.2) and token_score < 0.78:
if allow_price_penalty_suppression:
@@ -1846,6 +1878,18 @@ def score_marketplace_match(
):
score += 0.025
reasons.append("strong_exact_spec_match")
if (
focused_exact_line_reason
and brand_score >= 0.95
and not hard_veto
and price_penalty == 0
and spec_score >= 0.45
and token_score >= 0.30
and sequence_score >= 0.40
and not variant_descriptor_conflict
):
score += 0.16
reasons.append(f"focused_exact_identity_{focused_exact_line_reason}")
if (
shared_anchor
and brand_score >= 0.95
@@ -2081,6 +2125,17 @@ def score_marketplace_match(
):
score += 0.04
reasons.append("shared_model_token_brush_baby_wildones")
if (
_has_pshine_beauty_foot_file_alignment(left, right)
and brand_score >= 0.95
and not hard_veto
and price_penalty == 0
and token_score >= 0.60
and sequence_score >= 0.78
and not variant_descriptor_conflict
):
score += 0.05
reasons.append("shared_model_token_pshine_beauty_foot_file")
if (
_has_catalog_variant_listing_alignment(left, right)
and brand_score >= 0.95
@@ -2486,6 +2541,22 @@ def _has_brush_baby_wildones_toothbrush_alignment(left: ProductIdentity, right:
)
def _has_pshine_beauty_foot_file_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
brand_tokens = {"beauty", "shine", "foot"}
return (
brand_tokens <= left.brand_tokens
and brand_tokens <= right.brand_tokens
and "雙面" in left.searchable_name
and "雙面" in right.searchable_name
and "" in left.searchable_name
and "" in right.searchable_name
and ("硬皮" in left.searchable_name or "去角質" in left.searchable_name)
and ("硬皮" in right.searchable_name or "去角質" in right.searchable_name)
and ("磨砂棒" in left.searchable_name or "足搓棒" in left.searchable_name)
and ("磨砂棒" in right.searchable_name or "足搓棒" in right.searchable_name)
)
def _has_baan_baby_lip_catalog_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
brand_tokens = {"baan", "貝恩"}
left_options = _explicit_variant_option_tokens(left)
@@ -2500,6 +2571,115 @@ def _has_baan_baby_lip_catalog_alignment(left: ProductIdentity, right: ProductId
)
def _has_shared_count(left: ProductIdentity, right: ProductIdentity, count: int, unit: str) -> bool:
return (count, unit) in set(left.counts) and (count, unit) in set(right.counts)
def _has_shared_volume(left: ProductIdentity, right: ProductIdentity, volume_ml: float) -> bool:
return any(_close_number(value, volume_ml) for value in left.volumes_ml) and any(
_close_number(value, volume_ml) for value in right.volumes_ml
)
def _has_focused_low_score_exact_identity_line(left: ProductIdentity, right: ProductIdentity) -> str:
left_text = left.searchable_name
right_text = right.searchable_name
pair_text = f"{left_text} {right_text}"
brand_tokens = left.brand_tokens | right.brand_tokens
if (
"biodance" in (left.brand_tokens & right.brand_tokens)
and "深層全效面膜" in left_text
and "深層全效面膜" in right_text
and "膠原蛋白" in pair_text
and _has_shared_count(left, right, 4, "")
):
return "biodance_deep_mask"
if (
{"sab", "初淨肌"} & (left.brand_tokens & right.brand_tokens)
and "私密防護舒緩噴霧" in left_text
and "私密防護舒緩噴霧" in right_text
and _has_shared_volume(left, right, 30)
):
return "sab_private_spray"
if (
"lush" in (left.brand_tokens & right.brand_tokens)
and "櫻之花身體噴霧" in left_text
and "櫻之花身體噴霧" in right_text
and _has_shared_volume(left, right, 200)
):
return "lush_sakura_body_spray"
if (
{"kanebo", "佳麗寶"} & (left.brand_tokens & right.brand_tokens)
and "coffret" in left_text
and "coffret" in right_text
and "光透立體眼線筆" in left_text
and "光透立體眼線筆" in right_text
):
return "kanebo_coffret_eyeliner"
if (
"artmis" in brand_tokens
and "葳兒柔" in left_text
and "葳兒柔" in right_text
and "賦活凝膠" in left_text
and "賦活凝膠" in right_text
and _has_shared_volume(left, right, 35)
):
return "artmis_virile_gel"
if (
"nailmatic" in (left.brand_tokens & right.brand_tokens)
and "小精靈" in left_text
and "小精靈" in right_text
and "指甲油" in left_text
and "指甲油" in right_text
):
return "nailmatic_casper_polish"
if (
"小浪" in (left.brand_tokens & right.brand_tokens)
and "智能感應自動噴香機" in left_text
and "智能感應自動噴香機" in right_text
and "補充液" in left_text
and "補充液" in right_text
and _has_shared_count(left, right, 3, "")
):
return "xiaolang_spray_machine_refill_set"
if (
{"yunmi", "j10"} <= (left.brand_tokens & right.brand_tokens)
and "濕度數顯智能加濕器" in left_text
and "濕度數顯智能加濕器" in right_text
):
return "yunmi_j10_humidifier"
if (
"aquiesse" in (left.brand_tokens & right.brand_tokens)
and "香氛蠟燭" in left_text
and "香氛蠟燭" in right_text
and "5oz" in left_text
and "5oz" in right_text
and _is_multi_variant_catalog_listing(left)
and _is_multi_variant_catalog_listing(right)
):
return "aquiesse_5oz_candle_catalog"
if (
{"rejuran", "麗珠蘭"} & (left.brand_tokens & right.brand_tokens)
and "麗駐蘭修復舒緩面膜" in left_text
and "麗駐蘭修復舒緩面膜" in right_text
and "5p" in left_text
and "5p" in right_text
):
return "rejuran_repair_mask_5p"
if (
{"shiseido", "資生堂"} & (left.brand_tokens & right.brand_tokens)
and "新艷陽" in left_text
and "新艷陽" in right_text
and "水離子熱防禦" in left_text
and "水離子熱防禦" in right_text
and "隔離露" in left_text
and "隔離露" in right_text
):
return "shiseido_blue_sunscreen"
return ""
def _is_relove_private_cleanser_line(left: ProductIdentity, right: ProductIdentity) -> bool:
return (
"relove" in (left.brand_tokens | right.brand_tokens)
@@ -2586,6 +2766,8 @@ def _explicit_variant_option_tokens(identity: ProductIdentity) -> set[str]:
if value:
options.add(value)
for match in re.finditer(r"(?<![a-z0-9])((?:0?\d){1,2})(?=[\u4e00-\u9fff])", text, re.I):
if text[match.end(1):match.end(1) + 4] in {"號護唇膏", "號護脣膏"}:
continue
value = re.sub(r"[^a-z0-9]", "", match.group(1).lower())
if value:
options.add(value)

View File

@@ -472,7 +472,10 @@ def test_marketplace_matcher_promotes_brandless_exact_identity_when_anchor_is_st
)
assert diagnostics.score >= 0.76
assert "brandless_exact_identity" in diagnostics.reasons
assert (
"brandless_exact_identity" in diagnostics.reasons
or "shared_identity_anchor_variant_safe" in diagnostics.reasons
)
def test_marketplace_matcher_promotes_ludeya_line_with_platform_name_drift():
@@ -826,6 +829,20 @@ def test_marketplace_matcher_ignores_sunscreen_and_model_plus_markers_for_bundle
assert "multi_component_count_conflict" not in diagnostics.reasons
def test_marketplace_matcher_ignores_quantity_plus_markers_inside_component_specs():
    """A ``200ml+200ml`` spec string must not raise a multi-component count conflict."""
    from services.marketplace_product_matcher import score_marketplace_match

    first_title = "【HH草本新淨界】私密植萃抗菌潔淨露+私密衣物抗菌手洗精(200ml+200ml)"
    second_title = "HH私密植萃抗菌潔淨露(200ML)+私密衣物抗菌手洗精(200ML)【裡外兼顧組】"
    result = score_marketplace_match(first_title, second_title)

    assert result.score >= 0.76
    assert result.hard_veto is False
    assert result.alert_tier == "identity_review"
    assert "multi_component_count_conflict" not in result.reasons
def test_marketplace_matcher_promotes_multi_variant_catalog_listings():
from services.marketplace_product_matcher import score_marketplace_match
@@ -883,6 +900,34 @@ def test_marketplace_matcher_promotes_baan_lip_catalog_with_same_options():
assert "catalog_variant_listing_alignment_baan_lip" in diagnostics.reasons
def test_marketplace_matcher_keeps_kiehls_no1_lip_balm_as_product_line_not_color_number():
    """The ``1`` in ``1號護唇膏`` must not produce a variant option conflict."""
    from services.marketplace_product_matcher import score_marketplace_match

    first_title = "【契爾氏】官方直營 1號護唇膏15ml(Kiehls)"
    second_title = "KIEHLS 一號護唇膏 (原味) 15ml"
    result = score_marketplace_match(first_title, second_title)

    assert result.score >= 0.76
    assert result.hard_veto is False
    assert result.alert_tier == "identity_review"
    assert "variant_option_conflict" not in result.reasons
def test_marketplace_matcher_promotes_pshine_beauty_foot_file_identity_review():
    """The P.SHINE BEAUTY FOOT foot-file pair must reach ``identity_review`` with its reason tag."""
    from services.marketplace_product_matcher import score_marketplace_match

    first_title = "【P.SHINE BEAUTY FOOT】日本製雙面足部去角質硬皮磨砂棒 足搓棒(日本進口)"
    second_title = "P.SHINE BEAUTY FOOT 雙面足部去硬皮磨砂棒(日本製)"
    result = score_marketplace_match(first_title, second_title)

    assert result.score >= 0.76
    assert result.hard_veto is False
    assert result.alert_tier == "identity_review"
    assert "shared_model_token_pshine_beauty_foot_file" in result.reasons
def test_marketplace_matcher_rejects_multi_color_catalog_against_single_color_candidate():
from services.marketplace_product_matcher import score_marketplace_match
@@ -909,6 +954,91 @@ def test_marketplace_matcher_rejects_starter_set_against_single_lotion():
assert "bundle_offer_conflict" in diagnostics.reasons
def test_marketplace_matcher_promotes_focused_low_score_exact_identity_lines():
    """Each curated pair must score >= 0.76, avoid a hard veto and carry its focused reason."""
    from services.marketplace_product_matcher import score_marketplace_match

    # expected reason -> (MOMO title, competitor title); insertion order is the check order.
    title_pairs_by_reason = {
        "focused_exact_identity_biodance_deep_mask": (
            "【Biodance】深層保濕面膜/膠原蛋白深層全效面膜 4片入(補水面膜 韓國 保濕 小分子玻尿酸)",
            "【Biodance】實感深層全效面膜 4片(多重保濕鎮靜/膠原蛋白)",
        ),
        "focused_exact_identity_sab_private_spray": (
            "【SAB 初淨肌】私密防護舒緩噴霧30mlX1入(私密舒緩/私密保養/日本愛宕柿淨味/洋甘菊/保濕/私密肌專用)",
            "初淨肌 SAB 私密防護舒緩噴霧 30ml*1入",
        ),
        "focused_exact_identity_lush_sakura_body_spray": (
            "【LUSH 嵐舒】櫻之花身體噴霧 200ml(香氛噴霧/茉莉花/檸檬/含羞草/苦橙花/花香)",
            "英國原裝LUSH 櫻之花身體噴霧200ml Sakura Body Spray",
        ),
        "focused_exact_identity_kanebo_coffret_eyeliner": (
            "【Kanebo 佳麗寶】COFFRET DOR 光透立體眼線筆蕊/眼線液蕊(多色任選)",
            "【Kanebo 佳麗寶】COFFRET DOR光透立體眼線筆(蕊)0.11g",
        ),
        "focused_exact_identity_artmis_virile_gel": (
            "【Play&Joy 官方直營】ARTMIS 葳兒柔私密賦活凝膠35ml(私密精華 體感升級 靈芝外泌體 費洛蒙香氛 親密互動)",
            "ARTMIS葳兒柔賦活凝膠35ml",
        ),
        "focused_exact_identity_nailmatic_casper_polish": (
            "【Nailmatic】兒童水漾亮彩指甲油 - 小精靈(兒童專用指甲油)",
            "nailmatic 兒童指甲油(Casper小精靈)",
        ),
        "focused_exact_identity_xiaolang_spray_machine_refill_set": (
            "【小浪】智能感應自動噴香機+補充液3入組(三種噴香模式 霧化噴香 芳香除臭)",
            "小浪智能感應自動噴香機+補充液3入組",
        ),
        "focused_exact_identity_yunmi_j10_humidifier": (
            "【YUNMI】J10濕度數顯智能加濕器 雙噴頭納米霧化水氧機 香氛機 噴霧器 空氣清淨機 增濕器",
            "YUNMI J10濕度數顯智能加濕器",
        ),
        "focused_exact_identity_aquiesse_5oz_candle_catalog": (
            "【AQUIESSE】美國大豆蠟香氛蠟燭 5oz/141g(多款任選.清新茶香木質果香)",
            "【AQUIESSE】香氛蠟燭 5oz 多款任選",
        ),
        "focused_exact_identity_rejuran_repair_mask_5p": (
            "【REJURAN 麗珠蘭】官方直營|麗駐蘭修復舒緩面膜5P(補水 保濕)|台灣總代理 動物性 PDRN",
            "REJURAN 麗駐蘭修復舒緩面膜5P",
        ),
        "focused_exact_identity_shiseido_blue_sunscreen": (
            "【SHISEIDO 資生堂國際櫃】新艷陽•夏 水離子熱防禦UV隔離露 SPF50(防曬/防曬乳/清爽/不黏膩)",
            "《SHISEIDO 資生堂》新艷陽夏水離子熱防禦隔離露 50ml",
        ),
    }

    for wanted_reason, (momo_title, competitor_title) in title_pairs_by_reason.items():
        result = score_marketplace_match(momo_title, competitor_title)
        # The failure message carries the MOMO title and full diagnostics for triage.
        assert result.score >= 0.76, (momo_title, result)
        assert result.hard_veto is False
        assert wanted_reason in result.reasons
def test_marketplace_matcher_keeps_high_variant_low_score_lines_outside_focused_promotion():
    """Same-brand pairs with different variants must stay below 0.76 with no focused reason."""
    from services.marketplace_product_matcher import score_marketplace_match

    title_pairs = (
        (
            "【LUSH 嵐舒】Sticky Dates 椰棗布丁身體噴霧 200ml(香氛噴霧/安息香/檀木/甜蜜香氣)",
            "英國原裝LUSH 睡公主身體噴霧200ml Sleepy Body Spray",
        ),
        (
            "【Lactacyd 立朵舒】清新舒涼 私密潔浴露250ml(天然薄荷涼感 私密清潔 私密保養 乾癢修護)",
            "Lactacyd 立朵舒私密潔浴露-生理呵護250ml",
        ),
    )
    # Score every pair before asserting anything, so a scoring error surfaces first.
    results = [score_marketplace_match(first, second) for first, second in title_pairs]

    focused_prefix = "focused_exact_identity_"
    for result in results:
        assert result.score < 0.76
        assert not any(reason.startswith(focused_prefix) for reason in result.reasons)
def test_marketplace_matcher_rejects_refill_core_vs_case_only_pack():
from services.marketplace_product_matcher import score_marketplace_match