V10.398 recover true low confidence matches
All checks were successful
CD Pipeline / deploy (push) Successful in 1m5s

This commit is contained in:
OoO
2026-05-24 12:05:30 +08:00
committed by AiderHeal Bot
parent 8cc197aba2
commit 4622be3441
4 changed files with 146 additions and 2 deletions

View File

@@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# 系統版本與路徑
# ==========================================
SYSTEM_VERSION = "V10.397"
SYSTEM_VERSION = "V10.398"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # 用於模板顯示

View File

@@ -12,6 +12,9 @@
## 📅 詳細更新日誌 (考古存檔)
### 2026-05-24 PChome 近門檻身份回收第二輪
- **V10.398 true low confidence 保守回收**: marketplace matcher 針對正式前段 `true_low_confidence` 補一輪 focused exact identity lines,讓 Baan 嬰兒修護唇膏、植村秀 3D 極細防水眼線膠筆、YSL 恆久完美透膚煙染腮紅、HH 私密植萃美白緊緻凝露、Lab52 學習刷牙漱口水、Benefit 經典菲菲染唇液、Herb24 晨霧純精油擴香儀、Pavaruni 40 香味 10ml 精油與 GATSBY 爆水擦澡濕巾等近門檻真同款可被回收;未放寬 `MIN_MATCH_SCORE`。同版保留 peripera 多色任選對單一色號、LUNASOL 頰彩對眼彩組、MUJI 細軸棉棒對黑色棉棒的低信心保護,並讓多組件套組即使達強身份證據也停在 `identity_review`,避免總價被誤當精準價格告警。
### 2026-05-21 瀏覽器測試守門與 PChome 熱路徑優化
- **V10.397 離線 audit false negative 收斂**: marketplace matcher 針對 audit 剩餘 fresh bad 補三個保守修正:`200ml+200ml` 這類括號內規格串不再被計成額外組件;Kiehls `1號護唇膏` 的 `1` 視為商品線名稱而非色號;P.SHINE BEAUTY FOOT 雙面足部去硬皮磨砂棒以品牌 + 足部硬皮磨砂棒語意進 `identity_review`。同批補 focused exact identity lines,讓 Biodance、SAB、LUSH、Kanebo、ARTMIS、Nailmatic、小浪、YUNMI、AQUIESSE、資生堂等低分但強證據同款被正確拉回;高證據 exact 才可進 `price_alert_exact`,證據不足者仍進 `identity_review`。離線 audit fresh bad 從 9 降到 6,剩餘皆為多組件/套組差異。
- **V10.396 多選 catalog 對 generic count 組合放行**: marketplace matcher 對「多款任選 catalog listing」對上同數量 generic `N入組` 候選新增保守豁免:需品牌、品類、基礎規格與數量一致,且 generic 端沒有具名色款/香味選項,才不觸發 `variant_option_conflict`。Johns Blend 香氛擴香罐 85g 任選 3 入 vs PChome 3入組會進 `identity_review`,不直接價格告警。

View File

@@ -352,6 +352,16 @@ SEARCH_IDENTITY_ANCHORS = (
"麗駐蘭修復舒緩面膜",
"濕度數顯智能加濕器",
"新艷陽夏水離子熱防禦隔離露",
"3d極細防水眼線膠筆",
"恆久完美透膚煙染腮紅",
"私密植萃美白緊緻凝露",
"學習刷牙漱口水",
"羅馬柱智慧居家車用香氛機",
"經典菲菲染唇液",
"染唇液",
"晨霧純精油擴香儀",
"天然植物香氛精油",
"爆水擦澡濕巾",
"3d立體持色眉彩盤",
"細芯睛彩雙頭眉筆",
"雙頭旋轉極細眉筆",
@@ -598,6 +608,7 @@ BRAND_ALIAS_OVERRIDES = {
"jo malone": ("jo malone",),
"prada": ("prada", "普拉達"),
"za": ("za",),
"hh": ("hh", "草本新淨界"),
"小浪": ("小浪",),
"xiaomi": ("小米有品", "小米", "xiaomi"),
"mac": ("m.a.c", "mac", "m a c"),
@@ -1649,6 +1660,7 @@ def _classify_match_quality(
shared_anchor: str,
shared_models: set[str],
catalog_count_omission: bool,
multi_component_pair: bool,
) -> tuple[str, str, str]:
"""Map raw matcher scores into operator-facing price comparison lanes."""
reason_set = set(reasons)
@@ -1670,6 +1682,8 @@ def _classify_match_quality(
and (direct_spec_evidence or (shared_anchor and token_score >= 0.62 and sequence_score >= 0.58))
)
if strong_identity_evidence and not catalog_count_omission:
if multi_component_pair:
return "exact", "manual_review", "identity_review"
return "exact", "total_price", "price_alert_exact"
if score >= 0.76:
@@ -2288,6 +2302,7 @@ def score_marketplace_match(
shared_anchor=shared_anchor,
shared_models=shared_models,
catalog_count_omission=catalog_count_omission,
multi_component_pair=_has_multi_component(left) and _has_multi_component(right),
)
evidence_flags = _build_evidence_flags(
brand_score=brand_score,
@@ -2571,6 +2586,20 @@ def _has_baan_baby_lip_catalog_alignment(left: ProductIdentity, right: ProductId
)
def _has_pavaruni_40_scent_oil_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Tell whether both listings match the Pavaruni 40-scent essential-oil line.

    The pair aligns only when every condition below holds:

    * ``"pavaruni"`` is a brand token shared by both identities;
    * ``"天然植物"`` appears in at least one of the two searchable names;
    * ``"精油"`` appears in both searchable names;
    * ``_has_shared_volume(left, right, 10)`` is truthy (presumably a shared
      10 ml size -- confirm against that helper);
    * each searchable name contains ``"40香味"`` or ``"40種香味"``.

    Args:
        left: Identity of the first listing.
        right: Identity of the second listing.

    Returns:
        A truthy value when all conditions hold, otherwise a falsy one.
    """
    name_a = left.searchable_name
    name_b = right.searchable_name

    common_brands = left.brand_tokens & right.brand_tokens
    if "pavaruni" not in common_brands:
        return False
    # The "natural plant" wording only needs to show up on one side.
    if "天然植物" not in f"{name_a} {name_b}":
        return False
    if "精油" not in name_a:
        return False
    if "精油" not in name_b:
        return False

    # Volume is checked before the scent-count wording, and a falsy helper
    # result is handed back unchanged.
    return _has_shared_volume(left, right, 10) and all(
        "40香味" in name or "40種香味" in name for name in (name_a, name_b)
    )
def _has_shared_count(left: ProductIdentity, right: ProductIdentity, count: int, unit: str) -> bool:
return (count, unit) in set(left.counts) and (count, unit) in set(right.counts)
@@ -2677,6 +2706,63 @@ def _has_focused_low_score_exact_identity_line(left: ProductIdentity, right: Pro
and "隔離露" in right_text
):
return "shiseido_blue_sunscreen"
if (
{"baan", "貝恩"} & (left.brand_tokens & right.brand_tokens)
and "嬰兒修護唇膏" in left_text
and "嬰兒修護唇膏" in right_text
and left.product_type == right.product_type == "護唇膏"
):
return "baan_baby_lip_base_catalog"
if (
{"shu uemura", "植村秀"} & (left.brand_tokens & right.brand_tokens)
and "3d極細防水眼線膠筆" in left_text
and "3d極細防水眼線膠筆" in right_text
):
return "shu_3d_eyeliner"
if (
{"ysl", "聖羅蘭"} & (left.brand_tokens & right.brand_tokens)
and "恆久完美透膚煙染腮紅" in left_text
and "恆久完美透膚煙染腮紅" in right_text
):
return "ysl_blush_catalog"
if (
{"hh", "草本新淨界"} & (left.brand_tokens & right.brand_tokens)
and "私密植萃美白緊緻凝露" in left_text
and "私密植萃美白緊緻凝露" in right_text
and _has_shared_volume(left, right, 30)
):
return "hh_private_gel"
if (
{"lab52", "齒妍堂"} & (left.brand_tokens & right.brand_tokens)
and "學習刷牙漱口水" in left_text
and "學習刷牙漱口水" in right_text
and _has_overlapping_base_spec(left, right)
):
return "lab52_mouthwash"
if (
"benefit" in (left.brand_tokens & right.brand_tokens)
and "染唇液" in left_text
and "染唇液" in right_text
and "唇頰兩用" in pair_text
):
return "benefit_lip_tint"
if (
{"herb24", "草本"} & (left.brand_tokens & right.brand_tokens)
and "晨霧純精油擴香儀ii" in left_text
and "晨霧純精油擴香儀ii" in right_text
and (("霧黑" in left_text and "黑色" in right_text) or ("霧黑" in right_text and "黑色" in left_text))
):
return "herb24_mist_diffuser_black"
if _has_pavaruni_40_scent_oil_alignment(left, right):
return "pavaruni_40_scent_oil"
if (
"gatsby" in (left.brand_tokens & right.brand_tokens)
and "爆水擦澡濕巾" in left_text
and "爆水擦澡濕巾" in right_text
and "24張入" in left_text
and "24張入" in right_text
):
return "gatsby_body_wipes_24"
return ""
@@ -2735,6 +2821,8 @@ def _has_variant_descriptor_conflict(left: ProductIdentity, right: ProductIdenti
return False
if _has_baan_baby_lip_catalog_alignment(left, right):
return False
if _has_pavaruni_40_scent_oil_alignment(left, right):
return False
if _is_relove_private_cleanser_line(left, right):
return False
if (

View File

@@ -1013,6 +1013,51 @@ def test_marketplace_matcher_promotes_focused_low_score_exact_identity_lines():
"《SHISEIDO 資生堂》新艷陽夏水離子熱防禦隔離露 50ml",
"focused_exact_identity_shiseido_blue_sunscreen",
),
(
"【Baan 貝恩】嬰兒修護唇膏4.5g",
"【貝恩】嬰兒修護唇膏(原味/草莓)",
"focused_exact_identity_baan_baby_lip_base_catalog",
),
(
"【植村秀】官方直營 3D極細防水眼線膠筆(Shu uemura)",
"《Shu Uemura 植村秀》3D極細防水眼線膠筆 0.08g-#深棕",
"focused_exact_identity_shu_3d_eyeliner",
),
(
"【YSL】官方直營 恆久完美透膚煙染腮紅(任選1款/新品上市)",
"【YSL聖羅蘭】恆久完美透膚煙染腮紅 6g ( #12/ #57/ #93)",
"focused_exact_identity_ysl_blush_catalog",
),
(
"【HH草本新淨界】私密植萃美白緊緻凝露30ml(私密美白 保濕緊緻 私密緊實)",
"HH 私密植萃美白緊緻凝露(30ml)",
"focused_exact_identity_hh_private_gel",
),
(
"【Lab52 齒妍堂】學習刷牙漱口水170g/瓶(食品級配方/牙菌斑顯示劑/口腔清潔居家檢測)",
"Lab52齒妍堂 汪汪隊立大功學習刷牙漱口水170g",
"focused_exact_identity_lab52_mouthwash",
),
(
"【benefit 貝玲妃】經典染唇液(唇頰兩用/持久不脫妝)",
"【Benefit】經典菲菲染唇液6ml(唇頰兩用/持久不脫妝)多色",
"focused_exact_identity_benefit_lip_tint",
),
(
"【草本24】Herb24 晨霧純精油擴香儀II_霧黑無須用水紅桔 純質精油)(歐盟EMC證書 主機享半年保固)",
"【草本24。Herb24】晨霧純精油擴香儀II_黑色任選價值780元純質精油10ml",
"focused_exact_identity_herb24_mist_diffuser_black",
),
(
"【Pavaruni】美國天然植物精油40香味10ml(香薰 擴香 萃取 薰香機 水氧機 薰衣草 檀香 玫瑰 雪松 白茶 茉莉)",
"【美國Pavaruni】天然植物香氛精油40種香味10ml 多款任選",
"focused_exact_identity_pavaruni_40_scent_oil",
),
(
"【GATSBY】爆水擦澡濕巾24張入(涼感乾洗澡)",
"GATSBY 爆水擦澡濕巾24張入(240g)",
"focused_exact_identity_gatsby_body_wipes_24",
),
]
for momo_name, competitor_name, expected_reason in cases:
@@ -1033,8 +1078,16 @@ def test_marketplace_matcher_keeps_high_variant_low_score_lines_outside_focused_
"【Lactacyd 立朵舒】清新舒涼 私密潔浴露250ml(天然薄荷涼感 私密清潔 私密保養 乾癢修護)",
"Lactacyd 立朵舒私密潔浴露-生理呵護250ml",
)
lunasol = score_marketplace_match(
"即期品【Kanebo 佳麗寶】LUNASOL 晶巧霓光/明艷頰彩(多色任選)",
"【Kanebo 佳麗寶】LUNASOL 晶巧霓光眼彩寵愛組",
)
muji_swab = score_marketplace_match(
"【MUJI 無印良品】細軸棉棒/200支",
"【MUJI 無印良品】棉棒(黑色)/200支",
)
for diagnostics in (lush, lactacyd):
for diagnostics in (lush, lactacyd, lunasol, muji_swab):
assert diagnostics.score < 0.76
assert not any(reason.startswith("focused_exact_identity_") for reason in diagnostics.reasons)