From 205b9ea3f606328967e57026bb250259912e4ed6 Mon Sep 17 00:00:00 2001 From: OoO Date: Tue, 19 May 2026 23:13:54 +0800 Subject: [PATCH] =?UTF-8?q?=E5=BC=B7=E5=8C=96=E5=95=86=E5=93=81=E7=B7=9A?= =?UTF-8?q?=E9=85=8D=E5=B0=8D=E8=88=87=E7=B5=84=E5=90=88=20veto?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/AI_INTELLIGENCE_MODULE_SOT.md | 1 + services/marketplace_product_matcher.py | 45 ++++++++++++++++++++++ tests/test_marketplace_product_matcher.py | 46 +++++++++++++++++++++++ 3 files changed, 92 insertions(+) diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index dbcee29..fcd4767 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -342,6 +342,7 @@ LEFT JOIN competitor_prices cp - competitor PPT 不可只輸出 matched rows 造成覆蓋率假象;`fetch_competitor_comparison_results()` 必須用 `LEFT JOIN valid_competitor` 保留高營收/高價但尚未有效配對的 MOMO 商品,並帶出 `match_status`、`candidate_count`、`best_match_score` 與 `match_diagnostic`,讓簡報與 AI 文案明確區分「高信心比對」與「待補身份/價格」。 - `services/competitor_identity_revalidator.py` 可對既有 `competitor_prices` legacy row 離線重跑 `identity_v2`:只有新版 matcher 分數 `>= 0.76` 且無 hard veto 才補 `identity_v2` / `legacy_revalidated` tags;預設不刷新 `expires_at`,避免過期價格進入決策。 - `CompetitorPriceFeeder.run_expired_identity_refresh()` 會優先刷新已通過 `identity_v2` 但 TTL 過期的 PChome row:直接用既有 `competitor_product_id` 批次呼叫 PChome 商品 API,再用新版 matcher 重新驗證名稱/規格/價格 sanity,通過後寫回 `competitor_prices` 與 `competitor_price_history`。這條路徑提升新鮮價格覆蓋率,但不降低 match threshold,也不讓過期價格直接進入決策。 +- `marketplace_product_matcher.py` 的擴充只能走「正向證據 + 反向 veto」:品牌一致、商品線/型號訊號強、價格合理且無 hard veto 時才允許 `strong_product_line_match` 加分;補充瓶/補充包/refill 與一般正裝不互相配對,分享組/加量組/明星組等組合包不得誤配單品。 - Dashboard 必須把「待比對」拆成可診斷狀態:`價格過期待刷新`、`舊版配對待重驗`、`低分配對待審`、`身份否決`、`找不到同款`、`抓取異常`、`尚未搜尋`。不可再用單一「待比對」掩蓋資料品質原因。 ### 執行方式 diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index cd789d4..4366570 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -448,6 +448,9 @@ def _has_bundle_offer(identity: ProductIdentity) -> bool: or "優惠套組" in text or "禮盒組" in text or "加大組" in text + or "加量組" in text + or "分享組" in text + or "明星組" in text or "套組" in text ) @@ -461,6 +464,17 @@ def _has_multi_component(identity: ProductIdentity) -> bool: ) +def _has_refill_pack(identity: ProductIdentity) -> bool: + text = identity.normalized_name + return bool( + "補充瓶" in text + or "補充包" in text + or "替換蕊" in text + or "替換芯" in text + or "refill" in text + ) + + def _spec_mention_count(identity: ProductIdentity) -> int: return len(re.findall(r"\d+(?:\.\d+)?\s*(?:ml|毫升|l|g|公克|kg)", identity.normalized_name, re.I)) @@ -488,6 +502,23 @@ def _chinese_bigram_score(left: ProductIdentity, right: ProductIdentity) -> floa return 2 * len(left_signature & right_signature) / (len(left_signature) + len(right_signature)) +def _has_strong_product_line_signal( + left: ProductIdentity, + right: ProductIdentity, + token_score: float, + chinese_name_score: float, +) -> bool: + shared_core = left.core_tokens & right.core_tokens + shared_latin_or_model = { + token for token in shared_core + if re.fullmatch(r"[a-z][a-z0-9-]{3,}", token) + or re.fullmatch(r"[a-z]{2,}-?\d+[a-z0-9-]*", token) + } + if shared_latin_or_model and token_score >= 0.50: + return True + return token_score >= 0.56 and chinese_name_score >= 0.45 + + def score_marketplace_match( momo_name: str, competitor_name: str, @@ -525,6 +556,8 @@ def score_marketplace_match( reasons.append("bundle_offer_conflict") if _has_multi_component(left) != _has_multi_component(right): reasons.append("multi_component_conflict") + if _has_refill_pack(left) != _has_refill_pack(right): + reasons.append("refill_pack_conflict") left_spec_mentions = _spec_mention_count(left) right_spec_mentions = _spec_mention_count(right) if left_spec_mentions and right_spec_mentions and left_spec_mentions != right_spec_mentions: @@ -537,6 +570,8 @@ def score_marketplace_match( hard_veto = True if _has_multi_component(left) != _has_multi_component(right): hard_veto = True + if _has_refill_pack(left) != _has_refill_pack(right): + hard_veto = True if left_spec_mentions and right_spec_mentions and left_spec_mentions != right_spec_mentions: hard_veto = True if chinese_name_score < 0.16 and token_score < 0.72: @@ -568,6 +603,16 @@ def score_marketplace_match( if token_score >= 0.72 and spec_score >= 0.82 and not brand_conflict: score += 0.08 + if ( + brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and type_score >= 0.55 + and spec_score >= 0.55 + and _has_strong_product_line_signal(left, right, token_score, chinese_name_score) + ): + score += 0.07 + reasons.append("strong_product_line_match") if hard_veto: score = min(score, 0.32) score = max(0.0, min(1.0, score)) diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index 616805f..43cbe74 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -61,6 +61,52 @@ def test_marketplace_matcher_handles_bundle_piece_count(): assert diagnostics.hard_veto is False +def test_marketplace_matcher_accepts_strong_model_line_without_specs(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【Stadler Form】Sophie 無線香氛水氧機 水氧機 香氛機", + "【瑞士Stadler Form】無線香氛水氧機 露營燈造型 Sophie", + momo_price=3780, + competitor_price=3980, + ) + + assert diagnostics.score >= 0.76 + assert diagnostics.hard_veto is False + assert "strong_product_line_match" in diagnostics.reasons + + +def test_marketplace_matcher_rejects_bundle_to_single_even_when_brand_matches(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【NARS】小白餅閨蜜分享組(裸光蜜粉餅/定妝蜜粉)", + "【NARS】裸光蜜粉餅(小白餅) 10g", + momo_price=3300, + competitor_price=1099, + ) + + assert diagnostics.score < 0.76 + assert diagnostics.hard_veto is True + assert "bundle_offer_conflict" in diagnostics.reasons + + +def test_marketplace_matcher_does_not_promote_wide_price_refill_candidate(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【蘭蔻】官方直營 絕對完美永生玫瑰逆齡乳霜60ml補充瓶", + "LANCOME蘭蔻 絕對完美永生玫瑰逆齡乳霜 60ml", + momo_price=11205, + competitor_price=5349, + ) + + assert diagnostics.score < 0.76 + assert diagnostics.hard_veto is True + assert "refill_pack_conflict" in diagnostics.reasons + assert "strong_product_line_match" not in diagnostics.reasons + + def test_batch_compare_top_uses_latest_momo_price_not_revenue(monkeypatch): from services import pchome_crawler