強化商品線配對與組合 veto
All checks were successful
CD Pipeline / deploy (push) Successful in 1m11s

This commit is contained in:
OoO
2026-05-19 23:13:54 +08:00
parent 6fc064cd9a
commit 205b9ea3f6
3 changed files with 92 additions and 0 deletions

View File

@@ -342,6 +342,7 @@ LEFT JOIN competitor_prices cp
- competitor PPT 不可只輸出 matched rows 造成覆蓋率假象;`fetch_competitor_comparison_results()` 必須用 `LEFT JOIN valid_competitor` 保留高營收/高價但尚未有效配對的 MOMO 商品,並帶出 `match_status`、`candidate_count`、`best_match_score`、`match_diagnostic`,讓簡報與 AI 文案明確區分「高信心比對」與「待補身份/價格」。
- `services/competitor_identity_revalidator.py` 可對既有 `competitor_prices` legacy row 離線重跑 `identity_v2`:只有新版 matcher 分數 `>= 0.76` 且無 hard veto 才補 `identity_v2` / `legacy_revalidated` tags;預設不刷新 `expires_at`,避免過期價格進入決策。
- `CompetitorPriceFeeder.run_expired_identity_refresh()` 會優先刷新已通過 `identity_v2` 但 TTL 過期的 PChome row:直接用既有 `competitor_product_id` 批次呼叫 PChome 商品 API,再用新版 matcher 重新驗證名稱/規格/價格 sanity,通過後寫回 `competitor_prices` 與 `competitor_price_history`。這條路徑提升新鮮價格覆蓋率,但不降低 match threshold,也不讓過期價格直接進入決策。
- `marketplace_product_matcher.py` 的擴充只能走「正向證據 + 反向 veto」:品牌一致、商品線/型號訊號強、價格合理且無 hard veto 時才允許 `strong_product_line_match` 加分;補充瓶/補充包/refill 與一般正裝不互相配對,分享組/加量組/明星組等組合包不得誤配單品。
- Dashboard 必須把「待比對」拆成可診斷狀態:`價格過期待刷新`、`舊版配對待重驗`、`低分配對待審`、`身份否決`、`找不到同款`、`抓取異常`、`尚未搜尋`。不可再用單一「待比對」掩蓋資料品質原因。
### 執行方式

View File

@@ -448,6 +448,9 @@ def _has_bundle_offer(identity: ProductIdentity) -> bool:
or "優惠套組" in text
or "禮盒組" in text
or "加大組" in text
or "加量組" in text
or "分享組" in text
or "明星組" in text
or "套組" in text
)
@@ -461,6 +464,17 @@ def _has_multi_component(identity: ProductIdentity) -> bool:
)
def _has_refill_pack(identity: ProductIdentity) -> bool:
    """Report whether the product name marks the item as a refill / replacement part.

    The check is a plain substring search on ``identity.normalized_name`` for
    the refill-bottle, refill-pack and replacement-cartridge markers listed
    below, plus the literal ``refill``.

    NOTE(review): the ``refill`` comparison is case-sensitive as written;
    presumably ``normalized_name`` is already lower-cased — confirm upstream.
    """
    refill_markers = ("補充瓶", "補充包", "替換蕊", "替換芯", "refill")
    name = identity.normalized_name
    return any(marker in name for marker in refill_markers)
def _spec_mention_count(identity: ProductIdentity) -> int:
    """Count the volume/weight mentions found in the product's normalized name.

    A mention is a number (optionally with a decimal part), optional
    whitespace, and one of the units ml / 毫升 / l / g / 公克 / kg, matched
    case-insensitively. Matches are non-overlapping, scanned left to right.
    """
    spec_pattern = r"\d+(?:\.\d+)?\s*(?:ml|毫升|l|g|公克|kg)"
    mentions = re.finditer(spec_pattern, identity.normalized_name, flags=re.IGNORECASE)
    # One tick per match; equivalent to the length of the findall() result.
    return sum(1 for _ in mentions)
@@ -488,6 +502,23 @@ def _chinese_bigram_score(left: ProductIdentity, right: ProductIdentity) -> floa
return 2 * len(left_signature & right_signature) / (len(left_signature) + len(right_signature))
def _has_strong_product_line_signal(
    left: ProductIdentity,
    right: ProductIdentity,
    token_score: float,
    chinese_name_score: float,
) -> bool:
    """Decide whether two identities show strong evidence of the same product line.

    The signal is considered strong in either of two situations:

    * both sides share at least one core token that looks like a Latin word
      (a letter followed by 3+ letters/digits/hyphens) or a model code
      (2+ letters, optional hyphen, digits, optional trailing characters),
      and ``token_score`` is at least 0.50; or
    * ``token_score`` is at least 0.56 and ``chinese_name_score`` is at
      least 0.45.

    Args:
        left: First identity; its ``core_tokens`` are intersected with ``right``'s.
        right: Second identity.
        token_score: Token-level similarity computed by the caller.
        chinese_name_score: Chinese-name similarity computed by the caller.

    Returns:
        True when either condition above holds, otherwise False.
    """
    latin_word = re.compile(r"[a-z][a-z0-9-]{3,}")
    model_code = re.compile(r"[a-z]{2,}-?\d+[a-z0-9-]*")

    common_tokens = left.core_tokens & right.core_tokens
    distinctive_tokens = [
        candidate
        for candidate in common_tokens
        if latin_word.fullmatch(candidate) or model_code.fullmatch(candidate)
    ]

    # A shared Latin/model token lets a slightly lower token score qualify.
    if distinctive_tokens and token_score >= 0.50:
        return True
    return token_score >= 0.56 and chinese_name_score >= 0.45
def score_marketplace_match(
momo_name: str,
competitor_name: str,
@@ -525,6 +556,8 @@ def score_marketplace_match(
reasons.append("bundle_offer_conflict")
if _has_multi_component(left) != _has_multi_component(right):
reasons.append("multi_component_conflict")
if _has_refill_pack(left) != _has_refill_pack(right):
reasons.append("refill_pack_conflict")
left_spec_mentions = _spec_mention_count(left)
right_spec_mentions = _spec_mention_count(right)
if left_spec_mentions and right_spec_mentions and left_spec_mentions != right_spec_mentions:
@@ -537,6 +570,8 @@ def score_marketplace_match(
hard_veto = True
if _has_multi_component(left) != _has_multi_component(right):
hard_veto = True
if _has_refill_pack(left) != _has_refill_pack(right):
hard_veto = True
if left_spec_mentions and right_spec_mentions and left_spec_mentions != right_spec_mentions:
hard_veto = True
if chinese_name_score < 0.16 and token_score < 0.72:
@@ -568,6 +603,16 @@ def score_marketplace_match(
if token_score >= 0.72 and spec_score >= 0.82 and not brand_conflict:
score += 0.08
if (
brand_score >= 0.95
and not hard_veto
and price_penalty == 0
and type_score >= 0.55
and spec_score >= 0.55
and _has_strong_product_line_signal(left, right, token_score, chinese_name_score)
):
score += 0.07
reasons.append("strong_product_line_match")
if hard_veto:
score = min(score, 0.32)
score = max(0.0, min(1.0, score))

View File

@@ -61,6 +61,52 @@ def test_marketplace_matcher_handles_bundle_piece_count():
assert diagnostics.hard_veto is False
def test_marketplace_matcher_accepts_strong_model_line_without_specs():
    """Same brand and model name with close prices should reach the 0.76 score bar without a size spec."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【Stadler Form】Sophie 無線香氛水氧機 水氧機 香氛機"
    competitor_title = "【瑞士Stadler Form】無線香氛水氧機 露營燈造型 Sophie"

    result = score_marketplace_match(
        momo_title,
        competitor_title,
        momo_price=3780,
        competitor_price=3980,
    )

    assert result.score >= 0.76
    assert result.hard_veto is False
    # The bonus must be attributed to the product-line rule.
    assert "strong_product_line_match" in result.reasons
def test_marketplace_matcher_rejects_bundle_to_single_even_when_brand_matches():
    """A share-set listing must be hard-vetoed against the single item of the same brand."""
    from services.marketplace_product_matcher import score_marketplace_match

    bundle_title = "【NARS】小白餅閨蜜分享組(裸光蜜粉餅/定妝蜜粉)"
    single_title = "【NARS】裸光蜜粉餅(小白餅) 10g"

    result = score_marketplace_match(
        bundle_title,
        single_title,
        momo_price=3300,
        competitor_price=1099,
    )

    assert result.score < 0.76
    assert result.hard_veto is True
    # The veto must be explained by the bundle-vs-single mismatch.
    assert "bundle_offer_conflict" in result.reasons
def test_marketplace_matcher_does_not_promote_wide_price_refill_candidate():
    """A refill-bottle listing must be hard-vetoed against the regular item and get no product-line bonus."""
    from services.marketplace_product_matcher import score_marketplace_match

    refill_title = "【蘭蔻】官方直營 絕對完美永生玫瑰逆齡乳霜60ml補充瓶"
    regular_title = "LANCOME蘭蔻 絕對完美永生玫瑰逆齡乳霜 60ml"

    result = score_marketplace_match(
        refill_title,
        regular_title,
        momo_price=11205,
        competitor_price=5349,
    )

    assert result.score < 0.76
    assert result.hard_veto is True
    assert "refill_pack_conflict" in result.reasons
    # The product-line bonus must not be recorded for a vetoed pair.
    assert "strong_product_line_match" not in result.reasons
def test_batch_compare_top_uses_latest_momo_price_not_revenue(monkeypatch):
from services import pchome_crawler