[V10.351] recover private care identity candidates
This commit is contained in:
@@ -1389,13 +1389,103 @@ class CompetitorPriceFeeder:
|
||||
try:
|
||||
product = product_map.get(_product_id_key(competitor_product_id))
|
||||
if not product:
|
||||
recovered, recovery_terms, recovery_candidate_count = _recover_low_score_with_fresh_search(
|
||||
crawler,
|
||||
momo_name,
|
||||
momo_price=momo_price,
|
||||
existing_product_id=competitor_product_id,
|
||||
)
|
||||
if recovered:
|
||||
best_product, score, diagnostics = recovered
|
||||
if getattr(diagnostics, "comparison_mode", "") == "unit_comparable":
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
momo_product_id=momo_product_id,
|
||||
momo_price=momo_price,
|
||||
search_terms=search_terms + [term for term in recovery_terms if term not in search_terms],
|
||||
candidate_count=max(1, recovery_candidate_count),
|
||||
attempt_status="refresh_unit_comparable",
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
error_message=_format_match_diagnostics(diagnostics),
|
||||
source=source,
|
||||
)
|
||||
skipped_low += 1
|
||||
attempts_written += 1
|
||||
continue
|
||||
|
||||
if score >= MIN_MATCH_SCORE and not getattr(diagnostics, "hard_veto", False):
|
||||
tags = _extract_tags(best_product)
|
||||
tags.extend(getattr(diagnostics, "tags", []))
|
||||
for reason in getattr(diagnostics, "reasons", ()) or ():
|
||||
tags.append(f"match_{reason}")
|
||||
tags.extend(["refresh_known_identity", "fresh_search_recovery", "missing_known_product_id"])
|
||||
tags = list(dict.fromkeys(tags))
|
||||
|
||||
should_write, write_reason = self._should_upsert_competitor_price(
|
||||
sku,
|
||||
best_product,
|
||||
score,
|
||||
source=source,
|
||||
)
|
||||
attempt_terms = search_terms + [term for term in recovery_terms if term not in search_terms]
|
||||
if not should_write:
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
momo_product_id=momo_product_id,
|
||||
momo_price=momo_price,
|
||||
search_terms=attempt_terms,
|
||||
candidate_count=max(1, recovery_candidate_count),
|
||||
attempt_status="refresh_needs_review",
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
error_message=f"{write_reason}; {_format_match_diagnostics(diagnostics)}",
|
||||
source=source,
|
||||
)
|
||||
skipped_low += 1
|
||||
attempts_written += 1
|
||||
continue
|
||||
|
||||
tags.append(write_reason)
|
||||
self._upsert_competitor_price(
|
||||
sku,
|
||||
best_product,
|
||||
score,
|
||||
tags,
|
||||
momo_product_id=momo_product_id,
|
||||
momo_price=momo_price,
|
||||
diagnostics=diagnostics,
|
||||
source=source,
|
||||
)
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
momo_product_id=momo_product_id,
|
||||
momo_price=momo_price,
|
||||
search_terms=attempt_terms,
|
||||
candidate_count=max(1, recovery_candidate_count),
|
||||
attempt_status="matched",
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
source=source,
|
||||
)
|
||||
matched += 1
|
||||
history_written += 1
|
||||
attempts_written += 1
|
||||
continue
|
||||
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
momo_product_id=momo_product_id,
|
||||
momo_price=momo_price,
|
||||
search_terms=search_terms,
|
||||
candidate_count=0,
|
||||
search_terms=search_terms + [term for term in recovery_terms if term not in search_terms],
|
||||
candidate_count=max(0, recovery_candidate_count),
|
||||
attempt_status="refresh_no_result",
|
||||
error_message=f"PChome product_id not returned: {competitor_product_id}",
|
||||
source=source,
|
||||
|
||||
@@ -37,6 +37,17 @@ NOISE_PHRASES = (
|
||||
"買1送1",
|
||||
"限定版",
|
||||
"璀璨奢金限定版",
|
||||
"單入任選",
|
||||
"單入",
|
||||
"全肌防護",
|
||||
"經典防護王",
|
||||
"賦活美學",
|
||||
"弱酸性",
|
||||
"植萃複方",
|
||||
"溫和潤澤護理",
|
||||
"ph值平衡",
|
||||
"淨味沐浴乳",
|
||||
"香氛凝膠",
|
||||
"任選",
|
||||
"即期品",
|
||||
"福利品",
|
||||
@@ -140,6 +151,16 @@ SEARCH_NOISE_PHRASES = (
|
||||
"聖誕禮物",
|
||||
"限定版",
|
||||
"璀璨奢金限定版",
|
||||
"單入任選",
|
||||
"全肌防護",
|
||||
"經典防護王",
|
||||
"賦活美學",
|
||||
"弱酸性",
|
||||
"植萃複方",
|
||||
"溫和潤澤護理",
|
||||
"ph值平衡",
|
||||
"淨味沐浴乳",
|
||||
"香氛凝膠",
|
||||
"母親節",
|
||||
"父親節",
|
||||
"情人節",
|
||||
@@ -200,6 +221,11 @@ SEARCH_IDENTITY_ANCHORS = (
|
||||
"零粉感超持久粉底棒",
|
||||
"超持久水光鎖吻唇釉",
|
||||
"裸光蜜粉餅",
|
||||
"私密潔膚露",
|
||||
"私密肌潔膚露",
|
||||
"男性私密醒肌抑菌噴霧",
|
||||
"男性私密激淨凝露",
|
||||
"私密抑菌噴霧",
|
||||
"絕對完美永生玫瑰逆齡乳霜",
|
||||
"永生玫瑰逆齡乳霜",
|
||||
"永生玫瑰霜",
|
||||
@@ -299,6 +325,8 @@ BRAND_ALIAS_OVERRIDES = {
|
||||
PRODUCT_TYPES = {
|
||||
"止汗噴霧": ("止汗爽身噴霧", "爽身噴霧", "止汗噴霧"),
|
||||
"潔膚露": ("潔膚露", "浴潔露", "護潔露", "沐浴露", "wash"),
|
||||
"私密噴霧": ("私密噴霧", "抑菌噴霧", "醒肌抑菌噴霧"),
|
||||
"私密凝露": ("凝露", "激淨凝露", "緊實凝露", "亮白凝露"),
|
||||
"唇釉": ("唇釉", "唇彩", "lip tint", "lip glaze"),
|
||||
"粉底棒": ("粉底棒", "foundation stick"),
|
||||
"精華": ("精華", "精華液", "essence", "serum", "安瓶"),
|
||||
|
||||
@@ -736,6 +736,94 @@ def test_competitor_feeder_refresh_recovers_with_fresh_search_when_known_id_is_l
|
||||
assert any("Panasonic" in term or "國際牌" in term for term in attempts[0]["search_terms"])
|
||||
|
||||
|
||||
def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch):
    """Refresh recovers a product through search when the known id is not returned.

    The fake crawler's detail lookup yields an empty list for ``DDAB01-MISSING``
    while its search yields a single product, and the stubbed scorer reports an
    exact-identity result (score 0.885, no hard veto). The test expects one
    match, an upsert of the recovered product carrying the recovery tags, and a
    first recorded attempt with status ``matched``.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    recovered = PChomeProduct(
        product_id="DDAB01-RECOVERED",
        name="eve舒摩兒 賦活美學浴潔露-全肌防護 237ml",
        price=441,
        original_price=499,
        discount=11,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-RECOVERED",
        stock=20,
        store="24h",
        rating=4.8,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        """Crawler stand-in: no detail rows for the known id, one search hit."""

        def __init__(self, *_args, **_kwargs):
            pass

        def fetch_product_details(self, product_ids, batch_size=20):
            # Only the stale id from the refresh item may be looked up.
            assert product_ids == ["DDAB01-MISSING"]
            return True, "ok", []

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [recovered]

    def fake_score(_momo_name, competitor_name, **_kwargs):
        # Fixed diagnostics: above-threshold exact-identity result, no veto.
        fields = {
            "score": 0.885,
            "brand_score": 1.0,
            "token_score": 0.7,
            "spec_score": 1.0,
            "sequence_score": 0.62,
            "type_score": 1.0,
            "price_penalty": 0.0,
            "hard_veto": False,
            "reasons": ("spec_name_alignment",),
            "comparison_mode": "exact_identity",
            "tags": ["identity_v2", "comparison_exact_identity", "brand_match"],
        }
        return SimpleNamespace(**fields)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    attempts = []
    writes = []

    def allow_upsert(*_args, **_kwargs):
        return True, "same_or_empty_existing"

    def capture_upsert(sku, product, score, tags, **extra):
        # Keyword extras are merged last, mirroring the feeder's call shape.
        writes.append({
            "sku": sku,
            "product_id": product.product_id,
            "score": score,
            "tags": tags,
            **extra,
        })

    def capture_attempt(*_positional, **fields):
        attempts.append(fields)

    monkeypatch.setattr(feeder, "_should_upsert_competitor_price", allow_upsert)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_upsert)
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    refresh_item = {
        "sku": "9823407",
        "name": "【Summer’s Eve 舒摩兒】浴潔露237ml 單入任選(私密清潔 經典防護王)",
        "product_id": 4864,
        "momo_price": 441,
        "competitor_product_id": "DDAB01-MISSING",
    }
    result = feeder._run_known_identity_refresh_items([refresh_item])

    assert result.matched == 1
    assert writes[0]["product_id"] == "DDAB01-RECOVERED"
    assert "missing_known_product_id" in writes[0]["tags"]
    assert "fresh_search_recovery" in writes[0]["tags"]
    assert attempts[0]["attempt_status"] == "matched"
|
||||
|
||||
|
||||
def test_competitor_feeder_records_unit_comparable_without_price_upsert(monkeypatch):
|
||||
from services.competitor_price_feeder import CompetitorPriceFeeder
|
||||
from services.pchome_crawler import PChomeProduct
|
||||
|
||||
@@ -258,6 +258,42 @@ def test_marketplace_matcher_promotes_packaging_variant_for_same_nars_powder():
|
||||
assert "shared_identity_anchor_packaging_variant" in diagnostics.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_private_wash_same_identity():
    """Same-identity private wash listings score at least 0.76 with no hard veto.

    Covers one Summer's Eve pair and one femfresh pair whose titles differ in
    marketing wording while the prices are equal on both sides.
    """
    from services.marketplace_product_matcher import score_marketplace_match

    # (momo title, competitor title, price used for both sides)
    listing_pairs = (
        (
            "【Summer’s Eve 舒摩兒】浴潔露237ml 單入任選(私密清潔 經典防護王)",
            "eve舒摩兒 賦活美學浴潔露-全肌防護 237ml",
            441,
        ),
        (
            "【femfresh 芳芯】弱酸性植萃複方溫和潤澤護理私密肌潔膚露250ml/瓶(pH值平衡護潔露淨味沐浴乳香氛凝膠)",
            "【femfresh芳芯 官方直營】私密潔膚露250ml (任選)",
            399,
        ),
    )

    # Score every pair before asserting, so both matcher calls always run.
    scored = [
        score_marketplace_match(
            momo_title,
            competitor_title,
            momo_price=price,
            competitor_price=price,
        )
        for momo_title, competitor_title, price in listing_pairs
    ]

    for diagnostics in scored:
        assert diagnostics.score >= 0.76
        assert diagnostics.hard_veto is False
|
||||
|
||||
|
||||
def test_marketplace_matcher_rejects_private_spray_vs_private_gel():
    """A spray listing and a gel listing of the same brand must not match.

    Expects a score below 0.76, a hard veto, and a ``type_conflict`` reason.
    """
    from services.marketplace_product_matcher import score_marketplace_match

    spray_title = "【isLeaf】韓國isLeaf男性私密醒肌抑菌噴霧60ml-夏夜微醺(SGS 24小時抑菌)"
    gel_title = "韓國 isLeaf 男性私密激淨凝露 湛藍海洋 60ml"

    # Equal prices on both sides, as in the listing pair under test.
    outcome = score_marketplace_match(
        spray_title,
        gel_title,
        momo_price=299,
        competitor_price=299,
    )

    assert outcome.score < 0.76
    assert outcome.hard_veto is True
    assert "type_conflict" in outcome.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_rejects_same_count_different_unit_family():
|
||||
from services.marketplace_product_matcher import score_marketplace_match
|
||||
|
||||
|
||||
Reference in New Issue
Block a user