[V10.351] recover private care identity candidates

This commit is contained in:
OoO
2026-05-20 20:34:02 +08:00
parent d030e4cf22
commit 5d38735548
4 changed files with 244 additions and 2 deletions

View File

@@ -1389,13 +1389,103 @@ class CompetitorPriceFeeder:
try:
product = product_map.get(_product_id_key(competitor_product_id))
if not product:
recovered, recovery_terms, recovery_candidate_count = _recover_low_score_with_fresh_search(
crawler,
momo_name,
momo_price=momo_price,
existing_product_id=competitor_product_id,
)
if recovered:
best_product, score, diagnostics = recovered
if getattr(diagnostics, "comparison_mode", "") == "unit_comparable":
self._record_match_attempt(
sku,
momo_name,
momo_product_id=momo_product_id,
momo_price=momo_price,
search_terms=search_terms + [term for term in recovery_terms if term not in search_terms],
candidate_count=max(1, recovery_candidate_count),
attempt_status="refresh_unit_comparable",
best_product=best_product,
best_score=score,
diagnostics=diagnostics,
error_message=_format_match_diagnostics(diagnostics),
source=source,
)
skipped_low += 1
attempts_written += 1
continue
if score >= MIN_MATCH_SCORE and not getattr(diagnostics, "hard_veto", False):
tags = _extract_tags(best_product)
tags.extend(getattr(diagnostics, "tags", []))
for reason in getattr(diagnostics, "reasons", ()) or ():
tags.append(f"match_{reason}")
tags.extend(["refresh_known_identity", "fresh_search_recovery", "missing_known_product_id"])
tags = list(dict.fromkeys(tags))
should_write, write_reason = self._should_upsert_competitor_price(
sku,
best_product,
score,
source=source,
)
attempt_terms = search_terms + [term for term in recovery_terms if term not in search_terms]
if not should_write:
self._record_match_attempt(
sku,
momo_name,
momo_product_id=momo_product_id,
momo_price=momo_price,
search_terms=attempt_terms,
candidate_count=max(1, recovery_candidate_count),
attempt_status="refresh_needs_review",
best_product=best_product,
best_score=score,
diagnostics=diagnostics,
error_message=f"{write_reason}; {_format_match_diagnostics(diagnostics)}",
source=source,
)
skipped_low += 1
attempts_written += 1
continue
tags.append(write_reason)
self._upsert_competitor_price(
sku,
best_product,
score,
tags,
momo_product_id=momo_product_id,
momo_price=momo_price,
diagnostics=diagnostics,
source=source,
)
self._record_match_attempt(
sku,
momo_name,
momo_product_id=momo_product_id,
momo_price=momo_price,
search_terms=attempt_terms,
candidate_count=max(1, recovery_candidate_count),
attempt_status="matched",
best_product=best_product,
best_score=score,
diagnostics=diagnostics,
source=source,
)
matched += 1
history_written += 1
attempts_written += 1
continue
self._record_match_attempt(
sku,
momo_name,
momo_product_id=momo_product_id,
momo_price=momo_price,
search_terms=search_terms,
candidate_count=0,
search_terms=search_terms + [term for term in recovery_terms if term not in search_terms],
candidate_count=max(0, recovery_candidate_count),
attempt_status="refresh_no_result",
error_message=f"PChome product_id not returned: {competitor_product_id}",
source=source,

View File

@@ -37,6 +37,17 @@ NOISE_PHRASES = (
"買1送1",
"限定版",
"璀璨奢金限定版",
"單入任選",
"單入",
"全肌防護",
"經典防護王",
"賦活美學",
"弱酸性",
"植萃複方",
"溫和潤澤護理",
"ph值平衡",
"淨味沐浴乳",
"香氛凝膠",
"任選",
"即期品",
"福利品",
@@ -140,6 +151,16 @@ SEARCH_NOISE_PHRASES = (
"聖誕禮物",
"限定版",
"璀璨奢金限定版",
"單入任選",
"全肌防護",
"經典防護王",
"賦活美學",
"弱酸性",
"植萃複方",
"溫和潤澤護理",
"ph值平衡",
"淨味沐浴乳",
"香氛凝膠",
"母親節",
"父親節",
"情人節",
@@ -200,6 +221,11 @@ SEARCH_IDENTITY_ANCHORS = (
"零粉感超持久粉底棒",
"超持久水光鎖吻唇釉",
"裸光蜜粉餅",
"私密潔膚露",
"私密肌潔膚露",
"男性私密醒肌抑菌噴霧",
"男性私密激淨凝露",
"私密抑菌噴霧",
"絕對完美永生玫瑰逆齡乳霜",
"永生玫瑰逆齡乳霜",
"永生玫瑰霜",
@@ -299,6 +325,8 @@ BRAND_ALIAS_OVERRIDES = {
PRODUCT_TYPES = {
"止汗噴霧": ("止汗爽身噴霧", "爽身噴霧", "止汗噴霧"),
"潔膚露": ("潔膚露", "浴潔露", "護潔露", "沐浴露", "wash"),
"私密噴霧": ("私密噴霧", "抑菌噴霧", "醒肌抑菌噴霧"),
"私密凝露": ("凝露", "激淨凝露", "緊實凝露", "亮白凝露"),
"唇釉": ("唇釉", "唇彩", "lip tint", "lip glaze"),
"粉底棒": ("粉底棒", "foundation stick"),
"精華": ("精華", "精華液", "essence", "serum", "安瓶"),

View File

@@ -736,6 +736,94 @@ def test_competitor_feeder_refresh_recovers_with_fresh_search_when_known_id_is_l
assert any("Panasonic" in term or "國際牌" in term for term in attempts[0]["search_terms"])
def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch):
    """Check that a refresh recovers via fresh search when the stored id is gone.

    The fake crawler returns no product details for the known competitor id
    but yields a single search hit. The scorer is stubbed to report a
    high-scoring, non-vetoed ``exact_identity`` result, and the upsert guard
    is stubbed to allow the write. The test then expects one price upsert for
    the recovered product, tagged as a fresh-search recovery, and a
    ``matched`` attempt record.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    # The only product the fake search will ever return.
    replacement_product = PChomeProduct(
        product_id="DDAB01-RECOVERED",
        name="eve舒摩兒 賦活美學浴潔露-全肌防護 237ml",
        price=441,
        original_price=499,
        discount=11,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-RECOVERED",
        stock=20,
        store="24h",
        rating=4.8,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        """Crawler stub: detail lookup finds nothing, search finds one item."""

        def __init__(self, *_args, **_kwargs):
            pass

        def fetch_product_details(self, product_ids, batch_size=20):
            # The refresh must ask for exactly the stored (now missing) id.
            assert product_ids == ["DDAB01-MISSING"]
            return True, "ok", []

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [replacement_product]

    def fake_score(_momo_name, competitor_name, **_kwargs):
        # Fixed diagnostics: a confident, non-vetoed exact-identity result.
        canned_fields = {
            "score": 0.885,
            "brand_score": 1.0,
            "token_score": 0.7,
            "spec_score": 1.0,
            "sequence_score": 0.62,
            "type_score": 1.0,
            "price_penalty": 0.0,
            "hard_veto": False,
            "reasons": ("spec_name_alignment",),
            "comparison_mode": "exact_identity",
            "tags": ["identity_v2", "comparison_exact_identity", "brand_match"],
        }
        return SimpleNamespace(**canned_fields)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)

    feeder = CompetitorPriceFeeder(engine=object())
    attempts = []
    writes = []

    def allow_upsert(*_args, **_kwargs):
        return True, "same_or_empty_existing"

    def capture_upsert(sku, product, score, tags, **kwargs):
        # Flatten the call into a dict so assertions can index by field name.
        writes.append({
            "sku": sku,
            "product_id": product.product_id,
            "score": score,
            "tags": tags,
            **kwargs,
        })

    def capture_attempt(*_args, **kwargs):
        # Only keyword arguments are kept; positional ones are ignored.
        attempts.append(kwargs)

    monkeypatch.setattr(feeder, "_should_upsert_competitor_price", allow_upsert)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_upsert)
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    refresh_item = {
        "sku": "9823407",
        "name": "【Summers Eve 舒摩兒】浴潔露237ml 單入任選(私密清潔 經典防護王)",
        "product_id": 4864,
        "momo_price": 441,
        "competitor_product_id": "DDAB01-MISSING",
    }
    result = feeder._run_known_identity_refresh_items([refresh_item])

    assert result.matched == 1
    first_write = writes[0]
    assert first_write["product_id"] == "DDAB01-RECOVERED"
    assert "missing_known_product_id" in first_write["tags"]
    assert "fresh_search_recovery" in first_write["tags"]
    assert attempts[0]["attempt_status"] == "matched"
def test_competitor_feeder_records_unit_comparable_without_price_upsert(monkeypatch):
from services.competitor_price_feeder import CompetitorPriceFeeder
from services.pchome_crawler import PChomeProduct

View File

@@ -258,6 +258,42 @@ def test_marketplace_matcher_promotes_packaging_variant_for_same_nars_powder():
assert "shared_identity_anchor_packaging_variant" in diagnostics.reasons
def test_marketplace_matcher_promotes_private_wash_same_identity():
    """Same-product private wash listings must score >= 0.76 with no hard veto.

    Two brand pairs (Summer's Eve and femfresh) are scored at equal prices on
    both sides, with differently worded marketing titles.
    """
    from services.marketplace_product_matcher import score_marketplace_match

    # Each case: (momo title, competitor title, price used on both sides).
    listing_pairs = (
        (
            "【Summers Eve 舒摩兒】浴潔露237ml 單入任選(私密清潔 經典防護王)",
            "eve舒摩兒 賦活美學浴潔露-全肌防護 237ml",
            441,
        ),
        (
            "【femfresh 芳芯】弱酸性植萃複方溫和潤澤護理私密肌潔膚露250ml/瓶(pH值平衡護潔露淨味沐浴乳香氛凝膠)",
            "【femfresh芳芯 官方直營】私密潔膚露250ml (任選)",
            399,
        ),
    )

    # Score every pair first, then assert, so both scorer calls always run.
    outcomes = [
        score_marketplace_match(
            momo_title,
            rival_title,
            momo_price=shared_price,
            competitor_price=shared_price,
        )
        for momo_title, rival_title, shared_price in listing_pairs
    ]

    for outcome in outcomes:
        assert outcome.score >= 0.76
        assert outcome.hard_veto is False
def test_marketplace_matcher_rejects_private_spray_vs_private_gel():
    """A spray listing and a gel listing of the same brand must not match.

    Both titles share the isLeaf brand, 60ml size and price; the test expects
    a score below 0.76, a hard veto, and a ``type_conflict`` reason.
    """
    from services.marketplace_product_matcher import score_marketplace_match

    spray_title = "【isLeaf】韓國isLeaf男性私密醒肌抑菌噴霧60ml-夏夜微醺(SGS 24小時抑菌)"
    gel_title = "韓國 isLeaf 男性私密激淨凝露 湛藍海洋 60ml"

    verdict = score_marketplace_match(
        spray_title,
        gel_title,
        momo_price=299,
        competitor_price=299,
    )

    assert verdict.score < 0.76
    assert verdict.hard_veto is True
    assert "type_conflict" in verdict.reasons
def test_marketplace_matcher_rejects_same_count_different_unit_family():
from services.marketplace_product_matcher import score_marketplace_match