From 5d38735548e29d22c0f7d0b8745254fa8b50294f Mon Sep 17 00:00:00 2001 From: OoO Date: Wed, 20 May 2026 20:34:02 +0800 Subject: [PATCH] [V10.351] recover private care identity candidates --- services/competitor_price_feeder.py | 94 ++++++++++++++++++- services/marketplace_product_matcher.py | 28 ++++++ ...t_competitor_match_attempts_persistence.py | 88 +++++++++++++++++ tests/test_marketplace_product_matcher.py | 36 +++++++ 4 files changed, 244 insertions(+), 2 deletions(-) diff --git a/services/competitor_price_feeder.py b/services/competitor_price_feeder.py index 012ba81..fff8307 100644 --- a/services/competitor_price_feeder.py +++ b/services/competitor_price_feeder.py @@ -1389,13 +1389,103 @@ class CompetitorPriceFeeder: try: product = product_map.get(_product_id_key(competitor_product_id)) if not product: + recovered, recovery_terms, recovery_candidate_count = _recover_low_score_with_fresh_search( + crawler, + momo_name, + momo_price=momo_price, + existing_product_id=competitor_product_id, + ) + if recovered: + best_product, score, diagnostics = recovered + if getattr(diagnostics, "comparison_mode", "") == "unit_comparable": + self._record_match_attempt( + sku, + momo_name, + momo_product_id=momo_product_id, + momo_price=momo_price, + search_terms=search_terms + [term for term in recovery_terms if term not in search_terms], + candidate_count=max(1, recovery_candidate_count), + attempt_status="refresh_unit_comparable", + best_product=best_product, + best_score=score, + diagnostics=diagnostics, + error_message=_format_match_diagnostics(diagnostics), + source=source, + ) + skipped_low += 1 + attempts_written += 1 + continue + + if score >= MIN_MATCH_SCORE and not getattr(diagnostics, "hard_veto", False): + tags = _extract_tags(best_product) + tags.extend(getattr(diagnostics, "tags", [])) + for reason in getattr(diagnostics, "reasons", ()) or (): + tags.append(f"match_{reason}") + tags.extend(["refresh_known_identity", "fresh_search_recovery", "missing_known_product_id"]) + tags = list(dict.fromkeys(tags)) + + should_write, write_reason = self._should_upsert_competitor_price( + sku, + best_product, + score, + source=source, + ) + attempt_terms = search_terms + [term for term in recovery_terms if term not in search_terms] + if not should_write: + self._record_match_attempt( + sku, + momo_name, + momo_product_id=momo_product_id, + momo_price=momo_price, + search_terms=attempt_terms, + candidate_count=max(1, recovery_candidate_count), + attempt_status="refresh_needs_review", + best_product=best_product, + best_score=score, + diagnostics=diagnostics, + error_message=f"{write_reason}; {_format_match_diagnostics(diagnostics)}", + source=source, + ) + skipped_low += 1 + attempts_written += 1 + continue + + tags.append(write_reason) + self._upsert_competitor_price( + sku, + best_product, + score, + tags, + momo_product_id=momo_product_id, + momo_price=momo_price, + diagnostics=diagnostics, + source=source, + ) + self._record_match_attempt( + sku, + momo_name, + momo_product_id=momo_product_id, + momo_price=momo_price, + search_terms=attempt_terms, + candidate_count=max(1, recovery_candidate_count), + attempt_status="matched", + best_product=best_product, + best_score=score, + diagnostics=diagnostics, + source=source, + ) + matched += 1 + history_written += 1 + attempts_written += 1 + continue + self._record_match_attempt( sku, momo_name, momo_product_id=momo_product_id, momo_price=momo_price, - search_terms=search_terms, - candidate_count=0, + search_terms=search_terms + [term for term in recovery_terms if term not in search_terms], + candidate_count=max(0, recovery_candidate_count), attempt_status="refresh_no_result", error_message=f"PChome product_id not returned: {competitor_product_id}", source=source, diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index 792b321..ae397f5 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -37,6 +37,17 @@ NOISE_PHRASES = ( "買1送1", "限定版", "璀璨奢金限定版", + "單入任選", + "單入", + "全肌防護", + "經典防護王", + "賦活美學", + "弱酸性", + "植萃複方", + "溫和潤澤護理", + "ph值平衡", + "淨味沐浴乳", + "香氛凝膠", "任選", "即期品", "福利品", @@ -140,6 +151,16 @@ SEARCH_NOISE_PHRASES = ( "聖誕禮物", "限定版", "璀璨奢金限定版", + "單入任選", + "全肌防護", + "經典防護王", + "賦活美學", + "弱酸性", + "植萃複方", + "溫和潤澤護理", + "ph值平衡", + "淨味沐浴乳", + "香氛凝膠", "母親節", "父親節", "情人節", @@ -200,6 +221,11 @@ SEARCH_IDENTITY_ANCHORS = ( "零粉感超持久粉底棒", "超持久水光鎖吻唇釉", "裸光蜜粉餅", + "私密潔膚露", + "私密肌潔膚露", + "男性私密醒肌抑菌噴霧", + "男性私密激淨凝露", + "私密抑菌噴霧", "絕對完美永生玫瑰逆齡乳霜", "永生玫瑰逆齡乳霜", "永生玫瑰霜", @@ -299,6 +325,8 @@ BRAND_ALIAS_OVERRIDES = { PRODUCT_TYPES = { "止汗噴霧": ("止汗爽身噴霧", "爽身噴霧", "止汗噴霧"), "潔膚露": ("潔膚露", "浴潔露", "護潔露", "沐浴露", "wash"), + "私密噴霧": ("私密噴霧", "抑菌噴霧", "醒肌抑菌噴霧"), + "私密凝露": ("凝露", "激淨凝露", "緊實凝露", "亮白凝露"), "唇釉": ("唇釉", "唇彩", "lip tint", "lip glaze"), "粉底棒": ("粉底棒", "foundation stick"), "精華": ("精華", "精華液", "essence", "serum", "安瓶"), diff --git a/tests/test_competitor_match_attempts_persistence.py b/tests/test_competitor_match_attempts_persistence.py index ddef5ed..03c6639 100644 --- a/tests/test_competitor_match_attempts_persistence.py +++ b/tests/test_competitor_match_attempts_persistence.py @@ -736,6 +736,94 @@ def test_competitor_feeder_refresh_recovers_with_fresh_search_when_known_id_is_l assert any("Panasonic" in term or "國際牌" in term for term in attempts[0]["search_terms"]) +def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch): + from services.competitor_price_feeder import CompetitorPriceFeeder + from services.pchome_crawler import PChomeProduct + + recovered = PChomeProduct( + product_id="DDAB01-RECOVERED", + name="eve舒摩兒 賦活美學浴潔露-全肌防護 237ml", + price=441, + original_price=499, + discount=11, + image_url="", + product_url="https://24h.pchome.com.tw/prod/DDAB01-RECOVERED", + stock=20, + store="24h", + rating=4.8, + review_count=8, + is_on_sale=True, + crawled_at=datetime.now(), + ) + + class FakeCrawler: + def __init__(self, *_args, **_kwargs): + pass + + def fetch_product_details(self, product_ids, batch_size=20): + assert product_ids == ["DDAB01-MISSING"] + return True, "ok", [] + + def search_products(self, *_args, **_kwargs): + return True, "ok", [recovered] + + def fake_score(_momo_name, competitor_name, **_kwargs): + return SimpleNamespace( + score=0.885, + brand_score=1.0, + token_score=0.7, + spec_score=1.0, + sequence_score=0.62, + type_score=1.0, + price_penalty=0.0, + hard_veto=False, + reasons=("spec_name_alignment",), + comparison_mode="exact_identity", + tags=["identity_v2", "comparison_exact_identity", "brand_match"], + ) + + monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) + monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) + feeder = CompetitorPriceFeeder(engine=object()) + attempts = [] + writes = [] + monkeypatch.setattr( + feeder, + "_should_upsert_competitor_price", + lambda *_args, **_kwargs: (True, "same_or_empty_existing"), + ) + monkeypatch.setattr( + feeder, + "_upsert_competitor_price", + lambda sku, product, score, tags, **kwargs: writes.append({ + "sku": sku, + "product_id": product.product_id, + "score": score, + "tags": tags, + **kwargs, + }), + ) + monkeypatch.setattr( + feeder, + "_record_match_attempt", + lambda *args, **kwargs: attempts.append(kwargs), + ) + + result = feeder._run_known_identity_refresh_items([{ + "sku": "9823407", + "name": "【Summer’s Eve 舒摩兒】浴潔露237ml 單入任選(私密清潔 經典防護王)", + "product_id": 4864, + "momo_price": 441, + "competitor_product_id": "DDAB01-MISSING", + }]) + + assert result.matched == 1 + assert writes[0]["product_id"] == "DDAB01-RECOVERED" + assert "missing_known_product_id" in writes[0]["tags"] + assert "fresh_search_recovery" in writes[0]["tags"] + assert attempts[0]["attempt_status"] == "matched" + + def test_competitor_feeder_records_unit_comparable_without_price_upsert(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder from services.pchome_crawler import PChomeProduct diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index 629697c..fa46205 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -258,6 +258,42 @@ def test_marketplace_matcher_promotes_packaging_variant_for_same_nars_powder(): assert "shared_identity_anchor_packaging_variant" in diagnostics.reasons +def test_marketplace_matcher_promotes_private_wash_same_identity(): + from services.marketplace_product_matcher import score_marketplace_match + + summer = score_marketplace_match( + "【Summer’s Eve 舒摩兒】浴潔露237ml 單入任選(私密清潔 經典防護王)", + "eve舒摩兒 賦活美學浴潔露-全肌防護 237ml", + momo_price=441, + competitor_price=441, + ) + femfresh = score_marketplace_match( + "【femfresh 芳芯】弱酸性植萃複方溫和潤澤護理私密肌潔膚露250ml/瓶(pH值平衡護潔露淨味沐浴乳香氛凝膠)", + "【femfresh芳芯 官方直營】私密潔膚露250ml (任選)", + momo_price=399, + competitor_price=399, + ) + + for diagnostics in (summer, femfresh): + assert diagnostics.score >= 0.76 + assert diagnostics.hard_veto is False + + +def test_marketplace_matcher_rejects_private_spray_vs_private_gel(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【isLeaf】韓國isLeaf男性私密醒肌抑菌噴霧60ml-夏夜微醺(SGS 24小時抑菌)", + "韓國 isLeaf 男性私密激淨凝露 湛藍海洋 60ml", + momo_price=299, + competitor_price=299, + ) + + assert diagnostics.score < 0.76 + assert diagnostics.hard_veto is True + assert "type_conflict" in diagnostics.reasons + + def test_marketplace_matcher_rejects_same_count_different_unit_family(): from services.marketplace_product_matcher import score_marketplace_match