diff --git a/docs/memory/code_modularization_inventory_20260430.md b/docs/memory/code_modularization_inventory_20260430.md index 1e4811f..2a35d94 100644 --- a/docs/memory/code_modularization_inventory_20260430.md +++ b/docs/memory/code_modularization_inventory_20260430.md @@ -29,6 +29,9 @@ - 2026-05-20 追記:同步 PChome 搜尋詞品質層、候選召回與 hard-veto 狀態分流更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;並補列背景市場情報 deployment readiness 大檔,僅更新 inventory。 - 2026-05-20 追記:同步 PChome 搜尋詞特定品線優先級更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 - 2026-05-20 追記:同步 PChome 共享 identity anchor scorer 與市場情報 review report route 進入大檔門檻後的行數;此處只更新 inventory,不變更功能。 +- 2026-05-20 追記:同步 PChome contained identity anchor scorer 更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 +- 2026-05-20 追記:同步 PChome spec/name alignment near-threshold scorer 更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 +- 2026-05-20 追記:同步市場情報 review report route 與 review receipt 巨檔現況,並校正 PChome fresh-search recovery 更新後的 `services/competitor_price_feeder.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 ## 達到或超過 800 行檔案清單 @@ -55,18 +58,19 @@ | 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders | | 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers | | 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting | -| 1447 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization | +| 1559 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization | | 865 | 
`routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service | | 844 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing | | 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy | | 1042 | `services/code_review_pipeline_service.py` | P2 Code review pipeline service | scan orchestration / finding normalization / persistence adapter | | 953 | `routes/export_routes.py` | P2 Export flow | export command/router glue / file path / download orchestration | | 816 | `services/ppt_vision_service.py` | P2 PPT vision QA service | runtime state / queue status / model probe / audit execution 分離 | -| 1602 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / cache strategy | +| 1733 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / retryable candidate recovery / cache strategy | | 1120 | `services/competitor_intel_repository.py` | P2 competitor intel repository | review queue query / cache shaping / formatting helpers | | 805 | `routes/bot_api_routes.py` | P2 Bot API Blueprint | route glue / bot action service | -| 804 | `routes/market_intel_review_report_routes.py` | P2 market intel review report Blueprint | review report route glue / export payload / phase handoff orchestration | +| 990 | `routes/market_intel_review_report_routes.py` | P2 market intel review report Blueprint | review report route glue / export payload / phase handoff orchestration | | 811 | `services/market_intel/deployment_readiness.py` | P2 market intel deployment readiness | preflight gates / readiness payload / route contract helpers | +| 846 | `services/market_intel/candidate_queue_review_ai_summary_persistence_telegram_dispatch_report_catalog_record_run_receipt.py` | P2 market intel 
def _recover_low_score_with_fresh_search(
    crawler,
    momo_name: str,
    momo_price: Optional[float] = None,
    existing_product_id: str = "",
) -> tuple[Optional[tuple], list[str], int]:
    """
    Run a fresh keyword search for an item whose known candidate scored low.

    Intended for the case where re-scoring a legacy / known-id candidate still
    yields a low score: a new keyword search is issued so that a stale
    mismatch can be replaced by a better-matching listing.

    Args:
        crawler: Crawler object forwarded to ``_search_pchome_candidates``.
        momo_name: momo product name, used both to build the search keywords
            and to score the candidates.
        momo_price: Optional momo price forwarded to the search and the
            scorer; ``None`` means no price is supplied.
        existing_product_id: Product id of the candidate that was already
            evaluated. When non-empty, candidates whose ``_product_id_key``
            equals it are dropped, provided at least one other candidate
            remains.

    Returns:
        A ``(best, keywords, candidate_count)`` tuple. ``best`` is the value
        returned by ``_find_best_match_detail`` for the retained candidates,
        ``keywords`` is the keyword list built from ``momo_name``, and
        ``candidate_count`` is the number of retained candidates.
    """
    keywords = _build_search_keywords(momo_name)
    candidates = _search_pchome_candidates(
        crawler,
        momo_name,
        keywords=keywords,
        momo_price=momo_price,
    )
    if existing_product_id:
        existing_key = _product_id_key(existing_product_id)
        fresh_candidates = [
            candidate
            for candidate in candidates
            if _product_id_key(getattr(candidate, "product_id", "")) != existing_key
        ]
        # Only narrow the pool when something other than the known product
        # was found; otherwise keep the unfiltered results for scoring.
        if fresh_candidates:
            candidates = fresh_candidates
    best = _find_best_match_detail(momo_name, candidates, momo_price=momo_price)
    return best, keywords, len(candidates)
MIN_MATCH_SCORE: + tags = _extract_tags(best_product) + tags.extend(getattr(diagnostics, "tags", [])) + for reason in getattr(diagnostics, "reasons", ()) or (): + tags.append(f"match_{reason}") + tags.append("refresh_known_identity") + if recovery_terms: + tags.append("fresh_search_recovery") + tags = list(dict.fromkeys(tags)) + + should_write, write_reason = self._should_upsert_competitor_price( + sku, + best_product, + score, + source=source, + ) + candidate_count = max(1, recovery_candidate_count or 1) + attempt_terms = search_terms + [term for term in recovery_terms if term not in search_terms] + if not should_write: + self._record_match_attempt( + sku, + momo_name, + momo_product_id=momo_product_id, + momo_price=momo_price, + search_terms=attempt_terms, + candidate_count=candidate_count, + attempt_status="refresh_needs_review", + best_product=best_product, + best_score=score, + diagnostics=diagnostics, + error_message=f"{write_reason}; {_format_match_diagnostics(diagnostics)}", + source=source, + ) + skipped_low += 1 + attempts_written += 1 + continue + + tags.append(write_reason) + self._upsert_competitor_price( + sku, + best_product, + score, + tags, + momo_product_id=momo_product_id, + momo_price=momo_price, + diagnostics=diagnostics, + source=source, + ) + self._record_match_attempt( + sku, + momo_name, + momo_product_id=momo_product_id, + momo_price=momo_price, + search_terms=attempt_terms, + candidate_count=candidate_count, + attempt_status="matched", + best_product=best_product, + best_score=score, + diagnostics=diagnostics, + source=source, + ) + matched += 1 + history_written += 1 + attempts_written += 1 + continue + attempt_status = "identity_veto" if getattr(diagnostics, "hard_veto", False) else "refresh_low_score" self._record_match_attempt( sku, momo_name, momo_product_id=momo_product_id, momo_price=momo_price, - search_terms=search_terms, - candidate_count=1, + search_terms=search_terms + [term for term in recovery_terms if term not in 
search_terms], + candidate_count=max(1, recovery_candidate_count or 1), attempt_status=attempt_status, best_product=best_product, best_score=score, diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index cd7ffc8..792b321 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -35,6 +35,8 @@ NOISE_PHRASES = ( "送", "買一送一", "買1送1", + "限定版", + "璀璨奢金限定版", "任選", "即期品", "福利品", @@ -136,6 +138,8 @@ SEARCH_NOISE_PHRASES = ( "精美紙袋", "交換禮物", "聖誕禮物", + "限定版", + "璀璨奢金限定版", "母親節", "父親節", "情人節", @@ -191,7 +195,11 @@ SEARCH_NOISE_TOKENS = { } SEARCH_IDENTITY_ANCHORS = ( + "止汗爽身噴霧", "止汗爽身乳膏pro", + "零粉感超持久粉底棒", + "超持久水光鎖吻唇釉", + "裸光蜜粉餅", "絕對完美永生玫瑰逆齡乳霜", "永生玫瑰逆齡乳霜", "永生玫瑰霜", @@ -223,6 +231,7 @@ SEARCH_IDENTITY_ANCHORS = ( "身體乳", "緊實乳", "潔膚露", + "浴潔露", "潔淨液", "護甲油", "指甲油", @@ -288,6 +297,10 @@ BRAND_ALIAS_OVERRIDES = { } PRODUCT_TYPES = { + "止汗噴霧": ("止汗爽身噴霧", "爽身噴霧", "止汗噴霧"), + "潔膚露": ("潔膚露", "浴潔露", "護潔露", "沐浴露", "wash"), + "唇釉": ("唇釉", "唇彩", "lip tint", "lip glaze"), + "粉底棒": ("粉底棒", "foundation stick"), "精華": ("精華", "精華液", "essence", "serum", "安瓶"), "化妝水": ("化妝水", "機能水", "toner", "lotion"), "乳液": ("乳液", "emulsion", "milk"), @@ -1259,6 +1272,19 @@ def score_marketplace_match( ): score += 0.03 reasons.append("shared_identity_anchor_no_spec") + if ( + shared_anchor + and brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and type_score >= 0.95 + and spec_score >= 0.45 + and token_score >= 0.56 + and sequence_score >= 0.62 + and not variant_descriptor_conflict + ): + score += 0.02 + reasons.append("shared_identity_anchor_packaging_variant") if ( brand_score >= 0.95 and not hard_veto diff --git a/tests/test_competitor_match_attempts_persistence.py b/tests/test_competitor_match_attempts_persistence.py index d69bef0..ddef5ed 100644 --- a/tests/test_competitor_match_attempts_persistence.py +++ b/tests/test_competitor_match_attempts_persistence.py @@ -618,6 +618,124 @@ def 
def test_competitor_feeder_refresh_recovers_with_fresh_search_when_known_id_is_low_score(monkeypatch):
    """A low-scoring known-id candidate is replaced by a fresh-search hit."""
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    def build_product(product_id, name, price, discount, rating):
        # Both fixtures share every field except the ones passed in.
        return PChomeProduct(
            product_id=product_id,
            name=name,
            price=price,
            original_price=2490,
            discount=discount,
            image_url="",
            product_url=f"https://24h.pchome.com.tw/prod/{product_id}",
            stock=20,
            store="24h",
            rating=rating,
            review_count=8,
            is_on_sale=True,
            crawled_at=datetime.now(),
        )

    stale = build_product(
        "DDAB01-STALE",
        "Panasonic 國際牌 男仕防水美體除毛器 國際版 (ER-GK83)",
        2290,
        8,
        4.7,
    )
    recovered = build_product(
        "DDAB01-RECOVERED",
        "Panasonic 國際牌 男士身體除毛器 ER-GK83",
        2390,
        4,
        4.8,
    )

    class FakeCrawler:
        def __init__(self, *_args, **_kwargs):
            pass

        def fetch_product_details(self, product_ids, batch_size=20):
            assert product_ids == ["DDAB01-STALE"]
            return True, "ok", [stale]

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [stale, recovered]

    def build_diagnostics(score, token_score, spec_score, sequence_score, reasons):
        # Fields common to the high- and low-score results live here.
        return SimpleNamespace(
            score=score,
            brand_score=1.0,
            token_score=token_score,
            spec_score=spec_score,
            sequence_score=sequence_score,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=False,
            reasons=reasons,
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    def fake_score(_momo_name, competitor_name, **_kwargs):
        is_recovered = (
            "RECOVERED" in competitor_name
            or "男士身體除毛器" in competitor_name
        )
        if is_recovered:
            return build_diagnostics(0.81, 0.8, 0.8, 0.72, ("shared_model_token",))
        return build_diagnostics(0.68, 0.55, 0.55, 0.6, ())

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    attempts = []
    writes = []

    def allow_upsert(*_args, **_kwargs):
        return (True, "same_or_empty_existing")

    def capture_upsert(sku, product, score, tags, **kwargs):
        writes.append({
            "sku": sku,
            "product_id": product.product_id,
            "score": score,
            "tags": tags,
            **kwargs,
        })

    def capture_attempt(*args, **kwargs):
        attempts.append(kwargs)

    monkeypatch.setattr(feeder, "_should_upsert_competitor_price", allow_upsert)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_upsert)
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    refresh_item = {
        "sku": "TP00090100000153",
        "name": "【Panasonic 國際牌】男士身體除毛器 2025新款 ER-GK83 日版 日本直送",
        "product_id": 1,
        "momo_price": 2490,
        "competitor_product_id": "DDAB01-STALE",
    }
    result = feeder._run_known_identity_refresh_items([refresh_item])

    assert result.matched == 1
    assert writes[0]["product_id"] == "DDAB01-RECOVERED"
    assert "fresh_search_recovery" in writes[0]["tags"]

    first_attempt = attempts[0]
    assert first_attempt["attempt_status"] == "matched"
    assert "known_product_id:DDAB01-STALE" in first_attempt["search_terms"]
    assert any("Panasonic" in term or "國際牌" in term for term in first_attempt["search_terms"])
def test_marketplace_matcher_rejects_foundation_stick_vs_foundation_liquid():
    """A foundation stick must not match a liquid foundation of the same line."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【蘭蔻】零粉感超持久粉底棒9.5g"
    pchome_title = "【LANCOME 蘭蔻】零粉感超持久粉底 30ml"

    result = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=1620,
        competitor_price=1580,
    )

    assert result.score < 0.76
    assert result.hard_veto is True
    assert "type_conflict" in result.reasons


def test_marketplace_matcher_promotes_nivea_deodorant_spray_identity():
    """The same NIVEA deodorant spray scores at or above 0.76 without a veto."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【NIVEA 妮維雅】男士 止汗爽身噴霧 無印乾爽-清新海洋"
    pchome_title = "NIVEA 妮維雅 男士止汗爽身噴霧 無印乾爽-清新海洋150ml"

    result = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=159,
        competitor_price=169,
    )

    assert result.score >= 0.76
    assert result.hard_veto is False


def test_marketplace_matcher_promotes_packaging_variant_for_same_nars_powder():
    """A limited-edition packaging variant still matches the base NARS powder."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【NARS】官方直營 裸光蜜粉餅(璀璨奢金限定版/星沙金小白餅)"
    pchome_title = "【NARS】裸光蜜粉餅(小白餅) 10g"

    result = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=1050,
        competitor_price=1050,
    )

    assert result.score >= 0.76
    assert result.hard_veto is False
    assert "shared_identity_anchor_packaging_variant" in result.reasons