diff --git a/config.py b/config.py index df6e953..51cd96c 100644 --- a/config.py +++ b/config.py @@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.344" +SYSTEM_VERSION = "V10.345" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/code_modularization_inventory_20260430.md b/docs/memory/code_modularization_inventory_20260430.md index b4d18fa..73b2df3 100644 --- a/docs/memory/code_modularization_inventory_20260430.md +++ b/docs/memory/code_modularization_inventory_20260430.md @@ -27,6 +27,7 @@ - 2026-05-20 追記:同步背景 PChome crawler 搜尋韌性擴充後的 `services/pchome_crawler.py` 行數;此處只更新 inventory,不變更 PChome crawler 行為。 - 2026-05-20 追記:同步 PChome 近門檻候選重評與 matcher 系列/刀片數防錯配更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;此處只更新 inventory,不變更比價行為。 - 2026-05-20 追記:同步 PChome 搜尋詞品質層、候選召回與 hard-veto 狀態分流更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;並補列背景市場情報 deployment readiness 大檔,僅更新 inventory。 +- 2026-05-20 追記:同步 PChome 搜尋詞特定品線優先級更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 ## 達到或超過 800 行檔案清單 @@ -53,7 +54,7 @@ | 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders | | 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers | | 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting | -| 1356 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization | +| 1387 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization | | 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service | | 844 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing | | 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy | diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index 7915db2..930c32c 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -138,6 +138,7 @@ SEARCH_NOISE_PHRASES = ( "水光感", "官方直營", "官方", + "經典款", ) SEARCH_NOISE_TOKENS = { @@ -160,9 +161,23 @@ SEARCH_NOISE_TOKENS = { "防曬", "彩妝", "水光感", + "保濕", + "抗老", + "超品日", + "經典款", + "款", + "pdrn", } SEARCH_IDENTITY_ANCHORS = ( + "絕對完美永生玫瑰逆齡乳霜", + "永生玫瑰逆齡乳霜", + "永生玫瑰霜", + "玫瑰精露", + "玫瑰霜", + "青春敷面膜", + "長效潤膚霜", + "小黑瓶", "免用水潔淨液", "身體按摩精油", "按摩精油", @@ -196,6 +211,15 @@ SEARCH_IDENTITY_ANCHORS = ( "香氛機", ) +SEARCH_BROAD_ANCHORS = { + "乳霜", + "面霜", + "面膜", + "精華", + "乳液", + "精油", +} + SEARCH_AMBIGUOUS_PRODUCT_TERMS = { "保護膜", "保護貼", @@ -1234,6 +1258,8 @@ def _extract_anchor_phrases(token: str) -> list[str]: else: phrase = anchor phrase = _clean_search_phrase(phrase) + if phrase.startswith("款") and len(phrase) > 2: + phrase = phrase[1:] if any(existing in phrase and existing != phrase for existing in phrases): continue if len(phrase) >= 2 and phrase not in phrases: @@ -1248,6 +1274,8 @@ def _search_core_score(token: str, all_tokens: set[str]) -> tuple[int, int, str] compact = cleaned.replace(" ", "") if compact in SEARCH_NOISE_TOKENS or compact in GENERIC_TOKENS: return (-900, 0, cleaned) + if re.fullmatch(r"\d+(?:\.\d+)?(?:ml|g|mg|kg|l)x\d+", compact, re.I): + return (-900, 0, cleaned) score = 0 if re.search(r"[a-z][a-z0-9-]{2,}", cleaned): @@ -1258,8 +1286,11 @@ def _search_core_score(token: str, all_tokens: set[str]) -> tuple[int, int, str] anchors = _extract_anchor_phrases(cleaned) if anchors: score += 90 + score += min(24, len(anchors[0]) * 3) if anchors[0] == compact: score += 8 + if compact in SEARCH_BROAD_ANCHORS: + score -= 28 else: score += max(0, 24 - len(compact)) diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index 3138a94..db66b63 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -468,6 +468,26 @@ def test_marketplace_search_terms_keep_professional_product_phrase(): assert not any("卸除防曬" in term or "外出清潔" in term for term in mustela_terms) +def test_marketplace_search_terms_prefer_specific_line_over_generic_usage_words(): + from services.marketplace_product_matcher import build_search_terms + + mask_terms = build_search_terms("【SK-II】官方直營 青春敷面膜10+6+6送12(保濕/面膜/超品日)", max_terms=5) + bottle_terms = build_search_terms( + "【LANCOME 蘭蔻】官方直營 經典款小黑瓶100ml雙入組(LANCOME/經典款/抗老)", + max_terms=5, + ) + rose_terms = build_search_terms( + "【蘭蔻】官方直營 玫瑰霜60ml+玫瑰精露150ml(LANCOME/永生玫瑰霜/抗老/PDRN)", + max_terms=5, + ) + + assert mask_terms[0] == "sk ii 青春敷面膜" + assert bottle_terms[0] == "蘭蔻 小黑瓶 100ml" + assert rose_terms[0] == "蘭蔻 永生玫瑰霜 60ml" + assert "抗老" not in " ".join(bottle_terms[:3] + rose_terms[:3]) + assert "pdrn" not in " ".join(term.lower() for term in rose_terms[:3]) + + def test_batch_compare_top_uses_latest_momo_price_not_revenue(monkeypatch): from services import pchome_crawler