diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index a08beb5..43ec96e 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.550 補安全搜尋召回詞:`_build_variant_recall_search_plan()` 對低風險穩定品類新增 `品牌 + 品類` 的補搜尋詞,讓 `no_result / refresh_no_result` 更有機會找到 PChome 候選後再交給 matcher 安全判斷;美甲片、指甲油、唇彩、香氛/精油、粉底、防曬、任選/色號/款式等高 variant 風險商品不走通用召回,DASHING DIVA 仍只走既有 line-specific recall + sort fallback。此變更不改 `MIN_MATCH_SCORE`、hard veto、fresh-search write safety 或 stronger existing match 覆寫保護。 - V10.549 收斂比價新鮮度 KPI 口徑:coverage cache 升到 v10,`expires_at IS NULL` 不再算進「可用比價 / decision ready」,改拆成 `unknown_freshness_matches` / `unknown_freshness_count`,避免沒有到期時間的舊資料被當成可直接決策的新鮮價格。Dashboard / daily / growth 同步顯示未知新鮮度與「未形成有效身份配對」,並把 PChome/MOMO 價格方向文案改成 `PChome 價格壓力` / `MOMO 價格優勢`,降低誤讀。 - V10.548 接線更多 focused exact 舊候選回刷:把 matcher 已驗證可安全走 total-price 的 3W CLINIC 膠原蛋白粉底液 50ml x2、花美水 Moisture/Inclear 1.7g x3、KUSSEN 寶寶益菌屁屁膏 50ml 3 入、Lab52 齒妍堂嬰幼兒/汪汪隊牙刷 2 入接進 `_fetch_retryable_candidate_skus()` focused true-low / rescore 窄門。這只擴大「舊候選可被新版 matcher 重評」的入口,不改 `MIN_MATCH_SCORE`、hard veto、auto price write safety 或既有覆寫保護。 - V10.547 強化單位價覆核洞察:`manual_unit_price_required` 不再只是人工狀態,覆核隊列與商品看板會重新帶出單位價換算、MOMO/PChome 單位價方向、差距百分比與處理建議;決策信封 / OpenClaw / PPT 摘要可讀到 `unit_price_insight`。人工覆核寫回也會保留原始 `match_diagnostic_json` / comparison mode / diagnostic codes,避免後續簡報、審計或 AI 策略只剩人工文案而失去 matcher 證據鏈。 diff --git a/config.py b/config.py index f3c2083..8e8e445 100644 --- a/config.py +++ b/config.py @@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.549" +SYSTEM_VERSION = "V10.550" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index 2fbb022..e632253 100644 
--- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-06-01:PChome 比價新鮮度操作閉環 +- **V10.550 安全搜尋召回詞補強**: `competitor_price_feeder` 在既有精準搜尋詞之外,對低風險穩定品類補上一組 `品牌 + 品類` recall keyword,提升 `no_result / refresh_no_result` 找到候選的機率;高 variant 風險商品如美甲片、指甲油、唇彩、香氛/精油、粉底、防曬與含任選/色號/款式/香味的商品不走通用召回。DASHING DIVA 仍保留既有 line-specific recall 與 PChome sort fallback;本次不更動 `MIN_MATCH_SCORE`、hard veto、auto write safety 或 stronger existing match 保護。 - **V10.549 比價新鮮度 KPI 口徑收斂**: `fetch_competitor_coverage()` cache 升到 v10,`expires_at IS NULL` 不再混入 `fresh_matches` / `decision_ready_rate`,改拆成 `unknown_freshness_matches` 與 `unknown_freshness_rate`,讓「可用比價覆蓋率」只代表有明確未過期時間的 identity 價格。Dashboard、daily、growth 同步顯示未知新鮮度與「未形成有效身份配對」,第一屏資料時間改看最新有效 PChome 價格抓取,並把價格方向文案改為 `PChome 價格壓力` / `MOMO 價格優勢`。 - **V10.548 focused exact 舊候選回刷接線**: `_fetch_retryable_candidate_skus()` 的 focused true-low / rescore 窄門新增 3W CLINIC 膠原蛋白粉底液 50ml x2、花美水 Moisture/Inclear 1.7g x3、KUSSEN 寶寶益菌屁屁膏 50ml 3 入、Lab52 齒妍堂嬰幼兒 / 汪汪隊牙刷 2 入。這些品線在 matcher 測試中已是 `exact / total_price / price_alert_exact`,本次只讓舊 `true_low_confidence` / `rescore_accepted_current` 候選能被新版 matcher 重新判斷;仍不放寬 `MIN_MATCH_SCORE`、hard veto、auto write safety 與 stronger existing match 保護。 - **V10.547 單位價覆核洞察與證據鏈保留**: `manual_unit_price_required` 現在會和 `unit_comparable` 一樣重新產生單位價比較,並轉成 `unit_price_insight`,明確標示 PChome 或 MOMO 哪邊單位價較低、差距百分比、嚴重度與操作建議;Dashboard 覆核卡、商品列、決策信封與 OpenClaw/PPT 摘要都可讀到這個訊號。人工覆核寫回 `competitor_match_attempts` 時也會在欄位存在時保留原始 `match_diagnostic_json`、`comparison_mode`、`hard_veto`、`diagnostic_codes`,`competitor_match_reviews.candidate_diagnostic` 同步附帶 JSON 證據,避免人工閉環後只剩狀態文字。 diff --git a/services/competitor_price_feeder.py b/services/competitor_price_feeder.py index 61868dd..6ae7b00 100644 --- a/services/competitor_price_feeder.py +++ b/services/competitor_price_feeder.py @@ -48,6 +48,52 @@ TTL_HOURS = int(os.getenv("PCHOME_FEEDER_TTL_HOURS", "48")) # competitor REQUEST_TIMEOUT = 
# Product types considered low variant risk: for these, one extra
# "<brand> <product type>" keyword may be appended to the search plan.
GENERIC_RECALL_SAFE_PRODUCT_TYPES = {
    "止汗噴霧",
    "潔膚露",
    "刮鬍刀",
    "電動牙刷",
    "洗手慕斯",
    "足膜",
    "化妝水",
    "乳液",
    "面霜",
    "洗面乳",
    "面膜",
    "眼霜",
    "卸妝",
}

# Name fragments that disable generic recall even when the product type is in
# the safe set (pick-any bundles, shades/styles, scents, nail and lip
# products, base makeup, sunscreen, hand cream, ...).
GENERIC_RECALL_BLOCK_TERMS = (
    "任選",
    "多款",
    "多色",
    "色號",
    "顏色",
    "款式",
    "香味",
    "香調",
    "香氛",
    "精油",
    "擴香",
    "蠟燭",
    "融蠟",
    "融燭",
    "美甲片",
    "指甲油",
    "指彩",
    "唇釉",
    "唇彩",
    "唇膏",
    "潤唇",
    "眼影",
    "腮紅",
    "粉底",
    "遮瑕",
    "定妝",
    "妝前",
    "防曬",
    "護手霜",
)

# Brands whose name is split into several tokens: when every required token
# is present (case-insensitively) the fixed search phrase is used. Order
# matters -- the first matching entry wins.
_MULTI_TOKEN_BRAND_PHRASES = (
    (frozenset({"3w", "clinic"}), "3w clinic"),
    (frozenset({"dashing", "diva"}), "dashing diva"),
    (frozenset({"rom", "nd"}), "romand"),
    (frozenset({"im", "meme"}), "im meme"),
    (frozenset({"recipe", "box"}), "recipe box"),
)
_CJK_CHAR_RE = re.compile(r"[\u4e00-\u9fff]")
_LATIN_CHAR_RE = re.compile(r"[a-z]", re.I)


def _primary_brand_phrase(identity) -> str:
    """Pick the single brand phrase used to build a generic recall keyword.

    Selection order:

    1. a fixed phrase from ``_MULTI_TOKEN_BRAND_PHRASES`` when all of its
       tokens are present;
    2. the longest brand token containing a CJK character (ties broken by
       string order), returned unchanged;
    3. the best Latin token, lower-cased: tokens of length >= 3 beat
       two-character tokens, tokens containing a space or hyphen beat
       single words, then longer beats shorter, then string order.

    Args:
        identity: Object that may expose ``brand_tokens`` (an iterable of
            strings). A missing, ``None`` or empty value is treated as
            "no brand"; non-string entries are ignored.

    Returns:
        The chosen brand phrase, or ``""`` when no usable token exists.
    """
    # getattr's default only applies when the attribute is *missing*; an
    # explicit ``brand_tokens = None`` must not crash the search planner.
    raw_tokens = getattr(identity, "brand_tokens", None) or ()
    tokens = [token for token in raw_tokens if isinstance(token, str)]

    lowered = {token.lower() for token in tokens}
    for required, phrase in _MULTI_TOKEN_BRAND_PHRASES:
        if required <= lowered:
            return phrase

    cjk_tokens = [token for token in tokens if _CJK_CHAR_RE.search(token)]
    if cjk_tokens:
        return min(cjk_tokens, key=lambda token: (-len(token), token))

    latin_tokens = [
        token for token in tokens
        if len(token) >= 2 and _LATIN_CHAR_RE.search(token)
    ]
    if not latin_tokens:
        return ""
    best = min(
        latin_tokens,
        key=lambda token: (
            len(token) < 3,                        # two-letter tokens are a last resort
            " " not in token and "-" not in token,  # multi-word phrases first
            -len(token),
            token,
        ),
    )
    return best.lower()


def _is_generic_recall_safe(identity) -> bool:
    """Return True when a generic ``brand + product type`` query is allowed.

    The identity's ``product_type`` must be one of
    ``GENERIC_RECALL_SAFE_PRODUCT_TYPES`` and its ``searchable_name`` must not
    contain any of ``GENERIC_RECALL_BLOCK_TERMS``. A missing or ``None``
    ``searchable_name`` counts as an empty name.
    """
    product_type = getattr(identity, "product_type", None)
    # The isinstance guard keeps an unhashable value from raising in the
    # set-membership test below.
    if not isinstance(product_type, str):
        return False
    if product_type not in GENERIC_RECALL_SAFE_PRODUCT_TYPES:
        return False
    searchable_name = getattr(identity, "searchable_name", "") or ""
    return not any(term in searchable_name for term in GENERIC_RECALL_BLOCK_TERMS)


def _build_generic_recall_keywords(identity) -> list[str]:
    """Build the optional ``"<brand> <product type>"`` recall keyword list.

    Returns an empty list when the identity is not generic-recall safe or
    when no brand phrase / product type can be determined; otherwise the
    single combined keyword passed through ``_dedupe_terms``.
    """
    if not _is_generic_recall_safe(identity):
        return []
    brand = _primary_brand_phrase(identity)
    product_type = getattr(identity, "product_type", "") or ""
    if not brand or not product_type:
        return []
    return _dedupe_terms([f"{brand} {product_type}"])
def test_search_candidates_adds_safe_generic_brand_type_recall(monkeypatch):
    """A low-variant-risk face cream gets one extra ``brand + type`` query."""
    from services.competitor_price_feeder import _search_pchome_candidates
    from services.pchome_crawler import PChomeProduct

    class RecordingCrawler:
        """Crawler stub that records every (keyword, sort) it is asked for."""

        def __init__(self):
            self.calls = []

        def search_products(self, keyword, **kwargs):
            requested = (keyword, kwargs.get("sort"))
            self.calls.append(requested)
            return True, "ok", [pchome_hit]

    product_fields = {
        "product_id": "DDAB01-B5CREAM",
        "name": "理膚寶水 B5 全面修復霜 40ml",
        "price": 699,
        "original_price": 799,
        "discount": 13,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-B5CREAM",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    pchome_hit = PChomeProduct(**product_fields)

    # Pin the matcher score so only the search plan is under test.
    def fixed_score(*_args, **_kwargs):
        return SimpleNamespace(score=0.72)

    monkeypatch.setattr(
        "services.marketplace_product_matcher.score_marketplace_match",
        fixed_score,
    )

    stub = RecordingCrawler()
    _search_pchome_candidates(
        stub,
        "【理膚寶水】B5 全面修復霜 40ml",
        keywords=["理膚寶水 全面修復霜 b5 40ml"],
        momo_price=699,
    )

    expected_calls = [
        ("理膚寶水 全面修復霜 b5 40ml", None),
        ("理膚寶水 面霜", None),
    ]
    assert stub.calls == expected_calls


def test_search_candidates_does_not_add_generic_recall_for_variant_sensitive_lip(monkeypatch):
    """A shade-specific lip product keeps only its precise search keyword."""
    from services.competitor_price_feeder import _search_pchome_candidates
    from services.pchome_crawler import PChomeProduct

    class RecordingCrawler:
        """Crawler stub that records every (keyword, sort) it is asked for."""

        def __init__(self):
            self.calls = []

        def search_products(self, keyword, **kwargs):
            requested = (keyword, kwargs.get("sort"))
            self.calls.append(requested)
            return True, "ok", [pchome_hit]

    product_fields = {
        "product_id": "DDBH8E-ROMAND12",
        "name": "rom&nd 果汁唇釉 2.0 #12",
        "price": 299,
        "original_price": 399,
        "discount": 25,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDBH8E-ROMAND12",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    pchome_hit = PChomeProduct(**product_fields)

    # Pin the matcher score so only the search plan is under test.
    def fixed_score(*_args, **_kwargs):
        return SimpleNamespace(score=0.72)

    monkeypatch.setattr(
        "services.marketplace_product_matcher.score_marketplace_match",
        fixed_score,
    )

    stub = RecordingCrawler()
    _search_pchome_candidates(
        stub,
        "【rom&nd】果汁唇釉 2.0 #12",
        keywords=["romand 果汁唇釉 12"],
        momo_price=299,
    )

    expected_calls = [("romand 果汁唇釉 12", None)]
    assert stub.calls == expected_calls