diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index de498e0..21731e7 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.520 拆開過期價格刷新與搜尋救援:`run_expired_identity_refresh()` 只刷新既有 `identity_v2` PChome product_id,不再因少數 product_id 查不到或低分而同步進入慢速 `fresh_search_recovery`;缺失 / 低分候選交給 `run_retryable_candidate_revalidation()` 處理,避免正式刷新 500+ 筆時被外部搜尋拖死,讓價格新鮮度可以穩定批次回升。 - V10.519 對齊 Webcrumbs host data metadata 與新版比價覆蓋口徑:`services/webcrumbs_host_data_service.py` 會同時輸出身份覆蓋、價格新鮮、過期配對與待補抓數,讓 shared-ui plugin / 其他專案 proxy 不會把 `coverage_rate` 誤讀成價格可用率。 - V10.518 修正 PChome 比價覆蓋率口徑與新鮮度產線:`fetch_competitor_coverage()` 改拆「身份覆蓋」與「價格新鮮」,覆蓋率不再因 `expires_at` 過期被歸零;首頁 / 業績 / 成長頁同步顯示身份覆蓋、價格新鮮數與新鮮率。PChome 快取 TTL 預設由 6h 改 48h,並把每日 expired identity refresh / retryable / unmatched limits 改為環境變數,預設提高到 1200 / 240 / 240,避免 1800+ 已配對 identity 因刷新量不足長期失效。 - V10.517 補 PChome 近門檻比對安全 exact 與香氛 variant 防線:Lab52 齒妍堂汪汪隊嬰幼兒牙刷 2 入組可由低分區提升為 `exact / total_price / price_alert_exact`;Les nez 香氛融蠟燈不同款式、Time Leisure 香薰蠟燭單側香味款式會被留在覆核 / veto,不再進 recoverable 自動回刷,避免為了壓低 low_score 而錯配款式。 diff --git a/config.py b/config.py index 87093ef..0656780 100644 --- a/config.py +++ b/config.py @@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.519" +SYSTEM_VERSION = "V10.520" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index b026c13..82882c1 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -389,7 +389,7 @@ LEFT JOIN competitor_prices cp - `services/competitor_intel_repository.py` 是下游頁面、圖表、簡報的共用查詢出口;新增消費端不得各自硬寫不同 match threshold。所有競品報表的價差方向統一為 `MOMO - PChome`:正值代表 MOMO 較貴 / PChome 低價壓力,負值代表 MOMO 價格優勢;daily、growth、OpenClaw、PPT 不得使用反向定義。 - competitor PPT 不可只輸出 matched rows 造成覆蓋率假象;`fetch_competitor_comparison_results()` 必須用 `LEFT JOIN valid_competitor` 保留高營收/高價但尚未有效配對的 MOMO 商品,並帶出 `match_status`、`candidate_count`、`best_match_score` 與 `match_diagnostic`,讓簡報與 AI 文案明確區分「高信心比對」與「待補身份/價格」。 - `services/competitor_identity_revalidator.py` 可對既有 `competitor_prices` legacy row 離線重跑 `identity_v2`:只有新版 matcher 分數 `>= 0.76` 且無 hard veto 才補 `identity_v2` / `legacy_revalidated` tags;預設不刷新 `expires_at`,避免過期價格進入決策。 -- `CompetitorPriceFeeder.run_expired_identity_refresh()` 會優先刷新已通過 `identity_v2` 但 TTL 過期的 PChome row:直接用既有 `competitor_product_id` 批次呼叫 PChome 商品 API,再用新版 matcher 重新驗證名稱/規格/價格 sanity,通過後寫回 `competitor_prices` 與 `competitor_price_history`。這條路徑提升新鮮價格覆蓋率,但不降低 match threshold,也不讓過期價格直接進入決策。 +- `CompetitorPriceFeeder.run_expired_identity_refresh()` 會優先刷新已通過 `identity_v2` 但 TTL 過期的 PChome row:直接用既有 `competitor_product_id` 批次呼叫 PChome 商品 API,再用新版 matcher 重新驗證名稱/規格/價格 sanity,通過後寫回 `competitor_prices` 與 `competitor_price_history`。這條路徑提升新鮮價格覆蓋率,但不降低 match threshold,也不讓過期價格直接進入決策;若既有 `competitor_product_id` 已查不到或回傳候選低於門檻,expired refresh 只寫 `refresh_no_result` / 低信心 attempt 並標記 `fresh_search_recovery_deferred`,不得在同一條價格刷新路徑 fresh search 替換正式 identity。fresh search recovery 只保留給 retryable candidate revalidation / unmatched priority 等補抓路徑。 - `marketplace_product_matcher.py` 的擴充只能走「正向證據 + 反向 veto」:品牌一致、商品線/型號訊號強、價格合理且無 hard veto 時才允許 `strong_product_line_match` 加分;補充瓶/補充包/refill 與一般正裝不互相配對,分享組/加量組/明星組等組合包不得誤配單品。 - 近門檻規則必須成對補「召回 + 防錯配」測試:可召回者需有品牌、商品線、規格或具名 identity anchor,例如 MUJI 精油芬香護手霜、Mustela 慕之幼爽身潤膚乳、Herbacin 小甘菊護手霜;防錯配者需成為 hard veto,例如 M·A·C Macximal 柔霧/緞光唇膏質地、ERBE 指甲清垢棒/指甲緣刨刀功能、Schick 舒芙/舒綺女用除毛刀品線。不得用單一同規格或同品牌放寬全域門檻。 - 套組/買送/件數不同但品牌、核心商品線與單一基礎規格一致時,matcher 必須回傳 `comparison_mode='unit_comparable'` 與 `unit_comparable` reason;Feeder 只能寫入 `competitor_match_attempts.attempt_status='unit_comparable'` 或 `refresh_unit_comparable`,不得寫入 `competitor_prices`。Dashboard 與 `competitor_intel_repository` 必須用 `build_unit_price_comparison()` 產生每 ml / 每 g / 每入單位價證據,讓 PPT / AI 報表可說明「需單位價比較」而不是把總價當同款價差。商品看板在正式配對尚未成立時,仍必須顯示最佳候選 PChome 商品名稱、候選價與「候選價,需單位換算」說明,讓人工覆核可直接看見下一步;daily/growth、PPT 與 OpenClaw 摘要不得自建查詢,需消費 `fetch_competitor_review_queue()` 與 coverage 的 `unit_comparable_count`。若任一側含多個不同容量/重量規格,視為多品項套組,不可進 `unit_comparable`。 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index 70ca83b..a5cc9ce 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-31:Webcrumbs 共用 UI Runtime 與市場情報 writer approval +- **V10.520 PChome 過期價格刷新快慢路徑拆分**: `run_expired_identity_refresh()` 改為只刷新已確認 `identity_v2` 的既有 PChome product_id;若 product_id 已查不到或回傳後低分,不再同步跑慢速 fresh search recovery,而是記錄 `refresh_no_result` / low-score 並交給 `run_retryable_candidate_revalidation()` 的近門檻救援路徑。這能避免正式回刷 500+ 筆時被少數缺失 ID 拖到長時間卡住,讓價格新鮮度批次回升更可控。 - **V10.519 Webcrumbs host data metadata 對齊新覆蓋率口徑**: Webcrumbs host data metadata 同步輸出 `fresh_match_count`、`fresh_match_rate`、`stale_match_count` 與 `pending_match_count`,讓共用 UI / 其他專案 proxy 能分清身份覆蓋與價格新鮮度,不再只看到舊的 matched_count / coverage_rate。 - **V10.518 PChome 覆蓋率與新鮮度拆分**: 修正比價監控總覽把價格 TTL 過期誤算成「未覆蓋」的產品口徑,`fetch_competitor_coverage()` 現在分開回報 `valid_matches`(identity 覆蓋)、`fresh_matches`(價格新鮮)、`stale_matches` 與 `fresh_match_rate`;首頁、業績與成長頁同步顯示身份覆蓋與價格新鮮。PChome 快取 TTL 預設由 6h 改 48h,並將每日 expired identity refresh / retryable / unmatched limits 改為環境變數,預設提升到 1200 / 240 / 240,避免已建立 identity 的商品因刷新量不足被長期視為無覆蓋。 - **V10.517 PChome near-threshold 比對 hotfix**: 新增 Lab52 齒妍堂汪汪隊嬰幼兒牙刷 2 入組 focused exact identity,讓真同款可進 `exact / total_price / price_alert_exact`;同時補 Les nez 香氛融蠟燈款式選擇 gap 與 Time Leisure 香薰蠟燭香味 gap,將不同款式 / 單側香味候選留在覆核或 veto,不讓它們進 recoverable 自動回刷。測試鎖住 Dashing Diva、Pavaruni、Recipe Box、Lactacyd 與 feeder recoverable 邊界。 diff --git a/services/competitor_price_feeder.py b/services/competitor_price_feeder.py index ac687fd..a066d84 100644 --- a/services/competitor_price_feeder.py +++ b/services/competitor_price_feeder.py @@ -1794,6 +1794,7 @@ class CompetitorPriceFeeder: skus: list, source: str = "pchome", label: str = "已確認身份價格刷新", + allow_missing_recovery: bool = True, ) -> FeederResult: start = time.time() @@ -1837,6 +1838,25 @@ class CompetitorPriceFeeder: try: product = product_map.get(_product_id_key(competitor_product_id)) if not product: + if not allow_missing_recovery: + self._record_match_attempt( + sku, + momo_name, + momo_product_id=momo_product_id, + momo_price=momo_price, + search_terms=search_terms, + candidate_count=0, + attempt_status="refresh_no_result", + error_message=( + f"PChome product_id not returned: {competitor_product_id}; " + "fresh_search_recovery_deferred" + ), + source=source, + ) + skipped_no += 1 + attempts_written += 1 + continue + recovered, recovery_terms, recovery_candidate_count = _recover_low_score_with_fresh_search( crawler, momo_name, @@ -2029,20 +2049,21 @@ class CompetitorPriceFeeder: if score < MIN_MATCH_SCORE: recovery_terms: list[str] = [] recovery_candidate_count = 0 - recovered, recovery_terms, recovery_candidate_count = _recover_low_score_with_fresh_search( - crawler, - momo_name, - momo_price=momo_price, - existing_product_id=competitor_product_id, - ) - if recovered: - recovered_product, recovered_score, recovered_diagnostics = recovered - if ( - recovered_score > score - or getattr(diagnostics, "hard_veto", False) - and not getattr(recovered_diagnostics, "hard_veto", False) - ): - best_product, score, diagnostics = recovered_product, recovered_score, recovered_diagnostics + if allow_missing_recovery: + recovered, recovery_terms, recovery_candidate_count = _recover_low_score_with_fresh_search( + crawler, + momo_name, + momo_price=momo_price, + existing_product_id=competitor_product_id, + ) + if recovered: + recovered_product, recovered_score, recovered_diagnostics = recovered + if ( + recovered_score > score + or getattr(diagnostics, "hard_veto", False) + and not getattr(recovered_diagnostics, "hard_veto", False) + ): + best_product, score, diagnostics = recovered_product, recovered_score, recovered_diagnostics if score >= MIN_MATCH_SCORE: extras = ["refresh_known_identity"] @@ -2144,7 +2165,11 @@ class CompetitorPriceFeeder: best_product=best_product, best_score=score, diagnostics=diagnostics, - error_message=_format_match_diagnostics(diagnostics), + error_message=( + _format_match_diagnostics(diagnostics) + if allow_missing_recovery + else f"fresh_search_recovery_deferred; {_format_match_diagnostics(diagnostics)}" + ), source=source, ) skipped_low += 1 @@ -2286,7 +2311,12 @@ class CompetitorPriceFeeder: logger.error(f"[Feeder] 讀取過期 identity_v2 商品失敗: {e}") return FeederResult(0, 0, 0, 0, 1, 0.0) - return self._run_known_identity_refresh_items(skus, source=source, label="identity_v2 過期價格刷新") + return self._run_known_identity_refresh_items( + skus, + source=source, + label="identity_v2 過期價格刷新", + allow_missing_recovery=False, + ) def run_retryable_candidate_revalidation( self, diff --git a/tests/test_competitor_match_attempts_persistence.py b/tests/test_competitor_match_attempts_persistence.py index 99b5263..5184189 100644 --- a/tests/test_competitor_match_attempts_persistence.py +++ b/tests/test_competitor_match_attempts_persistence.py @@ -102,6 +102,7 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes(): assert "identity_v2" in source assert "_fetch_expired_identity_skus" in source assert "run_expired_identity_refresh" in source + assert "allow_missing_recovery=False" in source assert "_fetch_retryable_candidate_skus" in source assert "run_retryable_candidate_revalidation" in source retryable_source = source.split("def _fetch_retryable_candidate_skus", 1)[1].split( @@ -1664,6 +1665,43 @@ def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch): assert attempts[0]["attempt_status"] == "matched" +def test_competitor_feeder_expired_refresh_defers_missing_known_id_recovery(monkeypatch): + from services.competitor_price_feeder import CompetitorPriceFeeder + + class FakeCrawler: + def __init__(self, *_args, **_kwargs): + pass + + def fetch_product_details(self, product_ids, batch_size=20): + assert product_ids == ["DDAB01-MISSING"] + return True, "ok", [] + + def search_products(self, *_args, **_kwargs): + raise AssertionError("expired identity refresh should not run fresh search recovery") + + monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) + feeder = CompetitorPriceFeeder(engine=object()) + attempts = [] + monkeypatch.setattr( + feeder, + "_record_match_attempt", + lambda *args, **kwargs: attempts.append(kwargs), + ) + + result = feeder._run_known_identity_refresh_items([{ + "sku": "9823407", + "name": "【Summer’s Eve 舒摩兒】浴潔露237ml 單入任選(私密清潔 經典防護王)", + "product_id": 4864, + "momo_price": 441, + "competitor_product_id": "DDAB01-MISSING", + }], allow_missing_recovery=False) + + assert result.matched == 0 + assert result.skipped_no_result == 1 + assert attempts[0]["attempt_status"] == "refresh_no_result" + assert "fresh_search_recovery_deferred" in attempts[0]["error_message"] + + def test_competitor_feeder_records_missing_known_id_low_score_candidate(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder from services.pchome_crawler import PChomeProduct