diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index bed572a..924ec54 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.527 收斂 PChome 過期 identity 搜尋救援隊列:`recover-stale` 不再直接吃全部過期 `identity_v2`,改走 `_fetch_expired_identity_recovery_skus()`,只收既有正式診斷為 `exact_identity / total_price / price_alert_exact` 且無 variant、catalog、commercial condition、count、bundle、unit-price 等阻擋理由的舊配對;名稱含任選、多款、香味、色號、即期、融燭燈、香氛蠟燭等高風險訊號也先排除,避免慢速 fresh search 把人工覆核型 stale pair 全部掃進來。 - V10.526 將 PChome 近門檻重評池與過期 identity 搜尋救援變成可觀測、可操作產線:`preview_retryable_candidate_revalidation()` / `preview_expired_identity_recovery()` 都是 read-only,不啟動 PChome 搜尋、不呼叫 LLM、不寫 DB;`/api/ai/pchome-match/backfill/status` 回傳 `revalidation_preview` / `stale_recovery_preview`,Dashboard 顯示「可重評 / 窄門 / 可救援」數字,並新增「救援過期 40 筆」按鈕呼叫 `/api/ai/pchome-match/recover-stale`,只在舊 PChome ID 缺失或低分時走受控 fresh-search recovery,最後仍經 hard veto、auto price write safety 與 overwrite protection。 - V10.525 補高分 review-gated exact 舊候選重評入口:`run_retryable_candidate_revalidation()` 仍以 `low_score / refresh_low_score / recoverable_low_score` 為主,只額外允許 Beauty Foot / KAMERIA / TS6 / Vaseline 這批已補 focused exact 規則、舊分數 >= 0.95、無商業狀態 / 款式 / 入數 / 組合阻擋理由的 `true_low_confidence` 進窄門重評,讓 V10.523 的安全規則可以實際回收舊資料,不把所有人工審核候選打開。 - V10.524 將「待刷新」變成可操作入口:商品看板 PChome 補抓產線新增「刷新過期 120 筆」按鈕,呼叫 `/api/ai/pchome-match/refresh-stale` 背景執行 `run_expired_identity_refresh()`,只刷新既有 `identity_v2` 的 PChome product_id,不跑 fresh search recovery、不呼叫 LLM,完成後重算 AI 挑品並清除 Dashboard / 競價快取。 diff --git a/config.py b/config.py index 22b2cfe..e325613 100644 --- a/config.py +++ b/config.py @@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.526" +SYSTEM_VERSION = "V10.527" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index c078f1d..ee94530 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -90,7 +90,7 @@ SQL漏斗(~300筆) - 配對來源仍以 PChome crawler 真實搜尋結果為準;無競品資料時不生成挑品。 - 比對覆蓋率補強入口:`POST /api/ai/pchome-match/backfill`,優先補抓仍無有效 PChome 配對的高價 ACTIVE 商品,完成後自動重算 AI 挑品清單。 - 過期價格刷新入口:`POST /api/ai/pchome-match/refresh-stale`,只針對已建立 `identity_v2` 但 `expires_at` 過期的 PChome product_id 執行 `run_expired_identity_refresh()`;不得跑 fresh search recovery,不得呼叫 LLM,完成後重算 AI 挑品並清除 Dashboard / competitor intel cache。 -- 過期 identity 搜尋救援入口:`POST /api/ai/pchome-match/recover-stale` 僅供操作員手動觸發,對已過期 `identity_v2` 先走既有 PChome product_id refresh;只有舊 ID 查無商品或重評低於門檻時,才允許受控 fresh search recovery。這條路徑可抓 PChome,但不得呼叫 LLM;正式寫入仍必須通過 matcher、hard veto、auto price write safety 與 overwrite protection。 +- 過期 identity 搜尋救援入口:`POST /api/ai/pchome-match/recover-stale` 僅供操作員手動觸發,對已過期 `identity_v2` 先走既有 PChome product_id refresh;只有舊 ID 查無商品或重評低於門檻時,才允許受控 fresh search recovery。救援隊列必須先排除 variant、catalog、commercial condition、count、bundle、unit-price 與任選 / 多款 / 香味 / 色號 / 即期 / 融燭燈 / 香氛蠟燭等高風險名稱訊號。這條路徑可抓 PChome,但不得呼叫 LLM;正式寫入仍必須通過 matcher、hard veto、auto price write safety 與 overwrite protection。 - 補抓狀態入口:`GET /api/ai/pchome-match/backfill/status` 除背景任務狀態外,必須回傳 read-only coverage snapshot:`active_with_price` / `valid_matches` / `match_rate` / `fresh_matches` / `fresh_match_rate` / `stale_matches` / `pending` / `actionable_review_count`,供 Dashboard 顯示目前該刷新過期價格或補抓未搜尋商品;此端點不寫 DB、不呼叫 LLM、不抓外站。 - 排程閉環:`run_pchome_match_backfill_task` 每日 10:30 執行,補抓 PChome 待比對商品、寫入歷史價格,再重算 `strategy='product_pick'` 清單。 - PChome / MOMO 競價摘要出口 `services/competitor_intel_repository.py` 使用 30 分鐘共享快取(`COMPETITOR_INTEL_CACHE_TTL_SECONDS` 可調),避免 `/growth_analysis`、`/daily_sales`、PPT/AI 報表每次請求重跑昂貴覆蓋率與價差趨勢查詢;`run_competitor_price_feeder_task` 與 PChome backfill 完成後會主動清除快取。快取只包摘要輸出,不改 matcher 的高信心門檻與 identity_v2 準確性規則。 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index f03b66f..6f90cc4 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-06-01:PChome 比價新鮮度操作閉環 +- **V10.527 PChome 過期 identity 搜尋救援隊列收斂**: V10.526 production smoke 發現直接對全部過期 `identity_v2` 做 rescue 會把香氛 / 色號 / 目錄款 / 商業狀態差異等人工覆核型 stale pair 送進慢速 fresh search,20 筆耗時 361 秒且 0 筆成功。新增 `_fetch_expired_identity_recovery_skus()` 作為救援專用隊列,只收既有正式診斷為 `exact_identity / total_price / price_alert_exact` 且無 variant、catalog、commercial condition、count、bundle、unit-price 等阻擋理由的舊配對;名稱含任選、多款、香味、色號、即期、融燭燈、香氛蠟燭等高風險訊號先排除。 - **V10.526 PChome 重評預覽與過期 identity 搜尋救援**: `/api/ai/pchome-match/backfill/status` 新增 60 秒快取的 `revalidation_preview` 與 `stale_recovery_preview`,Dashboard 補抓產線顯示「可重評 / 窄門 / 可救援」數字;兩個 preview 都只讀 DB,不啟動 PChome 搜尋、不呼叫 LLM、不寫 `competitor_match_attempts` 或正式價格表。另新增 `/api/ai/pchome-match/recover-stale` 與「救援過期 40 筆」按鈕,對過期 `identity_v2` 先查既有 product_id,只有在舊 ID 缺失或低分時才走受控 fresh-search recovery,最後仍經 hard veto、auto price write safety 與 overwrite protection 才能寫入正式比價。 - **V10.525 高分 review-gated exact 舊候選窄門重評**: `run_retryable_candidate_revalidation()` 保持主戰場為 `low_score / refresh_low_score / recoverable_low_score`,只額外收 Beauty Foot、KAMERIA、TS6、Vaseline 這批已由 V10.523 補 focused exact 規則的 `true_low_confidence` 舊候選。入口要求舊分數 >= 0.95、仍為 `exact_identity`、具備 `strong_exact_spec_match`,且不得含 `commercial_condition_gap`、variant、count、bundle、refill 等阻擋理由;讓已驗證真同款可被回刷,不把整個人工審核池自動打開。 - **V10.524 PChome 過期價格刷新手動入口**: 商品看板 PChome 補抓產線新增「刷新過期 120 筆」按鈕與 `/api/ai/pchome-match/refresh-stale`,背景執行既有 `run_expired_identity_refresh()`,只刷新已建立 `identity_v2` 的 PChome product_id,不跑 fresh search recovery、不呼叫 LLM;完成後重算 AI 挑品並清除 Dashboard / competitor intel cache,讓 `stale_matches` 從觀測指標變成可直接操作的任務。 diff --git a/services/competitor_price_feeder.py b/services/competitor_price_feeder.py index 5fc2b35..aac764a 100644 --- a/services/competitor_price_feeder.py +++ b/services/competitor_price_feeder.py @@ -96,6 +96,39 @@ REVALIDATABLE_REVIEW_SQL_REASON_LIST = ", ".join( REVALIDATABLE_REVIEW_BLOCK_SQL_REASON_LIST = ", ".join( f"'{reason}'" for reason in sorted(REVALIDATABLE_REVIEW_BLOCK_REASONS) ) +STALE_IDENTITY_RECOVERY_BLOCK_REASONS = { + "accessory_case_conflict", + "aroma_lamp_style_selection_gap", + "aroma_scent_variant_conflict", + "bundle_offer_conflict", + "candle_catalog_selection_gap", + "catalog_count_omission", + "commercial_condition_gap", + "count_conflict", + "makeup_catalog_selection_gap", + "makeup_finish_conflict", + "makeup_usage_conflict", + "multi_component_conflict", + "multi_component_count_conflict", + "named_component_quantity_conflict", + "nail_tool_function_conflict", + "price_ratio_extreme", + "price_ratio_wide", + "product_line_conflict", + "refill_pack_conflict", + "romand_lip_line_conflict", + "unit_comparable", + "variant_descriptor_conflict", + "variant_option_conflict", + "variant_selection_review", +} +STALE_IDENTITY_RECOVERY_BLOCK_SQL_REASON_LIST = ", ".join( + f"'{reason}'" for reason in sorted(STALE_IDENTITY_RECOVERY_BLOCK_REASONS) +) +STALE_IDENTITY_RECOVERY_BLOCK_NAME_PATTERN = ( + r"(任選|多款|色號|顏色|款式|香味|香調|即期|短效|航空版|" + r"融燭燈|融蠟燈|香氛蠟燭|精油蠟燭|蠟燭|限定|組合任選)" +) # ── Feeder 結果 ─────────────────────────────────────── @dataclass @@ -1259,7 +1292,7 @@ class CompetitorPriceFeeder: 不查 PChome、不重新比對、不寫 attempts / prices。 """ preview_limit = max(1, min(int(limit), 120)) - rows = self._fetch_expired_identity_skus(limit=preview_limit) + rows = self._fetch_expired_identity_recovery_skus(limit=preview_limit) examples: list[dict] = [] for row in rows[:5]: examples.append({ @@ -1280,6 +1313,85 @@ class CompetitorPriceFeeder: "boundary": "read_only_no_crawl_no_llm_no_db_write", } + def _fetch_expired_identity_recovery_skus(self, limit: int = 40) -> list: + """ + 取得適合 fresh-search recovery 的過期 identity_v2 商品。 + + 這比一般 expired refresh 更窄:只收過去已是 exact / total_price / + price_alert_exact 的正式配對,且排除款式、香味、型態、入數、商業狀態等 + 高風險診斷或名稱訊號。避免把本來應該人工覆核的 stale pair 送進慢速搜尋。 + """ + if self.engine is None: + raise RuntimeError("需要注入 SQLAlchemy engine") + + from sqlalchemy import text + sql = text(f""" + WITH latest_momo AS ( + SELECT + p.id AS product_id, + p.i_code AS sku, + p.name, + p.category, + pr.price AS momo_price, + ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC) AS rn + FROM products p + JOIN price_records pr ON pr.product_id = p.id + WHERE p.status = 'ACTIVE' + ) + SELECT + lm.product_id, + lm.sku, + lm.name, + lm.category, + lm.momo_price, + cp.competitor_product_id, + cp.competitor_product_name, + cp.match_score, + cp.expires_at + FROM latest_momo lm + JOIN competitor_prices cp + ON cp.sku = lm.sku + AND cp.source = 'pchome' + AND cp.competitor_product_id IS NOT NULL + AND cp.competitor_product_id <> '' + AND cp.expires_at IS NOT NULL + AND cp.expires_at <= CURRENT_TIMESTAMP + AND COALESCE(cp.match_score, 0) >= :match_score_floor + AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2' + WHERE lm.rn = 1 + AND ( + COALESCE(cp.tags, '[]'::jsonb) ? 'price_basis_total_price' + OR cp.match_diagnostic_json->>'price_basis' = 'total_price' + ) + AND ( + COALESCE(cp.tags, '[]'::jsonb) ? 'alert_tier_price_alert_exact' + OR cp.match_diagnostic_json->>'alert_tier' = 'price_alert_exact' + ) + AND COALESCE(cp.match_diagnostic_json->>'comparison_mode', 'exact_identity') = 'exact_identity' + AND COALESCE(cp.hard_veto, false) = false + AND NOT ( + COALESCE(cp.match_diagnostic_json->'reasons', '[]'::jsonb) + ?| array[{STALE_IDENTITY_RECOVERY_BLOCK_SQL_REASON_LIST}] + ) + AND COALESCE(lm.name, '') !~* :blocked_name_pattern + AND COALESCE(cp.competitor_product_name, '') !~* :blocked_name_pattern + ORDER BY + cp.expires_at ASC, + lm.momo_price DESC NULLS LAST, + lm.sku + LIMIT :limit + """) + with self.engine.connect() as conn: + rows = conn.execute( + sql, + { + "limit": max(1, min(int(limit), 120)), + "match_score_floor": MIN_MATCH_SCORE, + "blocked_name_pattern": STALE_IDENTITY_RECOVERY_BLOCK_NAME_PATTERN, + }, + ).fetchall() + return [dict(r._mapping) for r in rows] + def _fetch_expired_identity_skus(self, limit: int = 120) -> list: """ 取得 identity_v2 已確認、但 PChome 價格快取過期的商品。 @@ -2497,7 +2609,7 @@ class CompetitorPriceFeeder: safety、overwrite protection 才能寫入正式 competitor_prices。 """ try: - skus = self._fetch_expired_identity_skus(limit=max(1, min(int(limit), 120))) + skus = self._fetch_expired_identity_recovery_skus(limit=max(1, min(int(limit), 120))) except Exception as e: logger.error(f"[Feeder] 讀取過期 identity_v2 搜尋救援商品失敗: {e}") return FeederResult(0, 0, 0, 0, 1, 0.0) diff --git a/tests/test_competitor_match_attempts_persistence.py b/tests/test_competitor_match_attempts_persistence.py index e3f5c26..f84ed0c 100644 --- a/tests/test_competitor_match_attempts_persistence.py +++ b/tests/test_competitor_match_attempts_persistence.py @@ -108,6 +108,8 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes(): assert "_fetch_retryable_candidate_skus" in source assert "preview_retryable_candidate_revalidation" in source assert "preview_expired_identity_recovery" in source + assert "_fetch_expired_identity_recovery_skus" in source + assert "STALE_IDENTITY_RECOVERY_BLOCK_REASONS" in source assert "read_only_no_crawl_no_llm_no_db_write" in source assert "run_retryable_candidate_revalidation" in source assert "run_expired_identity_search_recovery" in source @@ -282,7 +284,7 @@ def test_competitor_feeder_expired_identity_recovery_preview_is_read_only(monkey }, ] - monkeypatch.setattr(feeder, "_fetch_expired_identity_skus", fake_fetch) + monkeypatch.setattr(feeder, "_fetch_expired_identity_recovery_skus", fake_fetch) payload = feeder.preview_expired_identity_recovery(limit=2) @@ -306,7 +308,7 @@ def test_competitor_feeder_expired_search_recovery_allows_fresh_recovery(monkeyp calls.append((items, kwargs)) return FeederResult(1, 1, 0, 0, 0, 0.1) - monkeypatch.setattr(feeder, "_fetch_expired_identity_skus", fake_fetch) + monkeypatch.setattr(feeder, "_fetch_expired_identity_recovery_skus", fake_fetch) monkeypatch.setattr(feeder, "_run_known_identity_refresh_items", fake_run) result = feeder.run_expired_identity_search_recovery(limit=999) @@ -1826,7 +1828,7 @@ def test_competitor_feeder_expired_recovery_allows_fresh_search(monkeypatch): captured = {} monkeypatch.setattr( feeder, - "_fetch_expired_identity_skus", + "_fetch_expired_identity_recovery_skus", lambda limit: [{"sku": "STALE-1", "competitor_product_id": "OLD-PID"}], )