diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index 1882bb8..238811b 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.530 輕量化 PChome 狀態 preview 並暫停 `recover-stale` 主操作入口:`_fetch_retryable_candidate_skus()` 先從最新 `competitor_match_attempts` 縮小可重評候選,再用 `JOIN LATERAL` 只取該 SKU 最新 MOMO 價,避免 `/api/ai/pchome-match/backfill/status` 因 `price_records` 全量 window scan 超時;正式 smoke 同時顯示過期 identity fresh-search rescue 5 筆耗時約 109 秒且 0 筆成功,因此 Dashboard 移除「救援過期 40 筆」按鈕,只保留 `stale_recovery_preview` 的只讀「可救援」觀測;後端 `/api/ai/pchome-match/recover-stale` 改由 `PCHOME_STALE_RECOVERY_ENABLED=true` 顯式開關才可執行,避免操作員誤按低成功率慢路徑拖住 worker。 - V10.529 補強 `recover-stale` 名稱風險擋詞:過期 identity 搜尋救援會先排除 `+`、`x2`、`*2` 等組合暗示,以及湛藍、麋香、海洋、玫瑰、薰衣草、生理呵護、日用型、清爽、潤澤等常見變體 / 香味 / 版本詞,避免同品牌同規格但不同香味、不同膚感、不同使用情境的 stale pair 進慢速 fresh search。 - V10.528 將 `recover-stale` 救援 preview 改成輕量雙階段篩選:SQL 從過期 `competitor_prices` 小集合出發,只做 identity_v2、過期、exact/total_price/price_alert_exact 等必要條件並限制候選量,再用 `JOIN LATERAL` 取 ACTIVE 商品最新 MOMO 價;variant / catalog / commercial condition / 高風險名稱訊號改在 Python 對小樣本過濾,避免正式站看板狀態端點因全量 price_records、JSONB + regex 過重查詢拖垮 app worker。 - V10.527 收斂 PChome 過期 identity 搜尋救援隊列:`recover-stale` 不再直接吃全部過期 `identity_v2`,改走 `_fetch_expired_identity_recovery_skus()`,只收既有正式診斷為 `exact_identity / total_price / price_alert_exact` 且無 variant、catalog、commercial condition、count、bundle、unit-price 等阻擋理由的舊配對;名稱含任選、多款、香味、色號、即期、融燭燈、香氛蠟燭等高風險訊號也先排除,避免慢速 fresh search 把人工覆核型 stale pair 全部掃進來。 diff --git a/config.py b/config.py index bcb03c6..fddd095 100644 --- a/config.py +++ b/config.py @@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.529" +SYSTEM_VERSION = "V10.530" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index 5792911..04342fe 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -90,7 +90,7 @@ SQL漏斗(~300筆) - 配對來源仍以 PChome crawler 真實搜尋結果為準;無競品資料時不生成挑品。 - 比對覆蓋率補強入口:`POST /api/ai/pchome-match/backfill`,優先補抓仍無有效 PChome 配對的高價 ACTIVE 商品,完成後自動重算 AI 挑品清單。 - 過期價格刷新入口:`POST /api/ai/pchome-match/refresh-stale`,只針對已建立 `identity_v2` 但 `expires_at` 過期的 PChome product_id 執行 `run_expired_identity_refresh()`;不得跑 fresh search recovery,不得呼叫 LLM,完成後重算 AI 挑品並清除 Dashboard / competitor intel cache。 -- 過期 identity 搜尋救援入口:`POST /api/ai/pchome-match/recover-stale` 僅供操作員手動觸發,對已過期 `identity_v2` 先走既有 PChome product_id refresh;只有舊 ID 查無商品或重評低於門檻時,才允許受控 fresh search recovery。救援隊列必須先排除 variant、catalog、commercial condition、count、bundle、unit-price 與任選 / 多款 / 香味 / 色號 / 即期 / 融燭燈 / 香氛蠟燭 / `+` / `xN` / `*N` / 具名香味或膚感版本等高風險名稱訊號。這條路徑可抓 PChome,但不得呼叫 LLM;正式寫入仍必須通過 matcher、hard veto、auto price write safety 與 overwrite protection。 +- 過期 identity 搜尋救援入口:`POST /api/ai/pchome-match/recover-stale` 預設必須關閉主操作入口,僅保留只讀 preview;正式 smoke 顯示小批次成功率不足且耗時偏高時,不得在 Dashboard 顯示日常操作按鈕。若需操作員手動執行,必須先明確設定 `PCHOME_STALE_RECOVERY_ENABLED=true`,再對已過期 `identity_v2` 先走既有 PChome product_id refresh;只有舊 ID 查無商品或重評低於門檻時,才允許受控 fresh search recovery。救援隊列必須先排除 variant、catalog、commercial condition、count、bundle、unit-price 與任選 / 多款 / 香味 / 色號 / 即期 / 融燭燈 / 香氛蠟燭 / `+` / `xN` / `*N` / 具名香味或膚感版本等高風險名稱訊號。這條路徑可抓 PChome,但不得呼叫 LLM;正式寫入仍必須通過 matcher、hard veto、auto price write safety 與 overwrite protection。 - 補抓狀態入口:`GET /api/ai/pchome-match/backfill/status` 除背景任務狀態外,必須回傳 read-only coverage snapshot:`active_with_price` / `valid_matches` / `match_rate` / `fresh_matches` / `fresh_match_rate` / `stale_matches` / `pending` / `actionable_review_count`,供 Dashboard 顯示目前該刷新過期價格或補抓未搜尋商品;此端點不寫 DB、不呼叫 LLM、不抓外站。 - 排程閉環:`run_pchome_match_backfill_task` 每日 10:30 執行,補抓 PChome 待比對商品、寫入歷史價格,再重算 `strategy='product_pick'` 清單。 - PChome / MOMO 競價摘要出口 `services/competitor_intel_repository.py` 使用 30 分鐘共享快取(`COMPETITOR_INTEL_CACHE_TTL_SECONDS` 可調),避免 `/growth_analysis`、`/daily_sales`、PPT/AI 報表每次請求重跑昂貴覆蓋率與價差趨勢查詢;`run_competitor_price_feeder_task` 與 PChome backfill 完成後會主動清除快取。快取只包摘要輸出,不改 matcher 的高信心門檻與 identity_v2 準確性規則。 @@ -98,7 +98,7 @@ SQL漏斗(~300筆) - PChome re-score 回收線:`rescore_accepted_current` 只能表示最新版 matcher 判定「可人工採用」,不可直接寫入正式 `competitor_prices`;`fetch_competitor_coverage()` 必須輸出 `rescore_accepted_count`,Dashboard、daily/growth 與 OpenClaw 競品摘要都要把「重算可採用待審」獨立呈現,避免和一般低信心/單位價覆核混在一起。 - PChome 低信心操作分流:Dashboard 與 read-only `/api/pchome-review/queue` 必須把近門檻可救、證據不足、低信心舊候選拆成 `recoverable_low_score`、`true_low_confidence`、`legacy_low_score` 三個可篩選桶;廣義 `low_score` 僅作 repository/export 相容查詢,不可在 UI 中冒充單一操作分流。 - `run_retryable_candidate_revalidation()` 的自動回刷主戰場仍限 `low_score` / `refresh_low_score` / `recoverable_low_score`;`true_low_confidence` 只有在已補 focused exact 規則的窄範圍品線、舊分數 >= 0.95、`comparison_mode='exact_identity'`、含 `strong_exact_spec_match` 且不含 commercial / variant / count / bundle / refill 等阻擋理由時,才可進入重評,不得全面打開人工審核池。 -- `/api/ai/pchome-match/backfill/status` 必須把近門檻重評池與過期 identity 救援池以只讀 `revalidation_preview` / `stale_recovery_preview` 曝光給操作員;預覽只復用正式候選 SQL 並受 limit / 60 秒快取限制,不啟動 PChome 搜尋、不呼叫 LLM、不寫 `competitor_match_attempts` / `competitor_prices`。救援 preview 必須從過期 `competitor_prices` 小集合出發並用 `JOIN LATERAL` 取最新 MOMO 價,不得掃全量 `price_records`;其中 `review_gated_count` 僅代表窄門 `true_low_confidence` exact 候選,不得被解讀為全量人工池可自動回刷。 +- `/api/ai/pchome-match/backfill/status` 必須把近門檻重評池與過期 identity 救援池以只讀 `revalidation_preview` / `stale_recovery_preview` 曝光給操作員;預覽只復用正式候選 SQL 並受 limit / 60 秒快取限制,不啟動 PChome 搜尋、不呼叫 LLM、不寫 `competitor_match_attempts` / `competitor_prices`。重評 preview 必須先從最新 `competitor_match_attempts` 縮小候選,再用 `JOIN LATERAL` 取單一最新 MOMO 價;救援 preview 必須從過期 `competitor_prices` 小集合出發並用 `JOIN LATERAL` 取最新 MOMO 價,兩者都不得掃全量 `price_records`;Dashboard 只能顯示「可救援」觀測值,不得在未開啟 `PCHOME_STALE_RECOVERY_ENABLED` 時提供 recover-stale 執行按鈕;其中 `review_gated_count` 僅代表窄門 `true_low_confidence` exact 候選,不得被解讀為全量人工池可自動回刷。 - PChome re-score audit 預設必須先取每個 SKU 的最新 `competitor_match_attempts` 狀態,再套用 status / reason 篩選;舊低信心歷史候選只能透過 `--include-historical-candidates` 明確進入考古掃描,避免已入隊、已否決或已修正 SKU 被舊紀錄重新推回報表。 - production re-score `--apply-accepted` 僅可追加 `rescore_accepted_current` attempt 給人工覆核;執行後需清除 Dashboard / competitor intel cache,且必須抽查 `competitor_prices` / `competitor_price_history` 未新增正式價差。 - production re-score 若曾把 `variant_selection_review` 追加成 `rescore_accepted_current`,必須用 `audit_competitor_match_attempt_rescore.py --retract-variant-accepted` 追加最新 `true_low_confidence` 退回列;此路徑只寫 `competitor_match_attempts`,不得刪歷史紀錄,也不得寫 `competitor_prices` / `competitor_price_history`。 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index 009dcbb..c4d8029 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-06-01:PChome 比價新鮮度操作閉環 +- **V10.530 retryable preview 輕量化與 recover-stale 安全閘**: 正式站 profiling 證實 `/api/ai/pchome-match/backfill/status` 剩餘瓶頸在 `revalidation_preview`,約 12 秒;`_fetch_retryable_candidate_skus()` 改為先取每個 SKU 最新 attempt 並縮小到可重評候選,再 `JOIN LATERAL` 取單一最新 MOMO 價,不再對全量 `price_records` 做商品 window scan。正式 smoke 也顯示過期 identity fresh-search rescue 小批次 5 筆耗時約 109 秒且 0 筆成功,因此 Dashboard 移除「救援過期 40 筆」按鈕,只保留 `stale_recovery_preview` 只讀觀測;後端 `/api/ai/pchome-match/recover-stale` 保留但需明確設定 `PCHOME_STALE_RECOVERY_ENABLED=true` 才能執行,避免低成功率慢任務拖住正式 worker。 - **V10.529 recover-stale 名稱風險擋詞補強**: 過期 identity 搜尋救援新增 `+`、`x2`、`*2` 等組合暗示,以及湛藍、麋香、海洋、玫瑰、薰衣草、生理呵護、日用型、清爽、潤澤等常見變體 / 香味 / 版本詞;避免同品牌同規格但不同香味、不同膚感或不同使用情境的 stale pair 進慢速 fresh search。 - **V10.528 recover-stale preview 輕量化**: V10.527 的救援隊列在正式站 preview 時曾造成 status API 超時。改為雙階段篩選:SQL 從過期 `competitor_prices` 小集合出發,只做 identity_v2、過期、exact/total_price/price_alert_exact 等必要條件並限制候選量,再用 `JOIN LATERAL` 取 ACTIVE 商品最新 MOMO 價;variant / catalog / commercial condition / 高風險名稱訊號改在 Python 對小樣本過濾,避免 `/api/ai/pchome-match/backfill/status` 因全量 price_records、JSONB + regex preview 查詢拖垮。 - **V10.527 PChome 過期 identity 搜尋救援隊列收斂**: V10.526 production smoke 發現直接對全部過期 `identity_v2` 做 rescue 會把香氛 / 色號 / 目錄款 / 商業狀態差異等人工覆核型 stale pair 送進慢速 fresh search,20 筆耗時 361 秒且 0 筆成功。新增 `_fetch_expired_identity_recovery_skus()` 作為救援專用隊列,只收既有正式診斷為 `exact_identity / total_price / price_alert_exact` 且無 variant、catalog、commercial condition、count、bundle、unit-price 等阻擋理由的舊配對;名稱含任選、多款、香味、色號、即期、融燭燈、香氛蠟燭等高風險訊號先排除。 diff --git a/routes/ai_routes.py b/routes/ai_routes.py index 422911b..f36f451 100644 --- a/routes/ai_routes.py +++ b/routes/ai_routes.py @@ -2008,6 +2008,13 @@ def api_pchome_match_recover_stale(): """背景搜尋救援過期 identity_v2:舊 ID 缺失或低分時才允許 fresh search recovery。""" import threading + if os.getenv("PCHOME_STALE_RECOVERY_ENABLED", "false").lower() not in {"1", "true", "yes", "on"}: + return jsonify({ + 'success': False, + 'message': 'PChome 過期 identity 搜尋救援目前僅開放只讀預覽;正式 smoke 顯示成功率不足,需開啟 PCHOME_STALE_RECOVERY_ENABLED 才能執行', + 'data': _get_pchome_backfill_status_payload(), + }), 409 + payload = request.get_json(silent=True) or {} try: limit = max(5, min(int(payload.get('limit', 40)), 80)) diff --git a/services/competitor_price_feeder.py b/services/competitor_price_feeder.py index 4e7e54a..abbb013 100644 --- a/services/competitor_price_feeder.py +++ b/services/competitor_price_feeder.py @@ -1139,19 +1139,7 @@ class CompetitorPriceFeeder: from sqlalchemy import text sql = text(f""" - WITH latest_momo AS ( - SELECT - p.id AS product_id, - p.i_code AS sku, - p.name, - p.category, - pr.price AS momo_price, - ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC) AS rn - FROM products p - JOIN price_records pr ON pr.product_id = p.id - WHERE p.status = 'ACTIVE' - ), - latest_attempt AS ( + WITH latest_attempt AS ( SELECT DISTINCT ON (cma.sku) cma.sku, cma.best_competitor_product_id, @@ -1164,33 +1152,44 @@ class CompetitorPriceFeeder: FROM competitor_match_attempts cma WHERE cma.source = 'pchome' ORDER BY cma.sku, cma.attempted_at DESC, cma.id DESC + ), + candidate_attempt AS ( + SELECT la.* + FROM latest_attempt la + WHERE la.best_competitor_product_id IS NOT NULL + AND la.best_competitor_product_id <> '' + AND COALESCE(la.best_match_score, 0) >= :min_score + AND COALESCE(la.hard_veto, false) = false + AND COALESCE(la.match_diagnostic_json->>'comparison_mode', 'exact_identity') = 'exact_identity' ) SELECT - lm.product_id, - lm.sku, - lm.name, - lm.category, - lm.momo_price, + p.id AS product_id, + p.i_code AS sku, + p.name, + p.category, + latest_price.price AS momo_price, la.best_competitor_product_id AS competitor_product_id, la.best_competitor_product_name AS competitor_product_name, la.best_match_score, la.attempt_status - FROM latest_momo lm - JOIN latest_attempt la - ON la.sku = lm.sku + FROM candidate_attempt la + JOIN products p + ON p.i_code = la.sku + AND p.status = 'ACTIVE' + JOIN LATERAL ( + SELECT pr.price + FROM price_records pr + WHERE pr.product_id = p.id + ORDER BY pr.timestamp DESC, pr.id DESC + LIMIT 1 + ) latest_price ON TRUE LEFT JOIN competitor_prices cp - ON cp.sku = lm.sku + ON cp.sku = p.i_code AND cp.source = 'pchome' AND (cp.expires_at IS NULL OR cp.expires_at > CURRENT_TIMESTAMP) AND COALESCE(cp.match_score, 0) >= :match_score_floor AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2' - WHERE lm.rn = 1 - AND cp.sku IS NULL - AND la.best_competitor_product_id IS NOT NULL - AND la.best_competitor_product_id <> '' - AND COALESCE(la.best_match_score, 0) >= :min_score - AND COALESCE(la.hard_veto, false) = false - AND COALESCE(la.match_diagnostic_json->>'comparison_mode', 'exact_identity') = 'exact_identity' + WHERE cp.sku IS NULL AND ( ( la.attempt_status IN ( @@ -1214,24 +1213,24 @@ class CompetitorPriceFeeder: ) AND ( ( - lower(COALESCE(lm.name, '')) LIKE '%beauty foot%' + lower(COALESCE(p.name, '')) LIKE '%beauty foot%' AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%beauty foot%' - AND COALESCE(lm.name, '') LIKE '%足膜%' + AND COALESCE(p.name, '') LIKE '%足膜%' AND COALESCE(la.best_competitor_product_name, '') LIKE '%足膜%' ) OR ( - COALESCE(lm.name, '') LIKE '%足足稱奇%' + COALESCE(p.name, '') LIKE '%足足稱奇%' AND COALESCE(la.best_competitor_product_name, '') LIKE '%足足稱奇%' - AND COALESCE(lm.name, '') LIKE '%足膜%' + AND COALESCE(p.name, '') LIKE '%足膜%' AND COALESCE(la.best_competitor_product_name, '') LIKE '%足膜%' ) OR ( - lower(COALESCE(lm.name, '')) LIKE '%ts6%' + lower(COALESCE(p.name, '')) LIKE '%ts6%' AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%ts6%' AND ( - COALESCE(lm.name, '') LIKE '%蜜愛潤滑液%' - OR COALESCE(lm.name, '') LIKE '%蜜桃煥白凝膠%' - OR COALESCE(lm.name, '') LIKE '%極淨白私密潔膚露%' + COALESCE(p.name, '') LIKE '%蜜愛潤滑液%' + OR COALESCE(p.name, '') LIKE '%蜜桃煥白凝膠%' + OR COALESCE(p.name, '') LIKE '%極淨白私密潔膚露%' ) AND ( COALESCE(la.best_competitor_product_name, '') LIKE '%蜜愛潤滑液%' @@ -1241,20 +1240,20 @@ class CompetitorPriceFeeder: ) OR ( ( - lower(COALESCE(lm.name, '')) LIKE '%vaseline%' - OR COALESCE(lm.name, '') LIKE '%凡士林%' + lower(COALESCE(p.name, '')) LIKE '%vaseline%' + OR COALESCE(p.name, '') LIKE '%凡士林%' ) AND ( lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%vaseline%' OR COALESCE(la.best_competitor_product_name, '') LIKE '%凡士林%' ) - AND COALESCE(lm.name, '') LIKE '%嬰兒高純修護凝膠%' + AND COALESCE(p.name, '') LIKE '%嬰兒高純修護凝膠%' AND COALESCE(la.best_competitor_product_name, '') LIKE '%嬰兒高純修護凝膠%' ) ) ) ) - ORDER BY la.best_match_score DESC NULLS LAST, lm.momo_price DESC NULLS LAST, lm.sku + ORDER BY la.best_match_score DESC NULLS LAST, latest_price.price DESC NULLS LAST, p.i_code LIMIT :limit """) with self.engine.connect() as conn: diff --git a/templates/dashboard_v2.html b/templates/dashboard_v2.html index f636f23..248b11a 100644 --- a/templates/dashboard_v2.html +++ b/templates/dashboard_v2.html @@ -65,7 +65,6 @@ data-pchome-backfill-card data-backfill-endpoint="/api/ai/pchome-match/backfill" data-refresh-stale-endpoint="/api/ai/pchome-match/refresh-stale" - data-recover-stale-endpoint="/api/ai/pchome-match/recover-stale" data-status-endpoint="/api/ai/pchome-match/backfill/status" data-pchome-backfill-action="backfillPchomeMatches">
@@ -89,12 +88,6 @@ data-limit="120"> 刷新過期 120 筆 -