From 7e29c00eb839ee9b4e3472a16b11f7bd5bd7c80f Mon Sep 17 00:00:00 2001 From: OoO Date: Mon, 25 May 2026 12:00:12 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=20PChome=20rescore=20?= =?UTF-8?q?=E9=87=8D=E8=A4=87=E5=85=A5=E9=9A=8A=E5=88=A4=E6=96=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TODO_NEXT_STEPS.txt | 1 + config.py | 2 +- docs/AI_INTELLIGENCE_MODULE_SOT.md | 4 +- .../current_execution_queue_20260524.md | 1 + docs/memory/history_logs.md | 1 + .../competitor_match_attempt_rescore_audit.py | 14 +++--- ..._competitor_match_attempt_rescore_audit.py | 49 +++++++++++++++++++ 7 files changed, 63 insertions(+), 9 deletions(-) diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index 27bf9a2..3bd1e45 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.466 修正 rescore audit duplicate 判斷:只在「最新 attempt 已是同候選 `rescore_accepted_current`」時跳過;若歷史曾 accepted、但後續 crawler 又追加低信心列,允許重新 materialize,避免 Dashboard latest-state 仍停在 `true_low_confidence`。Production pilot 已將 SKU `14756069`、`11159042`、`13842560`、`8394210`、`15192547`、`10509765`、`10603780` 送入人工覆核隊列;只寫 `competitor_match_attempts`,`competitor_prices` / `competitor_price_history` 未變。 - V10.465 修正 embedding fallback-disabled 控制流:`allow_111_fallback=False` 時若 resolver 回 111,不再直接退出或只試單台 GCP-B,會強制改試尚未嘗試的 GCP-A/GCP-B;背景 embedding 仍不落 111。 - V10.464 補 rescore audit 精準 SKU pilot:`audit_competitor_match_attempt_rescore.py --sku` 可只掃指定 SKU,再搭配 `--apply-accepted` 只把通過新版 matcher 的目標 SKU 追加到 `rescore_accepted_current` 人工覆核隊列,不寫正式價格表。 - V10.463 補 DR.WU / 達爾膚品牌 alias:同規格 `DR.WU 達爾膚` 與 `DR.WU` 候選不再被當成 brandless identity review,會以既有 exact_identity / total_price / price_alert_exact 閘門處理;未調整 `MIN_MATCH_SCORE`,保留 variant / hard veto 保護。 diff --git a/config.py b/config.py index 299d911..ba84cbf 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.465" +SYSTEM_VERSION = "V10.466" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index d431e93..f3d9d2c 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -1,8 +1,8 @@ # MOMO PRO — AI 競價情報模組 Single Source of Truth -> **最後更新**: 2026-05-24 (台北時間) +> **最後更新**: 2026-05-25 (台北時間) > **狀態**: 🟢 四 AI Agent 自動化閉環已落地;LLM 路由紅線升級為 Ollama-first 三主機級聯,Gemini 備援預設關閉 -> **適用版本**: V10.465 +> **適用版本**: V10.466 --- diff --git a/docs/memory/current_execution_queue_20260524.md b/docs/memory/current_execution_queue_20260524.md index bbde84d..83d1503 100644 --- a/docs/memory/current_execution_queue_20260524.md +++ b/docs/memory/current_execution_queue_20260524.md @@ -53,6 +53,7 @@ - 2026-05-25 08:30 CST 起,rescore audit 支援 `--sku` repeatable 精準篩選;production pilot 可只指定 3-10 個 SKU 執行 read-only audit 或 `--apply-accepted`,避免寬範圍掃描誤把不同 cohort 混在同一次驗證。 - 2026-05-25 08:25 CST 起,`DR.WU / DR WU / DRWU / 達爾膚` 視為同一品牌 alias;正式樣本中的 DR.WU 玻尿酸保濕精華乳 50ML、2入組與杏仁酸亮白煥膚精華 18% 30ML 2入組,在不調整全域門檻下可由 brandless identity review 回到 exact total-price lane。 - 2026-05-25 08:36 CST production pilot:SKU `10362820`、`10653216`、`10653329` 已從 `true_low_confidence` materialize 為 `rescore_accepted_current`,只進人工覆核隊列,不寫 `competitor_prices`。 +- 2026-05-25 11:55 CST 起,rescore audit duplicate 判斷只看最新 attempt;若歷史已有 accepted 但後續 crawler 又追加低信心列,可重新 materialize 成最新 `rescore_accepted_current`。Production pilot 已將 SKU `14756069`、`11159042`、`13842560`、`8394210`、`15192547`、`10509765`、`10603780` 入人工覆核隊列;正式 `competitor_prices` / `competitor_price_history` 未寫入或改變。 ## 3. 12 Agent 決策信封整合 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index e5f01a3..15152e6 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-24:PChome 近門檻身份回收第二輪 +- **V10.466 Rescore latest-state duplicate 修正與 7 SKU pilot**: `materialize_rescore_accept_reviews()` 的 duplicate 判斷改看最新 attempt,而不是歷史任一 accepted;若後續 crawler 又把同 SKU/候選覆蓋成 `true_low_confidence`,可重新追加 `rescore_accepted_current` 讓 Dashboard latest-state 正確進人工覆核。Production pilot 已將 SKU `14756069`、`11159042`、`13842560`、`8394210`、`15192547`、`10509765`、`10603780` materialize 到人工覆核隊列;`competitor_prices` 目標計數維持 7、`competitor_price_history` 目標計數維持 210,未寫正式價差表。 - **V10.465 Embedding GCP fallback 修正**: `OllamaService.generate_embedding(..., allow_111_fallback=False)` 若 resolver 因 unhealthy cache 回 111,會強制改試尚未嘗試的 GCP-A/GCP-B,不再直接 `break` 造成 `tried=[]` 或只試單台 GCP-B;背景 embedding 仍不允許落 111。 - **V10.464 Rescore SKU pilot 篩選**: `audit_competitor_match_attempt_rescore.py` 與 `fetch_match_attempt_rescore_rows()` 增加 `--sku` / `skus` 篩選,可針對 DR.WU 這類明確 cohort 做 3-10 筆精準 materialize,不必為了 pilot 掃整批 `true_low_confidence`。 - **V10.463 DR.WU / 達爾膚品牌 alias**: `marketplace_product_matcher` 補 `DR.WU / DR WU / DRWU / 達爾膚` 正規化,讓正式樣本中同規格玻尿酸保濕精華乳、杏仁酸亮白煥膚精華不再因品牌 token 不同被降成 brandless identity review;測試鎖住 exact / total_price / price_alert_exact。 diff --git a/services/competitor_match_attempt_rescore_audit.py b/services/competitor_match_attempt_rescore_audit.py index 2b28c4c..599af21 100644 --- a/services/competitor_match_attempt_rescore_audit.py +++ b/services/competitor_match_attempt_rescore_audit.py @@ -231,20 +231,22 @@ def _ensure_attempt_table(conn) -> None: def _already_materialized(conn, *, source: str, sku: str, candidate_id: str) -> bool: row = conn.execute(text(""" - SELECT 1 + SELECT attempt_status, COALESCE(best_competitor_product_id, '') AS candidate_id FROM competitor_match_attempts WHERE sku = :sku AND source = :source - AND attempt_status = :attempt_status - AND COALESCE(best_competitor_product_id, '') = :candidate_id + ORDER BY attempted_at DESC, id DESC LIMIT 1 """), { "sku": sku, "source": source, - "attempt_status": RESCORE_ACCEPTED_CURRENT_STATUS, - "candidate_id": candidate_id, }).first() - return row is not None + if row is None: + return False + return ( + row.attempt_status == RESCORE_ACCEPTED_CURRENT_STATUS + and row.candidate_id == candidate_id + ) def materialize_rescore_accept_reviews( diff --git a/tests/test_competitor_match_attempt_rescore_audit.py b/tests/test_competitor_match_attempt_rescore_audit.py index 48ea5fe..6d83c82 100644 --- a/tests/test_competitor_match_attempt_rescore_audit.py +++ b/tests/test_competitor_match_attempt_rescore_audit.py @@ -210,6 +210,55 @@ def test_match_attempt_rescore_materializes_accepted_current_for_manual_review() assert "matcher_rescore=accepted_current" in stored[0]["error_message"] +def test_match_attempt_rescore_materialize_allows_requeue_when_latest_is_low_confidence(): + from services.competitor_match_attempt_rescore_audit import materialize_rescore_accept_reviews + + engine = create_engine("sqlite:///:memory:") + rows = [{ + "sku": "10509765", + "attempt_status": "true_low_confidence", + "momo_product_id": 10509765, + "momo_product_name": "【悠斯晶】經典乳霜120g(2入組)", + "momo_price": 599, + "candidate_count": 1, + "best_competitor_product_id": "YUSKIN-120G-2", + "best_competitor_product_name": "【Yuskin悠斯晶】經典乳霜 2盒組(120g/盒)", + "best_competitor_price": 540, + "best_match_score": 0.779, + }] + + with engine.begin() as conn: + initial_stats = materialize_rescore_accept_reviews(conn, rows) + duplicate_stats = materialize_rescore_accept_reviews(conn, rows) + conn.execute(text(""" + INSERT INTO competitor_match_attempts + (sku, source, attempt_status, momo_product_id, momo_product_name, + momo_price, candidate_count, best_competitor_product_id, + best_competitor_product_name, best_competitor_price, + best_match_score, diagnostic_codes, error_message, attempted_at) + VALUES + ('10509765', 'pchome', 'true_low_confidence', 10509765, + '【悠斯晶】經典乳霜120g(2入組)', 599, 1, 'YUSKIN-120G-2', + '【Yuskin悠斯晶】經典乳霜 2盒組(120g/盒)', 540, 0.779, + '["strong_exact_spec_match"]', 'later crawler low-confidence row', + CURRENT_TIMESTAMP) + """)) + requeue_stats = materialize_rescore_accept_reviews(conn, rows) + latest_status = conn.execute(text(""" + SELECT attempt_status + FROM competitor_match_attempts + WHERE sku = '10509765' + ORDER BY attempted_at DESC, id DESC + LIMIT 1 + """)).scalar_one() + + assert initial_stats["materialized"] == 1 + assert duplicate_stats["skipped_duplicate"] == 1 + assert requeue_stats["materialized"] == 1 + assert requeue_stats["skipped_duplicate"] == 0 + assert latest_status == "rescore_accepted_current" + + def test_match_attempt_rescore_retracts_variant_review_from_accepted_queue(): from services.competitor_match_attempt_rescore_audit import ( fetch_variant_rescore_accept_review_rows,