From 97e33bf1e025d034370fd9ee3b33fca920f6ef33 Mon Sep 17 00:00:00 2001 From: OoO Date: Sun, 24 May 2026 22:30:14 +0800 Subject: [PATCH] Guard PChome feeder auto price writes --- TODO_NEXT_STEPS.txt | 2 + config.py | 2 +- docs/AI_INTELLIGENCE_MODULE_SOT.md | 4 +- .../current_execution_queue_20260524.md | 2 + docs/memory/history_logs.md | 2 + services/competitor_price_feeder.py | 112 ++++++++++++++++++ ...t_competitor_match_attempts_persistence.py | 48 ++++++++ tests/test_marketplace_product_matcher.py | 15 +++ 8 files changed, 185 insertions(+), 2 deletions(-) diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index c533f3a..a63aff6 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,8 @@ ================================================================================ 【已完成】 + - V10.454 production rescore 回收執行:以 latest-sku-only 口徑重算 `true_low_confidence` 745 筆,只將 2 筆 `accepted_current` 追加成 `rescore_accepted_current` 人工覆核 attempt(KATE 怪獸級持色唇膏、Herbacin 小甘菊護手霜 20ml);未寫 `competitor_prices` / `competitor_price_history`,並已清除 Dashboard / competitor intel cache。 + - V10.454 補 feeder 正式寫入安全閘門:matcher 若只到 `manual_review` / `identity_review` / `variant_selection_review`,例如 MOMO 多款任選唇膏對 PChome 單一款式,只能進 `true_low_confidence` 覆核,不得由 retryable replay 或 known identity refresh 自動寫入 `competitor_prices` 正式價差。 - V10.453 補 PChome matcher 安全回收規則:新增 Herbacin 小甘菊護手霜 20ml brandless 同款 anchor;修正 `EX8` 型號不可被誤解析成 `x8` 入數;新增 GONESH / 香氛固體凝膠的一側泛稱、一側明確香味或 No. 款式 veto,避免近門檻 replay 把不同香味、不同入數商品錯寫成正式價差。 - V10.452 修正 PChome rescore audit 掃描口徑:`audit_competitor_match_attempt_rescore.py` 預設先取每個 SKU 最新 attempt,再套用 status / reason 篩選,和 Dashboard review queue 的最新狀態一致;舊 SKU/候選考古掃描需明確加 `--include-historical-candidates`,避免已修正或已入隊商品被舊低信心紀錄重複推回報表。 - V10.451 拆分 PChome `low_score` 操作分流並補 read-only queue API:比價覆核頁把近門檻可救、證據不足、低信心舊候選拆成獨立篩選;repository 同步提供 `recoverable_low_score`、`true_low_confidence`、`legacy_low_score` 三個 status filter,`/api/pchome-review/queue` 可直接用同一套 review_status 做 smoke / operator tools 查詢,讓回刷、人工覆核與報表不再把所有低信心候選混在一起。 diff --git a/config.py b/config.py index fc0c1c9..f867467 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.453" +SYSTEM_VERSION = "V10.454" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index a238199..5456ba2 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -2,7 +2,7 @@ > **最後更新**: 2026-05-24 (台北時間) > **狀態**: 🟢 四 AI Agent 自動化閉環已落地;LLM 路由紅線升級為 Ollama-first 三主機級聯,Gemini 備援預設關閉 -> **適用版本**: V10.453 +> **適用版本**: V10.454 --- @@ -84,7 +84,9 @@ SQL漏斗(~300筆) - PChome re-score 回收線:`rescore_accepted_current` 只能表示最新版 matcher 判定「可人工採用」,不可直接寫入正式 `competitor_prices`;`fetch_competitor_coverage()` 必須輸出 `rescore_accepted_count`,Dashboard、daily/growth 與 OpenClaw 競品摘要都要把「重算可採用待審」獨立呈現,避免和一般低信心/單位價覆核混在一起。 - PChome 低信心操作分流:Dashboard 與 read-only `/api/pchome-review/queue` 必須把近門檻可救、證據不足、低信心舊候選拆成 `recoverable_low_score`、`true_low_confidence`、`legacy_low_score` 三個可篩選桶;廣義 `low_score` 僅作 repository/export 相容查詢,不可在 UI 中冒充單一操作分流。 - PChome re-score audit 預設必須先取每個 SKU 的最新 `competitor_match_attempts` 狀態,再套用 status / reason 篩選;舊低信心歷史候選只能透過 `--include-historical-candidates` 明確進入考古掃描,避免已入隊、已否決或已修正 SKU 被舊紀錄重新推回報表。 +- production re-score `--apply-accepted` 僅可追加 `rescore_accepted_current` attempt 給人工覆核;執行後需清除 Dashboard / competitor intel cache,且必須抽查 `competitor_prices` / `competitor_price_history` 未新增正式價差。 - PChome matcher replay 必須先守住假陽性:`EX8` 等型號不可被誤解析成 `x8` 入數;香氛固體凝膠 / 空氣芳香劑若一側為泛稱、一側含明確香味或 No. 款式,必須走 `aroma_scent_variant_conflict` veto,不得因同品牌同重量直接寫正式價差。 +- PChome feeder 正式寫入必須再套一層價格資料閘門:只有 `match_type='exact'`、`price_basis='total_price'`、`alert_tier='price_alert_exact'` 且無 `variant_selection_review` 的結果可以自動寫入 `competitor_prices`;`manual_review` / `identity_review` 只能留在覆核隊列或人工採用流程,不得由 retryable replay 或 known identity refresh 自動升成正式價差。 | 角色 | 模型 | 主機 | 成本 | 每日限額 | |------|------|------|------|---------| diff --git a/docs/memory/current_execution_queue_20260524.md b/docs/memory/current_execution_queue_20260524.md index 6a72e44..ff2d4b6 100644 --- a/docs/memory/current_execution_queue_20260524.md +++ b/docs/memory/current_execution_queue_20260524.md @@ -21,6 +21,8 @@ - private-care / body-care - 2026-05-24 22:10 CST 起,PChome rescore audit 預設對齊 review queue 最新狀態:先取每個 SKU 最新 attempt,再套用 status / reason 篩選;歷史候選回看需明確使用 `--include-historical-candidates`。 - 2026-05-24 22:20 CST 起,matcher replay 先套用 V10.453 安全修正:`EX8` 型號不視為 `x8` 入數,香氛固體凝膠一側泛稱、一側具體香味/No. 款式走 veto;Herbacin 小甘菊護手霜 20ml brandless 可作窄範圍安全回收。 +- 2026-05-24 22:42 CST 起,feeder 正式寫入套用 V10.454 安全閘門:`identity_review` / `manual_review` / `variant_selection_review` 的近門檻候選只能留在覆核,不能由 replay 或 refresh 自動寫正式 PChome 價差。 +- 2026-05-24 22:48 CST 已執行 production rescore 入隊:745 筆 `true_low_confidence` 中只有 2 筆通過 gate,已追加 `rescore_accepted_current` 人工覆核 attempt;正式價格表未寫入,Dashboard / competitor intel cache 已清除。 - 只新增窄範圍、可解釋 matcher 規則。 - 保留 `MIN_MATCH_SCORE`、`identity_veto`、既有正式候選覆寫保護。 - 驗收:`matched` 有增加、目標 `low_score` 下降、`needs_review` 不異常上升、無明顯跨色號/跨款式/跨劑型錯配。 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index 95052e9..70f865a 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,8 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-24:PChome 近門檻身份回收第二輪 +- **V10.454 production rescore 入人工覆核隊列**: 以 latest-sku-only 口徑重算 745 筆 `true_low_confidence`,只有 2 筆通過現行 matcher gate,已追加成 `rescore_accepted_current`:SKU `8884618` KATE 怪獸級持色唇膏、SKU `10922465` Herbacin 小甘菊護手霜 20ml。這次只寫 `competitor_match_attempts` 人工覆核列,未寫 `competitor_prices` / `competitor_price_history`,並已清除 Dashboard 與 competitor intel cache。 +- **V10.454 feeder 正式寫入閘門**: `CompetitorPriceFeeder` 現在只允許 `exact + total_price + price_alert_exact` 的 matcher 結果自動寫入 `competitor_prices`;`manual_review`、`identity_review`、`variant_selection_review`(例如 MOMO 多款任選唇膏對 PChome 單一水光款)會保留在 `true_low_confidence` 覆核,不得因分數剛過門檻而污染正式比價資料。 - **V10.453 matcher 安全回收規則**: 新增 Herbacin 小甘菊護手霜 20ml brandless 同款 anchor;修正 `EX8` 型號不再被誤解析為 `x8` 入數;新增香氛固體凝膠 / 空氣芳香劑一側泛稱、一側明確香味或 No. 款式的 `aroma_scent_variant_conflict` veto。這輪目標是讓 retryable replay 可救回真同款,同時先封住 MIRAE 入數與 GONESH 香味款式的假陽性。 - **V10.452 PChome rescore audit 最新狀態口徑**: `scripts/audit_competitor_match_attempt_rescore.py` 與 `fetch_match_attempt_rescore_rows()` 預設改成先取每個 SKU 最新 attempt,再套用 status / reason 篩選,與 Dashboard review queue 一致;需要回看歷史候選時才使用 `--include-historical-candidates`,避免舊低信心紀錄讓已修正、已否決或已入隊 SKU 重複回到操作報表。 - **V10.451 low_score 操作分流拆分與 queue API**: Dashboard 比價覆核頁不再只給一個籠統低信心分頁;新增「近門檻可救」「證據不足」「低信心舊候選」三個篩選,`competitor_intel_repository.REVIEW_STATUS_FILTER_GROUPS` 同步提供對應分流,`/api/pchome-review/queue` 也能用同一套 `review_status` 做 read-only smoke / operator tools 查詢,讓 matcher 回刷、人工覆核、OpenClaw 報表能分清楚可自動回收、應保守等待、與需補搜尋的候選。 diff --git a/services/competitor_price_feeder.py b/services/competitor_price_feeder.py index c7bc547..16a7dd6 100644 --- a/services/competitor_price_feeder.py +++ b/services/competitor_price_feeder.py @@ -111,6 +111,31 @@ def _classify_low_score_attempt(score: float, diagnostics) -> str: return "true_low_confidence" +def _is_auto_price_write_safe(diagnostics) -> bool: + """Only exact, total-price identities may update the formal comparison cache.""" + if not diagnostics or getattr(diagnostics, "hard_veto", False): + return False + if getattr(diagnostics, "comparison_mode", "") != "exact_identity": + return False + if getattr(diagnostics, "match_type", "") != "exact": + return False + if getattr(diagnostics, "price_basis", "") != "total_price": + return False + if getattr(diagnostics, "alert_tier", "") != "price_alert_exact": + return False + if "variant_selection_review" in set(getattr(diagnostics, "reasons", ()) or ()): + return False + return True + + +def _classify_auto_write_block_attempt(score: float, diagnostics) -> str: + if getattr(diagnostics, "hard_veto", False): + return "identity_veto" + if score >= MIN_MATCH_SCORE: + return "true_low_confidence" + return _classify_low_score_attempt(score, diagnostics) + + def _has_variant_selection_gap( momo_name: str, ranked_matches: list[tuple], @@ -1605,6 +1630,35 @@ class CompetitorPriceFeeder: if manual_accept_override: score = max(score, MIN_MATCH_SCORE) + if not manual_accept_override and not _is_auto_price_write_safe(diagnostics): + attempt_status = _classify_auto_write_block_attempt(score, diagnostics) + browse_diagnostic = self._prepare_browse_diagnostic( + momo_name, + search_terms=search_terms, + reason=attempt_status, + best_product=best_product, + best_score=score, + diagnostics=diagnostics, + candidate_count=len(products), + ) + self._record_match_attempt( + sku, + momo_name, + momo_product_id=momo_product_id, + momo_price=momo_price, + search_terms=search_terms, + candidate_count=len(products), + attempt_status=attempt_status, + best_product=best_product, + best_score=score, + diagnostics=diagnostics, + browse_diagnostic=browse_diagnostic, + error_message=f"auto_price_write_blocked; {_format_match_diagnostics(diagnostics)}", + source=source, + ) + attempts_written += 1 + skipped_low += 1 + continue tags = _extend_match_tags(_extract_tags(best_product), diagnostics) if manual_accept_override: tags.extend(["manual_review", "manual_accept"]) @@ -1800,6 +1854,25 @@ class CompetitorPriceFeeder: continue if score >= MIN_MATCH_SCORE and not getattr(diagnostics, "hard_veto", False): + if not _is_auto_price_write_safe(diagnostics): + attempt_terms = search_terms + [term for term in recovery_terms if term not in search_terms] + self._record_match_attempt( + sku, + momo_name, + momo_product_id=momo_product_id, + momo_price=momo_price, + search_terms=attempt_terms, + candidate_count=max(1, recovery_candidate_count), + attempt_status=_classify_auto_write_block_attempt(score, diagnostics), + best_product=best_product, + best_score=score, + diagnostics=diagnostics, + error_message=f"auto_price_write_blocked; {_format_match_diagnostics(diagnostics)}", + source=source, + ) + skipped_low += 1 + attempts_written += 1 + continue tags = _extend_match_tags( _extract_tags(best_product), diagnostics, @@ -1932,6 +2005,26 @@ class CompetitorPriceFeeder: extras = ["refresh_known_identity"] if recovery_terms: extras.append("fresh_search_recovery") + if not _is_auto_price_write_safe(diagnostics): + candidate_count = max(1, recovery_candidate_count or 1) + attempt_terms = search_terms + [term for term in recovery_terms if term not in search_terms] + self._record_match_attempt( + sku, + momo_name, + momo_product_id=momo_product_id, + momo_price=momo_price, + search_terms=attempt_terms, + candidate_count=candidate_count, + attempt_status=_classify_auto_write_block_attempt(score, diagnostics), + best_product=best_product, + best_score=score, + diagnostics=diagnostics, + error_message=f"auto_price_write_blocked; {_format_match_diagnostics(diagnostics)}", + source=source, + ) + skipped_low += 1 + attempts_written += 1 + continue tags = _extend_match_tags(_extract_tags(best_product), diagnostics, extras) should_write, write_reason = self._should_upsert_competitor_price( @@ -2015,6 +2108,25 @@ class CompetitorPriceFeeder: attempts_written += 1 continue + if not _is_auto_price_write_safe(diagnostics): + self._record_match_attempt( + sku, + momo_name, + momo_product_id=momo_product_id, + momo_price=momo_price, + search_terms=search_terms, + candidate_count=1, + attempt_status=_classify_auto_write_block_attempt(score, diagnostics), + best_product=best_product, + best_score=score, + diagnostics=diagnostics, + error_message=f"auto_price_write_blocked; {_format_match_diagnostics(diagnostics)}", + source=source, + ) + skipped_low += 1 + attempts_written += 1 + continue + tags = _extend_match_tags(_extract_tags(best_product), diagnostics, ["refresh_known_identity"]) should_write, write_reason = self._should_upsert_competitor_price( diff --git a/tests/test_competitor_match_attempts_persistence.py b/tests/test_competitor_match_attempts_persistence.py index 00d0698..5c5afb0 100644 --- a/tests/test_competitor_match_attempts_persistence.py +++ b/tests/test_competitor_match_attempts_persistence.py @@ -146,6 +146,36 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes(): assert "idx_comp_match_attempts_sku_source_time" in migration +def test_competitor_feeder_blocks_identity_review_from_auto_price_write(): + from types import SimpleNamespace + + from services.competitor_price_feeder import ( + _classify_auto_write_block_attempt, + _is_auto_price_write_safe, + ) + + identity_review = SimpleNamespace( + hard_veto=False, + comparison_mode="exact_identity", + match_type="comparable", + price_basis="manual_review", + alert_tier="identity_review", + reasons=("variant_selection_review",), + ) + exact_price = SimpleNamespace( + hard_veto=False, + comparison_mode="exact_identity", + match_type="exact", + price_basis="total_price", + alert_tier="price_alert_exact", + reasons=(), + ) + + assert _is_auto_price_write_safe(identity_review) is False + assert _classify_auto_write_block_attempt(0.783, identity_review) == "true_low_confidence" + assert _is_auto_price_write_safe(exact_price) is True + + def test_competitor_feeder_keeps_variant_selection_review_out_of_recoverable(): from services.competitor_price_feeder import _classify_low_score_attempt @@ -489,6 +519,9 @@ def test_competitor_feeder_skips_rejected_candidate_and_uses_next_best(monkeypat hard_veto=False, reasons=(), comparison_mode="exact_identity", + match_type="exact", + price_basis="total_price", + alert_tier="price_alert_exact", tags=["identity_v2", "comparison_exact_identity"], ) @@ -878,6 +911,9 @@ def test_competitor_feeder_downgrades_variant_selection_gap_from_recoverable(mon hard_veto=False, reasons=("shared_identity_anchor_packaging_variant",), comparison_mode="exact_identity", + match_type="exact", + price_basis="total_price", + alert_tier="price_alert_exact", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) @@ -1101,6 +1137,9 @@ def test_competitor_feeder_marks_existing_stronger_match_as_protected(monkeypatc hard_veto=False, reasons=("shared_identity_anchor_packaging_variant",), comparison_mode="exact_identity", + match_type="exact", + price_basis="total_price", + alert_tier="price_alert_exact", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) @@ -1463,6 +1502,9 @@ def test_competitor_feeder_refresh_recovers_with_fresh_search_when_known_id_is_l hard_veto=False, reasons=("shared_model_token",), comparison_mode="exact_identity", + match_type="exact", + price_basis="total_price", + alert_tier="price_alert_exact", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) return SimpleNamespace( @@ -1476,6 +1518,9 @@ def test_competitor_feeder_refresh_recovers_with_fresh_search_when_known_id_is_l hard_veto=False, reasons=(), comparison_mode="exact_identity", + match_type="exact", + price_basis="total_price", + alert_tier="price_alert_exact", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) @@ -1565,6 +1610,9 @@ def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch): hard_veto=False, reasons=("spec_name_alignment",), comparison_mode="exact_identity", + match_type="exact", + price_basis="total_price", + alert_tier="price_alert_exact", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index a9ded67..426f507 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -1842,6 +1842,21 @@ def test_marketplace_matcher_keeps_named_option_vs_catalog_in_review(): assert "variant_selection_review" in diagnostics.reasons +def test_marketplace_matcher_keeps_kate_catalog_vs_single_variant_in_review(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【KATE 凱婷】怪獸級持色唇膏 水光款/經典款/微發色款(獨家技術持久不沾 高保濕)", + "【KATE 凱婷】怪獸級持色唇膏(水光) 1.6g", + ) + + assert diagnostics.score >= 0.76 + assert diagnostics.hard_veto is False + assert diagnostics.price_basis == "manual_review" + assert diagnostics.alert_tier == "identity_review" + assert "variant_selection_review" in diagnostics.reasons + + def test_marketplace_matcher_promotes_variant_safe_exact_option(): from services.marketplace_product_matcher import score_marketplace_match