diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index 238811b..23f3798 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.531 補 PChome matcher 過度保守的安全 exact 線:同品線、同規格、同數量的多件組若沒有 variant / count / bundle / commercial / unit-price 等阻擋理由,且商品型別完全對齊,允許進 `exact / total_price / price_alert_exact`;新增 DHC 純欖護唇膏 1.5g、FRUDIA 蜂蜜藍莓護唇膏 10g、SEBAMED 嬰兒護唇膏 4.8g x2、理膚寶水滋養修護潤唇膏 4.7ml 的 focused total-price 規則。負例仍鎖住混合組、香味款、粉底色號與蠟燭 catalog,不放寬全域門檻。 - V10.530 輕量化 PChome 狀態 preview 並暫停 `recover-stale` 主操作入口:`_fetch_retryable_candidate_skus()` 先從最新 `competitor_match_attempts` 縮小可重評候選,再用 `JOIN LATERAL` 只取該 SKU 最新 MOMO 價,避免 `/api/ai/pchome-match/backfill/status` 因 `price_records` 全量 window scan 超時;正式 smoke 同時顯示過期 identity fresh-search rescue 5 筆耗時約 109 秒且 0 筆成功,因此 Dashboard 移除「救援過期 40 筆」按鈕,只保留 `stale_recovery_preview` 的只讀「可救援」觀測;後端 `/api/ai/pchome-match/recover-stale` 改由 `PCHOME_STALE_RECOVERY_ENABLED=true` 顯式開關才可執行,避免操作員誤按低成功率慢路徑拖住 worker。 - V10.529 補強 `recover-stale` 名稱風險擋詞:過期 identity 搜尋救援會先排除 `+`、`x2`、`*2` 等組合暗示,以及湛藍、麋香、海洋、玫瑰、薰衣草、生理呵護、日用型、清爽、潤澤等常見變體 / 香味 / 版本詞,避免同品牌同規格但不同香味、不同膚感、不同使用情境的 stale pair 進慢速 fresh search。 - V10.528 將 `recover-stale` 救援 preview 改成輕量雙階段篩選:SQL 從過期 `competitor_prices` 小集合出發,只做 identity_v2、過期、exact/total_price/price_alert_exact 等必要條件並限制候選量,再用 `JOIN LATERAL` 取 ACTIVE 商品最新 MOMO 價;variant / catalog / commercial condition / 高風險名稱訊號改在 Python 對小樣本過濾,避免正式站看板狀態端點因全量 price_records、JSONB + regex 過重查詢拖垮 app worker。 diff --git a/config.py b/config.py index fddd095..d10bc95 100644 --- a/config.py +++ b/config.py @@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.530" +SYSTEM_VERSION = "V10.531" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git 
a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index 04342fe..e0c8d11 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -103,6 +103,7 @@ SQL漏斗(~300筆) - production re-score `--apply-accepted` 僅可追加 `rescore_accepted_current` attempt 給人工覆核;執行後需清除 Dashboard / competitor intel cache,且必須抽查 `competitor_prices` / `competitor_price_history` 未新增正式價差。 - production re-score 若曾把 `variant_selection_review` 追加成 `rescore_accepted_current`,必須用 `audit_competitor_match_attempt_rescore.py --retract-variant-accepted` 追加最新 `true_low_confidence` 退回列;此路徑只寫 `competitor_match_attempts`,不得刪歷史紀錄,也不得寫 `competitor_prices` / `competitor_price_history`。 - PChome matcher replay 必須先守住假陽性:`EX8` 等型號不可被誤解析成 `x8` 入數;香氛固體凝膠 / 空氣芳香劑若一側為泛稱、一側含明確香味或 No. 款式,必須走 `aroma_scent_variant_conflict` veto,不得因同品牌同重量直接寫正式價差。 +- PChome matcher 對「同規格同數量」的多件組可以安全回收,但必須同時滿足:商品型別完全對齊、品牌同線、規格與數量對齊、沒有 variant / count / bundle / commercial / unit-price / price-ratio 阻擋理由,才可打 `safe_multi_component_exact_total_price` 並進 `exact / total_price / price_alert_exact`;混合組、香味款、色號款、catalog 任選仍需留在 `identity_review` 或 veto。護唇品 focused total-price 僅允許已明確建規則的 DHC 純欖 1.5g、FRUDIA 蜂蜜藍莓 10g、SEBAMED 嬰兒護唇膏 4.8g x2、理膚寶水滋養修護潤唇膏 4.7ml,不得把所有 lip/cosmetic catalog 一次放行。 - PChome feeder 正式寫入必須再套一層價格資料閘門:只有 `match_type='exact'`、`price_basis='total_price'`、`alert_tier='price_alert_exact'` 且無 `variant_selection_review` 的結果可以自動寫入 `competitor_prices`;`manual_review` / `identity_review` 只能留在覆核隊列或人工採用流程,不得由 retryable replay 或 known identity refresh 自動升成正式價差。Rescore audit 若遇到 `variant_selection_review`,也不得產生 `accepted_current`。 | 角色 | 模型 | 主機 | 成本 | 每日限額 | diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index c4d8029..be88f54 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-06-01:PChome 比價新鮮度操作閉環 +- **V10.531 PChome 安全 exact 規則補強**: production refresh 顯示大量舊 identity 不是分數不足,而是被多件組 / 護唇品 variant 
防線過度保守地擋在 `identity_review`。新增 `safe_multi_component_exact_total_price`:同品線、同規格、同數量且商品型別完全對齊、無 variant / count / bundle / commercial / unit-price / price-ratio 阻擋時,才可進 `exact / total_price / price_alert_exact`;另補 DHC 純欖護唇膏 1.5g、FRUDIA 蜂蜜藍莓護唇膏 10g、SEBAMED 嬰兒護唇膏 4.8g x2、理膚寶水滋養修護潤唇膏 4.7ml focused total-price。回歸測試保留 HH 混合組、TS6 香味衣物手洗精、粉底色號與蠟燭 catalog 不自動放行。 - **V10.530 retryable preview 輕量化與 recover-stale 安全閘**: 正式站 profiling 證實 `/api/ai/pchome-match/backfill/status` 剩餘瓶頸在 `revalidation_preview`,約 12 秒;`_fetch_retryable_candidate_skus()` 改為先取每個 SKU 最新 attempt 並縮小到可重評候選,再 `JOIN LATERAL` 取單一最新 MOMO 價,不再對全量 `price_records` 做商品 window scan。正式 smoke 也顯示過期 identity fresh-search rescue 小批次 5 筆耗時約 109 秒且 0 筆成功,因此 Dashboard 移除「救援過期 40 筆」按鈕,只保留 `stale_recovery_preview` 只讀觀測;後端 `/api/ai/pchome-match/recover-stale` 保留但需明確設定 `PCHOME_STALE_RECOVERY_ENABLED=true` 才能執行,避免低成功率慢任務拖住正式 worker。 - **V10.529 recover-stale 名稱風險擋詞補強**: 過期 identity 搜尋救援新增 `+`、`x2`、`*2` 等組合暗示,以及湛藍、麋香、海洋、玫瑰、薰衣草、生理呵護、日用型、清爽、潤澤等常見變體 / 香味 / 版本詞;避免同品牌同規格但不同香味、不同膚感或不同使用情境的 stale pair 進慢速 fresh search。 - **V10.528 recover-stale preview 輕量化**: V10.527 的救援隊列在正式站 preview 時曾造成 status API 超時。改為雙階段篩選:SQL 從過期 `competitor_prices` 小集合出發,只做 identity_v2、過期、exact/total_price/price_alert_exact 等必要條件並限制候選量,再用 `JOIN LATERAL` 取 ACTIVE 商品最新 MOMO 價;variant / catalog / commercial condition / 高風險名稱訊號改在 Python 對小樣本過濾,避免 `/api/ai/pchome-match/backfill/status` 因全量 price_records、JSONB + regex preview 查詢拖垮。 diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index e9f3d1f..8b66844 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -490,7 +490,11 @@ FOCUSED_IDENTITY_VARIANT_REVIEW_BYPASS_REASONS = { "artmis_cranberry_private_mousse_250ml", "artmis_witch_hazel_private_mousse_250ml", "baan_baby_lip_original_strawberry_catalog", + "dhc_olive_lip_1_5g", + "frudia_honey_blueberry_lip_10g", + "laroche_posay_lip_balm_4_7ml", "lush_sakura_body_spray", + 
"sebamed_baby_lip_4_8g_2pack", "so_natural_fixx_setting_spray_120ml_plain", "cetaphil_long_lotion_237ml", "cetaphil_long_lotion_473ml", @@ -530,7 +534,11 @@ FOCUSED_IDENTITY_TOTAL_PRICE_REASONS = { "artmis_cranberry_private_mousse_250ml", "artmis_witch_hazel_private_mousse_250ml", "baan_baby_lip_original_strawberry_catalog", + "dhc_olive_lip_1_5g", + "frudia_honey_blueberry_lip_10g", + "laroche_posay_lip_balm_4_7ml", "lush_sakura_body_spray", + "sebamed_baby_lip_4_8g_2pack", "so_natural_fixx_setting_spray_120ml_plain", "cetaphil_long_lotion_237ml", "cetaphil_long_lotion_473ml", @@ -1920,6 +1928,53 @@ def _build_evidence_flags( return _dedupe_tuple(flags) +def _has_safe_multi_component_exact_total_price( + left: ProductIdentity, + right: ProductIdentity, + *, + brand_score: float, + token_score: float, + spec_score: float, + sequence_score: float, + type_score: float, + hard_veto: bool, + variant_descriptor_conflict: bool, + reasons: Iterable[str], +) -> bool: + """Allow exact total-price writes for same-component sets, not mixed bundles.""" + if hard_veto or variant_descriptor_conflict: + return False + if not (_has_multi_component(left) and _has_multi_component(right)): + return False + reason_set = set(reasons) + blocked = { + "variant_selection_review", + "variant_option_conflict", + "variant_descriptor_conflict", + "pack_quantity_difference", + "count_conflict", + "bundle_offer_conflict", + "multi_component_conflict", + "multi_component_count_conflict", + "commercial_condition_gap", + "refill_pack_conflict", + "unit_comparable", + "price_ratio_extreme", + "price_ratio_wide", + } + if reason_set & blocked: + return False + if brand_score < 0.95 or type_score < 0.95 or spec_score < 0.85: + return False + if _has_exact_count_alignment(left, right): + return token_score >= 0.50 and sequence_score >= 0.50 + return ( + token_score >= 0.62 + and sequence_score >= 0.62 + and bool(reason_set & {"strong_exact_spec_match", "shared_model_token", "spec_name_alignment"}) + ) 
+ + def _classify_match_quality( *, score: float, @@ -1967,7 +2022,10 @@ def _classify_match_quality( if strong_identity_evidence and not catalog_count_omission: if focused_total_price_safe and "variant_selection_review" not in reason_set: return "exact", "total_price", "price_alert_exact" - if multi_component_pair or "variant_selection_review" in reason_set: + safe_multi_component_total_price = "safe_multi_component_exact_total_price" in reason_set + if "variant_selection_review" in reason_set: + return "exact", "manual_review", "identity_review" + if multi_component_pair and not safe_multi_component_total_price: return "exact", "manual_review", "identity_review" return "exact", "total_price", "price_alert_exact" @@ -2798,6 +2856,19 @@ def score_marketplace_match( if hard_veto: score = min(score, 0.74 if comparison_mode == "unit_comparable" else 0.32) score = max(0.0, min(1.0, score)) + if _has_safe_multi_component_exact_total_price( + left, + right, + brand_score=brand_score, + token_score=token_score, + spec_score=spec_score, + sequence_score=sequence_score, + type_score=type_score, + hard_veto=hard_veto, + variant_descriptor_conflict=variant_descriptor_conflict, + reasons=reasons, + ): + reasons.append("safe_multi_component_exact_total_price") reason_tuple = _dedupe_tuple(reasons) match_type, price_basis, alert_tier = _classify_match_quality( score=score, @@ -4191,6 +4262,37 @@ def _has_focused_low_score_exact_identity_line(left: ProductIdentity, right: Pro and "隔離露" in right_text ): return "shiseido_blue_sunscreen" + if ( + "dhc" in pair_text + and "純欖護唇膏" in left_text + and "純欖護唇膏" in right_text + and _has_shared_weight(left, right, 1.5) + ): + return "dhc_olive_lip_1_5g" + if ( + "frudia" in pair_text + and "蜂蜜藍莓" in left_text + and "蜂蜜藍莓" in right_text + and "護唇膏" in left_text + and "護唇膏" in right_text + and _has_shared_weight(left, right, 10) + ): + return "frudia_honey_blueberry_lip_10g" + if ( + {"sebamed", "施巴"} & brand_tokens + and "嬰兒護唇膏" in left_text 
+ and "嬰兒護唇膏" in right_text + and _has_shared_weight(left, right, 4.8) + and _has_exact_count_alignment(left, right) + ): + return "sebamed_baby_lip_4_8g_2pack" + if ( + "理膚寶水" in pair_text + and "滋養修護潤唇膏" in left_text + and "滋養修護潤唇膏" in right_text + and _has_shared_volume(left, right, 4.7) + ): + return "laroche_posay_lip_balm_4_7ml" if ( {"baan", "貝恩"} & (left.brand_tokens & right.brand_tokens) and "嬰兒修護唇膏" in left_text diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index 62c7a12..c810564 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -1149,6 +1149,71 @@ def test_marketplace_matcher_promotes_exact_variant_safe_review_bypass_lines(): assert expected_reason in diagnostics.reasons +def test_marketplace_matcher_promotes_safe_multi_component_exact_sets_to_total_price(): + from services.marketplace_product_matcher import score_marketplace_match + + cases = [ + ( + "【Aveeno 艾惟諾】燕麥高效舒緩護手霜100gx3", + "艾惟諾 燕麥高效舒緩護手霜100gx3", + ), + ( + "【花美水】Aging逆齡緊緻精華凝膠-原白金(1.7g x 3支/盒)", + "【花美水】Aging 逆齡緊緻 精華凝膠(原白金)(1.7g*3支入)/盒", + ), + ( + "【O.P.I】受損型硬甲油記得卸組-亮麗增強基礎護甲油15mL+專業去光水110mL-NTT80+AL414(禮物/官方直營)", + "OPI 護甲油記得卸組(亮麗增強基礎護甲油15mL+去光水110mL)", + ), + ] + + for momo_name, competitor_name in cases: + diagnostics = score_marketplace_match(momo_name, competitor_name) + assert diagnostics.score >= 0.76 + assert diagnostics.hard_veto is False + assert diagnostics.match_type == "exact" + assert diagnostics.price_basis == "total_price" + assert diagnostics.alert_tier == "price_alert_exact" + assert "safe_multi_component_exact_total_price" in diagnostics.reasons + + +def test_marketplace_matcher_promotes_focused_lip_care_exact_lines_to_total_price(): + from services.marketplace_product_matcher import score_marketplace_match + + cases = [ + ( + "【DHC】純欖護唇膏1.5g", + "【DHC】純欖護唇膏 1.5g", + "focused_exact_identity_dhc_olive_lip_1_5g", + ), + ( + "【FRUDIA】確認過嘴唇 蜂蜜藍莓保濕潤澤護唇膏10g(冠軍護唇膏/還可當晚安唇膜)", 
+ "FRUDIA確認過嘴唇∼蜂蜜藍莓保濕潤澤護唇膏10g", + "focused_exact_identity_frudia_honey_blueberry_lip_10g", + ), + ( + "【SEBAMED 施巴】嬰兒護唇膏4.8gx2(總代理)", + "施巴5.5 sebamed 嬰兒護唇膏 4.8g*2入/袋", + "focused_exact_identity_sebamed_baby_lip_4_8g_2pack", + ), + ( + "【理膚寶水】滋養修護潤唇膏 4.7ml(滋養軟化唇部)", + "理膚寶水 滋養修護潤唇膏 4.7ml", + "focused_exact_identity_laroche_posay_lip_balm_4_7ml", + ), + ] + + for momo_name, competitor_name, expected_reason in cases: + diagnostics = score_marketplace_match(momo_name, competitor_name) + assert diagnostics.score >= 0.76 + assert diagnostics.hard_veto is False + assert diagnostics.match_type == "exact" + assert diagnostics.price_basis == "total_price" + assert diagnostics.alert_tier == "price_alert_exact" + assert "variant_selection_review" not in diagnostics.reasons + assert expected_reason in diagnostics.reasons + + def test_marketplace_matcher_promotes_focused_manual_gate_exact_lines_to_total_price(): from services.marketplace_product_matcher import score_marketplace_match