From bd33f7ff60c388c51cc486aa6f93ec7f6f1ff5e3 Mon Sep 17 00:00:00 2001 From: OoO Date: Mon, 25 May 2026 16:05:08 +0800 Subject: [PATCH] =?UTF-8?q?=E5=BC=B7=E5=8C=96=20PChome=20rescore=20?= =?UTF-8?q?=E6=BC=8F=E6=8E=83=E8=88=87=E9=8C=AF=E9=85=8D=E9=98=B2=E7=B7=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TODO_NEXT_STEPS.txt | 1 + config.py | 2 +- .../current_execution_queue_20260524.md | 2 + .../audit_competitor_match_attempt_rescore.py | 9 ++- services/marketplace_product_matcher.py | 65 +++++++++++++++++++ ..._competitor_match_attempt_rescore_audit.py | 28 ++++++++ tests/test_marketplace_product_matcher.py | 35 ++++++++++ 7 files changed, 140 insertions(+), 2 deletions(-) diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index 9b6aa4e..f906d56 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.475 補 PChome rescore 操作與高分錯配防線:`scripts/audit_competitor_match_attempt_rescore.py` 預設不再只掃 `strong_exact_spec_match`,避免漏掉 `focused_exact_*` 等新版 matcher 理由;matcher 新增暖燈 S/M/L 尺寸差、NITORI 香氛噴霧器型號差的 hard veto,並把彩妝色號單邊出現的高分候選送進 `variant_selection_review`,避免 LA MER 氣墊等色號型商品被誤入 accepted queue。測試:`tests/test_marketplace_product_matcher.py`、`tests/test_competitor_match_attempts_persistence.py`、`tests/test_competitor_match_attempt_rescore_audit.py` 通過。 - V10.474 補 PChome near-threshold matcher / feeder 下一階段:新增 HOOOME 白色經典香氛暖燈與 Gdesign Aroma Lava 2.0 的窄範圍 total-price exact 回收;Recipe Box 可撕式水性兒童指甲油只進 identity_review,不自動寫正式價差;Pavaruni 蠟燭 vs 精油、DASHING DIVA 不同款式仍維持 veto/低信心。known-id refresh 現在會對 hard-veto 舊候選執行 fresh search recovery,missing known-id 若 fresh search 只找到低分候選也會保留 best candidate + diagnostics,而非落成 `refresh_no_result`;正式覆寫保護新增 stronger existing guard,避免較弱新候選以高分覆蓋既有強正式配對。測試:`tests/test_marketplace_product_matcher.py`、`tests/test_competitor_match_attempts_persistence.py`、`tests/test_competitor_match_attempt_rescore_audit.py` 通過。 - V10.473 補背景 embedding host_health skip:`allow_111_fallback=false` 會讀最近 `host_health_probes`,跳過 runtime unhealthy 的 GCP 節點(預設 20 分鐘,DB fail-open),避免每筆任務都等待已知壞節點 timeout;路由安全不變,不把背景 embedding 落 111。 - V10.472 補 GCP Ollama failover rootless 診斷:新增 `scripts/ops/diagnose_ollama_gcp_failover.sh`,可一鍵檢查 GCP-A direct、GCP-B direct、111、110:11435、110:11436 與 GCP-B `bge-m3` runtime;目前輸出確認 GCP-A `11434` refused、GCP-B direct/embed OK、110:11435 502、110:11436 OK。110 無免密 sudo、GCP-A 22 refused、GCP-B SSH key denied,因此 primary 修復需 GCP/SSH 或 110 root 權限;應用層維持 GCP-A → GCP-B → 111,不把背景 embedding 落 111。 diff --git a/config.py b/config.py index 466392c..05729a2 100644 --- a/config.py +++ b/config.py @@ -350,7 +350,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.474" +SYSTEM_VERSION = "V10.475" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/current_execution_queue_20260524.md b/docs/memory/current_execution_queue_20260524.md index d646a9c..bf8f720 100644 --- a/docs/memory/current_execution_queue_20260524.md +++ b/docs/memory/current_execution_queue_20260524.md @@ -28,6 +28,7 @@ - 2026-05-25 14:10 CST 起,`V10.472` 補 rootless GCP Ollama failover 診斷腳本與 DevOps SOP:`scripts/ops/diagnose_ollama_gcp_failover.sh` 會檢查 direct GCP-A/GCP-B/111、110 proxy `11435/11436` 與 GCP-B `bge-m3` runtime。現況輸出:GCP-A direct `/api/version` failed/refused、GCP-B direct OK、111 OK、110:11435 502、110:11436 OK、GCP-B embed OK;110 無免密 sudo,`ssh gcp-a` 22 refused、`ssh gcp-b` publickey denied,因此 primary 修復需 GCP/SSH 或 110 root 權限。 - 2026-05-25 14:12 CST 起,`V10.473` 進行背景 embedding host_health skip:`allow_111_fallback=false` 的背景 embedding 會讀最近 `host_health_probes`,若 GCP-A/GCP-B runtime 已被標 unhealthy,直接跳過該節點並開 GCP circuit,不等待 30 秒 timeout、不落 111;DB 讀取失敗 fail-open。 - 2026-05-25 14:45 CST 起,`V10.474` 補 PChome near-threshold matcher / feeder 下一階段:HOOOME 白色經典香氛暖燈、Gdesign Aroma Lava 2.0 進 total-price exact;Recipe Box 可撕式水性兒童指甲油保留 identity_review,不自動寫正式價差;Pavaruni 蠟燭 vs 精油與 DASHING DIVA 不同款式仍不放行。known-id refresh 會對 hard-veto 舊候選跑 fresh search recovery;missing known-id 若 fresh search 只有低分候選,也保留 best candidate + diagnostics,不再只記 `refresh_no_result`;正式覆寫保護新增 stronger existing guard。 +- 2026-05-25 15:20 CST 起,`V10.475` 補 rescore CLI 與高分錯配防線:audit CLI 預設不再只掃 `strong_exact_spec_match`,避免新版 `focused_exact_*` 理由漏掃;matcher 對香氛暖燈 S/M/L 尺寸差、NITORI 香氛噴霧器型號差直接 hard veto,彩妝色號單邊出現時送 `variant_selection_review`,避免高分但不同 variant 的候選被誤推入 accepted queue。 - 2026-05-25 12:05 CST 狀態:`main` 已部署到 188,正式 `/health` 為 `V10.467`,待推 Gitea。兩段變更已合併驗證:V10.466 rescore duplicate 改看 latest-state,7 筆 SKU 最新 attempt 全為 `rescore_accepted_current`,`competitor_prices` / `competitor_price_history` 目標計數未變;V10.467 focused exact matcher 在容器內回 `exact / total_price / price_alert_exact`。本輪 recreate `momo-app`、`scheduler`、`telegram-bot`;未使用 `--remove-orphans`,未碰 `momo-db`。Smoke 通過:三容器 healthy、PChome rescore queue API HTTP 200、Gemini 24 小時無 provider 紀錄、Ollama env 順序維持 GCP-A → GCP-B → 111、3 分鐘三容器 log 未見 Traceback / ERROR / CRITICAL / IntegrityError。 ## 1. MOMO / PChome 核心比價準確率 @@ -66,6 +67,7 @@ - 2026-05-25 12:20 CST 起,matcher 新增 `focused_exact_total_price_safe` 窄範圍通道;目前只覆蓋 3W CLINIC 粉底液 2入、花美水凝膠 3支、The Ordinary 咖啡因 EGCG 30ml、KUSSEN 屁屁膏 3入、Bone 擴香禮盒、1990 融燭燈白色款與 CANMAKE 淚袋盤等已確認同款樣本。這讓高信心 `exact/manual_review` 能轉為 `exact/total_price` 供 rescore pilot 入人工覆核;DASHING DIVA、唇彩、香味、色號/款式敏感商品仍不放行。 - 2026-05-25 12:25 CST production pilot:SKU `6101639`、`10074951`、`7760902`、`TP00074980000005`、`14774766`、`10142589`、`10262470`、`10262471`、`11308520` 已從 `true_low_confidence` materialize 為 `rescore_accepted_current`,全數 `exact/total_price/price_alert_exact` 且理由含 `focused_exact_total_price_safe`。SKU `6101784` 因「即期品」商業條件不同,刻意保留在 `true_low_confidence`,不納入本輪自動入隊。 - 2026-05-25 14:45 CST 起,matcher 擴充至香氛/精油近門檻安全 cohort:HOOOME 白色經典香氛暖燈與 Gdesign Aroma Lava 2.0 可進 `exact/total_price/price_alert_exact`;Recipe Box 可撕式水性兒童指甲油只進 `identity_review`,因兒童指甲油仍可能藏色款/款式。DASHING DIVA 與 Pavaruni cross-type 負例已補測試,避免跨款式、跨劑型誤配。 +- 2026-05-25 15:20 CST 起,新增三個正式觀察到的高分負例防線:PRAY 守夜人暖燈 L vs S、NITORI 香氛噴霧器 5510 vs YX168、LA MER 氣墊粉霜通用 listing vs `11 Rosy Ivory` 色號。前兩者 hard veto,後者保留高分但不進 accepted queue。 ## 3. 12 Agent 決策信封整合 diff --git a/scripts/audit_competitor_match_attempt_rescore.py b/scripts/audit_competitor_match_attempt_rescore.py index e5d2529..029f18e 100755 --- a/scripts/audit_competitor_match_attempt_rescore.py +++ b/scripts/audit_competitor_match_attempt_rescore.py @@ -59,7 +59,14 @@ def main(argv: list[str] | None = None) -> int: parser.add_argument("--input", help="JSONL file path, or '-' for stdin. If omitted, query DATABASE_PATH.") parser.add_argument("--source", default="pchome") parser.add_argument("--status", action="append", dest="statuses", help="Attempt status to include; repeatable.") - parser.add_argument("--reason-filter", default="strong_exact_spec_match") + parser.add_argument( + "--reason-filter", + default=None, + help=( + "Only include rows whose diagnostic_codes contain this text. " + "Default is unfiltered so newer focused matcher reasons are not missed." + ), + ) parser.add_argument("--limit", type=int, default=100) parser.add_argument("--sample-limit", type=int, default=20) parser.add_argument("--min-score", type=float, default=MIN_MATCH_SCORE) diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index 9ef1247..1f6cda8 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -1970,6 +1970,12 @@ def score_marketplace_match( aroma_scent_variant_conflict = _has_aroma_scent_variant_conflict(left, right) if aroma_scent_variant_conflict: reasons.append("aroma_scent_variant_conflict") + wax_lamp_size_letter_conflict = _has_wax_lamp_size_letter_conflict(left, right) + if wax_lamp_size_letter_conflict: + reasons.append("size_letter_variant_conflict") + nitori_diffuser_model_conflict = _has_nitori_diffuser_model_conflict(left, right) + if nitori_diffuser_model_conflict: + reasons.append("nitori_diffuser_model_conflict") variant_selection_review = _has_named_variant_selection_review(left, right, shared_anchor) if variant_selection_review: reasons.append("variant_selection_review") @@ -2021,6 +2027,10 @@ def score_marketplace_match( hard_veto = True if aroma_scent_variant_conflict: hard_veto = True + if wax_lamp_size_letter_conflict: + hard_veto = True + if nitori_diffuser_model_conflict: + hard_veto = True focused_exact_line_reason = _has_focused_low_score_exact_identity_line(left, right) if focused_exact_line_reason in FOCUSED_IDENTITY_REVIEW_ONLY_REASONS: @@ -3007,6 +3017,59 @@ def _has_aroma_scent_variant_conflict(left: ProductIdentity, right: ProductIdent return False +def _standalone_size_letter_tokens(identity: ProductIdentity) -> set[str]: + text = identity.searchable_name + return { + match.group(1).lower() + for match in re.finditer(r"(? bool: + pair_text = f"{left.searchable_name} {right.searchable_name}" + if not any(term in pair_text for term in ("香氛蠟燭暖燈", "蠟燭暖燈", "融蠟燈")): + return False + left_sizes = _standalone_size_letter_tokens(left) + right_sizes = _standalone_size_letter_tokens(right) + return bool(left_sizes and right_sizes and not (left_sizes & right_sizes)) + + +def _has_nitori_diffuser_model_conflict(left: ProductIdentity, right: ProductIdentity) -> bool: + if not ({"nitori", "宜得利家居"} & (left.brand_tokens & right.brand_tokens)): + return False + if "香氛噴霧器" not in left.searchable_name or "香氛噴霧器" not in right.searchable_name: + return False + left_models = _extract_model_tokens(left.searchable_name) | set( + re.findall(r"(? set[str]: + text = identity.searchable_name + tokens = set(_explicit_variant_option_tokens(identity)) + shade_pattern = ( + r"(? bool: + pair_text = f"{left.searchable_name} {right.searchable_name}" + if not any(term in pair_text for term in ("氣墊粉霜", "粉底", "粉霜", "蜜粉", "唇釉", "唇膏")): + return False + left_shades = _makeup_shade_tokens(left) + right_shades = _makeup_shade_tokens(right) + return bool(left_shades) != bool(right_shades) + + def _has_taicend_baby_spray_equivalence(left: ProductIdentity, right: ProductIdentity) -> bool: brand_tokens = {"taicend", "泰陞"} return ( @@ -3734,6 +3797,8 @@ def _has_named_variant_selection_review( right: ProductIdentity, shared_anchor: str, ) -> bool: + if _has_makeup_shade_selection_gap(left, right): + return True left_options = _explicit_variant_option_tokens(left) right_options = _explicit_variant_option_tokens(right) if bool(left_options) != bool(right_options): diff --git a/tests/test_competitor_match_attempt_rescore_audit.py b/tests/test_competitor_match_attempt_rescore_audit.py index 6d83c82..c0b09f2 100644 --- a/tests/test_competitor_match_attempt_rescore_audit.py +++ b/tests/test_competitor_match_attempt_rescore_audit.py @@ -1,4 +1,6 @@ import json +import importlib.util +from pathlib import Path from sqlalchemy import create_engine, text @@ -28,6 +30,32 @@ def _create_match_attempts_table(conn): """)) +def _load_rescore_cli_module(): + module_path = Path(__file__).resolve().parents[1] / "scripts" / "audit_competitor_match_attempt_rescore.py" + spec = importlib.util.spec_from_file_location("audit_competitor_match_attempt_rescore_cli", module_path) + module = importlib.util.module_from_spec(spec) + assert spec and spec.loader + spec.loader.exec_module(module) + return module + + +def test_match_attempt_rescore_cli_defaults_to_unfiltered_reasons(monkeypatch, capsys): + cli = _load_rescore_cli_module() + captured = {} + + def fake_build_match_attempt_rescore_audit(_engine, **kwargs): + captured.update(kwargs) + return {"scanned": 0} + + monkeypatch.setattr(cli, "create_engine", lambda _database_path: object()) + monkeypatch.setattr(cli, "build_match_attempt_rescore_audit", fake_build_match_attempt_rescore_audit) + + assert cli.main(["--limit", "1"]) == 0 + + capsys.readouterr() + assert captured["reason_filter"] is None + + def test_match_attempt_rescore_audit_classifies_current_gate_pass_and_veto(): from services.competitor_match_attempt_rescore_audit import summarize_match_attempt_rescore diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index ba3448f..849ba92 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -2187,6 +2187,41 @@ def test_marketplace_matcher_keeps_aroma_and_nail_variant_gaps_blocked(): assert "variant_descriptor_conflict" in dashing_cross_style.reasons +def test_marketplace_matcher_blocks_high_score_wax_lamp_and_device_variant_gaps(): + from services.marketplace_product_matcher import score_marketplace_match + + pray_size_gap = score_marketplace_match( + "【韓國PRAY】守夜人金屬香氛蠟燭暖燈-復古金(L/專櫃公司貨)", + "【韓國EPOCHSIA x Pray】守夜人金屬香氛蠟燭暖燈(S)-復古金", + ) + nitori_model_gap = score_marketplace_match( + "【NITORI 宜得利家居】香氛噴霧器 5510(香氛)", + "【NITORI 宜得利家居】香氛噴霧器 YX168 WH", + ) + + assert pray_size_gap.hard_veto is True + assert pray_size_gap.comparison_mode == "not_comparable" + assert "size_letter_variant_conflict" in pray_size_gap.reasons + assert nitori_model_gap.hard_veto is True + assert nitori_model_gap.comparison_mode == "not_comparable" + assert "nitori_diffuser_model_conflict" in nitori_model_gap.reasons + + +def test_marketplace_matcher_sends_single_sided_makeup_shade_to_review(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【LA MER 海洋拉娜】奇蹟煥采氣墊粉霜 24g(12g x 2)專櫃公司貨", + "《LA MER 海洋拉娜》奇蹟煥采氣墊粉霜 24g(12g x 2)-11 Rosy Ivory", + ) + + assert diagnostics.hard_veto is False + assert diagnostics.score >= 0.76 + assert diagnostics.price_basis == "manual_review" + assert diagnostics.alert_tier == "identity_review" + assert "variant_selection_review" in diagnostics.reasons + + def test_marketplace_matcher_promotes_eaoron_classic_tone_up_cream_exact_line(): from services.marketplace_product_matcher import score_marketplace_match