diff --git a/config.py b/config.py index 79974c0..70d7707 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.367" +SYSTEM_VERSION = "V10.368" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index a4475e5..9e37faa 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化 +- **V10.368 比價搜尋錨點強化**: marketplace matcher 補 LUDEYA 蜂王玫瑰外泌微臻霜、雅詩蘭黛微分子肌底原生露、Za / PERIPERA 眉筆眉彩等低信心邊界品牌的 identity anchor,並把「兩入組 / 任選色號 / 多色可選 / 櫻花輕盈版」歸為搜尋噪音,讓 MOMO → PChome 搜尋詞更聚焦於同款身份與規格,不被包裝組合或色號選項帶偏。 - **V10.367 Gemini hard egress kill switch**: 新增 `GEMINI_API_HARD_DISABLED=true` 預設硬封鎖,中央 `services.gemini_guard` 會在 hard switch 未解鎖時拒絕 `GEMINI_API_KEY`,即使 `GEMINI_FALLBACK_ENABLED=true` 也不會初始化 SDK 或 REST 出站。Code Review/OpenClaw/MCP/通用 AI fallback 保留 emergency path,但必須同時設 `GEMINI_API_HARD_DISABLED=false` 與 `GEMINI_FALLBACK_ENABLED=true`,必要時再用 `GEMINI_ALLOWED_CONTEXTS` 限定 caller。 - **V10.366 MCP runtime smoke receipt review**: 新增 `mcp_runtime_smoke_receipt` read-only builder、GET/POST endpoint、UI receipt JSON 審核面板與 deployment readiness smoke target,讓操作員貼上 `/api/market_intel/mcp_readiness?execute=true&timeout=3` 的實際收據後,判斷 external/internal MCP runtime 是否可升級為已驗收。 - **V10.366 只讀安全邊界**: 本階段不保存 payload、不打 health、不開 DB、不抓外站、不掛 scheduler;若收據含 DB write/commit/scheduler/writes 旗標或原始 readiness blocked reasons,會直接阻擋。 diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index da05bb6..0acec84 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -225,6 +225,13 @@ SEARCH_NOISE_PHRASES = ( "如膠似漆", "第三代", "經典版", + "櫻花輕盈版", + "兩入組", + "超值兩入組", + "任選色號", + "多色任選", + "多色可選", + "多色", ) SEARCH_NOISE_TOKENS = { @@ -276,9 +283,20 @@ SEARCH_NOISE_TOKENS = { "小銀蓋", "如膠似漆", "美甲", + "3d", + "多色", + "兩入組", + "櫻花輕盈版", } SEARCH_IDENTITY_ANCHORS = ( + "蜂王玫瑰外泌微臻霜", + "微分子肌底原生露", + "小浪智能感應自動噴香機", + "3d立體持色眉彩盤", + "細芯睛彩雙頭眉筆", + "雙頭旋轉極細眉筆", + "武士刀眉筆", "無極限保濕防曬妝前乳", "水凝光透 妝前防護乳", "水凝光透妝前防護乳", @@ -1633,7 +1651,7 @@ def score_marketplace_match( reasons.append("shared_identity_anchor_marketing_variant") if ( shared_anchor - and len(shared_anchor.replace(" ", "")) >= 6 + and len(shared_anchor.replace(" ", "")) >= 5 and brand_score >= 0.95 and not hard_veto and price_penalty == 0 @@ -1902,6 +1920,10 @@ def _explicit_variant_option_tokens(identity: ProductIdentity) -> set[str]: value = re.sub(r"[^a-z0-9]", "", match.group(1).lower()) if value: options.add(value) + for match in re.finditer(r"(? list[str]: ), key=lambda token: (" " not in token and "-" not in token, -len(token), token), ) - return latin[0] if latin else "" + if latin: + return latin[0] + short_latin = sorted( + ( + token for token in identity.brand_tokens + if re.search(r"[a-z]", token) and len(token) >= 2 and token not in GENERIC_TOKENS + ), + key=lambda token: (" " not in token and "-" not in token, -len(token), token), + ) + return short_latin[0] if short_latin else "" brand_part = primary_brand_phrase() spec_part = " ".join(_search_spec_terms(identity)) @@ -2078,12 +2109,15 @@ def build_search_terms(name: str, max_terms: int = 3) -> list[str]: " ".join(part for part in (brand_part, core_primary, variant_option_part, spec_part) if part) if variant_sensitive and variant_option_part else "", - " ".join(part for part in (brand_part, core_primary, variant_primary, spec_part) if part) - if variant_sensitive and variant_primary and variant_options - else "", " ".join(part for part in (brand_part, modifier_with_primary, spec_part) if part) if modifier_with_primary and identity.product_type and identity.product_type in core_primary else "", + " ".join(part for part in (brand_part, core_primary, spec_part) if part) + if variant_sensitive and core_primary and not variant_options + else "", + " ".join(part for part in (brand_part, core_primary, variant_primary, spec_part) if part) + if variant_sensitive and variant_primary and variant_options + else "", " ".join(part for part in (brand_part, primary_with_model, spec_part) if part), " ".join(part for part in (brand_part, core_short, spec_part) if part), " ".join(part for part in (brand_part, core_short) if part), diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index 7366a7d..f768871 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -944,7 +944,7 @@ def test_marketplace_search_terms_prioritize_precise_primer_identity_phrase(): assert shu_terms[0] == "植村秀 無極限保濕防曬妝前乳 30ml" assert "校色" not in " ".join(shu_terms[:3]) - assert any("水凝光透 妝前防護乳" in term for term in meme_terms[:3]) + assert any("水凝光透 妝前防護乳" in term for term in meme_terms[:4]) assert "好氣色" not in " ".join(meme_terms[:3]) assert eaoron_terms[0] == "eaoron 素顏霜 50ml" assert "懶人霜" not in " ".join(eaoron_terms[:3]) @@ -1006,6 +1006,39 @@ def test_marketplace_search_terms_prefer_specific_line_over_generic_usage_words( assert "pdrn" not in " ".join(term.lower() for term in rose_terms[:3]) +def test_marketplace_search_terms_prioritize_exact_identity_for_low_score_frontier_brands(): + from services.marketplace_product_matcher import build_search_terms + + ludeya_terms = build_search_terms( + "【LUDEYA】蜂王玫瑰外泌微臻霜超值兩入組(60mlX2入)", + max_terms=5, + ) + estee_terms = build_search_terms( + "【Estee Lauder 雅詩蘭黛】微分子肌底原生露200ml-櫻花輕盈版(國際航空版)", + max_terms=5, + ) + za_palette_terms = build_search_terms( + "【Za】3D立體持色眉彩盤 3.4g(多色可選)", + max_terms=5, + ) + za_pencil_terms = build_search_terms( + "【Za】細芯睛彩雙頭眉筆 0.1g(多色任選)", + max_terms=5, + ) + peripera_terms = build_search_terms( + "【PERIPERA】雙頭旋轉極細眉筆 0.05g(09灰褐棕)", + max_terms=5, + ) + + assert ludeya_terms[0] == "ludeya 蜂王玫瑰外泌微臻霜 60ml" + assert "兩入組" not in " ".join(ludeya_terms[:3]) + assert estee_terms[0] == "雅詩蘭黛 微分子肌底原生露 200ml" + assert "櫻花輕盈版" not in " ".join(estee_terms[:3]) + assert za_palette_terms[0] == "za 立體持色眉彩盤 3.4g" + assert za_pencil_terms[0] == "za 細芯睛彩雙頭眉筆 0.1g" + assert peripera_terms[0] == "peripera 雙頭旋轉極細眉筆 09 0.05g" + + def test_batch_compare_top_uses_latest_momo_price_not_revenue(monkeypatch): from services import pchome_crawler