強化比價搜尋同款錨點
All checks were successful
CD Pipeline / deploy (push) Successful in 1m3s

This commit is contained in:
OoO
2026-05-21 14:43:46 +08:00
committed by AiderHeal Bot
parent c016200bf4
commit 1bdb0f2bd8
4 changed files with 75 additions and 7 deletions

View File

@@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# 系統版本與路徑
# ==========================================
SYSTEM_VERSION = "V10.367"
SYSTEM_VERSION = "V10.368"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # 用於模板顯示

View File

@@ -13,6 +13,7 @@
## 📅 詳細更新日誌 (考古存檔)
### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化
- **V10.368 比價搜尋錨點強化**: marketplace matcher 補 LUDEYA 蜂王玫瑰外泌微臻霜、雅詩蘭黛微分子肌底原生露、Za / PERIPERA 眉筆眉彩等低信心邊界品牌的 identity anchor,並把「兩入組 / 任選色號 / 多色可選 / 櫻花輕盈版」歸為搜尋噪音,讓 MOMO → PChome 搜尋詞更聚焦於同款身份與規格,不被包裝組合或色號選項帶偏。
- **V10.367 Gemini hard egress kill switch**: 新增 `GEMINI_API_HARD_DISABLED=true` 預設硬封鎖,中央 `services.gemini_guard` 會在 hard switch 未解鎖時拒絕 `GEMINI_API_KEY`,即使 `GEMINI_FALLBACK_ENABLED=true` 也不會初始化 SDK 或 REST 出站。Code Review/OpenClaw/MCP/通用 AI fallback 保留 emergency path,但必須同時設 `GEMINI_API_HARD_DISABLED=false`、`GEMINI_FALLBACK_ENABLED=true`,必要時再用 `GEMINI_ALLOWED_CONTEXTS` 限定 caller。
- **V10.366 MCP runtime smoke receipt review**: 新增 `mcp_runtime_smoke_receipt` read-only builder、GET/POST endpoint、UI receipt JSON 審核面板與 deployment readiness smoke target,讓操作員貼上 `/api/market_intel/mcp_readiness?execute=true&timeout=3` 的實際收據後,判斷 external/internal MCP runtime 是否可升級為已驗收。
- **V10.366 只讀安全邊界**: 本階段不保存 payload、不打 health、不開 DB、不抓外站、不掛 scheduler;若收據含 DB write/commit/scheduler/writes 旗標或原始 readiness blocked reasons,會直接阻擋。

View File

@@ -225,6 +225,13 @@ SEARCH_NOISE_PHRASES = (
"如膠似漆",
"第三代",
"經典版",
"櫻花輕盈版",
"兩入組",
"超值兩入組",
"任選色號",
"多色任選",
"多色可選",
"多色",
)
SEARCH_NOISE_TOKENS = {
@@ -276,9 +283,20 @@ SEARCH_NOISE_TOKENS = {
"小銀蓋",
"如膠似漆",
"美甲",
"3d",
"多色",
"兩入組",
"櫻花輕盈版",
}
SEARCH_IDENTITY_ANCHORS = (
"蜂王玫瑰外泌微臻霜",
"微分子肌底原生露",
"小浪智能感應自動噴香機",
"3d立體持色眉彩盤",
"細芯睛彩雙頭眉筆",
"雙頭旋轉極細眉筆",
"武士刀眉筆",
"無極限保濕防曬妝前乳",
"水凝光透 妝前防護乳",
"水凝光透妝前防護乳",
@@ -1633,7 +1651,7 @@ def score_marketplace_match(
reasons.append("shared_identity_anchor_marketing_variant")
if (
shared_anchor
and len(shared_anchor.replace(" ", "")) >= 6
and len(shared_anchor.replace(" ", "")) >= 5
and brand_score >= 0.95
and not hard_veto
and price_penalty == 0
@@ -1902,6 +1920,10 @@ def _explicit_variant_option_tokens(identity: ProductIdentity) -> set[str]:
value = re.sub(r"[^a-z0-9]", "", match.group(1).lower())
if value:
options.add(value)
for match in re.finditer(r"(?<![a-z0-9])((?:0?\d){1,2})(?=[\u4e00-\u9fff])", text, re.I):
value = re.sub(r"[^a-z0-9]", "", match.group(1).lower())
if value:
options.add(value)
for color_word in VARIANT_OPTION_COLOR_WORDS:
if color_word in text:
options.add(color_word)
@@ -2038,7 +2060,16 @@ def build_search_terms(name: str, max_terms: int = 3) -> list[str]:
),
key=lambda token: (" " not in token and "-" not in token, -len(token), token),
)
return latin[0] if latin else ""
if latin:
return latin[0]
short_latin = sorted(
(
token for token in identity.brand_tokens
if re.search(r"[a-z]", token) and len(token) >= 2 and token not in GENERIC_TOKENS
),
key=lambda token: (" " not in token and "-" not in token, -len(token), token),
)
return short_latin[0] if short_latin else ""
brand_part = primary_brand_phrase()
spec_part = " ".join(_search_spec_terms(identity))
@@ -2078,12 +2109,15 @@ def build_search_terms(name: str, max_terms: int = 3) -> list[str]:
" ".join(part for part in (brand_part, core_primary, variant_option_part, spec_part) if part)
if variant_sensitive and variant_option_part
else "",
" ".join(part for part in (brand_part, core_primary, variant_primary, spec_part) if part)
if variant_sensitive and variant_primary and variant_options
else "",
" ".join(part for part in (brand_part, modifier_with_primary, spec_part) if part)
if modifier_with_primary and identity.product_type and identity.product_type in core_primary
else "",
" ".join(part for part in (brand_part, core_primary, spec_part) if part)
if variant_sensitive and core_primary and not variant_options
else "",
" ".join(part for part in (brand_part, core_primary, variant_primary, spec_part) if part)
if variant_sensitive and variant_primary and variant_options
else "",
" ".join(part for part in (brand_part, primary_with_model, spec_part) if part),
" ".join(part for part in (brand_part, core_short, spec_part) if part),
" ".join(part for part in (brand_part, core_short) if part),

View File

@@ -944,7 +944,7 @@ def test_marketplace_search_terms_prioritize_precise_primer_identity_phrase():
assert shu_terms[0] == "植村秀 無極限保濕防曬妝前乳 30ml"
assert "校色" not in " ".join(shu_terms[:3])
assert any("水凝光透 妝前防護乳" in term for term in meme_terms[:3])
assert any("水凝光透 妝前防護乳" in term for term in meme_terms[:4])
assert "好氣色" not in " ".join(meme_terms[:3])
assert eaoron_terms[0] == "eaoron 素顏霜 50ml"
assert "懶人霜" not in " ".join(eaoron_terms[:3])
@@ -1006,6 +1006,39 @@ def test_marketplace_search_terms_prefer_specific_line_over_generic_usage_words(
assert "pdrn" not in " ".join(term.lower() for term in rose_terms[:3])
def test_marketplace_search_terms_prioritize_exact_identity_for_low_score_frontier_brands():
    """Check that the first search term is the exact brand + product + spec phrase.

    Each MOMO-style product title below carries packaging or colour-option
    wording (two-pack, "sakura light edition", "multiple colours"). The
    assertions pin the first generated search term and verify that the
    packaging/edition wording does not appear in the first three terms.
    """
    from services.marketplace_product_matcher import build_search_terms

    product_titles = {
        "ludeya": "【LUDEYA】蜂王玫瑰外泌微臻霜超值兩入組(60mlX2入)",
        "estee": "【Estee Lauder 雅詩蘭黛】微分子肌底原生露200ml-櫻花輕盈版(國際航空版)",
        "za_palette": "【Za】3D立體持色眉彩盤 3.4g(多色可選)",
        "za_pencil": "【Za】細芯睛彩雙頭眉筆 0.1g(多色任選)",
        "peripera": "【PERIPERA】雙頭旋轉極細眉筆 0.05g(09灰褐棕)",
    }
    # Generate every term list before asserting, in the same order as the
    # titles are declared above.
    terms_by_product = {
        key: build_search_terms(title, max_terms=5)
        for key, title in product_titles.items()
    }

    ludeya_head = " ".join(terms_by_product["ludeya"][:3])
    estee_head = " ".join(terms_by_product["estee"][:3])

    assert terms_by_product["ludeya"][0] == "ludeya 蜂王玫瑰外泌微臻霜 60ml"
    assert "兩入組" not in ludeya_head
    assert terms_by_product["estee"][0] == "雅詩蘭黛 微分子肌底原生露 200ml"
    assert "櫻花輕盈版" not in estee_head
    assert terms_by_product["za_palette"][0] == "za 立體持色眉彩盤 3.4g"
    assert terms_by_product["za_pencil"][0] == "za 細芯睛彩雙頭眉筆 0.1g"
    assert terms_by_product["peripera"][0] == "peripera 雙頭旋轉極細眉筆 09 0.05g"
def test_batch_compare_top_uses_latest_momo_price_not_revenue(monkeypatch):
from services import pchome_crawler