V10.391 handle catalog variant listings
All checks were successful
CD Pipeline / deploy (push) Successful in 1m7s

This commit is contained in:
OoO
2026-05-24 10:32:42 +08:00
committed by AiderHeal Bot
parent 0538c8d5f6
commit 6d4b188787
6 changed files with 174 additions and 4 deletions

View File

@@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# System version and paths
# ==========================================
SYSTEM_VERSION = "V10.390"
# NOTE(review): SYSTEM_VERSION is bound twice in this span; this second
# assignment ("V10.391") is the value that takes effect.
SYSTEM_VERSION = "V10.391"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # used for template display

View File

@@ -13,6 +13,7 @@
## 📅 詳細更新日誌 (考古存檔)
### 2026-05-21 瀏覽器測試守門與 PChome 熱路徑優化
- **V10.391 多款任選 catalog listing 防錯配**: marketplace matcher 新增 `catalog_variant_listing_alignment`,當 MOMO/PChome 雙方都是多款/多色/多香味任選 listing,且商品線、規格與類型一致時可放行香氛擴香罐、香氛蠟燭等 catalog 型同款;同時把 Relove 菸鹼醯胺 vs 胺基酸私密清潔凝露列為變體衝突,並讓 competitor feeder 不再只因 `strong_exact_spec_match` 就把低分候選視為 recoverable,避免只同規格但品線不同的商品回寫正式比價。
- **V10.390 PChome 近門檻商品比對規則**: marketplace matcher 補 17 組近門檻真同款召回與錯配防線,包含 OBgE 防曬棒、ARTMIS 私密清潔慕斯、Seche Vite 快乾亮油、TAICEND 屁屁噴、femfresh / VIGILL 私密清潔、Solone 眼部飾底乳、HYDSTO 車載香薰、小米 S101 刮鬍刀、PRAMY 定妝噴霧、I'M MEME 修容打亮棒、檜山坊滾珠精油、ARM&HAMMER 體香膏、Brush Baby WildOnes 電動牙刷與 Palmer's 按摩乳;同時把香氛/私密慕斯/定妝噴霧 finish 差異列為 variant-sensitive,避免不同香味、蔓越莓 vs 金縷梅、柔焦霧面 vs 水光亮面被誤推成直接價格告警。
- **V10.388 精華乳 / 精華霜變體防錯配**: marketplace matcher 新增精華類 formulation conflict guard,當共享 identity anchor 只到「精華」但一側是「精華乳」、另一側是「精華霜 / 精華液」時會標記 `variant_descriptor_conflict` 並壓低同款分數,避免自白肌等同品牌相近品線被錯推成 PChome/MOMO 可直接價格告警。Competitor feeder 同步會用最新 matcher 重新驗舊配對;若舊 `identity_v2` 已被現行 matcher 判成低分或 veto,允許新的高信心候選替換,避免歷史錯配卡住正式 `competitor_prices`。
- **V10.387 EA 比價 HITL 告警證據排版**: Elephant Alpha 的 DB evidence 與 Hermes pre-fetch action 現在會把 PChome/MOMO 同款證據帶進 Telegram,`match_type`、`price_basis`、`alert_tier`、`match_score` 會獨立成「證據」行,讓人工審核能分辨高信心同款、總價可比、單位價覆核與身份覆核,不再只看到乾巴巴的 `MOMO vs PChome` 長句。同版 marketplace matcher 補 Relove「私密潔淨凝露」identity anchor 與聯名款搜尋噪音,避免 PLAY BOY / 小虎等活動詞壓過真同款名稱。

View File

@@ -55,7 +55,6 @@ BROWSE_SH_MAX_EXECUTIONS_PER_RUN = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_MAX_PE
BROWSE_SH_OUTPUT_PREVIEW_CHARS = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_OUTPUT_PREVIEW_CHARS", "1200"))
RECOVERABLE_DIAGNOSTIC_REASONS = {
"strong_product_line_match",
"strong_exact_spec_match",
"shared_identity_anchor",
"shared_identity_anchor_no_spec",
"shared_identity_anchor_packaging_variant",
@@ -93,6 +92,15 @@ def _has_recoverable_identity_signal(diagnostics) -> bool:
)
def _is_multi_variant_listing_name(name: str) -> bool:
return bool(
re.search(
r"(多款任選|多款可選|多色任選|多色可選|多種香味|多種香氣|香味任選|香味可選|味道可選)",
name or "",
)
)
def _classify_low_score_attempt(score: float, diagnostics) -> str:
if getattr(diagnostics, "hard_veto", False):
return "identity_veto"
@@ -117,10 +125,11 @@ def _has_variant_selection_gap(
source_identity = parse_product_identity(momo_name)
source_options = set(_explicit_variant_option_tokens(source_identity))
source_multi_listing = _is_multi_variant_listing_name(momo_name)
if re.search(r"任選\s*[一二兩三四五六七八九十0-9]+\s*款", momo_name):
source_options -= {str(value) for value in range(1, 11)}
source_options -= {f"{value:02d}" for value in range(1, 11)}
if source_options:
if source_options and not source_multi_listing:
return False
threshold = max(best_score - 0.02, RECOVERABLE_LOW_SCORE_FLOOR)
@@ -128,7 +137,10 @@ def _has_variant_selection_gap(
for product, score, diagnostics in ranked_matches[:5]:
if getattr(diagnostics, "hard_veto", False) or score < threshold:
continue
candidate_identity = parse_product_identity(getattr(product, "name", "") or "")
candidate_name = getattr(product, "name", "") or ""
if source_multi_listing != _is_multi_variant_listing_name(candidate_name):
return True
candidate_identity = parse_product_identity(candidate_name)
options = _explicit_variant_option_tokens(candidate_identity)
if len(options) >= 2:
return True

View File

@@ -454,6 +454,9 @@ VARIANT_SENSITIVE_KEYWORDS = {
"車用香氛",
"香味",
"私密清潔慕斯",
"私密清潔凝露",
"私密潔淨凝露",
"私密淨白清潔凝露",
"定妝噴霧",
"妝前防護乳",
"妝前乳",
@@ -492,6 +495,8 @@ VARIANT_OPTION_COLOR_WORDS = {
"金縷梅",
"柔焦霧面",
"水光亮面",
"菸鹼醯胺",
"胺基酸",
"黑色",
"棕色",
"咖啡色",
@@ -534,6 +539,20 @@ VARIANT_DESCRIPTOR_NOISE_KEYWORDS = {
"盒組",
}
# Substrings that mark a product title as a "pick any variant" catalog listing
# (several styles / colours / scents sold under one listing).
# NOTE(review): where this tuple is consumed with a plain `phrase in text`
# substring test, the bare "任選" and "可選" entries already match every longer
# phrase that contains them, so only "多種香味" / "多種香氣" add extra coverage.
# Confirm that this broad match (e.g. any title containing "任選") is intended.
MULTI_VARIANT_LISTING_PHRASES = (
"多款任選",
"多款可選",
"多色任選",
"多色可選",
"多種香味",
"多種香氣",
"香味任選",
"香味可選",
"味道可選",
"任選",
"可選",
)
SEARCH_AMBIGUOUS_PRODUCT_TERMS = {
"保護膜",
"保護貼",
@@ -2009,6 +2028,18 @@ def score_marketplace_match(
):
score += 0.04
reasons.append("shared_model_token_brush_baby_wildones")
if (
_has_catalog_variant_listing_alignment(left, right)
and brand_score >= 0.95
and not hard_veto
and price_penalty == 0
and spec_score >= 0.85
and type_score >= 0.95
and sequence_score >= 0.50
and not variant_descriptor_conflict
):
score += 0.06
reasons.append("catalog_variant_listing_alignment")
if (
shared_anchor
and len(shared_anchor.replace(" ", "")) >= 5
@@ -2390,6 +2421,37 @@ def _has_brush_baby_wildones_toothbrush_alignment(left: ProductIdentity, right:
)
def _is_relove_private_cleanser_line(left: ProductIdentity, right: ProductIdentity) -> bool:
return (
"relove" in (left.brand_tokens | right.brand_tokens)
and "私密" in left.searchable_name
and "私密" in right.searchable_name
and "凝露" in left.searchable_name
and "凝露" in right.searchable_name
)
def _is_multi_variant_catalog_listing(identity: ProductIdentity) -> bool:
    """Return True when the identity's normalized name contains a multi-variant phrase.

    Each entry of ``MULTI_VARIANT_LISTING_PHRASES`` is checked as a plain
    substring of ``identity.normalized_name``; the first hit wins.
    """
    normalized = identity.normalized_name
    for phrase in MULTI_VARIANT_LISTING_PHRASES:
        if phrase in normalized:
            return True
    return False
def _has_catalog_variant_listing_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Decide whether two multi-variant catalog listings line up as the same catalog product.

    All of the following must hold:
      * both identities are multi-variant catalog listings;
      * both carry the same ``product_type`` and it is "精油" or "護唇膏";
      * their base specs overlap (per ``_has_overlapping_base_spec``);
      * they share at least one core token, or both searchable names contain
        the same catalog term from the fixed list below.
    """
    both_catalog = _is_multi_variant_catalog_listing(left) and _is_multi_variant_catalog_listing(right)
    if not both_catalog:
        return False

    same_supported_type = (
        left.product_type == right.product_type
        and left.product_type in {"精油", "護唇膏"}
    )
    if not same_supported_type:
        return False

    if not _has_overlapping_base_spec(left, right):
        return False

    # A shared core token is sufficient on its own.
    if left.core_tokens & right.core_tokens:
        return True

    # Otherwise fall back to a catalog term present in both names.
    name_a = left.searchable_name
    name_b = right.searchable_name
    for term in ("香氛擴香罐", "香氛蠟燭", "蠟燭", "擴香罐", "修護唇膏"):
        if term in name_a and term in name_b:
            return True
    return False
def _is_variant_sensitive_identity(
left: ProductIdentity,
right: ProductIdentity,
@@ -2412,6 +2474,8 @@ def _has_variant_descriptor_conflict(left: ProductIdentity, right: ProductIdenti
return False
if _has_brush_baby_wildones_toothbrush_alignment(left, right):
return False
if _is_relove_private_cleanser_line(left, right):
return False
if (
shared_anchor
and shared_anchor not in SEARCH_BROAD_ANCHORS

View File

@@ -666,6 +666,70 @@ def test_competitor_feeder_marks_weak_identity_as_true_low_confidence(monkeypatc
assert attempts[0]["attempt_status"] == "true_low_confidence"
def test_competitor_feeder_does_not_treat_spec_only_match_as_recoverable(monkeypatch):
    """A candidate whose only reason is ``strong_exact_spec_match`` stays low-confidence.

    The crawler is stubbed to return one PChome candidate and the scorer is
    stubbed to return 0.748 with ``strong_exact_spec_match`` as its sole
    reason. The feeder is expected to match nothing, count one low-score
    skip, and record the attempt as ``true_low_confidence``.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    spec_only_candidate = PChomeProduct(
        product_id="DDAB01-SPEC",
        name="LANCOME 蘭蔻 超極限肌因精華露150ml 專櫃公司貨",
        price=3200,
        original_price=3600,
        discount=11,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-SPEC",
        stock=20,
        store="24h",
        rating=4.6,
        review_count=12,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        """Crawler stand-in whose search always yields the single candidate."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [spec_only_candidate]

    def fake_score(*_args, **_kwargs):
        # Fixed diagnostics: same spec, weak token/type similarity.
        return SimpleNamespace(
            score=0.748,
            brand_score=1.0,
            token_score=0.42,
            spec_score=1.0,
            sequence_score=0.49,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=False,
            reasons=("strong_exact_spec_match",),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)

    feeder = CompetitorPriceFeeder(engine=object())
    recorded_attempts = []

    def capture_attempt(*args, **kwargs):
        # Only the keyword arguments are kept, as the assertions read them.
        recorded_attempts.append(kwargs)

    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    momo_item = {
        "sku": "LAN001",
        "name": "【LANCOME 蘭蔻】官方直營 超極光活粹晶露150ml",
        "product_id": 10,
        "momo_price": 3200,
    }
    run_result = feeder._run_sku_items([momo_item])

    assert run_result.matched == 0
    assert run_result.skipped_low_score == 1
    assert recorded_attempts[0]["attempt_status"] == "true_low_confidence"
def test_competitor_feeder_downgrades_variant_selection_gap_from_recoverable(monkeypatch):
from services.competitor_price_feeder import CompetitorPriceFeeder
from services.pchome_crawler import PChomeProduct

View File

@@ -774,6 +774,10 @@ def test_marketplace_matcher_rejects_fragrance_formula_and_finish_variant_mismat
"【PRAMY 柏瑞美】磁吸控油定妝噴霧 100ML(柔焦霧面)",
"【柏瑞美PRAMY】 磁吸控油定粧噴霧 水光亮面",
),
(
"【Relove】8%菸鹼醯胺私密淨白清潔凝露120ml(私密清潔 私密美白 涼感潔淨 PH3.8弱酸呵護)",
"RELOVE胺基酸私密清潔凝露120ml",
),
]
for momo_name, competitor_name in cases:
@@ -784,6 +788,31 @@ def test_marketplace_matcher_rejects_fragrance_formula_and_finish_variant_mismat
assert "variant_option_conflict" in diagnostics.reasons
def test_marketplace_matcher_promotes_multi_variant_catalog_listings():
    """Multi-variant catalog listings of the same product should score as a match.

    For every title pair the matcher must return a score of at least 0.76,
    no hard veto, and the ``catalog_variant_listing_alignment`` reason.
    """
    from services.marketplace_product_matcher import score_marketplace_match

    listing_pairs = (
        (
            "【日本Johns Blend】香氛擴香罐85g(車用/任選/白麝香/黑麝香/茉莉/櫻花/繡球花/魔髮奇緣/青檸羅勒)",
            "日本Johns Blend 車用香氛擴香罐85g(多款可選)",
        ),
        (
            "【COCODOR】香氛蠟燭170g(多款任選/官方直營)",
            "COCODOR Premium Jar Candle 香氛精油蠟燭170g(多種香味任選)",
        ),
        (
            "【COCODOR】香氛蠟燭95g(多款任選/官方直營)",
            "COCODOR Premium Jar Candle 香氛精油蠟燭95g(多種香味任選)",
        ),
    )

    for momo_title, competitor_title in listing_pairs:
        outcome = score_marketplace_match(momo_title, competitor_title)
        assert outcome.score >= 0.76
        assert outcome.hard_veto is False
        assert "catalog_variant_listing_alignment" in outcome.reasons
def test_marketplace_matcher_rejects_refill_core_vs_case_only_pack():
from services.marketplace_product_matcher import score_marketplace_match