[V10.355] rescue near-threshold identity cohorts
This commit is contained in:
@@ -51,6 +51,10 @@ REVIEW_STATUS_OPTIONS = [
|
||||
'refresh_unit_comparable',
|
||||
'identity_veto',
|
||||
'low_score',
|
||||
'refresh_low_score',
|
||||
'recoverable_low_score',
|
||||
'true_low_confidence',
|
||||
'protected_existing_match',
|
||||
'expired_match',
|
||||
'refresh_no_result',
|
||||
'no_result',
|
||||
@@ -62,7 +66,8 @@ REVIEW_STATUS_OPTIONS = [
|
||||
'statuses': ('unit_comparable', 'refresh_unit_comparable'),
|
||||
},
|
||||
{'key': 'identity_veto', 'label': '已排除', 'statuses': ('identity_veto',)},
|
||||
{'key': 'low_score', 'label': '低信心', 'statuses': ('low_score',)},
|
||||
{'key': 'low_score', 'label': '低信心', 'statuses': ('low_score', 'refresh_low_score', 'recoverable_low_score', 'true_low_confidence')},
|
||||
{'key': 'protected_existing_match', 'label': '既有保護', 'statuses': ('protected_existing_match',)},
|
||||
{'key': 'expired_match', 'label': '價格過期', 'statuses': ('expired_match',)},
|
||||
{'key': 'no_result', 'label': '找不到同款', 'statuses': ('no_result', 'refresh_no_result')},
|
||||
]
|
||||
@@ -249,24 +254,30 @@ def _build_pchome_match_status(attempt=None, ineligible=None):
|
||||
candidate_count = int(attempt.get('candidate_count') or 0)
|
||||
score_text = f"最佳候選 {round(score * 100)}%" if score is not None else "尚無候選分數"
|
||||
|
||||
if status in {'low_score', 'refresh_low_score'}:
|
||||
if status in {'low_score', 'refresh_low_score', 'recoverable_low_score', 'true_low_confidence'}:
|
||||
diagnostic_text = attempt.get('error_message') or ''
|
||||
label, summary = _diagnostic_match_rejection_label(
|
||||
diagnostic_text,
|
||||
score_text,
|
||||
blocked=False,
|
||||
)
|
||||
if status == 'recoverable_low_score':
|
||||
label = '近門檻可回收'
|
||||
summary = '同品線證據已足夠,但分數仍略低於正式採用門檻'
|
||||
elif status == 'true_low_confidence':
|
||||
label = '證據不足'
|
||||
summary = '目前候選仍缺乏足夠身份證據,先保守不採用'
|
||||
return {
|
||||
'label': label,
|
||||
'tone': 'neutral',
|
||||
'summary': summary,
|
||||
'detail': f'{candidate_count} 筆候選',
|
||||
}
|
||||
if status in {'needs_review', 'refresh_needs_review'}:
|
||||
if status in {'needs_review', 'refresh_needs_review', 'protected_existing_match'}:
|
||||
return {
|
||||
'label': '配對衝突待審',
|
||||
'label': '既有配對保護',
|
||||
'tone': 'neutral',
|
||||
'summary': '新候選與既有配對不同,需人工確認後再覆蓋',
|
||||
'summary': '新候選合理,但正式環境已存在更強既有配對,需人工確認後才覆蓋',
|
||||
'detail': f'{score_text} / {candidate_count} 筆候選',
|
||||
}
|
||||
if status in {'no_result', 'no_match', 'refresh_no_match'}:
|
||||
|
||||
@@ -28,6 +28,10 @@ ACTIONABLE_ATTEMPT_STATUSES = {
|
||||
"refresh_unit_comparable",
|
||||
"identity_veto",
|
||||
"low_score",
|
||||
"refresh_low_score",
|
||||
"recoverable_low_score",
|
||||
"true_low_confidence",
|
||||
"protected_existing_match",
|
||||
"expired_match",
|
||||
"refresh_no_result",
|
||||
"no_result",
|
||||
@@ -35,7 +39,8 @@ ACTIONABLE_ATTEMPT_STATUSES = {
|
||||
REVIEW_STATUS_FILTER_GROUPS = {
|
||||
"unit_comparable": ("unit_comparable", "refresh_unit_comparable"),
|
||||
"identity_veto": ("identity_veto",),
|
||||
"low_score": ("low_score",),
|
||||
"low_score": ("low_score", "refresh_low_score", "recoverable_low_score", "true_low_confidence"),
|
||||
"protected_existing_match": ("protected_existing_match",),
|
||||
"expired_match": ("expired_match",),
|
||||
"no_result": ("no_result", "refresh_no_result"),
|
||||
}
|
||||
@@ -44,6 +49,10 @@ ATTEMPT_STATUS_LABELS = {
|
||||
"refresh_unit_comparable": "需單位價比較",
|
||||
"identity_veto": "身份否決",
|
||||
"low_score": "低信心待審",
|
||||
"refresh_low_score": "刷新後仍低信心",
|
||||
"recoverable_low_score": "近門檻可救回",
|
||||
"true_low_confidence": "證據不足待觀察",
|
||||
"protected_existing_match": "既有較強配對保護中",
|
||||
"expired_match": "價格過期待刷新",
|
||||
"refresh_no_result": "刷新找不到商品",
|
||||
"no_result": "找不到同款",
|
||||
@@ -58,6 +67,10 @@ ATTEMPT_ACTION_LABELS = {
|
||||
"refresh_unit_comparable": "人工確認檔期、贈品與單位價",
|
||||
"identity_veto": "確認是否為不同商品線或規格",
|
||||
"low_score": "人工審核候選商品身份",
|
||||
"refresh_low_score": "檢查 refresh 後是否還有更好的同款候選",
|
||||
"recoverable_low_score": "優先回放這批近門檻同品線候選",
|
||||
"true_low_confidence": "保守保留,等待更明確的身份證據",
|
||||
"protected_existing_match": "比較新舊候選證據,避免覆蓋較強正式配對",
|
||||
"expired_match": "重新刷新 PChome 價格",
|
||||
"refresh_no_result": "調整搜尋詞後重抓",
|
||||
"no_result": "補充搜尋詞或品牌關鍵字",
|
||||
@@ -755,9 +768,11 @@ def _review_queue_cte_and_filter(
|
||||
CASE
|
||||
WHEN la.attempt_status IN ('unit_comparable', 'refresh_unit_comparable') THEN 0
|
||||
WHEN la.attempt_status = 'identity_veto' THEN 1
|
||||
WHEN la.attempt_status = 'low_score' THEN 2
|
||||
WHEN la.attempt_status = 'expired_match' THEN 3
|
||||
ELSE 4
|
||||
WHEN la.attempt_status IN ('recoverable_low_score', 'low_score', 'refresh_low_score') THEN 2
|
||||
WHEN la.attempt_status = 'protected_existing_match' THEN 3
|
||||
WHEN la.attempt_status = 'true_low_confidence' THEN 4
|
||||
WHEN la.attempt_status = 'expired_match' THEN 5
|
||||
ELSE 6
|
||||
END AS priority_rank
|
||||
FROM latest_momo lm
|
||||
JOIN latest_attempt la ON la.sku = lm.sku
|
||||
@@ -902,6 +917,10 @@ def _fetch_competitor_review_queue_uncached(engine, limit: int = 12) -> list[dic
|
||||
'refresh_unit_comparable',
|
||||
'identity_veto',
|
||||
'low_score',
|
||||
'refresh_low_score',
|
||||
'recoverable_low_score',
|
||||
'true_low_confidence',
|
||||
'protected_existing_match',
|
||||
'expired_match',
|
||||
'refresh_no_result',
|
||||
'no_result'
|
||||
@@ -910,9 +929,11 @@ def _fetch_competitor_review_queue_uncached(engine, limit: int = 12) -> list[dic
|
||||
CASE
|
||||
WHEN la.attempt_status IN ('unit_comparable', 'refresh_unit_comparable') THEN 0
|
||||
WHEN la.attempt_status = 'identity_veto' THEN 1
|
||||
WHEN la.attempt_status = 'low_score' THEN 2
|
||||
WHEN la.attempt_status = 'expired_match' THEN 3
|
||||
ELSE 4
|
||||
WHEN la.attempt_status IN ('recoverable_low_score', 'low_score', 'refresh_low_score') THEN 2
|
||||
WHEN la.attempt_status = 'protected_existing_match' THEN 3
|
||||
WHEN la.attempt_status = 'true_low_confidence' THEN 4
|
||||
WHEN la.attempt_status = 'expired_match' THEN 5
|
||||
ELSE 6
|
||||
END,
|
||||
lm.momo_price DESC NULLS LAST,
|
||||
la.best_match_score DESC NULLS LAST,
|
||||
|
||||
@@ -45,6 +45,19 @@ BATCH_SIZE = 30 # 每批 DB 寫入筆數
|
||||
RATE_DELAY = float(os.getenv("PCHOME_FEEDER_RATE_DELAY", "1.0")) # 每次 PChome 請求間隔(秒)
|
||||
TTL_HOURS = 6 # competitor_prices 快取有效期
|
||||
REQUEST_TIMEOUT = float(os.getenv("PCHOME_FEEDER_TIMEOUT", "12")) # 避免外部搜尋 API 長時間卡住排程
|
||||
# Lowest score still treated as "near threshold": at most 0.03 below the
# acceptance threshold MIN_MATCH_SCORE, and never lower than 0.72.
RECOVERABLE_LOW_SCORE_FLOOR = max(MIN_MATCH_SCORE - 0.03, 0.72)

# Diagnostic reason codes that count as identity evidence when deciding
# whether a below-threshold attempt is worth recovering.
RECOVERABLE_DIAGNOSTIC_REASONS = {
    # Product-line / spec / model level agreement.
    "strong_product_line_match",
    "strong_exact_spec_match",
    "shared_model_token",
    "spec_name_alignment",
    # Shared identity anchor and its sub-variants.
    "shared_identity_anchor",
    "shared_identity_anchor_no_spec",
    "shared_identity_anchor_packaging_variant",
    "shared_identity_anchor_marketing_variant",
    "shared_identity_anchor_core_line",
    "shared_identity_anchor_variant_safe",
}
|
||||
|
||||
# ── Feeder 結果 ───────────────────────────────────────
|
||||
@dataclass
|
||||
@@ -59,6 +72,28 @@ class FeederResult:
|
||||
attempts_written: int = 0
|
||||
|
||||
|
||||
def _has_recoverable_identity_signal(diagnostics) -> bool:
    """Tell whether match diagnostics still carry usable identity evidence.

    Returns True when any of the diagnostics' ``reasons`` is listed in
    RECOVERABLE_DIAGNOSTIC_REASONS. Otherwise it returns True only when
    ``brand_score``, ``token_score`` and ``sequence_score`` reach 0.95, 0.56
    and 0.50 respectively and ``comparison_mode`` is ``"exact_identity"``.
    Missing score attributes are read as 0; a missing ``comparison_mode`` is
    read as ``"exact_identity"``. Falsy diagnostics always give False.
    """
    if not diagnostics:
        return False

    observed_reasons = set(getattr(diagnostics, "reasons", ()) or ())
    if not observed_reasons.isdisjoint(RECOVERABLE_DIAGNOSTIC_REASONS):
        return True

    # Fallback: judge by component scores, checked in the same order as a
    # short-circuiting ``and`` chain would evaluate them.
    score_floors = (
        ("brand_score", 0.95),
        ("token_score", 0.56),
        ("sequence_score", 0.50),
    )
    for attribute, floor in score_floors:
        if not getattr(diagnostics, attribute, 0) >= floor:
            return False

    mode = getattr(diagnostics, "comparison_mode", "exact_identity")
    return mode == "exact_identity"
|
||||
|
||||
|
||||
def _classify_low_score_attempt(score: float, diagnostics) -> str:
    """Pick the attempt status for a candidate that was not accepted.

    Returns ``"identity_veto"`` when the diagnostics carry a truthy
    ``hard_veto``. Returns ``"recoverable_low_score"`` when ``score`` is at
    least RECOVERABLE_LOW_SCORE_FLOOR and the diagnostics show a recoverable
    identity signal. Everything else is ``"true_low_confidence"``.
    """
    hard_vetoed = getattr(diagnostics, "hard_veto", False)
    if hard_vetoed:
        return "identity_veto"

    # The identity-signal check only runs for scores at or above the floor.
    is_recoverable = (
        score >= RECOVERABLE_LOW_SCORE_FLOOR
        and _has_recoverable_identity_signal(diagnostics)
    )
    return "recoverable_low_score" if is_recoverable else "true_low_confidence"
|
||||
|
||||
|
||||
def _extract_tags(pchome_product) -> list:
|
||||
"""
|
||||
從 PChomeProduct 物件提取語意標籤
|
||||
@@ -716,7 +751,8 @@ class CompetitorPriceFeeder:
|
||||
|
||||
這條路徑不重新搜尋,只用前次留下的 PChome product_id 批次查詢最新商品資料,
|
||||
適合把舊 scorer 卡在 0.70~0.759 的真同款重新推進正式比價。
|
||||
已重評後仍不足門檻的 refresh_low_score 不再重複進隊列,避免排程空轉。
|
||||
僅重跑明顯仍在 exact identity 軌道內、具回收價值的候選;
|
||||
真正低信心與 hard veto 不再反覆空轉。
|
||||
"""
|
||||
if self.engine is None:
|
||||
raise RuntimeError("需要注入 SQLAlchemy engine")
|
||||
@@ -769,7 +805,7 @@ class CompetitorPriceFeeder:
|
||||
AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2'
|
||||
WHERE lm.rn = 1
|
||||
AND cp.sku IS NULL
|
||||
AND la.attempt_status = 'low_score'
|
||||
AND la.attempt_status IN ('low_score', 'refresh_low_score', 'recoverable_low_score')
|
||||
AND la.best_competitor_product_id IS NOT NULL
|
||||
AND la.best_competitor_product_id <> ''
|
||||
AND COALESCE(la.best_match_score, 0) >= :min_score
|
||||
@@ -1212,7 +1248,7 @@ class CompetitorPriceFeeder:
|
||||
continue
|
||||
|
||||
if score < MIN_MATCH_SCORE and not manual_accept_override:
|
||||
attempt_status = "identity_veto" if getattr(diagnostics, "hard_veto", False) else "low_score"
|
||||
attempt_status = _classify_low_score_attempt(score, diagnostics)
|
||||
logger.debug(
|
||||
f"[Feeder] {sku} 比對分數過低 ({score:.3f} < {MIN_MATCH_SCORE}),"
|
||||
f"{_format_match_diagnostics(diagnostics)}"
|
||||
@@ -1263,7 +1299,7 @@ class CompetitorPriceFeeder:
|
||||
momo_price=momo_price,
|
||||
search_terms=search_terms,
|
||||
candidate_count=len(products),
|
||||
attempt_status="needs_review",
|
||||
attempt_status="protected_existing_match",
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
@@ -1439,7 +1475,7 @@ class CompetitorPriceFeeder:
|
||||
momo_price=momo_price,
|
||||
search_terms=attempt_terms,
|
||||
candidate_count=max(1, recovery_candidate_count),
|
||||
attempt_status="refresh_needs_review",
|
||||
attempt_status="protected_existing_match",
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
@@ -1571,7 +1607,7 @@ class CompetitorPriceFeeder:
|
||||
momo_price=momo_price,
|
||||
search_terms=attempt_terms,
|
||||
candidate_count=candidate_count,
|
||||
attempt_status="refresh_needs_review",
|
||||
attempt_status="protected_existing_match",
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
@@ -1611,7 +1647,7 @@ class CompetitorPriceFeeder:
|
||||
attempts_written += 1
|
||||
continue
|
||||
|
||||
attempt_status = "identity_veto" if getattr(diagnostics, "hard_veto", False) else "refresh_low_score"
|
||||
attempt_status = _classify_low_score_attempt(score, diagnostics)
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
@@ -1651,7 +1687,7 @@ class CompetitorPriceFeeder:
|
||||
momo_price=momo_price,
|
||||
search_terms=search_terms,
|
||||
candidate_count=1,
|
||||
attempt_status="refresh_needs_review",
|
||||
attempt_status="protected_existing_match",
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
|
||||
@@ -244,6 +244,7 @@ SEARCH_NOISE_TOKENS = {
|
||||
}
|
||||
|
||||
SEARCH_IDENTITY_ANCHORS = (
|
||||
"時尚潮流美甲片",
|
||||
"止汗爽身噴霧",
|
||||
"止汗爽身乳膏pro",
|
||||
"零粉感超持久粉底棒",
|
||||
@@ -258,6 +259,7 @@ SEARCH_IDENTITY_ANCHORS = (
|
||||
"裸光幻閃亮采餅",
|
||||
"絕對持久定妝噴霧",
|
||||
"兒童防曬氣墊粉餅",
|
||||
"勝過眼皮十色眼影盤",
|
||||
"提提亮膚打亮液",
|
||||
"甜甜嫩頰腮紅液",
|
||||
"自動武士刀眉筆",
|
||||
@@ -330,6 +332,22 @@ SEARCH_BROAD_ANCHORS = {
|
||||
"香氛融蠟燈",
|
||||
}
|
||||
|
||||
# Product-name keywords matched by substring to flag a product line as
# variant-sensitive.
VARIANT_SENSITIVE_KEYWORDS = {
    # Nails
    "美甲片",
    # Eyes and brows
    "眼影盤",
    "眉筆",
    "眼線筆",
    # Lips
    "唇釉",
    "唇膏",
    "唇凍",
    "潤唇膏",
    # Cheek / highlight liquids
    "腮紅液",
    "打亮液",
    # Base makeup
    "蜜粉餅",
    "粉底棒",
    "遮瑕棒",
}
|
||||
|
||||
SEARCH_AMBIGUOUS_PRODUCT_TERMS = {
|
||||
"保護膜",
|
||||
"保護貼",
|
||||
@@ -368,6 +386,11 @@ PRODUCT_TYPES = {
|
||||
"潔膚露": ("潔膚露", "浴潔露", "護潔露", "沐浴露", "wash"),
|
||||
"私密噴霧": ("私密噴霧", "抑菌噴霧", "醒肌抑菌噴霧"),
|
||||
"私密凝露": ("凝露", "激淨凝露", "緊實凝露", "亮白凝露"),
|
||||
"氣墊粉餅": ("氣墊粉餅", "cushion"),
|
||||
"眼影盤": ("眼影盤",),
|
||||
"打亮液": ("打亮液",),
|
||||
"腮紅液": ("腮紅液",),
|
||||
"護唇膏": ("護唇膏", "潤唇膏"),
|
||||
"唇釉": ("唇釉", "唇彩", "lip tint", "lip glaze"),
|
||||
"粉底棒": ("粉底棒", "foundation stick"),
|
||||
"精華": ("精華", "精華液", "essence", "serum", "安瓶"),
|
||||
@@ -1381,6 +1404,20 @@ def score_marketplace_match(
|
||||
):
|
||||
score += 0.02
|
||||
reasons.append("shared_identity_anchor_core_line")
|
||||
if (
|
||||
shared_anchor
|
||||
and len(shared_anchor.replace(" ", "")) >= 6
|
||||
and brand_score >= 0.95
|
||||
and not hard_veto
|
||||
and price_penalty == 0
|
||||
and type_score >= 0.55
|
||||
and spec_score >= 0.45
|
||||
and token_score >= 0.58
|
||||
and sequence_score >= 0.50
|
||||
and not variant_descriptor_conflict
|
||||
):
|
||||
score += 0.025
|
||||
reasons.append("shared_identity_anchor_variant_safe")
|
||||
if (
|
||||
brand_score >= 0.95
|
||||
and not hard_veto
|
||||
@@ -1403,7 +1440,7 @@ def score_marketplace_match(
|
||||
):
|
||||
score += 0.04
|
||||
reasons.append("shared_model_token")
|
||||
if variant_descriptor_conflict and spec_score < 0.85 and not shared_anchor and not shared_models:
|
||||
if variant_descriptor_conflict and spec_score < 0.85:
|
||||
score -= 0.05
|
||||
reasons.append("variant_descriptor_conflict")
|
||||
if (
|
||||
@@ -1549,8 +1586,27 @@ def _variant_descriptors(identity: ProductIdentity) -> set[str]:
|
||||
return {token for token in descriptors if token}
|
||||
|
||||
|
||||
def _is_variant_sensitive_identity(
    left: ProductIdentity,
    right: ProductIdentity,
    shared_anchor: str,
) -> bool:
    """Return True if any variant-sensitive keyword occurs in either product.

    The texts inspected are the shared anchor, both product types (a falsy
    type is treated as an empty string) and both searchable names. Empty
    texts are skipped; matching is a plain substring test against
    VARIANT_SENSITIVE_KEYWORDS.
    """
    texts_to_scan = (
        shared_anchor,
        left.product_type or "",
        right.product_type or "",
        left.searchable_name,
        right.searchable_name,
    )
    for candidate_text in texts_to_scan:
        if not candidate_text:
            continue
        for keyword in VARIANT_SENSITIVE_KEYWORDS:
            if keyword in candidate_text:
                return True
    return False
|
||||
|
||||
|
||||
def _has_variant_descriptor_conflict(left: ProductIdentity, right: ProductIdentity, shared_anchor: str) -> bool:
|
||||
if shared_anchor and shared_anchor not in SEARCH_BROAD_ANCHORS:
|
||||
if (
|
||||
shared_anchor
|
||||
and shared_anchor not in SEARCH_BROAD_ANCHORS
|
||||
and not _is_variant_sensitive_identity(left, right, shared_anchor)
|
||||
):
|
||||
return False
|
||||
if _shared_model_tokens(left, right):
|
||||
return False
|
||||
@@ -1558,7 +1614,13 @@ def _has_variant_descriptor_conflict(left: ProductIdentity, right: ProductIdenti
|
||||
right_descriptors = _variant_descriptors(right)
|
||||
if not left_descriptors or not right_descriptors:
|
||||
return False
|
||||
return not bool(left_descriptors & right_descriptors)
|
||||
if left_descriptors & right_descriptors:
|
||||
return False
|
||||
for left_descriptor in left_descriptors:
|
||||
for right_descriptor in right_descriptors:
|
||||
if left_descriptor in right_descriptor or right_descriptor in left_descriptor:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _search_core_score(token: str, all_tokens: set[str]) -> tuple[int, int, str]:
|
||||
@@ -1636,6 +1698,12 @@ def build_search_terms(name: str, max_terms: int = 3) -> list[str]:
|
||||
terms: list[str] = []
|
||||
|
||||
def primary_brand_phrase() -> str:
|
||||
if {"dashing", "diva"} <= identity.brand_tokens:
|
||||
return "dashing diva"
|
||||
if {"rom", "nd"} <= identity.brand_tokens:
|
||||
return "romand"
|
||||
if {"im", "meme"} <= identity.brand_tokens:
|
||||
return "im meme"
|
||||
chinese = sorted(
|
||||
(token for token in identity.brand_tokens if re.search(r"[\u4e00-\u9fff]", token)),
|
||||
key=lambda token: (-len(token), token),
|
||||
@@ -1656,6 +1724,8 @@ def build_search_terms(name: str, max_terms: int = 3) -> list[str]:
|
||||
core_phrases = _ranked_search_core_phrases(identity, limit=4)
|
||||
core_short = " ".join(core_phrases[:2])
|
||||
core_primary = core_phrases[0] if core_phrases else ""
|
||||
variant_descriptors = sorted(_variant_descriptors(identity), key=lambda token: (len(token), token))
|
||||
variant_primary = variant_descriptors[0] if variant_descriptors else ""
|
||||
model_phrases = [
|
||||
phrase
|
||||
for phrase in core_phrases[1:]
|
||||
@@ -1665,7 +1735,11 @@ def build_search_terms(name: str, max_terms: int = 3) -> list[str]:
|
||||
primary_with_model = " ".join(
|
||||
part for part in (core_primary, model_phrases[0] if model_phrases else "") if part
|
||||
)
|
||||
variant_sensitive = any(keyword in identity.searchable_name for keyword in VARIANT_SENSITIVE_KEYWORDS)
|
||||
for value in (
|
||||
" ".join(part for part in (brand_part, core_primary, variant_primary, spec_part) if part)
|
||||
if variant_sensitive and variant_primary
|
||||
else "",
|
||||
" ".join(part for part in (brand_part, primary_with_model, spec_part) if part),
|
||||
" ".join(part for part in (brand_part, core_short, spec_part) if part),
|
||||
" ".join(part for part in (brand_part, core_short) if part),
|
||||
|
||||
@@ -589,7 +589,7 @@ def _fetch_competitor_summary() -> Dict[str, Any]:
|
||||
)
|
||||
SELECT
|
||||
SUM(CASE WHEN attempt_status IN ('unit_comparable', 'refresh_unit_comparable') THEN 1 ELSE 0 END) AS unit_comparable_count,
|
||||
SUM(CASE WHEN attempt_status IN ('unit_comparable', 'refresh_unit_comparable', 'identity_veto', 'low_score', 'expired_match', 'no_result', 'refresh_no_result') THEN 1 ELSE 0 END) AS review_queue_count
|
||||
SUM(CASE WHEN attempt_status IN ('unit_comparable', 'refresh_unit_comparable', 'identity_veto', 'low_score', 'refresh_low_score', 'recoverable_low_score', 'true_low_confidence', 'protected_existing_match', 'expired_match', 'no_result', 'refresh_no_result') THEN 1 ELSE 0 END) AS review_queue_count
|
||||
FROM latest_attempt
|
||||
""")).fetchone()
|
||||
return {
|
||||
|
||||
@@ -18,14 +18,16 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
|
||||
assert "INSERT INTO competitor_match_attempts" in source
|
||||
assert "CAST(:search_terms AS jsonb)" in source
|
||||
assert 'attempt_status="matched"' in source
|
||||
assert '"low_score"' in source
|
||||
assert '"recoverable_low_score"' in source
|
||||
assert '"true_low_confidence"' in source
|
||||
assert '"identity_veto"' in source
|
||||
assert 'attempt_status="no_result"' in source
|
||||
assert 'attempt_status="no_match"' in source
|
||||
assert 'attempt_status="error"' in source
|
||||
assert "_search_pchome_candidates(crawler, momo_name, search_terms, momo_price=momo_price)" in source
|
||||
assert 'attempt_status="needs_review"' in source
|
||||
assert 'attempt_status="protected_existing_match"' in source
|
||||
assert "_should_upsert_competitor_price" in source
|
||||
assert "_classify_low_score_attempt" in source
|
||||
assert "replace_legacy_unverified" in source
|
||||
assert "identity_v2" in source
|
||||
assert "_fetch_expired_identity_skus" in source
|
||||
@@ -35,8 +37,7 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
|
||||
retryable_source = source.split("def _fetch_retryable_candidate_skus", 1)[1].split(
|
||||
"def _fetch_expired_identity_skus", 1
|
||||
)[0]
|
||||
assert "la.attempt_status = 'low_score'" in retryable_source
|
||||
assert "refresh_low_score')" not in retryable_source
|
||||
assert "la.attempt_status IN ('low_score', 'refresh_low_score', 'recoverable_low_score')" in retryable_source
|
||||
latest_attempt_source = retryable_source.split("latest_attempt AS", 1)[1].split(
|
||||
"SELECT\n lm.product_id", 1
|
||||
)[0]
|
||||
@@ -144,7 +145,7 @@ def test_reject_review_expires_current_formal_price():
|
||||
best_competitor_price, best_match_score, error_message, attempted_at)
|
||||
VALUES
|
||||
('A005', 'pchome', 1, '舒特膚 AD 乳液 200ml', 980,
|
||||
'[]', 1, 'needs_review',
|
||||
'[]', 1, 'protected_existing_match',
|
||||
'DDAB01-REJECT', '舒特膚 AD 乳液 200ml', 899, 0.84,
|
||||
'score=0.84', '2026-05-20 09:10:00')
|
||||
"""))
|
||||
@@ -408,6 +409,134 @@ def test_competitor_feeder_splits_hard_veto_from_low_score(monkeypatch):
|
||||
assert attempts[0]["diagnostics"].hard_veto is True
|
||||
|
||||
|
||||
def test_competitor_feeder_marks_near_threshold_same_line_as_recoverable(monkeypatch):
    """A 0.754 score with a strong product-line reason is recorded as recoverable.

    The crawler and the scorer are both stubbed, so the feeder sees one
    candidate whose diagnostics carry ``strong_product_line_match``.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    candidate = PChomeProduct(
        product_id="DDAB01-RECOVERABLE",
        name="Recipe Box 韓兔 兒童防曬氣墊粉餅",
        price=699,
        original_price=799,
        discount=12,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-RECOVERABLE",
        stock=20,
        store="24h",
        rating=4.7,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [candidate]

    def fake_score(*_args, **_kwargs):
        # Build a fresh diagnostics object on every call.
        return SimpleNamespace(
            score=0.754,
            brand_score=1.0,
            token_score=0.59,
            spec_score=0.55,
            sequence_score=0.53,
            type_score=1.0,
            price_penalty=0.0,
            hard_veto=False,
            reasons=("strong_product_line_match",),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)

    feeder = CompetitorPriceFeeder(engine=object())
    attempts = []

    def capture_attempt(*args, **kwargs):
        attempts.append(kwargs)

    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "RB001",
        "name": "【Recipebox】Recipe Box兒童防曬氣墊粉餅(兒童化妝品/無毒防曬粉餅/天然彩妝)",
        "product_id": 8,
        "momo_price": 699,
    }
    result = feeder._run_sku_items([sku_item])

    assert result.matched == 0
    assert result.skipped_low_score == 1
    assert attempts[0]["attempt_status"] == "recoverable_low_score"
|
||||
|
||||
|
||||
def test_competitor_feeder_marks_weak_identity_as_true_low_confidence(monkeypatch):
    """A 0.733 score with no reasons and weak token/sequence scores stays low-confidence.

    The crawler and the scorer are both stubbed; the stub diagnostics carry
    an empty ``reasons`` tuple.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    candidate = PChomeProduct(
        product_id="DDAB01-WEAK",
        name="韓系彩妝 十色眼影盤",
        price=499,
        original_price=699,
        discount=28,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-WEAK",
        stock=20,
        store="24h",
        rating=4.2,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [candidate]

    def fake_score(*_args, **_kwargs):
        # Build a fresh diagnostics object on every call.
        return SimpleNamespace(
            score=0.733,
            brand_score=0.95,
            token_score=0.51,
            spec_score=0.45,
            sequence_score=0.44,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=False,
            reasons=(),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity"],
        )

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)

    feeder = CompetitorPriceFeeder(engine=object())
    attempts = []

    def capture_attempt(*args, **kwargs):
        attempts.append(kwargs)

    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "RM001",
        "name": "【rom&nd】勝過眼皮十色眼影盤",
        "product_id": 9,
        "momo_price": 499,
    }
    result = feeder._run_sku_items([sku_item])

    assert result.matched == 0
    assert result.skipped_low_score == 1
    assert attempts[0]["attempt_status"] == "true_low_confidence"
|
||||
|
||||
|
||||
def test_should_upsert_allows_same_identity_candidate_to_replace_lower_score():
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
@@ -455,6 +584,83 @@ def test_should_upsert_allows_same_identity_candidate_to_replace_lower_score():
|
||||
assert reason.startswith("replace_same_identity_better_score=0.788->0.811")
|
||||
|
||||
|
||||
def test_competitor_feeder_marks_existing_stronger_match_as_protected(monkeypatch):
    """A refused upsert is recorded as protected_existing_match and writes nothing.

    The scorer stub returns 0.781, while ``_should_upsert_competitor_price``
    is stubbed to refuse with an ``existing_match_conflict`` reason.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    candidate = PChomeProduct(
        product_id="DDAB01-NEW",
        name="PONY EFFECT 絕對持久定妝噴霧",
        price=599,
        original_price=699,
        discount=14,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-NEW",
        stock=20,
        store="24h",
        rating=4.7,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [candidate]

    def fake_score(*_args, **_kwargs):
        # Build a fresh diagnostics object on every call.
        return SimpleNamespace(
            score=0.781,
            brand_score=1.0,
            token_score=0.79,
            spec_score=0.55,
            sequence_score=0.68,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=False,
            reasons=("shared_identity_anchor_packaging_variant",),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)

    feeder = CompetitorPriceFeeder(engine=object())
    attempts = []
    writes = []

    def capture_attempt(*args, **kwargs):
        attempts.append(kwargs)

    def refuse_upsert(*_args, **_kwargs):
        return (False, "existing_match_conflict;existing_score=0.948;incoming_score=0.781")

    def capture_write(*args, **kwargs):
        writes.append((args, kwargs))

    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)
    monkeypatch.setattr(feeder, "_should_upsert_competitor_price", refuse_upsert)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_write)

    sku_item = {
        "sku": "14133077",
        "name": "【PONY EFFECT】絕對持久定妝噴霧",
        "product_id": 10,
        "momo_price": 599,
    }
    result = feeder._run_sku_items([sku_item])

    assert result.matched == 0
    assert result.skipped_low_score == 1
    assert writes == []
    assert attempts[0]["attempt_status"] == "protected_existing_match"
    assert "existing_match_conflict" in attempts[0]["error_message"]
|
||||
|
||||
|
||||
def test_search_candidates_does_not_stop_on_merely_acceptable_match(monkeypatch):
|
||||
from services.competitor_price_feeder import _search_pchome_candidates
|
||||
from services.pchome_crawler import PChomeProduct
|
||||
|
||||
@@ -573,6 +573,22 @@ def test_marketplace_matcher_does_not_promote_different_option_without_spec():
|
||||
|
||||
assert diagnostics.score < 0.76
|
||||
assert "strong_exact_spec_match" not in diagnostics.reasons
|
||||
assert "variant_descriptor_conflict" in diagnostics.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_variant_safe_exact_option():
    """Two nail-tip listings naming the same option score at least 0.76."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【DASHING DIVA】MAGICPRESS時尚潮流美甲片_極光之藍"
    competitor_title = "Dashing Diva/F 時尚潮流美甲片-極光之藍 MDF5F001AG"

    outcome = score_marketplace_match(
        momo_title,
        competitor_title,
        momo_price=331,
        competitor_price=420,
    )

    assert outcome.score >= 0.76
    assert outcome.hard_veto is False
    assert "shared_identity_anchor_variant_safe" in outcome.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_shared_identity_anchor_near_threshold():
|
||||
@@ -649,6 +665,36 @@ def test_marketplace_matcher_promotes_shared_anchor_without_spec_conflict():
|
||||
assert "shared_identity_anchor_no_spec" in diagnostics.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_recipe_box_near_threshold_with_variant_safe_anchor():
    """The Recipe Box cushion pair scores at least 0.76 via the variant-safe anchor reason."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【Recipebox】Recipe Box兒童防曬氣墊粉餅(兒童化妝品/無毒防曬粉餅/天然彩妝)"
    competitor_title = "Recipe Box 韓兔 兒童防曬氣墊粉餅"

    outcome = score_marketplace_match(
        momo_title,
        competitor_title,
        momo_price=699,
        competitor_price=699,
    )

    assert outcome.score >= 0.76
    assert outcome.hard_veto is False
    assert "shared_identity_anchor_variant_safe" in outcome.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_romand_palette_exact_line():
    """The rom&nd ten-colour palette pair scores at least 0.76 without a hard veto."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【rom&nd】勝過眼皮十色眼影盤"
    competitor_title = "rom&nd X ZO&FRIENDS 勝過眼皮十色眼影盤 8g/7g"

    outcome = score_marketplace_match(
        momo_title,
        competitor_title,
        momo_price=499,
        competitor_price=499,
    )

    assert outcome.score >= 0.76
    assert outcome.hard_veto is False
    assert "shared_identity_anchor_variant_safe" in outcome.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_shared_model_token_for_exact_model():
|
||||
from services.marketplace_product_matcher import score_marketplace_match
|
||||
|
||||
@@ -735,6 +781,16 @@ def test_marketplace_search_terms_keep_professional_product_phrase():
|
||||
assert not any("卸除防曬" in term or "外出清潔" in term for term in mustela_terms)
|
||||
|
||||
|
||||
def test_marketplace_search_terms_keep_variant_descriptor_for_sensitive_lines():
    """The first search term for each of two variant-sensitive names is pinned exactly."""
    from services.marketplace_product_matcher import build_search_terms

    dashing_name = "【DASHING DIVA】MAGICPRESS時尚潮流美甲片_極光之藍"
    romand_name = "【rom&nd】勝過眼皮十色眼影盤"

    first_dashing_term = build_search_terms(dashing_name, max_terms=5)[0]
    first_romand_term = build_search_terms(romand_name, max_terms=5)[0]

    assert first_dashing_term == "dashing diva 時尚潮流美甲片 極光之藍"
    assert first_romand_term == "romand 勝過眼皮十色眼影盤"
|
||||
|
||||
|
||||
def test_marketplace_search_terms_prefer_specific_line_over_generic_usage_words():
|
||||
from services.marketplace_product_matcher import build_search_terms
|
||||
|
||||
|
||||
Reference in New Issue
Block a user