[V10.355] rescue near-threshold identity cohorts

This commit is contained in:
OoO
2026-05-21 09:21:46 +08:00
parent b272abccca
commit 88101cf5d0
7 changed files with 433 additions and 29 deletions

View File

@@ -51,6 +51,10 @@ REVIEW_STATUS_OPTIONS = [
'refresh_unit_comparable',
'identity_veto',
'low_score',
'refresh_low_score',
'recoverable_low_score',
'true_low_confidence',
'protected_existing_match',
'expired_match',
'refresh_no_result',
'no_result',
@@ -62,7 +66,8 @@ REVIEW_STATUS_OPTIONS = [
'statuses': ('unit_comparable', 'refresh_unit_comparable'),
},
{'key': 'identity_veto', 'label': '已排除', 'statuses': ('identity_veto',)},
{'key': 'low_score', 'label': '低信心', 'statuses': ('low_score',)},
{'key': 'low_score', 'label': '低信心', 'statuses': ('low_score', 'refresh_low_score', 'recoverable_low_score', 'true_low_confidence')},
{'key': 'protected_existing_match', 'label': '既有保護', 'statuses': ('protected_existing_match',)},
{'key': 'expired_match', 'label': '價格過期', 'statuses': ('expired_match',)},
{'key': 'no_result', 'label': '找不到同款', 'statuses': ('no_result', 'refresh_no_result')},
]
@@ -249,24 +254,30 @@ def _build_pchome_match_status(attempt=None, ineligible=None):
candidate_count = int(attempt.get('candidate_count') or 0)
score_text = f"最佳候選 {round(score * 100)}%" if score is not None else "尚無候選分數"
if status in {'low_score', 'refresh_low_score'}:
if status in {'low_score', 'refresh_low_score', 'recoverable_low_score', 'true_low_confidence'}:
diagnostic_text = attempt.get('error_message') or ''
label, summary = _diagnostic_match_rejection_label(
diagnostic_text,
score_text,
blocked=False,
)
if status == 'recoverable_low_score':
label = '近門檻可回收'
summary = '同品線證據已足夠,但分數仍略低於正式採用門檻'
elif status == 'true_low_confidence':
label = '證據不足'
summary = '目前候選仍缺乏足夠身份證據,先保守不採用'
return {
'label': label,
'tone': 'neutral',
'summary': summary,
'detail': f'{candidate_count} 筆候選',
}
if status in {'needs_review', 'refresh_needs_review'}:
if status in {'needs_review', 'refresh_needs_review', 'protected_existing_match'}:
return {
'label': '配對衝突待審',
'label': '既有配對保護',
'tone': 'neutral',
'summary': '新候選既有配對不同,需人工確認後覆蓋',
'summary': '新候選合理,但正式環境已存在更強既有配對,需人工確認後覆蓋',
'detail': f'{score_text} / {candidate_count} 筆候選',
}
if status in {'no_result', 'no_match', 'refresh_no_match'}:

View File

@@ -28,6 +28,10 @@ ACTIONABLE_ATTEMPT_STATUSES = {
"refresh_unit_comparable",
"identity_veto",
"low_score",
"refresh_low_score",
"recoverable_low_score",
"true_low_confidence",
"protected_existing_match",
"expired_match",
"refresh_no_result",
"no_result",
@@ -35,7 +39,8 @@ ACTIONABLE_ATTEMPT_STATUSES = {
REVIEW_STATUS_FILTER_GROUPS = {
"unit_comparable": ("unit_comparable", "refresh_unit_comparable"),
"identity_veto": ("identity_veto",),
"low_score": ("low_score",),
"low_score": ("low_score", "refresh_low_score", "recoverable_low_score", "true_low_confidence"),
"protected_existing_match": ("protected_existing_match",),
"expired_match": ("expired_match",),
"no_result": ("no_result", "refresh_no_result"),
}
@@ -44,6 +49,10 @@ ATTEMPT_STATUS_LABELS = {
"refresh_unit_comparable": "需單位價比較",
"identity_veto": "身份否決",
"low_score": "低信心待審",
"refresh_low_score": "刷新後仍低信心",
"recoverable_low_score": "近門檻可救回",
"true_low_confidence": "證據不足待觀察",
"protected_existing_match": "既有較強配對保護中",
"expired_match": "價格過期待刷新",
"refresh_no_result": "刷新找不到商品",
"no_result": "找不到同款",
@@ -58,6 +67,10 @@ ATTEMPT_ACTION_LABELS = {
"refresh_unit_comparable": "人工確認檔期、贈品與單位價",
"identity_veto": "確認是否為不同商品線或規格",
"low_score": "人工審核候選商品身份",
"refresh_low_score": "檢查 refresh 後是否還有更好的同款候選",
"recoverable_low_score": "優先回放這批近門檻同品線候選",
"true_low_confidence": "保守保留,等待更明確的身份證據",
"protected_existing_match": "比較新舊候選證據,避免覆蓋較強正式配對",
"expired_match": "重新刷新 PChome 價格",
"refresh_no_result": "調整搜尋詞後重抓",
"no_result": "補充搜尋詞或品牌關鍵字",
@@ -755,9 +768,11 @@ def _review_queue_cte_and_filter(
CASE
WHEN la.attempt_status IN ('unit_comparable', 'refresh_unit_comparable') THEN 0
WHEN la.attempt_status = 'identity_veto' THEN 1
WHEN la.attempt_status = 'low_score' THEN 2
WHEN la.attempt_status = 'expired_match' THEN 3
ELSE 4
WHEN la.attempt_status IN ('recoverable_low_score', 'low_score', 'refresh_low_score') THEN 2
WHEN la.attempt_status = 'protected_existing_match' THEN 3
WHEN la.attempt_status = 'true_low_confidence' THEN 4
WHEN la.attempt_status = 'expired_match' THEN 5
ELSE 6
END AS priority_rank
FROM latest_momo lm
JOIN latest_attempt la ON la.sku = lm.sku
@@ -902,6 +917,10 @@ def _fetch_competitor_review_queue_uncached(engine, limit: int = 12) -> list[dic
'refresh_unit_comparable',
'identity_veto',
'low_score',
'refresh_low_score',
'recoverable_low_score',
'true_low_confidence',
'protected_existing_match',
'expired_match',
'refresh_no_result',
'no_result'
@@ -910,9 +929,11 @@ def _fetch_competitor_review_queue_uncached(engine, limit: int = 12) -> list[dic
CASE
WHEN la.attempt_status IN ('unit_comparable', 'refresh_unit_comparable') THEN 0
WHEN la.attempt_status = 'identity_veto' THEN 1
WHEN la.attempt_status = 'low_score' THEN 2
WHEN la.attempt_status = 'expired_match' THEN 3
ELSE 4
WHEN la.attempt_status IN ('recoverable_low_score', 'low_score', 'refresh_low_score') THEN 2
WHEN la.attempt_status = 'protected_existing_match' THEN 3
WHEN la.attempt_status = 'true_low_confidence' THEN 4
WHEN la.attempt_status = 'expired_match' THEN 5
ELSE 6
END,
lm.momo_price DESC NULLS LAST,
la.best_match_score DESC NULLS LAST,

View File

@@ -45,6 +45,19 @@ BATCH_SIZE = 30 # 每批 DB 寫入筆數
RATE_DELAY = float(os.getenv("PCHOME_FEEDER_RATE_DELAY", "1.0")) # 每次 PChome 請求間隔(秒)
TTL_HOURS = 6 # competitor_prices 快取有效期
REQUEST_TIMEOUT = float(os.getenv("PCHOME_FEEDER_TIMEOUT", "12")) # 避免外部搜尋 API 長時間卡住排程
# Lowest score that can still be classified as "recoverable_low_score":
# at most 0.03 below MIN_MATCH_SCORE, and never below 0.72.
# NOTE(review): if MIN_MATCH_SCORE were ever <= 0.72 the recoverable band would
# be empty, because classification only runs for scores below MIN_MATCH_SCORE.
RECOVERABLE_LOW_SCORE_FLOOR = max(MIN_MATCH_SCORE - 0.03, 0.72)
# Diagnostic reason codes that _has_recoverable_identity_signal accepts as
# sufficient identity evidence on their own (any single one is enough).
RECOVERABLE_DIAGNOSTIC_REASONS = {
"strong_product_line_match",
"strong_exact_spec_match",
"shared_identity_anchor",
"shared_identity_anchor_no_spec",
"shared_identity_anchor_packaging_variant",
"shared_identity_anchor_marketing_variant",
"shared_identity_anchor_core_line",
"shared_identity_anchor_variant_safe",
"shared_model_token",
"spec_name_alignment",
}
# ── Feeder 結果 ───────────────────────────────────────
@dataclass
@@ -59,6 +72,28 @@ class FeederResult:
attempts_written: int = 0
def _has_recoverable_identity_signal(diagnostics) -> bool:
    """Return True when match diagnostics carry enough identity evidence.

    Evidence is accepted in either of two forms:

    * at least one entry of ``diagnostics.reasons`` is listed in
      ``RECOVERABLE_DIAGNOSTIC_REASONS``; or
    * ``brand_score >= 0.95``, ``token_score >= 0.56`` and
      ``sequence_score >= 0.50`` while ``comparison_mode`` is
      ``"exact_identity"`` (a missing ``comparison_mode`` counts as
      ``"exact_identity"``).

    Args:
        diagnostics: Object exposing the attributes above; may be ``None``.
            Any score attribute that is missing or ``None`` is treated as 0
            instead of raising ``TypeError`` on comparison.

    Returns:
        ``True`` if either form of evidence is present, otherwise ``False``.
    """
    if not diagnostics:
        return False

    reasons = set(getattr(diagnostics, "reasons", None) or ())
    # No recorded reasons means no reason-based evidence; skip the lookup.
    if reasons and not reasons.isdisjoint(RECOVERABLE_DIAGNOSTIC_REASONS):
        return True

    def _score_of(attribute: str):
        # A present-but-None score must behave like a missing one.
        value = getattr(diagnostics, attribute, None)
        return 0 if value is None else value

    return (
        _score_of("brand_score") >= 0.95
        and _score_of("token_score") >= 0.56
        and _score_of("sequence_score") >= 0.50
        and getattr(diagnostics, "comparison_mode", "exact_identity") == "exact_identity"
    )
def _classify_low_score_attempt(score: float, diagnostics) -> str:
    """Pick the attempt status for a candidate that was not accepted.

    Args:
        score: Match score of the best candidate.
        diagnostics: Scorer diagnostics; ``hard_veto`` is read with a
            ``False`` default.

    Returns:
        ``"identity_veto"`` when the diagnostics carry a hard veto;
        ``"recoverable_low_score"`` when the score reaches
        ``RECOVERABLE_LOW_SCORE_FLOOR`` and
        ``_has_recoverable_identity_signal`` accepts the diagnostics;
        ``"true_low_confidence"`` otherwise.
    """
    hard_vetoed = getattr(diagnostics, "hard_veto", False)
    if hard_vetoed:
        return "identity_veto"

    # The identity-signal check only runs for scores at or above the floor.
    recoverable = (
        score >= RECOVERABLE_LOW_SCORE_FLOOR
        and _has_recoverable_identity_signal(diagnostics)
    )
    return "recoverable_low_score" if recoverable else "true_low_confidence"
def _extract_tags(pchome_product) -> list:
"""
從 PChomeProduct 物件提取語意標籤
@@ -716,7 +751,8 @@ class CompetitorPriceFeeder:
這條路徑不重新搜尋,只用前次留下的 PChome product_id 批次查詢最新商品資料,
適合把舊 scorer 卡在 0.70~0.759 的真同款重新推進正式比價。
已重評後仍不足門檻的 refresh_low_score 不再重複進隊列,避免排程空轉。
僅重跑明顯仍在 exact identity 軌道內、具回收價值的候選;
真正低信心與 hard veto 不再反覆空轉。
"""
if self.engine is None:
raise RuntimeError("需要注入 SQLAlchemy engine")
@@ -769,7 +805,7 @@ class CompetitorPriceFeeder:
AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2'
WHERE lm.rn = 1
AND cp.sku IS NULL
AND la.attempt_status = 'low_score'
AND la.attempt_status IN ('low_score', 'refresh_low_score', 'recoverable_low_score')
AND la.best_competitor_product_id IS NOT NULL
AND la.best_competitor_product_id <> ''
AND COALESCE(la.best_match_score, 0) >= :min_score
@@ -1212,7 +1248,7 @@ class CompetitorPriceFeeder:
continue
if score < MIN_MATCH_SCORE and not manual_accept_override:
attempt_status = "identity_veto" if getattr(diagnostics, "hard_veto", False) else "low_score"
attempt_status = _classify_low_score_attempt(score, diagnostics)
logger.debug(
f"[Feeder] {sku} 比對分數過低 ({score:.3f} < {MIN_MATCH_SCORE})"
f"{_format_match_diagnostics(diagnostics)}"
@@ -1263,7 +1299,7 @@ class CompetitorPriceFeeder:
momo_price=momo_price,
search_terms=search_terms,
candidate_count=len(products),
attempt_status="needs_review",
attempt_status="protected_existing_match",
best_product=best_product,
best_score=score,
diagnostics=diagnostics,
@@ -1439,7 +1475,7 @@ class CompetitorPriceFeeder:
momo_price=momo_price,
search_terms=attempt_terms,
candidate_count=max(1, recovery_candidate_count),
attempt_status="refresh_needs_review",
attempt_status="protected_existing_match",
best_product=best_product,
best_score=score,
diagnostics=diagnostics,
@@ -1571,7 +1607,7 @@ class CompetitorPriceFeeder:
momo_price=momo_price,
search_terms=attempt_terms,
candidate_count=candidate_count,
attempt_status="refresh_needs_review",
attempt_status="protected_existing_match",
best_product=best_product,
best_score=score,
diagnostics=diagnostics,
@@ -1611,7 +1647,7 @@ class CompetitorPriceFeeder:
attempts_written += 1
continue
attempt_status = "identity_veto" if getattr(diagnostics, "hard_veto", False) else "refresh_low_score"
attempt_status = _classify_low_score_attempt(score, diagnostics)
self._record_match_attempt(
sku,
momo_name,
@@ -1651,7 +1687,7 @@ class CompetitorPriceFeeder:
momo_price=momo_price,
search_terms=search_terms,
candidate_count=1,
attempt_status="refresh_needs_review",
attempt_status="protected_existing_match",
best_product=best_product,
best_score=score,
diagnostics=diagnostics,

View File

@@ -244,6 +244,7 @@ SEARCH_NOISE_TOKENS = {
}
SEARCH_IDENTITY_ANCHORS = (
"時尚潮流美甲片",
"止汗爽身噴霧",
"止汗爽身乳膏pro",
"零粉感超持久粉底棒",
@@ -258,6 +259,7 @@ SEARCH_IDENTITY_ANCHORS = (
"裸光幻閃亮采餅",
"絕對持久定妝噴霧",
"兒童防曬氣墊粉餅",
"勝過眼皮十色眼影盤",
"提提亮膚打亮液",
"甜甜嫩頰腮紅液",
"自動武士刀眉筆",
@@ -330,6 +332,22 @@ SEARCH_BROAD_ANCHORS = {
"香氛融蠟燈",
}
# Substrings that mark a product as variant-sensitive (e.g. nail tips,
# eyeshadow palettes, lip products, brow/eyeliner pencils, liquid blush).
# When one of these appears, a shared identity anchor alone no longer rules
# out a variant descriptor conflict, and build_search_terms puts the variant
# descriptor into the first search term.
VARIANT_SENSITIVE_KEYWORDS = {
"美甲片",
"眼影盤",
"唇釉",
"唇膏",
"唇凍",
"潤唇膏",
"眉筆",
"眼線筆",
"腮紅液",
"打亮液",
"蜜粉餅",
"粉底棒",
"遮瑕棒",
}
SEARCH_AMBIGUOUS_PRODUCT_TERMS = {
"保護膜",
"保護貼",
@@ -368,6 +386,11 @@ PRODUCT_TYPES = {
"潔膚露": ("潔膚露", "浴潔露", "護潔露", "沐浴露", "wash"),
"私密噴霧": ("私密噴霧", "抑菌噴霧", "醒肌抑菌噴霧"),
"私密凝露": ("凝露", "激淨凝露", "緊實凝露", "亮白凝露"),
"氣墊粉餅": ("氣墊粉餅", "cushion"),
"眼影盤": ("眼影盤",),
"打亮液": ("打亮液",),
"腮紅液": ("腮紅液",),
"護唇膏": ("護唇膏", "潤唇膏"),
"唇釉": ("唇釉", "唇彩", "lip tint", "lip glaze"),
"粉底棒": ("粉底棒", "foundation stick"),
"精華": ("精華", "精華液", "essence", "serum", "安瓶"),
@@ -1381,6 +1404,20 @@ def score_marketplace_match(
):
score += 0.02
reasons.append("shared_identity_anchor_core_line")
if (
shared_anchor
and len(shared_anchor.replace(" ", "")) >= 6
and brand_score >= 0.95
and not hard_veto
and price_penalty == 0
and type_score >= 0.55
and spec_score >= 0.45
and token_score >= 0.58
and sequence_score >= 0.50
and not variant_descriptor_conflict
):
score += 0.025
reasons.append("shared_identity_anchor_variant_safe")
if (
brand_score >= 0.95
and not hard_veto
@@ -1403,7 +1440,7 @@ def score_marketplace_match(
):
score += 0.04
reasons.append("shared_model_token")
if variant_descriptor_conflict and spec_score < 0.85 and not shared_anchor and not shared_models:
if variant_descriptor_conflict and spec_score < 0.85:
score -= 0.05
reasons.append("variant_descriptor_conflict")
if (
@@ -1549,8 +1586,27 @@ def _variant_descriptors(identity: ProductIdentity) -> set[str]:
return {token for token in descriptors if token}
def _is_variant_sensitive_identity(
    left: ProductIdentity,
    right: ProductIdentity,
    shared_anchor: str,
) -> bool:
    """Report whether the compared pair belongs to a variant-sensitive line.

    The shared anchor, both product types and both searchable names are
    scanned; the pair is variant-sensitive as soon as any non-empty one of
    them contains a keyword from ``VARIANT_SENSITIVE_KEYWORDS``.

    Args:
        left: Identity of one product.
        right: Identity of the other product.
        shared_anchor: Anchor phrase shared by both products, possibly empty.

    Returns:
        ``True`` if any scanned text contains a variant-sensitive keyword.
    """
    texts_to_scan = [
        shared_anchor,
        left.product_type or "",
        right.product_type or "",
        left.searchable_name,
        right.searchable_name,
    ]
    for text in texts_to_scan:
        if not text:
            # Empty or missing fields cannot contain a keyword.
            continue
        for keyword in VARIANT_SENSITIVE_KEYWORDS:
            if keyword in text:
                return True
    return False
def _has_variant_descriptor_conflict(left: ProductIdentity, right: ProductIdentity, shared_anchor: str) -> bool:
if shared_anchor and shared_anchor not in SEARCH_BROAD_ANCHORS:
if (
shared_anchor
and shared_anchor not in SEARCH_BROAD_ANCHORS
and not _is_variant_sensitive_identity(left, right, shared_anchor)
):
return False
if _shared_model_tokens(left, right):
return False
@@ -1558,7 +1614,13 @@ def _has_variant_descriptor_conflict(left: ProductIdentity, right: ProductIdenti
right_descriptors = _variant_descriptors(right)
if not left_descriptors or not right_descriptors:
return False
return not bool(left_descriptors & right_descriptors)
if left_descriptors & right_descriptors:
return False
for left_descriptor in left_descriptors:
for right_descriptor in right_descriptors:
if left_descriptor in right_descriptor or right_descriptor in left_descriptor:
return False
return True
def _search_core_score(token: str, all_tokens: set[str]) -> tuple[int, int, str]:
@@ -1636,6 +1698,12 @@ def build_search_terms(name: str, max_terms: int = 3) -> list[str]:
terms: list[str] = []
def primary_brand_phrase() -> str:
if {"dashing", "diva"} <= identity.brand_tokens:
return "dashing diva"
if {"rom", "nd"} <= identity.brand_tokens:
return "romand"
if {"im", "meme"} <= identity.brand_tokens:
return "im meme"
chinese = sorted(
(token for token in identity.brand_tokens if re.search(r"[\u4e00-\u9fff]", token)),
key=lambda token: (-len(token), token),
@@ -1656,6 +1724,8 @@ def build_search_terms(name: str, max_terms: int = 3) -> list[str]:
core_phrases = _ranked_search_core_phrases(identity, limit=4)
core_short = " ".join(core_phrases[:2])
core_primary = core_phrases[0] if core_phrases else ""
variant_descriptors = sorted(_variant_descriptors(identity), key=lambda token: (len(token), token))
variant_primary = variant_descriptors[0] if variant_descriptors else ""
model_phrases = [
phrase
for phrase in core_phrases[1:]
@@ -1665,7 +1735,11 @@ def build_search_terms(name: str, max_terms: int = 3) -> list[str]:
primary_with_model = " ".join(
part for part in (core_primary, model_phrases[0] if model_phrases else "") if part
)
variant_sensitive = any(keyword in identity.searchable_name for keyword in VARIANT_SENSITIVE_KEYWORDS)
for value in (
" ".join(part for part in (brand_part, core_primary, variant_primary, spec_part) if part)
if variant_sensitive and variant_primary
else "",
" ".join(part for part in (brand_part, primary_with_model, spec_part) if part),
" ".join(part for part in (brand_part, core_short, spec_part) if part),
" ".join(part for part in (brand_part, core_short) if part),

View File

@@ -589,7 +589,7 @@ def _fetch_competitor_summary() -> Dict[str, Any]:
)
SELECT
SUM(CASE WHEN attempt_status IN ('unit_comparable', 'refresh_unit_comparable') THEN 1 ELSE 0 END) AS unit_comparable_count,
SUM(CASE WHEN attempt_status IN ('unit_comparable', 'refresh_unit_comparable', 'identity_veto', 'low_score', 'expired_match', 'no_result', 'refresh_no_result') THEN 1 ELSE 0 END) AS review_queue_count
SUM(CASE WHEN attempt_status IN ('unit_comparable', 'refresh_unit_comparable', 'identity_veto', 'low_score', 'refresh_low_score', 'recoverable_low_score', 'true_low_confidence', 'protected_existing_match', 'expired_match', 'no_result', 'refresh_no_result') THEN 1 ELSE 0 END) AS review_queue_count
FROM latest_attempt
""")).fetchone()
return {

View File

@@ -18,14 +18,16 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
assert "INSERT INTO competitor_match_attempts" in source
assert "CAST(:search_terms AS jsonb)" in source
assert 'attempt_status="matched"' in source
assert '"low_score"' in source
assert '"recoverable_low_score"' in source
assert '"true_low_confidence"' in source
assert '"identity_veto"' in source
assert 'attempt_status="no_result"' in source
assert 'attempt_status="no_match"' in source
assert 'attempt_status="error"' in source
assert "_search_pchome_candidates(crawler, momo_name, search_terms, momo_price=momo_price)" in source
assert 'attempt_status="needs_review"' in source
assert 'attempt_status="protected_existing_match"' in source
assert "_should_upsert_competitor_price" in source
assert "_classify_low_score_attempt" in source
assert "replace_legacy_unverified" in source
assert "identity_v2" in source
assert "_fetch_expired_identity_skus" in source
@@ -35,8 +37,7 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
retryable_source = source.split("def _fetch_retryable_candidate_skus", 1)[1].split(
"def _fetch_expired_identity_skus", 1
)[0]
assert "la.attempt_status = 'low_score'" in retryable_source
assert "refresh_low_score')" not in retryable_source
assert "la.attempt_status IN ('low_score', 'refresh_low_score', 'recoverable_low_score')" in retryable_source
latest_attempt_source = retryable_source.split("latest_attempt AS", 1)[1].split(
"SELECT\n lm.product_id", 1
)[0]
@@ -144,7 +145,7 @@ def test_reject_review_expires_current_formal_price():
best_competitor_price, best_match_score, error_message, attempted_at)
VALUES
('A005', 'pchome', 1, '舒特膚 AD 乳液 200ml', 980,
'[]', 1, 'needs_review',
'[]', 1, 'protected_existing_match',
'DDAB01-REJECT', '舒特膚 AD 乳液 200ml', 899, 0.84,
'score=0.84', '2026-05-20 09:10:00')
"""))
@@ -408,6 +409,134 @@ def test_competitor_feeder_splits_hard_veto_from_low_score(monkeypatch):
assert attempts[0]["diagnostics"].hard_veto is True
def test_competitor_feeder_marks_near_threshold_same_line_as_recoverable(monkeypatch):
    """A 0.754 candidate with a strong product-line reason is recorded as recoverable."""
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    candidate = PChomeProduct(
        product_id="DDAB01-RECOVERABLE",
        name="Recipe Box 韓兔 兒童防曬氣墊粉餅",
        price=699,
        original_price=799,
        discount=12,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-RECOVERABLE",
        stock=20,
        store="24h",
        rating=4.7,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        """Crawler stand-in whose search always yields the single candidate."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [candidate]

    def fake_score(*_args, **_kwargs):
        # Fixed diagnostics: score 0.754 with a strong product-line reason.
        return SimpleNamespace(
            score=0.754,
            brand_score=1.0,
            token_score=0.59,
            spec_score=0.55,
            sequence_score=0.53,
            type_score=1.0,
            price_penalty=0.0,
            hard_veto=False,
            reasons=("strong_product_line_match",),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)

    feeder = CompetitorPriceFeeder(engine=object())
    recorded_attempts = []

    def capture_attempt(*_args, **kwargs):
        # Only the keyword arguments of each recorded attempt are inspected.
        recorded_attempts.append(kwargs)

    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "RB001",
        "name": "【Recipebox】Recipe Box兒童防曬氣墊粉餅(兒童化妝品/無毒防曬粉餅/天然彩妝)",
        "product_id": 8,
        "momo_price": 699,
    }
    result = feeder._run_sku_items([sku_item])

    assert result.matched == 0
    assert result.skipped_low_score == 1
    assert recorded_attempts[0]["attempt_status"] == "recoverable_low_score"
def test_competitor_feeder_marks_weak_identity_as_true_low_confidence(monkeypatch):
    """A 0.733 candidate without any diagnostic reason is recorded as true low confidence."""
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    candidate = PChomeProduct(
        product_id="DDAB01-WEAK",
        name="韓系彩妝 十色眼影盤",
        price=499,
        original_price=699,
        discount=28,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-WEAK",
        stock=20,
        store="24h",
        rating=4.2,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        """Crawler stand-in whose search always yields the single candidate."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [candidate]

    def fake_score(*_args, **_kwargs):
        # Fixed diagnostics: score 0.733, no reasons, weak token/sequence scores.
        return SimpleNamespace(
            score=0.733,
            brand_score=0.95,
            token_score=0.51,
            spec_score=0.45,
            sequence_score=0.44,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=False,
            reasons=(),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity"],
        )

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)

    feeder = CompetitorPriceFeeder(engine=object())
    recorded_attempts = []

    def capture_attempt(*_args, **kwargs):
        # Only the keyword arguments of each recorded attempt are inspected.
        recorded_attempts.append(kwargs)

    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "RM001",
        "name": "【rom&nd】勝過眼皮十色眼影盤",
        "product_id": 9,
        "momo_price": 499,
    }
    result = feeder._run_sku_items([sku_item])

    assert result.matched == 0
    assert result.skipped_low_score == 1
    assert recorded_attempts[0]["attempt_status"] == "true_low_confidence"
def test_should_upsert_allows_same_identity_candidate_to_replace_lower_score():
from sqlalchemy import create_engine, text
@@ -455,6 +584,83 @@ def test_should_upsert_allows_same_identity_candidate_to_replace_lower_score():
assert reason.startswith("replace_same_identity_better_score=0.788->0.811")
def test_competitor_feeder_marks_existing_stronger_match_as_protected(monkeypatch):
    """When the upsert check refuses the candidate, nothing is written and the attempt is protected."""
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    candidate = PChomeProduct(
        product_id="DDAB01-NEW",
        name="PONY EFFECT 絕對持久定妝噴霧",
        price=599,
        original_price=699,
        discount=14,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-NEW",
        stock=20,
        store="24h",
        rating=4.7,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        """Crawler stand-in whose search always yields the single candidate."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [candidate]

    def fake_score(*_args, **_kwargs):
        # Fixed diagnostics: score 0.781 with a packaging-variant anchor reason.
        return SimpleNamespace(
            score=0.781,
            brand_score=1.0,
            token_score=0.79,
            spec_score=0.55,
            sequence_score=0.68,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=False,
            reasons=("shared_identity_anchor_packaging_variant",),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)

    feeder = CompetitorPriceFeeder(engine=object())
    recorded_attempts = []
    price_writes = []

    def capture_attempt(*_args, **kwargs):
        # Only the keyword arguments of each recorded attempt are inspected.
        recorded_attempts.append(kwargs)

    def refuse_upsert(*_args, **_kwargs):
        # The upsert check reports a conflict with a higher-scoring existing match.
        return False, "existing_match_conflict;existing_score=0.948;incoming_score=0.781"

    def capture_write(*args, **kwargs):
        price_writes.append((args, kwargs))

    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)
    monkeypatch.setattr(feeder, "_should_upsert_competitor_price", refuse_upsert)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_write)

    sku_item = {
        "sku": "14133077",
        "name": "【PONY EFFECT】絕對持久定妝噴霧",
        "product_id": 10,
        "momo_price": 599,
    }
    result = feeder._run_sku_items([sku_item])

    assert result.matched == 0
    assert result.skipped_low_score == 1
    assert price_writes == []
    assert recorded_attempts[0]["attempt_status"] == "protected_existing_match"
    assert "existing_match_conflict" in recorded_attempts[0]["error_message"]
def test_search_candidates_does_not_stop_on_merely_acceptable_match(monkeypatch):
from services.competitor_price_feeder import _search_pchome_candidates
from services.pchome_crawler import PChomeProduct

View File

@@ -573,6 +573,22 @@ def test_marketplace_matcher_does_not_promote_different_option_without_spec():
assert diagnostics.score < 0.76
assert "strong_exact_spec_match" not in diagnostics.reasons
assert "variant_descriptor_conflict" in diagnostics.reasons
def test_marketplace_matcher_promotes_variant_safe_exact_option():
    """Two listings naming the same nail-tip option score >= 0.76 via the variant-safe anchor."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【DASHING DIVA】MAGICPRESS時尚潮流美甲片_極光之藍"
    pchome_title = "Dashing Diva/F 時尚潮流美甲片-極光之藍 MDF5F001AG"

    outcome = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=331,
        competitor_price=420,
    )

    assert outcome.score >= 0.76
    assert outcome.hard_veto is False
    assert "shared_identity_anchor_variant_safe" in outcome.reasons
def test_marketplace_matcher_promotes_shared_identity_anchor_near_threshold():
@@ -649,6 +665,36 @@ def test_marketplace_matcher_promotes_shared_anchor_without_spec_conflict():
assert "shared_identity_anchor_no_spec" in diagnostics.reasons
def test_marketplace_matcher_promotes_recipe_box_near_threshold_with_variant_safe_anchor():
    """The Recipe Box cushion pair scores >= 0.76 and reports the variant-safe anchor reason."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【Recipebox】Recipe Box兒童防曬氣墊粉餅(兒童化妝品/無毒防曬粉餅/天然彩妝)"
    pchome_title = "Recipe Box 韓兔 兒童防曬氣墊粉餅"

    outcome = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=699,
        competitor_price=699,
    )

    assert outcome.score >= 0.76
    assert outcome.hard_veto is False
    assert "shared_identity_anchor_variant_safe" in outcome.reasons
def test_marketplace_matcher_promotes_romand_palette_exact_line():
    """The rom&nd ten-colour palette pair scores >= 0.76 and reports the variant-safe anchor reason."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【rom&nd】勝過眼皮十色眼影盤"
    pchome_title = "rom&nd X ZO&FRIENDS 勝過眼皮十色眼影盤 8g/7g"

    outcome = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=499,
        competitor_price=499,
    )

    assert outcome.score >= 0.76
    assert outcome.hard_veto is False
    assert "shared_identity_anchor_variant_safe" in outcome.reasons
def test_marketplace_matcher_promotes_shared_model_token_for_exact_model():
from services.marketplace_product_matcher import score_marketplace_match
@@ -735,6 +781,16 @@ def test_marketplace_search_terms_keep_professional_product_phrase():
assert not any("卸除防曬" in term or "外出清潔" in term for term in mustela_terms)
def test_marketplace_search_terms_keep_variant_descriptor_for_sensitive_lines():
    """The first search term for variant-sensitive products has the expected brand/line/variant text."""
    from services.marketplace_product_matcher import build_search_terms

    nail_tip_terms = build_search_terms(
        "【DASHING DIVA】MAGICPRESS時尚潮流美甲片_極光之藍",
        max_terms=5,
    )
    palette_terms = build_search_terms("【rom&nd】勝過眼皮十色眼影盤", max_terms=5)

    first_nail_tip_term = nail_tip_terms[0]
    assert first_nail_tip_term == "dashing diva 時尚潮流美甲片 極光之藍"
    first_palette_term = palette_terms[0]
    assert first_palette_term == "romand 勝過眼皮十色眼影盤"
def test_marketplace_search_terms_prefer_specific_line_over_generic_usage_words():
from services.marketplace_product_matcher import build_search_terms