[V10.343] 強化 PChome 商品搜尋召回
All checks were successful
CD Pipeline / deploy (push) Successful in 1m5s
All checks were successful
CD Pipeline / deploy (push) Successful in 1m5s
This commit is contained in:
@@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
||||
# ==========================================
|
||||
# 系統版本與路徑
|
||||
# ==========================================
|
||||
SYSTEM_VERSION = "V10.342"
|
||||
SYSTEM_VERSION = "V10.343"
|
||||
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
||||
public_url = PUBLIC_URL # 用於模板顯示
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
- 2026-05-20 追記:同步背景 PChome identity / price direction 更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更商品比對行為。
|
||||
- 2026-05-20 追記:同步背景 PChome crawler 搜尋韌性擴充後的 `services/pchome_crawler.py` 行數;此處只更新 inventory,不變更 PChome crawler 行為。
|
||||
- 2026-05-20 追記:同步 PChome 近門檻候選重評與 matcher 系列/刀片數防錯配更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;此處只更新 inventory,不變更比價行為。
|
||||
- 2026-05-20 追記:同步 PChome 搜尋詞品質層、候選召回與 hard-veto 狀態分流更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;並補列背景市場情報 deployment readiness 大檔,僅更新 inventory。
|
||||
|
||||
## 達到或超過 800 行檔案清單
|
||||
|
||||
@@ -52,16 +53,17 @@
|
||||
| 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders |
|
||||
| 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers |
|
||||
| 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting |
|
||||
| 1128 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / persistence normalization |
|
||||
| 1356 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
|
||||
| 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service |
|
||||
| 844 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing |
|
||||
| 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy |
|
||||
| 1042 | `services/code_review_pipeline_service.py` | P2 Code review pipeline service | scan orchestration / finding normalization / persistence adapter |
|
||||
| 953 | `routes/export_routes.py` | P2 Export flow | export command/router glue / file path / download orchestration |
|
||||
| 816 | `services/ppt_vision_service.py` | P2 PPT vision QA service | runtime state / queue status / model probe / audit execution 分離 |
|
||||
| 1592 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / cache strategy |
|
||||
| 1602 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / cache strategy |
|
||||
| 1120 | `services/competitor_intel_repository.py` | P2 competitor intel repository | review queue query / cache shaping / formatting helpers |
|
||||
| 805 | `routes/bot_api_routes.py` | P2 Bot API Blueprint | route glue / bot action service |
|
||||
| 811 | `services/market_intel/deployment_readiness.py` | P2 market intel deployment readiness | preflight gates / readiness payload / route contract helpers |
|
||||
|
||||
## 市場情報開發前置禁區
|
||||
|
||||
|
||||
@@ -144,7 +144,13 @@ def _build_search_keywords(momo_name: str) -> list:
|
||||
cleaned = _clean_search_text(momo_name)
|
||||
terms = [cleaned[:36], cleaned[:24]]
|
||||
|
||||
return _dedupe_terms(terms)
|
||||
primary_terms = _dedupe_terms(terms[: max(1, MAX_SEARCH_TERMS - 1)])
|
||||
original_terms = _dedupe_terms([momo_name])
|
||||
for term in original_terms:
|
||||
if term.lower() not in {existing.lower() for existing in primary_terms}:
|
||||
primary_terms.append(term)
|
||||
break
|
||||
return _dedupe_terms(primary_terms)
|
||||
|
||||
|
||||
def _format_match_diagnostics(diagnostics) -> str:
|
||||
@@ -252,8 +258,9 @@ def _search_pchome_candidates(crawler, momo_name: str, keywords: list = None, mo
|
||||
"""以多組搜尋詞擴大 PChome 候選池,只在強同款時提前停止。"""
|
||||
candidates = []
|
||||
seen_ids = set()
|
||||
search_limit = SEARCH_LIMIT * max(1, SEARCH_MAX_PAGES)
|
||||
for keyword in keywords or _build_search_keywords(momo_name):
|
||||
ok, _, products = crawler.search_products(keyword, limit=SEARCH_LIMIT, max_pages=SEARCH_MAX_PAGES)
|
||||
ok, _, products = crawler.search_products(keyword, limit=search_limit, max_pages=SEARCH_MAX_PAGES)
|
||||
if not ok or not products:
|
||||
continue
|
||||
for product in products:
|
||||
@@ -1154,6 +1161,7 @@ class CompetitorPriceFeeder:
|
||||
continue
|
||||
|
||||
if score < MIN_MATCH_SCORE and not manual_accept_override:
|
||||
attempt_status = "identity_veto" if getattr(diagnostics, "hard_veto", False) else "low_score"
|
||||
logger.debug(
|
||||
f"[Feeder] {sku} 比對分數過低 ({score:.3f} < {MIN_MATCH_SCORE}),"
|
||||
f"{_format_match_diagnostics(diagnostics)}"
|
||||
@@ -1165,7 +1173,7 @@ class CompetitorPriceFeeder:
|
||||
momo_price=momo_price,
|
||||
search_terms=search_terms,
|
||||
candidate_count=len(products),
|
||||
attempt_status="low_score",
|
||||
attempt_status=attempt_status,
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
@@ -1382,6 +1390,7 @@ class CompetitorPriceFeeder:
|
||||
continue
|
||||
|
||||
if score < MIN_MATCH_SCORE:
|
||||
attempt_status = "identity_veto" if getattr(diagnostics, "hard_veto", False) else "refresh_low_score"
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
@@ -1389,7 +1398,7 @@ class CompetitorPriceFeeder:
|
||||
momo_price=momo_price,
|
||||
search_terms=search_terms,
|
||||
candidate_count=1,
|
||||
attempt_status="refresh_low_score",
|
||||
attempt_status=attempt_status,
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
|
||||
@@ -111,6 +111,96 @@ GENERIC_TOKENS = {
|
||||
"美國",
|
||||
}
|
||||
|
||||
# Marketing / option / gifting copy that `_clean_search_phrase` blanks out of a
# product title before search terms are built. Order is kept as authored;
# `_clean_search_phrase` re-sorts by length so longer phrases are removed first.
SEARCH_NOISE_PHRASES = (
    # launch announcements
    "新品上市", "全新上市",
    # "pick one" / variant-selection wording
    "任選一款", "任選1款", "任選一色", "任選1色",
    "多款任選", "多款可選", "色號可選", "香味可選", "口味可選",
    # gift wrapping and gifting occasions
    "送精美紙袋", "精美紙袋", "交換禮物", "聖誕禮物",
    "母親節", "父親節", "情人節",
    # usage-scenario copy
    "外出清潔", "卸除髒汙", "卸除防曬", "卸防曬",
    # generic category / channel wording
    "韓國彩妝", "水光感", "官方直營", "官方",
)
|
||||
|
||||
# Stand-alone tokens treated as search noise: `_clean_search_phrase` drops any
# whitespace-separated token equal to one of these, and `_search_core_score`
# penalises core tokens that contain one as a substring.
SEARCH_NOISE_TOKENS = {
    # option counters
    "一款", "1款", "一色", "1色",
    # launch wording
    "上市", "全新", "新品",
    # variant dimensions
    "香味", "口味", "味道", "顏色", "色號",
    # gifting
    "紙袋", "禮物",
    # usage / category copy
    "清潔", "髒汙", "防曬", "彩妝", "水光感",
}
|
||||
|
||||
# Product-type phrases that identify what an item actually is.
# `_extract_anchor_phrases` walks this tuple in order and skips a later hit
# when an earlier, already-collected phrase is contained in it, so the
# sequence below is significant and must not be re-sorted.
SEARCH_IDENTITY_ANCHORS = (
    "免用水潔淨液",
    "身體按摩精油", "按摩精油",
    "擴香補充瓶", "擴香瓶",
    "全面修復霜", "修復霜",
    "護膚膏", "屁屁噴", "身體乳", "緊實乳", "潔膚露", "潔淨液",
    "護甲油", "指甲油", "美甲片",
    "唇凍", "唇釉", "唇膏",
    "粉底棒", "遮瑕棒",
    "化妝水", "精華液", "精華",
    "面膜", "乳液", "乳霜", "面霜",
    "精油",
    "水氧機", "香氛機",
)
|
||||
|
||||
# Words that can describe very different products. `_search_core_score`
# demotes a token containing one of these when another core token carries an
# identity anchor, and `_ranked_search_core_phrases` only accepts such a
# phrase as the very first pick.
SEARCH_AMBIGUOUS_PRODUCT_TERMS = {"保護膜", "保護貼"}
|
||||
|
||||
BRAND_ALIAS_OVERRIDES = {
|
||||
"clarins": ("克蘭詩", "clarins"),
|
||||
"nars": ("nars",),
|
||||
@@ -1099,6 +1189,123 @@ def score_marketplace_match(
|
||||
)
|
||||
|
||||
|
||||
def _clean_search_phrase(value: str) -> str:
    """Return ``value`` reduced to a space-separated phrase free of search noise.

    Steps, in order:
      1. run the text through ``normalize_product_text``;
      2. blank out every entry of ``SEARCH_NOISE_PHRASES`` (longest first, so
         a long phrase is removed before any shorter phrase it contains);
      3. turn every run of non-word characters into a single space;
      4. drop whitespace-separated tokens listed in ``SEARCH_NOISE_TOKENS``
         or ``GENERIC_TOKENS``.

    The result may be an empty string.
    """
    cleaned = normalize_product_text(value)

    noise_longest_first = sorted(SEARCH_NOISE_PHRASES, key=len, reverse=True)
    for noise in noise_longest_first:
        cleaned = cleaned.replace(noise.lower(), " ")

    cleaned = re.sub(r"[^\w\u4e00-\u9fff]+", " ", cleaned)

    kept_words = [
        word
        for word in cleaned.split()
        if not (word in SEARCH_NOISE_TOKENS or word in GENERIC_TOKENS)
    ]
    return re.sub(r"\s+", " ", " ".join(kept_words)).strip()
|
||||
|
||||
|
||||
def _search_spec_terms(identity: ProductIdentity) -> list[str]:
    """Build the size/quantity fragments of a search term from ``identity``.

    For volume, weight and dosage only the first recorded value is used; it is
    rendered with the ``g`` format (so ``100.0`` becomes ``100``) followed by
    its unit. A truthy ``total_piece_count`` is appended with the ``包``
    suffix. Fragments come back in the fixed order ml, g, mg, piece count.
    """
    measured = (
        (identity.volumes_ml, "ml"),
        (identity.weights_g, "g"),
        (identity.dosages_mg, "mg"),
    )
    fragments: list[str] = [
        f"{values[0]:g}{unit}" for values, unit in measured if values
    ]
    if identity.total_piece_count:
        fragments.append(f"{identity.total_piece_count}包")
    return fragments
|
||||
|
||||
|
||||
def _extract_anchor_phrases(token: str) -> list[str]:
    """Collect identity-anchor phrases found in ``token``.

    ``token`` is cleaned with ``_clean_search_phrase`` first. For every entry
    of ``SEARCH_IDENTITY_ANCHORS`` present in the cleaned text, the anchor is
    widened to the left with up to N CJK characters (N = 0 for anchors of five
    or more characters, 4 for three- or four-character anchors, 6 for shorter
    ones), cleaned again, and kept unless

      * it is shorter than two characters,
      * it is already collected, or
      * a previously collected phrase is strictly contained in it.

    Returns the phrases in discovery order; an empty list when nothing matches.
    """
    cleaned = _clean_search_phrase(token)
    if not cleaned:
        return []

    collected: list[str] = []
    for anchor in SEARCH_IDENTITY_ANCHORS:
        if anchor not in cleaned:
            continue

        candidate = anchor
        if re.search(r"[\u4e00-\u9fff]", anchor):
            if len(anchor) >= 5:
                prefix_width = 0
            elif len(anchor) >= 3:
                prefix_width = 4
            else:
                prefix_width = 6
            hit = re.search(rf"([\u4e00-\u9fff]{{0,{prefix_width}}}{re.escape(anchor)})", cleaned)
            if hit:
                candidate = hit.group(1)

        candidate = _clean_search_phrase(candidate)

        # A longer phrase that merely wraps an earlier pick adds nothing new.
        wraps_earlier_pick = any(
            earlier != candidate and earlier in candidate for earlier in collected
        )
        if wraps_earlier_pick:
            continue
        if len(candidate) < 2 or candidate in collected:
            continue
        collected.append(candidate)

    return collected
|
||||
|
||||
|
||||
def _search_core_score(token: str, all_tokens: set[str]) -> tuple[int, int, str]:
    """Return a sort key ranking ``token`` as a search-term core phrase.

    The key is ``(score, -compact_length, cleaned_text)``; larger keys rank
    higher. Sentinel scores: ``-999`` when the token cleans to an empty
    string, ``-900`` when the cleaned token is itself a noise/generic token.

    Otherwise the score is the sum of:
      * +30 for a latin word of three or more characters, +12 for any digit;
      * +90 when the token contains an identity anchor (+8 more when the
        first anchor phrase equals the whole compact token), or, with no
        anchor, ``max(0, 24 - length)``;
      * +14 for compact length <= 8, -12 for compact length >= 12;
      * -80 when the token contains an ambiguous product term while some
        other token in ``all_tokens`` carries an anchor;
      * -18 when the token contains any ``SEARCH_NOISE_TOKENS`` entry.
    """
    cleaned = _clean_search_phrase(token)
    if not cleaned:
        return (-999, 0, cleaned)

    compact = cleaned.replace(" ", "")
    if compact in SEARCH_NOISE_TOKENS or compact in GENERIC_TOKENS:
        return (-900, 0, cleaned)

    width = len(compact)
    points = 0
    points += 30 if re.search(r"[a-z][a-z0-9-]{2,}", cleaned) else 0
    points += 12 if re.search(r"\d", cleaned) else 0

    own_anchors = _extract_anchor_phrases(cleaned)
    if not own_anchors:
        points += max(0, 24 - width)
    elif own_anchors[0] == compact:
        points += 90 + 8
    else:
        points += 90

    if width <= 8:
        points += 14
    elif width >= 12:
        points -= 12

    # Does any *other* token already provide an identity anchor?
    sibling_has_anchor = False
    for other in all_tokens:
        if other != token and _extract_anchor_phrases(other):
            sibling_has_anchor = True
            break

    if sibling_has_anchor:
        for term in SEARCH_AMBIGUOUS_PRODUCT_TERMS:
            if term in compact:
                points -= 80
                break

    for noise in SEARCH_NOISE_TOKENS:
        if noise in compact:
            points -= 18
            break

    return (points, -width, cleaned)
|
||||
|
||||
|
||||
def _ranked_search_core_phrases(identity: ProductIdentity, limit: int = 4) -> list[str]:
    """Return up to ``limit`` core search phrases for ``identity``, best first.

    Core tokens (minus ``GENERIC_TOKENS``) are ranked by ``_search_core_score``
    in descending order. Each token contributes its anchor phrases, or its
    cleaned text when it has none. A phrase is skipped when it is shorter
    than two characters, is a noise token, is already present, or contains an
    ambiguous product term while at least one phrase has already been picked.

    Args:
        identity: Parsed product identity; only ``core_tokens`` is read.
        limit: Maximum number of phrases to return. Values <= 0 yield ``[]``.

    Returns:
        The selected phrases in ranking order (possibly fewer than ``limit``).
    """
    # The limit used to be checked only after appending, so a non-positive
    # limit still produced one phrase.
    if limit <= 0:
        return []

    tokens = {token for token in identity.core_tokens if token not in GENERIC_TOKENS}

    # Score every token exactly once: each scoring call re-inspects all
    # sibling tokens, so recomputing it in both the sort key and the loop
    # below doubled the work.
    scores = {token: _search_core_score(token, tokens) for token in tokens}
    ranked_tokens = sorted(tokens, key=scores.__getitem__, reverse=True)

    phrases: list[str] = []
    for token in ranked_tokens:
        # Sentinel scores (-900 / -999) mark empty or pure-noise tokens.
        if scores[token][0] < -100:
            continue
        candidates = _extract_anchor_phrases(token) or [_clean_search_phrase(token)]
        for phrase in candidates:
            compact = phrase.replace(" ", "")
            if len(compact) < 2 or compact in SEARCH_NOISE_TOKENS:
                continue
            # An ambiguous term is only acceptable as the leading phrase.
            if phrases and any(term in compact for term in SEARCH_AMBIGUOUS_PRODUCT_TERMS):
                continue
            if phrase in phrases:
                continue
            phrases.append(phrase)
            if len(phrases) >= limit:
                return phrases
    return phrases
|
||||
|
||||
|
||||
def build_search_terms(name: str, max_terms: int = 3) -> list[str]:
|
||||
identity = parse_product_identity(name)
|
||||
terms: list[str] = []
|
||||
@@ -1120,30 +1327,27 @@ def build_search_terms(name: str, max_terms: int = 3) -> list[str]:
|
||||
return latin[0] if latin else ""
|
||||
|
||||
brand_part = primary_brand_phrase()
|
||||
core = " ".join(sorted(identity.core_tokens, key=lambda token: (-len(token), token))[:4])
|
||||
specs = []
|
||||
if identity.volumes_ml:
|
||||
specs.append(f"{int(identity.volumes_ml[0])}ml")
|
||||
if identity.weights_g:
|
||||
specs.append(f"{int(identity.weights_g[0])}g")
|
||||
if identity.dosages_mg:
|
||||
dosage = identity.dosages_mg[0]
|
||||
dosage_label = f"{int(dosage)}mg" if dosage.is_integer() else f"{dosage:g}mg"
|
||||
specs.append(dosage_label)
|
||||
if identity.total_piece_count:
|
||||
specs.append(f"{identity.total_piece_count}包")
|
||||
|
||||
spec_part = " ".join(specs)
|
||||
core_tokens = sorted(identity.core_tokens, key=lambda token: (-len(token), token))
|
||||
core_short = " ".join(core_tokens[:2])
|
||||
spec_part = " ".join(_search_spec_terms(identity))
|
||||
core_phrases = _ranked_search_core_phrases(identity, limit=4)
|
||||
core_short = " ".join(core_phrases[:2])
|
||||
core_primary = core_phrases[0] if core_phrases else ""
|
||||
model_phrases = [
|
||||
phrase
|
||||
for phrase in core_phrases[1:]
|
||||
if re.fullmatch(r"[a-z]*\d+[a-z0-9-]*", phrase)
|
||||
or re.fullmatch(r"[a-z][a-z0-9-]{2,}", phrase)
|
||||
]
|
||||
primary_with_model = " ".join(
|
||||
part for part in (core_primary, model_phrases[0] if model_phrases else "") if part
|
||||
)
|
||||
for value in (
|
||||
" ".join(part for part in (brand_part, primary_with_model, spec_part) if part),
|
||||
" ".join(part for part in (brand_part, core_short, spec_part) if part),
|
||||
" ".join(part for part in (brand_part, core_short) if part),
|
||||
" ".join(part for part in (core_short, spec_part) if part),
|
||||
" ".join(part for part in (core_primary, spec_part) if part),
|
||||
identity.searchable_name,
|
||||
):
|
||||
cleaned = re.sub(r"[^\w\u4e00-\u9fff]+", " ", value)
|
||||
cleaned = re.sub(r"\s+", " ", cleaned).strip()
|
||||
cleaned = _clean_search_phrase(value)
|
||||
if cleaned and cleaned not in terms:
|
||||
terms.append(cleaned[:42])
|
||||
if len(terms) >= max_terms:
|
||||
|
||||
@@ -18,7 +18,8 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
|
||||
assert "INSERT INTO competitor_match_attempts" in source
|
||||
assert "CAST(:search_terms AS jsonb)" in source
|
||||
assert 'attempt_status="matched"' in source
|
||||
assert 'attempt_status="low_score"' in source
|
||||
assert '"low_score"' in source
|
||||
assert '"identity_veto"' in source
|
||||
assert 'attempt_status="no_result"' in source
|
||||
assert 'attempt_status="no_match"' in source
|
||||
assert 'attempt_status="error"' in source
|
||||
@@ -342,6 +343,71 @@ def test_competitor_feeder_skips_rejected_candidate_and_uses_next_best(monkeypat
|
||||
assert attempts[0]["best_product"].product_id == "DDAB01-ACCEPTABLE"
|
||||
|
||||
|
||||
def test_competitor_feeder_splits_hard_veto_from_low_score(monkeypatch):
    """A below-threshold match flagged ``hard_veto`` is recorded as ``identity_veto``.

    The crawler is stubbed to return a single unrelated product and the scorer
    to return a low score with ``hard_veto=True``; the recorded attempt must
    carry the ``identity_veto`` status while still counting as a low-score skip.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    wrong_product = PChomeProduct(
        product_id="DDAB01-WRONG",
        name="iPhone 16 Pro 保護膜",
        price=399,
        original_price=499,
        discount=20,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-WRONG",
        stock=20,
        store="24h",
        rating=4.7,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        """Crawler stub whose every search succeeds with the wrong product."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [wrong_product]

    def fake_score(*_args, **_kwargs):
        # Fresh diagnostics object per call, vetoed on identity grounds.
        return SimpleNamespace(
            score=0.31,
            brand_score=0.0,
            token_score=0.1,
            spec_score=0.55,
            sequence_score=0.1,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=True,
            reasons=("brand_conflict", "product_line_conflict"),
            comparison_mode="not_comparable",
            tags=["identity_v2", "identity_veto"],
        )

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)

    feeder = CompetitorPriceFeeder(engine=object())
    recorded = []

    def capture_attempt(*_args, **kwargs):
        recorded.append(kwargs)

    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "A006",
        "name": "【TAICEND 泰陞】寶貝液體保護膜 屁屁噴 100ml",
        "product_id": 6,
        "momo_price": 399,
    }
    result = feeder._run_sku_items([sku_item])

    assert result.matched == 0
    assert result.skipped_low_score == 1
    first_attempt = recorded[0]
    assert first_attempt["attempt_status"] == "identity_veto"
    assert first_attempt["diagnostics"].hard_veto is True
|
||||
|
||||
|
||||
def test_search_candidates_does_not_stop_on_merely_acceptable_match(monkeypatch):
|
||||
from services.competitor_price_feeder import _search_pchome_candidates
|
||||
from services.pchome_crawler import PChomeProduct
|
||||
@@ -421,6 +487,18 @@ def test_competitor_feeder_logs_keyword_parser_fallback(monkeypatch, caplog):
|
||||
assert "fallback to cleaned product name" in caplog.text
|
||||
|
||||
|
||||
def test_competitor_feeder_keeps_original_name_as_search_fallback():
    """The keyword list is filled to ``MAX_SEARCH_TERMS`` and ends with the original name."""
    from services import competitor_price_feeder

    momo_name = "【Mustela 慕之恬廊】慕之幼 免用水潔淨液 300ml(外出清潔 卸除髒汙 卸除防曬 卸防曬)"
    terms = competitor_price_feeder._build_search_keywords(momo_name)

    assert len(terms) == competitor_price_feeder.MAX_SEARCH_TERMS
    # The last slot is the fallback derived from the untouched product name.
    assert terms[-1].startswith("Mustela 慕之恬廊 慕之幼 免用水潔淨液")
    leading_terms = terms[:4]
    assert any("免用水潔淨液 300ml" in term for term in leading_terms)
|
||||
|
||||
|
||||
def test_competitor_feeder_refreshes_expired_identity_by_known_product_id(monkeypatch):
|
||||
from services.competitor_price_feeder import CompetitorPriceFeeder
|
||||
from services.pchome_crawler import PChomeProduct
|
||||
|
||||
@@ -419,7 +419,8 @@ def test_ai_product_pick_agent_uses_real_competitor_data_and_dashboard_action():
|
||||
assert "MAX_SEARCH_TERMS" in feeder_source
|
||||
assert "_build_search_keywords" in feeder_source
|
||||
assert "_search_pchome_candidates" in feeder_source
|
||||
assert "crawler.search_products(keyword, limit=SEARCH_LIMIT, max_pages=SEARCH_MAX_PAGES)" in feeder_source
|
||||
assert "search_limit = SEARCH_LIMIT * max(1, SEARCH_MAX_PAGES)" in feeder_source
|
||||
assert "crawler.search_products(keyword, limit=search_limit, max_pages=SEARCH_MAX_PAGES)" in feeder_source
|
||||
assert "_fetch_unmatched_priority_skus" in feeder_source
|
||||
assert "_fetch_expired_identity_skus" in feeder_source
|
||||
assert "run_expired_identity_refresh" in feeder_source
|
||||
|
||||
@@ -435,6 +435,39 @@ def test_marketplace_search_terms_prefer_readable_brand_core_spec():
|
||||
assert not any(term.endswith(" l") for term in terms)
|
||||
|
||||
|
||||
def test_marketplace_search_terms_prioritize_identity_phrase_over_ambiguous_copy():
    """The identity phrase (屁屁噴) leads the terms; the ambiguous 保護膜 copy does not."""
    from services.marketplace_product_matcher import build_search_terms

    terms = build_search_terms("【TAICEND 泰陞】寶貝液體保護膜 屁屁噴 100ml", max_terms=5)
    best_term = terms[0]

    assert best_term == "泰陞 屁屁噴 100ml"
    assert "保護膜" not in best_term
    top_three_joined = " ".join(terms[:3])
    assert "屁屁噴" in top_three_joined
|
||||
|
||||
|
||||
def test_marketplace_search_terms_drop_option_and_marketing_noise():
    """Option ("任選一款") and launch ("新品上市") wording never reaches a search term."""
    from services.marketplace_product_matcher import build_search_terms

    terms = build_search_terms("【YSL】情挑誘光嫩唇凍6ml(任選一款/新品上市)", max_terms=5)

    assert terms[0] == "ysl 情挑誘光嫩唇凍 6ml"
    assert all("一款" not in term and "上市" not in term for term in terms)
|
||||
|
||||
|
||||
def test_marketplace_search_terms_keep_professional_product_phrase():
    """Product-type phrases survive cleaning while usage-scenario copy is removed."""
    from services.marketplace_product_matcher import build_search_terms

    abysse_name = "【Abysse】天然植萃身體按摩精油550ml"
    mustela_name = "【Mustela 慕之恬廊】慕之幼 免用水潔淨液 300ml(外出清潔 卸除髒汙 卸除防曬 卸防曬)"

    abysse_terms = build_search_terms(abysse_name, max_terms=5)
    mustela_terms = build_search_terms(mustela_name, max_terms=5)

    assert abysse_terms[0] == "abysse 身體按摩精油 550ml"
    assert mustela_terms[0] == "慕之恬廊 免用水潔淨液 300ml"
    assert all(
        "卸除防曬" not in term and "外出清潔" not in term for term in mustela_terms
    )
|
||||
|
||||
|
||||
def test_batch_compare_top_uses_latest_momo_price_not_revenue(monkeypatch):
|
||||
from services import pchome_crawler
|
||||
|
||||
|
||||
@@ -179,5 +179,5 @@ def test_feeder_search_candidate_passes_page_cap(monkeypatch):
|
||||
)
|
||||
|
||||
assert candidates == [product]
|
||||
assert calls[0][1]["limit"] == 20
|
||||
assert calls[0][1]["limit"] == 40
|
||||
assert calls[0][1]["max_pages"] == 2
|
||||
|
||||
Reference in New Issue
Block a user