Files
ewoooc/tests/test_competitor_match_attempts_persistence.py
OoO 0cea70890a
All checks were successful
CD Pipeline / deploy (push) Successful in 1m21s
導入 browse.sh 比價診斷計畫
2026-05-21 18:40:49 +08:00

1478 lines
50 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from pathlib import Path
import json
import logging
from datetime import datetime
from types import SimpleNamespace
# Directory one level above the folder that holds this test file; the tests
# below resolve service, migration and static-asset paths against it.
ROOT = Path(__file__).resolve().parents[1]
def _function_body(source: str, function_name: str, next_function_name: str) -> str:
return source.split(f"def {function_name}", 1)[1].split(f"def {next_function_name}", 1)[0]
def test_competitor_dashboard_hot_paths_use_latest_price_lateral_lookup():
    """Source-level guard on three repository functions' latest-price lookup.

    Each inspected function body must contain the ``JOIN LATERAL`` /
    ``ORDER BY ... LIMIT 1`` fragments and must not contain the
    ``ROW_NUMBER()`` window fragments.
    """
    repository_text = (ROOT / "services/competitor_intel_repository.py").read_text(encoding="utf-8")

    # (function to inspect, function that follows it in the module)
    function_spans = (
        ("_fetch_competitor_coverage_uncached", "fetch_competitor_gap_trend"),
        ("_review_queue_cte_and_filter", "_fetch_competitor_review_queue_page_uncached"),
        ("_fetch_competitor_review_queue_uncached", "fetch_competitor_comparison_results"),
    )
    # Extract every body before asserting anything, as the checks need all three.
    sql_bodies = [
        _function_body(repository_text, start_name, stop_name)
        for start_name, stop_name in function_spans
    ]

    required_fragments = (
        "JOIN LATERAL",
        "ORDER BY pr.timestamp DESC, pr.id DESC",
        "LIMIT 1",
    )
    forbidden_fragments = (
        "ROW_NUMBER() OVER (PARTITION BY p.id",
        "lm.rn = 1",
    )
    for sql_body in sql_bodies:
        for fragment in required_fragments:
            assert fragment in sql_body
        for fragment in forbidden_fragments:
            assert fragment not in sql_body
def test_competitor_feeder_persists_all_match_attempt_outcomes():
    """Source-level guard on the feeder module and its match-attempt migrations.

    Nothing is executed: the test only checks that the expected identifiers,
    status literals and SQL fragments appear in the feeder source and in the
    three migration files.
    """

    def load(relative_path):
        # Every inspected file is read as UTF-8 text relative to ROOT.
        return (ROOT / relative_path).read_text(encoding="utf-8")

    source = load("services/competitor_price_feeder.py")
    migration = load("migrations/023_competitor_match_attempts.sql")
    diagnostics_migration = load("migrations/041_competitor_match_diagnostics.sql")
    browse_migration = load("migrations/042_add_browse_diagnostics_to_match_attempts.sql")

    leading_source_fragments = (
        "attempts_written",
        "_ensure_competitor_match_attempts_table",
        "_record_match_attempt",
        "INSERT INTO competitor_match_attempts",
        "CAST(:search_terms AS jsonb)",
        'attempt_status="matched"',
        '"recoverable_low_score"',
        '"true_low_confidence"',
        '"identity_veto"',
        'attempt_status="no_result"',
        'attempt_status="no_match"',
        'attempt_status="error"',
        "_search_pchome_candidates(crawler, momo_name, search_terms, momo_price=momo_price)",
        "_prepare_browse_diagnostic",
        "browse_diagnostic_json",
        "PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED",
        'attempt_status="protected_existing_match"',
        "_should_upsert_competitor_price",
        "_classify_low_score_attempt",
        "replace_legacy_unverified",
        "identity_v2",
        "_fetch_expired_identity_skus",
        "run_expired_identity_refresh",
        "_fetch_retryable_candidate_skus",
        "run_retryable_candidate_revalidation",
    )
    for fragment in leading_source_fragments:
        assert fragment in source

    # Narrow to the retryable-candidate query, then to its latest_attempt CTE.
    retryable_source = _function_body(
        source,
        "_fetch_retryable_candidate_skus",
        "_fetch_expired_identity_skus",
    )
    assert "la.attempt_status IN ('low_score', 'refresh_low_score', 'recoverable_low_score')" in retryable_source
    after_cte_header = retryable_source.split("latest_attempt AS", 1)[1]
    latest_attempt_source = after_cte_header.split("SELECT\n lm.product_id", 1)[0]
    # The NOT NULL filter must be applied on the outer alias, not inside the CTE.
    assert "cma.best_competitor_product_id IS NOT NULL" not in latest_attempt_source
    assert "la.best_competitor_product_id IS NOT NULL" in retryable_source

    remaining_checks = (
        (source, (
            "refresh_known_identity",
            'attempt_status="unit_comparable"',
            'attempt_status="refresh_unit_comparable"',
            "mode={getattr(diagnostics, 'comparison_mode'",
            'PCHOME_FEEDER_TIMEOUT", "12"',
            "PChomeCrawler(timeout=REQUEST_TIMEOUT",
        )),
        (migration, (
            "CREATE TABLE IF NOT EXISTS competitor_match_attempts",
            "attempt_status",
            "search_terms",
            "best_match_score",
        )),
        (diagnostics_migration, (
            "match_diagnostic_json",
            "comparison_mode",
            "diagnostic_codes",
        )),
        (browse_migration, (
            "browse_diagnostic_json",
            "idx_comp_match_attempts_browse_diag_time",
        )),
        (source, (
            "competitor_product_url",
            "competitor_image_url",
            "competitor_stock",
        )),
        (migration, (
            "error_message",
            "idx_comp_match_attempts_sku_source_time",
        )),
    )
    for haystack, fragments in remaining_checks:
        for fragment in fragments:
            assert fragment in haystack
def test_competitor_feeder_records_browse_sh_plan_for_no_result(monkeypatch):
    """An empty search result is recorded with a plan-only browse.sh diagnostic."""
    from services.competitor_price_feeder import CompetitorPriceFeeder

    class FakeCrawler:
        """Crawler stand-in whose search succeeds but returns no products."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", []

    recorded_attempts = []

    def capture_attempt(*_args, **kwargs):
        # Only the keyword arguments are inspected by the assertions below.
        recorded_attempts.append(kwargs)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "BROWSE001",
        "name": "MOMO 稀有專櫃組合 50ml",
        "product_id": 901,
        "momo_price": 1280,
    }
    outcome = feeder._run_sku_items([sku_item])

    assert outcome.matched == 0
    assert outcome.skipped_no_result == 1

    plan = recorded_attempts[0]["browse_diagnostic"]
    assert plan["tool"] == "browse.sh"
    assert plan["mode"] == "plan_only"
    assert plan["execute_enabled"] is False
    assert plan["reason"] == "no_result"
    assert plan["execution"]["status"] == "disabled"
    first_command = plan["suggested_commands"][0]
    assert first_command["args"][0] == "get"
    first_url = plan["urls"][0]
    assert "ecshweb.pchome.com.tw/search" in first_url
def test_competitor_match_attempt_persists_browse_diagnostic_json():
    """Round-trip a browse diagnostic through ``_record_match_attempt``.

    Records one ``no_result`` attempt against an in-memory SQLite engine, then
    reads ``browse_diagnostic_json`` back and checks the decoded payload.
    """
    from sqlalchemy import create_engine, text
    from services.competitor_price_feeder import CompetitorPriceFeeder
    # No table is created in this test, so the feeder is presumably expected to
    # create competitor_match_attempts on its own — TODO confirm.
    engine = create_engine("sqlite:///:memory:")
    feeder = CompetitorPriceFeeder(engine=engine)
    feeder._record_match_attempt(
        sku="BROWSE002",
        momo_name="MOMO 取證測試商品",
        search_terms=["取證 測試"],
        attempt_status="no_result",
        browse_diagnostic={
            "tool": "browse.sh",
            "mode": "plan_only",
            "urls": ["https://ecshweb.pchome.com.tw/search/v3.3/?q=test"],
        },
    )
    with engine.connect() as conn:
        # scalar_one() requires exactly one matching row for this SKU.
        row = conn.execute(text("""
SELECT browse_diagnostic_json
FROM competitor_match_attempts
WHERE sku = 'BROWSE002'
""")).scalar_one()
    # The column value is decoded with json.loads, i.e. it is read back as JSON text.
    payload = json.loads(row)
    assert payload["tool"] == "browse.sh"
    assert payload["mode"] == "plan_only"
def test_match_diagnostics_payload_carries_professional_match_lanes():
    """A different-pack match exposes match type, price basis and alert tier.

    The same three lanes must appear both in the diagnostics payload and, in
    prefixed form, in the extended tag list.
    """
    from services.competitor_price_feeder import _match_diagnostics_payload, _extend_match_tags
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "理膚寶水 B5 全面修復霜 40ml x2 超值組"
    competitor_title = "理膚寶水 全面修復霜 B5 40ml"
    diagnostics = score_marketplace_match(
        momo_title,
        competitor_title,
        momo_price=1199,
        competitor_price=679,
    )

    payload = _match_diagnostics_payload(diagnostics)
    tags = _extend_match_tags([], diagnostics)

    expected_lanes = (
        ("match_type", "same_product_different_pack"),
        ("price_basis", "unit_price"),
        ("alert_tier", "unit_price_review"),
    )
    for lane, value in expected_lanes:
        assert payload[lane] == value
    assert "unit_comparable" in payload["evidence_flags"]

    for expected_tag in (
        "match_type_same_product_different_pack",
        "price_basis_unit_price",
        "alert_tier_unit_price_review",
    ):
        assert expected_tag in tags
def test_competitor_match_review_service_closes_human_review_loop():
    """Source-level guard on the manual-review loop.

    Checks that the expected identifiers and SQL fragments appear in the
    review service, the feeder, the reviews migration and the dashboard
    script. Nothing is executed.
    """
    service_source = (ROOT / "services/competitor_match_review_service.py").read_text(encoding="utf-8")
    migration = (ROOT / "migrations/039_create_competitor_match_reviews.sql").read_text(encoding="utf-8")
    dashboard_js = (ROOT / "web/static/js/page-dashboard-v2.js").read_text(encoding="utf-8")
    # Read the feeder module once instead of re-reading it for every assertion.
    feeder_source = (ROOT / "services/competitor_price_feeder.py").read_text(encoding="utf-8")

    assert "VALID_REVIEW_ACTIONS" in service_source
    assert "accept_identity" in service_source
    assert "reject_identity" in service_source
    assert "unit_price_required" in service_source
    assert "manual_accepted" in service_source
    assert "manual_rejected" in service_source
    assert "manual_unit_price_required" in service_source
    assert "INSERT INTO competitor_match_reviews" in service_source
    assert "INSERT INTO competitor_prices" in service_source
    assert "INSERT INTO competitor_price_history" in service_source
    assert "manual_review" in service_source
    assert "manual_accept" in service_source

    assert "_fetch_latest_manual_review_for_candidate" in feeder_source
    assert "manual_review_rejected" in feeder_source
    assert "manual_accept_override" in feeder_source

    assert "CREATE TABLE IF NOT EXISTS competitor_match_reviews" in migration
    assert "review_action" in migration
    assert "reviewer_identity" in migration
    assert "candidate_diagnostic" in migration
    assert "idx_comp_match_reviews_sku_source_time" in migration

    assert "runPchomeReviewDecision" in dashboard_js
    assert "/api/pchome-review/" in dashboard_js
def test_reject_review_expires_current_formal_price():
    """Rejecting a match changes the stored price expiry and logs ``manual_rejected``.

    Builds a minimal SQLite schema, seeds one product with a protected match
    attempt and a competitor price expiring in 2099, records a
    ``reject_identity`` review, then checks the resulting rows.
    """
    from sqlalchemy import create_engine, text
    from services.competitor_match_review_service import record_competitor_match_review
    engine = create_engine("sqlite:///:memory:")
    with engine.begin() as conn:
        # Minimal tables used by this scenario.
        conn.execute(text("CREATE TABLE products (id INTEGER PRIMARY KEY, i_code TEXT, name TEXT)"))
        conn.execute(text("CREATE TABLE price_records (id INTEGER PRIMARY KEY, product_id INTEGER, price NUMERIC, timestamp TEXT)"))
        conn.execute(text("""
CREATE TABLE competitor_match_attempts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
sku TEXT,
source TEXT,
momo_product_id INTEGER,
momo_product_name TEXT,
momo_price NUMERIC,
search_terms TEXT,
candidate_count INTEGER,
attempt_status TEXT,
best_competitor_product_id TEXT,
best_competitor_product_name TEXT,
best_competitor_price NUMERIC,
best_match_score NUMERIC,
error_message TEXT,
attempted_at TEXT
)
"""))
        conn.execute(text("""
CREATE TABLE competitor_prices (
sku TEXT,
source TEXT,
price NUMERIC,
original_price NUMERIC,
discount_pct INTEGER,
competitor_product_id TEXT,
competitor_product_name TEXT,
match_score NUMERIC,
tags TEXT,
crawled_at TEXT,
expires_at TEXT
)
"""))
        # Seed data: one product, one price record, one prior attempt and the
        # current competitor price whose expiry lies far in the future.
        conn.execute(text("INSERT INTO products VALUES (1, 'A005', '舒特膚 AD 乳液 200ml')"))
        conn.execute(text("INSERT INTO price_records VALUES (1, 1, 980, '2026-05-20 09:00:00')"))
        conn.execute(text("""
INSERT INTO competitor_match_attempts
(sku, source, momo_product_id, momo_product_name, momo_price,
search_terms, candidate_count, attempt_status,
best_competitor_product_id, best_competitor_product_name,
best_competitor_price, best_match_score, error_message, attempted_at)
VALUES
('A005', 'pchome', 1, '舒特膚 AD 乳液 200ml', 980,
'[]', 1, 'protected_existing_match',
'DDAB01-REJECT', '舒特膚 AD 乳液 200ml', 899, 0.84,
'score=0.84', '2026-05-20 09:10:00')
"""))
        conn.execute(text("""
INSERT INTO competitor_prices
(sku, source, price, competitor_product_id, competitor_product_name,
match_score, tags, crawled_at, expires_at)
VALUES
('A005', 'pchome', 899, 'DDAB01-REJECT', '舒特膚 AD 乳液 200ml',
0.84, '["identity_v2"]', '2026-05-20 09:10:00', '2099-01-01 00:00:00')
"""))
    # Action under test: a reviewer rejects the identity of the current match.
    result = record_competitor_match_review(
        engine,
        sku="A005",
        review_action="reject_identity",
        reviewer_identity="pytest",
    )
    assert result["success"] is True
    with engine.connect() as conn:
        expires_at = conn.execute(text("""
SELECT expires_at
FROM competitor_prices
WHERE sku = 'A005' AND source = 'pchome'
""")).scalar()
        # Highest id = most recently inserted attempt row for this SKU.
        manual_status = conn.execute(text("""
SELECT attempt_status
FROM competitor_match_attempts
WHERE sku = 'A005'
ORDER BY id DESC
LIMIT 1
""")).scalar()
    # Only asserts that the seeded far-future expiry was replaced; the new value is not pinned.
    assert expires_at != "2099-01-01 00:00:00"
    assert manual_status == "manual_rejected"
def test_competitor_feeder_respects_manual_rejected_candidate(monkeypatch):
    """A candidate whose latest manual review is a rejection is never written.

    The attempt is expected to be recorded as ``manual_rejected`` and counted
    under ``skipped_low_score``.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    candidate_id = "DDAB01-REJECTED"
    product = PChomeProduct(
        product_id=candidate_id,
        name="舒特膚 AD 乳液 200ml",
        price=899,
        original_price=999,
        discount=10,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-REJECTED",
        stock=20,
        store="24h",
        rating=4.7,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        """Crawler stand-in that always returns the single rejected candidate."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [product]

    recorded_attempts = []
    price_writes = []

    def latest_review_is_rejection(*_args, **_kwargs):
        return {"review_action": "reject_identity"}

    def capture_attempt(*_args, **kwargs):
        recorded_attempts.append(kwargs)

    def capture_write(*args, **kwargs):
        price_writes.append((args, kwargs))

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_fetch_latest_manual_review_for_candidate", latest_review_is_rejection)
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_write)

    sku_item = {
        "sku": "A003",
        "name": "舒特膚 AD 乳液 200ml",
        "product_id": 3,
        "momo_price": 980,
    }
    outcome = feeder._run_sku_items([sku_item])

    assert outcome.matched == 0
    assert outcome.skipped_low_score == 1
    assert price_writes == []
    first_attempt = recorded_attempts[0]
    assert first_attempt["attempt_status"] == "manual_rejected"
    assert "manual_review_rejected" in first_attempt["error_message"]
def test_competitor_feeder_skips_rejected_candidate_and_uses_next_best(monkeypatch):
    """A manually rejected top scorer is skipped in favour of the next candidate.

    The rejected candidate scores 0.95 and the other 0.84; the write and the
    recorded attempt must both reference the non-rejected product.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    def build_product(product_id, name, price, discount, rating):
        # Fields shared by both candidates are fixed here.
        return PChomeProduct(
            product_id=product_id,
            name=name,
            price=price,
            original_price=999,
            discount=discount,
            image_url="",
            product_url=f"https://24h.pchome.com.tw/prod/{product_id}",
            stock=20,
            store="24h",
            rating=rating,
            review_count=8,
            is_on_sale=True,
            crawled_at=datetime.now(),
        )

    rejected = build_product("DDAB01-REJECTED", "舒特膚 AD 乳液 200ml 舊候選", 899, 10, 4.7)
    accepted = build_product("DDAB01-ACCEPTABLE", "舒特膚 AD 乳液 200ml 新候選", 909, 9, 4.6)

    class FakeCrawler:
        """Crawler stand-in returning the rejected candidate ahead of the acceptable one."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [rejected, accepted]

    def fake_score(_momo_name, competitor_name, **_kwargs):
        # The old (rejected) candidate deliberately outscores the new one.
        is_old_candidate = "舊候選" in competitor_name
        return SimpleNamespace(
            score=0.95 if is_old_candidate else 0.84,
            brand_score=1.0,
            token_score=0.9,
            spec_score=1.0,
            sequence_score=0.8,
            type_score=1.0,
            price_penalty=0.0,
            hard_veto=False,
            reasons=(),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity"],
        )

    recorded_attempts = []
    price_writes = []

    def latest_review(_sku, candidate_id, **_kwargs):
        if candidate_id == "DDAB01-REJECTED":
            return {"review_action": "reject_identity"}
        return None

    def always_upsert(*_args, **_kwargs):
        return (True, "new_match")

    def capture_attempt(*_args, **kwargs):
        recorded_attempts.append(kwargs)

    def capture_write(*args, **kwargs):
        price_writes.append((args, kwargs))

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_fetch_latest_manual_review_for_candidate", latest_review)
    monkeypatch.setattr(feeder, "_should_upsert_competitor_price", always_upsert)
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_write)

    sku_item = {
        "sku": "A004",
        "name": "舒特膚 AD 乳液 200ml",
        "product_id": 4,
        "momo_price": 980,
    }
    outcome = feeder._run_sku_items([sku_item])

    assert outcome.matched == 1
    # Second positional argument of the first upsert call is the chosen product.
    first_write_args = price_writes[0][0]
    assert first_write_args[1].product_id == "DDAB01-ACCEPTABLE"
    first_attempt = recorded_attempts[0]
    assert first_attempt["attempt_status"] == "matched"
    assert first_attempt["best_product"].product_id == "DDAB01-ACCEPTABLE"
def test_competitor_feeder_splits_hard_veto_from_low_score(monkeypatch):
    """A hard-vetoed candidate is recorded as ``identity_veto``.

    It is still counted under ``skipped_low_score`` in the run result.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    product_fields = {
        "product_id": "DDAB01-WRONG",
        "name": "iPhone 16 Pro 保護膜",
        "price": 399,
        "original_price": 499,
        "discount": 20,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-WRONG",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    product = PChomeProduct(**product_fields)

    class FakeCrawler:
        """Crawler stand-in returning one unrelated product."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [product]

    def fake_score(*_args, **_kwargs):
        # Diagnostics with the hard veto flag set and conflict reasons attached.
        return SimpleNamespace(
            score=0.31,
            brand_score=0.0,
            token_score=0.1,
            spec_score=0.55,
            sequence_score=0.1,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=True,
            reasons=("brand_conflict", "product_line_conflict"),
            comparison_mode="not_comparable",
            tags=["identity_v2", "identity_veto"],
        )

    recorded_attempts = []

    def capture_attempt(*_args, **kwargs):
        recorded_attempts.append(kwargs)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "A006",
        "name": "【TAICEND 泰陞】寶貝液體保護膜 屁屁噴 100ml",
        "product_id": 6,
        "momo_price": 399,
    }
    outcome = feeder._run_sku_items([sku_item])

    assert outcome.matched == 0
    assert outcome.skipped_low_score == 1
    first_attempt = recorded_attempts[0]
    assert first_attempt["attempt_status"] == "identity_veto"
    assert first_attempt["diagnostics"].hard_veto is True
def test_competitor_feeder_marks_near_threshold_same_line_as_recoverable(monkeypatch):
    """A 0.754 score with a strong product-line reason is ``recoverable_low_score``."""
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    product_fields = {
        "product_id": "DDAB01-RECOVERABLE",
        "name": "Recipe Box 韓兔 兒童防曬氣墊粉餅",
        "price": 699,
        "original_price": 799,
        "discount": 12,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-RECOVERABLE",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    product = PChomeProduct(**product_fields)

    class FakeCrawler:
        """Crawler stand-in returning the single near-threshold candidate."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [product]

    def fake_score(*_args, **_kwargs):
        return SimpleNamespace(
            score=0.754,
            brand_score=1.0,
            token_score=0.59,
            spec_score=0.55,
            sequence_score=0.53,
            type_score=1.0,
            price_penalty=0.0,
            hard_veto=False,
            reasons=("strong_product_line_match",),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    recorded_attempts = []

    def capture_attempt(*_args, **kwargs):
        recorded_attempts.append(kwargs)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "RB001",
        "name": "【Recipebox】Recipe Box兒童防曬氣墊粉餅(兒童化妝品/無毒防曬粉餅/天然彩妝)",
        "product_id": 8,
        "momo_price": 699,
    }
    outcome = feeder._run_sku_items([sku_item])

    assert outcome.matched == 0
    assert outcome.skipped_low_score == 1
    assert recorded_attempts[0]["attempt_status"] == "recoverable_low_score"
def test_competitor_feeder_marks_weak_identity_as_true_low_confidence(monkeypatch):
    """A 0.733 score with no supporting reasons is ``true_low_confidence``."""
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    product_fields = {
        "product_id": "DDAB01-WEAK",
        "name": "韓系彩妝 十色眼影盤",
        "price": 499,
        "original_price": 699,
        "discount": 28,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-WEAK",
        "stock": 20,
        "store": "24h",
        "rating": 4.2,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    product = PChomeProduct(**product_fields)

    class FakeCrawler:
        """Crawler stand-in returning the single weak candidate."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [product]

    def fake_score(*_args, **_kwargs):
        return SimpleNamespace(
            score=0.733,
            brand_score=0.95,
            token_score=0.51,
            spec_score=0.45,
            sequence_score=0.44,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=False,
            reasons=(),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity"],
        )

    recorded_attempts = []

    def capture_attempt(*_args, **kwargs):
        recorded_attempts.append(kwargs)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "RM001",
        "name": "【rom&nd】勝過眼皮十色眼影盤",
        "product_id": 9,
        "momo_price": 499,
    }
    outcome = feeder._run_sku_items([sku_item])

    assert outcome.matched == 0
    assert outcome.skipped_low_score == 1
    assert recorded_attempts[0]["attempt_status"] == "true_low_confidence"
def test_competitor_feeder_downgrades_variant_selection_gap_from_recoverable(monkeypatch):
    """Three near-identical shade variants end up as ``true_low_confidence``.

    The MOMO listing is a pick-any-colour offer, while every PChome candidate
    is one specific shade scoring 0.733 or 0.734.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    # (product id suffix, shade label); everything else is identical per variant.
    shade_variants = (
        ("08", "08深杏色"),
        ("09", "09灰褐棕"),
        ("11", "11摩卡灰褐"),
    )
    products = [
        PChomeProduct(
            product_id=f"DDAB01-{suffix}",
            name=f"PERIPERA 雙頭旋轉極細眉筆 {shade} 0.05g",
            price=180,
            original_price=220,
            discount=18,
            image_url="",
            product_url=f"https://24h.pchome.com.tw/prod/DDAB01-{suffix}",
            stock=20,
            store="24h",
            rating=4.7,
            review_count=8,
            is_on_sale=True,
            crawled_at=datetime.now(),
        )
        for suffix, shade in shade_variants
    ]

    class FakeCrawler:
        """Crawler stand-in returning all three shade variants."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", products

    def fake_score(_momo_name, competitor_name, **_kwargs):
        # One shade is marginally ahead of the others.
        if "09灰褐棕" in competitor_name:
            variant_score = 0.734
        else:
            variant_score = 0.733
        return SimpleNamespace(
            score=variant_score,
            brand_score=1.0,
            token_score=0.74,
            spec_score=0.55,
            sequence_score=0.66,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=False,
            reasons=("shared_identity_anchor_packaging_variant",),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    recorded_attempts = []

    def capture_attempt(*_args, **kwargs):
        recorded_attempts.append(kwargs)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "P001",
        "name": "【peripera官方直營】雙頭旋轉極細眉筆_多色任選(1.5mm極細筆頭)",
        "product_id": 11,
        "momo_price": 180,
    }
    outcome = feeder._run_sku_items([sku_item])

    assert outcome.matched == 0
    assert outcome.skipped_low_score == 1
    assert recorded_attempts[0]["attempt_status"] == "true_low_confidence"
def test_competitor_feeder_treats_choose_one_offer_as_missing_variant_signal(monkeypatch):
    """A pick-one MOMO offer against a multi-shade listing is ``true_low_confidence``."""
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    product_fields = {
        "product_id": "DDAB01-YSL",
        "name": "【YSL聖羅蘭】恆久完美透膚煙染腮紅 6g ( #12/ #57/ #93)",
        "price": 1650,
        "original_price": 1780,
        "discount": 7,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-YSL",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    product = PChomeProduct(**product_fields)

    class FakeCrawler:
        """Crawler stand-in returning the single multi-shade listing."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [product]

    def fake_score(*_args, **_kwargs):
        return SimpleNamespace(
            score=0.735,
            brand_score=1.0,
            token_score=0.74,
            spec_score=0.55,
            sequence_score=0.66,
            type_score=1.0,
            price_penalty=0.0,
            hard_veto=False,
            reasons=("shared_identity_anchor_packaging_variant",),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    recorded_attempts = []

    def capture_attempt(*_args, **kwargs):
        recorded_attempts.append(kwargs)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "Y001",
        "name": "【YSL】官方直營 恆久完美透膚煙染腮紅(腮紅/任選1款/新品上市)",
        "product_id": 12,
        "momo_price": 1650,
    }
    outcome = feeder._run_sku_items([sku_item])

    assert outcome.matched == 0
    assert outcome.skipped_low_score == 1
    assert recorded_attempts[0]["attempt_status"] == "true_low_confidence"
def test_should_upsert_allows_same_identity_candidate_to_replace_lower_score():
    """A higher-scoring candidate may replace the stored match for the same SKU.

    Seeds one ``competitor_prices`` row scored 0.788, then asks
    ``_should_upsert_competitor_price`` about a candidate with a different
    PChome product id scored 0.811.
    """
    from sqlalchemy import create_engine, text
    from services.competitor_price_feeder import CompetitorPriceFeeder
    engine = create_engine("sqlite:///:memory:")
    with engine.begin() as conn:
        # Only the columns created here are available to the method under test.
        conn.execute(text("""
CREATE TABLE competitor_prices (
sku TEXT,
source TEXT,
competitor_product_id TEXT,
competitor_product_name TEXT,
match_score REAL,
tags TEXT
)
"""))
        # Existing match: lower score, stored with identity_v2 tags.
        conn.execute(text("""
INSERT INTO competitor_prices (
sku, source, competitor_product_id, competitor_product_name, match_score, tags
) VALUES (
'13701353',
'pchome',
'DDBH69-A900JVFUJ',
'LANCOME 蘭蔻 零粉感超持久柔焦蜜粉餅(10g)#00 UNIVERSAL-公司貨',
0.788,
'[\"identity_v2\",\"comparison_exact_identity\",\"brand_match\"]'
)
"""))
    feeder = CompetitorPriceFeeder(engine=engine)
    # Lightweight stand-in exposing only product_id and name.
    product = SimpleNamespace(
        product_id="DDBH4E-A900JS80T",
        name="《LANCOME 蘭蔻》零粉感超持久柔焦蜜粉餅 10g",
    )
    should_write, reason = feeder._should_upsert_competitor_price(
        "13701353",
        product,
        0.811,
        source="pchome",
    )
    assert should_write is True
    # The reason must start with the old->new score transition; any suffix is not pinned.
    assert reason.startswith("replace_same_identity_better_score=0.788->0.811")
def test_competitor_feeder_marks_existing_stronger_match_as_protected(monkeypatch):
    """A refused upsert is recorded as ``protected_existing_match``.

    ``_should_upsert_competitor_price`` is stubbed to refuse with a conflict
    reason; no price may be written and the reason must reach the attempt's
    ``error_message``.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    product_fields = {
        "product_id": "DDAB01-NEW",
        "name": "PONY EFFECT 絕對持久定妝噴霧",
        "price": 599,
        "original_price": 699,
        "discount": 14,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-NEW",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    product = PChomeProduct(**product_fields)

    class FakeCrawler:
        """Crawler stand-in returning the single incoming candidate."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [product]

    def fake_score(*_args, **_kwargs):
        return SimpleNamespace(
            score=0.781,
            brand_score=1.0,
            token_score=0.79,
            spec_score=0.55,
            sequence_score=0.68,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=False,
            reasons=("shared_identity_anchor_packaging_variant",),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    recorded_attempts = []
    price_writes = []

    def capture_attempt(*_args, **kwargs):
        recorded_attempts.append(kwargs)

    def refuse_upsert(*_args, **_kwargs):
        return (False, "existing_match_conflict;existing_score=0.948;incoming_score=0.781")

    def capture_write(*args, **kwargs):
        price_writes.append((args, kwargs))

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)
    monkeypatch.setattr(feeder, "_should_upsert_competitor_price", refuse_upsert)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_write)

    sku_item = {
        "sku": "14133077",
        "name": "【PONY EFFECT】絕對持久定妝噴霧",
        "product_id": 10,
        "momo_price": 599,
    }
    outcome = feeder._run_sku_items([sku_item])

    assert outcome.matched == 0
    assert outcome.skipped_low_score == 1
    assert price_writes == []
    first_attempt = recorded_attempts[0]
    assert first_attempt["attempt_status"] == "protected_existing_match"
    assert "existing_match_conflict" in first_attempt["error_message"]
def test_search_candidates_does_not_stop_on_merely_acceptable_match(monkeypatch):
    """Searching continues past a 0.80 candidate and stops after a 0.95 one.

    Of the three keywords, only the first two may be queried, and both
    candidates must be returned in discovery order.
    """
    from services.competitor_price_feeder import _search_pchome_candidates
    from services.pchome_crawler import PChomeProduct

    def build_product(product_id, name, price, discount, rating):
        # Fields shared by both candidates are fixed here.
        return PChomeProduct(
            product_id=product_id,
            name=name,
            price=price,
            original_price=799,
            discount=discount,
            image_url="",
            product_url=f"https://24h.pchome.com.tw/prod/{product_id}",
            stock=20,
            store="24h",
            rating=rating,
            review_count=8,
            is_on_sale=True,
            crawled_at=datetime.now(),
        )

    first = build_product("DDAB01-FIRST", "理膚寶水 B5 修復霜 40ml 普通候選", 679, 15, 4.7)
    second = build_product("DDAB01-SECOND", "理膚寶水 B5 修復霜 40ml 強同款", 689, 14, 4.8)

    class FakeCrawler:
        """Crawler stand-in that logs each keyword it is asked to search."""

        def __init__(self):
            self.calls = []

        def search_products(self, keyword, **_kwargs):
            self.calls.append(keyword)
            # Only the "broad" keyword yields the ordinary candidate.
            hit = first if keyword == "broad" else second
            return True, "ok", [hit]

    def fake_score(_momo_name, competitor_name, **_kwargs):
        if "普通候選" in competitor_name:
            return SimpleNamespace(score=0.80)
        return SimpleNamespace(score=0.95)

    crawler = FakeCrawler()
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)

    candidates = _search_pchome_candidates(
        crawler,
        "理膚寶水 B5 修復霜 40ml",
        keywords=["broad", "precise", "unused"],
        momo_price=699,
    )

    assert crawler.calls == ["broad", "precise"]
    found_ids = [candidate.product_id for candidate in candidates]
    assert found_ids == ["DDAB01-FIRST", "DDAB01-SECOND"]
def test_search_candidates_adds_variant_recall_sorts_for_dashing_diva(monkeypatch):
    """Pins the exact keyword/sort sequence issued for a Dashing Diva listing.

    Every candidate scores 0.72, and the expected crawler calls are four
    unsorted keyword searches followed by two sorted repeats of the line keyword.
    """
    from services.competitor_price_feeder import _search_pchome_candidates
    from services.pchome_crawler import PChomeProduct

    product_fields = {
        "product_id": "DDBH8E-A900JMCJZ",
        "name": "Dashing Diva/F 時尚潮流美甲片-月光銀影 MDF5F010AG",
        "price": 420,
        "original_price": 520,
        "discount": 19,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDBH8E-A900JMCJZ",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    candidate = PChomeProduct(**product_fields)

    class FakeCrawler:
        """Crawler stand-in that logs (keyword, sort) for every search."""

        def __init__(self):
            self.calls = []

        def search_products(self, keyword, **kwargs):
            self.calls.append((keyword, kwargs.get("sort")))
            return True, "ok", [candidate]

    def constant_score(*_args, **_kwargs):
        return SimpleNamespace(score=0.72)

    monkeypatch.setattr(
        "services.marketplace_product_matcher.score_marketplace_match",
        constant_score,
    )
    crawler = FakeCrawler()

    _search_pchome_candidates(
        crawler,
        "【DASHING DIVA】MAGICPRESS時尚潮流美甲片_極光之藍",
        keywords=["dashing diva 時尚潮流美甲片 極光之藍"],
        momo_price=331,
    )

    unsorted_keywords = (
        "dashing diva 時尚潮流美甲片 極光之藍",
        "dashing diva 時尚潮流美甲片",
        "dashing diva magicpress",
        "dashing diva 美甲片",
    )
    expected_calls = [(keyword, None) for keyword in unsorted_keywords]
    expected_calls += [
        ("dashing diva 時尚潮流美甲片", sort_mode)
        for sort_mode in ("sale/dc", "new/dc")
    ]
    assert crawler.calls == expected_calls
def test_search_candidates_uses_line_specific_recall_for_dashing_diva_p_line(monkeypatch):
    """Pins the keyword/sort sequence for the Dashing Diva 頂級璀燦 line.

    The recall keywords and the two sorted repeats must use this line's name.
    """
    from services.competitor_price_feeder import _search_pchome_candidates
    from services.pchome_crawler import PChomeProduct

    product_fields = {
        "product_id": "MDU5F009AG",
        "name": "Dashing Diva/P 頂級璀燦美甲片-心動陰影 MDU5F009AG",
        "price": 420,
        "original_price": 520,
        "discount": 19,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/MDU5F009AG",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    candidate = PChomeProduct(**product_fields)

    class FakeCrawler:
        """Crawler stand-in that logs (keyword, sort) for every search."""

        def __init__(self):
            self.calls = []

        def search_products(self, keyword, **kwargs):
            self.calls.append((keyword, kwargs.get("sort")))
            return True, "ok", [candidate]

    def constant_score(*_args, **_kwargs):
        return SimpleNamespace(score=0.72)

    monkeypatch.setattr(
        "services.marketplace_product_matcher.score_marketplace_match",
        constant_score,
    )
    crawler = FakeCrawler()

    _search_pchome_candidates(
        crawler,
        "【DASHING DIVA】MAGICPRESS 頂級璀燦美甲片_心情史努比(史奴比)",
        keywords=["dashing diva 頂級璀燦美甲片 心情史努比"],
        momo_price=331,
    )

    unsorted_keywords = (
        "dashing diva 頂級璀燦美甲片 心情史努比",
        "dashing diva 頂級璀燦美甲片",
        "dashing diva magicpress",
        "dashing diva 美甲片",
    )
    expected_calls = [(keyword, None) for keyword in unsorted_keywords]
    expected_calls += [
        ("dashing diva 頂級璀燦美甲片", sort_mode)
        for sort_mode in ("sale/dc", "new/dc")
    ]
    assert crawler.calls == expected_calls
def test_competitor_feeder_logs_keyword_parser_fallback(monkeypatch, caplog):
    """A failing ``build_search_terms`` still yields keywords and logs the fallback."""
    from services import competitor_price_feeder
    from services import marketplace_product_matcher

    def failing_build_search_terms(*_args, **_kwargs):
        raise RuntimeError("matcher unavailable")

    monkeypatch.setattr(
        marketplace_product_matcher,
        "build_search_terms",
        failing_build_search_terms,
    )
    caplog.set_level(logging.DEBUG, logger="services.competitor_price_feeder")

    keywords = competitor_price_feeder._build_search_keywords("理膚寶水 B5 修復霜 40ml")

    assert keywords
    assert "fallback to cleaned product name" in caplog.text
def test_competitor_feeder_keeps_original_name_as_search_fallback():
    """The keyword list is full-length and ends with the cleaned original name.

    One of the first four keywords must also carry the product-plus-size phrase.
    """
    from services import competitor_price_feeder

    momo_name = "【Mustela 慕之恬廊】慕之幼 免用水潔淨液 300ml(外出清潔 卸除髒汙 卸除防曬 卸防曬)"
    terms = competitor_price_feeder._build_search_keywords(momo_name)

    assert len(terms) == competitor_price_feeder.MAX_SEARCH_TERMS
    last_term = terms[-1]
    assert last_term.startswith("Mustela 慕之恬廊 慕之幼 免用水潔淨液")
    leading_terms = terms[:4]
    assert any("免用水潔淨液 300ml" in term for term in leading_terms)
def test_competitor_feeder_refreshes_expired_identity_by_known_product_id(monkeypatch):
    """Refreshing an item with a known competitor id looks that id up directly.

    The fake crawler records the ids passed to ``fetch_product_details`` and
    returns one product. The test verifies the requested id, the match count,
    the tags handed to the price upsert, and the recorded match attempt.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    known_id = "DDAB01-1900ABCD"
    requested = []
    product_fields = {
        "product_id": "DDAB01-1900ABCD",
        "name": "舒特膚 AD 乳液 200ml",
        "price": 899,
        "original_price": 999,
        "discount": 10,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-1900ABCD",
        "stock": 50,
        "store": "24h",
        "rating": 4.8,
        "review_count": 12,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    product = PChomeProduct(**product_fields)

    class FakeCrawler:
        """Crawler double that serves the single known product by id."""

        def __init__(self, *_args, **_kwargs):
            pass

        def fetch_product_details(self, product_ids, batch_size=20):
            requested.extend(product_ids)
            return True, "ok", [product]

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    feeder = CompetitorPriceFeeder(engine=object())

    writes = []
    attempts = []

    def always_allow_upsert(*_args, **_kwargs):
        return True, "same_or_empty_existing"

    def capture_upsert(sku, product, score, tags, **extra):
        row = {
            "sku": sku,
            "product_id": product.product_id,
            "score": score,
            "tags": tags,
        }
        # Extra keyword arguments win on key collisions, as with ``**kwargs`` unpacking.
        row.update(extra)
        writes.append(row)

    def capture_attempt(*_positional, **fields):
        attempts.append(fields)

    monkeypatch.setattr(feeder, "_should_upsert_competitor_price", always_allow_upsert)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_upsert)
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    refresh_item = {
        "sku": "A001",
        "name": "舒特膚 AD 乳液 200ml",
        "product_id": 1,
        "momo_price": 980,
        "competitor_product_id": "DDAB01-1900ABCD",
    }
    result = feeder._run_known_identity_refresh_items([refresh_item])

    assert requested == [known_id]
    assert result.matched == 1

    first_write = writes[0]
    assert first_write["product_id"] == known_id
    assert "identity_v2" in first_write["tags"]
    assert "refresh_known_identity" in first_write["tags"]

    first_attempt = attempts[0]
    assert first_attempt["attempt_status"] == "matched"
    assert first_attempt["search_terms"] == ["known_product_id:DDAB01-1900ABCD"]
def test_competitor_feeder_refresh_recovers_with_fresh_search_when_known_id_is_low_score(monkeypatch):
    """A low-scoring known id is replaced by a better candidate from a fresh search.

    The stubbed matcher returns 0.68 for the stale listing and 0.81 for the
    recovered one. The test expects the recovered product to be written with
    the ``fresh_search_recovery`` tag, and the recorded attempt to list both
    the known-id term and a brand-bearing search term.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    # Fields that are identical on both fake listings.
    shared_fields = {
        "original_price": 2490,
        "image_url": "",
        "stock": 20,
        "store": "24h",
        "review_count": 8,
        "is_on_sale": True,
    }
    stale = PChomeProduct(
        product_id="DDAB01-STALE",
        name="Panasonic 國際牌 男仕防水美體除毛器 國際版 (ER-GK83)",
        price=2290,
        discount=8,
        product_url="https://24h.pchome.com.tw/prod/DDAB01-STALE",
        rating=4.7,
        crawled_at=datetime.now(),
        **shared_fields,
    )
    recovered = PChomeProduct(
        product_id="DDAB01-RECOVERED",
        name="Panasonic 國際牌 男士身體除毛器 ER-GK83",
        price=2390,
        discount=4,
        product_url="https://24h.pchome.com.tw/prod/DDAB01-RECOVERED",
        rating=4.8,
        crawled_at=datetime.now(),
        **shared_fields,
    )

    class FakeCrawler:
        """Crawler double: id lookup yields the stale item, search yields both."""

        def __init__(self, *_args, **_kwargs):
            pass

        def fetch_product_details(self, product_ids, batch_size=20):
            assert product_ids == ["DDAB01-STALE"]
            return True, "ok", [stale]

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [stale, recovered]

    def fake_score(_momo_name, competitor_name, **_kwargs):
        is_recovered = any(
            marker in competitor_name for marker in ("RECOVERED", "男士身體除毛器")
        )
        if is_recovered:
            overall, token, spec, sequence = 0.81, 0.8, 0.8, 0.72
            reasons = ("shared_model_token",)
        else:
            overall, token, spec, sequence = 0.68, 0.55, 0.55, 0.6
            reasons = ()
        return SimpleNamespace(
            score=overall,
            brand_score=1.0,
            token_score=token,
            spec_score=spec,
            sequence_score=sequence,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=False,
            reasons=reasons,
            comparison_mode="exact_identity",
            # New list on every call so callers may mutate it freely.
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())

    attempts = []
    writes = []

    def always_allow_upsert(*_args, **_kwargs):
        return True, "same_or_empty_existing"

    def capture_upsert(sku, product, score, tags, **extra):
        row = {
            "sku": sku,
            "product_id": product.product_id,
            "score": score,
            "tags": tags,
        }
        # Extra keyword arguments win on key collisions, as with ``**kwargs`` unpacking.
        row.update(extra)
        writes.append(row)

    def capture_attempt(*_positional, **fields):
        attempts.append(fields)

    monkeypatch.setattr(feeder, "_should_upsert_competitor_price", always_allow_upsert)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_upsert)
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    refresh_item = {
        "sku": "TP00090100000153",
        "name": "【Panasonic 國際牌】男士身體除毛器 2025新款 ER-GK83 日版 日本直送",
        "product_id": 1,
        "momo_price": 2490,
        "competitor_product_id": "DDAB01-STALE",
    }
    result = feeder._run_known_identity_refresh_items([refresh_item])

    assert result.matched == 1

    first_write = writes[0]
    assert first_write["product_id"] == "DDAB01-RECOVERED"
    assert "fresh_search_recovery" in first_write["tags"]

    first_attempt = attempts[0]
    assert first_attempt["attempt_status"] == "matched"
    recorded_terms = first_attempt["search_terms"]
    assert "known_product_id:DDAB01-STALE" in recorded_terms
    assert any("Panasonic" in term or "國際牌" in term for term in recorded_terms)
def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch):
    """When the known id returns no product, a fresh search supplies the match.

    The fake crawler returns an empty list for the id lookup and one product
    for the search. The written row must carry both the
    ``missing_known_product_id`` and ``fresh_search_recovery`` tags.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    recovered_fields = {
        "product_id": "DDAB01-RECOVERED",
        "name": "eve舒摩兒 賦活美學浴潔露-全肌防護 237ml",
        "price": 441,
        "original_price": 499,
        "discount": 11,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-RECOVERED",
        "stock": 20,
        "store": "24h",
        "rating": 4.8,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    recovered = PChomeProduct(**recovered_fields)

    class FakeCrawler:
        """Crawler double: the id lookup comes back empty, search finds one item."""

        def __init__(self, *_args, **_kwargs):
            pass

        def fetch_product_details(self, product_ids, batch_size=20):
            assert product_ids == ["DDAB01-MISSING"]
            return True, "ok", []

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [recovered]

    def fake_score(_momo_name, competitor_name, **_kwargs):
        score_fields = {
            "score": 0.885,
            "brand_score": 1.0,
            "token_score": 0.7,
            "spec_score": 1.0,
            "sequence_score": 0.62,
            "type_score": 1.0,
            "price_penalty": 0.0,
            "hard_veto": False,
            "reasons": ("spec_name_alignment",),
            "comparison_mode": "exact_identity",
            "tags": ["identity_v2", "comparison_exact_identity", "brand_match"],
        }
        return SimpleNamespace(**score_fields)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())

    attempts = []
    writes = []

    def always_allow_upsert(*_args, **_kwargs):
        return True, "same_or_empty_existing"

    def capture_upsert(sku, product, score, tags, **extra):
        row = {
            "sku": sku,
            "product_id": product.product_id,
            "score": score,
            "tags": tags,
        }
        # Extra keyword arguments win on key collisions, as with ``**kwargs`` unpacking.
        row.update(extra)
        writes.append(row)

    def capture_attempt(*_positional, **fields):
        attempts.append(fields)

    monkeypatch.setattr(feeder, "_should_upsert_competitor_price", always_allow_upsert)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_upsert)
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    refresh_item = {
        "sku": "9823407",
        "name": "【Summers Eve 舒摩兒】浴潔露237ml 單入任選(私密清潔 經典防護王)",
        "product_id": 4864,
        "momo_price": 441,
        "competitor_product_id": "DDAB01-MISSING",
    }
    result = feeder._run_known_identity_refresh_items([refresh_item])

    assert result.matched == 1

    first_write = writes[0]
    assert first_write["product_id"] == "DDAB01-RECOVERED"
    written_tags = first_write["tags"]
    assert "missing_known_product_id" in written_tags
    assert "fresh_search_recovery" in written_tags

    assert attempts[0]["attempt_status"] == "matched"
def test_competitor_feeder_records_unit_comparable_without_price_upsert(monkeypatch):
    """A ``unit_comparable`` outcome is recorded as an attempt but never upserted.

    The MOMO item name carries an "x2" bundle suffix while the fake PChome
    result is a single 40ml listing. The test expects zero matches, one
    low-score skip, no price writes, and an attempt whose status and error
    message both mention ``unit_comparable``.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    product_fields = {
        "product_id": "DDAB01-UNIT",
        "name": "理膚寶水 全面修復霜 B5 40ml",
        "price": 679,
        "original_price": 799,
        "discount": 15,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-UNIT",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    product = PChomeProduct(**product_fields)

    class FakeCrawler:
        """Crawler double whose search always returns the single listing."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [product]

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    feeder = CompetitorPriceFeeder(engine=object())

    attempts = []
    writes = []

    def capture_attempt(*_positional, **fields):
        attempts.append(fields)

    def capture_upsert(*args, **kwargs):
        writes.append((args, kwargs))

    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_upsert)

    bundle_item = {
        "sku": "A002",
        "name": "理膚寶水 B5 全面修復霜 40ml x2 超值組",
        "product_id": 2,
        "momo_price": 1199,
    }
    result = feeder._run_sku_items([bundle_item])

    assert result.matched == 0
    assert result.skipped_low_score == 1
    assert writes == []

    first_attempt = attempts[0]
    assert first_attempt["attempt_status"] == "unit_comparable"
    assert "unit_comparable" in first_attempt["error_message"]