Files
ewoooc/tests/test_competitor_match_attempts_persistence.py
OoO 193b6e53c5
All checks were successful
CD Pipeline / deploy (push) Successful in 1m5s
[V10.343] 強化 PChome 商品搜尋召回
2026-05-20 16:21:19 +08:00

628 lines
22 KiB
Python

from pathlib import Path
import logging
from datetime import datetime
from types import SimpleNamespace
# Directory one level above the folder that contains this test module.
# The source-inspection tests below resolve "services/...", "migrations/..."
# and "web/..." paths relative to it.
ROOT = Path(__file__).resolve().parents[1]
def test_competitor_feeder_persists_all_match_attempt_outcomes():
    """Text-level guard for the feeder's match-attempt persistence.

    Reads the feeder module and two SQL migrations as plain text and checks
    that the expected identifiers / SQL fragments are present, and that two
    specific fragments are absent from the retryable-candidate query.
    """
    feeder_text = (ROOT / "services/competitor_price_feeder.py").read_text(encoding="utf-8")
    attempts_sql = (ROOT / "migrations/023_competitor_match_attempts.sql").read_text(encoding="utf-8")
    diagnostics_sql = (ROOT / "migrations/041_competitor_match_diagnostics.sql").read_text(encoding="utf-8")

    # Fragments the feeder source must contain before the query is sliced out.
    for needle in (
        "attempts_written",
        "_ensure_competitor_match_attempts_table",
        "_record_match_attempt",
        "INSERT INTO competitor_match_attempts",
        "CAST(:search_terms AS jsonb)",
        'attempt_status="matched"',
        '"low_score"',
        '"identity_veto"',
        'attempt_status="no_result"',
        'attempt_status="no_match"',
        'attempt_status="error"',
        "_search_pchome_candidates(crawler, momo_name, search_terms, momo_price=momo_price)",
        'attempt_status="needs_review"',
        "_should_upsert_competitor_price",
        "replace_legacy_unverified",
        "identity_v2",
        "_fetch_expired_identity_skus",
        "run_expired_identity_refresh",
        "_fetch_retryable_candidate_skus",
        "run_retryable_candidate_revalidation",
    ):
        assert needle in feeder_text

    # Text between the two function definitions, i.e. the body of
    # _fetch_retryable_candidate_skus as it appears in the file.
    after_retryable_def = feeder_text.split("def _fetch_retryable_candidate_skus", 1)[1]
    retryable_body = after_retryable_def.split("def _fetch_expired_identity_skus", 1)[0]
    assert "la.attempt_status = 'low_score'" in retryable_body
    assert "refresh_low_score')" not in retryable_body

    # Text from "latest_attempt AS" up to the outer SELECT: the NOT NULL filter
    # must not appear inside it, only in the surrounding query.
    after_latest_attempt = retryable_body.split("latest_attempt AS", 1)[1]
    latest_attempt_cte = after_latest_attempt.split("SELECT\n lm.product_id", 1)[0]
    assert "cma.best_competitor_product_id IS NOT NULL" not in latest_attempt_cte
    assert "la.best_competitor_product_id IS NOT NULL" in retryable_body

    for needle in (
        "refresh_known_identity",
        'attempt_status="unit_comparable"',
        'attempt_status="refresh_unit_comparable"',
        "mode={getattr(diagnostics, 'comparison_mode'",
        'PCHOME_FEEDER_TIMEOUT", "12"',
        "PChomeCrawler(timeout=REQUEST_TIMEOUT",
    ):
        assert needle in feeder_text

    for needle in (
        "CREATE TABLE IF NOT EXISTS competitor_match_attempts",
        "attempt_status",
        "search_terms",
        "best_match_score",
    ):
        assert needle in attempts_sql

    for needle in ("match_diagnostic_json", "comparison_mode", "diagnostic_codes"):
        assert needle in diagnostics_sql

    for needle in ("competitor_product_url", "competitor_image_url", "competitor_stock"):
        assert needle in feeder_text

    for needle in ("error_message", "idx_comp_match_attempts_sku_source_time"):
        assert needle in attempts_sql
def test_competitor_match_review_service_closes_human_review_loop():
    """Text-level guard for the manual review loop.

    Checks, by substring search only, that the review service, the feeder,
    the review migration and the dashboard script each still contain the
    identifiers this test lists.
    """
    service_source = (ROOT / "services/competitor_match_review_service.py").read_text(encoding="utf-8")
    # Read the feeder once; the original re-read the same file for every assert.
    feeder_source = (ROOT / "services/competitor_price_feeder.py").read_text(encoding="utf-8")
    migration = (ROOT / "migrations/039_create_competitor_match_reviews.sql").read_text(encoding="utf-8")
    dashboard_js = (ROOT / "web/static/js/page-dashboard-v2.js").read_text(encoding="utf-8")

    assert "VALID_REVIEW_ACTIONS" in service_source
    assert "accept_identity" in service_source
    assert "reject_identity" in service_source
    assert "unit_price_required" in service_source
    assert "manual_accepted" in service_source
    assert "manual_rejected" in service_source
    assert "manual_unit_price_required" in service_source
    assert "INSERT INTO competitor_match_reviews" in service_source
    assert "INSERT INTO competitor_prices" in service_source
    assert "INSERT INTO competitor_price_history" in service_source
    assert "manual_review" in service_source
    assert "manual_accept" in service_source

    assert "_fetch_latest_manual_review_for_candidate" in feeder_source
    assert "manual_review_rejected" in feeder_source
    assert "manual_accept_override" in feeder_source

    assert "CREATE TABLE IF NOT EXISTS competitor_match_reviews" in migration
    assert "review_action" in migration
    assert "reviewer_identity" in migration
    assert "candidate_diagnostic" in migration
    assert "idx_comp_match_reviews_sku_source_time" in migration

    assert "runPchomeReviewDecision" in dashboard_js
    assert "/api/pchome-review/" in dashboard_js
def test_reject_review_expires_current_formal_price():
    """Record a ``reject_identity`` review and check the stored price is expired.

    Builds an in-memory SQLite database with four tables, seeds one
    ``needs_review`` attempt and one price row (``expires_at`` in 2099) for
    SKU ``A005``, then calls ``record_competitor_match_review``.  Afterwards
    the price row's ``expires_at`` must differ from the seeded value and the
    newest attempt row must have status ``manual_rejected``.

    The engine is disposed in a ``finally`` block so the pooled SQLite
    connection is released even when an assertion fails.
    """
    from sqlalchemy import create_engine, text
    from services.competitor_match_review_service import record_competitor_match_review

    # SQL text is kept identical to the original statements; only the Python
    # layout (adjacent string literals) differs.
    create_attempts_sql = (
        "\n"
        "CREATE TABLE competitor_match_attempts (\n"
        "id INTEGER PRIMARY KEY AUTOINCREMENT,\n"
        "sku TEXT,\n"
        "source TEXT,\n"
        "momo_product_id INTEGER,\n"
        "momo_product_name TEXT,\n"
        "momo_price NUMERIC,\n"
        "search_terms TEXT,\n"
        "candidate_count INTEGER,\n"
        "attempt_status TEXT,\n"
        "best_competitor_product_id TEXT,\n"
        "best_competitor_product_name TEXT,\n"
        "best_competitor_price NUMERIC,\n"
        "best_match_score NUMERIC,\n"
        "error_message TEXT,\n"
        "attempted_at TEXT\n"
        ")\n"
    )
    create_prices_sql = (
        "\n"
        "CREATE TABLE competitor_prices (\n"
        "sku TEXT,\n"
        "source TEXT,\n"
        "price NUMERIC,\n"
        "original_price NUMERIC,\n"
        "discount_pct INTEGER,\n"
        "competitor_product_id TEXT,\n"
        "competitor_product_name TEXT,\n"
        "match_score NUMERIC,\n"
        "tags TEXT,\n"
        "crawled_at TEXT,\n"
        "expires_at TEXT\n"
        ")\n"
    )
    insert_attempt_sql = (
        "\n"
        "INSERT INTO competitor_match_attempts\n"
        "(sku, source, momo_product_id, momo_product_name, momo_price,\n"
        "search_terms, candidate_count, attempt_status,\n"
        "best_competitor_product_id, best_competitor_product_name,\n"
        "best_competitor_price, best_match_score, error_message, attempted_at)\n"
        "VALUES\n"
        "('A005', 'pchome', 1, '舒特膚 AD 乳液 200ml', 980,\n"
        "'[]', 1, 'needs_review',\n"
        "'DDAB01-REJECT', '舒特膚 AD 乳液 200ml', 899, 0.84,\n"
        "'score=0.84', '2026-05-20 09:10:00')\n"
    )
    insert_price_sql = (
        "\n"
        "INSERT INTO competitor_prices\n"
        "(sku, source, price, competitor_product_id, competitor_product_name,\n"
        "match_score, tags, crawled_at, expires_at)\n"
        "VALUES\n"
        "('A005', 'pchome', 899, 'DDAB01-REJECT', '舒特膚 AD 乳液 200ml',\n"
        "0.84, '[\"identity_v2\"]', '2026-05-20 09:10:00', '2099-01-01 00:00:00')\n"
    )
    select_expiry_sql = (
        "\n"
        "SELECT expires_at\n"
        "FROM competitor_prices\n"
        "WHERE sku = 'A005' AND source = 'pchome'\n"
    )
    select_latest_status_sql = (
        "\n"
        "SELECT attempt_status\n"
        "FROM competitor_match_attempts\n"
        "WHERE sku = 'A005'\n"
        "ORDER BY id DESC\n"
        "LIMIT 1\n"
    )

    # Executed in this order inside a single transaction: schema first, then seed rows.
    setup_statements = (
        "CREATE TABLE products (id INTEGER PRIMARY KEY, i_code TEXT, name TEXT)",
        "CREATE TABLE price_records (id INTEGER PRIMARY KEY, product_id INTEGER, price NUMERIC, timestamp TEXT)",
        create_attempts_sql,
        create_prices_sql,
        "INSERT INTO products VALUES (1, 'A005', '舒特膚 AD 乳液 200ml')",
        "INSERT INTO price_records VALUES (1, 1, 980, '2026-05-20 09:00:00')",
        insert_attempt_sql,
        insert_price_sql,
    )

    engine = create_engine("sqlite:///:memory:")
    try:
        with engine.begin() as conn:
            for statement in setup_statements:
                conn.execute(text(statement))

        result = record_competitor_match_review(
            engine,
            sku="A005",
            review_action="reject_identity",
            reviewer_identity="pytest",
        )
        assert result["success"] is True

        with engine.connect() as conn:
            expires_at = conn.execute(text(select_expiry_sql)).scalar()
            # Highest id == most recently inserted attempt row.
            manual_status = conn.execute(text(select_latest_status_sql)).scalar()

        assert expires_at != "2099-01-01 00:00:00"
        assert manual_status == "manual_rejected"
    finally:
        # Release the pooled connection held for the in-memory database.
        engine.dispose()
def test_competitor_feeder_respects_manual_rejected_candidate(monkeypatch):
    """A candidate with a ``reject_identity`` manual review must not be written.

    The crawler stub returns one candidate and the manual-review lookup stub
    always reports ``reject_identity``.  The run must count no match, one
    low-score skip, perform no price upsert, and record the attempt as
    ``manual_rejected`` with ``manual_review_rejected`` in its error message.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    candidate_fields = {
        "product_id": "DDAB01-REJECTED",
        "name": "舒特膚 AD 乳液 200ml",
        "price": 899,
        "original_price": 999,
        "discount": 10,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-REJECTED",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    rejected_candidate = PChomeProduct(**candidate_fields)

    class FakeCrawler:
        """Crawler stand-in whose search always yields the single candidate."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [rejected_candidate]

    recorded_attempts = []
    price_writes = []

    def always_rejected(*_args, **_kwargs):
        return {"review_action": "reject_identity"}

    def capture_attempt(*args, **kwargs):
        recorded_attempts.append(kwargs)

    def capture_write(*args, **kwargs):
        price_writes.append((args, kwargs))

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_fetch_latest_manual_review_for_candidate", always_rejected)
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_write)

    sku_item = {
        "sku": "A003",
        "name": "舒特膚 AD 乳液 200ml",
        "product_id": 3,
        "momo_price": 980,
    }
    outcome = feeder._run_sku_items([sku_item])

    assert outcome.matched == 0
    assert outcome.skipped_low_score == 1
    assert price_writes == []
    first_attempt = recorded_attempts[0]
    assert first_attempt["attempt_status"] == "manual_rejected"
    assert "manual_review_rejected" in first_attempt["error_message"]
def test_competitor_feeder_skips_rejected_candidate_and_uses_next_best(monkeypatch):
    """A manually rejected top candidate is skipped in favour of the next one.

    The scorer stub gives the rejected candidate 0.95 and the other 0.84; the
    manual-review stub rejects only ``DDAB01-REJECTED``.  The run must count
    one match and both the upsert and the recorded attempt must reference
    ``DDAB01-ACCEPTABLE``.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    def build_candidate(**specific):
        # Fields shared by both candidates; each call takes its own timestamp.
        return PChomeProduct(
            original_price=999,
            image_url="",
            stock=20,
            store="24h",
            review_count=8,
            is_on_sale=True,
            crawled_at=datetime.now(),
            **specific,
        )

    rejected = build_candidate(
        product_id="DDAB01-REJECTED",
        name="舒特膚 AD 乳液 200ml 舊候選",
        price=899,
        discount=10,
        product_url="https://24h.pchome.com.tw/prod/DDAB01-REJECTED",
        rating=4.7,
    )
    accepted = build_candidate(
        product_id="DDAB01-ACCEPTABLE",
        name="舒特膚 AD 乳液 200ml 新候選",
        price=909,
        discount=9,
        product_url="https://24h.pchome.com.tw/prod/DDAB01-ACCEPTABLE",
        rating=4.6,
    )

    class FakeCrawler:
        """Crawler stand-in returning both candidates, rejected one first."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [rejected, accepted]

    def fake_score(_momo_name, competitor_name, **_kwargs):
        # The rejected ("舊候選") candidate deliberately outranks the acceptable one.
        if "舊候選" in competitor_name:
            score = 0.95
        else:
            score = 0.84
        return SimpleNamespace(
            score=score,
            brand_score=1.0,
            token_score=0.9,
            spec_score=1.0,
            sequence_score=0.8,
            type_score=1.0,
            price_penalty=0.0,
            hard_veto=False,
            reasons=(),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity"],
        )

    recorded_attempts = []
    price_writes = []

    def review_lookup(_sku, candidate_id, **_kwargs):
        if candidate_id == "DDAB01-REJECTED":
            return {"review_action": "reject_identity"}
        return None

    def allow_upsert(*_args, **_kwargs):
        return (True, "new_match")

    def capture_attempt(*args, **kwargs):
        recorded_attempts.append(kwargs)

    def capture_write(*args, **kwargs):
        price_writes.append((args, kwargs))

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_fetch_latest_manual_review_for_candidate", review_lookup)
    monkeypatch.setattr(feeder, "_should_upsert_competitor_price", allow_upsert)
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_write)

    sku_item = {
        "sku": "A004",
        "name": "舒特膚 AD 乳液 200ml",
        "product_id": 4,
        "momo_price": 980,
    }
    outcome = feeder._run_sku_items([sku_item])

    assert outcome.matched == 1
    # Second positional argument of the upsert call is the chosen product.
    first_write_args = price_writes[0][0]
    assert first_write_args[1].product_id == "DDAB01-ACCEPTABLE"
    first_attempt = recorded_attempts[0]
    assert first_attempt["attempt_status"] == "matched"
    assert first_attempt["best_product"].product_id == "DDAB01-ACCEPTABLE"
def test_competitor_feeder_splits_hard_veto_from_low_score(monkeypatch):
    """A hard-vetoed candidate is recorded as ``identity_veto``.

    The scorer stub returns a low score with ``hard_veto=True``.  The run must
    count no match and one low-score skip, and the recorded attempt must carry
    status ``identity_veto`` together with the vetoing diagnostics object.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    candidate_fields = {
        "product_id": "DDAB01-WRONG",
        "name": "iPhone 16 Pro 保護膜",
        "price": 399,
        "original_price": 499,
        "discount": 20,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-WRONG",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    wrong_product = PChomeProduct(**candidate_fields)

    class FakeCrawler:
        """Crawler stand-in whose search always yields the mismatching product."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [wrong_product]

    def fake_score(*_args, **_kwargs):
        # Built per call so every result owns its own ``tags`` list.
        veto_diagnostics = {
            "score": 0.31,
            "brand_score": 0.0,
            "token_score": 0.1,
            "spec_score": 0.55,
            "sequence_score": 0.1,
            "type_score": 0.55,
            "price_penalty": 0.0,
            "hard_veto": True,
            "reasons": ("brand_conflict", "product_line_conflict"),
            "comparison_mode": "not_comparable",
            "tags": ["identity_v2", "identity_veto"],
        }
        return SimpleNamespace(**veto_diagnostics)

    recorded_attempts = []

    def capture_attempt(*args, **kwargs):
        recorded_attempts.append(kwargs)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "A006",
        "name": "【TAICEND 泰陞】寶貝液體保護膜 屁屁噴 100ml",
        "product_id": 6,
        "momo_price": 399,
    }
    outcome = feeder._run_sku_items([sku_item])

    assert outcome.matched == 0
    assert outcome.skipped_low_score == 1
    first_attempt = recorded_attempts[0]
    assert first_attempt["attempt_status"] == "identity_veto"
    assert first_attempt["diagnostics"].hard_veto is True
def test_search_candidates_does_not_stop_on_merely_acceptable_match(monkeypatch):
    """Searching continues past a 0.80 candidate and stops after a 0.95 one.

    The crawler stub returns a different product for the ``"broad"`` keyword
    than for any other keyword, and the scorer stub rates them 0.80 and 0.95.
    The test expects exactly the first two keywords to be queried and both
    products to be returned in discovery order.
    """
    from services.competitor_price_feeder import _search_pchome_candidates
    from services.pchome_crawler import PChomeProduct

    def build_candidate(**specific):
        # Fields shared by both candidates; each call takes its own timestamp.
        return PChomeProduct(
            original_price=799,
            image_url="",
            stock=20,
            store="24h",
            review_count=8,
            is_on_sale=True,
            crawled_at=datetime.now(),
            **specific,
        )

    first = build_candidate(
        product_id="DDAB01-FIRST",
        name="理膚寶水 B5 修復霜 40ml 普通候選",
        price=679,
        discount=15,
        product_url="https://24h.pchome.com.tw/prod/DDAB01-FIRST",
        rating=4.7,
    )
    second = build_candidate(
        product_id="DDAB01-SECOND",
        name="理膚寶水 B5 修復霜 40ml 強同款",
        price=689,
        discount=14,
        product_url="https://24h.pchome.com.tw/prod/DDAB01-SECOND",
        rating=4.8,
    )

    class FakeCrawler:
        """Crawler stand-in that logs every keyword it is asked to search."""

        def __init__(self):
            self.calls = []

        def search_products(self, keyword, **_kwargs):
            self.calls.append(keyword)
            hit = first if keyword == "broad" else second
            return True, "ok", [hit]

    def fake_score(_momo_name, competitor_name, **_kwargs):
        if "普通候選" in competitor_name:
            return SimpleNamespace(score=0.80)
        return SimpleNamespace(score=0.95)

    crawler = FakeCrawler()
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)

    candidates = _search_pchome_candidates(
        crawler,
        "理膚寶水 B5 修復霜 40ml",
        keywords=["broad", "precise", "unused"],
        momo_price=699,
    )

    # The third keyword must never be searched.
    assert crawler.calls == ["broad", "precise"]
    found_ids = [candidate.product_id for candidate in candidates]
    assert found_ids == ["DDAB01-FIRST", "DDAB01-SECOND"]
def test_competitor_feeder_logs_keyword_parser_fallback(monkeypatch, caplog):
    """When ``build_search_terms`` raises, keywords are still produced and logged.

    The matcher's ``build_search_terms`` is replaced with a function that
    raises ``RuntimeError``; ``_build_search_keywords`` must still return a
    non-empty result and the captured log must mention the fallback.
    """
    from services import competitor_price_feeder, marketplace_product_matcher

    def raise_matcher_unavailable(*_args, **_kwargs):
        raise RuntimeError("matcher unavailable")

    # Capture from DEBUG upwards on the feeder's logger.
    caplog.set_level(logging.DEBUG, logger="services.competitor_price_feeder")
    monkeypatch.setattr(marketplace_product_matcher, "build_search_terms", raise_matcher_unavailable)

    product_name = "理膚寶水 B5 修復霜 40ml"
    terms = competitor_price_feeder._build_search_keywords(product_name)

    assert terms
    assert "fallback to cleaned product name" in caplog.text
def test_competitor_feeder_keeps_original_name_as_search_fallback():
    """The last search keyword is derived from the full product name.

    For a long bracketed product title the keyword list must have exactly
    ``MAX_SEARCH_TERMS`` entries, end with a term starting with the cleaned
    full name, and contain the short product phrase among the first four terms.
    """
    from services import competitor_price_feeder

    product_name = "【Mustela 慕之恬廊】慕之幼 免用水潔淨液 300ml(外出清潔 卸除髒汙 卸除防曬 卸防曬)"
    terms = competitor_price_feeder._build_search_keywords(product_name)

    assert len(terms) == competitor_price_feeder.MAX_SEARCH_TERMS
    last_term = terms[-1]
    assert last_term.startswith("Mustela 慕之恬廊 慕之幼 免用水潔淨液")
    leading_terms = terms[:4]
    assert any("免用水潔淨液 300ml" in term for term in leading_terms)
def test_competitor_feeder_refreshes_expired_identity_by_known_product_id(monkeypatch):
    """A known competitor product id is refreshed via a detail fetch.

    The crawler stub records the ids passed to ``fetch_product_details`` and
    returns one product.  The run must request exactly the known id, count one
    match, upsert that product with the ``identity_v2`` and
    ``refresh_known_identity`` tags, and record a ``matched`` attempt whose
    search terms name the known product id.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    known_fields = {
        "product_id": "DDAB01-1900ABCD",
        "name": "舒特膚 AD 乳液 200ml",
        "price": 899,
        "original_price": 999,
        "discount": 10,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-1900ABCD",
        "stock": 50,
        "store": "24h",
        "rating": 4.8,
        "review_count": 12,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    known_product = PChomeProduct(**known_fields)
    requested = []

    class FakeCrawler:
        """Crawler stand-in that remembers which product ids were requested."""

        def __init__(self, *_args, **_kwargs):
            pass

        def fetch_product_details(self, product_ids, batch_size=20):
            requested.extend(product_ids)
            return True, "ok", [known_product]

    price_writes = []
    recorded_attempts = []

    def allow_upsert(*_args, **_kwargs):
        return (True, "same_or_empty_existing")

    def capture_write(sku, product, score, tags, **kwargs):
        # Flatten the call into a dict so the assertions can look up fields by name.
        price_writes.append({
            "sku": sku,
            "product_id": product.product_id,
            "score": score,
            "tags": tags,
            **kwargs,
        })

    def capture_attempt(*args, **kwargs):
        recorded_attempts.append(kwargs)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_should_upsert_competitor_price", allow_upsert)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_write)
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    refresh_item = {
        "sku": "A001",
        "name": "舒特膚 AD 乳液 200ml",
        "product_id": 1,
        "momo_price": 980,
        "competitor_product_id": "DDAB01-1900ABCD",
    }
    outcome = feeder._run_known_identity_refresh_items([refresh_item])

    assert requested == ["DDAB01-1900ABCD"]
    assert outcome.matched == 1
    first_write = price_writes[0]
    assert first_write["product_id"] == "DDAB01-1900ABCD"
    assert "identity_v2" in first_write["tags"]
    assert "refresh_known_identity" in first_write["tags"]
    first_attempt = recorded_attempts[0]
    assert first_attempt["attempt_status"] == "matched"
    assert first_attempt["search_terms"] == ["known_product_id:DDAB01-1900ABCD"]
def test_competitor_feeder_records_unit_comparable_without_price_upsert(monkeypatch):
    """A single-unit candidate for a multi-pack item is logged but not written.

    The momo item is an ``x2`` bundle while the crawler stub returns a single
    40ml product.  The run must count no match and one low-score skip, perform
    no price upsert, and record the attempt as ``unit_comparable`` with that
    word in its error message.  The real scorer is used (it is not patched).
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    candidate_fields = {
        "product_id": "DDAB01-UNIT",
        "name": "理膚寶水 全面修復霜 B5 40ml",
        "price": 679,
        "original_price": 799,
        "discount": 15,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-UNIT",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    single_unit_product = PChomeProduct(**candidate_fields)

    class FakeCrawler:
        """Crawler stand-in whose search always yields the single-unit product."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [single_unit_product]

    recorded_attempts = []
    price_writes = []

    def capture_attempt(*args, **kwargs):
        recorded_attempts.append(kwargs)

    def capture_write(*args, **kwargs):
        price_writes.append((args, kwargs))

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_write)

    sku_item = {
        "sku": "A002",
        "name": "理膚寶水 B5 全面修復霜 40ml x2 超值組",
        "product_id": 2,
        "momo_price": 1199,
    }
    outcome = feeder._run_sku_items([sku_item])

    assert outcome.matched == 0
    assert outcome.skipped_low_score == 1
    assert price_writes == []
    first_attempt = recorded_attempts[0]
    assert first_attempt["attempt_status"] == "unit_comparable"
    assert "unit_comparable" in first_attempt["error_message"]