"""Tests for the PChome crawler and the competitor price feeder search helpers."""
from datetime import datetime
|
|
|
|
import requests
|
|
|
|
|
|
class _FakeResponse:
|
|
def __init__(self, payload=None, status_code=200):
|
|
self._payload = payload or {}
|
|
self.status_code = status_code
|
|
self.text = ""
|
|
|
|
def json(self):
|
|
return self._payload
|
|
|
|
def raise_for_status(self):
|
|
if self.status_code >= 400:
|
|
raise requests.HTTPError(f"HTTP {self.status_code}", response=self)
|
|
|
|
|
|
def test_pchome_search_scans_multiple_pages_until_limit(monkeypatch):
    """Search keeps requesting pages until ``limit`` product ids are collected.

    The fake session serves two overlapping pages (A001/A002, then
    A002/A003) and an empty page afterwards. The test expects exactly two
    page requests, each id fetched once, and three products returned in order.
    """
    from services.pchome_crawler import PChomeCrawler, PChomeProduct

    crawler = PChomeCrawler(timeout=1, delay=0, max_retries=0)
    calls = []
    fetched_ids = []

    class FakeSession:
        headers = {}

        def get(self, url, params=None, timeout=None):
            # Record a copy of the query so later mutation cannot affect it.
            query = dict(params or {})
            calls.append((url, query, timeout))
            page = int(query.get("page") or 1)
            ids_by_page = {1: ("A001", "A002"), 2: ("A002", "A003")}
            # Any page beyond the second yields an empty product list.
            return _FakeResponse(
                {"Prods": [{"Id": pid} for pid in ids_by_page.get(page, ())]}
            )

    def fake_fetch_product_details(product_ids, batch_size=20):
        fetched_ids.extend(product_ids)
        built = []
        for product_id in product_ids:
            built.append(
                PChomeProduct(
                    product_id=product_id,
                    name=f"商品 {product_id}",
                    price=100,
                    original_price=120,
                    discount=17,
                    image_url="",
                    product_url=f"https://24h.pchome.com.tw/prod/{product_id}",
                    stock=10,
                    store="24h",
                    rating=None,
                    review_count=0,
                    is_on_sale=True,
                    crawled_at=datetime.now(),
                )
            )
        return True, "details ok", built

    crawler.session = FakeSession()
    monkeypatch.setattr(crawler, "fetch_product_details", fake_fetch_product_details)

    success, message, products = crawler.search_products(
        "理膚寶水", limit=3, max_pages=3
    )

    expected_ids = ["A001", "A002", "A003"]
    assert success is True
    assert "搜尋頁數 2" in message
    assert fetched_ids == expected_ids
    assert [query["page"] for _url, query, _timeout in calls] == [1, 2]
    assert [item.product_id for item in products] == expected_ids
|
|
|
|
|
|
def test_pchome_get_retries_transient_timeout():
    """``_get_with_retry`` retries once after a ``requests.Timeout``.

    The fake session raises on the first call and succeeds on the second;
    the test expects the successful response and exactly two attempts.
    """
    from services.pchome_crawler import PChomeCrawler

    crawler = PChomeCrawler(timeout=1, delay=0, max_retries=1, retry_backoff=0)
    calls = []

    class FakeSession:
        headers = {}

        def get(self, url, **kwargs):
            calls.append((url, kwargs))
            is_first_attempt = len(calls) == 1
            if is_first_attempt:
                raise requests.Timeout("temporary timeout")
            return _FakeResponse({"ok": True})

    crawler.session = FakeSession()

    response = crawler._get_with_retry("https://example.test/api", timeout=1)

    body = response.json()
    assert body == {"ok": True}
    assert len(calls) == 2
|
|
|
|
|
|
def test_pchome_fetch_product_details_accepts_list_payload():
    """``fetch_product_details`` handles a JSON payload that is a list.

    The fake session answers with a one-element list; the test expects one
    request, one product, the ``Price.P`` value as the price, and the
    ``Nick`` text as both ``subtitle`` and ``match_name``.
    """
    from services.pchome_crawler import PChomeCrawler

    crawler = PChomeCrawler(timeout=1, delay=0, max_retries=0)
    calls = []

    class FakeSession:
        headers = {}

        def get(self, url, params=None, timeout=None):
            calls.append((url, params, timeout))
            # Built per call so every response carries its own fresh objects.
            item = {
                "Id": "DDABCD-12345678",
                "Name": "測試商品 50ml",
                "Nick": "測試商品 50ml x2 限量組",
                "Price": {"P": 799, "M": 999},
                "Pic": {"B": "/items/DDABCD12345678.jpg"},
                "Qty": 8,
                "Store": "24h",
                "isOnSale": True,
            }
            return _FakeResponse([item])

    crawler.session = FakeSession()

    success, message, products = crawler.fetch_product_details(["DDABCD-12345678"])

    assert success is True
    assert message == "成功取得 1 個商品資料"
    assert len(calls) == 1
    assert [item.product_id for item in products] == ["DDABCD-12345678"]
    first = products[0]
    assert first.price == 799
    assert first.subtitle == "測試商品 50ml x2 限量組"
    assert first.match_name == "測試商品 50ml x2 限量組"
|
|
|
|
|
|
def test_pchome_match_name_combines_non_duplicate_nick():
    """A nick that does not repeat the name is appended after it."""
    from services.pchome_crawler import _build_match_name

    combined = _build_match_name("水楊酸身體乳雙入組", "2% 水楊酸身體乳 210ml x2")

    assert combined == "水楊酸身體乳雙入組 2% 水楊酸身體乳 210ml x2"
|
|
|
|
|
|
def test_pchome_match_name_deduplicates_normalized_nick_prefix():
    """A nick that starts with the (whitespace-variant) name is used alone.

    The expected result is the nick itself, so the "29g" spec appears once.
    """
    from services.pchome_crawler import _build_match_name

    name = "【Laura Mercier 蘿拉蜜思】 煥顏透明蜜粉 29g"
    nick = "【Laura Mercier 蘿拉蜜思】煥顏透明蜜粉 29g 專櫃公司貨"

    match_name = _build_match_name(name, nick)

    assert match_name == "【Laura Mercier 蘿拉蜜思】煥顏透明蜜粉 29g 專櫃公司貨"
    assert match_name.count("29g") == 1
|
|
|
|
|
|
def test_pchome_match_name_deduplicates_marketing_prefix_before_title():
    """A marketing prefix in front of a repeated title is moved behind the name.

    Both cases expect the title once, followed by the marketing text, with
    the size spec appearing a single time.
    """
    from services.pchome_crawler import _build_match_name

    # Both names are built before any assertion runs.
    ad_cream = _build_match_name(
        "Cetaphil舒特膚 AD益膚康修護舒敏乳霜227g",
        "《即期特賣》Cetaphil舒特膚 AD益膚康修護舒敏乳霜227g",
    )
    moisturizing_cream = _build_match_name(
        "【Cetaphil 舒特膚】長效潤膚霜250g",
        "48小時長效保濕升級版 【Cetaphil 舒特膚】長效潤膚霜250g",
    )

    expected_ad_cream = "Cetaphil舒特膚 AD益膚康修護舒敏乳霜227g 《即期特賣》"
    expected_moisturizing_cream = (
        "【Cetaphil 舒特膚】長效潤膚霜250g 48小時長效保濕升級版"
    )

    assert ad_cream == expected_ad_cream
    assert ad_cream.count("227g") == 1
    assert moisturizing_cream == expected_moisturizing_cream
    assert moisturizing_cream.count("250g") == 1
|
|
|
|
|
|
def test_pchome_match_name_strips_html_marketing_noise():
    """HTML-wrapped marketing text in the nick is absent from the match name.

    The expected result is only the product title taken from the nick, with
    no ``<font>`` markup, no promo slogan, and a single "100g".
    """
    from services.pchome_crawler import _build_match_name

    name = "TS6護一生沁涼潔淨慕斯100g"
    nick = '<font color="#FF0066">★降溫限定。92%滿意★</font><br>TS6護一生 沁涼潔淨慕斯100g'

    match_name = _build_match_name(name, nick)

    assert "<font" not in match_name
    assert "降溫限定" not in match_name
    assert match_name == "TS6護一生 沁涼潔淨慕斯100g"
    assert match_name.count("100g") == 1
|
|
|
|
|
|
def test_feeder_search_cleanup_preserves_bracket_brand_and_specs():
    """Cleaning a search string keeps the bracketed brand and both volume specs."""
    from services.competitor_price_feeder import _clean_search_text

    cleaned = _clean_search_text("【蘭蔻】絕對完美玫瑰霜(60ml)+玫瑰精露150ml")

    # Checked in the same order as the brand and specs appear in the input.
    for token in ("蘭蔻", "60ml", "150ml"):
        assert token in cleaned
|
|
|
|
|
|
def test_feeder_search_candidate_passes_page_cap(monkeypatch):
    """Candidate search forwards ``limit`` and ``max_pages`` to the crawler.

    With no explicit budget arguments the test expects the crawler to be
    called with ``limit=40`` and ``max_pages=2``, and the single high-scoring
    product to come back as the only candidate.
    """
    from services.competitor_price_feeder import _search_pchome_candidates
    from services.pchome_crawler import PChomeProduct

    product_fields = {
        "product_id": "DDAB01-PAGE2",
        "name": "理膚寶水 B5 修復霜 40ml",
        "price": 679,
        "original_price": 799,
        "discount": 15,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-PAGE2",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    product = PChomeProduct(**product_fields)
    calls = []

    class FakeCrawler:
        def search_products(self, keyword, **kwargs):
            calls.append((keyword, kwargs))
            return True, "ok", [product]

    def fixed_score(*_args, **_kwargs):
        # Stand-in diagnostics object exposing only a ``score`` attribute.
        return type("Diagnostics", (), {"score": 0.95})()

    monkeypatch.setattr(
        "services.marketplace_product_matcher.score_marketplace_match",
        fixed_score,
    )

    candidates = _search_pchome_candidates(
        FakeCrawler(),
        "理膚寶水 B5 修復霜 40ml",
        keywords=["理膚寶水 B5 40ml"],
        momo_price=699,
    )

    assert candidates == [product]
    first_call_kwargs = calls[0][1]
    assert first_call_kwargs["limit"] == 40
    assert first_call_kwargs["max_pages"] == 2
|
|
|
|
|
|
def test_feeder_search_candidate_respects_bounded_budget(monkeypatch):
    """Candidate search honours explicit ``max_terms`` / ``max_pages`` limits.

    Three keywords are supplied with ``max_terms=1`` and ``max_pages=1``; the
    test expects only the first keyword to be searched, with ``limit=20`` and
    ``max_pages=1`` passed to the crawler.
    """
    from services.competitor_price_feeder import _search_pchome_candidates
    from services.pchome_crawler import PChomeProduct

    product_fields = {
        "product_id": "DDAB01-FAST",
        "name": "理膚寶水 B5 修復霜 40ml",
        "price": 679,
        "original_price": 799,
        "discount": 15,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-FAST",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    product = PChomeProduct(**product_fields)
    calls = []

    class FakeCrawler:
        def search_products(self, keyword, **kwargs):
            calls.append((keyword, kwargs))
            return True, "ok", [product]

    def fixed_score(*_args, **_kwargs):
        # Stand-in diagnostics object exposing only a ``score`` attribute.
        return type("Diagnostics", (), {"score": 0.80})()

    monkeypatch.setattr(
        "services.marketplace_product_matcher.score_marketplace_match",
        fixed_score,
    )

    candidates = _search_pchome_candidates(
        FakeCrawler(),
        "理膚寶水 B5 修復霜 40ml",
        keywords=["理膚寶水 B5", "理膚寶水 修復霜", "b5 cream"],
        momo_price=699,
        max_terms=1,
        max_pages=1,
        max_seconds=30,
    )

    assert candidates == [product]
    assert [keyword for keyword, _kwargs in calls] == ["理膚寶水 B5"]
    first_call_kwargs = calls[0][1]
    assert first_call_kwargs["limit"] == 20
    assert first_call_kwargs["max_pages"] == 1
|