195 lines
6.1 KiB
Python
195 lines
6.1 KiB
Python
from datetime import datetime
|
|
|
|
import requests
|
|
|
|
|
|
class _FakeResponse:
|
|
def __init__(self, payload=None, status_code=200):
|
|
self._payload = payload or {}
|
|
self.status_code = status_code
|
|
self.text = ""
|
|
|
|
def json(self):
|
|
return self._payload
|
|
|
|
def raise_for_status(self):
|
|
if self.status_code >= 400:
|
|
raise requests.HTTPError(f"HTTP {self.status_code}", response=self)
|
|
|
|
|
|
def test_pchome_search_scans_multiple_pages_until_limit(monkeypatch):
    """Search pages through results until ``limit`` product ids are gathered.

    The fake session serves two non-empty pages that overlap on ``A002``.
    The assertions require exactly two page requests, the ids handed to the
    (stubbed) detail fetch to be ``A001, A002, A003`` in that order, and the
    returned products to carry the same ids in the same order.
    """
    from services.pchome_crawler import PChomeCrawler, PChomeProduct

    # Product ids served per result page; any other page yields no products.
    ids_by_page = {
        1: ["A001", "A002"],
        2: ["A002", "A003"],
    }
    calls = []
    fetched_ids = []

    class FakeSession:
        headers = {}

        def get(self, url, params=None, timeout=None):
            query = dict(params or {})
            calls.append((url, query, timeout))
            page_number = int(query.get("page") or 1)
            prods = [{"Id": pid} for pid in ids_by_page.get(page_number, [])]
            return _FakeResponse({"Prods": prods})

    def build_product(product_id):
        # One fully populated product record per requested id.
        return PChomeProduct(
            product_id=product_id,
            name=f"商品 {product_id}",
            price=100,
            original_price=120,
            discount=17,
            image_url="",
            product_url=f"https://24h.pchome.com.tw/prod/{product_id}",
            stock=10,
            store="24h",
            rating=None,
            review_count=0,
            is_on_sale=True,
            crawled_at=datetime.now(),
        )

    def fake_fetch_product_details(product_ids, batch_size=20):
        # Record which ids were requested so the test can assert on them.
        fetched_ids.extend(product_ids)
        details = [build_product(pid) for pid in product_ids]
        return True, "details ok", details

    crawler = PChomeCrawler(timeout=1, delay=0, max_retries=0)
    crawler.session = FakeSession()
    monkeypatch.setattr(crawler, "fetch_product_details", fake_fetch_product_details)

    success, message, products = crawler.search_products("理膚寶水", limit=3, max_pages=3)

    expected_ids = ["A001", "A002", "A003"]
    assert success is True
    assert "搜尋頁數 2" in message
    assert fetched_ids == expected_ids
    assert [query["page"] for _url, query, _timeout in calls] == [1, 2]
    assert [item.product_id for item in products] == expected_ids
|
|
|
|
|
|
def test_pchome_get_retries_transient_timeout():
    """``_get_with_retry`` succeeds on the second attempt after a timeout.

    The fake session raises ``requests.Timeout`` on its first ``get`` call
    and returns a normal response afterwards; with ``max_retries=1`` the
    test expects exactly two attempts and the second response's payload.
    """
    from services.pchome_crawler import PChomeCrawler

    attempts = []

    class FakeSession:
        headers = {}

        def get(self, url, **kwargs):
            attempts.append((url, kwargs))
            is_first_attempt = len(attempts) == 1
            if not is_first_attempt:
                return _FakeResponse({"ok": True})
            raise requests.Timeout("temporary timeout")

    crawler = PChomeCrawler(timeout=1, delay=0, max_retries=1, retry_backoff=0)
    crawler.session = FakeSession()

    response = crawler._get_with_retry("https://example.test/api", timeout=1)

    assert response.json() == {"ok": True}
    assert len(attempts) == 2
|
|
|
|
|
|
def test_pchome_fetch_product_details_accepts_list_payload():
    """``fetch_product_details`` handles a JSON payload that is a list.

    The fake session answers with a one-element list (rather than a dict);
    the test checks the success flag and message, that a single request was
    made, and that id, price, subtitle and match name were populated as
    asserted below.
    """
    from services.pchome_crawler import PChomeCrawler

    requests_seen = []

    class FakeSession:
        headers = {}

        def get(self, url, params=None, timeout=None):
            requests_seen.append((url, params, timeout))
            # Built fresh on every call so each response owns its payload.
            item = {
                "Id": "DDABCD-12345678",
                "Name": "測試商品 50ml",
                "Nick": "測試商品 50ml x2 限量組",
                "Price": {"P": 799, "M": 999},
                "Pic": {"B": "/items/DDABCD12345678.jpg"},
                "Qty": 8,
                "Store": "24h",
                "isOnSale": True,
            }
            return _FakeResponse([item])

    crawler = PChomeCrawler(timeout=1, delay=0, max_retries=0)
    crawler.session = FakeSession()

    success, message, products = crawler.fetch_product_details(["DDABCD-12345678"])

    assert success is True
    assert message == "成功取得 1 個商品資料"
    assert len(requests_seen) == 1
    assert [item.product_id for item in products] == ["DDABCD-12345678"]
    first = products[0]
    assert first.price == 799
    assert first.subtitle == "測試商品 50ml x2 限量組"
    assert first.match_name == "測試商品 50ml x2 限量組"
|
|
|
|
|
|
def test_pchome_match_name_combines_non_duplicate_nick():
    """``_build_match_name`` joins name and nick with a single space.

    For the name/nick pair used here the expected result is the name,
    one space, then the nick.
    """
    from services.pchome_crawler import _build_match_name

    name = "水楊酸身體乳雙入組"
    nick = "2% 水楊酸身體乳 210ml x2"

    combined = _build_match_name(name, nick)

    assert combined == "水楊酸身體乳雙入組 2% 水楊酸身體乳 210ml x2"
|
|
|
|
|
|
def test_feeder_search_cleanup_preserves_bracket_brand_and_specs():
    """``_clean_search_text`` keeps the bracketed brand and both volumes.

    The cleaned text must still contain the brand from inside ``【】`` and
    the two ``ml`` size tokens from the raw title.
    """
    from services.competitor_price_feeder import _clean_search_text

    raw_title = "【蘭蔻】絕對完美玫瑰霜(60ml)+玫瑰精露150ml"
    cleaned = _clean_search_text(raw_title)

    # Checked in the same order as the individual assertions they replace.
    for fragment in ("蘭蔻", "60ml", "150ml"):
        assert fragment in cleaned
|
|
|
|
|
|
def test_feeder_search_candidate_passes_page_cap(monkeypatch):
    """``_search_pchome_candidates`` forwards ``limit`` and ``max_pages``.

    A fake crawler records the keyword arguments of each ``search_products``
    call and always returns one product. The matcher's scoring function is
    stubbed to return an object whose ``score`` is 0.95. The test then
    expects that product back as the only candidate and the first search
    call to have received ``limit=40`` and ``max_pages=2``.
    """
    from services.competitor_price_feeder import _search_pchome_candidates
    from services.pchome_crawler import PChomeProduct

    product = PChomeProduct(
        product_id="DDAB01-PAGE2",
        name="理膚寶水 B5 修復霜 40ml",
        price=679,
        original_price=799,
        discount=15,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-PAGE2",
        stock=20,
        store="24h",
        rating=4.7,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )
    search_calls = []

    class FakeCrawler:
        def search_products(self, keyword, **kwargs):
            search_calls.append((keyword, kwargs))
            return True, "ok", [product]

    class Diagnostics:
        # Only the ``score`` attribute is provided by this stub.
        score = 0.95

    def always_high_score(*_args, **_kwargs):
        return Diagnostics()

    monkeypatch.setattr(
        "services.marketplace_product_matcher.score_marketplace_match",
        always_high_score,
    )

    candidates = _search_pchome_candidates(
        FakeCrawler(),
        "理膚寶水 B5 修復霜 40ml",
        keywords=["理膚寶水 B5 40ml"],
        momo_price=699,
    )

    assert candidates == [product]
    _keyword, first_kwargs = search_calls[0]
    assert first_kwargs["limit"] == 40
    assert first_kwargs["max_pages"] == 2
|