# ewoooc/services/momo_crawler.py

"""
MOMO 購物網爬蟲服務

爬取 MOMO 購物網商品資料，支援：
- 關鍵字搜尋
- 熱銷商品排行

API 參考:
- 搜尋 API: https://m.momoshop.com.tw/search.momo
- 商品 API: https://www.momoshop.com.tw/goods/GoodsDetail.jsp?i_code=XXX
"""

from __future__ import annotations

import re
import json
import time
import logging
import os
from typing import List, Dict, Optional, Tuple
from dataclasses import dataclass, asdict
from datetime import datetime

import requests
from bs4 import BeautifulSoup

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Defaults for search_momo_products_for_pchome_products(); each one is read
# from the environment once, at import time, and parsed with float()/int().
# Minimum match score a MOMO candidate needs in order to be kept.
MOMO_TARGETED_SEARCH_MIN_SCORE = float(os.getenv("MOMO_TARGETED_SEARCH_MIN_SCORE", "0.45"))
# Maximum number of PChome products used as search targets per call.
MOMO_TARGETED_SEARCH_MAX_PRODUCTS = int(os.getenv("MOMO_TARGETED_SEARCH_MAX_PRODUCTS", "30"))
# Maximum number of search terms generated per PChome product.
MOMO_TARGETED_SEARCH_MAX_TERMS = int(os.getenv("MOMO_TARGETED_SEARCH_MAX_TERMS", "4"))
# Maximum number of MOMO results requested per search term.
MOMO_TARGETED_SEARCH_LIMIT_PER_TERM = int(os.getenv("MOMO_TARGETED_SEARCH_LIMIT_PER_TERM", "8"))


@dataclass
class MomoProduct:
    """One product record scraped from MOMO."""
    product_id: str           # MOMO product ID
    name: str                 # product name
    price: int                # selling price
    original_price: int       # list price before discount
    discount: Optional[int]   # discount (%)
    image_url: str            # image URL
    product_url: str          # product page URL
    brand: str                # brand
    crawled_at: datetime      # time the record was scraped

    def to_dict(self) -> dict:
        """Return the fields as a dict, with ``crawled_at`` as an ISO-8601 string."""
        return {**asdict(self), 'crawled_at': self.crawled_at.isoformat()}


class MomoCrawler:
    """Crawler for MOMO shopping (momoshop.com.tw) search results.

    A search queries the mobile site first and falls back to the desktop site
    when the mobile response yields no products. All requests go through one
    ``requests.Session`` and are spaced at least ``delay`` seconds apart.
    """

    # Base URLs
    BASE_URL = 'https://www.momoshop.com.tw'
    MOBILE_URL = 'https://m.momoshop.com.tw'
    SEARCH_API = 'https://www.momoshop.com.tw/search/searchShop.jsp'
    # Mobile search endpoint; it is tried before the desktop one.
    MOBILE_SEARCH_API = 'https://m.momoshop.com.tw/search.momo'

    # Default headers (imitate a mobile browser).
    # NOTE(review): advertising 'br' assumes a Brotli decoder is installed for
    # urllib3; without one a Brotli-compressed body cannot be decoded — confirm.
    DEFAULT_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-TW,zh;q=0.9,en;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Referer': 'https://m.momoshop.com.tw/',
    }

    def __init__(self, timeout: int = 30, delay: float = 1.0):
        """
        Create a crawler with its own HTTP session.

        Args:
            timeout: Request timeout in seconds.
            delay: Minimum interval between requests in seconds.
        """
        self.timeout = timeout
        self.delay = delay
        self.session = requests.Session()
        self.session.headers.update(self.DEFAULT_HEADERS)
        self._last_request_time = 0

    def _rate_limit(self):
        """Sleep until at least ``delay`` seconds have passed since the last request."""
        elapsed = time.time() - self._last_request_time
        if elapsed < self.delay:
            time.sleep(self.delay - elapsed)
        self._last_request_time = time.time()

    def search_products(self, keyword: str, limit: int = 10, sort_by: str = 'sSaleQty/dc') -> Tuple[bool, str, List[MomoProduct]]:
        """
        Search MOMO for products.

        Args:
            keyword: Search keyword.
            limit: Maximum number of products to return.
            sort_by: Sort order, for example:
                - 'sSaleQty/dc': sales volume, high to low (best sellers)
                - 'sPrice/ac': price, low to high
                - 'sPrice/dc': price, high to low
                - 'sSaleDate/dc': listing date, newest first

        Returns:
            (success flag, message, list of products)
        """
        try:
            self._rate_limit()

            # Try the mobile site first.
            products = self._search_mobile(keyword, limit, sort_by)
            if products:
                return True, f"成功取得 {len(products)} 個商品", products

            # The desktop fallback is a second HTTP request, so it has to
            # honour the configured delay as well.
            self._rate_limit()
            products = self._search_desktop(keyword, limit, sort_by)
            if products:
                return True, f"成功取得 {len(products)} 個商品", products

            return False, "無法解析商品資料", []

        # Both search helpers catch their own exceptions, so these handlers
        # are only a last-resort guard.
        except requests.Timeout:
            logger.error("[MOMO] 請求超時")
            return False, "請求超時", []
        except Exception as e:
            logger.error(f"[MOMO] 搜尋失敗: {e}")
            return False, str(e), []

    def _search_mobile(self, keyword: str, limit: int, sort_by: str) -> List[MomoProduct]:
        """Query the mobile search endpoint; return [] on any failure."""
        try:
            # Only the sales-volume sort is mapped; every other sort_by value
            # is sent as '1' (per the original comment: 4 = sales, 1 = relevance).
            params = {
                'searchKeyword': keyword,
                'sortType': '4' if 'sSaleQty' in sort_by else '1',
                'maxPage': '1',
                'curPage': '1',
            }

            logger.info(f"[MOMO] 行動版搜尋: {keyword}")

            response = self.session.get(
                self.MOBILE_SEARCH_API,
                params=params,
                timeout=self.timeout
            )

            if response.status_code != 200:
                logger.warning(f"[MOMO] 行動版搜尋失敗: HTTP {response.status_code}")
                return []

            return self._parse_mobile_results(response.text, limit)

        except Exception as e:
            logger.warning(f"[MOMO] 行動版搜尋異常: {e}")
            return []

    def _search_desktop(self, keyword: str, limit: int, sort_by: str) -> List[MomoProduct]:
        """Query the desktop search endpoint; return [] on any failure."""
        try:
            params = {
                'keyword': keyword,
                'searchType': '1',
                'cateLevel': '-1',
                'curPage': '1',
                'maxPage': '1',
                'minPage': '1',
                'areaCode': 'all',
                'isFuzzy': '0',
                'sortType': sort_by
            }

            logger.info(f"[MOMO] 桌面版搜尋: {keyword}")

            # Same headers as the session defaults, but with a desktop User-Agent.
            headers = self.DEFAULT_HEADERS.copy()
            headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'

            response = self.session.get(
                self.SEARCH_API,
                params=params,
                headers=headers,
                timeout=self.timeout
            )

            if response.status_code != 200:
                logger.warning(f"[MOMO] 桌面版搜尋失敗: HTTP {response.status_code}")
                return []

            return self._parse_search_results(response.text, limit)

        except Exception as e:
            logger.warning(f"[MOMO] 桌面版搜尋異常: {e}")
            return []

    def _goods_url(self, product_id) -> str:
        """Build the desktop product-detail URL for a product ID."""
        return f'{self.BASE_URL}/goods/GoodsDetail.jsp?i_code={product_id}'

    @staticmethod
    def _absolute_image_url(url: str) -> str:
        """Prefix protocol-relative (``//host/...``) image URLs with ``https:``."""
        if url and url.startswith('//'):
            return 'https:' + url
        return url or ''

    def _parse_mobile_results(self, html: str, limit: int) -> List[MomoProduct]:
        """
        Parse a mobile search response.

        Four strategies are tried in order; the first one that yields any
        product wins:
          1. ``<a>`` elements whose href carries an ``i_code``
          2. the escaped Next.js ``goodsInfoList`` payload
          3. the ``__NEXT_DATA__`` script tag
          4. a loose regex over JSON embedded in the HTML

        Returns [] when nothing can be parsed or an unexpected error occurs.
        """
        products: List[MomoProduct] = []

        try:
            soup = BeautifulSoup(html, 'html.parser')

            # Strategy 1: product anchors.
            seen_ids = set()
            for link in soup.find_all('a', href=re.compile(r'i_code=\d+')):
                if len(products) >= limit:
                    break
                try:
                    product = self._product_from_mobile_link(link, seen_ids)
                except Exception as e:
                    logger.debug(f"[MOMO] 解析商品連結失敗: {e}")
                    continue
                if product is not None:
                    # Mark the ID as seen only once a product was built, so an
                    # anchor without a usable name (e.g. an image-only link)
                    # does not hide a later anchor for the same product.
                    seen_ids.add(product.product_id)
                    products.append(product)

            # Strategy 2: escaped goodsInfoList payload inside a script string.
            if not products:
                products = self._parse_next_search_payload_results(html, limit)

            # Strategy 3: __NEXT_DATA__ JSON.
            if not products:
                products = self._parse_next_data_products(soup, limit)

            # Strategy 4: JSON fragments embedded in the HTML
            # (goodsCode may be quoted or a bare number here).
            if not products:
                products = self._parse_embedded_json(
                    html,
                    limit,
                    re.compile(r'"goodsCode"\s*:\s*"?(\d+)"?.*?"goodsName"\s*:\s*"([^"]+)".*?"price"\s*:\s*(\d+)', re.DOTALL),
                )

            logger.info(f"[MOMO] 行動版解析到 {len(products)} 個商品")
            return products

        except Exception as e:
            logger.error(f"[MOMO] 解析行動版結果失敗: {e}")
            return []

    def _product_from_mobile_link(self, link, seen_ids) -> Optional[MomoProduct]:
        """Build a product from one mobile result anchor, or None if it is unusable."""
        match = re.search(r'i_code=(\d+)', link.get('href', ''))
        if not match:
            return None
        product_id = match.group(1)
        if product_id in seen_ids:
            return None

        # Name: title attribute, then <img alt>, then a name-like child
        # element, then the anchor's own text.
        img = link.find('img')
        name = link.get('title') or ''
        if not name and img and img.get('alt'):
            name = img.get('alt')
        if not name:
            name_elem = link.find(class_=re.compile(r'name|title|goods', re.I))
            if name_elem:
                name = name_elem.get_text(strip=True)
        if not name:
            name = link.get_text(strip=True)
        if not name or len(name) < 3:
            return None

        # Price: first price-like element in the nearest li/div/article
        # ancestor; stays 0 when none is found.
        price = 0
        parent = link.find_parent(['li', 'div', 'article'])
        if parent:
            price_elem = parent.find(class_=re.compile(r'price', re.I))
            if price_elem:
                price = self._parse_momo_price(price_elem.get_text(strip=True))

        image_url = ''
        if img:
            image_url = self._absolute_image_url(
                img.get('src') or img.get('data-src') or img.get('data-original', '')
            )

        return MomoProduct(
            product_id=product_id,
            name=name.strip()[:100],  # cap the name length
            price=price,
            original_price=price,
            discount=None,
            image_url=image_url,
            product_url=self._goods_url(product_id),
            brand='',
            crawled_at=datetime.now()
        )

    def _parse_next_data_products(self, soup, limit: int) -> List[MomoProduct]:
        """Read products from the ``__NEXT_DATA__`` script tag, skipping malformed items."""
        script = soup.find('script', {'id': '__NEXT_DATA__'})
        if not (script and script.string):
            return []

        try:
            next_data = json.loads(script.string)
            props = next_data.get('props', {}).get('pageProps', {})
            items = props.get('products', []) or props.get('items', [])
        except (json.JSONDecodeError, AttributeError, TypeError):
            # Unexpected payload shape: let the caller try the next strategy.
            return []
        if not isinstance(items, list):
            return []

        products: List[MomoProduct] = []
        for item in items[:limit]:
            try:
                product = MomoProduct(
                    product_id=str(item.get('goodsCode', '')),
                    name=item.get('goodsName', ''),
                    price=int(item.get('price', 0)),
                    original_price=int(item.get('suggestPrice', item.get('price', 0))),
                    discount=None,
                    image_url=item.get('imgUrl', ''),
                    product_url=self._goods_url(item.get("goodsCode", "")),
                    brand='',
                    crawled_at=datetime.now()
                )
            except (AttributeError, TypeError, ValueError) as e:
                # One bad item must not discard the rest of the list.
                logger.debug(f"[MOMO] 解析 __NEXT_DATA__ 商品失敗: {e}")
                continue
            if product.product_id and product.name:
                products.append(product)
        return products

    def _parse_embedded_json(self, html: str, limit: int, pattern) -> List[MomoProduct]:
        """Build products from ``(goodsCode, goodsName, price)`` tuples matched by ``pattern``."""
        return [
            MomoProduct(
                product_id=product_id,
                name=name,
                price=int(price),
                original_price=int(price),
                discount=None,
                image_url='',
                product_url=self._goods_url(product_id),
                brand='',
                crawled_at=datetime.now()
            )
            for product_id, name, price in pattern.findall(html)[:limit]
        ]

    def _parse_next_search_payload_results(self, html: str, limit: int) -> List[MomoProduct]:
        """
        Parse the escaped Next.js ``goodsInfoList`` payload of the search page.

        The pattern matches backslash-escaped JSON (``\\"goodsCode\\":\\"...\\"``)
        and expects goodsCode, goodsName, goodsPrice and imgUrl in that order
        within bounded distances of each other. Entries with an empty name or
        a non-positive price are skipped; duplicate product IDs are ignored.
        """
        products: List[MomoProduct] = []
        seen_ids: set[str] = set()

        product_pattern = re.compile(
            r'\\"goodsCode\\"\s*:\s*\\"(?P<code>\d+)\\"'
            r'.{0,800}?'
            r'\\"goodsName\\"\s*:\s*\\"(?P<name>.*?)\\"'
            r'.{0,1600}?'
            r'\\"goodsPrice\\"\s*:\s*\\"(?P<price>[^\\"]+)\\"'
            r'.{0,2400}?'
            r'\\"imgUrl\\"\s*:\s*\\"(?P<img>[^\\"]*)\\"',
            re.DOTALL,
        )
        for match in product_pattern.finditer(html):
            if len(products) >= limit:
                break
            product_id = match.group("code")
            if product_id in seen_ids:
                continue
            seen_ids.add(product_id)

            name = self._decode_payload_text(match.group("name"))
            price = self._parse_momo_price(match.group("price"))
            if not name or price <= 0:
                continue
            # Normalise protocol-relative URLs the same way the other parsers do.
            image_url = self._absolute_image_url(self._decode_payload_text(match.group("img")))
            original_price = self._parse_original_price_nearby(html, match.start(), match.end()) or price
            discount = round((1 - price / original_price) * 100) if original_price > price else None

            products.append(MomoProduct(
                product_id=product_id,
                name=name.strip()[:160],
                price=price,
                original_price=original_price,
                discount=discount,
                image_url=image_url,
                product_url=self._goods_url(product_id),
                brand='',
                crawled_at=datetime.now(),
            ))
        return products

    @staticmethod
    def _decode_payload_text(value: str) -> str:
        """
        Decode JSON string escapes in a payload fragment.

        Returns "" for empty or None input. If the fragment is not a valid
        JSON string body, only ``\\u0026`` and ``\\/`` are unescaped.
        """
        if not value:
            return ""
        try:
            return json.loads(f'"{value}"')
        except Exception:
            return value.replace("\\u0026", "&").replace("\\/", "/")

    @staticmethod
    def _parse_momo_price(value: str) -> int:
        """
        Extract the first number from a price string, ignoring thousands separators.

        Returns 0 when no digit is present. The match must start on a digit so
        a stray comma cannot produce an empty number.
        """
        match = re.search(r"\d[\d,]*", value or "")
        return int(match.group(0).replace(",", "")) if match else 0

    def _parse_original_price_nearby(self, html: str, start: int, end: int) -> int:
        """
        Find ``goodsPriceOri`` between ``start`` and 1800 characters past ``end``.

        Returns 0 when absent.
        NOTE(review): the window may extend into the next product's entry —
        confirm against real payloads.
        """
        snippet = html[start:min(len(html), end + 1800)]
        match = re.search(r'\\"goodsPriceOri\\"\s*:\s*\\"(?P<price>[^\\"]+)\\"', snippet)
        return self._parse_momo_price(match.group("price")) if match else 0

    def _parse_search_results(self, html: str, limit: int) -> List[MomoProduct]:
        """
        Parse a desktop search response.

        Args:
            html: HTML content.
            limit: Maximum number of items to examine / return.

        Returns:
            List of products; [] when nothing can be parsed.
        """
        products: List[MomoProduct] = []

        try:
            # Each product card is an <li> whose class contains goodsItemLi.
            item_pattern = re.compile(
                r'<li[^>]*class=["\'][^"\']*goodsItemLi[^"\']*["\'][^>]*>(.*?)</li>',
                re.DOTALL | re.IGNORECASE
            )

            for item_html in item_pattern.findall(html)[:limit]:
                try:
                    product = self._product_from_desktop_item(item_html)
                except Exception as e:
                    logger.debug(f"[MOMO] 解析商品項目失敗: {e}")
                    continue
                if product is not None:
                    products.append(product)

            # Fallback: JSON fragments embedded in the HTML
            # (goodsCode must be quoted here).
            if not products:
                products = self._parse_embedded_json(
                    html,
                    limit,
                    re.compile(r'"goodsCode"\s*:\s*"(\d+)".*?"goodsName"\s*:\s*"([^"]+)".*?"price"\s*:\s*(\d+)', re.DOTALL),
                )

            logger.info(f"[MOMO] 解析到 {len(products)} 個商品")
            return products

        except Exception as e:
            logger.error(f"[MOMO] 解析搜尋結果失敗: {e}")
            return []

    def _product_from_desktop_item(self, item_html: str) -> Optional[MomoProduct]:
        """Build a product from one desktop ``<li>`` card, or None if ID, name or price is missing."""
        code_match = re.search(r'i_code=(\d+)', item_html)
        name_match = (
            re.search(r'<p[^>]*class=["\'][^"\']*prdName[^"\']*["\'][^>]*>([^<]+)</p>', item_html)
            or re.search(r'title=["\']([^"\']+)["\']', item_html)
        )
        # The fallback requires a dollar sign: an unanchored digit run would
        # match the first number in the card, typically the i_code itself.
        price_match = (
            re.search(r'<b[^>]*>\$?(\d[\d,]*)</b>', item_html)
            or re.search(r'\$\s*(\d[\d,]*)', item_html)
        )
        if not (code_match and name_match and price_match):
            return None

        img_match = re.search(r'(?:src|data-original)=["\']([^"\']+\.(?:jpg|png|webp)[^"\']*)["\']', item_html, re.IGNORECASE)
        original_price_match = re.search(r'<del[^>]*>\$?(\d[\d,]*)</del>', item_html)

        product_id = code_match.group(1)
        price = int(price_match.group(1).replace(',', ''))
        original_price = int(original_price_match.group(1).replace(',', '')) if original_price_match else price

        # Discount percentage, only when the list price is actually higher.
        discount = None
        if original_price > price:
            discount = round((1 - price / original_price) * 100)

        return MomoProduct(
            product_id=product_id,
            name=name_match.group(1).strip(),
            price=price,
            original_price=original_price,
            discount=discount,
            image_url=self._absolute_image_url(img_match.group(1) if img_match else ''),
            product_url=self._goods_url(product_id),
            brand='',
            crawled_at=datetime.now()
        )


# Module-level crawler instance; stays None until get_crawler() creates it.
_crawler_instance: Optional[MomoCrawler] = None


def get_crawler() -> MomoCrawler:
    """Return the shared module-level crawler, creating it on first use."""
    global _crawler_instance
    if _crawler_instance is not None:
        return _crawler_instance
    _crawler_instance = MomoCrawler()
    return _crawler_instance


def search_momo_products(keyword: str, limit: int = 10) -> Tuple[bool, str, List[dict]]:
    """
    Convenience wrapper: search MOMO with the shared crawler.

    Args:
        keyword: Search keyword.
        limit: Maximum number of products to return.

    Returns:
        (success flag, message, list of product dicts)
    """
    ok, note, found = get_crawler().search_products(keyword, limit)
    rows = [item.to_dict() for item in found]
    return ok, note, rows


def _to_float(value, default: float = 0.0) -> float:
    try:
        if value is None:
            return default
        return float(str(value).replace(",", "").replace("$", "").strip())
    except (TypeError, ValueError):
        return default


def _product_name_from_payload(payload: dict) -> str:
    return str(
        payload.get("name")
        or payload.get("product_name")
        or payload.get("title")
        or payload.get("商品名稱")
        or ""
    ).strip()


def _product_price_from_payload(payload: dict) -> float:
    """Return the first truthy price-like field of ``payload`` as a float (0.0 when none parses)."""
    raw = None
    for key in ("price", "pchome_price", "sale_price", "售價"):
        raw = payload.get(key)
        if raw:
            break
    # When every field is falsy, the last one looked up is what gets parsed.
    return _to_float(raw)


def _dedupe_terms(terms: list[str], max_terms: int) -> list[str]:
    result: list[str] = []
    seen: set[str] = set()
    for term in terms:
        normalized = re.sub(r"\s+", " ", str(term or "").strip())
        if len(normalized) < 2:
            continue
        key = normalized.lower()
        if key in seen:
            continue
        seen.add(key)
        result.append(normalized)
        if len(result) >= max_terms:
            break
    return result


def build_targeted_momo_search_terms(pchome_name: str, max_terms: int = MOMO_TARGETED_SEARCH_MAX_TERMS) -> list[str]:
    """
    Build MOMO search terms from a PChome product name.

    Terms come from ``services.marketplace_product_matcher.build_search_terms``;
    the original product name is appended afterwards, and the combined list is
    normalised, de-duplicated and capped by ``_dedupe_terms``.

    Args:
        pchome_name: PChome product name; an empty value yields [].
        max_terms: Maximum number of terms requested and returned.

    Returns:
        Search terms in priority order. If the matcher cannot be imported or
        raises, a warning is logged and only the product name is used.
    """
    if not pchome_name:
        return []
    try:
        from services.marketplace_product_matcher import build_search_terms

        # Copy into a fresh list: the result may be a tuple, generator or
        # None, and appending below must not mutate a list owned by the matcher.
        terms = list(build_search_terms(pchome_name, max_terms=max_terms) or ())
    except Exception:
        logger.warning("[MOMO] 產生精準搜尋詞失敗，改用原商品名", exc_info=True)
        terms = []
    terms.append(pchome_name)
    return _dedupe_terms(terms, max_terms=max_terms)


def search_momo_products_for_pchome_products(
    pchome_products: list[dict],
    *,
    limit_per_product: int = MOMO_TARGETED_SEARCH_LIMIT_PER_TERM,
    max_products: int = MOMO_TARGETED_SEARCH_MAX_PRODUCTS,
    max_terms_per_product: int = MOMO_TARGETED_SEARCH_MAX_TERMS,
    min_score: float = MOMO_TARGETED_SEARCH_MIN_SCORE,
    crawler: MomoCrawler | None = None,
) -> Tuple[bool, str, List[dict]]:
    """
    Look up MOMO candidates for each PChome product.

    For every PChome product (up to ``max_products``) a set of search terms is
    built, each term is searched on MOMO, and every result is scored against
    the PChome product with ``score_marketplace_match``. Results scoring below
    ``min_score`` are dropped. Candidates are keyed by MOMO product ID, so a
    MOMO product appears once, attached to the target it scored highest for.

    Args:
        pchome_products: PChome product payloads (dicts).
        limit_per_product: Maximum MOMO results requested per search term.
        max_products: Maximum number of PChome products to process.
        max_terms_per_product: Maximum search terms per PChome product.
        min_score: Minimum match score for a candidate to be kept.
        crawler: Crawler to use; defaults to the shared module crawler.

    Returns:
        (success flag, message, candidates sorted by match score, descending).
        Each candidate is the MOMO product dict extended with ``target_*``
        fields, ``can_auto_compare``, ``auto_compare_type`` and
        ``source_strategy``.
    """
    if not pchome_products:
        return False, "沒有 PChome 商品可用來搜尋 MOMO", []

    try:
        from services.marketplace_product_matcher import (
            build_unit_price_comparison,
            score_marketplace_match,
        )
    except Exception as exc:
        logger.error("[MOMO] 無法載入商品比對工具: %s", exc, exc_info=True)
        return False, "商品比對工具暫時不可用", []

    crawler = crawler or get_crawler()
    candidates_by_id: dict[str, dict] = {}
    searched_products = 0

    for target in pchome_products[:max_products]:
        pchome_name = _product_name_from_payload(target)
        if not pchome_name:
            continue
        searched_products += 1
        pchome_price = _product_price_from_payload(target)
        pchome_id = str(target.get("product_id") or target.get("id") or target.get("sku") or "").strip()
        terms = build_targeted_momo_search_terms(pchome_name, max_terms=max_terms_per_product)

        for term in terms:
            success, _, products = crawler.search_products(term, limit=limit_per_product)
            if not success or not products:
                continue

            for product in products:
                row = product.to_dict() if hasattr(product, "to_dict") else dict(product)
                momo_name = _product_name_from_payload(row)
                if not momo_name:
                    continue
                momo_price = _to_float(row.get("price"))
                diagnostics = score_marketplace_match(
                    momo_name,
                    pchome_name,
                    momo_price=momo_price,
                    competitor_price=pchome_price,
                )
                score = float(getattr(diagnostics, "score", 0.0) or 0.0)
                if score < min_score:
                    continue
                hard_veto = bool(getattr(diagnostics, "hard_veto", False))
                comparison_mode = getattr(diagnostics, "comparison_mode", "exact_identity")
                diagnostic_price_basis = str(getattr(diagnostics, "price_basis", "") or "")
                diagnostic_alert_tier = str(getattr(diagnostics, "alert_tier", "") or "")
                diagnostic_match_type = str(getattr(diagnostics, "match_type", "") or "")

                # Default classification: manual review, no automatic comparison.
                unit_price_comparison = {}
                can_auto_compare = False
                auto_compare_type = "manual_review"
                price_basis = "none"
                review_status = "需人工確認"
                if (
                    not hard_veto
                    and comparison_mode == "exact_identity"
                    and diagnostic_price_basis == "total_price"
                    and diagnostic_alert_tier == "price_alert_exact"
                ):
                    # Exact same product: total prices are directly comparable.
                    can_auto_compare = True
                    auto_compare_type = "total_price"
                    price_basis = "total_price"
                    review_status = "可直接比價"
                elif comparison_mode == "unit_comparable" or diagnostic_price_basis == "unit_price":
                    unit_price_comparison = build_unit_price_comparison(
                        momo_name,
                        pchome_name,
                        momo_price=momo_price,
                        competitor_price=pchome_price,
                    )
                    can_auto_compare = bool(unit_price_comparison.get("comparable"))
                    if can_auto_compare:
                        auto_compare_type = "unit_price"
                        price_basis = "unit_price"
                        review_status = "自動單位價比較"
                    else:
                        price_basis = "unit_price_review"

                # Unit-price details are only reported for unit_comparable matches.
                # NOTE(review): a match classified via price_basis == "unit_price"
                # alone keeps auto_compare_type "unit_price" but loses these
                # details and gets a total-price gap below — confirm intended.
                if comparison_mode != "unit_comparable":
                    unit_price_comparison = {}

                raw_gap = None
                if unit_price_comparison:
                    raw_gap = unit_price_comparison.get("unit_gap_pct")
                elif pchome_price:
                    try:
                        raw_gap = (float(momo_price or 0) - float(pchome_price)) / float(pchome_price) * 100
                    except (TypeError, ValueError, ZeroDivisionError):
                        raw_gap = None
                # A non-numeric unit_gap_pct must not abort the whole search run.
                try:
                    gap_pct = round(float(raw_gap), 2) if raw_gap is not None else None
                except (TypeError, ValueError):
                    gap_pct = None

                product_id = str(row.get("product_id") or row.get("goodsCode") or row.get("id") or "").strip()
                if not product_id:
                    product_id = f"momo_candidate_{len(candidates_by_id)}"
                # Keep only the best-scoring match per MOMO product.
                existing = candidates_by_id.get(product_id)
                if existing and float(existing.get("target_match_score") or 0.0) >= score:
                    continue

                row.update({
                    "product_id": product_id,
                    "target_pchome_product_id": pchome_id,
                    "target_pchome_name": pchome_name,
                    "target_pchome_price": pchome_price,
                    "target_match_score": round(score, 3),
                    "target_search_term": term,
                    "target_match_reasons": list(getattr(diagnostics, "reasons", ()) or ()),
                    "target_comparison_mode": comparison_mode,
                    "target_match_type": diagnostic_match_type,
                    "target_alert_tier": diagnostic_alert_tier,
                    "target_hard_veto": hard_veto,
                    "can_auto_compare": can_auto_compare,
                    "auto_compare_type": auto_compare_type,
                    "target_price_basis": price_basis,
                    "target_gap_pct": gap_pct,
                    "target_unit_price_comparison": unit_price_comparison,
                    "target_review_status": review_status,
                    "source_strategy": "pchome_targeted_momo_search",
                })
                candidates_by_id[product_id] = row

    candidates = sorted(
        candidates_by_id.values(),
        key=lambda item: float(item.get("target_match_score") or 0.0),
        reverse=True,
    )
    if not candidates:
        return False, f"已用 {searched_products} 筆 PChome 商品搜尋 MOMO，但沒有找到可用候選", []
    exact_count = sum(1 for item in candidates if item.get("auto_compare_type") == "total_price")
    unit_count = sum(1 for item in candidates if item.get("auto_compare_type") == "unit_price")
    review_count = len(candidates) - exact_count - unit_count
    return (
        True,
        (
            f"已用 {searched_products} 筆 PChome 商品搜尋 MOMO，找到 {len(candidates)} 筆候選"
            f"（可直接比價 {exact_count} 筆、自動單位價比較 {unit_count} 筆、需人工確認 {review_count} 筆）"
        ),
        candidates,
    )


def get_momo_bestsellers(category: str, limit: int = 5) -> Tuple[bool, str, List[dict]]:
    """
    Fetch best-selling MOMO products for a category keyword.

    Args:
        category: Category keyword (e.g. '面膜', '乳液', '精華液').
        limit: Maximum number of products to return.

    Returns:
        (success flag, message, list of compact product dicts)
    """
    # Best sellers are a keyword search sorted by sales volume.
    ok, note, found = get_crawler().search_products(category, limit=limit, sort_by='sSaleQty/dc')
    if not (ok and found):
        return ok, note, []

    # Compact format; id/product_id, url/product_url and image/image_url are
    # emitted as duplicate aliases.
    compact = [
        {
            'id': item.product_id,
            'product_id': item.product_id,
            'platform': 'momo',
            'name': item.name,
            'price': item.price,
            'original_price': item.original_price,
            'discount': item.discount,
            'url': item.product_url,
            'product_url': item.product_url,
            'image_url': item.image_url,
            'image': item.image_url
        }
        for item in found[:limit]
    ]
    return True, f"成功取得 {len(compact)} 個熱銷商品", compact


if __name__ == '__main__':
    # Manual smoke test: runs one keyword search and one best-seller lookup
    # against the live site and prints a short summary of each.
    logging.basicConfig(level=logging.INFO)

    print("=== MOMO 爬蟲測試 ===\n")

    # Keyword search
    print("[1] 測試搜尋 (關鍵字: 面膜)")
    _ok, search_note, search_rows = search_momo_products('面膜', limit=5)
    print(f"結果: {search_note}")
    if search_rows:
        print("搜尋結果:")
        for row in search_rows[:3]:
            print(f"  - {row['name'][:30]}... ${row['price']}")

    # Best sellers
    print("\n[2] 測試熱銷商品 (分類: 精華液)")
    _ok, best_note, best_rows = get_momo_bestsellers('精華液', limit=5)
    print(f"結果: {best_note}")
    if best_rows:
        print("熱銷商品:")
        for rank, row in enumerate(best_rows, 1):
            print(f"  {rank}. {row['name'][:30]}... ${row['price']}")