# ewoooc/tests/test_image_fetch.py

# cSpell:ignore momo goodsimg
"""
測試腳本：驗證新的圖片抓取邏輯
抓取一個分類的商品，並檢查圖片 URL 是否正確
"""
import os
import sys
import re
import pytest
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time

BASE_DIR = os.path.dirname(os.path.abspath(__file__))


# CSS selectors tried in order inside each product container; the first one
# that yields a usable product image wins.
_IMG_SELECTORS = (
    "div.prd_img img",
    "div.productImg img",
    "div.imgArea img",
    "a.goodsUrl > img",
    "a[href*='i_code'] img",
    "img.goodsImg",
    "img.prdimg",
    "img.prdImg",
)

# Substrings that mark a loader / inline-data / blank placeholder image.
_PLACEHOLDER_MARKERS = ("loader.gif", "data:image", "blank.png")

# A slash followed by at least seven digits (presumably a product code).
_PRODUCT_ID_PATH = re.compile(r'/\d{7,}')

# Number of leading product containers the smoke test samples.
_SAMPLE_SIZE = 5


def _build_chrome_options():
    """Return the Chrome options used by the smoke test.

    The browser runs headless unless MOMO_BROWSER_TEST_VISIBLE is set to "1".
    """
    options = Options()
    options.page_load_strategy = 'eager'
    if os.getenv("MOMO_BROWSER_TEST_VISIBLE") != "1":
        options.add_argument('--headless=new')
    # Note: no flag here disables image loading.
    for argument in (
        '--window-size=1920,5000',
        "--disable-blink-features=AutomationControlled",
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        '--disable-gpu',
        '--disable-extensions',
        '--disable-dev-shm-usage',
        '--no-sandbox',
        '--disable-features=AutofillServerCommunication,PasswordManagerOnboarding,PasswordCheck',
    ):
        options.add_argument(argument)
    options.add_experimental_option('prefs', {
        'credentials_enable_service': False,
        'profile.password_manager_enabled': False,
    })
    return options


def _is_placeholder_url(url):
    """Return True when *url* is empty or contains a placeholder marker."""
    return not url or any(marker in url for marker in _PLACEHOLDER_MARKERS)


def _looks_like_product_image(url):
    """Return True when *url* contains "goods" or a 7+ digit path segment."""
    # "goods" also covers the more specific "goodsimg" substring.
    return 'goods' in url.lower() or _PRODUCT_ID_PATH.search(url) is not None


def _normalize_image_url(url):
    """Turn a protocol-relative or root-relative URL into an absolute one."""
    if url.startswith("//"):
        return "https:" + url
    if url.startswith("/"):
        return "https://www.momoshop.com.tw" + url
    return url


def _find_product_containers(driver):
    """Return the product container elements found on the current page."""
    containers = driver.find_elements(By.CSS_SELECTOR, "li.goods, div.eachGood, li.box1, li.product_item")
    if not containers:
        # Fallback: any <li> that holds an element with a prdName class.
        print("⚠️ 使用備案選擇器...")
        containers = driver.find_elements(
            By.XPATH,
            "//li[.//p[contains(@class, 'prdName')] or .//h3[contains(@class, 'prdName')]]")
    return containers


def _extract_image_url(container):
    """Return the normalized product image URL for *container*, or None.

    Each selector in _IMG_SELECTORS is tried in order; only the first <img>
    matched by a selector is inspected, and the attempt is logged to stdout.
    """
    print("🔍 嘗試圖片選擇器:")
    for selector_idx, selector in enumerate(_IMG_SELECTORS, 1):
        imgs = container.find_elements(By.CSS_SELECTOR, selector)
        if not imgs:
            continue

        img_tag = imgs[0]
        # Prefer the lazy-load attribute over src.
        url = img_tag.get_attribute("data-original") or img_tag.get_attribute("src")

        print(f"   [{selector_idx}] {selector}")
        print(f"       找到圖片: {url[:80] if url else 'None'}...")

        if _is_placeholder_url(url):
            print("       ❌ 無效圖片 (loader/placeholder/blank)")
        elif not _looks_like_product_image(url):
            print("       ⚠️ 不像商品圖片 (可能是廣告或 icon)")
        else:
            print("       ✅ 有效的商品圖片！")
            return _normalize_image_url(url)
    return None


@pytest.mark.skipif(
    os.getenv("RUN_MOMO_BROWSER_TESTS") != "1",
    reason="Selenium smoke 會開啟外部 MOMO 網站；預設不在一般 pytest 執行。",
)
def test_image_fetch():
    """Smoke-test image URL extraction on a single MOMO category page.

    Loads the category page in Chrome, saves the rendered HTML under
    ``logs/debug_htmls`` next to this file, then tries to extract a product
    image URL from each of the first ``_SAMPLE_SIZE`` product containers.

    Raises:
        AssertionError: if no product container is found, or if fewer than
            40% of the sampled containers yield a product image URL.
        Exception: any Selenium or I/O error propagates so the test fails
            instead of passing silently; the browser is always closed.
    """
    print("🧪 開始測試圖片抓取功能...\n")

    driver = webdriver.Chrome(options=_build_chrome_options())

    try:
        # Inside the try so the browser is closed even if this call fails.
        driver.set_page_load_timeout(45)

        # Test URL: the toner category.
        test_url = "https://www.momoshop.com.tw/cateGoods.jsp?cateLevel=1&cateCode=1100300017"
        print(f"📍 測試網址: {test_url}")
        print("⏳ 正在載入頁面...\n")

        driver.get(test_url)
        print("⏳ 等待頁面完全載入 (10秒)...")
        time.sleep(10)  # wait for JavaScript to render the products

        # Scroll down in two steps to trigger lazy loading.
        print("📜 滾動頁面以觸發商品載入...")
        for offset in (1000, 2000):
            driver.execute_script(f"window.scrollTo(0, {offset});")
            time.sleep(2)

        # Save the HTML first so a failure can be analysed afterwards.
        print("💾 保存頁面 HTML...")
        debug_path = os.path.join(BASE_DIR, 'logs', 'debug_htmls')
        os.makedirs(debug_path, exist_ok=True)
        debug_file = os.path.join(debug_path, "test_fetch_debug.html")
        with open(debug_file, "w", encoding="utf-8") as f:
            f.write(driver.page_source)
        print(f"   已保存至: {debug_path}/test_fetch_debug.html\n")

        print("🔍 正在尋找商品容器...")
        containers = _find_product_containers(driver)

        if not containers:
            print("❌ 找不到商品容器！")
            print("💡 提示: 請檢查 logs/debug_htmls/test_fetch_debug.html 查看頁面結構")
            # Explicit raise (not `assert`) so the check survives `python -O`.
            raise AssertionError(
                f"no product containers found; inspect {debug_file}")

        print(f"✅ 找到 {len(containers)} 個商品容器\n")
        print("=" * 80)

        # The page may hold fewer containers than the sample size, so all
        # counts below are relative to what was actually sampled.
        sample = containers[:_SAMPLE_SIZE]
        tested = len(sample)
        success_count = 0

        for idx, container in enumerate(sample, 1):
            print(f"\n📦 測試商品 #{idx}")
            print("-" * 80)

            image_url = _extract_image_url(container)
            if image_url:
                print("\n✅ 成功抓取圖片:")
                print(f"   {image_url}")
                success_count += 1
            else:
                print("\n❌ 未能抓取圖片")

        fail_count = tested - success_count

        # Summary.
        print("\n" + "=" * 80)
        print("📊 測試結果")
        print("=" * 80)
        print(f"✅ 成功: {success_count}/{tested}")
        print(f"❌ 失敗: {fail_count}/{tested}")
        print(f"📈 成功率: {success_count / tested * 100:.0f}%")

        # Thresholds are 80% and 40% of the sample (4/5 and 2/5 for a full
        # sample), compared with integers to avoid float rounding.
        if success_count * 5 >= tested * 4:
            print("\n🎉 測試通過！圖片抓取邏輯運作正常。")
        elif success_count * 5 >= tested * 2:
            print("\n⚠️ 測試部分通過，建議檢查失敗的情況。")
        else:
            print("\n❌ 測試失敗，需要進一步調整選擇器。")
            raise AssertionError(
                f"only {success_count}/{tested} sampled products yielded "
                f"a product image URL")

    finally:
        print("\n⏳ 關閉瀏覽器...")
        driver.quit()
        print("✅ 測試完成")

# Allow running the smoke test directly as a script. A direct call does not go
# through pytest, so the skipif marker on the test is not evaluated here.
if __name__ == "__main__":
    test_image_fetch()