# cSpell:ignore momo goodsimg
"""Smoke test for the product-image scraping logic.

Loads one MOMO category page in Chrome, locates the product containers and
checks that a usable product image URL can be extracted from each of the
first few products.  The test only runs when ``RUN_MOMO_BROWSER_TESTS=1``
because it opens an external website.
"""
import os
import re
import sys
import time

import pytest
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Category page used for the smoke test (toner / lotion category).
_CATEGORY_URL = "https://www.momoshop.com.tw/cateGoods.jsp?cateLevel=1&cateCode=1100300017"
# Origin prepended to root-relative image paths.
_SITE_ORIGIN = "https://www.momoshop.com.tw"
# Upper bound on how many product containers are inspected.
_MAX_PRODUCTS = 5

# CSS selectors tried in order inside each product container.
_IMG_SELECTORS = (
    "div.prd_img img",
    "div.productImg img",
    "div.imgArea img",
    "a.goodsUrl > img",
    "a[href*='i_code'] img",
    "img.goodsImg",
    "img.prdimg",
    "img.prdImg",
)

# Substrings that mark a loader / placeholder / inline image.
_PLACEHOLDER_MARKERS = ("loader.gif", "data:image", "blank.png")

# A path segment of 7+ digits is treated as a product id.
_PRODUCT_ID_RE = re.compile(r'/\d{7,}')


def _build_chrome_options():
    """Return the Chrome options used by the smoke test (images enabled)."""
    options = Options()
    options.page_load_strategy = 'eager'
    # NOTE(review): only the headless flag is assumed to be conditional on
    # MOMO_BROWSER_TEST_VISIBLE; the remaining flags are applied always.
    if os.getenv("MOMO_BROWSER_TEST_VISIBLE") != "1":
        options.add_argument('--headless=new')
    options.add_argument('--window-size=1920,5000')
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36")
    options.add_argument('--disable-gpu')
    options.add_argument('--disable-extensions')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-features=AutofillServerCommunication,PasswordManagerOnboarding,PasswordCheck')
    options.add_experimental_option('prefs', {
        'credentials_enable_service': False,
        'profile.password_manager_enabled': False,
    })
    return options


def _looks_like_product_image(url):
    """Return True when *url* contains "goods" or a 7+ digit path segment."""
    # "goods" also covers the "goodsimg" host/path used for product images.
    return 'goods' in url.lower() or bool(_PRODUCT_ID_RE.search(url))


def _normalize_image_url(url):
    """Turn protocol-relative and root-relative URLs into absolute https URLs."""
    if url.startswith("//"):
        return "https:" + url
    if url.startswith("/"):
        return _SITE_ORIGIN + url
    return url


def _find_product_containers(driver):
    """Return product container elements, trying CSS first and XPath as fallback."""
    containers = driver.find_elements(
        By.CSS_SELECTOR, "li.goods, div.eachGood, li.box1, li.product_item"
    )
    if not containers:
        print("⚠️ 使用備案選擇器...")
        containers = driver.find_elements(
            By.XPATH,
            "//li[.//p[contains(@class, 'prdName')] or .//h3[contains(@class, 'prdName')]]",
        )
    return containers


def _extract_image_url(container):
    """Return the first valid, normalized product image URL in *container*, or None.

    Each selector is tried in order; the first matching ``<img>`` supplies its
    ``data-original`` attribute (falling back to ``src``).  Placeholder images
    and images that do not look like product images are skipped.
    """
    print("🔍 嘗試圖片選擇器:")
    for position, selector in enumerate(_IMG_SELECTORS, 1):
        imgs = container.find_elements(By.CSS_SELECTOR, selector)
        if not imgs:
            continue

        img_tag = imgs[0]
        url = img_tag.get_attribute("data-original") or img_tag.get_attribute("src")
        print(f" [{position}] {selector}")
        print(f" 找到圖片: {url[:80] if url else 'None'}...")

        # Reject missing URLs and loader / placeholder / inline images.
        if not url or any(marker in url for marker in _PLACEHOLDER_MARKERS):
            print(" ❌ 無效圖片 (loader/placeholder/blank)")
            continue

        # Reject images that are probably ads or icons.
        if not _looks_like_product_image(url):
            print(" ⚠️ 不像商品圖片 (可能是廣告或 icon)")
            continue

        print(" ✅ 有效的商品圖片!")
        return _normalize_image_url(url)
    return None


@pytest.mark.skipif(
    os.getenv("RUN_MOMO_BROWSER_TESTS") != "1",
    reason="Selenium smoke 會開啟外部 MOMO 網站;預設不在一般 pytest 執行。",
)
def test_image_fetch():
    """Check that product images can be scraped from a single category page.

    The test fails when the browser session raises, when no product container
    is found, or when fewer than 40% of the inspected products yield a valid
    image URL (the same threshold at which the report prints "測試失敗").
    """
    print("🧪 開始測試圖片抓取功能...\n")

    # Set inside the try block, raised after the browser has been closed so
    # the console output keeps its order (summary first, shutdown last).
    failure = None

    driver = webdriver.Chrome(options=_build_chrome_options())
    try:
        driver.set_page_load_timeout(45)

        print(f"📍 測試網址: {_CATEGORY_URL}")
        print("⏳ 正在載入頁面...\n")
        driver.get(_CATEGORY_URL)

        print("⏳ 等待頁面完全載入 (10秒)...")
        time.sleep(10)  # give JavaScript time to render the product list

        # Scroll down in steps to trigger lazy loading.
        print("📜 滾動頁面以觸發商品載入...")
        for offset in (1000, 2000):
            driver.execute_script(f"window.scrollTo(0, {offset});")
            time.sleep(2)

        # Save the rendered HTML first so failures can be analysed offline.
        print("💾 保存頁面 HTML...")
        debug_path = os.path.join(BASE_DIR, 'logs', 'debug_htmls')
        os.makedirs(debug_path, exist_ok=True)
        with open(os.path.join(debug_path, "test_fetch_debug.html"), "w", encoding="utf-8") as f:
            f.write(driver.page_source)
        print(f" 已保存至: {debug_path}/test_fetch_debug.html\n")

        print("🔍 正在尋找商品容器...")
        containers = _find_product_containers(driver)

        if not containers:
            print("❌ 找不到商品容器!")
            print("💡 提示: 請檢查 logs/debug_htmls/test_fetch_debug.html 查看頁面結構")
            failure = "no product containers found on the category page"
        else:
            print(f"✅ 找到 {len(containers)} 個商品容器\n")
            print("=" * 80)

            sample = containers[:_MAX_PRODUCTS]
            success_count = 0
            for idx, container in enumerate(sample, 1):
                print(f"\n📦 測試商品 #{idx}")
                print("-" * 80)

                image_url = _extract_image_url(container)
                if image_url:
                    print("\n✅ 成功抓取圖片:")
                    print(f" {image_url}")
                    success_count += 1
                else:
                    print("\n❌ 未能抓取圖片")

            # Report against the number of products actually inspected, which
            # may be smaller than _MAX_PRODUCTS.
            tested = len(sample)
            fail_count = tested - success_count
            print("\n" + "=" * 80)
            print("📊 測試結果")
            print("=" * 80)
            print(f"✅ 成功: {success_count}/{tested}")
            print(f"❌ 失敗: {fail_count}/{tested}")
            print(f"📈 成功率: {success_count/tested*100:.0f}%")

            # Integer form of ">= 80%" and ">= 40%" (4/5 and 2/5 products).
            if success_count * 5 >= tested * 4:
                print("\n🎉 測試通過!圖片抓取邏輯運作正常。")
            elif success_count * 5 >= tested * 2:
                print("\n⚠️ 測試部分通過,建議檢查失敗的情況。")
            else:
                print("\n❌ 測試失敗,需要進一步調整選擇器。")
                failure = (
                    f"only {success_count}/{tested} products yielded a valid image URL"
                )
    except Exception as e:
        # Log for console readers, then propagate so the test is reported as
        # failed instead of silently passing.
        print(f"\n❌ 測試過程發生錯誤: {e}")
        raise
    finally:
        print("\n⏳ 關閉瀏覽器...")
        driver.quit()
        print("✅ 測試完成")

    if failure:
        pytest.fail(failure)


if __name__ == "__main__":
    test_image_fetch()