177 lines
6.6 KiB
Python
177 lines
6.6 KiB
Python
# cSpell:ignore momo goodsimg
|
|
"""
|
|
測試腳本:驗證新的圖片抓取邏輯
|
|
抓取一個分類的商品,並檢查圖片 URL 是否正確
|
|
"""
|
|
import os
import re
import sys
import time
import traceback

import pytest
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
|
|
|
|
# Absolute path of the directory containing this file; the debug HTML dump is
# written underneath it.
BASE_DIR = os.path.split(os.path.abspath(__file__))[0]
|
|
|
|
|
|
# CSS selectors tried in order, inside each product container, to find the
# product image element.
_IMG_SELECTORS = (
    "div.prd_img img",
    "div.productImg img",
    "div.imgArea img",
    "a.goodsUrl > img",
    "a[href*='i_code'] img",
    "img.goodsImg",
    "img.prdimg",
    "img.prdImg",
)

# Substrings that mark a URL as a loader / inline-data / blank placeholder.
_PLACEHOLDER_MARKERS = ("loader.gif", "data:image", "blank.png")

# A path segment starting with at least seven digits is treated as a product id.
_PRODUCT_ID_PATH_RE = re.compile(r'/\d{7,}')

# Upper bound on how many product containers are inspected per run.
_MAX_SAMPLED_PRODUCTS = 5


def _build_chrome_options():
    """Return the Chrome ``Options`` used by the smoke test.

    The browser runs headless unless ``MOMO_BROWSER_TEST_VISIBLE`` is ``"1"``.
    """
    options = Options()
    options.page_load_strategy = 'eager'
    if os.getenv("MOMO_BROWSER_TEST_VISIBLE") != "1":
        options.add_argument('--headless=new')
    # Note: no flag here disables image loading.
    for argument in (
        '--window-size=1920,5000',
        "--disable-blink-features=AutomationControlled",
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        '--disable-gpu',
        '--disable-extensions',
        '--disable-dev-shm-usage',
        '--no-sandbox',
        '--disable-features=AutofillServerCommunication,PasswordManagerOnboarding,PasswordCheck',
    ):
        options.add_argument(argument)
    options.add_experimental_option('prefs', {
        'credentials_enable_service': False,
        'profile.password_manager_enabled': False,
    })
    return options


def _is_placeholder_url(url: str) -> bool:
    """Return True when *url* contains one of the placeholder markers."""
    return any(marker in url for marker in _PLACEHOLDER_MARKERS)


def _looks_like_product_image(url: str) -> bool:
    """Return True when *url* looks like a product image.

    A URL qualifies when it contains ``goods`` (case-insensitive; this also
    covers ``goodsimg``) or a path segment starting with seven or more digits.
    """
    return 'goods' in url.lower() or _PRODUCT_ID_PATH_RE.search(url) is not None


def _normalize_image_url(url: str) -> str:
    """Turn a protocol-relative or root-relative URL into an absolute one."""
    if url.startswith("//"):
        return "https:" + url
    if url.startswith("/"):
        return "https://www.momoshop.com.tw" + url
    return url


def _pick_product_image(container):
    """Return the first acceptable image URL inside *container*, or ``None``.

    Each selector in ``_IMG_SELECTORS`` is tried in order. Only the first
    matching element per selector is inspected, reading ``data-original``
    before ``src``. The outcome of every attempt is printed.
    """
    print("🔍 嘗試圖片選擇器:")
    for selector_idx, selector in enumerate(_IMG_SELECTORS, 1):
        imgs = container.find_elements(By.CSS_SELECTOR, selector)
        if not imgs:
            continue

        img_tag = imgs[0]
        url = img_tag.get_attribute("data-original") or img_tag.get_attribute("src")

        print(f" [{selector_idx}] {selector}")
        print(f" 找到圖片: {url[:80] if url else 'None'}...")

        if not url or _is_placeholder_url(url):
            print(" ❌ 無效圖片 (loader/placeholder/blank)")
        elif _looks_like_product_image(url):
            print(" ✅ 有效的商品圖片!")
            return _normalize_image_url(url)
        else:
            print(" ⚠️ 不像商品圖片 (可能是廣告或 icon)")
    return None


@pytest.mark.skipif(
    os.getenv("RUN_MOMO_BROWSER_TESTS") != "1",
    reason="Selenium smoke 會開啟外部 MOMO 網站;預設不在一般 pytest 執行。",
)
def test_image_fetch():
    """Smoke-check image extraction on a single MOMO category page.

    Opens the category page in Chrome, saves the rendered HTML under
    ``<BASE_DIR>/logs/debug_htmls``, then tries to extract an image URL from
    up to ``_MAX_SAMPLED_PRODUCTS`` product containers and prints a summary.

    The summary is computed against the number of products actually sampled,
    so a page with fewer than five containers is not reported as a partial
    failure.

    NOTE(review): results are only printed. Errors are caught and logged, and
    nothing is asserted, so under pytest this function does not fail on a low
    success rate. Confirm whether that is intended.
    """
    print("🧪 開始測試圖片抓取功能...\n")

    driver = webdriver.Chrome(options=_build_chrome_options())
    driver.set_page_load_timeout(45)

    try:
        # Test URL: the toner category.
        test_url = "https://www.momoshop.com.tw/cateGoods.jsp?cateLevel=1&cateCode=1100300017"
        print(f"📍 測試網址: {test_url}")
        print("⏳ 正在載入頁面...\n")

        driver.get(test_url)
        print("⏳ 等待頁面完全載入 (10秒)...")
        time.sleep(10)  # give JavaScript time to render the product list

        # Scroll in two steps to trigger lazy loading.
        print("📜 滾動頁面以觸發商品載入...")
        for offset in (1000, 2000):
            driver.execute_script(f"window.scrollTo(0, {offset});")
            time.sleep(2)

        # Save the HTML first so the page structure can be inspected later.
        print("💾 保存頁面 HTML...")
        debug_path = os.path.join(BASE_DIR, 'logs', 'debug_htmls')
        os.makedirs(debug_path, exist_ok=True)
        with open(os.path.join(debug_path, "test_fetch_debug.html"), "w", encoding="utf-8") as f:
            f.write(driver.page_source)
        print(f" 已保存至: {debug_path}/test_fetch_debug.html\n")

        # Locate the product containers, falling back to an XPath lookup.
        print("🔍 正在尋找商品容器...")
        containers = driver.find_elements(By.CSS_SELECTOR, "li.goods, div.eachGood, li.box1, li.product_item")

        if not containers:
            print("⚠️ 使用備案選擇器...")
            containers = driver.find_elements(
                By.XPATH,
                "//li[.//p[contains(@class, 'prdName')] or .//h3[contains(@class, 'prdName')]]",
            )

        if not containers:
            print("❌ 找不到商品容器!")
            print("💡 提示: 請檢查 logs/debug_htmls/test_fetch_debug.html 查看頁面結構")
            return

        print(f"✅ 找到 {len(containers)} 個商品容器\n")
        print("=" * 80)

        # Inspect only the first few products; the page may offer fewer.
        sampled = containers[:_MAX_SAMPLED_PRODUCTS]
        tested = len(sampled)  # >= 1 because of the early return above
        success_count = 0

        for idx, container in enumerate(sampled, 1):
            print(f"\n📦 測試商品 #{idx}")
            print("-" * 80)

            image_url = _pick_product_image(container)

            if image_url:
                print("\n✅ 成功抓取圖片:")
                print(f" {image_url}")
                success_count += 1
            else:
                print("\n❌ 未能抓取圖片")

        fail_count = tested - success_count

        # Summary, relative to the number of products actually tested.
        print("\n" + "=" * 80)
        print("📊 測試結果")
        print("=" * 80)
        print(f"✅ 成功: {success_count}/{tested}")
        print(f"❌ 失敗: {fail_count}/{tested}")
        print(f"📈 成功率: {success_count / tested * 100:.0f}%")

        # Thresholds are 80% and 40%, i.e. 4/5 and 2/5 for a full sample.
        # Integer arithmetic avoids float rounding at the boundaries.
        if success_count * 5 >= tested * 4:
            print("\n🎉 測試通過!圖片抓取邏輯運作正常。")
        elif success_count * 5 >= tested * 2:
            print("\n⚠️ 測試部分通過,建議檢查失敗的情況。")
        else:
            print("\n❌ 測試失敗,需要進一步調整選擇器。")

    except Exception as e:
        # Diagnostic script: report the error with its traceback and continue
        # to cleanup.
        print(f"\n❌ 測試過程發生錯誤: {e}")
        traceback.print_exc()

    finally:
        print("\n⏳ 關閉瀏覽器...")
        driver.quit()
        print("✅ 測試完成")
|
|
|
|
# Script entry point: when this file is executed directly, call the smoke
# check once.
if __name__ == "__main__":
    test_image_fetch()
|