Files
ewoooc/tests/test_image_fetch.py
OoO e57793829c
All checks were successful
CD Pipeline / deploy (push) Successful in 1m5s
[V10.360] 關閉預設瀏覽器 smoke 並優化 PChome 熱路徑
2026-05-21 12:07:25 +08:00

177 lines
6.6 KiB
Python

# cSpell:ignore momo goodsimg
"""
測試腳本:驗證新的圖片抓取邏輯
抓取一個分類的商品,並檢查圖片 URL 是否正確
"""
import os
import sys
import re
import pytest
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
# Absolute path of the directory containing this test module; the debug HTML
# dump written by the smoke test is placed under it (logs/debug_htmls).
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# CSS selectors tried, in priority order, to locate the <img> inside a product
# container. Kept at module level so the tuple is built once, not per product.
_IMG_SELECTORS = (
    "div.prd_img img",
    "div.productImg img",
    "div.imgArea img",
    "a.goodsUrl > img",
    "a[href*='i_code'] img",
    "img.goodsImg",
    "img.prdimg",
    "img.prdImg",
)

# Substrings that mark a lazy-load placeholder rather than a real image.
_PLACEHOLDER_MARKERS = ("loader.gif", "data:image", "blank.png")

# Number of leading product containers that are sampled by the smoke test.
_SAMPLE_SIZE = 5


def _build_chrome_options():
    """Return the Chrome options used by the smoke run.

    The browser runs headless unless ``MOMO_BROWSER_TEST_VISIBLE`` is ``"1"``.
    Image loading is intentionally left enabled.
    """
    options = Options()
    options.page_load_strategy = 'eager'
    if os.getenv("MOMO_BROWSER_TEST_VISIBLE") != "1":
        options.add_argument('--headless=new')
    options.add_argument('--window-size=1920,5000')
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36")
    options.add_argument('--disable-gpu')
    options.add_argument('--disable-extensions')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-features=AutofillServerCommunication,PasswordManagerOnboarding,PasswordCheck')
    options.add_experimental_option('prefs', {
        'credentials_enable_service': False,
        'profile.password_manager_enabled': False,
    })
    return options


def _find_product_containers(driver):
    """Return the product container elements, using an XPath fallback if the CSS query finds none."""
    containers = driver.find_elements(By.CSS_SELECTOR, "li.goods, div.eachGood, li.box1, li.product_item")
    if not containers:
        print("⚠️ 使用備案選擇器...")
        containers = driver.find_elements(
            By.XPATH,
            "//li[.//p[contains(@class, 'prdName')] or .//h3[contains(@class, 'prdName')]]",
        )
    return containers


def _is_placeholder_url(url):
    """Return True when *url* is empty or points at a loader/placeholder image."""
    return not url or any(marker in url for marker in _PLACEHOLDER_MARKERS)


def _looks_like_product_image(url):
    """Return True when *url* contains "goods" or a path segment starting with 7+ digits."""
    # "goods" also covers "goodsimg", so a single substring check is enough.
    return 'goods' in url.lower() or re.search(r'/\d{7,}', url) is not None


def _normalize_image_url(url):
    """Turn a protocol-relative or site-relative URL into an absolute https URL."""
    if url.startswith("//"):
        return "https:" + url
    if url.startswith("/"):
        return "https://www.momoshop.com.tw" + url
    return url


def _pick_image_url(container):
    """Return the first usable product image URL inside *container*, or None.

    Each selector in ``_IMG_SELECTORS`` is tried in order; only the first
    matching element per selector is inspected, preferring its
    ``data-original`` attribute over ``src``.
    """
    print("🔍 嘗試圖片選擇器:")
    for selector_idx, selector in enumerate(_IMG_SELECTORS, 1):
        imgs = container.find_elements(By.CSS_SELECTOR, selector)
        if not imgs:
            continue
        img_tag = imgs[0]
        url = img_tag.get_attribute("data-original") or img_tag.get_attribute("src")
        print(f" [{selector_idx}] {selector}")
        print(f" 找到圖片: {url[:80] if url else 'None'}...")
        if _is_placeholder_url(url):
            print(" ❌ 無效圖片 (loader/placeholder/blank)")
        elif _looks_like_product_image(url):
            print(" ✅ 有效的商品圖片!")
            return _normalize_image_url(url)
        else:
            print(" ⚠️ 不像商品圖片 (可能是廣告或 icon)")
    return None


@pytest.mark.skipif(
    os.getenv("RUN_MOMO_BROWSER_TESTS") != "1",
    reason="Selenium smoke 會開啟外部 MOMO 網站;預設不在一般 pytest 執行。",
)
def test_image_fetch():
    """Smoke-test image extraction on a single MOMO category page.

    Opens the category page in Chrome, dumps the rendered HTML for later
    inspection, then tries to extract a product image URL from the first
    ``_SAMPLE_SIZE`` product containers and prints a summary.

    The test fails (via ``pytest.fail``) when no product container is found
    or when fewer than 40% of the sampled products yield an image URL.
    Unexpected errors are reported and re-raised so that they are not
    silently recorded as a pass. The browser is always closed.
    """
    print("🧪 開始測試圖片抓取功能...\n")
    driver = webdriver.Chrome(options=_build_chrome_options())
    try:
        driver.set_page_load_timeout(45)

        # Test URL: the toner (skincare) category.
        test_url = "https://www.momoshop.com.tw/cateGoods.jsp?cateLevel=1&cateCode=1100300017"
        print(f"📍 測試網址: {test_url}")
        print("⏳ 正在載入頁面...\n")
        driver.get(test_url)
        print("⏳ 等待頁面完全載入 (10秒)...")
        time.sleep(10)  # give JavaScript time to render the product list

        # Scroll in steps to trigger lazy loading of products.
        print("📜 滾動頁面以觸發商品載入...")
        for offset in (1000, 2000):
            driver.execute_script(f"window.scrollTo(0, {offset});")
            time.sleep(2)

        # Save the HTML first so a failing run can be analysed afterwards.
        print("💾 保存頁面 HTML...")
        debug_path = os.path.join(BASE_DIR, 'logs', 'debug_htmls')
        os.makedirs(debug_path, exist_ok=True)
        with open(os.path.join(debug_path, "test_fetch_debug.html"), "w", encoding="utf-8") as f:
            f.write(driver.page_source)
        print(f" 已保存至: {debug_path}/test_fetch_debug.html\n")

        print("🔍 正在尋找商品容器...")
        containers = _find_product_containers(driver)
        if not containers:
            print("❌ 找不到商品容器!")
            print("💡 提示: 請檢查 logs/debug_htmls/test_fetch_debug.html 查看頁面結構")
            # pytest.fail is not an Exception subclass, so it bypasses the
            # handler below and is reported as a plain test failure.
            pytest.fail("找不到商品容器")
        print(f"✅ 找到 {len(containers)} 個商品容器\n")
        print("=" * 80)

        # Only the first few products are sampled.
        sample = containers[:_SAMPLE_SIZE]
        success_count = 0
        for idx, container in enumerate(sample, 1):
            print(f"\n📦 測試商品 #{idx}")
            print("-" * 80)
            image_url = _pick_image_url(container)
            if image_url:
                print("\n✅ 成功抓取圖片:")
                print(f" {image_url}")
                success_count += 1
            else:
                print("\n❌ 未能抓取圖片")

        # Summary. The page may expose fewer than _SAMPLE_SIZE containers, so
        # every figure is based on the number actually tested.
        tested = len(sample)
        fail_count = tested - success_count
        print("\n" + "=" * 80)
        print("📊 測試結果")
        print("=" * 80)
        print(f"✅ 成功: {success_count}/{tested}")
        print(f"❌ 失敗: {fail_count}/{tested}")
        print(f"📈 成功率: {success_count / tested * 100:.0f}%")

        # Integer comparisons equivalent to >= 80% and >= 40% (4/5 and 2/5).
        if success_count * 5 >= tested * 4:
            print("\n🎉 測試通過!圖片抓取邏輯運作正常。")
        elif success_count * 5 >= tested * 2:
            print("\n⚠️ 測試部分通過,建議檢查失敗的情況。")
        else:
            print("\n❌ 測試失敗,需要進一步調整選擇器。")
            pytest.fail(f"圖片抓取成功率過低: {success_count}/{tested}")
    except Exception as e:
        # Report the error, then let it propagate so the run is marked failed.
        print(f"\n❌ 測試過程發生錯誤: {e}")
        raise
    finally:
        print("\n⏳ 關閉瀏覽器...")
        driver.quit()
        print("✅ 測試完成")
# Allow running the smoke check directly as a script, outside of pytest
# (a direct call is not subject to the skipif marker).
if __name__ == "__main__":
    test_image_fetch()