Some checks failed
CD Pipeline / deploy (push) Failing after 59s
- 建立 Gitea Actions CD pipeline (.gitea/workflows/cd.yaml) - 部署模式: rsync Python 檔案至 188 → docker restart (volume mount) - Dockerfile/requirements 變動時自動重建 Docker image - 部署通知: Telegram (開始/成功/失敗) - 健康檢查: https://mo.wooo.work/health (最多 5 次重試) - 同步最新 CLAUDE.md / ADR-008 / memory (2026-04-19) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
195 lines
6.1 KiB
Python
195 lines
6.1 KiB
Python
# cSpell:ignore momo goodsimg webp
|
||
"""
|
||
MOMO 商品圖片 URL 構建器
|
||
根據 i_code 直接構建商品圖片 URL,不需要從頁面抓取
|
||
"""
|
||
import re
|
||
import random
|
||
import requests
|
||
from typing import Optional
|
||
|
||
def build_image_url(i_code: str) -> Optional[str]:
    """
    Build a MOMO product image URL from a product code and verify it.

    Documented MOMO image URL formats:
    1. "TP" codes:    https://i{1-8}.momoshop.com.tw/{timestamp}/goodsimg/TP000/{XXXX}/{YYYY}/{ZZZ}/{i_code}_O.webp
    2. Numeric codes: https://i{1-8}.momoshop.com.tw/{timestamp}/goodsimg/{XXXX}/{YYY}/{ZZZ}/{i_code}_OL_m.webp

    The candidates built here omit the {timestamp} segment, so every
    candidate is probed with verify_image_url() and the first one that
    verifies is returned.

    Args:
        i_code: Product code, either "TP"-prefixed or made of ASCII digits.

    Returns:
        The first candidate URL that verifies, or None when the code is
        empty, has an unsupported shape, or no candidate verifies.
    """
    if not i_code:
        return None

    if i_code.startswith('TP'):
        # Example: TP00008290000375 -> TP000/0829/0000/375
        # The directory prefix "TP000" covers the first five characters of
        # the code, so the 4-4-3 segments start at index 5 (not at index 2,
        # which would yield 8290/0003/75 for the example above).
        if len(i_code) < 16:
            return None
        path = f"TP000/{i_code[5:9]}/{i_code[9:13]}/{i_code[13:]}"
        suffixes = ("_O.webp", "_O.jpg", "_O_m.jpg", "_OL_m.webp")
    else:
        # Example: 14548538 -> 0014/548/538
        # Only plain ASCII digits are accepted; int() alone would also let
        # signs, whitespace and underscores through and produce a bad path.
        if not re.fullmatch(r"[0-9]+", i_code):
            return None
        code_str = str(int(i_code)).zfill(8)  # pad to at least 8 digits
        # Split 4-3-3 counting from the right-hand side.
        path = f"{code_str[:-6].zfill(4)}/{code_str[-6:-3]}/{code_str[-3:]}"
        suffixes = ("_OL_m.webp", "_O_m.webp", "_L.webp", "_OL_m.jpg", "_O_m.jpg")

    # Pick one of the CDN hosts i1..i8 at random.
    cdn_num = random.randint(1, 8)
    base = f"https://i{cdn_num}.momoshop.com.tw/goodsimg/{path}/{i_code}"

    # Try each known file-name variant and return the first reachable one.
    for suffix in suffixes:
        url = base + suffix
        if verify_image_url(url):
            return url

    return None
|
||
|
||
|
||
def verify_image_url(url: str, timeout: int = 3) -> bool:
    """
    Check whether an image URL is reachable.

    Sends a HEAD request (following redirects) and treats only a final
    HTTP 200 status as success.

    Args:
        url: Image URL to probe.
        timeout: Request timeout in seconds.

    Returns:
        True if the request completed with status 200; False for an empty
        URL, any other status, or a failed request.
    """
    if not url:
        # Nothing to probe; skip the network call.
        return False

    try:
        response = requests.head(url, timeout=timeout, allow_redirects=True)
    except (requests.RequestException, ValueError):
        # Connection errors, timeouts and malformed URLs all mean "not
        # usable". Unlike a bare except, this leaves KeyboardInterrupt and
        # SystemExit free to propagate.
        return False

    return response.status_code == 200
|
||
|
||
|
||
def get_product_image_url(i_code: str) -> Optional[str]:
    """
    Look up a product's image URL through its detail page.

    Delegates to fetch_image_from_detail_page(). The original author notes
    this is the most reliable approach because MOMO image URLs contain a
    dynamic timestamp.

    Args:
        i_code: Product code.

    Returns:
        The image URL, or None when it could not be obtained.
    """
    image_url = fetch_image_from_detail_page(i_code)
    return image_url
|
||
|
||
|
||
def fetch_image_from_detail_page(i_code: str) -> Optional[str]:
    """
    Scrape the main image URL from a MOMO product detail page.

    Requests the GoodsDetail page for the product and reads the URL from
    its og:image meta tag.

    Args:
        i_code: Product code.

    Returns:
        The image URL as an absolute https URL when the tag holds a
        protocol-relative or root-relative value, otherwise the tag's value
        as-is. None when the code is empty, the request fails, the response
        status is not 200, or no og:image tag is found.
    """
    # Local import keeps this block self-contained; the name "html" is not
    # imported at module level.
    from html import unescape

    if not i_code:
        # No product to look up; avoid a pointless request.
        return None

    detail_url = "https://www.momoshop.com.tw/goods/GoodsDetail.jsp"

    # Browser-like headers to reduce the chance of being blocked.
    # Accept-Encoding is deliberately left to requests: advertising "br" by
    # hand can yield a Brotli body that cannot be decoded unless an optional
    # Brotli package is installed.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
        'Connection': 'keep-alive',
    }

    try:
        # params= URL-encodes the code instead of splicing it into the query
        # string verbatim.
        response = requests.get(
            detail_url,
            params={'i_code': i_code},
            headers=headers,
            timeout=10,
        )
    except requests.RequestException as e:
        print(f"從詳情頁抓取圖片失敗: {e}")
        return None

    if response.status_code != 200:
        return None

    # The main product image is published in the og:image meta tag.
    pattern = r'<meta property="og:image" content="([^"]+)"'
    match = re.search(pattern, response.text, re.IGNORECASE)
    if not match:
        return None

    # Attribute values are HTML-escaped (e.g. "&amp;"); decode before use.
    img_url = unescape(match.group(1))

    # Normalize protocol-relative and root-relative URLs.
    if img_url.startswith('//'):
        img_url = 'https:' + img_url
    elif img_url.startswith('/'):
        img_url = 'https://www.momoshop.com.tw' + img_url
    return img_url
|
||
|
||
|
||
if __name__ == "__main__":
    # Manual smoke test: build an image URL for a few sample product codes
    # and print the outcome for each.
    sample_codes = (
        "14548538",          # all digits
        "TP00008290000375",  # "TP" prefix
        "12092813",          # another all-digit code
    )

    print("🧪 測試圖片 URL 構建器\n")
    for code in sample_codes:
        print(f"商品編號: {code}")
        result = build_image_url(code)
        message = f"✅ 圖片 URL: {result}" if result else "❌ 無法構建圖片 URL"
        print(message)
        print()
|