Files
ewoooc/services/pchome_crawler.py
OoO bb8c29e56d
All checks were successful
CD Pipeline / deploy (push) Successful in 1m7s
V10.590 修正 PChome 副標去重與比價覆核入口
2026-06-04 22:00:32 +08:00

927 lines
31 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
PChome 24h 爬蟲服務
爬取 PChome 24h 商品資料,支援:
- 館別頁面爬取 (如 /region/DDAB)
- 商品詳細資料取得
- 批次查詢商品 API
API 參考:
- 商品 API: https://ecapi-cdn.pchome.com.tw/cdn/ecshop/prodapi/v2/prod?id=ID1,ID2,...
- 圖片 URL: https://img.pchome.com.tw/cs{Pic.B}
"""
import html
import re
import time
import logging
from typing import List, Dict, Optional, Tuple
from dataclasses import dataclass, asdict
from datetime import datetime
import requests
logger = logging.getLogger(__name__)
@dataclass
class PChomeProduct:
    """Data record describing a single PChome product."""

    product_id: str  # Product ID (e.g. DDABSD-1900HIE3P)
    name: str  # Product display name
    price: int  # Selling price
    original_price: int  # List price
    discount: Optional[int]  # Discount (%)
    image_url: str  # Image URL
    product_url: str  # Product page URL
    stock: int  # Quantity in stock
    store: str  # Store code
    rating: Optional[float]  # Rating
    review_count: int  # Number of reviews
    is_on_sale: bool  # Whether the product is currently on sale
    crawled_at: datetime  # Time of crawl
    subtitle: str = ''  # PChome Nick / subtitle; often carries volume, pack count and concentration
    match_name: str = ''  # Identity text for the matcher; UI/DB display still uses name

    def to_dict(self) -> dict:
        """Return the record as a dict with ``crawled_at`` rendered as an ISO-8601 string."""
        # Overriding an existing key keeps its position, so field order is unchanged.
        return {**asdict(self), 'crawled_at': self.crawled_at.isoformat()}
def _clean_subtitle_text(value: str) -> str:
"""Normalize PChome Nick text before using it as identity evidence."""
text = html.unescape(str(value or ""))
text = re.sub(r"<\s*br\s*/?\s*>", " ", text, flags=re.I)
text = re.sub(r"<[^>]+>", " ", text)
text = re.sub(r"[★☆][^★☆]{0,80}[★☆]", " ", text)
return re.sub(r"\s+", " ", text).strip()
def _compact_identity_text(value: str) -> str:
return re.sub(r"[^0-9a-zA-Z\u4e00-\u9fff]+", "", str(value or "").lower())
def _remove_display_name_from_subtitle(display_name: str, subtitle: str) -> str:
"""Remove one repeated display title from Nick while keeping useful promo/spec text."""
cleaned = str(subtitle or "").strip()
title = str(display_name or "").strip()
if not cleaned or not title:
return cleaned
if title in cleaned:
cleaned = cleaned.replace(title, " ", 1)
return re.sub(r"\s+", " ", cleaned).strip()
def _build_match_name(name: str, subtitle: str) -> str:
    """Build an identity-rich title without duplicating the PChome display name.

    Args:
        name: PChome display name.
        subtitle: PChome Nick / subtitle text (cleaned here before use).

    Returns:
        The text handed to the matcher:
        - the display name alone when the Nick is empty, identical, or
          differs only in characters dropped by ``_compact_identity_text``;
        - the Nick alone when it already starts with the display name;
        - the display name plus the non-duplicated part of the Nick when
          the name is embedded inside the Nick;
        - otherwise the display name followed by the Nick.
    """
    display_name = str(name or '').strip()
    nick = _clean_subtitle_text(subtitle)
    if not nick or nick == display_name:
        return display_name

    display_compact = _compact_identity_text(display_name)
    nick_compact = _compact_identity_text(nick)

    # This check has to run before the prefix test below: equal compact
    # forms always satisfy ``startswith`` too, which previously made this
    # branch unreachable.
    if display_compact and display_compact == nick_compact:
        return display_name

    if display_name and (
        nick.startswith(display_name)
        or (display_compact and nick_compact.startswith(display_compact))
    ):
        # The Nick already leads with the title, so it carries everything.
        return nick

    if display_name and display_compact and display_compact in nick_compact:
        reduced_nick = _remove_display_name_from_subtitle(display_name, nick)
        if reduced_nick:
            return f"{display_name} {reduced_nick}".strip()
        return display_name

    return f"{display_name} {nick}".strip()
class PChomeCrawler:
    """Crawler for PChome 24h.

    Wraps a ``requests.Session`` with rate limiting and bounded retries and
    offers helpers for region pages, the product API and the search API.
    """

    # Base URLs
    BASE_URL = 'https://24h.pchome.com.tw'
    API_URL = 'https://ecapi-cdn.pchome.com.tw/cdn/ecshop/prodapi/v2/prod'
    IMAGE_BASE_URL = 'https://img.pchome.com.tw/cs'

    # Default request headers
    DEFAULT_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'application/json, text/html,application/xhtml+xml',
        'Accept-Language': 'zh-TW,zh;q=0.9,en;q=0.8',
        'Referer': 'https://24h.pchome.com.tw/',
    }

    # Product ID regular expression
    PRODUCT_ID_PATTERN = re.compile(r'[A-Z]{4}[A-Z0-9]{2}-?[A-Z0-9]{8,10}')

    def __init__(
        self,
        timeout: int = 30,
        delay: float = 1.0,
        max_retries: int = 2,
        retry_backoff: float = 0.8,
    ):
        """
        Initialise the crawler.

        Args:
            timeout: Request timeout in seconds.
            delay: Minimum interval between requests in seconds, to avoid
                hammering the site.
            max_retries: Number of retries for transient errors
                (negative values are clamped to 0).
            retry_backoff: Base delay in seconds for exponential backoff
                (negative values are clamped to 0).
        """
        self.timeout = timeout
        self.delay = delay
        self.max_retries = max(0, int(max_retries))
        self.retry_backoff = max(0.0, float(retry_backoff))
        self.session = requests.Session()
        self.session.headers.update(self.DEFAULT_HEADERS)
        self._last_request_time = 0

    def _rate_limit(self):
        """Sleep as needed so consecutive requests are at least ``delay`` seconds apart."""
        elapsed = time.time() - self._last_request_time
        if elapsed < self.delay:
            time.sleep(self.delay - elapsed)
        self._last_request_time = time.time()

    def _get_with_retry(self, url: str, **kwargs) -> "requests.Response":
        """GET with polite rate limiting and bounded retry for transient failures.

        Timeouts, connection errors and HTTP 429/500/502/503/504 are retried
        up to ``max_retries`` times, sleeping ``retry_backoff * 2**attempt``
        seconds between attempts. Other HTTP errors are raised immediately.

        Args:
            url: Target URL.
            **kwargs: Passed through to ``session.get``.

        Returns:
            The successful response.

        Raises:
            requests.RequestException: When the request fails and no retry is left.
        """
        retryable_statuses = {429, 500, 502, 503, 504}
        last_error = None
        for attempt in range(self.max_retries + 1):
            self._rate_limit()
            try:
                response = self.session.get(url, **kwargs)
                status_code = getattr(response, "status_code", 200)
                if (
                    status_code in retryable_statuses
                    and attempt < self.max_retries
                ):
                    # Remember the failure and fall through to the backoff sleep.
                    last_error = requests.HTTPError(
                        f"HTTP {status_code} for {url}",
                        response=response,
                    )
                else:
                    response.raise_for_status()
                    return response
            except (requests.Timeout, requests.ConnectionError, requests.HTTPError) as exc:
                last_error = exc
                response = getattr(exc, "response", None)
                status_code = getattr(response, "status_code", None)
                if (
                    attempt >= self.max_retries
                    or (
                        isinstance(exc, requests.HTTPError)
                        and status_code not in retryable_statuses
                    )
                ):
                    raise
            sleep_sec = self.retry_backoff * (2 ** attempt)
            if sleep_sec > 0:
                time.sleep(sleep_sec)
        # Defensive: the loop above returns or raises on its final attempt.
        if last_error:
            raise last_error
        raise requests.RequestException(f"GET failed: {url}")

    def _normalize_product_id(self, product_id: str) -> str:
        """
        Normalise a product ID.

        Args:
            product_id: Raw product ID.

        Returns:
            The ID with surrounding whitespace removed and, when it has no
            hyphen and is longer than 6 characters, a hyphen inserted after
            the 6th character (format: XXXXXX-XXXXXXXX).
        """
        product_id = product_id.strip()
        # Already hyphenated: return unchanged.
        if '-' in product_id:
            return product_id
        # Insert the hyphen after the 6th character.
        if len(product_id) > 6:
            return f"{product_id[:6]}-{product_id[6:]}"
        return product_id

    def _extract_product_ids_from_html(self, html: str) -> List[str]:
        """
        Extract product IDs from page HTML.

        Args:
            html: Page HTML content.

        Returns:
            Normalised product IDs, de-duplicated, in order of first
            appearance in the page.
        """
        raw_ids = self.PRODUCT_ID_PATTERN.findall(html)
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result is deterministic (a plain set gives an arbitrary order).
        return list(dict.fromkeys(self._normalize_product_id(pid) for pid in raw_ids))

    def fetch_region_page(self, region_code: str) -> Tuple[bool, str, List[str]]:
        """
        Fetch a region page and collect the product IDs found in it.

        Args:
            region_code: Region code (e.g. DDAB).

        Returns:
            (success flag, message, list of product IDs)
        """
        url = f"{self.BASE_URL}/region/{region_code}"
        try:
            response = self._get_with_retry(url, timeout=self.timeout)
            product_ids = self._extract_product_ids_from_html(response.text)
            logger.info(f"{url} 取得 {len(product_ids)} 個商品 ID")
            return True, f"成功取得 {len(product_ids)} 個商品", product_ids
        except requests.RequestException as e:
            logger.error(f"爬取 {url} 失敗: {e}")
            return False, f"請求失敗: {str(e)}", []

    def fetch_product_details(self, product_ids: List[str], batch_size: int = 20) -> Tuple[bool, str, List["PChomeProduct"]]:
        """
        Fetch product details in batches.

        Args:
            product_ids: Product IDs to look up.
            batch_size: IDs per API call (the API limit is roughly 20-30).
                Values below 1 are treated as 1.

        Returns:
            (success flag, message, list of products). The flag is True
            when at least one product was parsed.
        """
        if not product_ids:
            return False, "沒有提供商品 ID", []

        # range() rejects a zero step and yields nothing for a negative one.
        batch_size = max(1, int(batch_size or 1))

        all_products = []
        failed_count = 0

        # Process the IDs batch by batch.
        for i in range(0, len(product_ids), batch_size):
            batch = product_ids[i:i + batch_size]
            batch_no = i // batch_size + 1
            try:
                # Call the product API.
                params = {'id': ','.join(batch)}
                response = self._get_with_retry(
                    self.API_URL,
                    params=params,
                    timeout=self.timeout
                )
                data = response.json()
                crawled_at = datetime.now()

                if isinstance(data, dict):
                    product_entries = data.items()
                    product_count = len(data)
                elif isinstance(data, list):
                    product_entries = [
                        ((item or {}).get('Id') or f'index_{idx}', item)
                        for idx, item in enumerate(data)
                        if isinstance(item, dict)
                    ]
                    product_count = len(product_entries)
                    # Non-dict list entries cannot be parsed; count them as failures.
                    failed_count += max(0, len(data) - product_count)
                else:
                    logger.warning(
                        "PChome 商品 API 回傳格式異常 (批次 %s): %s",
                        batch_no,
                        type(data).__name__,
                    )
                    failed_count += len(batch)
                    continue

                # Parse each product entry.
                for prod_key, prod_data in product_entries:
                    try:
                        product = self._parse_product_data(prod_data, crawled_at)
                        if product:
                            all_products.append(product)
                    except Exception as e:
                        logger.warning(f"解析商品 {prod_key} 失敗: {e}")
                        failed_count += 1

                logger.info(f"批次 {batch_no}: 取得 {product_count} 個商品資料")
            except requests.RequestException as e:
                logger.error(f"API 請求失敗 (批次 {batch_no}): {e}")
                failed_count += len(batch)
            except ValueError as e:
                # response.json() raises a ValueError subclass on a non-JSON
                # body; treat it as a failed batch instead of aborting.
                logger.error(f"API 回應不是有效的 JSON (批次 {batch_no}): {e}")
                failed_count += len(batch)

        message = f"成功取得 {len(all_products)} 個商品資料"
        if failed_count > 0:
            # Separator keeps the two counts from running together.
            message += f",{failed_count} 個失敗"
        return len(all_products) > 0, message, all_products

    def _parse_product_data(self, data: dict, crawled_at: datetime) -> Optional["PChomeProduct"]:
        """
        Parse one product entry returned by the API.

        Args:
            data: Product entry from the API.
            crawled_at: Crawl timestamp stored on the result.

        Returns:
            A PChomeProduct, or None when the price is missing/zero or the
            entry cannot be parsed (the error is logged, not raised).
        """
        try:
            product_id = data.get('Id', '')
            # Drop a trailing "-000".
            if product_id.endswith('-000'):
                product_id = product_id[:-4]

            # Price information.
            # [2026-04-18 Taipei] Bug-1 defence, Layer C: if a PChome API change
            # makes the 'P' field disappear, the old code silently fell back to
            # zero -> $0 written to the DB -> bogus data through the whole
            # pipeline. Return None when the value is missing and let the
            # caller decide to skip. — Claude Opus 4.7
            price_info = data.get('Price', {})
            if isinstance(price_info, dict):
                price = price_info.get('P')
                if price is None or price == 0:
                    logger.warning(
                        f"[PChome] Id={data.get('Id', '?')} 價格欄位 Price.P 缺失或為 0,"
                        f"疑似 API 格式變更或商品下架,跳過此筆"
                    )
                    return None
                original_price = price_info.get('M', price)
            else:
                price = price_info
                if not price:
                    logger.warning(
                        f"[PChome] Id={data.get('Id', '?')} 價格欄位為非 dict 且為空,跳過"
                    )
                    return None
                original_price = price

            # Discount percentage, only when the list price exceeds the selling price.
            discount = None
            if original_price and original_price > price:
                discount = round((1 - price / original_price) * 100)

            # Image URL.
            pic_info = data.get('Pic', {})
            if isinstance(pic_info, dict):
                pic_path = pic_info.get('B', '')
            else:
                pic_path = pic_info or ''
            image_url = f"{self.IMAGE_BASE_URL}{pic_path}" if pic_path else ''

            name = data.get('Name', '') or ''
            subtitle = _clean_subtitle_text(data.get('Nick', '') or '')

            return PChomeProduct(
                product_id=product_id,
                name=name,
                price=price,
                original_price=original_price,
                discount=discount,
                image_url=image_url,
                product_url=f"{self.BASE_URL}/prod/{product_id}",
                stock=data.get('Qty', 0),
                store=data.get('Store', ''),
                rating=data.get('RatingValue'),
                review_count=data.get('ReviewCount', 0),
                is_on_sale=data.get('isOnSale', False),
                crawled_at=crawled_at,
                subtitle=subtitle,
                match_name=_build_match_name(name, subtitle),
            )
        except Exception as e:
            logger.error(f"解析商品資料失敗: {e}")
            return None

    def crawl_region(self, region_code: str) -> Tuple[bool, str, List["PChomeProduct"]]:
        """
        Crawl a region end to end (collect IDs, then fetch details).

        Args:
            region_code: Region code.

        Returns:
            (success flag, message, list of products)
        """
        # Step 1: collect product IDs.
        success, message, product_ids = self.fetch_region_page(region_code)
        if not success:
            return False, message, []
        if not product_ids:
            return False, "頁面中沒有找到商品", []

        # Step 2: fetch the details.
        success, message, products = self.fetch_product_details(product_ids)
        return success, message, products

    def search_products(
        self,
        keyword: str,
        limit: int = 50,
        max_pages: Optional[int] = None,
        sort: str = "rnk/dc",
    ) -> Tuple[bool, str, List["PChomeProduct"]]:
        """
        Search products through the search API.

        Args:
            keyword: Search keyword.
            limit: Maximum number of products to return.
            max_pages: Maximum number of result pages to scan; by default
                derived from ``limit`` and capped at 3.
            sort: Search ordering; defaults to relevance ranking (`rnk/dc`).

        Returns:
            (success flag, message, list of products)
        """
        search_url = "https://ecshweb.pchome.com.tw/search/v4.3/all/results"
        limit = max(1, int(limit or 1))
        page_cap = max_pages if max_pages is not None else min(3, max(1, (limit // 20) + 1))
        page_cap = max(1, int(page_cap or 1))

        try:
            product_ids = []
            seen_ids = set()
            pages_scanned = 0

            for page in range(1, page_cap + 1):
                params = {
                    'q': keyword,
                    'page': page,
                    'sort': sort,
                    'cateid': '24h',
                }
                response = self._get_with_retry(search_url, params=params, timeout=self.timeout)
                pages_scanned += 1
                data = response.json()
                # An unexpected (non-dict) payload is treated as "no results".
                prods = data.get('Prods', []) if isinstance(data, dict) else []
                if not prods:
                    break

                for item in prods:
                    if not isinstance(item, dict):
                        continue
                    product_id = item.get('Id', '')
                    if not product_id or product_id in seen_ids:
                        continue
                    seen_ids.add(product_id)
                    product_ids.append(product_id)
                    if len(product_ids) >= limit:
                        break

                if len(product_ids) >= limit:
                    break

            if not product_ids:
                return False, "沒有找到符合的商品", []

            # Fetch the details.
            success, message, products = self.fetch_product_details(product_ids[:limit])
            if success:
                message = f"{message};搜尋頁數 {pages_scanned};排序 {sort}"
            return success, message, products
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body from response.json().
            logger.error(f"搜尋失敗: {e}")
            return False, f"搜尋失敗: {str(e)}", []
# Default crawler instance (created on first use)
_crawler_instance = None


def get_crawler() -> PChomeCrawler:
    """Return the shared crawler instance, creating it on the first call (singleton)."""
    global _crawler_instance
    if _crawler_instance is not None:
        return _crawler_instance
    _crawler_instance = PChomeCrawler()
    return _crawler_instance
# Convenience functions
def crawl_pchome_region(region_code: str) -> Tuple[bool, str, List[dict]]:
    """
    Crawl a PChome region page with the shared crawler.

    Args:
        region_code: Region code (e.g. DDAB).

    Returns:
        (success flag, message, list of product dicts)
    """
    ok, msg, items = get_crawler().crawl_region(region_code)
    as_dicts = [item.to_dict() for item in items]
    return ok, msg, as_dicts
def search_pchome_products(keyword: str, limit: int = 50) -> Tuple[bool, str, List[dict]]:
    """
    Search PChome products with the shared crawler.

    Args:
        keyword: Search keyword.
        limit: Maximum number of products to return.

    Returns:
        (success flag, message, list of product dicts)
    """
    ok, msg, items = get_crawler().search_products(keyword, limit)
    as_dicts = [item.to_dict() for item in items]
    return ok, msg, as_dicts
def get_pchome_bestsellers(category: str, limit: int = 5) -> Tuple[bool, str, List[dict]]:
    """
    Get top products for a PChome category keyword.

    Args:
        category: Category keyword (e.g. '面膜', '乳液', '精華液').
        limit: Maximum number of products to return.

    Returns:
        (success flag, message, list of condensed product dicts with the
        keys name, price, original_price, discount, url, image)
    """
    # Uses the search API. No `sort` is passed, so search_products' default
    # ordering applies.
    # NOTE(review): the original comment said "sorted by sales" — confirm
    # which sort key is intended.
    ok, msg, items = get_crawler().search_products(category, limit=limit)
    if not (ok and items):
        return ok, msg, []

    # Condensed representation of each product.
    condensed = [
        {
            'name': item.name,
            'price': item.price,
            'original_price': item.original_price,
            'discount': item.discount,
            'url': item.product_url,
            'image': item.image_url
        }
        for item in items[:limit]
    ]
    return True, f"成功取得 {len(condensed)} 個熱銷商品", condensed
if __name__ == '__main__':
    # Manual smoke test: runs one region crawl and one search and prints samples.
    logging.basicConfig(level=logging.INFO)
    print("=== PChome 爬蟲測試 ===\n")

    # Region crawl
    print("[1] 測試館別爬取 (DDAB - 美妝保養)")
    success, msg, products = crawl_pchome_region('DDAB')
    print(f"結果: {msg}")
    if products:
        print("範例商品:")
        for sample in products[:3]:
            print(f" - {sample['name'][:30]}... ${sample['price']} (原價 ${sample['original_price']})")

    print("\n" + "=" * 50 + "\n")

    # Keyword search
    print("[2] 測試搜尋 (關鍵字: iPhone)")
    success, msg, products = search_pchome_products('iPhone', limit=5)
    print(f"結果: {msg}")
    if products:
        print("搜尋結果:")
        for sample in products[:3]:
            print(f" - {sample['name'][:30]}... ${sample['price']}")
# =============================================================================
# 高階競品比較函數(供 openclaw_bot_routes 使用)
# =============================================================================
def search_pchome(keyword: str, limit: int = 10) -> List[dict]:
    """
    Search PChome products (simplified: returns the list directly).

    Args:
        keyword: Search keyword.
        limit: Maximum number of products to return.

    Returns:
        [{'name', 'price', 'url', 'in_stock'}, ...]; an empty list when the
        search fails.
    """
    ok, _, products = search_pchome_products(keyword, limit=limit)
    if not ok:
        return []
    return [
        {
            'name': p.get('name', ''),
            'price': p.get('price', 0),
            'url': p.get('product_url', ''),
            # 'stock' is copied from the API's Qty field and may be None;
            # `or 0` keeps the comparison from raising TypeError.
            'in_stock': (p.get('stock') or 0) > 0,
        }
        for p in products
    ]
def find_best_match(keyword: str, momo_price: float) -> Optional[dict]:
    """
    Search PChome for *keyword* and return the best-matching product.

    Candidates are scored with ``score_marketplace_match``; the highest
    score wins and must reach 0.76. If the matcher cannot be imported or
    raises, the candidate whose price is closest to *momo_price* is
    returned instead (without ``match_score`` / ``match_reasons``).

    Returns:
        {'name', 'price', 'url', 'in_stock', 'price_diff',
         'match_score', 'match_reasons'} or None
    """
    candidates = search_pchome(keyword, limit=5)
    if not candidates:
        return None

    try:
        from services.marketplace_product_matcher import score_marketplace_match

        winner, top_score, top_diag = None, 0.0, None
        for candidate in candidates:
            diag = score_marketplace_match(
                keyword,
                candidate.get('name', ''),
                momo_price=momo_price,
                competitor_price=candidate.get('price'),
            )
            # Strict comparison: the first candidate wins ties.
            if diag.score > top_score:
                winner, top_score, top_diag = candidate, diag.score, diag

        if not winner or top_score < 0.76:
            return None

        winner['match_score'] = top_score
        winner['match_reasons'] = list(getattr(top_diag, 'reasons', ()) or ())
    except Exception:
        logger.warning("[PChome] identity matcher unavailable, fallback to price distance", exc_info=True)
        winner = min(candidates, key=lambda r: abs(r['price'] - momo_price))

    winner['price_diff'] = winner['price'] - momo_price
    return winner
def compare_product(
    momo_name: str,
    momo_price: float,
    momo_icode: str = '',
) -> dict:
    """
    Compare a single momo product against PChome.

    Any exception raised during matching is logged and the "not found"
    result is returned.

    Returns:
        {
            'momo_name', 'momo_price', 'momo_icode',
            'found': bool,
            'pc_name', 'pc_price', 'pc_url',
            'price_diff': pc_price - momo_price
                (positive = PChome is pricier = momo has the advantage),
            'price_diff_pct': %,
            'match_score': only present when a match was found
        }
    """
    outcome: dict = {
        'momo_name': momo_name,
        'momo_price': momo_price,
        'momo_icode': momo_icode,
        'found': False,
        'pc_name': '',
        'pc_price': 0,
        'pc_url': '',
        'price_diff': 0,
        'price_diff_pct': 0.0,
    }
    try:
        hit = find_best_match(momo_name, momo_price)
        if hit:
            pc_price = float(hit.get('price', 0))
            gap = pc_price - momo_price
            # Avoid dividing by a zero/empty momo price.
            gap_pct = (gap / momo_price * 100) if momo_price else 0
            outcome.update({
                'found': True,
                'pc_name': hit.get('name', ''),
                'pc_price': pc_price,
                'pc_url': hit.get('url', ''),
                'price_diff': gap,
                'price_diff_pct': gap_pct,
                'match_score': hit.get('match_score', 0),
            })
    except Exception as e:
        logger.warning("[PChome] compare_product error: %s", e)
    return outcome
def batch_compare_top(
    db,
    top_n: int = 30,
    date_str: str = '',
) -> List[dict]:
    """
    Compare momo's TOP-N best sellers against PChome in bulk.

    Products are ranked by summed ``daily_sales.revenue``; each one is
    compared using its most recent ``price_records`` price. Rows without a
    positive latest price are skipped. Errors are logged, and whatever was
    collected so far is returned.

    Args:
        db: SQLAlchemy engine (returned by _db()).
        top_n: Number of top-selling momo products to take.
        date_str: Date string in 'YYYY/MM/DD' form; when empty no date
            filter is applied.

    Returns:
        [compare_product() result, ...]
    """
    compared: List[dict] = []
    try:
        from sqlalchemy import text as _text

        params: dict = {'limit': top_n}
        date_filter = ''
        if date_str:
            date_filter = "WHERE DATE(s.date) = DATE(:date_str)"
            params['date_str'] = date_str.replace('/', '-')

        # The SQL text is kept flush-left so the statement sent to the
        # database is character-for-character what it was.
        sql = f"""
SELECT p.name, p.i_code,
COALESCE(SUM(s.revenue), 0) AS total_rev,
(
SELECT pr.price
FROM price_records pr
WHERE pr.product_id = p.id
ORDER BY pr.timestamp DESC, pr.id DESC
LIMIT 1
) AS momo_price
FROM products p
JOIN daily_sales s ON p.id = s.product_id
{date_filter}
GROUP BY p.id, p.name, p.i_code
ORDER BY total_rev DESC
LIMIT :limit
"""
        with db.connect() as conn:
            rows = conn.execute(_text(sql), params).fetchall()

        for row in rows:
            name, icode = row[0], row[1]
            rev = float(row[2] or 0)
            momo_price = float(row[3] or 0)
            if momo_price <= 0:
                logger.warning("[PChome] skip %s because latest momo price is missing; total_rev=%s", icode, rev)
                continue
            try:
                compared.append(compare_product(name, momo_price, icode))
                time.sleep(0.4)  # throttle
            except Exception as e:
                logger.warning("[PChome] batch item error: %s", e)
    except Exception as e:
        logger.error("[PChome] batch_compare_top error: %s", e)
    return compared
def save_matches(db, results: List[dict]) -> None:
    """
    Write comparison results to the pchome_matches table (created if missing).

    Only results with a truthy 'found' are written; rows are upserted on
    ``momo_icode``. Errors are logged, not raised.
    """
    if not results:
        return
    try:
        from sqlalchemy import text as _text

        ensure_tables(db)
        # Built once and reused for every row; SQL text kept verbatim.
        upsert = _text("""
INSERT INTO pchome_matches
(momo_icode, momo_name, momo_price,
pc_name, pc_price, pc_url,
price_diff, price_diff_pct, matched_at)
VALUES
(:icode, :mname, :mprice,
:pcname, :pcprice, :pcurl,
:diff, :pct, NOW())
ON CONFLICT (momo_icode) DO UPDATE SET
pc_name = EXCLUDED.pc_name,
pc_price = EXCLUDED.pc_price,
pc_url = EXCLUDED.pc_url,
price_diff = EXCLUDED.price_diff,
price_diff_pct = EXCLUDED.price_diff_pct,
matched_at = NOW()
""")
        with db.begin() as conn:
            for entry in results:
                if entry.get('found'):
                    conn.execute(upsert, {
                        'icode': entry.get('momo_icode', ''),
                        'mname': entry.get('momo_name', ''),
                        'mprice': entry.get('momo_price', 0),
                        'pcname': entry.get('pc_name', ''),
                        'pcprice': entry.get('pc_price', 0),
                        'pcurl': entry.get('pc_url', ''),
                        'diff': entry.get('price_diff', 0),
                        'pct': entry.get('price_diff_pct', 0),
                    })
    except Exception as e:
        logger.warning("[PChome] save_matches error: %s", e)
def ensure_tables(db) -> None:
    """Create the pchome_matches table if it does not exist (idempotent); errors are logged."""
    # DDL text kept verbatim.
    ddl = """
CREATE TABLE IF NOT EXISTS pchome_matches (
id SERIAL PRIMARY KEY,
momo_icode VARCHAR(64) UNIQUE,
momo_name TEXT,
momo_price NUMERIC(12,2),
pc_name TEXT,
pc_price NUMERIC(12,2),
pc_url TEXT,
price_diff NUMERIC(12,2),
price_diff_pct NUMERIC(8,2),
matched_at TIMESTAMP DEFAULT NOW()
)
"""
    try:
        from sqlalchemy import text as _text

        with db.begin() as conn:
            conn.execute(_text(ddl))
    except Exception as e:
        logger.warning("[PChome] ensure_tables error: %s", e)
def fmt_compare_msg(results: List[dict], keyword: str = '') -> str:
    """
    Format a single-product comparison message (Telegram Markdown).

    Args:
        results: compare_product()-style dicts; at most the first 5 are shown.
        keyword: Keyword shown in the header / "not found" notice.

    Returns:
        The message text. Differences within NT$10 are reported as even.
    """
    if not results:
        return f"⚠️ 找不到「{keyword}」的 PChome 比價資料"

    lines = [f"🔍 *momo vs PChome 比價|{keyword}*\n"]
    for r in results[:5]:
        found = r.get('found')
        mname = r.get('momo_name', '')[:28]
        mprice = r.get('momo_price', 0)
        if not found:
            lines.append(f"{mname}\n momo `NT${mprice:,.0f}` PChome _未找到_\n")
            continue

        pcprice = r.get('pc_price', 0)
        diff = r.get('price_diff', 0)
        pct = r.get('price_diff_pct', 0)
        pcurl = r.get('pc_url', '')

        # The percentage goes in parentheses: without a separator the amount
        # and the percentage ran together (e.g. "NT$10010.0%").
        if diff > 10:
            icon = "✅"  # PChome pricier -> momo has the advantage
            note = f"momo 便宜 NT${abs(diff):,.0f}({abs(pct):.1f}%)"
        elif diff < -10:
            icon = "⚠️"  # momo pricier
            note = f"PChome 便宜 NT${abs(diff):,.0f}({abs(pct):.1f}%)"
        else:
            icon = "➖"  # within NT$10 either way
            note = "價差 <NT$10,持平"

        lines.append(
            f"{icon} *{mname}*\n"
            f" momo `NT${mprice:,.0f}` "
            f"[PChome `NT${pcprice:,.0f}`]({pcurl})\n"
            f" {note}\n"
        )
    return "\n".join(lines)
def fmt_daily_report(results: List[dict], date_str: str = '') -> str:
    """
    Format the daily competitor report (Telegram Markdown).

    Args:
        results: compare_product()-style dicts.
        date_str: Label for the header; defaults to today as 'YYYY/MM/DD'.

    Returns:
        The report text: summary counts, the average price difference and
        up to five entries each for "momo cheaper" and "PChome cheaper"
        (difference beyond NT$10), in input order.
    """
    found = [r for r in results if r.get('found')]
    pc_wins = [r for r in found if r.get('price_diff', 0) > 10]  # PChome pricier -> momo ahead
    mo_wins = [r for r in found if r.get('price_diff', 0) < -10]  # momo pricier -> PChome ahead
    avg_pct = (sum(r.get('price_diff_pct', 0) for r in found) / len(found)
               if found else 0)
    label = date_str or datetime.now().strftime('%Y/%m/%d')

    lines = [
        f"📊 *競品比價日報|{label}*\n",
        f"🔢 掃描 `{len(results)}` 件 | 比對成功 `{len(found)}` 件",
        f"✅ momo 具優勢 `{len(pc_wins)}` 件 | ⚠️ 需注意 `{len(mo_wins)}` 件",
        # Closing parenthesis restored; the legend was left unbalanced.
        f"📈 平均價差 `{avg_pct:+.1f}%`(正=PChome貴=momo有優勢)\n",
    ]

    if pc_wins:
        lines.append("🏆 *momo 優勢商品(TOP5)*")
        for r in pc_wins[:5]:
            lines.append(
                # Same bullet layout as the "needs attention" list below.
                f" ✅ {r['momo_name'][:20]} "
                f"momo `NT${r['momo_price']:,.0f}` vs PC `NT${r['pc_price']:,.0f}`"
                f" 省 NT${abs(r['price_diff']):,.0f}"
            )
        lines.append("")

    if mo_wins:
        lines.append("⚠️ *需注意商品(PChome 更便宜 TOP5)*")
        for r in mo_wins[:5]:
            lines.append(
                f" ⚠️ {r['momo_name'][:20]} "
                f"momo `NT${r['momo_price']:,.0f}` vs PC `NT${r['pc_price']:,.0f}`"
                f" 差 NT${abs(r['price_diff']):,.0f}"
            )
        lines.append("")

    lines.append("_資料來源:PChome 24h 即時爬取_")
    return "\n".join(lines)