diff --git a/routes/api_routes.py b/routes/api_routes.py
index 86e04fe..cf34bd5 100644
--- a/routes/api_routes.py
+++ b/routes/api_routes.py
@@ -9,8 +9,10 @@ import os
import threading
import importlib
from datetime import datetime, timezone, timedelta
+import re
from flask import Blueprint, request, jsonify
from sqlalchemy import func, desc, text
+from urllib.parse import parse_qs, urlparse
from auth import login_required
from config import BASE_DIR
@@ -18,6 +20,8 @@ from database.manager import DatabaseManager
from database.models import Product, PriceRecord
from database.edm_models import PromoProduct
from services.logger_manager import SystemLogger
+from utils.momo_url_utils import build_momo_product_url, normalize_momo_product_url
+from utils.momo_url_utils import is_probable_momo_icode
# 時區設定
TAIPEI_TZ = timezone(timedelta(hours=8))
@@ -420,6 +424,9 @@ def get_price_change_details():
db = DatabaseManager()
session = db.get_session()
try:
+ def _safe_product_url(product):
+     """Return the product's normalized MOMO URL, or one rebuilt from its i_code."""
+     normalized = normalize_momo_product_url(product.url, product.i_code)
+     return normalized or build_momo_product_url(product.i_code)
+
# 取得今日起始時間
now_taipei = datetime.now(TAIPEI_TZ)
today_start = now_taipei.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=None)
@@ -467,7 +474,7 @@ def get_price_change_details():
'product_id': product.i_code,
'name': product.name,
'category': product.category,
- 'url': product.url,
+ 'url': _safe_product_url(product),
'image_url': product.image_url or '/static/placeholder.png',
'old_price': old_price,
'current_price': record.price,
@@ -484,7 +491,7 @@ def get_price_change_details():
'product_id': product.i_code,
'name': product.name,
'category': product.category,
- 'url': product.url,
+ 'url': _safe_product_url(product),
'image_url': product.image_url or '/static/placeholder.png',
'old_price': old_price,
'current_price': record.price,
@@ -509,7 +516,7 @@ def get_price_change_details():
'product_id': product.i_code,
'name': product.name,
'category': product.category,
- 'url': product.url,
+ 'url': _safe_product_url(product),
'image_url': product.image_url or '/static/placeholder.png',
'last_price': last_record.price,
'update_time': product.updated_at.strftime('%Y-%m-%d %H:%M') if product.updated_at else ''
@@ -522,3 +529,62 @@ def get_price_change_details():
return jsonify({'products': []}), 500
finally:
session.close()
+
+
+@api_bp.route('/api/track_momo_link', methods=['POST'])
+@login_required
+def track_momo_link():
+    """API: record MOMO link clicks and abnormal-open events.
+
+    Used to diagnose where automatically opened MOMO links come from. Reads a
+    JSON object from the request body; only ``url`` is required.
+
+    Returns:
+        ``({'status': 'ignored', 'reason': 'missing_url'}, 400)`` when ``url``
+        is absent or the body is not a JSON object; otherwise
+        ``{'status': 'ok'}`` after writing one log line (warning for a blocked
+        link, info for a normal one).
+    """
+    def _for_log(value) -> str:
+        """Collapse control characters so client-supplied text cannot forge log lines."""
+        return re.sub(r'[\x00-\x1f\x7f]+', ' ', str(value))
+
+    def _is_blocked_momo_url(url: str) -> bool:
+        """Return True when the URL looks like a dead or non-product MOMO page."""
+        raw = str(url or '')
+        # 'ec404' also matches 'ec404.html', so one substring test is enough.
+        if 'ec404' in raw.lower():
+            return True
+
+        try:
+            parsed = urlparse(raw)
+            path = (parsed.path or '').lower()
+            if 'goodsdetail' in path:
+                query = parse_qs(parsed.query or '')
+                i_code = (query.get('i_code') or [''])[0]
+                if i_code:
+                    return not is_probable_momo_icode(i_code)
+                # No i_code in the query string: accept only the path form
+                # /goodsdetail/<something>.
+                if not re.search(r'/goodsdetail/[^/]+', path):
+                    return True
+        except Exception as exc:
+            # Best effort: a URL we cannot classify must not break tracking,
+            # but the failure should still be visible in the log.
+            sys_log.warning(f"[Web] [MOMO_LINK_TRACK] url check failed: {_for_log(exc)}")
+
+        return False
+
+    payload = request.get_json(silent=True)
+    if not isinstance(payload, dict):
+        # Missing/invalid JSON, or valid JSON that is not an object (list,
+        # string, number): there are no fields to read.
+        payload = {}
+
+    def _field(name: str, default: str = '') -> str:
+        """Read one payload field as a stripped string, using default when falsy."""
+        return str(payload.get(name) or default).strip()
+
+    url = _field('url')
+    if not url:
+        return jsonify({'status': 'ignored', 'reason': 'missing_url'}), 400
+
+    # Fall back to the clicked URL when the client did not report a final one.
+    effective_url = _field('effective_url') or url
+    is_blocked = _is_blocked_momo_url(url) or _is_blocked_momo_url(effective_url)
+
+    prefix = "[Web] [MOMO_LINK_TRACK] "
+    platform = _for_log(_field('platform', 'momo'))
+    source = _for_log(_field('source', 'unknown'))
+    page = _for_log(_field('page'))
+    i_code = _for_log(_field('i_code'))
+    product_id = _for_log(_field('product_id'))
+    label = _for_log(_field('label'))
+    product_name = _for_log(_field('product_name'))
+    referer = _for_log(request.headers.get('Referer', ''))
+    user_ip = _for_log(request.remote_addr)
+
+    msg = (
+        f"{prefix}platform={platform} source={source} page={page} "
+        f"i_code={i_code} product_id={product_id} label={label} "
+        f"name={product_name} url={_for_log(url)} effective_url={_for_log(effective_url)} "
+        f"ip={user_ip} referer={referer}"
+    )
+
+    if is_blocked:
+        sys_log.warning(msg + " | status=blocked_link")
+    else:
+        sys_log.info(msg + " | status=tracked")
+
+    return jsonify({'status': 'ok'})
diff --git a/routes/dashboard_routes.py b/routes/dashboard_routes.py
index 5107f2c..c9b9a9e 100644
--- a/routes/dashboard_routes.py
+++ b/routes/dashboard_routes.py
@@ -27,6 +27,7 @@ from services.cache_manager import (
_DASHBOARD_SHARED_CACHE_FILE,
_DASHBOARD_STALE_CACHE_FILE,
)
+from utils.momo_url_utils import build_momo_product_url, normalize_momo_product_url
# 時區設定
TAIPEI_TZ = timezone(timedelta(hours=8))
@@ -47,9 +48,7 @@ def _build_pchome_product_url(product_id):
def _build_momo_product_url(i_code):
- if not i_code:
- return None
- return f"https://www.momoshop.com.tw/goods/GoodsDetail.jsp?i_code={str(i_code).strip()}"
+ return build_momo_product_url(i_code)
def _to_float(value):
@@ -189,6 +188,7 @@ def _ai_pick_evidence_fields(model_footprint):
def _dashboard_decision_row(row, tone):
sku = str(row.get('sku') or '')
pchome_id = row.get('competitor_product_id')
+ momo_url = normalize_momo_product_url(row.get('momo_url'), sku) or _build_momo_product_url(sku)
return {
'sku': sku,
'name': row.get('name') or '',
@@ -200,7 +200,7 @@ def _dashboard_decision_row(row, tone):
'confidence': _to_float(row.get('confidence')),
'reason': row.get('reason') or '',
'tone': tone,
- 'momo_url': row.get('momo_url') or _build_momo_product_url(sku),
+ 'momo_url': momo_url,
'pchome_id': pchome_id,
'pchome_name': row.get('competitor_product_name') or '',
'pchome_url': _build_pchome_product_url(pchome_id),
@@ -246,11 +246,12 @@ def _load_competitor_decision_overview(session, latest_items=None):
sku = str(getattr(product, 'i_code', '') or '')
if not sku:
continue
+ safe_product_url = normalize_momo_product_url(getattr(product, 'url', None), sku)
item_map[sku] = {
'sku': sku,
'name': getattr(product, 'name', '') or '',
'category': getattr(product, 'category', '') or '',
- 'momo_url': getattr(product, 'url', None) or _build_momo_product_url(sku),
+ 'momo_url': safe_product_url or _build_momo_product_url(sku),
'momo_price': _to_float(getattr(record, 'price', None)) or 0,
}
@@ -350,7 +351,7 @@ def _load_competitor_decision_overview(session, latest_items=None):
'name': row['name'],
'category': row['category'],
'momo_price': row['momo_price'],
- 'momo_url': row['momo_url'],
+ 'momo_url': normalize_momo_product_url(row.get('momo_url'), row.get('sku')) or _build_momo_product_url(row.get('sku')),
}
for row in sorted(pending_items, key=lambda row: row['momo_price'], reverse=True)[:3]
]
@@ -541,7 +542,7 @@ def _load_competitor_decision_overview(session, latest_items=None):
'name': row.get('name') or '',
'category': row.get('category') or '',
'momo_price': _to_float(row.get('momo_price')) or 0,
- 'momo_url': row.get('momo_url') or _build_momo_product_url(row.get('sku')),
+ 'momo_url': normalize_momo_product_url(row.get('momo_url'), row.get('sku')) or _build_momo_product_url(row.get('sku')),
}
for row in session.execute(pending_sql).mappings().all()
]
@@ -956,6 +957,8 @@ def get_consolidated_data():
unique_items = []
for r in latest_records:
pid = r.product_id
+ product = r.product
+ safe_product_url = normalize_momo_product_url(getattr(product, 'url', None), getattr(product, 'i_code', ''))
price_7d = prices_7d_ago_map.get(pid)
price_30d = prices_30d_ago_map.get(pid)
@@ -991,6 +994,7 @@ def get_consolidated_data():
unique_items.append({
'record': r,
+ 'safe_product_url': safe_product_url or _build_momo_product_url(getattr(product, 'i_code', '')),
'stats': {'7d_diff': stats_7d_diff, '30d_diff': stats_30d_diff, '1d_diff': today_diff},
'yesterday_diff': yesterday_diff,
'today_changes': today_changes,
@@ -1384,6 +1388,11 @@ def index():
item['safe_created_at'] = getattr(item['record'].product, 'created_at', None)
sku = str(item['record'].product.i_code)
item['ai_pick'] = ai_pick_map.get(sku)
+ item['safe_momo_url'] = (
+ item.get('safe_product_url')
+ or normalize_momo_product_url(item['record'].product.url, sku)
+ or _build_momo_product_url(sku)
+ )
# 為當前頁面項目添加顏色
for item in paged_items:
diff --git a/routes/edm_routes.py b/routes/edm_routes.py
index b6d903e..fb1240c 100644
--- a/routes/edm_routes.py
+++ b/routes/edm_routes.py
@@ -16,6 +16,7 @@ from database.manager import DatabaseManager
from database.models import Product
from database.edm_models import PromoProduct
from services.logger_manager import SystemLogger
+from utils.momo_url_utils import build_momo_product_url, normalize_momo_product_url
# 時區設定
TAIPEI_TZ = timezone(timedelta(hours=8))
@@ -217,6 +218,7 @@ def _build_promo_dashboard_data(session, page_type, page_name, sort_by, order):
# 8. 附加分類資訊到每個 item
for item in items_in_batch:
+ item.safe_product_url = normalize_momo_product_url(item.url, item.i_code) or build_momo_product_url(item.i_code)
item.main_category = product_categories.get(item.i_code)
if item.main_category:
item.category_color = get_color_for_string(item.main_category)
diff --git a/routes/export_routes.py b/routes/export_routes.py
index 78d5195..8e70467 100644
--- a/routes/export_routes.py
+++ b/routes/export_routes.py
@@ -20,6 +20,7 @@ from database.manager import DatabaseManager
from database.models import Product, PriceRecord
from services.exporter import Exporter
from services.logger_manager import SystemLogger
+from utils.momo_url_utils import build_momo_product_url, normalize_momo_product_url
# 時區設定
TAIPEI_TZ = timezone(timedelta(hours=8))
@@ -178,8 +179,9 @@ def export_excel_ai_picks():
export_rows = []
for row in rows:
sku = str(row.get('sku') or '')
+ normalized_sku = str(sku or '').strip()
pchome_id = row.get('competitor_product_id') or ''
- momo_url = row.get('momo_url') or f"https://www.momoshop.com.tw/goods/GoodsDetail.jsp?i_code={sku}"
+ momo_url = normalize_momo_product_url(row.get('momo_url'), normalized_sku) or build_momo_product_url(normalized_sku)
pchome_url = f"https://24h.pchome.com.tw/prod/{str(pchome_id).strip()}" if pchome_id else ''
footprint = row.get('model_footprint') or {}
if isinstance(footprint, str):
@@ -392,6 +394,7 @@ def export_price_changes():
for product, record, old_price in products:
change = record.price - old_price
change_pct = (change / old_price * 100) if old_price > 0 else 0
+ safe_product_url = normalize_momo_product_url(product.url, product.i_code) or build_momo_product_url(product.i_code)
ws.append([
product.i_code,
product.name,
@@ -401,7 +404,7 @@ def export_price_changes():
change,
f"{change_pct:.2f}%",
record.timestamp.strftime('%Y-%m-%d %H:%M'),
- product.url
+ safe_product_url
])
# 調整欄寬
diff --git a/routes/price_comparison_routes.py b/routes/price_comparison_routes.py
index c6b981f..8a61186 100644
--- a/routes/price_comparison_routes.py
+++ b/routes/price_comparison_routes.py
@@ -6,6 +6,7 @@ from flask import Blueprint, request, jsonify, render_template
import logging
from auth import login_required
+from utils.momo_url_utils import extract_momo_i_code, is_probable_momo_icode
from services.price_comparison import (
compare_brand_prices,
BRAND_ALIASES,
@@ -200,11 +201,26 @@ def parse_momo_excel():
products = []
for idx, row in df.iterrows():
try:
+ raw_id = ''
+ if id_col:
+ raw_id = str(row[id_col]).strip()
+ if raw_id in {'nan', 'None', 'NoneType', 'nan.'}:
+ raw_id = ''
+ raw_url = ''
+ if url_col:
+ raw_url = str(row[url_col]).strip()
+ if raw_url in {'nan', 'None', 'NoneType'}:
+ raw_url = ''
+
+ product_id = raw_id if is_probable_momo_icode(raw_id) else extract_momo_i_code(raw_url) or ''
+ if not is_probable_momo_icode(product_id):
+ product_id = ''
+
product = {
'name': str(row[name_col]),
'price': int(float(row[price_col])),
- 'product_id': str(row[id_col]) if id_col else f'momo_{idx}',
- 'url': str(row[url_col]) if url_col else ''
+ 'product_id': product_id,
+ 'url': raw_url
}
products.append(product)
except (ValueError, TypeError):
@@ -263,17 +279,22 @@ def quick_compare():
}), 400
# 補充缺失欄位
- for i, p in enumerate(pchome_products):
- if 'product_id' not in p:
- p['product_id'] = f'pchome_{i}'
- if 'product_url' not in p:
- p['product_url'] = ''
+ def _normalize_product_id(products_list, source_prefix, id_field, url_field):
+     """Fill in the ID and URL fields of each product dict, in place.
+
+     MOMO rows (source_prefix == 'momo') have their ID validated as an
+     i_code and, when invalid, recovered from the row's URL. Rows from any
+     other source keep the ID they already carry: checking e.g. a PChome ID
+     against the MOMO i_code format would presumably reject it and replace
+     a real ID with a placeholder.
+     NOTE(review): confirm against is_probable_momo_icode.
+
+     A '<source_prefix>_<index>' placeholder is used only when no usable ID
+     remains, and a missing or None URL becomes ''.
+     """
+     is_momo = source_prefix == 'momo'
+     for i, p in enumerate(products_list):
+         product_id = str(p.get(id_field) or '').strip()
+         if is_momo and not is_probable_momo_icode(product_id):
+             # Try to recover the i_code from the product URL.
+             product_id = extract_momo_i_code(p.get(url_field)) or ''
+             if not is_probable_momo_icode(product_id):
+                 product_id = ''
+         p[id_field] = product_id or f'{source_prefix}_{i}'
+         if p.get(url_field) is None:
+             p[url_field] = ''
- for i, p in enumerate(momo_products):
- if 'product_id' not in p:
- p['product_id'] = f'momo_{i}'
- if 'url' not in p:
- p['url'] = ''
+ _normalize_product_id(pchome_products, 'pchome', 'product_id', 'product_url')
+ _normalize_product_id(momo_products, 'momo', 'product_id', 'url')
# 執行比價 (不限定品牌)
result = compare_brand_prices('', pchome_products, momo_products)
diff --git a/scheduler.py b/scheduler.py
index 5d610ea..a45179e 100644
--- a/scheduler.py
+++ b/scheduler.py
@@ -20,6 +20,7 @@ from database.models import Product, PriceRecord
from database.edm_models import PromoProduct
from services.notification_manager import NotificationManager
from services.edm_notifier import EdmNotifier # V-New: 導入新的通知模組
+from utils.momo_url_utils import normalize_momo_product_url
# V-Fix: 改為匯入讀取函式,而非靜態變數,以支援動態更新
from config import load_momo_categories
@@ -310,7 +311,7 @@ def run_momo_task():
if not link_els: continue
link_url = link_els[0].get_attribute("href")
- if not link_url or "javascript" in link_url:
+ if not link_url:
continue
# 從 URL 提取 i_code
@@ -331,6 +332,13 @@ def run_momo_task():
except ValueError:
i_code = i_code_raw.upper()
+ product_url = normalize_momo_product_url(link_url, i_code)
+ if not product_url:
+ logging.warning(
+ f"[Crawler] [MOMO] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}"
+ )
+ continue
+
# 提取名稱
name_els = container.find_elements(By.CSS_SELECTOR, ".prdName, .goodsName, .productName, .title")
if name_els:
@@ -369,7 +377,7 @@ def run_momo_task():
'i_code': str(i_code),
'name': title,
'category': cat_name,
- 'url': link_url,
+ 'url': product_url,
'image_url': image_url,
'price': price_val
})
@@ -396,6 +404,9 @@ def run_momo_task():
else:
if product.category != item['category']:
product.category = item['category']
+ normalized_existing_url = normalize_momo_product_url(item['url'], item['i_code'])
+ if product.url != normalized_existing_url:
+ product.url = normalized_existing_url
if item['image_url']:
product.image_url = item['image_url']
@@ -724,13 +735,20 @@ def run_edm_task(lpn_code="O1K5FBOqsvN"):
previous_price = prev_record.previous_price
is_changed = status_change != "NONE"
+ normalized_link_url = normalize_momo_product_url(link_url, i_code)
+ if not normalized_link_url:
+ logging.warning(
+ f"[Crawler] [EDM] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}"
+ )
+ continue
+
new_promo = PromoProduct(
batch_id=batch_id,
i_code=i_code,
name=name,
price=price,
discount_text=discount_text,
- url=link_url,
+ url=normalized_link_url,
previous_price=previous_price, # V9.64: 寫入舊價格
time_slot=time_slot,
status_change=status_change if is_changed else "ACTIVE",
@@ -1162,8 +1180,15 @@ def run_festival_task(lpn_code="O7ylWfihYUM"):
logging.info(f"[Crawler] [Festival] -> 狀態: 圖片更新 (UPDATE)")
is_changed = status_change != "NONE"
+ normalized_link_url = normalize_momo_product_url(link_url, i_code)
+ if not normalized_link_url:
+ logging.warning(
+ f"[Crawler] [Festival] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}"
+ )
+ continue
+
new_promo = PromoProduct(
- batch_id=batch_id, i_code=i_code, name=name, price=price, url=link_url,
+ batch_id=batch_id, i_code=i_code, name=name, price=price, url=normalized_link_url,
image_url=image_url, previous_price=previous_price, time_slot=group_title,
status_change=status_change if is_changed else "ACTIVE", crawled_at=now, activity_time_text=activity_name,
session_time_text=group_title, page_type=PAGE_TYPE
@@ -1497,8 +1522,15 @@ def run_promo_event_task(lpn_code, page_type, activity_name):
logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 圖片更新 (UPDATE)")
is_changed = status_change != "NONE"
+ normalized_link_url = normalize_momo_product_url(link_url, i_code)
+ if not normalized_link_url:
+ logging.warning(
+ f"[Crawler] [{page_type.upper()}] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}"
+ )
+ continue
+
new_promo = PromoProduct(
- batch_id=batch_id, i_code=i_code, name=name, price=price, url=link_url,
+ batch_id=batch_id, i_code=i_code, name=name, price=price, url=normalized_link_url,
image_url=image_url, previous_price=previous_price, time_slot=group_title,
status_change=status_change if is_changed else "ACTIVE", crawled_at=now, activity_time_text=activity_name,
session_time_text=group_title, page_type=page_type
diff --git a/scripts/tools/sanitize_momo_urls.py b/scripts/tools/sanitize_momo_urls.py
new file mode 100644
index 0000000..5027ac9
--- /dev/null
+++ b/scripts/tools/sanitize_momo_urls.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+"""
+修正 MOMO 商品與促銷商品網址中的壞連結(如 javascript:void(0)、EC404、非商品頁)
+將可修正者改為:
+1) 以 i_code 組出正確商品網址
+2) 無法修正時清空網址 (避免連到錯頁)
+"""
+import argparse
+import logging
+import os
+import sys
+
+# Three directory levels above this file (scripts/tools/<this file>), i.e. the repository root.
+# Inserted first on sys.path, presumably so the project imports below resolve when run as a script.
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.insert(0, BASE_DIR)
+
+from database.manager import DatabaseManager
+from database.models import Product
+from database.edm_models import PromoProduct
+from utils.momo_url_utils import normalize_momo_product_url
+
+
+# Log INFO and above; each line is prefixed with a timestamp and the level name.
+logging.basicConfig(
+ level=logging.INFO,
+ format="%(asctime)s [%(levelname)s] %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+
+def _sanitize_records(records, label, commit=False):
+    """Normalize the ``url`` of each record and tally what changes.
+
+    Args:
+        records: iterable of rows exposing ``url`` and ``i_code`` (``id`` is
+            read only for log output).
+        label: table name used in the log lines.
+        commit: when True the new value is assigned to ``record.url``;
+            when False the change is only logged and counted.
+
+    Returns:
+        dict with the ``unchanged``, ``updated`` and ``cleared`` counts.
+    """
+    updated = 0
+    cleared = 0
+    unchanged = 0
+
+    for record in records:
+        old_url = getattr(record, "url", None)
+        if not old_url:
+            # Nothing stored, so there is nothing to repair.
+            unchanged += 1
+            continue
+
+        normalized = normalize_momo_product_url(old_url, getattr(record, "i_code", None))
+        if normalized == old_url:
+            unchanged += 1
+            continue
+
+        # Any falsy result counts as "cannot be fixed", matching the other
+        # call sites (`normalize(...) or build(...)`), so an empty string is
+        # never written back as a URL.
+        if not normalized:
+            if commit:
+                record.url = None
+            cleared += 1
+            logger.info(
+                "清空 %s 不可修正 URL | id=%s | i_code=%s | old=%s",
+                label,
+                getattr(record, "id", "n/a"),
+                getattr(record, "i_code", ""),
+                old_url,
+            )
+        else:
+            if commit:
+                record.url = normalized
+            updated += 1
+            logger.info(
+                "修正 %s URL | id=%s | i_code=%s | old=%s | new=%s",
+                label,
+                getattr(record, "id", "n/a"),
+                getattr(record, "i_code", ""),
+                old_url,
+                normalized,
+            )
+
+    logger.info(
+        "%s 結果 | unchanged=%s, updated=%s, cleared=%s",
+        label,
+        unchanged,
+        updated,
+        cleared,
+    )
+    return {"unchanged": unchanged, "updated": updated, "cleared": cleared}
+
+
+def main(commit=False):
+    """Sanitize the stored URLs of all products and promo products.
+
+    Changes are committed only when ``commit`` is True; otherwise the
+    session is rolled back (dry run). Any exception is logged and re-raised,
+    and the session is closed in every case.
+    """
+    session = DatabaseManager().get_session()
+    try:
+        # Both tables are loaded before either is processed.
+        tables = [
+            ("products", session.query(Product).all()),
+            ("promo_products", session.query(PromoProduct).all()),
+        ]
+        results = {
+            label: _sanitize_records(rows, label, commit=commit)
+            for label, rows in tables
+        }
+
+        if not commit:
+            session.rollback()
+            logger.info("Dry-run 模式:未提交變更")
+        else:
+            session.commit()
+            logger.info("變更已提交")
+
+        logger.info(
+            "整體結果 | products: %s | promo_products: %s",
+            results["products"],
+            results["promo_products"],
+        )
+    except Exception as exc:
+        if commit:
+            session.rollback()
+        logger.exception("清理網址失敗: %s", exc)
+        raise
+    finally:
+        session.close()
+
+
+if __name__ == "__main__":
+    # Command-line entry point: dry run unless --commit is passed.
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--commit", action="store_true", help="實際寫回資料庫(不加此參數則為 dry-run)")
+    main(commit=cli.parse_args().commit)
diff --git a/services/price_comparison.py b/services/price_comparison.py
index 53d3a21..00f1a72 100644
--- a/services/price_comparison.py
+++ b/services/price_comparison.py
@@ -14,6 +14,7 @@ from typing import List, Dict, Optional, Tuple
from dataclasses import dataclass, asdict
from difflib import SequenceMatcher
from datetime import datetime
+from utils.momo_url_utils import build_momo_product_url, normalize_momo_product_url
logger = logging.getLogger(__name__)
@@ -182,6 +183,13 @@ class ProductNameParser:
# 提取產品線和關鍵字
product_line, keywords = self._extract_keywords(cleaned_name, brand)
+ safe_product_url = (
+ normalize_momo_product_url(product_url, product_id)
+ if source == 'momo'
+ else product_url
+ )
+ safe_product_url = safe_product_url or build_momo_product_url(product_id)
+
return ParsedProduct(
original_name=name,
brand=brand,
@@ -192,7 +200,7 @@ class ProductNameParser:
source=source,
price=price,
product_id=product_id,
- product_url=product_url
+ product_url=safe_product_url
)
def _clean_name(self, name: str) -> str:
diff --git a/templates/base.html b/templates/base.html
index b909b1f..6ad9a63 100644
--- a/templates/base.html
+++ b/templates/base.html
@@ -157,6 +157,226 @@
+
+
+