diff --git a/routes/api_routes.py b/routes/api_routes.py index 86e04fe..cf34bd5 100644 --- a/routes/api_routes.py +++ b/routes/api_routes.py @@ -9,8 +9,10 @@ import os import threading import importlib from datetime import datetime, timezone, timedelta +import re from flask import Blueprint, request, jsonify from sqlalchemy import func, desc, text +from urllib.parse import parse_qs, urlparse from auth import login_required from config import BASE_DIR @@ -18,6 +20,8 @@ from database.manager import DatabaseManager from database.models import Product, PriceRecord from database.edm_models import PromoProduct from services.logger_manager import SystemLogger +from utils.momo_url_utils import build_momo_product_url, normalize_momo_product_url +from utils.momo_url_utils import is_probable_momo_icode # 時區設定 TAIPEI_TZ = timezone(timedelta(hours=8)) @@ -420,6 +424,9 @@ def get_price_change_details(): db = DatabaseManager() session = db.get_session() try: + def _safe_product_url(product): + return normalize_momo_product_url(product.url, product.i_code) or build_momo_product_url(product.i_code) + # 取得今日起始時間 now_taipei = datetime.now(TAIPEI_TZ) today_start = now_taipei.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=None) @@ -467,7 +474,7 @@ def get_price_change_details(): 'product_id': product.i_code, 'name': product.name, 'category': product.category, - 'url': product.url, + 'url': _safe_product_url(product), 'image_url': product.image_url or '/static/placeholder.png', 'old_price': old_price, 'current_price': record.price, @@ -484,7 +491,7 @@ def get_price_change_details(): 'product_id': product.i_code, 'name': product.name, 'category': product.category, - 'url': product.url, + 'url': _safe_product_url(product), 'image_url': product.image_url or '/static/placeholder.png', 'old_price': old_price, 'current_price': record.price, @@ -509,7 +516,7 @@ def get_price_change_details(): 'product_id': product.i_code, 'name': product.name, 'category': product.category, - 'url': product.url, + 'url': _safe_product_url(product), 'image_url': product.image_url or '/static/placeholder.png', 'last_price': last_record.price, 'update_time': product.updated_at.strftime('%Y-%m-%d %H:%M') if product.updated_at else '' @@ -522,3 +529,62 @@ def get_price_change_details(): return jsonify({'products': []}), 500 finally: session.close() + + +@api_bp.route('/api/track_momo_link', methods=['POST']) +@login_required +def track_momo_link(): + """API: 記錄 MOMO 連結點擊與異常開啟事件,用於診斷自動開啟來源。""" + def _is_blocked_momo_url(url: str) -> bool: + url_l = str(url or '').lower() + if 'ec404.html' in url_l or 'ec404' in url_l: + return True + + try: + parsed = urlparse(str(url or '')) + path = (parsed.path or '').lower() + if 'goodsdetail' in path: + query = parse_qs(parsed.query or '') + i_code = (query.get('i_code') or [''])[0] + if i_code: + return not is_probable_momo_icode(i_code) + if not re.search(r'/goodsdetail/[^/]+', path): + return True + except Exception: + pass + + return False + + payload = request.get_json(silent=True) or {} + url = str(payload.get('url') or '').strip() + effective_url = str(payload.get('effective_url') or '').strip() + if not url: + return jsonify({'status': 'ignored', 'reason': 'missing_url'}), 400 + + is_blocked = _is_blocked_momo_url(url) or _is_blocked_momo_url(effective_url) + level = "[Web] [MOMO_LINK_TRACK] " + product_id = str(payload.get('product_id', '') or '').strip() + i_code = str(payload.get('i_code', '') or '').strip() + source = str(payload.get('source', '') or 'unknown').strip() + page = str(payload.get('page', '') or '').strip() + label = str(payload.get('label', '') or '').strip() + platform = str(payload.get('platform', '') or 'momo').strip() + product_name = str(payload.get('product_name', '') or '').strip() + referer = request.headers.get('Referer', '') + user_ip = request.remote_addr + + if not effective_url: + effective_url = url + + msg = ( + f"{level}platform={platform} source={source} page={page} " + f"i_code={i_code} product_id={product_id} label={label} " + f"name={product_name} url={url} effective_url={effective_url} ip={user_ip} referer={referer}" + ) + + if is_blocked: + sys_log.warning(msg + " | status=blocked_link") + else: + sys_log.info(msg + " | status=tracked") + + return jsonify({'status': 'ok'}) diff --git a/routes/dashboard_routes.py b/routes/dashboard_routes.py index 5107f2c..c9b9a9e 100644 --- a/routes/dashboard_routes.py +++ b/routes/dashboard_routes.py @@ -27,6 +27,7 @@ from services.cache_manager import ( _DASHBOARD_SHARED_CACHE_FILE, _DASHBOARD_STALE_CACHE_FILE, ) +from utils.momo_url_utils import build_momo_product_url, normalize_momo_product_url # 時區設定 TAIPEI_TZ = timezone(timedelta(hours=8)) @@ -47,9 +48,7 @@ def _build_pchome_product_url(product_id): def _build_momo_product_url(i_code): - if not i_code: - return None - return f"https://www.momoshop.com.tw/goods/GoodsDetail.jsp?i_code={str(i_code).strip()}" + return build_momo_product_url(i_code) def _to_float(value): @@ -189,6 +188,7 @@ def _ai_pick_evidence_fields(model_footprint): def _dashboard_decision_row(row, tone): sku = str(row.get('sku') or '') pchome_id = row.get('competitor_product_id') + momo_url = normalize_momo_product_url(row.get('momo_url'), sku) or _build_momo_product_url(sku) return { 'sku': sku, 'name': row.get('name') or '', @@ -200,7 +200,7 @@ def _dashboard_decision_row(row, tone): 'confidence': _to_float(row.get('confidence')), 'reason': row.get('reason') or '', 'tone': tone, - 'momo_url': row.get('momo_url') or _build_momo_product_url(sku), + 'momo_url': momo_url, 'pchome_id': pchome_id, 'pchome_name': row.get('competitor_product_name') or '', 'pchome_url': _build_pchome_product_url(pchome_id), @@ -246,11 +246,12 @@ def _load_competitor_decision_overview(session, latest_items=None): sku = str(getattr(product, 'i_code', '') or '') if not sku: continue + safe_product_url = normalize_momo_product_url(getattr(product, 'url', None), sku) item_map[sku] = { 'sku': sku, 'name': getattr(product, 'name', '') or '', 'category': getattr(product, 'category', '') or '', - 'momo_url': getattr(product, 'url', None) or _build_momo_product_url(sku), + 'momo_url': safe_product_url or _build_momo_product_url(sku), 'momo_price': _to_float(getattr(record, 'price', None)) or 0, } @@ -350,7 +351,7 @@ def _load_competitor_decision_overview(session, latest_items=None): 'name': row['name'], 'category': row['category'], 'momo_price': row['momo_price'], - 'momo_url': row['momo_url'], + 'momo_url': normalize_momo_product_url(row.get('momo_url'), row.get('sku')) or _build_momo_product_url(row.get('sku')), } for row in sorted(pending_items, key=lambda row: row['momo_price'], reverse=True)[:3] ] @@ -541,7 +542,7 @@ def _load_competitor_decision_overview(session, latest_items=None): 'name': row.get('name') or '', 'category': row.get('category') or '', 'momo_price': _to_float(row.get('momo_price')) or 0, - 'momo_url': row.get('momo_url') or _build_momo_product_url(row.get('sku')), + 'momo_url': normalize_momo_product_url(row.get('momo_url'), row.get('sku')) or _build_momo_product_url(row.get('sku')), } for row in session.execute(pending_sql).mappings().all() ] @@ -956,6 +957,8 @@ def get_consolidated_data(): unique_items = [] for r in latest_records: pid = r.product_id + product = r.product + safe_product_url = normalize_momo_product_url(getattr(product, 'url', None), getattr(product, 'i_code', '')) price_7d = prices_7d_ago_map.get(pid) price_30d = prices_30d_ago_map.get(pid) @@ -991,6 +994,7 @@ def get_consolidated_data(): unique_items.append({ 'record': r, + 'safe_product_url': safe_product_url or _build_momo_product_url(getattr(product, 'i_code', '')), 'stats': {'7d_diff': stats_7d_diff, '30d_diff': stats_30d_diff, '1d_diff': today_diff}, 'yesterday_diff': yesterday_diff, 'today_changes': today_changes, @@ -1384,6 +1388,11 @@ def index(): item['safe_created_at'] = getattr(item['record'].product, 'created_at', None) sku = str(item['record'].product.i_code) item['ai_pick'] = ai_pick_map.get(sku) + item['safe_momo_url'] = ( + item.get('safe_product_url') + or normalize_momo_product_url(item['record'].product.url, sku) + or _build_momo_product_url(sku) + ) # 為當前頁面項目添加顏色 for item in paged_items: diff --git a/routes/edm_routes.py b/routes/edm_routes.py index b6d903e..fb1240c 100644 --- a/routes/edm_routes.py +++ b/routes/edm_routes.py @@ -16,6 +16,7 @@ from database.manager import DatabaseManager from database.models import Product from database.edm_models import PromoProduct from services.logger_manager import SystemLogger +from utils.momo_url_utils import build_momo_product_url, normalize_momo_product_url # 時區設定 TAIPEI_TZ = timezone(timedelta(hours=8)) @@ -217,6 +218,7 @@ def _build_promo_dashboard_data(session, page_type, page_name, sort_by, order): # 8. 附加分類資訊到每個 item for item in items_in_batch: + item.safe_product_url = normalize_momo_product_url(item.url, item.i_code) or build_momo_product_url(item.i_code) item.main_category = product_categories.get(item.i_code) if item.main_category: item.category_color = get_color_for_string(item.main_category) diff --git a/routes/export_routes.py b/routes/export_routes.py index 78d5195..8e70467 100644 --- a/routes/export_routes.py +++ b/routes/export_routes.py @@ -20,6 +20,7 @@ from database.manager import DatabaseManager from database.models import Product, PriceRecord from services.exporter import Exporter from services.logger_manager import SystemLogger +from utils.momo_url_utils import build_momo_product_url, normalize_momo_product_url # 時區設定 TAIPEI_TZ = timezone(timedelta(hours=8)) @@ -178,8 +179,9 @@ def export_excel_ai_picks(): export_rows = [] for row in rows: sku = str(row.get('sku') or '') + normalized_sku = str(sku or '').strip() pchome_id = row.get('competitor_product_id') or '' - momo_url = row.get('momo_url') or f"https://www.momoshop.com.tw/goods/GoodsDetail.jsp?i_code={sku}" + momo_url = normalize_momo_product_url(row.get('momo_url'), normalized_sku) or build_momo_product_url(normalized_sku) pchome_url = f"https://24h.pchome.com.tw/prod/{str(pchome_id).strip()}" if pchome_id else '' footprint = row.get('model_footprint') or {} if isinstance(footprint, str): @@ -392,6 +394,7 @@ def export_price_changes(): for product, record, old_price in products: change = record.price - old_price change_pct = (change / old_price * 100) if old_price > 0 else 0 + safe_product_url = normalize_momo_product_url(product.url, product.i_code) or build_momo_product_url(product.i_code) ws.append([ product.i_code, product.name, @@ -401,7 +404,7 @@ def export_price_changes(): change, f"{change_pct:.2f}%", record.timestamp.strftime('%Y-%m-%d %H:%M'), - product.url + safe_product_url ]) # 調整欄寬 diff --git a/routes/price_comparison_routes.py b/routes/price_comparison_routes.py index c6b981f..8a61186 100644 --- a/routes/price_comparison_routes.py +++ b/routes/price_comparison_routes.py @@ -6,6 +6,7 @@ from flask import Blueprint, request, jsonify, render_template import logging from auth import login_required +from utils.momo_url_utils import extract_momo_i_code, is_probable_momo_icode from services.price_comparison import ( compare_brand_prices, BRAND_ALIASES, @@ -200,11 +201,26 @@ def parse_momo_excel(): products = [] for idx, row in df.iterrows(): try: + raw_id = '' + if id_col: + raw_id = str(row[id_col]).strip() + if raw_id in {'nan', 'None', 'NoneType', 'nan.'}: + raw_id = '' + raw_url = '' + if url_col: + raw_url = str(row[url_col]).strip() + if raw_url in {'nan', 'None', 'NoneType'}: + raw_url = '' + + product_id = raw_id if is_probable_momo_icode(raw_id) else extract_momo_i_code(raw_url) or '' + if not is_probable_momo_icode(product_id): + product_id = '' + product = { 'name': str(row[name_col]), 'price': int(float(row[price_col])), - 'product_id': str(row[id_col]) if id_col else f'momo_{idx}', - 'url': str(row[url_col]) if url_col else '' + 'product_id': product_id, + 'url': raw_url } products.append(product) except (ValueError, TypeError): @@ -263,17 +279,22 @@ def quick_compare(): }), 400 # 補充缺失欄位 - for i, p in enumerate(pchome_products): - if 'product_id' not in p: - p['product_id'] = f'pchome_{i}' - if 'product_url' not in p: - p['product_url'] = '' + def _normalize_product_id(products_list, source_prefix, id_field, url_field): + for i, p in enumerate(products_list): + if id_field not in p: + p[id_field] = '' + product_id = str(p.get(id_field) or '').strip() + if not is_probable_momo_icode(product_id): + product_id = extract_momo_i_code(p.get(url_field)) or '' + if not is_probable_momo_icode(product_id): + p[id_field] = f'{source_prefix}_{i}' + else: + p[id_field] = product_id + if url_field not in p or p.get(url_field) is None: + p[url_field] = '' - for i, p in enumerate(momo_products): - if 'product_id' not in p: - p['product_id'] = f'momo_{i}' - if 'url' not in p: - p['url'] = '' + _normalize_product_id(pchome_products, 'pchome', 'product_id', 'product_url') + _normalize_product_id(momo_products, 'momo', 'product_id', 'url') # 執行比價 (不限定品牌) result = compare_brand_prices('', pchome_products, momo_products) diff --git a/scheduler.py b/scheduler.py index 5d610ea..a45179e 100644 --- a/scheduler.py +++ b/scheduler.py @@ -20,6 +20,7 @@ from database.models import Product, PriceRecord from database.edm_models import PromoProduct from services.notification_manager import NotificationManager from services.edm_notifier import EdmNotifier # V-New: 導入新的通知模組 +from utils.momo_url_utils import normalize_momo_product_url # V-Fix: 改為匯入讀取函式,而非靜態變數,以支援動態更新 from config import load_momo_categories @@ -310,7 +311,7 @@ def run_momo_task(): if not link_els: continue link_url = link_els[0].get_attribute("href") - if not link_url or "javascript" in link_url: + if not link_url: continue # 從 URL 提取 i_code @@ -331,6 +332,13 @@ def run_momo_task(): except ValueError: i_code = i_code_raw.upper() + product_url = normalize_momo_product_url(link_url, i_code) + if not product_url: + logging.warning( + f"[Crawler] [MOMO] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}" + ) + continue + # 提取名稱 name_els = container.find_elements(By.CSS_SELECTOR, ".prdName, .goodsName, .productName, .title") if name_els: @@ -369,7 +377,7 @@ def run_momo_task(): 'i_code': str(i_code), 'name': title, 'category': cat_name, - 'url': link_url, + 'url': product_url, 'image_url': image_url, 'price': price_val }) @@ -396,6 +404,9 @@ def run_momo_task(): else: if product.category != item['category']: product.category = item['category'] + normalized_existing_url = normalize_momo_product_url(item['url'], item['i_code']) + if product.url != normalized_existing_url: + product.url = normalized_existing_url if item['image_url']: product.image_url = item['image_url'] @@ -724,13 +735,20 @@ def run_edm_task(lpn_code="O1K5FBOqsvN"): previous_price = prev_record.previous_price is_changed = status_change != "NONE" + normalized_link_url = normalize_momo_product_url(link_url, i_code) + if not normalized_link_url: + logging.warning( + f"[Crawler] [EDM] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}" + ) + continue + new_promo = PromoProduct( batch_id=batch_id, i_code=i_code, name=name, price=price, discount_text=discount_text, - url=link_url, + url=normalized_link_url, previous_price=previous_price, # V9.64: 寫入舊價格 time_slot=time_slot, status_change=status_change if is_changed else "ACTIVE", @@ -1162,8 +1180,15 @@ def run_festival_task(lpn_code="O7ylWfihYUM"): logging.info(f"[Crawler] [Festival] -> 狀態: 圖片更新 (UPDATE)") is_changed = status_change != "NONE" + normalized_link_url = normalize_momo_product_url(link_url, i_code) + if not normalized_link_url: + logging.warning( + f"[Crawler] [Festival] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}" + ) + continue + new_promo = PromoProduct( - batch_id=batch_id, i_code=i_code, name=name, price=price, url=link_url, + batch_id=batch_id, i_code=i_code, name=name, price=price, url=normalized_link_url, image_url=image_url, previous_price=previous_price, time_slot=group_title, status_change=status_change if is_changed else "ACTIVE", crawled_at=now, activity_time_text=activity_name, session_time_text=group_title, page_type=PAGE_TYPE @@ -1497,8 +1522,15 @@ def run_promo_event_task(lpn_code, page_type, activity_name): logging.info(f"[Crawler] [{page_type.upper()}] -> 狀態: 圖片更新 (UPDATE)") is_changed = status_change != "NONE" + normalized_link_url = normalize_momo_product_url(link_url, i_code) + if not normalized_link_url: + logging.warning( + f"[Crawler] [{page_type.upper()}] ⚠️ 商品網址無法修正,改用 i_code 組網址 | i_code: {i_code}" + ) + continue + new_promo = PromoProduct( - batch_id=batch_id, i_code=i_code, name=name, price=price, url=link_url, + batch_id=batch_id, i_code=i_code, name=name, price=price, url=normalized_link_url, image_url=image_url, previous_price=previous_price, time_slot=group_title, status_change=status_change if is_changed else "ACTIVE", crawled_at=now, activity_time_text=activity_name, session_time_text=group_title, page_type=page_type diff --git a/scripts/tools/sanitize_momo_urls.py b/scripts/tools/sanitize_momo_urls.py new file mode 100644 index 0000000..5027ac9 --- /dev/null +++ b/scripts/tools/sanitize_momo_urls.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 +""" +修正 MOMO 商品與促銷商品網址中的壞連結(如 javascript:void(0)、EC404、非商品頁) +將可修正者改為: +1) 以 i_code 組出正確商品網址 +2) 無法修正時清空網址 (避免連到錯頁) +""" +import argparse +import logging +import os +import sys + +BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +sys.path.insert(0, BASE_DIR) + +from database.manager import DatabaseManager +from database.models import Product +from database.edm_models import PromoProduct +from utils.momo_url_utils import normalize_momo_product_url + + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", +) +logger = logging.getLogger(__name__) + + +def _sanitize_records(records, label, commit=False): + updated = 0 + skipped = 0 + cleared = 0 + unchanged = 0 + + for record in records: + old_url = getattr(record, "url", None) + if not old_url: + unchanged += 1 + continue + + normalized = normalize_momo_product_url(old_url, getattr(record, "i_code", None)) + if normalized == old_url: + unchanged += 1 + continue + + if normalized is None: + if commit: + record.url = None + cleared += 1 + logger.info( + "清空 %s 不可修正 URL | id=%s | i_code=%s | old=%s", + label, + getattr(record, "id", "n/a"), + getattr(record, "i_code", ""), + old_url, + ) + else: + if commit: + record.url = normalized + updated += 1 + logger.info( + "修正 %s URL | id=%s | i_code=%s | old=%s | new=%s", + label, + getattr(record, "id", "n/a"), + getattr(record, "i_code", ""), + old_url, + normalized, + ) + + logger.info( + "%s 結果 | unchanged=%s, updated=%s, cleared=%s", + label, + unchanged, + updated, + cleared, + ) + return {"unchanged": unchanged, "updated": updated, "cleared": cleared} + + +def main(commit=False): + db = DatabaseManager() + session = db.get_session() + try: + product_rows = session.query(Product).all() + promo_rows = session.query(PromoProduct).all() + + product_result = _sanitize_records(product_rows, "products", commit=commit) + promo_result = _sanitize_records(promo_rows, "promo_products", commit=commit) + + if commit: + session.commit() + logger.info("變更已提交") + else: + session.rollback() + logger.info("Dry-run 模式:未提交變更") + + logger.info( + "整體結果 | products: %s | promo_products: %s", + product_result, + promo_result, + ) + except Exception as exc: + if commit: + session.rollback() + logger.exception("清理網址失敗: %s", exc) + raise + finally: + session.close() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--commit", + action="store_true", + help="實際寫回資料庫(不加此參數則為 dry-run)", + ) + args = parser.parse_args() + main(commit=args.commit) diff --git a/services/price_comparison.py b/services/price_comparison.py index 53d3a21..00f1a72 100644 --- a/services/price_comparison.py +++ b/services/price_comparison.py @@ -14,6 +14,7 @@ from typing import List, Dict, Optional, Tuple from dataclasses import dataclass, asdict from difflib import SequenceMatcher from datetime import datetime +from utils.momo_url_utils import build_momo_product_url, normalize_momo_product_url logger = logging.getLogger(__name__) @@ -182,6 +183,13 @@ class ProductNameParser: # 提取產品線和關鍵字 product_line, keywords = self._extract_keywords(cleaned_name, brand) + safe_product_url = ( + normalize_momo_product_url(product_url, product_id) + if source == 'momo' + else product_url + ) + safe_product_url = safe_product_url or build_momo_product_url(product_id) + return ParsedProduct( original_name=name, brand=brand, @@ -192,7 +200,7 @@ class ProductNameParser: source=source, price=price, product_id=product_id, - product_url=product_url + product_url=safe_product_url ) def _clean_name(self, name: str) -> str: diff --git a/templates/base.html b/templates/base.html index b909b1f..6ad9a63 100644 --- a/templates/base.html +++ b/templates/base.html @@ -157,6 +157,226 @@ + + + - \ No newline at end of file + diff --git a/templates/dashboard_v2.html b/templates/dashboard_v2.html index e086c8d..821abcc 100644 --- a/templates/dashboard_v2.html +++ b/templates/dashboard_v2.html @@ -959,7 +959,12 @@
{% for pick in overview.top_picks %}
- {{ pick.name }} + {{ pick.name }}
AI {{ (pick.confidence * 100) | round(0) | int if pick.confidence else 0 }}% 證據 {{ pick.evidence_quality | round(0) | int }}% @@ -976,7 +981,12 @@
{% endif %}