From 7a2520dc679d17dee0f8c98b9beae993a47b9043 Mon Sep 17 00:00:00 2001 From: OoO Date: Tue, 16 Jun 2026 11:13:24 +0800 Subject: [PATCH] =?UTF-8?q?V10.619=20MOMO=20=E7=B2=BE=E6=BA=96=E5=80=99?= =?UTF-8?q?=E9=81=B8=E6=90=9C=E5=B0=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 2 +- docs/AI_INTELLIGENCE_MODULE_SOT.md | 4 +- .../current_execution_queue_20260524.md | 16 + routes/price_comparison_routes.py | 75 ++- services/momo_crawler.py | 230 ++++++- templates/price_comparison.html | 628 ++++++++++++++++-- tests/test_frontend_v2_assets.py | 42 ++ tests/test_momo_crawler_targeted_search.py | 124 ++++ tests/test_price_comparison_routes.py | 110 ++- 9 files changed, 1160 insertions(+), 71 deletions(-) create mode 100644 tests/test_momo_crawler_targeted_search.py diff --git a/config.py b/config.py index a54e45b..4a7bf62 100644 --- a/config.py +++ b/config.py @@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.617" +SYSTEM_VERSION = "V10.619" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index f43d22e..a1b4ce3 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -2,7 +2,7 @@ > **最後更新**: 2026-06-16 (台北時間) > **狀態**: 🟢 四 AI Agent 自動化閉環已落地;LLM 路由紅線升級為 Ollama-first 三主機級聯;PChome 後台業績匯入韌性已補強;產品定位正名為「PChome 業績成長自動化作戰系統」;外部市場來源正規化層、自動同步、作戰清單與價格參考表優先讀取、CSV 備援預檢、前台操作入口與高可見頁面繁中化守門已建立 -> **適用版本**: V10.617 +> **適用版本**: V10.619 --- @@ -64,6 +64,8 @@ - V10.615 起 AI 智慧推薦頁必須把 Ollama 顯示為「Ollama 主路徑」,Gemini 只能顯示為「Gemini 備援」且手動選項停用;使用者可見錯誤與搜尋流程不得出現 `Web Search`、`Token:`、半形英文冒號等工程文案。 - V10.616 起主商品看板 `/` 的統計與補強區塊也納入繁中守門:不得顯示 `ACTIVE`、`PICK COUNT`、`AVG CONFIDENCE`、`EVIDENCE GAP`、`PCHOME MATCH BACKFILL` 
等工程標籤;畫面需使用「有效商品」「挑品數」「平均信心」「待補證據」「PChome 比價補強」等白話營運文案。 - V10.617 起 `/ai_intelligence` 必須採「先給下一步」的作戰導向 UI:首屏需先回答「今天先做什麼」,再呈現商品處理進度、外部價格來源與操作捷徑;今日處理清單需用表格呈現優先級、建議動作、商品、近 7 天業績、比價結果、資料可信度與下一步;MOMO 外部價格參考需顯示價格風險分佈,且表格需以 PChome 價格優先,明確顯示「PChome 貴 / PChome 便宜」與可信度,不得只用大段文字說明使用方式。 +- V10.618 起 `/price_comparison` 也必須採「先給下一步」的比價決策 UI:首屏需顯示目前卡在哪一步、PChome / MOMO 資料準備狀態與下一個按鈕;比價結果需先呈現「需檢查價格 / 可主推曝光 / 價格接近」分佈,再用表格列出每筆商品的下一步,不得只呈現 Step 流程或原始價差表。 +- V10.619 起 MOMO 比價候選來源新增「PChome 商品導向搜尋」:當比價 API 已有 PChome 商品但缺 MOMO 清單時,必須用每筆 PChome 商品名稱產生精準搜尋詞反查 MOMO,保留品牌、品名、容量與組合線索;新版 MOMO 搜尋頁需解析 Next.js `goodsInfoList` payload。此路徑只擴大候選池,不放寬同款 matcher 門檻;`unit_comparable` 與 hard veto 候選只能標成「需人工確認」,不得直接進自動比價告警。 ## 零之一、12 Agent 決策信封(2026-05-24) diff --git a/docs/memory/current_execution_queue_20260524.md b/docs/memory/current_execution_queue_20260524.md index 52ed748..0c61a5b 100644 --- a/docs/memory/current_execution_queue_20260524.md +++ b/docs/memory/current_execution_queue_20260524.md @@ -259,3 +259,19 @@ - MOMO 外部價格參考新增價格風險分佈,表格改為 PChome 價格在前、MOMO 參考價在後,價差明確顯示「PChome 貴 / PChome 便宜」,並新增「鎖定商品」操作。 - 備援 CSV 流程降級為「備援資料檢查」,移到主要作戰與價格表後面,避免誤導使用者以為日常仍要人工匯入。 - 前端補上 payload fallback、動態表格 escape、手機版 `data-label` 與補商品對應 busy lock,避免資料缺欄位、特殊字元或重複點擊造成壞畫面。 + +## 21. 2026-06-16 V10.618 比價頁改為下一步導向 + +- `/price_comparison` 改為「PChome 商品比價決策台」,首屏需先顯示「今天先做」與 PChome / MOMO 商品準備狀態,不再讓使用者從 Step 1/2/3 自行猜流程。 +- 頁面會依目前資料狀態切換下一步:輸入關鍵字、取得 PChome 商品、匯入 MOMO 商品、開始檢查價差、查看需檢查價格或可主推商品。 +- 比價結果新增判讀分佈:「需檢查價格」「可主推曝光」「價格接近」,表格第一欄直接呈現每筆商品下一步。 +- Toast 改用純文字 DOM,手動輸入錯誤訊息不再塞 HTML;更新商品資料時會清掉舊比價結果,避免資料已更新但畫面仍顯示舊判讀。 + +## 22. 
2026-06-16 V10.619 PChome 導向 MOMO 精準候選搜尋 + +- 使用者指出只抓 MOMO 活動頁會讓比價候選池偏窄;V10.619 新增 `search_momo_products_for_pchome_products()`,用 PChome 商品名稱逐筆反查 MOMO 候選。 +- 搜尋詞沿用 `marketplace_product_matcher.build_search_terms()`,保留品牌、品名、容量、單品與組合線索,例如 B5 40ml、500ml 2入組,避免只用品牌或活動頁商品池。 +- `/api/price_comparison/compare` 在已有 PChome 商品但缺 MOMO 清單時,會優先走 PChome 導向 MOMO 搜尋;完全沒有 PChome 商品時才退回品牌搜尋。 +- MOMO 搜尋 parser 已補新版 Next.js `goodsInfoList`,避免明明搜尋頁有商品但 crawler 回 0 筆。 +- `/price_comparison` 已新增「自動找 MOMO 候選」操作,PChome 商品準備後可直接搜尋 MOMO;回傳會分成「可直接比價」與「需人工確認」。 +- 新路徑只擴大候選池,不放寬 `score_marketplace_match()` 的 hard veto 與同款分數篩選;`unit_comparable` 候選保留為「需人工確認」,不得直接進自動比價。後續才評估把這條路徑接進背景自動同步 / `external_offers`。 diff --git a/routes/price_comparison_routes.py b/routes/price_comparison_routes.py index 1bb833b..72cac64 100644 --- a/routes/price_comparison_routes.py +++ b/routes/price_comparison_routes.py @@ -12,7 +12,7 @@ from services.price_comparison import ( BRAND_ALIASES, BRAND_NORMALIZE_MAP ) -from services.momo_crawler import search_momo_products +from services.momo_crawler import search_momo_products, search_momo_products_for_pchome_products from services.pchome_crawler import search_pchome_products logger = logging.getLogger(__name__) @@ -91,17 +91,45 @@ def compare_prices(): logger.warning(f"PChome 搜尋失敗: {msg}") pchome_products = [] + targeted_momo_summary = None + # 取得 MOMO 商品 momo_products = data.get('momo_products') if not momo_products: - logger.info(f"自動搜尋 MOMO: {brand}") - success, msg, momo_products = search_momo_products(brand, limit=100) + if pchome_products: + logger.info("[PriceComparison] 以 PChome 商品精準搜尋 MOMO 候選: brand=%s pchome_count=%s", brand, len(pchome_products)) + success, msg, targeted_momo_products = search_momo_products_for_pchome_products( + pchome_products, + max_products=30, + limit_per_product=8, + ) + targeted_momo_summary = { + "message": msg, + "candidate_count": len(targeted_momo_products or []), + "auto_compare_count": len([ + item for item in (targeted_momo_products 
or []) + if item.get("can_auto_compare") + ]), + "review_count": len([ + item for item in (targeted_momo_products or []) + if not item.get("can_auto_compare") + ]), + } + momo_products = [ + item for item in (targeted_momo_products or []) + if item.get("can_auto_compare") + ] + else: + logger.info(f"自動搜尋 MOMO: {brand}") + success, msg, momo_products = search_momo_products(brand, limit=100) if not success: logger.warning(f"MOMO 搜尋失敗: {msg}") momo_products = [] # 執行比價 result = compare_brand_prices(brand, pchome_products, momo_products) + if targeted_momo_summary: + result["momo_targeted_search"] = targeted_momo_summary return jsonify({ 'success': True, @@ -155,6 +183,47 @@ def fetch_pchome_products(): }), 500 +@price_comparison_bp.route('/api/price_comparison/fetch_momo_for_pchome', methods=['POST']) +@login_required +def fetch_momo_for_pchome_products(): + """用 PChome 商品清單反查 MOMO 候選;只讀、不寫 DB。""" + try: + data = request.get_json() or {} + pchome_products = data.get('pchome_products') or [] + if not pchome_products: + return jsonify({ + 'success': False, + 'message': '請先取得 PChome 商品,再搜尋 MOMO 候選' + }), 400 + + success, message, products = search_momo_products_for_pchome_products( + pchome_products, + max_products=30, + limit_per_product=8, + ) + auto_products = [item for item in products if item.get("can_auto_compare")] + review_candidates = [item for item in products if not item.get("can_auto_compare")] + + return jsonify({ + 'success': success, + 'message': message, + 'data': { + 'products': auto_products, + 'review_candidates': review_candidates, + 'count': len(auto_products), + 'review_count': len(review_candidates), + 'candidate_count': len(products), + } + }) + + except Exception as e: + logger.error(f"搜尋 MOMO 候選失敗: {e}", exc_info=True) + return jsonify({ + 'success': False, + 'message': f'搜尋 MOMO 候選失敗: {str(e)}' + }), 500 + + @price_comparison_bp.route('/api/price_comparison/parse_momo_excel', methods=['POST']) @login_required def parse_momo_excel(): diff --git 
a/services/momo_crawler.py b/services/momo_crawler.py index 1adeb1e..1813211 100644 --- a/services/momo_crawler.py +++ b/services/momo_crawler.py @@ -14,6 +14,7 @@ import re import json import time import logging +import os from typing import List, Dict, Optional, Tuple from dataclasses import dataclass, asdict from datetime import datetime @@ -23,6 +24,11 @@ from bs4 import BeautifulSoup logger = logging.getLogger(__name__) +MOMO_TARGETED_SEARCH_MIN_SCORE = float(os.getenv("MOMO_TARGETED_SEARCH_MIN_SCORE", "0.45")) +MOMO_TARGETED_SEARCH_MAX_PRODUCTS = int(os.getenv("MOMO_TARGETED_SEARCH_MAX_PRODUCTS", "30")) +MOMO_TARGETED_SEARCH_MAX_TERMS = int(os.getenv("MOMO_TARGETED_SEARCH_MAX_TERMS", "4")) +MOMO_TARGETED_SEARCH_LIMIT_PER_TERM = int(os.getenv("MOMO_TARGETED_SEARCH_LIMIT_PER_TERM", "8")) + @dataclass class MomoProduct: @@ -273,7 +279,11 @@ class MomoCrawler: logger.debug(f"[MOMO] 解析商品連結失敗: {e}") continue - # 方法 2: 如果上面沒找到,嘗試從 __NEXT_DATA__ 或 JSON + # 方法 2: 新版 Next.js app router 會把 goodsInfoList 放在 script payload 字串中 + if not products: + products = self._parse_next_search_payload_results(html, limit) + + # 方法 3: 如果上面沒找到,嘗試從 __NEXT_DATA__ 或 JSON if not products: # 嘗試找 Next.js 資料 script = soup.find('script', {'id': '__NEXT_DATA__'}) @@ -299,7 +309,7 @@ class MomoCrawler: except json.JSONDecodeError: pass - # 方法 3: 從 HTML 中找嵌入的 JSON + # 方法 4: 從 HTML 中找嵌入的 JSON if not products: json_pattern = re.compile(r'"goodsCode"\s*:\s*"?(\d+)"?.*?"goodsName"\s*:\s*"([^"]+)".*?"price"\s*:\s*(\d+)', re.DOTALL) matches = json_pattern.findall(html) @@ -325,6 +335,67 @@ class MomoCrawler: logger.error(f"[MOMO] 解析行動版結果失敗: {e}") return [] + def _parse_next_search_payload_results(self, html: str, limit: int) -> List[MomoProduct]: + """解析 MOMO 新版搜尋頁嵌入的 Next.js goodsInfoList payload。""" + products: List[MomoProduct] = [] + seen_ids: set[str] = set() + + product_pattern = re.compile( + r'\\"goodsCode\\"\s*:\s*\\"(?P<code>\d+)\\"' + r'.{0,800}?' 
+ r'\\"goodsName\\"\s*:\s*\\"(?P<name>.*?)\\"' + r'.{0,1600}?' + r'\\"goodsPrice\\"\s*:\s*\\"(?P<price>[^\\"]+)\\"' + r'.{0,2400}?' + r'\\"imgUrl\\"\s*:\s*\\"(?P<img>[^\\"]*)\\"', + re.DOTALL, + ) + for match in product_pattern.finditer(html): + if len(products) >= limit: + break + product_id = match.group("code") + if product_id in seen_ids: + continue + seen_ids.add(product_id) + + name = self._decode_payload_text(match.group("name")) + price = self._parse_momo_price(match.group("price")) + if not name or price <= 0: + continue + image_url = self._decode_payload_text(match.group("img")) + original_price = self._parse_original_price_nearby(html, match.start(), match.end()) or price + discount = round((1 - price / original_price) * 100) if original_price > price else None + + products.append(MomoProduct( + product_id=product_id, + name=name.strip()[:160], + price=price, + original_price=original_price, + discount=discount, + image_url=image_url, + product_url=f'{self.BASE_URL}/goods/GoodsDetail.jsp?i_code={product_id}', + brand='', + crawled_at=datetime.now(), + )) + return products + + @staticmethod + def _decode_payload_text(value: str) -> str: + try: + return json.loads(f'"{value}"') + except Exception: + return (value or "").replace("\\u0026", "&").replace("\\/", "/") + + @staticmethod + def _parse_momo_price(value: str) -> int: + match = re.search(r"[\d,]+", value or "") + return int(match.group(0).replace(",", "")) if match else 0 + + def _parse_original_price_nearby(self, html: str, start: int, end: int) -> int: + snippet = html[start:min(len(html), end + 1800)] + match = re.search(r'\\"goodsPriceOri\\"\s*:\s*\\"(?P<price>[^\\"]+)\\"', snippet) + return self._parse_momo_price(match.group("price")) if match else 0 + def _parse_search_results(self, html: str, limit: int) -> List[MomoProduct]: """ 解析搜尋結果 HTML @@ -467,6 +538,161 @@ def search_momo_products(keyword: str, limit: int = 10) -> Tuple[bool, str, List return success, message, [p.to_dict() for p in products] +def 
_to_float(value, default: float = 0.0) -> float: + try: + if value is None: + return default + return float(str(value).replace(",", "").replace("$", "").strip()) + except (TypeError, ValueError): + return default + + +def _product_name_from_payload(payload: dict) -> str: + return str( + payload.get("name") + or payload.get("product_name") + or payload.get("title") + or payload.get("商品名稱") + or "" + ).strip() + + +def _product_price_from_payload(payload: dict) -> float: + return _to_float( + payload.get("price") + or payload.get("pchome_price") + or payload.get("sale_price") + or payload.get("售價") + ) + + +def _dedupe_terms(terms: list[str], max_terms: int) -> list[str]: + result: list[str] = [] + seen: set[str] = set() + for term in terms: + normalized = re.sub(r"\s+", " ", str(term or "").strip()) + if len(normalized) < 2: + continue + key = normalized.lower() + if key in seen: + continue + seen.add(key) + result.append(normalized) + if len(result) >= max_terms: + break + return result + + +def build_targeted_momo_search_terms(pchome_name: str, max_terms: int = MOMO_TARGETED_SEARCH_MAX_TERMS) -> list[str]: + """用 PChome 商品名稱產生 MOMO 精準搜尋詞,保留品名、容量與組合線索。""" + if not pchome_name: + return [] + try: + from services.marketplace_product_matcher import build_search_terms + + terms = build_search_terms(pchome_name, max_terms=max_terms) + except Exception: + logger.warning("[MOMO] 產生精準搜尋詞失敗,改用原商品名", exc_info=True) + terms = [] + terms.append(pchome_name) + return _dedupe_terms(terms, max_terms=max_terms) + + +def search_momo_products_for_pchome_products( + pchome_products: list[dict], + *, + limit_per_product: int = MOMO_TARGETED_SEARCH_LIMIT_PER_TERM, + max_products: int = MOMO_TARGETED_SEARCH_MAX_PRODUCTS, + max_terms_per_product: int = MOMO_TARGETED_SEARCH_MAX_TERMS, + min_score: float = MOMO_TARGETED_SEARCH_MIN_SCORE, + crawler: MomoCrawler | None = None, +) -> Tuple[bool, str, List[dict]]: + """以 PChome 商品逐筆反查 MOMO 候選,補足單品與組合的精準比價來源。""" + if not pchome_products: + 
return False, "沒有 PChome 商品可用來搜尋 MOMO", [] + + try: + from services.marketplace_product_matcher import score_marketplace_match + except Exception as exc: + logger.error("[MOMO] 無法載入商品比對工具: %s", exc, exc_info=True) + return False, "商品比對工具暫時不可用", [] + + crawler = crawler or get_crawler() + candidates_by_id: dict[str, dict] = {} + searched_products = 0 + searched_terms: list[str] = [] + + for target in pchome_products[:max_products]: + pchome_name = _product_name_from_payload(target) + if not pchome_name: + continue + searched_products += 1 + pchome_price = _product_price_from_payload(target) + pchome_id = str(target.get("product_id") or target.get("id") or target.get("sku") or "").strip() + terms = build_targeted_momo_search_terms(pchome_name, max_terms=max_terms_per_product) + + for term in terms: + searched_terms.append(term) + success, _, products = crawler.search_products(term, limit=limit_per_product) + if not success or not products: + continue + + for product in products: + row = product.to_dict() if hasattr(product, "to_dict") else dict(product) + momo_name = _product_name_from_payload(row) + if not momo_name: + continue + diagnostics = score_marketplace_match( + pchome_name, + momo_name, + momo_price=_to_float(row.get("price")), + competitor_price=pchome_price, + ) + score = float(getattr(diagnostics, "score", 0.0) or 0.0) + if score < min_score: + continue + hard_veto = bool(getattr(diagnostics, "hard_veto", False)) + comparison_mode = getattr(diagnostics, "comparison_mode", "exact_identity") + can_auto_compare = not hard_veto and comparison_mode == "exact_identity" + + product_id = str(row.get("product_id") or row.get("goodsCode") or row.get("id") or "").strip() + if not product_id: + product_id = f"momo_candidate_{len(candidates_by_id)}" + existing = candidates_by_id.get(product_id) + if existing and float(existing.get("target_match_score") or 0.0) >= score: + continue + + row.update({ + "product_id": product_id, + "target_pchome_product_id": pchome_id, + 
"target_pchome_name": pchome_name, + "target_match_score": round(score, 3), + "target_search_term": term, + "target_match_reasons": list(getattr(diagnostics, "reasons", ()) or ()), + "target_comparison_mode": comparison_mode, + "target_hard_veto": hard_veto, + "can_auto_compare": can_auto_compare, + "target_review_status": "可直接比價" if can_auto_compare else "需人工確認", + "source_strategy": "pchome_targeted_momo_search", + }) + candidates_by_id[product_id] = row + + candidates = sorted( + candidates_by_id.values(), + key=lambda item: float(item.get("target_match_score") or 0.0), + reverse=True, + ) + if not candidates: + return False, f"已用 {searched_products} 筆 PChome 商品搜尋 MOMO,但沒有找到可用候選", [] + auto_count = sum(1 for item in candidates if item.get("can_auto_compare")) + review_count = len(candidates) - auto_count + return ( + True, + f"已用 {searched_products} 筆 PChome 商品搜尋 MOMO,找到 {len(candidates)} 筆候選(可直接比價 {auto_count} 筆、需人工確認 {review_count} 筆)", + candidates, + ) + + def get_momo_bestsellers(category: str, limit: int = 5) -> Tuple[bool, str, List[dict]]: """ 取得 MOMO 分類熱銷商品 diff --git a/templates/price_comparison.html b/templates/price_comparison.html index d3ea309..a37fd02 100644 --- a/templates/price_comparison.html +++ b/templates/price_comparison.html @@ -1,6 +1,6 @@ {% extends "ewoooc_base.html" %} -{% block title %}比價系統 - EwoooC{% endblock %} +{% block title %}PChome 商品比價決策台 - EwoooC{% endblock %} {% block extra_css %} {% endblock %} @@ -155,22 +325,49 @@ {% block content %}
-

PChome vs MOMO 比價

-

比較 PChome 24h 和 MOMO 美妝商品價格

+

PChome 商品比價決策台

+

先確認兩邊資料是否齊,再找出 PChome 價格偏高、可主推或需要補資料的商品。

+
+
+ + + 今天先做:選擇要檢查的商品範圍 + 請先選品牌或輸入關鍵字,系統才知道要抓哪一批 PChome 商品。 + + +
+
+
+ 資料準備狀態 + 尚未開始 +
+
+
PChome 商品0 筆
+
+
+
+
MOMO 商品0 筆
+
+
+
+
+
- +
-
+
- Step 1: 選擇品牌 + 選擇要檢查的範圍
@@ -181,15 +378,15 @@
- +
-
+
- Step 2: PChome 商品 + 補齊 PChome 商品
- +
-
+
- Step 3: MOMO 商品 + 補齊 MOMO 商品
+ +
0 筆商品 + 0 筆需確認
+
@@ -228,7 +431,7 @@
@@ -253,13 +456,40 @@