diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index 35309fc..ef774e7 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.360 收斂瀏覽器自動開啟與 PChome 熱路徑:`tests/test_image_fetch.py` 改成 `RUN_MOMO_BROWSER_TESTS=1` 才會跑,預設 headless 且關閉 Chrome 密碼管理,避免一般 pytest 自動打開 MOMO 網站與觸發密碼允許提示;scheduler Selenium 同步關閉 password manager/autofill;PChome coverage/review queue 熱查詢改用 `JOIN LATERAL` 取 active 商品最新價,並補 Dashing Diva 品線召回搜尋詞。 - V10.359 導入 Browse.sh 可選爬蟲診斷與強化 MOMO/PChome 色號比對:新增 `BrowseShTool` wrapper、probe CLI 與操作手冊,讓 browse.sh 只作 selector/XHR/network trace 探勘、不進正式 scheduler;matcher 補護甲油/洗手慕斯/足膜精準搜尋詞,保留小數規格,並對唇釉、妝前乳、素顏霜等顯性色號/色系不一致候選做 hard veto,避免同系列不同色號污染正式價差。 - V10.358 補市場情報 MCP 啟用證據審核:新增 `mcp_activation_evidence` read-only builder、GET/POST endpoint、UI redacted evidence 審核面板與 deployment readiness smoke target,讓操作員貼上 env/health/router/telemetry/fallback 證據後判斷能否補齊 external/internal MCP runtime 缺口;API/UI 不保存 payload、不打 health、不啟動 MCP、不執行 docker/SSH、不開 DB、不抓外站、不掛 scheduler,且會阻擋真實 secret 字串與任何 DB write/fetch/scheduler 證據。 - V10.357 補市場情報 MCP 完整度稽核:新增 `mcp_completion_audit` read-only builder、GET endpoint、UI 面板與 deployment readiness smoke target,彙整外部 MCP design/runtime、內部 tool contract/runtime、activation runbook 與 fetch gate 狀態;API/UI 不啟動 MCP、不打 health、不執行 docker/SSH、不開 DB、不寫檔、不抓外站、不掛 scheduler。 diff --git a/config.py b/config.py index acc4a00..87482a4 100644 --- a/config.py +++ b/config.py @@ -323,7 +323,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.359" +SYSTEM_VERSION = "V10.360" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index b95b633..0f2aa13 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -12,6 +12,11 @@ ## 📅 詳細更新日誌 (考古存檔) +### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化 +- **V10.360 browser smoke guard**: `tests/test_image_fetch.py` 改為預設 skip,只有 `RUN_MOMO_BROWSER_TESTS=1` 才會打開外部 MOMO 網站;手動執行時預設 headless,並關閉 Chrome password manager/autofill,避免一般 pytest 觸發瀏覽器與密碼允許提示。 +- **Scheduler Selenium 防彈窗**: `managed_scraper_resources()` 補 `credentials_enable_service=false`、`profile.password_manager_enabled=false` 與 Autofill/PasswordManager feature disable,降低背景 Selenium 觸發密碼管理提示的機率。 +- **PChome dashboard hot path**: `competitor_intel_repository.py` 的 coverage / review queue 最新 MOMO 價格查詢改用 `JOIN LATERAL ... ORDER BY pr.timestamp DESC, pr.id DESC LIMIT 1`,避免 window function 掃描造成首頁與覆核隊列熱路徑變慢;Dashing Diva 召回搜尋補品線與 `magicpress` broad terms。 + ### 2026-05-21:Browse.sh 爬蟲診斷與 PChome 色號比對強化 - **V10.359 Browse.sh optional diagnostics**: 新增 `services/browse_sh_tool.py` 與 `scripts/tools/browse_sh_probe.py`,可檢查或執行 `browse` CLI;目前只定位為 MOMO/PChome selector、XHR 與 network trace 探勘,不進 scheduler 主路徑,也不直接寫正式競品價格。 - **MOMO/PChome matcher 色號防錯配**: `marketplace_product_matcher.py` 補護甲油、洗手慕斯、足膜精準搜尋,搜尋詞保留 `4.2ml` 這類小數規格;唇釉、妝前乳、素顏霜等顯性色號/色系不一致時會 hard veto,避免同系列不同色號被推成正式價差。 diff --git a/scheduler.py b/scheduler.py index 6681bc4..7daec97 100644 --- a/scheduler.py +++ b/scheduler.py @@ -122,7 +122,7 @@ def managed_scraper_resources(window_size='1920,5000', debug=False, timeout=45, options.add_argument('--disable-backgrounding-occluded-windows') options.add_argument('--disable-breakpad') # 禁用崩潰報告 options.add_argument('--disable-component-extensions-with-background-pages') - options.add_argument('--disable-features=TranslateUI') + options.add_argument('--disable-features=TranslateUI,AutofillServerCommunication,PasswordManagerOnboarding,PasswordCheck') options.add_argument('--disable-hang-monitor') # 禁用掛起監控,避免誤殺 options.add_argument('--disable-ipc-flooding-protection') options.add_argument('--disable-popup-blocking') @@ -148,6 +148,8 @@ def managed_scraper_resources(window_size='1920,5000', debug=False, timeout=45, 'intl.accept_languages': 'zh-TW,zh,en', 'profile.default_content_setting_values.notifications': 2, 'profile.managed_default_content_settings.images': 1, # 允許圖片 + 'credentials_enable_service': False, + 'profile.password_manager_enabled': False, } options.add_experimental_option('prefs', prefs) diff --git a/services/competitor_intel_repository.py b/services/competitor_intel_repository.py index 869b691..a29c012 100644 --- a/services/competitor_intel_repository.py +++ b/services/competitor_intel_repository.py @@ -341,10 +341,15 @@ def _fetch_competitor_coverage_uncached(engine) -> dict: SELECT p.id AS product_id, p.i_code AS sku, - pr.price AS momo_price, - ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC, pr.id DESC) AS rn + latest_price.price AS momo_price FROM products p - JOIN price_records pr ON pr.product_id = p.id + JOIN LATERAL ( + SELECT pr.price + FROM price_records pr + WHERE pr.product_id = p.id + ORDER BY pr.timestamp DESC, pr.id DESC + LIMIT 1 + ) latest_price ON TRUE WHERE p.status = 'ACTIVE' ), valid_competitor AS ( @@ -361,22 +366,20 @@ def _fetch_competitor_coverage_uncached(engine) -> dict: ), {attempt_cte} SELECT - (SELECT COUNT(*) FROM latest_momo WHERE rn = 1) AS active_with_price, + (SELECT COUNT(*) FROM latest_momo) AS active_with_price, (SELECT COUNT(*) FROM latest_momo lm - JOIN valid_competitor vc ON vc.sku = lm.sku - WHERE lm.rn = 1) AS valid_matches, + JOIN valid_competitor vc ON vc.sku = lm.sku) AS valid_matches, (SELECT COUNT(*) FROM latest_momo lm LEFT JOIN valid_competitor vc ON vc.sku = lm.sku - WHERE lm.rn = 1 AND vc.sku IS NULL) AS pending, + WHERE vc.sku IS NULL) AS pending, COALESCE(la.attempt_status, 'never_attempted') AS attempt_status, COUNT(*) AS status_count FROM latest_momo lm LEFT JOIN valid_competitor vc ON vc.sku = lm.sku LEFT JOIN latest_attempt la ON la.sku = lm.sku - WHERE lm.rn = 1 - AND vc.sku IS NULL + WHERE vc.sku IS NULL GROUP BY COALESCE(la.attempt_status, 'never_attempted') """) with engine.connect() as conn: @@ -699,7 +702,6 @@ def _review_queue_cte_and_filter( status_values = REVIEW_STATUS_FILTER_GROUPS.get(status_filter) or tuple(ACTIONABLE_ATTEMPT_STATUSES) status_sql = ", ".join(f"'{status}'" for status in status_values) filters = [ - "lm.rn = 1", "vc.sku IS NULL", f"la.attempt_status IN ({status_sql})", ] @@ -718,10 +720,15 @@ def _review_queue_cte_and_filter( p.i_code AS sku, p.name, p.category, - pr.price AS momo_price, - ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC, pr.id DESC) AS rn + latest_price.price AS momo_price FROM products p - JOIN price_records pr ON pr.product_id = p.id + JOIN LATERAL ( + SELECT pr.price + FROM price_records pr + WHERE pr.product_id = p.id + ORDER BY pr.timestamp DESC, pr.id DESC + LIMIT 1 + ) latest_price ON TRUE WHERE p.status = 'ACTIVE' ), valid_competitor AS ( @@ -861,10 +868,15 @@ def _fetch_competitor_review_queue_uncached(engine, limit: int = 12) -> list[dic p.i_code AS sku, p.name, p.category, - pr.price AS momo_price, - ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC, pr.id DESC) AS rn + latest_price.price AS momo_price FROM products p - JOIN price_records pr ON pr.product_id = p.id + JOIN LATERAL ( + SELECT pr.price + FROM price_records pr + WHERE pr.product_id = p.id + ORDER BY pr.timestamp DESC, pr.id DESC + LIMIT 1 + ) latest_price ON TRUE WHERE p.status = 'ACTIVE' ), valid_competitor AS ( @@ -910,8 +922,7 @@ def _fetch_competitor_review_queue_uncached(engine, limit: int = 12) -> list[dic FROM latest_momo lm JOIN latest_attempt la ON la.sku = lm.sku LEFT JOIN valid_competitor vc ON vc.sku = lm.sku - WHERE lm.rn = 1 - AND vc.sku IS NULL + WHERE vc.sku IS NULL AND la.attempt_status IN ( 'unit_comparable', 'refresh_unit_comparable', diff --git a/services/competitor_price_feeder.py b/services/competitor_price_feeder.py index 8ce66dd..3bfd551 100644 --- a/services/competitor_price_feeder.py +++ b/services/competitor_price_feeder.py @@ -202,9 +202,37 @@ def _build_variant_recall_search_plan(momo_name: str, keywords: list[str]) -> li if not ({"dashing", "diva"} <= brand_tokens and "美甲片" in getattr(identity, "searchable_name", "")): return plan - broad_keyword = "dashing diva 時尚潮流美甲片" - for sort in VARIANT_RECALL_SORTS: - plan.append((broad_keyword, sort)) + searchable_name = getattr(identity, "searchable_name", "") + broad_keywords = [] + if "足部時尚潮流美甲片" in searchable_name: + broad_keywords.append("dashing diva 足部時尚潮流美甲片") + elif "頂級璀燦美甲片" in searchable_name: + broad_keywords.append("dashing diva 頂級璀燦美甲片") + elif "時尚潮流美甲片" in searchable_name: + broad_keywords.append("dashing diva 時尚潮流美甲片") + elif "薄型經典美甲片" in searchable_name: + broad_keywords.append("dashing diva 薄型經典美甲片") + + broad_keywords.extend(( + "dashing diva magicpress", + "dashing diva 美甲片", + )) + + seen = {(keyword.lower(), sort) for keyword, sort in plan} + + def append(keyword: str, sort: str | None = None) -> None: + key = (keyword.lower(), sort) + if key in seen: + return + seen.add(key) + plan.append((keyword, sort)) + + for broad_keyword in broad_keywords: + append(broad_keyword, None) + + if broad_keywords: + for sort in VARIANT_RECALL_SORTS: + append(broad_keywords[0], sort) return plan diff --git a/tests/test_competitor_intel_cache.py b/tests/test_competitor_intel_cache.py index 8e61914..bdb245c 100644 --- a/tests/test_competitor_intel_cache.py +++ b/tests/test_competitor_intel_cache.py @@ -78,7 +78,8 @@ def test_competitor_coverage_counts_only_active_product_intersection(): assert "coverage:v4" in source assert "(SELECT COUNT(*) FROM valid_competitor) AS valid_matches" not in coverage_source assert "FROM latest_momo lm\n JOIN valid_competitor vc ON vc.sku = lm.sku" in coverage_source - assert "WHERE lm.rn = 1) AS valid_matches" in coverage_source + assert "FROM products p\n JOIN LATERAL" in coverage_source + assert "WHERE p.status = 'ACTIVE'" in coverage_source def test_competitor_ppt_and_ai_use_momo_minus_pchome_gap_direction(): diff --git a/tests/test_competitor_match_attempts_persistence.py b/tests/test_competitor_match_attempts_persistence.py index 0eb2135..32317d7 100644 --- a/tests/test_competitor_match_attempts_persistence.py +++ b/tests/test_competitor_match_attempts_persistence.py @@ -7,6 +7,37 @@ from types import SimpleNamespace ROOT = Path(__file__).resolve().parents[1] +def _function_body(source: str, function_name: str, next_function_name: str) -> str: + return source.split(f"def {function_name}", 1)[1].split(f"def {next_function_name}", 1)[0] + + +def test_competitor_dashboard_hot_paths_use_latest_price_lateral_lookup(): + source = (ROOT / "services/competitor_intel_repository.py").read_text(encoding="utf-8") + + coverage_body = _function_body( + source, + "_fetch_competitor_coverage_uncached", + "fetch_competitor_gap_trend", + ) + review_cte_body = _function_body( + source, + "_review_queue_cte_and_filter", + "_fetch_competitor_review_queue_page_uncached", + ) + review_sample_body = _function_body( + source, + "_fetch_competitor_review_queue_uncached", + "fetch_competitor_comparison_results", + ) + + for body in (coverage_body, review_cte_body, review_sample_body): + assert "JOIN LATERAL" in body + assert "ORDER BY pr.timestamp DESC, pr.id DESC" in body + assert "LIMIT 1" in body + assert "ROW_NUMBER() OVER (PARTITION BY p.id" not in body + assert "lm.rn = 1" not in body + + def test_competitor_feeder_persists_all_match_attempt_outcomes(): source = (ROOT / "services/competitor_price_feeder.py").read_text(encoding="utf-8") migration = (ROOT / "migrations/023_competitor_match_attempts.sql").read_text(encoding="utf-8") @@ -767,11 +798,65 @@ def test_search_candidates_adds_variant_recall_sorts_for_dashing_diva(monkeypatc assert crawler.calls == [ ("dashing diva 時尚潮流美甲片 極光之藍", None), + ("dashing diva 時尚潮流美甲片", None), + ("dashing diva magicpress", None), + ("dashing diva 美甲片", None), ("dashing diva 時尚潮流美甲片", "sale/dc"), ("dashing diva 時尚潮流美甲片", "new/dc"), ] +def test_search_candidates_uses_line_specific_recall_for_dashing_diva_p_line(monkeypatch): + from services.competitor_price_feeder import _search_pchome_candidates + from services.pchome_crawler import PChomeProduct + + candidate = PChomeProduct( + product_id="MDU5F009AG", + name="Dashing Diva/P 頂級璀燦美甲片-心動陰影 MDU5F009AG", + price=420, + original_price=520, + discount=19, + image_url="", + product_url="https://24h.pchome.com.tw/prod/MDU5F009AG", + stock=20, + store="24h", + rating=4.7, + review_count=8, + is_on_sale=True, + crawled_at=datetime.now(), + ) + + class FakeCrawler: + def __init__(self): + self.calls = [] + + def search_products(self, keyword, **kwargs): + self.calls.append((keyword, kwargs.get("sort"))) + return True, "ok", [candidate] + + monkeypatch.setattr( + "services.marketplace_product_matcher.score_marketplace_match", + lambda *_args, **_kwargs: SimpleNamespace(score=0.72), + ) + + crawler = FakeCrawler() + _search_pchome_candidates( + crawler, + "【DASHING DIVA】MAGICPRESS 頂級璀燦美甲片_心情史努比(史奴比)", + keywords=["dashing diva 頂級璀燦美甲片 心情史努比"], + momo_price=331, + ) + + assert crawler.calls == [ + ("dashing diva 頂級璀燦美甲片 心情史努比", None), + ("dashing diva 頂級璀燦美甲片", None), + ("dashing diva magicpress", None), + ("dashing diva 美甲片", None), + ("dashing diva 頂級璀燦美甲片", "sale/dc"), + ("dashing diva 頂級璀燦美甲片", "new/dc"), + ] + + def test_competitor_feeder_logs_keyword_parser_fallback(monkeypatch, caplog): from services import competitor_price_feeder from services import marketplace_product_matcher diff --git a/tests/test_image_fetch.py b/tests/test_image_fetch.py index fde09a7..63e39fd 100644 --- a/tests/test_image_fetch.py +++ b/tests/test_image_fetch.py @@ -6,6 +6,7 @@ import os import sys import re +import pytest from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.by import By @@ -13,6 +14,11 @@ import time BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +@pytest.mark.skipif( + os.getenv("RUN_MOMO_BROWSER_TESTS") != "1", + reason="Selenium smoke 會開啟外部 MOMO 網站;預設不在一般 pytest 執行。", +) def test_image_fetch(): """測試單一分類的圖片抓取""" print("🧪 開始測試圖片抓取功能...\n") @@ -20,7 +26,8 @@ def test_image_fetch(): # 設定 Selenium (啟用圖片載入) options = Options() options.page_load_strategy = 'eager' - # options.add_argument('--headless=new') # 暫時不用無頭模式,方便觀察 + if os.getenv("MOMO_BROWSER_TEST_VISIBLE") != "1": + options.add_argument('--headless=new') options.add_argument('--window-size=1920,5000') options.add_argument("--disable-blink-features=AutomationControlled") options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36") @@ -30,6 +37,11 @@ def test_image_fetch(): options.add_argument('--disable-extensions') options.add_argument('--disable-dev-shm-usage') options.add_argument('--no-sandbox') + options.add_argument('--disable-features=AutofillServerCommunication,PasswordManagerOnboarding,PasswordCheck') + options.add_experimental_option('prefs', { + 'credentials_enable_service': False, + 'profile.password_manager_enabled': False, + }) driver = webdriver.Chrome(options=options) driver.set_page_load_timeout(45)