[V10.360] 關閉預設瀏覽器 smoke 並優化 PChome 熱路徑
All checks were successful
CD Pipeline / deploy (push) Successful in 1m5s

This commit is contained in:
OoO
2026-05-21 12:07:25 +08:00
parent fb5a4435d7
commit e57793829c
9 changed files with 170 additions and 25 deletions

View File

@@ -4,6 +4,7 @@
================================================================================
【已完成】
- V10.360 收斂瀏覽器自動開啟與 PChome 熱路徑:`tests/test_image_fetch.py` 改成 `RUN_MOMO_BROWSER_TESTS=1` 才會跑,預設 headless 且關閉 Chrome 密碼管理,避免一般 pytest 自動打開 MOMO 網站與觸發密碼允許提示；scheduler Selenium 同步關閉 password manager/autofill；PChome coverage/review queue 熱查詢改用 `JOIN LATERAL` 取 active 商品最新價,並補 Dashing Diva 品線召回搜尋詞。
- V10.359 導入 Browse.sh 可選爬蟲診斷與強化 MOMO/PChome 色號比對:新增 `BrowseShTool` wrapper、probe CLI 與操作手冊,讓 browse.sh 只作 selector/XHR/network trace 探勘、不進正式 scheduler；matcher 補護甲油/洗手慕斯/足膜精準搜尋詞,保留小數規格,並對唇釉、妝前乳、素顏霜等顯性色號/色系不一致候選做 hard veto，避免同系列不同色號污染正式價差。
- V10.358 補市場情報 MCP 啟用證據審核:新增 `mcp_activation_evidence` read-only builder、GET/POST endpoint、UI redacted evidence 審核面板與 deployment readiness smoke target，讓操作員貼上 env/health/router/telemetry/fallback 證據後判斷能否補齊 external/internal MCP runtime 缺口；API/UI 不保存 payload、不打 health、不啟動 MCP、不執行 docker/SSH、不開 DB、不抓外站、不掛 scheduler，且會阻擋真實 secret 字串與任何 DB write/fetch/scheduler 證據。
- V10.357 補市場情報 MCP 完整度稽核:新增 `mcp_completion_audit` read-only builder、GET endpoint、UI 面板與 deployment readiness smoke target，彙整外部 MCP design/runtime、內部 tool contract/runtime、activation runbook 與 fetch gate 狀態；API/UI 不啟動 MCP、不打 health、不執行 docker/SSH、不開 DB、不寫檔、不抓外站、不掛 scheduler。

View File

@@ -323,7 +323,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# 系統版本與路徑
# ==========================================
SYSTEM_VERSION = "V10.359"
SYSTEM_VERSION = "V10.360"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # 用於模板顯示

View File

@@ -12,6 +12,11 @@
## 📅 詳細更新日誌 (考古存檔)
### 2026-05-21：瀏覽器測試守門與 PChome 熱路徑優化
- **V10.360 browser smoke guard**: `tests/test_image_fetch.py` 改為預設 skip，只有 `RUN_MOMO_BROWSER_TESTS=1` 才會打開外部 MOMO 網站;手動執行時預設 headless，並關閉 Chrome password manager/autofill，避免一般 pytest 觸發瀏覽器與密碼允許提示。
- **Scheduler Selenium 防彈窗**: `managed_scraper_resources()` 加入 `credentials_enable_service=false`、`profile.password_manager_enabled=false` 與 Autofill/PasswordManager feature disable，降低背景 Selenium 觸發密碼管理提示的機率。
- **PChome dashboard hot path**: `competitor_intel_repository.py` 的 coverage / review queue 最新 MOMO 價格查詢改用 `JOIN LATERAL ... ORDER BY pr.timestamp DESC, pr.id DESC LIMIT 1`,避免 window function 掃描造成首頁與覆核隊列熱路徑變慢；Dashing Diva 召回搜尋補品線與 `magicpress` broad terms。
### 2026-05-21：Browse.sh 爬蟲診斷與 PChome 色號比對強化
- **V10.359 Browse.sh optional diagnostics**: 新增 `services/browse_sh_tool.py` 與 `scripts/tools/browse_sh_probe.py`,可檢查或執行 `browse` CLI；目前只定位為 MOMO/PChome selector、XHR 與 network trace 探勘,不進 scheduler 主路徑,也不直接寫正式競品價格。
- **MOMO/PChome matcher 色號防錯配**: `marketplace_product_matcher.py` 補護甲油、洗手慕斯、足膜精準搜尋,搜尋詞保留 `4.2ml` 這類小數規格;唇釉、妝前乳、素顏霜等顯性色號/色系不一致時會 hard veto，避免同系列不同色號被推成正式價差。

View File

@@ -122,7 +122,7 @@ def managed_scraper_resources(window_size='1920,5000', debug=False, timeout=45,
options.add_argument('--disable-backgrounding-occluded-windows')
options.add_argument('--disable-breakpad') # 禁用崩潰報告
options.add_argument('--disable-component-extensions-with-background-pages')
options.add_argument('--disable-features=TranslateUI')
options.add_argument('--disable-features=TranslateUI,AutofillServerCommunication,PasswordManagerOnboarding,PasswordCheck')
options.add_argument('--disable-hang-monitor') # 禁用掛起監控,避免誤殺
options.add_argument('--disable-ipc-flooding-protection')
options.add_argument('--disable-popup-blocking')
@@ -148,6 +148,8 @@ def managed_scraper_resources(window_size='1920,5000', debug=False, timeout=45,
'intl.accept_languages': 'zh-TW,zh,en',
'profile.default_content_setting_values.notifications': 2,
'profile.managed_default_content_settings.images': 1, # 允許圖片
'credentials_enable_service': False,
'profile.password_manager_enabled': False,
}
options.add_experimental_option('prefs', prefs)

View File

@@ -341,10 +341,15 @@ def _fetch_competitor_coverage_uncached(engine) -> dict:
SELECT
p.id AS product_id,
p.i_code AS sku,
pr.price AS momo_price,
ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC, pr.id DESC) AS rn
latest_price.price AS momo_price
FROM products p
JOIN price_records pr ON pr.product_id = p.id
JOIN LATERAL (
SELECT pr.price
FROM price_records pr
WHERE pr.product_id = p.id
ORDER BY pr.timestamp DESC, pr.id DESC
LIMIT 1
) latest_price ON TRUE
WHERE p.status = 'ACTIVE'
),
valid_competitor AS (
@@ -361,22 +366,20 @@ def _fetch_competitor_coverage_uncached(engine) -> dict:
),
{attempt_cte}
SELECT
(SELECT COUNT(*) FROM latest_momo WHERE rn = 1) AS active_with_price,
(SELECT COUNT(*) FROM latest_momo) AS active_with_price,
(SELECT COUNT(*)
FROM latest_momo lm
JOIN valid_competitor vc ON vc.sku = lm.sku
WHERE lm.rn = 1) AS valid_matches,
JOIN valid_competitor vc ON vc.sku = lm.sku) AS valid_matches,
(SELECT COUNT(*)
FROM latest_momo lm
LEFT JOIN valid_competitor vc ON vc.sku = lm.sku
WHERE lm.rn = 1 AND vc.sku IS NULL) AS pending,
WHERE vc.sku IS NULL) AS pending,
COALESCE(la.attempt_status, 'never_attempted') AS attempt_status,
COUNT(*) AS status_count
FROM latest_momo lm
LEFT JOIN valid_competitor vc ON vc.sku = lm.sku
LEFT JOIN latest_attempt la ON la.sku = lm.sku
WHERE lm.rn = 1
AND vc.sku IS NULL
WHERE vc.sku IS NULL
GROUP BY COALESCE(la.attempt_status, 'never_attempted')
""")
with engine.connect() as conn:
@@ -699,7 +702,6 @@ def _review_queue_cte_and_filter(
status_values = REVIEW_STATUS_FILTER_GROUPS.get(status_filter) or tuple(ACTIONABLE_ATTEMPT_STATUSES)
status_sql = ", ".join(f"'{status}'" for status in status_values)
filters = [
"lm.rn = 1",
"vc.sku IS NULL",
f"la.attempt_status IN ({status_sql})",
]
@@ -718,10 +720,15 @@ def _review_queue_cte_and_filter(
p.i_code AS sku,
p.name,
p.category,
pr.price AS momo_price,
ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC, pr.id DESC) AS rn
latest_price.price AS momo_price
FROM products p
JOIN price_records pr ON pr.product_id = p.id
JOIN LATERAL (
SELECT pr.price
FROM price_records pr
WHERE pr.product_id = p.id
ORDER BY pr.timestamp DESC, pr.id DESC
LIMIT 1
) latest_price ON TRUE
WHERE p.status = 'ACTIVE'
),
valid_competitor AS (
@@ -861,10 +868,15 @@ def _fetch_competitor_review_queue_uncached(engine, limit: int = 12) -> list[dic
p.i_code AS sku,
p.name,
p.category,
pr.price AS momo_price,
ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC, pr.id DESC) AS rn
latest_price.price AS momo_price
FROM products p
JOIN price_records pr ON pr.product_id = p.id
JOIN LATERAL (
SELECT pr.price
FROM price_records pr
WHERE pr.product_id = p.id
ORDER BY pr.timestamp DESC, pr.id DESC
LIMIT 1
) latest_price ON TRUE
WHERE p.status = 'ACTIVE'
),
valid_competitor AS (
@@ -910,8 +922,7 @@ def _fetch_competitor_review_queue_uncached(engine, limit: int = 12) -> list[dic
FROM latest_momo lm
JOIN latest_attempt la ON la.sku = lm.sku
LEFT JOIN valid_competitor vc ON vc.sku = lm.sku
WHERE lm.rn = 1
AND vc.sku IS NULL
WHERE vc.sku IS NULL
AND la.attempt_status IN (
'unit_comparable',
'refresh_unit_comparable',

View File

@@ -202,9 +202,37 @@ def _build_variant_recall_search_plan(momo_name: str, keywords: list[str]) -> li
if not ({"dashing", "diva"} <= brand_tokens and "美甲片" in getattr(identity, "searchable_name", "")):
return plan
broad_keyword = "dashing diva 時尚潮流美甲片"
for sort in VARIANT_RECALL_SORTS:
plan.append((broad_keyword, sort))
searchable_name = getattr(identity, "searchable_name", "")
broad_keywords = []
if "足部時尚潮流美甲片" in searchable_name:
broad_keywords.append("dashing diva 足部時尚潮流美甲片")
elif "頂級璀燦美甲片" in searchable_name:
broad_keywords.append("dashing diva 頂級璀燦美甲片")
elif "時尚潮流美甲片" in searchable_name:
broad_keywords.append("dashing diva 時尚潮流美甲片")
elif "薄型經典美甲片" in searchable_name:
broad_keywords.append("dashing diva 薄型經典美甲片")
broad_keywords.extend((
"dashing diva magicpress",
"dashing diva 美甲片",
))
seen = {(keyword.lower(), sort) for keyword, sort in plan}
def append(keyword: str, sort: str | None = None) -> None:
key = (keyword.lower(), sort)
if key in seen:
return
seen.add(key)
plan.append((keyword, sort))
for broad_keyword in broad_keywords:
append(broad_keyword, None)
if broad_keywords:
for sort in VARIANT_RECALL_SORTS:
append(broad_keywords[0], sort)
return plan

View File

@@ -78,7 +78,8 @@ def test_competitor_coverage_counts_only_active_product_intersection():
assert "coverage:v4" in source
assert "(SELECT COUNT(*) FROM valid_competitor) AS valid_matches" not in coverage_source
assert "FROM latest_momo lm\n JOIN valid_competitor vc ON vc.sku = lm.sku" in coverage_source
assert "WHERE lm.rn = 1) AS valid_matches" in coverage_source
assert "FROM products p\n JOIN LATERAL" in coverage_source
assert "WHERE p.status = 'ACTIVE'" in coverage_source
def test_competitor_ppt_and_ai_use_momo_minus_pchome_gap_direction():

View File

@@ -7,6 +7,37 @@ from types import SimpleNamespace
ROOT = Path(__file__).resolve().parents[1]
def _function_body(source: str, function_name: str, next_function_name: str) -> str:
return source.split(f"def {function_name}", 1)[1].split(f"def {next_function_name}", 1)[0]
def test_competitor_dashboard_hot_paths_use_latest_price_lateral_lookup():
    """Check the three dashboard hot-path queries use the LATERAL latest-price lookup.

    Each inspected function body must contain the ``JOIN LATERAL`` subquery
    fragments and must no longer contain the ``ROW_NUMBER()`` window function
    or the ``lm.rn = 1`` filter.
    """
    repository_text = (ROOT / "services/competitor_intel_repository.py").read_text(encoding="utf-8")

    # (function to inspect, function that marks the end of its body)
    boundaries = (
        ("_fetch_competitor_coverage_uncached", "fetch_competitor_gap_trend"),
        ("_review_queue_cte_and_filter", "_fetch_competitor_review_queue_page_uncached"),
        ("_fetch_competitor_review_queue_uncached", "fetch_competitor_comparison_results"),
    )
    # Slice every body up front so a missing marker fails before any assertion runs.
    bodies = [_function_body(repository_text, start, end) for start, end in boundaries]

    required_fragments = (
        "JOIN LATERAL",
        "ORDER BY pr.timestamp DESC, pr.id DESC",
        "LIMIT 1",
    )
    forbidden_fragments = (
        "ROW_NUMBER() OVER (PARTITION BY p.id",
        "lm.rn = 1",
    )

    for body in bodies:
        for fragment in required_fragments:
            assert fragment in body
        for fragment in forbidden_fragments:
            assert fragment not in body
def test_competitor_feeder_persists_all_match_attempt_outcomes():
source = (ROOT / "services/competitor_price_feeder.py").read_text(encoding="utf-8")
migration = (ROOT / "migrations/023_competitor_match_attempts.sql").read_text(encoding="utf-8")
@@ -767,11 +798,65 @@ def test_search_candidates_adds_variant_recall_sorts_for_dashing_diva(monkeypatc
assert crawler.calls == [
("dashing diva 時尚潮流美甲片 極光之藍", None),
("dashing diva 時尚潮流美甲片", None),
("dashing diva magicpress", None),
("dashing diva 美甲片", None),
("dashing diva 時尚潮流美甲片", "sale/dc"),
("dashing diva 時尚潮流美甲片", "new/dc"),
]
def test_search_candidates_uses_line_specific_recall_for_dashing_diva_p_line(monkeypatch):
    """Check the searches issued for a Dashing Diva 頂級璀燦美甲片 product.

    The expected sequence is the caller's keyword, the line-specific broad
    keyword, the two generic broad keywords, and then the line-specific
    keyword again with each of the two sort orders.
    """
    from services.competitor_price_feeder import _search_pchome_candidates
    from services.pchome_crawler import PChomeProduct

    pchome_hit = PChomeProduct(
        product_id="MDU5F009AG",
        name="Dashing Diva/P 頂級璀燦美甲片-心動陰影 MDU5F009AG",
        price=420,
        original_price=520,
        discount=19,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/MDU5F009AG",
        stock=20,
        store="24h",
        rating=4.7,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class RecordingCrawler:
        """Crawler stub that records every (keyword, sort) it is asked for."""

        def __init__(self):
            self.calls = []

        def search_products(self, keyword, **kwargs):
            requested_sort = kwargs.get("sort")
            self.calls.append((keyword, requested_sort))
            return True, "ok", [pchome_hit]

    def _fixed_score(*_args, **_kwargs):
        # Replace the matcher's scoring function with one that always returns 0.72.
        return SimpleNamespace(score=0.72)

    monkeypatch.setattr(
        "services.marketplace_product_matcher.score_marketplace_match",
        _fixed_score,
    )

    crawler = RecordingCrawler()
    _search_pchome_candidates(
        crawler,
        "【DASHING DIVA】MAGICPRESS 頂級璀燦美甲片_心情史努比(史奴比)",
        keywords=["dashing diva 頂級璀燦美甲片 心情史努比"],
        momo_price=331,
    )

    line_keyword = "dashing diva 頂級璀燦美甲片"
    expected_calls = [
        ("dashing diva 頂級璀燦美甲片 心情史努比", None),
        (line_keyword, None),
        ("dashing diva magicpress", None),
        ("dashing diva 美甲片", None),
        (line_keyword, "sale/dc"),
        (line_keyword, "new/dc"),
    ]
    assert crawler.calls == expected_calls
def test_competitor_feeder_logs_keyword_parser_fallback(monkeypatch, caplog):
from services import competitor_price_feeder
from services import marketplace_product_matcher

View File

@@ -6,6 +6,7 @@
import os
import sys
import re
import pytest
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
@@ -13,6 +14,11 @@ import time
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
@pytest.mark.skipif(
os.getenv("RUN_MOMO_BROWSER_TESTS") != "1",
reason="Selenium smoke 會開啟外部 MOMO 網站;預設不在一般 pytest 執行。",
)
def test_image_fetch():
"""測試單一分類的圖片抓取"""
print("🧪 開始測試圖片抓取功能...\n")
@@ -20,7 +26,8 @@ def test_image_fetch():
# 設定 Selenium (啟用圖片載入)
options = Options()
options.page_load_strategy = 'eager'
# options.add_argument('--headless=new') # 暫時不用無頭模式,方便觀察
if os.getenv("MOMO_BROWSER_TEST_VISIBLE") != "1":
options.add_argument('--headless=new')
options.add_argument('--window-size=1920,5000')
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36")
@@ -30,6 +37,11 @@ def test_image_fetch():
options.add_argument('--disable-extensions')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--no-sandbox')
options.add_argument('--disable-features=AutofillServerCommunication,PasswordManagerOnboarding,PasswordCheck')
options.add_experimental_option('prefs', {
'credentials_enable_service': False,
'profile.password_manager_enabled': False,
})
driver = webdriver.Chrome(options=options)
driver.set_page_load_timeout(45)