diff --git a/.env.example b/.env.example index 2281c23..2adc4a1 100644 --- a/.env.example +++ b/.env.example @@ -432,6 +432,12 @@ PCHOME_FEEDER_RATE_DELAY=1.0 PCHOME_FEEDER_SEARCH_LIMIT=20 PCHOME_FEEDER_MAX_SEARCH_TERMS=5 PCHOME_FEEDER_SEARCH_MAX_PAGES=2 +# browse.sh 只作低信心/無結果的診斷計畫;正式排程預設不自動開瀏覽器。 +PCHOME_FEEDER_BROWSE_SH_DIAGNOSTIC_ENABLED=true +PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED=false +PCHOME_FEEDER_BROWSE_SH_TIMEOUT=20 +PCHOME_FEEDER_BROWSE_SH_MAX_PER_RUN=3 +PCHOME_FEEDER_BROWSE_SH_OUTPUT_PREVIEW_CHARS=1200 PCHOME_BACKFILL_STATUS_PATH=/app/data/pchome_match_backfill_status.json PCHOME_BACKFILL_ACTIVE_TTL_SECONDS=7200 diff --git a/config.py b/config.py index 8abbfce..2a089b7 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.380" +SYSTEM_VERSION = "V10.381" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/code_modularization_inventory_20260430.md b/docs/memory/code_modularization_inventory_20260430.md index 3720015..1afc31c 100644 --- a/docs/memory/code_modularization_inventory_20260430.md +++ b/docs/memory/code_modularization_inventory_20260430.md @@ -42,6 +42,7 @@ - 2026-05-21 追記:同步 EDM 失效頁 alert guard 與 REJURAN 唇膏寬價差 exact-identity matcher 更新後的 `scheduler.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 - 2026-05-21 追記:同步過期 EDM / seasonal promo crawler 排程改為 opt-in、NIVEA/OPI 搜尋 noise 與 identity anchor 補強後的 `run_scheduler.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 - 2026-05-21 追記:同步 Recipe Box 多效提亮防曬霜同款漂移比對補強後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 +- 2026-05-21 追記:同步 browse.sh 診斷計畫寫入 `competitor_match_attempts` 後的 `services/competitor_price_feeder.py` 行數;此處只更新 inventory,不變更模組化決策。 ## 達到或超過 800 行檔案清單 @@ -75,7 +76,7 @@ | 1042 | `services/code_review_pipeline_service.py` | P2 Code review pipeline service | scan orchestration / finding normalization / persistence adapter | | 953 | `routes/export_routes.py` | P2 Export flow | export command/router glue / file path / download orchestration | | 816 | `services/ppt_vision_service.py` | P2 PPT vision QA service | runtime state / queue status / model probe / audit execution 分離 | -| 1733 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / retryable candidate recovery / cache strategy | +| 2149 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / retryable candidate recovery / cache strategy | | 1327 | `services/competitor_intel_repository.py` | P2 competitor intel repository | review queue query / cache shaping / formatting helpers | | 805 | `routes/bot_api_routes.py` | P2 Bot API Blueprint | route glue / bot action service | | 1319 | `routes/market_intel_review_report_routes.py` | P2 market intel review report Blueprint | review report route glue / export payload / phase handoff orchestration | diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index 51e4e82..10af1a2 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化 +- **V10.381 browse.sh 比價診斷計畫**: PChome feeder 在 `no_result`、`no_match`、低信心、單位價覆核、既有配對保護與爬蟲錯誤時,會把 read-only `browse_diagnostic_json` 寫入 `competitor_match_attempts`,內含 PChome search URL 與建議 `browse get/open` 命令;正式排程仍 API-first,`PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED=false` 預設不自動開瀏覽器,避免瀏覽器彈窗、登入或密碼提示干擾。 - **V10.380 111 Ollama final fallback 收斂**: 111 Mac fallback 從救急路徑改成更短的保護路徑,`OLLAMA_111_MAX_TIMEOUT` 預設由 45s 收緊到 20s,並新增 `OLLAMA_111_NUM_PREDICT=512` 輸出上限;落到 111 時仍會降級重模型到 `llama3.2:latest`、縮 `num_ctx=4096`、`keep_alive=5m`,避免 GCP-A/GCP-B 短暫 timeout 後把長篇 Hermes/OpenClaw 工作轉嫁到 111 造成 swap 與 load 飆高。 - **V10.379 MCP runtime promotion gate**: 新增 `mcp_runtime_promotion` read-only builder、GET/POST endpoint、UI promotion package 審核面板與 deployment readiness smoke target,將 MCP activation evidence 與 runtime smoke receipt 合併審核,讓 completion audit 的 runtime 缺口可由人工收據明確補齊。 - **V10.379 只讀安全邊界**: 本階段不保存 payload、不打 health、不開 DB、不抓外站、不掛 scheduler,也不會因 promotion 通過自動打開人工 fetch gate;正式 fetch / DB write / scheduler attach 仍需各自獨立 gate。 diff --git a/migrations/042_add_browse_diagnostics_to_match_attempts.sql b/migrations/042_add_browse_diagnostics_to_match_attempts.sql new file mode 100644 index 0000000..a715e3b --- /dev/null +++ b/migrations/042_add_browse_diagnostics_to_match_attempts.sql @@ -0,0 +1,23 @@ +-- ============================================================================= +-- Migration 042: PChome browse.sh 診斷計畫欄位 +-- MOMO PRO — Price comparison fallback observability +-- 2026-05-21 台北 +-- ============================================================================= +-- 說明: +-- competitor_match_attempts 補存 browse.sh 診斷計畫 JSON。 +-- 正式價格爬蟲仍維持 API-first;此欄位只保存低信心、無結果、 +-- 單位價覆核或外部爬蟲錯誤時的 read-only probe plan,供人工或 +-- opt-in 執行 `browse get/open` 做 selector / XHR / 商品頁取證。 +-- ============================================================================= + +ALTER TABLE IF EXISTS competitor_match_attempts + ADD COLUMN IF NOT EXISTS browse_diagnostic_json JSONB; + +CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_browse_diag_time + ON competitor_match_attempts (attempted_at DESC) + WHERE browse_diagnostic_json IS NOT NULL; + +DO $$ +BEGIN + RAISE NOTICE '✅ Migration 042 完成 — PChome browse.sh 診斷計畫欄位已補齊'; +END $$; diff --git a/services/competitor_price_feeder.py b/services/competitor_price_feeder.py index f305749..c31e611 100644 --- a/services/competitor_price_feeder.py +++ b/services/competitor_price_feeder.py @@ -31,6 +31,7 @@ import time from dataclasses import dataclass from datetime import datetime, timedelta, timezone from typing import Optional +from urllib.parse import quote_plus logger = logging.getLogger(__name__) @@ -47,6 +48,11 @@ TTL_HOURS = 6 # competitor_prices 快取有效期 REQUEST_TIMEOUT = float(os.getenv("PCHOME_FEEDER_TIMEOUT", "12")) # 避免外部搜尋 API 長時間卡住排程 VARIANT_RECALL_SORTS = ("sale/dc", "new/dc") RECOVERABLE_LOW_SCORE_FLOOR = max(MIN_MATCH_SCORE - 0.03, 0.72) +BROWSE_SH_DIAGNOSTIC_ENABLED = os.getenv("PCHOME_FEEDER_BROWSE_SH_DIAGNOSTIC_ENABLED", "true").lower() in {"1", "true", "yes", "on"} +BROWSE_SH_EXECUTE_ENABLED = os.getenv("PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED", "false").lower() in {"1", "true", "yes", "on"} +BROWSE_SH_TIMEOUT_SECONDS = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_TIMEOUT", "20")) +BROWSE_SH_MAX_EXECUTIONS_PER_RUN = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_MAX_PER_RUN", "3")) +BROWSE_SH_OUTPUT_PREVIEW_CHARS = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_OUTPUT_PREVIEW_CHARS", "1200")) RECOVERABLE_DIAGNOSTIC_REASONS = { "strong_product_line_match", "strong_exact_spec_match", @@ -95,6 +101,43 @@ def _classify_low_score_attempt(score: float, diagnostics) -> str: return "true_low_confidence" +def _has_variant_selection_gap( + momo_name: str, + ranked_matches: list[tuple], + best_score: float, +) -> bool: + """True when source lacks explicit variant selection but top candidates require one.""" + try: + from services.marketplace_product_matcher import ( + _explicit_variant_option_tokens, + parse_product_identity, + ) + except Exception: + return False + + source_identity = parse_product_identity(momo_name) + source_options = set(_explicit_variant_option_tokens(source_identity)) + if re.search(r"任選\s*[一二兩三四五六七八九十0-9]+\s*款", momo_name): + source_options -= {str(value) for value in range(1, 11)} + source_options -= {f"{value:02d}" for value in range(1, 11)} + if source_options: + return False + + threshold = max(best_score - 0.02, RECOVERABLE_LOW_SCORE_FLOOR) + option_buckets: set[str] = set() + for product, score, diagnostics in ranked_matches[:5]: + if getattr(diagnostics, "hard_veto", False) or score < threshold: + continue + candidate_identity = parse_product_identity(getattr(product, "name", "") or "") + options = _explicit_variant_option_tokens(candidate_identity) + if len(options) >= 2: + return True + option_buckets.update(options) + if len(option_buckets) >= 2: + return True + return False + + def _extract_tags(pchome_product) -> list: """ 從 PChomeProduct 物件提取語意標籤 @@ -286,6 +329,66 @@ def _match_diagnostics_payload(diagnostics) -> dict: } +def _pchome_search_url(keyword: str) -> str: + return f"https://ecshweb.pchome.com.tw/search/v3.3/?q={quote_plus(keyword or '')}" + + +def _build_browse_sh_diagnostic_payload( + momo_name: str, + search_terms: list[str] = None, + reason: str = "unknown", + best_product=None, + best_score: float = None, + diagnostics=None, + candidate_count: int = 0, +) -> dict: + """Build a read-only browse.sh probe plan for low-confidence PChome cases.""" + if not BROWSE_SH_DIAGNOSTIC_ENABLED: + return {} + + terms = _dedupe_terms(search_terms or _build_search_keywords(momo_name))[:3] + urls = [_pchome_search_url(term) for term in terms] + product_url = getattr(best_product, "product_url", None) + if product_url: + urls.append(product_url) + urls = list(dict.fromkeys(url for url in urls if url)) + primary_url = urls[0] if urls else _pchome_search_url(momo_name) + + diagnostic_payload = _match_diagnostics_payload(diagnostics) + return { + "tool": "browse.sh", + "mode": "execute_on_demand" if BROWSE_SH_EXECUTE_ENABLED else "plan_only", + "reason": reason, + "execute_enabled": BROWSE_SH_EXECUTE_ENABLED, + "timeout_seconds": BROWSE_SH_TIMEOUT_SECONDS, + "candidate_count": int(candidate_count or 0), + "momo_name": (momo_name or "")[:300], + "search_terms": terms, + "urls": urls, + "suggested_commands": [ + { + "purpose": "static_fetch_first_page", + "args": ["get", primary_url], + }, + { + "purpose": "manual_browser_probe", + "args": ["open", primary_url], + }, + ], + "best_candidate": { + "product_id": getattr(best_product, "product_id", None), + "name": (getattr(best_product, "name", None) or "")[:300] or None, + "price": getattr(best_product, "price", None), + "url": product_url, + "score": best_score, + } if best_product else None, + "diagnostic_codes": diagnostic_payload.get("reasons") or [], + "comparison_mode": diagnostic_payload.get("comparison_mode"), + "hard_veto": diagnostic_payload.get("hard_veto"), + "execution": {"status": "disabled"}, + } + + def _product_snapshot_payload(product) -> dict: payload = { "competitor_product_url": None, @@ -471,6 +574,7 @@ class CompetitorPriceFeeder: self._history_table_ready = False self._attempt_table_ready = False self._price_table_columns_ready = False + self._browse_sh_executions = 0 def _ensure_table_columns(self, conn, table: str, column_specs: list[tuple[str, str]]) -> None: """補齊既有表欄位;避免正式端舊表在新 INSERT 時炸掉。""" @@ -613,6 +717,7 @@ class CompetitorPriceFeeder: comparison_mode VARCHAR(40), hard_veto BOOLEAN, diagnostic_codes JSONB, + browse_diagnostic_json JSONB, error_message TEXT, attempted_at TIMESTAMP NOT NULL DEFAULT NOW() ) @@ -648,6 +753,7 @@ class CompetitorPriceFeeder: comparison_mode VARCHAR(40), hard_veto BOOLEAN, diagnostic_codes TEXT, + browse_diagnostic_json TEXT, error_message TEXT, attempted_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ) @@ -669,9 +775,64 @@ class CompetitorPriceFeeder: ("comparison_mode", "VARCHAR(40)"), ("hard_veto", "BOOLEAN"), ("diagnostic_codes", "JSONB" if conn.dialect.name == "postgresql" else "TEXT"), + ("browse_diagnostic_json", "JSONB" if conn.dialect.name == "postgresql" else "TEXT"), ]) self._attempt_table_ready = True + def _prepare_browse_diagnostic( + self, + momo_name: str, + search_terms: list = None, + reason: str = "unknown", + best_product=None, + best_score: float = None, + diagnostics=None, + candidate_count: int = 0, + ) -> dict: + """Return browse.sh diagnostic evidence; CLI execution remains opt-in and rate-limited.""" + payload = _build_browse_sh_diagnostic_payload( + momo_name, + search_terms=search_terms, + reason=reason, + best_product=best_product, + best_score=best_score, + diagnostics=diagnostics, + candidate_count=candidate_count, + ) + if not payload or not BROWSE_SH_EXECUTE_ENABLED: + return payload + if self._browse_sh_executions >= BROWSE_SH_MAX_EXECUTIONS_PER_RUN: + payload["execution"] = {"status": "rate_limited"} + return payload + + command_args = tuple((payload.get("suggested_commands") or [{}])[0].get("args") or ()) + if not command_args: + payload["execution"] = {"status": "missing_command"} + return payload + + try: + from services.browse_sh_tool import BrowseShTool + + self._browse_sh_executions += 1 + result = BrowseShTool(timeout_seconds=BROWSE_SH_TIMEOUT_SECONDS).run( + command_args, + timeout_seconds=BROWSE_SH_TIMEOUT_SECONDS, + ) + payload["execution"] = { + "status": "ok" if result.ok else "failed", + "returncode": result.returncode, + "timed_out": result.timed_out, + "unavailable_reason": result.unavailable_reason, + "stdout_preview": (result.stdout or "")[:BROWSE_SH_OUTPUT_PREVIEW_CHARS], + "stderr_preview": (result.stderr or "")[:BROWSE_SH_OUTPUT_PREVIEW_CHARS], + } + except Exception as exc: + payload["execution"] = { + "status": "error", + "error": str(exc)[:500], + } + return payload + def _record_match_attempt( self, sku: str, @@ -684,6 +845,7 @@ class CompetitorPriceFeeder: best_product=None, best_score: float = None, diagnostics=None, + browse_diagnostic: dict = None, error_message: str = None, source: str = "pchome", ) -> None: @@ -695,9 +857,15 @@ class CompetitorPriceFeeder: search_terms_expr = "CAST(:search_terms AS jsonb)" if conn.dialect.name == "postgresql" else ":search_terms" json_cast = "CAST(:match_diagnostic_json AS jsonb)" if conn.dialect.name == "postgresql" else ":match_diagnostic_json" codes_cast = "CAST(:diagnostic_codes AS jsonb)" if conn.dialect.name == "postgresql" else ":diagnostic_codes" + browse_cast = "CAST(:browse_diagnostic_json AS jsonb)" if conn.dialect.name == "postgresql" else ":browse_diagnostic_json" diagnostic_payload = _match_diagnostics_payload(diagnostics) diagnostic_codes = diagnostic_payload.get("reasons") or [] product_payload = _product_snapshot_payload(best_product) + browse_diagnostic_json = ( + json.dumps(browse_diagnostic, ensure_ascii=False) + if browse_diagnostic + else None + ) conn.execute(text(f""" INSERT INTO competitor_match_attempts (sku, source, momo_product_id, momo_product_name, momo_price, @@ -706,6 +874,7 @@ class CompetitorPriceFeeder: competitor_product_url, competitor_image_url, competitor_stock, best_competitor_price, best_match_score, match_diagnostic_json, comparison_mode, hard_veto, diagnostic_codes, + browse_diagnostic_json, error_message, attempted_at) VALUES @@ -715,6 +884,7 @@ class CompetitorPriceFeeder: :competitor_product_url, :competitor_image_url, :competitor_stock, :best_price, :best_score, {json_cast}, :comparison_mode, :hard_veto, {codes_cast}, + {browse_cast}, :error_message, CURRENT_TIMESTAMP) """), { @@ -735,6 +905,7 @@ class CompetitorPriceFeeder: "comparison_mode": diagnostic_payload.get("comparison_mode"), "hard_veto": diagnostic_payload.get("hard_veto"), "diagnostic_codes": json.dumps(diagnostic_codes, ensure_ascii=False) if diagnostic_codes else None, + "browse_diagnostic_json": browse_diagnostic_json, "error_message": (error_message or "")[:1000] or None, }) @@ -1197,6 +1368,12 @@ class CompetitorPriceFeeder: products = _search_pchome_candidates(crawler, momo_name, search_terms, momo_price=momo_price) if not products: logger.debug(f"[Feeder] {sku} 無搜尋結果,跳過") + browse_diagnostic = self._prepare_browse_diagnostic( + momo_name, + search_terms=search_terms, + reason="no_result", + candidate_count=0, + ) self._record_match_attempt( sku, momo_name, @@ -1205,6 +1382,7 @@ class CompetitorPriceFeeder: search_terms=search_terms, candidate_count=0, attempt_status="no_result", + browse_diagnostic=browse_diagnostic, source=source, ) attempts_written += 1 @@ -1213,6 +1391,12 @@ class CompetitorPriceFeeder: ranked_matches = _rank_match_details(momo_name, products, momo_price=momo_price) if not ranked_matches: + browse_diagnostic = self._prepare_browse_diagnostic( + momo_name, + search_terms=search_terms, + reason="no_match", + candidate_count=len(products), + ) self._record_match_attempt( sku, momo_name, @@ -1221,6 +1405,7 @@ class CompetitorPriceFeeder: search_terms=search_terms, candidate_count=len(products), attempt_status="no_match", + browse_diagnostic=browse_diagnostic, source=source, ) attempts_written += 1 @@ -1305,6 +1490,15 @@ class CompetitorPriceFeeder: f"[Feeder] {sku} 候選屬單位價可比但非同販售組合," f"不寫入正式價差 | {_format_match_diagnostics(diagnostics)}" ) + browse_diagnostic = self._prepare_browse_diagnostic( + momo_name, + search_terms=search_terms, + reason="unit_comparable", + best_product=best_product, + best_score=score, + diagnostics=diagnostics, + candidate_count=len(products), + ) self._record_match_attempt( sku, momo_name, @@ -1316,6 +1510,7 @@ class CompetitorPriceFeeder: best_product=best_product, best_score=score, diagnostics=diagnostics, + browse_diagnostic=browse_diagnostic, error_message=_format_match_diagnostics(diagnostics), source=source, ) @@ -1325,10 +1520,24 @@ class CompetitorPriceFeeder: if score < MIN_MATCH_SCORE and not manual_accept_override: attempt_status = _classify_low_score_attempt(score, diagnostics) + if ( + attempt_status == "recoverable_low_score" + and _has_variant_selection_gap(momo_name, ranked_matches, score) + ): + attempt_status = "true_low_confidence" logger.debug( f"[Feeder] {sku} 比對分數過低 ({score:.3f} < {MIN_MATCH_SCORE})," f"{_format_match_diagnostics(diagnostics)}" ) + browse_diagnostic = self._prepare_browse_diagnostic( + momo_name, + search_terms=search_terms, + reason=attempt_status, + best_product=best_product, + best_score=score, + diagnostics=diagnostics, + candidate_count=len(products), + ) self._record_match_attempt( sku, momo_name, @@ -1340,6 +1549,7 @@ class CompetitorPriceFeeder: best_product=best_product, best_score=score, diagnostics=diagnostics, + browse_diagnostic=browse_diagnostic, error_message=_format_match_diagnostics(diagnostics), source=source, ) @@ -1365,6 +1575,15 @@ class CompetitorPriceFeeder: write_reason = "manual_accept_override" if not should_write: logger.info(f"[Feeder] {sku} 進入人工覆核,不覆蓋既有配對 | {write_reason}") + browse_diagnostic = self._prepare_browse_diagnostic( + momo_name, + search_terms=search_terms, + reason="protected_existing_match", + best_product=best_product, + best_score=score, + diagnostics=diagnostics, + candidate_count=len(products), + ) self._record_match_attempt( sku, momo_name, @@ -1376,6 +1595,7 @@ class CompetitorPriceFeeder: best_product=best_product, best_score=score, diagnostics=diagnostics, + browse_diagnostic=browse_diagnostic, error_message=f"{write_reason}; {_format_match_diagnostics(diagnostics)}", source=source, ) @@ -1418,6 +1638,12 @@ class CompetitorPriceFeeder: except Exception as e: logger.error(f"[Feeder] {sku} 處理失敗: {e}") try: + browse_diagnostic = self._prepare_browse_diagnostic( + momo_name, + search_terms=search_terms, + reason="crawler_error", + candidate_count=0, + ) self._record_match_attempt( sku, momo_name, @@ -1425,6 +1651,7 @@ class CompetitorPriceFeeder: momo_price=momo_price, search_terms=search_terms, attempt_status="error", + browse_diagnostic=browse_diagnostic, error_message=str(e), source=source, ) diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index 1b13d69..6fcdc5f 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -520,6 +520,7 @@ BRAND_ALIAS_OVERRIDES = { "xiaomi": ("小米有品", "小米", "xiaomi"), "mac": ("m.a.c", "mac", "m a c"), "opi": ("o.p.i", "opi", "o p i"), + "st雞仔牌": ("日本雞仔牌st", "日本st雞仔牌", "st雞仔牌", "雞仔牌st", "雞仔牌"), } PRODUCT_TYPES = { @@ -1157,12 +1158,25 @@ def _has_refill_pack(identity: ProductIdentity) -> bool: return bool( "補充瓶" in text or "補充包" in text + or "補充芯" in text + or "補充蕊" in text or "替換蕊" in text or "替換芯" in text or "refill" in text ) +def _has_accessory_case(identity: ProductIdentity) -> bool: + text = identity.normalized_name + return bool( + "眉彩餅盒" in text + or "盒一入款" in text + or "盒三入款" in text + or "盒單入" in text + or "空盒" in text + ) + + def _spec_mention_count(identity: ProductIdentity) -> int: return len( re.findall( @@ -1461,6 +1475,7 @@ def _build_evidence_flags( "count_conflict", "bundle_offer_conflict", "multi_component_conflict", + "accessory_case_conflict", "refill_pack_conflict", "price_ratio_extreme", "price_ratio_wide", @@ -1557,6 +1572,9 @@ def score_marketplace_match( reasons.append("multi_component_conflict") if _has_refill_pack(left) != _has_refill_pack(right): reasons.append("refill_pack_conflict") + accessory_case_conflict = _has_accessory_case(left) != _has_accessory_case(right) + if accessory_case_conflict: + reasons.append("accessory_case_conflict") left_spec_mentions = _spec_mention_count(left) right_spec_mentions = _spec_mention_count(right) if left_spec_mentions and right_spec_mentions and left_spec_mentions != right_spec_mentions: @@ -1579,6 +1597,8 @@ def score_marketplace_match( hard_veto = True if _has_refill_pack(left) != _has_refill_pack(right): hard_veto = True + if accessory_case_conflict: + hard_veto = True if model_line_conflict: hard_veto = True if left_spec_mentions and right_spec_mentions and left_spec_mentions != right_spec_mentions: @@ -1752,6 +1772,20 @@ def score_marketplace_match( ): score += 0.07 reasons.append("shared_identity_anchor_exact_line") + if ( + "無印乾爽止汗爽身乳液" in shared_anchor + and {"nivea", "妮維雅"} & (left.brand_tokens | right.brand_tokens) + and brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and type_score >= 0.95 + and spec_score >= 0.45 + and token_score >= 0.55 + and sequence_score >= 0.62 + and not variant_descriptor_conflict + ): + score += 0.08 + reasons.append("shared_identity_anchor_nivea_dry_lotion") if ( "多效提亮防曬霜" in shared_anchor and {"recipe", "box"} <= (left.brand_tokens | right.brand_tokens) @@ -1967,6 +2001,10 @@ def _extract_anchor_phrases(token: str) -> list[str]: phrases: list[str] = [] if "經典旋轉眉筆" in cleaned: phrases.append("經典旋轉眉筆") + if "無印乾爽" in cleaned and "止汗爽身乳液" in cleaned: + phrases.append("無印乾爽止汗爽身乳液") + if "智能光感應" in cleaned and "無線自動除臭芳香噴霧機" in cleaned: + phrases.append("智能光感應無線自動除臭芳香噴霧機") if "悠斯晶" in normalized and "經典乳霜" in normalized: phrases.append("悠斯晶經典乳霜") if "經典乳霜" in normalized: diff --git a/tests/test_competitor_match_attempts_persistence.py b/tests/test_competitor_match_attempts_persistence.py index fa0edef..d391342 100644 --- a/tests/test_competitor_match_attempts_persistence.py +++ b/tests/test_competitor_match_attempts_persistence.py @@ -1,4 +1,5 @@ from pathlib import Path +import json import logging from datetime import datetime from types import SimpleNamespace @@ -42,6 +43,7 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes(): source = (ROOT / "services/competitor_price_feeder.py").read_text(encoding="utf-8") migration = (ROOT / "migrations/023_competitor_match_attempts.sql").read_text(encoding="utf-8") diagnostics_migration = (ROOT / "migrations/041_competitor_match_diagnostics.sql").read_text(encoding="utf-8") + browse_migration = (ROOT / "migrations/042_add_browse_diagnostics_to_match_attempts.sql").read_text(encoding="utf-8") assert "attempts_written" in source assert "_ensure_competitor_match_attempts_table" in source @@ -56,6 +58,9 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes(): assert 'attempt_status="no_match"' in source assert 'attempt_status="error"' in source assert "_search_pchome_candidates(crawler, momo_name, search_terms, momo_price=momo_price)" in source + assert "_prepare_browse_diagnostic" in source + assert "browse_diagnostic_json" in source + assert "PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED" in source assert 'attempt_status="protected_existing_match"' in source assert "_should_upsert_competitor_price" in source assert "_classify_low_score_attempt" in source @@ -88,6 +93,8 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes(): assert "match_diagnostic_json" in diagnostics_migration assert "comparison_mode" in diagnostics_migration assert "diagnostic_codes" in diagnostics_migration + assert "browse_diagnostic_json" in browse_migration + assert "idx_comp_match_attempts_browse_diag_time" in browse_migration assert "competitor_product_url" in source assert "competitor_image_url" in source assert "competitor_stock" in source @@ -95,6 +102,74 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes(): assert "idx_comp_match_attempts_sku_source_time" in migration +def test_competitor_feeder_records_browse_sh_plan_for_no_result(monkeypatch): + from services.competitor_price_feeder import CompetitorPriceFeeder + + class FakeCrawler: + def __init__(self, *_args, **_kwargs): + pass + + def search_products(self, *_args, **_kwargs): + return True, "ok", [] + + monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) + feeder = CompetitorPriceFeeder(engine=object()) + attempts = [] + monkeypatch.setattr( + feeder, + "_record_match_attempt", + lambda *args, **kwargs: attempts.append(kwargs), + ) + + result = feeder._run_sku_items([{ + "sku": "BROWSE001", + "name": "MOMO 稀有專櫃組合 50ml", + "product_id": 901, + "momo_price": 1280, + }]) + + assert result.matched == 0 + assert result.skipped_no_result == 1 + browse_plan = attempts[0]["browse_diagnostic"] + assert browse_plan["tool"] == "browse.sh" + assert browse_plan["mode"] == "plan_only" + assert browse_plan["execute_enabled"] is False + assert browse_plan["reason"] == "no_result" + assert browse_plan["execution"]["status"] == "disabled" + assert browse_plan["suggested_commands"][0]["args"][0] == "get" + assert "ecshweb.pchome.com.tw/search" in browse_plan["urls"][0] + + +def test_competitor_match_attempt_persists_browse_diagnostic_json(): + from sqlalchemy import create_engine, text + from services.competitor_price_feeder import CompetitorPriceFeeder + + engine = create_engine("sqlite:///:memory:") + feeder = CompetitorPriceFeeder(engine=engine) + feeder._record_match_attempt( + sku="BROWSE002", + momo_name="MOMO 取證測試商品", + search_terms=["取證 測試"], + attempt_status="no_result", + browse_diagnostic={ + "tool": "browse.sh", + "mode": "plan_only", + "urls": ["https://ecshweb.pchome.com.tw/search/v3.3/?q=test"], + }, + ) + + with engine.connect() as conn: + row = conn.execute(text(""" + SELECT browse_diagnostic_json + FROM competitor_match_attempts + WHERE sku = 'BROWSE002' + """)).scalar_one() + + payload = json.loads(row) + assert payload["tool"] == "browse.sh" + assert payload["mode"] == "plan_only" + + def test_match_diagnostics_payload_carries_professional_match_lanes(): from services.competitor_price_feeder import _match_diagnostics_payload, _extend_match_tags from services.marketplace_product_matcher import score_marketplace_match @@ -591,6 +666,166 @@ def test_competitor_feeder_marks_weak_identity_as_true_low_confidence(monkeypatc assert attempts[0]["attempt_status"] == "true_low_confidence" +def test_competitor_feeder_downgrades_variant_selection_gap_from_recoverable(monkeypatch): + from services.competitor_price_feeder import CompetitorPriceFeeder + from services.pchome_crawler import PChomeProduct + + products = [ + PChomeProduct( + product_id="DDAB01-08", + name="PERIPERA 雙頭旋轉極細眉筆 08深杏色 0.05g", + price=180, + original_price=220, + discount=18, + image_url="", + product_url="https://24h.pchome.com.tw/prod/DDAB01-08", + stock=20, + store="24h", + rating=4.7, + review_count=8, + is_on_sale=True, + crawled_at=datetime.now(), + ), + PChomeProduct( + product_id="DDAB01-09", + name="PERIPERA 雙頭旋轉極細眉筆 09灰褐棕 0.05g", + price=180, + original_price=220, + discount=18, + image_url="", + product_url="https://24h.pchome.com.tw/prod/DDAB01-09", + stock=20, + store="24h", + rating=4.7, + review_count=8, + is_on_sale=True, + crawled_at=datetime.now(), + ), + PChomeProduct( + product_id="DDAB01-11", + name="PERIPERA 雙頭旋轉極細眉筆 11摩卡灰褐 0.05g", + price=180, + original_price=220, + discount=18, + image_url="", + product_url="https://24h.pchome.com.tw/prod/DDAB01-11", + stock=20, + store="24h", + rating=4.7, + review_count=8, + is_on_sale=True, + crawled_at=datetime.now(), + ), + ] + + class FakeCrawler: + def __init__(self, *_args, **_kwargs): + pass + + def search_products(self, *_args, **_kwargs): + return True, "ok", products + + def fake_score(_momo_name, competitor_name, **_kwargs): + return SimpleNamespace( + score=0.734 if "09灰褐棕" in competitor_name else 0.733, + brand_score=1.0, + token_score=0.74, + spec_score=0.55, + sequence_score=0.66, + type_score=0.55, + price_penalty=0.0, + hard_veto=False, + reasons=("shared_identity_anchor_packaging_variant",), + comparison_mode="exact_identity", + tags=["identity_v2", "comparison_exact_identity", "brand_match"], + ) + + monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) + monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) + feeder = CompetitorPriceFeeder(engine=object()) + attempts = [] + monkeypatch.setattr( + feeder, + "_record_match_attempt", + lambda *args, **kwargs: attempts.append(kwargs), + ) + + result = feeder._run_sku_items([{ + "sku": "P001", + "name": "【peripera官方直營】雙頭旋轉極細眉筆_多色任選(1.5mm極細筆頭)", + "product_id": 11, + "momo_price": 180, + }]) + + assert result.matched == 0 + assert result.skipped_low_score == 1 + assert attempts[0]["attempt_status"] == "true_low_confidence" + + +def test_competitor_feeder_treats_choose_one_offer_as_missing_variant_signal(monkeypatch): + from services.competitor_price_feeder import CompetitorPriceFeeder + from services.pchome_crawler import PChomeProduct + + product = PChomeProduct( + product_id="DDAB01-YSL", + name="【YSL聖羅蘭】恆久完美透膚煙染腮紅 6g ( #12/ #57/ #93)", + price=1650, + original_price=1780, + discount=7, + image_url="", + product_url="https://24h.pchome.com.tw/prod/DDAB01-YSL", + stock=20, + store="24h", + rating=4.7, + review_count=8, + is_on_sale=True, + crawled_at=datetime.now(), + ) + + class FakeCrawler: + def __init__(self, *_args, **_kwargs): + pass + + def search_products(self, *_args, **_kwargs): + return True, "ok", [product] + + def fake_score(*_args, **_kwargs): + return SimpleNamespace( + score=0.735, + brand_score=1.0, + token_score=0.74, + spec_score=0.55, + sequence_score=0.66, + type_score=1.0, + price_penalty=0.0, + hard_veto=False, + reasons=("shared_identity_anchor_packaging_variant",), + comparison_mode="exact_identity", + tags=["identity_v2", "comparison_exact_identity", "brand_match"], + ) + + monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) + monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) + feeder = CompetitorPriceFeeder(engine=object()) + attempts = [] + monkeypatch.setattr( + feeder, + "_record_match_attempt", + lambda *args, **kwargs: attempts.append(kwargs), + ) + + result = feeder._run_sku_items([{ + "sku": "Y001", + "name": "【YSL】官方直營 恆久完美透膚煙染腮紅(腮紅/任選1款/新品上市)", + "product_id": 12, + "momo_price": 1650, + }]) + + assert result.matched == 0 + assert result.skipped_low_score == 1 + assert attempts[0]["attempt_status"] == "true_low_confidence" + + def test_should_upsert_allows_same_identity_candidate_to_replace_lower_score(): from sqlalchemy import create_engine, text diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index c1357d2..24f02f7 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -503,6 +503,49 @@ def test_marketplace_matcher_promotes_recipe_box_marketing_line_drift(): assert "shared_identity_anchor_recipe_box_line" in diagnostics.reasons +def test_marketplace_matcher_promotes_st_deodorizer_with_brand_alias_and_line_anchor(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【日本雞仔牌ST】室內消臭力智能光感應3段定時無線自動除臭芳香噴霧機(內贈芳香劑39ml 衛浴精油擴香瓶棒組)", + "日本ST雞仔牌-室內消臭力智能光感應3段定時無線自動除臭芳香噴霧機1入(含芳香劑39ml)", + momo_price=699, + competitor_price=699, + ) + + assert diagnostics.score >= 0.76 + assert "shared_identity_anchor_exact_line" in diagnostics.reasons or "shared_identity_anchor_packaging_variant" in diagnostics.reasons + + +def test_marketplace_matcher_promotes_nivea_dry_lotion_with_long_shared_anchor(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【NIVEA 妮維雅】男士無印乾爽止汗爽身乳液(無印止汗滾珠/德國妮維雅)", + "【NIVEA 妮維雅】止汗爽身乳液 無印乾爽50ml", + momo_price=129, + competitor_price=129, + ) + + assert diagnostics.score >= 0.76 + assert "shared_identity_anchor_nivea_dry_lotion" in diagnostics.reasons + + +def test_marketplace_matcher_rejects_refill_core_vs_case_only_pack(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【KATE 凱婷】3D造型眉彩餅補充芯(眉彩刷、眉餅盒分開販售)", + "【KATE 凱婷】眉彩餅盒一入款(搭配3D造型眉彩餅補充芯)", + momo_price=280, + competitor_price=280, + ) + + assert diagnostics.score < 0.76 + assert diagnostics.hard_veto is True + assert "accessory_case_conflict" in diagnostics.reasons or "refill_pack_conflict" in diagnostics.reasons + + def test_marketplace_matcher_suppresses_wide_price_penalty_for_exact_lip_product(): from services.marketplace_product_matcher import score_marketplace_match