This commit is contained in:
@@ -432,6 +432,12 @@ PCHOME_FEEDER_RATE_DELAY=1.0
|
||||
PCHOME_FEEDER_SEARCH_LIMIT=20
|
||||
PCHOME_FEEDER_MAX_SEARCH_TERMS=5
|
||||
PCHOME_FEEDER_SEARCH_MAX_PAGES=2
|
||||
# browse.sh 只作低信心/無結果的診斷計畫;正式排程預設不自動開瀏覽器。
|
||||
PCHOME_FEEDER_BROWSE_SH_DIAGNOSTIC_ENABLED=true
|
||||
PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED=false
|
||||
PCHOME_FEEDER_BROWSE_SH_TIMEOUT=20
|
||||
PCHOME_FEEDER_BROWSE_SH_MAX_PER_RUN=3
|
||||
PCHOME_FEEDER_BROWSE_SH_OUTPUT_PREVIEW_CHARS=1200
|
||||
PCHOME_BACKFILL_STATUS_PATH=/app/data/pchome_match_backfill_status.json
|
||||
PCHOME_BACKFILL_ACTIVE_TTL_SECONDS=7200
|
||||
|
||||
|
||||
@@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
||||
# ==========================================
|
||||
# 系統版本與路徑
|
||||
# ==========================================
|
||||
SYSTEM_VERSION = "V10.380"
|
||||
SYSTEM_VERSION = "V10.381"
|
||||
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
||||
public_url = PUBLIC_URL # 用於模板顯示
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
- 2026-05-21 追記:同步 EDM 失效頁 alert guard 與 REJURAN 唇膏寬價差 exact-identity matcher 更新後的 `scheduler.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
- 2026-05-21 追記:同步過期 EDM / seasonal promo crawler 排程改為 opt-in、NIVEA/OPI 搜尋 noise 與 identity anchor 補強後的 `run_scheduler.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
- 2026-05-21 追記:同步 Recipe Box 多效提亮防曬霜同款漂移比對補強後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
- 2026-05-21 追記:同步 browse.sh 診斷計畫寫入 `competitor_match_attempts` 後的 `services/competitor_price_feeder.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
|
||||
## 達到或超過 800 行檔案清單
|
||||
|
||||
@@ -75,7 +76,7 @@
|
||||
| 1042 | `services/code_review_pipeline_service.py` | P2 Code review pipeline service | scan orchestration / finding normalization / persistence adapter |
|
||||
| 953 | `routes/export_routes.py` | P2 Export flow | export command/router glue / file path / download orchestration |
|
||||
| 816 | `services/ppt_vision_service.py` | P2 PPT vision QA service | runtime state / queue status / model probe / audit execution 分離 |
|
||||
| 1733 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / retryable candidate recovery / cache strategy |
|
||||
| 2149 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / retryable candidate recovery / cache strategy |
|
||||
| 1327 | `services/competitor_intel_repository.py` | P2 competitor intel repository | review queue query / cache shaping / formatting helpers |
|
||||
| 805 | `routes/bot_api_routes.py` | P2 Bot API Blueprint | route glue / bot action service |
|
||||
| 1319 | `routes/market_intel_review_report_routes.py` | P2 market intel review report Blueprint | review report route glue / export payload / phase handoff orchestration |
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
## 📅 詳細更新日誌 (考古存檔)
|
||||
|
||||
### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化
|
||||
- **V10.381 browse.sh 比價診斷計畫**: PChome feeder 在 `no_result`、`no_match`、低信心、單位價覆核、既有配對保護與爬蟲錯誤時,會把 read-only `browse_diagnostic_json` 寫入 `competitor_match_attempts`,內含 PChome search URL 與建議 `browse get/open` 命令;正式排程仍 API-first,`PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED=false` 預設不自動開瀏覽器,避免瀏覽器彈窗、登入或密碼提示干擾。
|
||||
- **V10.380 111 Ollama final fallback 收斂**: 111 Mac fallback 從救急路徑改成更短的保護路徑,`OLLAMA_111_MAX_TIMEOUT` 預設由 45s 收緊到 20s,並新增 `OLLAMA_111_NUM_PREDICT=512` 輸出上限;落到 111 時仍會降級重模型到 `llama3.2:latest`、縮 `num_ctx=4096`、`keep_alive=5m`,避免 GCP-A/GCP-B 短暫 timeout 後把長篇 Hermes/OpenClaw 工作轉嫁到 111 造成 swap 與 load 飆高。
|
||||
- **V10.379 MCP runtime promotion gate**: 新增 `mcp_runtime_promotion` read-only builder、GET/POST endpoint、UI promotion package 審核面板與 deployment readiness smoke target,將 MCP activation evidence 與 runtime smoke receipt 合併審核,讓 completion audit 的 runtime 缺口可由人工收據明確補齊。
|
||||
- **V10.379 只讀安全邊界**: 本階段不保存 payload、不打 health、不開 DB、不抓外站、不掛 scheduler,也不會因 promotion 通過自動打開人工 fetch gate;正式 fetch / DB write / scheduler attach 仍需各自獨立 gate。
|
||||
|
||||
23
migrations/042_add_browse_diagnostics_to_match_attempts.sql
Normal file
23
migrations/042_add_browse_diagnostics_to_match_attempts.sql
Normal file
@@ -0,0 +1,23 @@
|
||||
-- =============================================================================
-- Migration 042: PChome browse.sh diagnostic plan column
-- MOMO PRO — Price comparison fallback observability
-- 2026-05-21 Taipei
-- =============================================================================
-- Purpose:
--   Adds a browse.sh diagnostic plan JSON column to competitor_match_attempts.
--   The production price crawler stays API-first; this column only stores the
--   read-only probe plan recorded for low-confidence, no-result, unit-price
--   review, or external crawler error cases, so that a manual or opt-in
--   `browse get/open` run can collect selector / XHR / product page evidence.
--
-- Safety:
--   Every statement is idempotent. The index creation is guarded the same way
--   as the ALTER TABLE (which uses IF EXISTS): when the table is absent the
--   migration is a no-op instead of failing on CREATE INDEX.
-- =============================================================================

ALTER TABLE IF EXISTS competitor_match_attempts
    ADD COLUMN IF NOT EXISTS browse_diagnostic_json JSONB;

DO $$
BEGIN
    -- to_regclass returns NULL (instead of raising) when the relation is missing.
    IF to_regclass('competitor_match_attempts') IS NULL THEN
        RAISE NOTICE 'Migration 042 skipped — competitor_match_attempts does not exist';
        RETURN;
    END IF;

    -- Partial index: only rows that actually carry a diagnostic plan.
    CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_browse_diag_time
        ON competitor_match_attempts (attempted_at DESC)
        WHERE browse_diagnostic_json IS NOT NULL;

    RAISE NOTICE '✅ Migration 042 完成 — PChome browse.sh 診斷計畫欄位已補齊';
END $$;
|
||||
@@ -31,6 +31,7 @@ import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional
|
||||
from urllib.parse import quote_plus
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -47,6 +48,11 @@ TTL_HOURS = 6 # competitor_prices 快取有效期
|
||||
REQUEST_TIMEOUT = float(os.getenv("PCHOME_FEEDER_TIMEOUT", "12")) # 避免外部搜尋 API 長時間卡住排程
|
||||
VARIANT_RECALL_SORTS = ("sale/dc", "new/dc")
|
||||
RECOVERABLE_LOW_SCORE_FLOOR = max(MIN_MATCH_SCORE - 0.03, 0.72)
|
||||
BROWSE_SH_DIAGNOSTIC_ENABLED = os.getenv("PCHOME_FEEDER_BROWSE_SH_DIAGNOSTIC_ENABLED", "true").lower() in {"1", "true", "yes", "on"}
|
||||
BROWSE_SH_EXECUTE_ENABLED = os.getenv("PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED", "false").lower() in {"1", "true", "yes", "on"}
|
||||
BROWSE_SH_TIMEOUT_SECONDS = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_TIMEOUT", "20"))
|
||||
BROWSE_SH_MAX_EXECUTIONS_PER_RUN = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_MAX_PER_RUN", "3"))
|
||||
BROWSE_SH_OUTPUT_PREVIEW_CHARS = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_OUTPUT_PREVIEW_CHARS", "1200"))
|
||||
RECOVERABLE_DIAGNOSTIC_REASONS = {
|
||||
"strong_product_line_match",
|
||||
"strong_exact_spec_match",
|
||||
@@ -95,6 +101,43 @@ def _classify_low_score_attempt(score: float, diagnostics) -> str:
|
||||
return "true_low_confidence"
|
||||
|
||||
|
||||
def _has_variant_selection_gap(
|
||||
momo_name: str,
|
||||
ranked_matches: list[tuple],
|
||||
best_score: float,
|
||||
) -> bool:
|
||||
"""True when source lacks explicit variant selection but top candidates require one."""
|
||||
try:
|
||||
from services.marketplace_product_matcher import (
|
||||
_explicit_variant_option_tokens,
|
||||
parse_product_identity,
|
||||
)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
source_identity = parse_product_identity(momo_name)
|
||||
source_options = set(_explicit_variant_option_tokens(source_identity))
|
||||
if re.search(r"任選\s*[一二兩三四五六七八九十0-9]+\s*款", momo_name):
|
||||
source_options -= {str(value) for value in range(1, 11)}
|
||||
source_options -= {f"{value:02d}" for value in range(1, 11)}
|
||||
if source_options:
|
||||
return False
|
||||
|
||||
threshold = max(best_score - 0.02, RECOVERABLE_LOW_SCORE_FLOOR)
|
||||
option_buckets: set[str] = set()
|
||||
for product, score, diagnostics in ranked_matches[:5]:
|
||||
if getattr(diagnostics, "hard_veto", False) or score < threshold:
|
||||
continue
|
||||
candidate_identity = parse_product_identity(getattr(product, "name", "") or "")
|
||||
options = _explicit_variant_option_tokens(candidate_identity)
|
||||
if len(options) >= 2:
|
||||
return True
|
||||
option_buckets.update(options)
|
||||
if len(option_buckets) >= 2:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _extract_tags(pchome_product) -> list:
|
||||
"""
|
||||
從 PChomeProduct 物件提取語意標籤
|
||||
@@ -286,6 +329,66 @@ def _match_diagnostics_payload(diagnostics) -> dict:
|
||||
}
|
||||
|
||||
|
||||
def _pchome_search_url(keyword: str) -> str:
|
||||
return f"https://ecshweb.pchome.com.tw/search/v3.3/?q={quote_plus(keyword or '')}"
|
||||
|
||||
|
||||
def _build_browse_sh_diagnostic_payload(
    momo_name: str,
    search_terms: Optional[list[str]] = None,
    reason: str = "unknown",
    best_product=None,
    best_score: Optional[float] = None,
    diagnostics=None,
    candidate_count: int = 0,
) -> dict:
    """Build a read-only browse.sh probe plan for low-confidence PChome cases.

    Nothing is executed here; the function only assembles a JSON-serialisable
    plan describing which URLs to probe and which browse.sh commands to run.

    Args:
        momo_name: Source product name; stored truncated to 300 characters.
        search_terms: Search keywords already used. When falsy, keywords are
            rebuilt from ``momo_name``. At most three deduplicated terms are
            kept.
        reason: Why the plan was produced (e.g. ``"no_result"``).
        best_product: Best candidate, if any; its ``product_url`` is appended
            to the probe URLs and a snapshot is stored under
            ``best_candidate``.
        best_score: Match score of ``best_product``.
        diagnostics: Matcher diagnostics, flattened via
            ``_match_diagnostics_payload``.
        candidate_count: Number of candidates the search returned.

    Returns:
        The plan dict, or ``{}`` when ``BROWSE_SH_DIAGNOSTIC_ENABLED`` is off.
        ``urls`` always holds at least one entry, and ``urls[0]`` is the URL
        used by ``suggested_commands``.
    """
    if not BROWSE_SH_DIAGNOSTIC_ENABLED:
        return {}

    terms = _dedupe_terms(search_terms or _build_search_keywords(momo_name))[:3]
    urls = [_pchome_search_url(term) for term in terms]
    product_url = getattr(best_product, "product_url", None)
    if product_url:
        urls.append(product_url)
    # dict.fromkeys removes duplicates while keeping first-seen order.
    urls = list(dict.fromkeys(url for url in urls if url))
    if not urls:
        # No usable term and no product URL: fall back to searching the raw
        # name, and record that URL so `urls` agrees with the commands below.
        urls = [_pchome_search_url(momo_name)]
    primary_url = urls[0]

    diagnostic_payload = _match_diagnostics_payload(diagnostics)
    best_candidate = None
    if best_product:
        best_candidate = {
            "product_id": getattr(best_product, "product_id", None),
            "name": (getattr(best_product, "name", None) or "")[:300] or None,
            "price": getattr(best_product, "price", None),
            "url": product_url,
            "score": best_score,
        }

    return {
        "tool": "browse.sh",
        "mode": "execute_on_demand" if BROWSE_SH_EXECUTE_ENABLED else "plan_only",
        "reason": reason,
        "execute_enabled": BROWSE_SH_EXECUTE_ENABLED,
        "timeout_seconds": BROWSE_SH_TIMEOUT_SECONDS,
        "candidate_count": int(candidate_count or 0),
        "momo_name": (momo_name or "")[:300],
        "search_terms": terms,
        "urls": urls,
        "suggested_commands": [
            {
                "purpose": "static_fetch_first_page",
                "args": ["get", primary_url],
            },
            {
                "purpose": "manual_browser_probe",
                "args": ["open", primary_url],
            },
        ],
        "best_candidate": best_candidate,
        "diagnostic_codes": diagnostic_payload.get("reasons") or [],
        "comparison_mode": diagnostic_payload.get("comparison_mode"),
        "hard_veto": diagnostic_payload.get("hard_veto"),
        # Default execution state for a plan that has not been run.
        "execution": {"status": "disabled"},
    }
|
||||
|
||||
|
||||
def _product_snapshot_payload(product) -> dict:
|
||||
payload = {
|
||||
"competitor_product_url": None,
|
||||
@@ -471,6 +574,7 @@ class CompetitorPriceFeeder:
|
||||
self._history_table_ready = False
|
||||
self._attempt_table_ready = False
|
||||
self._price_table_columns_ready = False
|
||||
self._browse_sh_executions = 0
|
||||
|
||||
def _ensure_table_columns(self, conn, table: str, column_specs: list[tuple[str, str]]) -> None:
|
||||
"""補齊既有表欄位;避免正式端舊表在新 INSERT 時炸掉。"""
|
||||
@@ -613,6 +717,7 @@ class CompetitorPriceFeeder:
|
||||
comparison_mode VARCHAR(40),
|
||||
hard_veto BOOLEAN,
|
||||
diagnostic_codes JSONB,
|
||||
browse_diagnostic_json JSONB,
|
||||
error_message TEXT,
|
||||
attempted_at TIMESTAMP NOT NULL DEFAULT NOW()
|
||||
)
|
||||
@@ -648,6 +753,7 @@ class CompetitorPriceFeeder:
|
||||
comparison_mode VARCHAR(40),
|
||||
hard_veto BOOLEAN,
|
||||
diagnostic_codes TEXT,
|
||||
browse_diagnostic_json TEXT,
|
||||
error_message TEXT,
|
||||
attempted_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
@@ -669,9 +775,64 @@ class CompetitorPriceFeeder:
|
||||
("comparison_mode", "VARCHAR(40)"),
|
||||
("hard_veto", "BOOLEAN"),
|
||||
("diagnostic_codes", "JSONB" if conn.dialect.name == "postgresql" else "TEXT"),
|
||||
("browse_diagnostic_json", "JSONB" if conn.dialect.name == "postgresql" else "TEXT"),
|
||||
])
|
||||
self._attempt_table_ready = True
|
||||
|
||||
def _prepare_browse_diagnostic(
    self,
    momo_name: str,
    search_terms: list = None,
    reason: str = "unknown",
    best_product=None,
    best_score: float = None,
    diagnostics=None,
    candidate_count: int = 0,
) -> dict:
    """Build the browse.sh plan and, only when opted in, run its first command.

    The plan comes from ``_build_browse_sh_diagnostic_payload``. It is returned
    untouched when it is empty or when ``BROWSE_SH_EXECUTE_ENABLED`` is off.
    Otherwise the plan's ``execution`` entry is replaced with one of:

    * ``rate_limited`` – this feeder already reached
      ``BROWSE_SH_MAX_EXECUTIONS_PER_RUN`` executions;
    * ``missing_command`` – the first suggested command has no args;
    * ``ok`` / ``failed`` – result of running the first suggested command,
      with truncated stdout/stderr previews;
    * ``error`` – importing or running the tool raised.
    """
    plan = _build_browse_sh_diagnostic_payload(
        momo_name,
        search_terms=search_terms,
        reason=reason,
        best_product=best_product,
        best_score=best_score,
        diagnostics=diagnostics,
        candidate_count=candidate_count,
    )
    if not plan or not BROWSE_SH_EXECUTE_ENABLED:
        return plan

    if self._browse_sh_executions >= BROWSE_SH_MAX_EXECUTIONS_PER_RUN:
        plan["execution"] = {"status": "rate_limited"}
        return plan

    suggestions = plan.get("suggested_commands") or [{}]
    command_args = tuple(suggestions[0].get("args") or ())
    if not command_args:
        plan["execution"] = {"status": "missing_command"}
        return plan

    try:
        from services.browse_sh_tool import BrowseShTool

        # Count the attempt before running so a failing run still uses budget.
        self._browse_sh_executions += 1
        tool = BrowseShTool(timeout_seconds=BROWSE_SH_TIMEOUT_SECONDS)
        outcome = tool.run(
            command_args,
            timeout_seconds=BROWSE_SH_TIMEOUT_SECONDS,
        )
        preview_limit = BROWSE_SH_OUTPUT_PREVIEW_CHARS
        execution = {
            "status": "ok" if outcome.ok else "failed",
            "returncode": outcome.returncode,
            "timed_out": outcome.timed_out,
            "unavailable_reason": outcome.unavailable_reason,
            "stdout_preview": (outcome.stdout or "")[:preview_limit],
            "stderr_preview": (outcome.stderr or "")[:preview_limit],
        }
    except Exception as exc:
        # Diagnostics are best-effort: record the failure instead of raising.
        execution = {
            "status": "error",
            "error": str(exc)[:500],
        }
    plan["execution"] = execution
    return plan
|
||||
|
||||
def _record_match_attempt(
|
||||
self,
|
||||
sku: str,
|
||||
@@ -684,6 +845,7 @@ class CompetitorPriceFeeder:
|
||||
best_product=None,
|
||||
best_score: float = None,
|
||||
diagnostics=None,
|
||||
browse_diagnostic: dict = None,
|
||||
error_message: str = None,
|
||||
source: str = "pchome",
|
||||
) -> None:
|
||||
@@ -695,9 +857,15 @@ class CompetitorPriceFeeder:
|
||||
search_terms_expr = "CAST(:search_terms AS jsonb)" if conn.dialect.name == "postgresql" else ":search_terms"
|
||||
json_cast = "CAST(:match_diagnostic_json AS jsonb)" if conn.dialect.name == "postgresql" else ":match_diagnostic_json"
|
||||
codes_cast = "CAST(:diagnostic_codes AS jsonb)" if conn.dialect.name == "postgresql" else ":diagnostic_codes"
|
||||
browse_cast = "CAST(:browse_diagnostic_json AS jsonb)" if conn.dialect.name == "postgresql" else ":browse_diagnostic_json"
|
||||
diagnostic_payload = _match_diagnostics_payload(diagnostics)
|
||||
diagnostic_codes = diagnostic_payload.get("reasons") or []
|
||||
product_payload = _product_snapshot_payload(best_product)
|
||||
browse_diagnostic_json = (
|
||||
json.dumps(browse_diagnostic, ensure_ascii=False)
|
||||
if browse_diagnostic
|
||||
else None
|
||||
)
|
||||
conn.execute(text(f"""
|
||||
INSERT INTO competitor_match_attempts
|
||||
(sku, source, momo_product_id, momo_product_name, momo_price,
|
||||
@@ -706,6 +874,7 @@ class CompetitorPriceFeeder:
|
||||
competitor_product_url, competitor_image_url, competitor_stock,
|
||||
best_competitor_price, best_match_score,
|
||||
match_diagnostic_json, comparison_mode, hard_veto, diagnostic_codes,
|
||||
browse_diagnostic_json,
|
||||
error_message,
|
||||
attempted_at)
|
||||
VALUES
|
||||
@@ -715,6 +884,7 @@ class CompetitorPriceFeeder:
|
||||
:competitor_product_url, :competitor_image_url, :competitor_stock,
|
||||
:best_price, :best_score,
|
||||
{json_cast}, :comparison_mode, :hard_veto, {codes_cast},
|
||||
{browse_cast},
|
||||
:error_message,
|
||||
CURRENT_TIMESTAMP)
|
||||
"""), {
|
||||
@@ -735,6 +905,7 @@ class CompetitorPriceFeeder:
|
||||
"comparison_mode": diagnostic_payload.get("comparison_mode"),
|
||||
"hard_veto": diagnostic_payload.get("hard_veto"),
|
||||
"diagnostic_codes": json.dumps(diagnostic_codes, ensure_ascii=False) if diagnostic_codes else None,
|
||||
"browse_diagnostic_json": browse_diagnostic_json,
|
||||
"error_message": (error_message or "")[:1000] or None,
|
||||
})
|
||||
|
||||
@@ -1197,6 +1368,12 @@ class CompetitorPriceFeeder:
|
||||
products = _search_pchome_candidates(crawler, momo_name, search_terms, momo_price=momo_price)
|
||||
if not products:
|
||||
logger.debug(f"[Feeder] {sku} 無搜尋結果,跳過")
|
||||
browse_diagnostic = self._prepare_browse_diagnostic(
|
||||
momo_name,
|
||||
search_terms=search_terms,
|
||||
reason="no_result",
|
||||
candidate_count=0,
|
||||
)
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
@@ -1205,6 +1382,7 @@ class CompetitorPriceFeeder:
|
||||
search_terms=search_terms,
|
||||
candidate_count=0,
|
||||
attempt_status="no_result",
|
||||
browse_diagnostic=browse_diagnostic,
|
||||
source=source,
|
||||
)
|
||||
attempts_written += 1
|
||||
@@ -1213,6 +1391,12 @@ class CompetitorPriceFeeder:
|
||||
|
||||
ranked_matches = _rank_match_details(momo_name, products, momo_price=momo_price)
|
||||
if not ranked_matches:
|
||||
browse_diagnostic = self._prepare_browse_diagnostic(
|
||||
momo_name,
|
||||
search_terms=search_terms,
|
||||
reason="no_match",
|
||||
candidate_count=len(products),
|
||||
)
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
@@ -1221,6 +1405,7 @@ class CompetitorPriceFeeder:
|
||||
search_terms=search_terms,
|
||||
candidate_count=len(products),
|
||||
attempt_status="no_match",
|
||||
browse_diagnostic=browse_diagnostic,
|
||||
source=source,
|
||||
)
|
||||
attempts_written += 1
|
||||
@@ -1305,6 +1490,15 @@ class CompetitorPriceFeeder:
|
||||
f"[Feeder] {sku} 候選屬單位價可比但非同販售組合,"
|
||||
f"不寫入正式價差 | {_format_match_diagnostics(diagnostics)}"
|
||||
)
|
||||
browse_diagnostic = self._prepare_browse_diagnostic(
|
||||
momo_name,
|
||||
search_terms=search_terms,
|
||||
reason="unit_comparable",
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
candidate_count=len(products),
|
||||
)
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
@@ -1316,6 +1510,7 @@ class CompetitorPriceFeeder:
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
browse_diagnostic=browse_diagnostic,
|
||||
error_message=_format_match_diagnostics(diagnostics),
|
||||
source=source,
|
||||
)
|
||||
@@ -1325,10 +1520,24 @@ class CompetitorPriceFeeder:
|
||||
|
||||
if score < MIN_MATCH_SCORE and not manual_accept_override:
|
||||
attempt_status = _classify_low_score_attempt(score, diagnostics)
|
||||
if (
|
||||
attempt_status == "recoverable_low_score"
|
||||
and _has_variant_selection_gap(momo_name, ranked_matches, score)
|
||||
):
|
||||
attempt_status = "true_low_confidence"
|
||||
logger.debug(
|
||||
f"[Feeder] {sku} 比對分數過低 ({score:.3f} < {MIN_MATCH_SCORE}),"
|
||||
f"{_format_match_diagnostics(diagnostics)}"
|
||||
)
|
||||
browse_diagnostic = self._prepare_browse_diagnostic(
|
||||
momo_name,
|
||||
search_terms=search_terms,
|
||||
reason=attempt_status,
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
candidate_count=len(products),
|
||||
)
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
@@ -1340,6 +1549,7 @@ class CompetitorPriceFeeder:
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
browse_diagnostic=browse_diagnostic,
|
||||
error_message=_format_match_diagnostics(diagnostics),
|
||||
source=source,
|
||||
)
|
||||
@@ -1365,6 +1575,15 @@ class CompetitorPriceFeeder:
|
||||
write_reason = "manual_accept_override"
|
||||
if not should_write:
|
||||
logger.info(f"[Feeder] {sku} 進入人工覆核,不覆蓋既有配對 | {write_reason}")
|
||||
browse_diagnostic = self._prepare_browse_diagnostic(
|
||||
momo_name,
|
||||
search_terms=search_terms,
|
||||
reason="protected_existing_match",
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
candidate_count=len(products),
|
||||
)
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
@@ -1376,6 +1595,7 @@ class CompetitorPriceFeeder:
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
browse_diagnostic=browse_diagnostic,
|
||||
error_message=f"{write_reason}; {_format_match_diagnostics(diagnostics)}",
|
||||
source=source,
|
||||
)
|
||||
@@ -1418,6 +1638,12 @@ class CompetitorPriceFeeder:
|
||||
except Exception as e:
|
||||
logger.error(f"[Feeder] {sku} 處理失敗: {e}")
|
||||
try:
|
||||
browse_diagnostic = self._prepare_browse_diagnostic(
|
||||
momo_name,
|
||||
search_terms=search_terms,
|
||||
reason="crawler_error",
|
||||
candidate_count=0,
|
||||
)
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
@@ -1425,6 +1651,7 @@ class CompetitorPriceFeeder:
|
||||
momo_price=momo_price,
|
||||
search_terms=search_terms,
|
||||
attempt_status="error",
|
||||
browse_diagnostic=browse_diagnostic,
|
||||
error_message=str(e),
|
||||
source=source,
|
||||
)
|
||||
|
||||
@@ -520,6 +520,7 @@ BRAND_ALIAS_OVERRIDES = {
|
||||
"xiaomi": ("小米有品", "小米", "xiaomi"),
|
||||
"mac": ("m.a.c", "mac", "m a c"),
|
||||
"opi": ("o.p.i", "opi", "o p i"),
|
||||
"st雞仔牌": ("日本雞仔牌st", "日本st雞仔牌", "st雞仔牌", "雞仔牌st", "雞仔牌"),
|
||||
}
|
||||
|
||||
PRODUCT_TYPES = {
|
||||
@@ -1157,12 +1158,25 @@ def _has_refill_pack(identity: ProductIdentity) -> bool:
|
||||
return bool(
|
||||
"補充瓶" in text
|
||||
or "補充包" in text
|
||||
or "補充芯" in text
|
||||
or "補充蕊" in text
|
||||
or "替換蕊" in text
|
||||
or "替換芯" in text
|
||||
or "refill" in text
|
||||
)
|
||||
|
||||
|
||||
def _has_accessory_case(identity: ProductIdentity) -> bool:
|
||||
text = identity.normalized_name
|
||||
return bool(
|
||||
"眉彩餅盒" in text
|
||||
or "盒一入款" in text
|
||||
or "盒三入款" in text
|
||||
or "盒單入" in text
|
||||
or "空盒" in text
|
||||
)
|
||||
|
||||
|
||||
def _spec_mention_count(identity: ProductIdentity) -> int:
|
||||
return len(
|
||||
re.findall(
|
||||
@@ -1461,6 +1475,7 @@ def _build_evidence_flags(
|
||||
"count_conflict",
|
||||
"bundle_offer_conflict",
|
||||
"multi_component_conflict",
|
||||
"accessory_case_conflict",
|
||||
"refill_pack_conflict",
|
||||
"price_ratio_extreme",
|
||||
"price_ratio_wide",
|
||||
@@ -1557,6 +1572,9 @@ def score_marketplace_match(
|
||||
reasons.append("multi_component_conflict")
|
||||
if _has_refill_pack(left) != _has_refill_pack(right):
|
||||
reasons.append("refill_pack_conflict")
|
||||
accessory_case_conflict = _has_accessory_case(left) != _has_accessory_case(right)
|
||||
if accessory_case_conflict:
|
||||
reasons.append("accessory_case_conflict")
|
||||
left_spec_mentions = _spec_mention_count(left)
|
||||
right_spec_mentions = _spec_mention_count(right)
|
||||
if left_spec_mentions and right_spec_mentions and left_spec_mentions != right_spec_mentions:
|
||||
@@ -1579,6 +1597,8 @@ def score_marketplace_match(
|
||||
hard_veto = True
|
||||
if _has_refill_pack(left) != _has_refill_pack(right):
|
||||
hard_veto = True
|
||||
if accessory_case_conflict:
|
||||
hard_veto = True
|
||||
if model_line_conflict:
|
||||
hard_veto = True
|
||||
if left_spec_mentions and right_spec_mentions and left_spec_mentions != right_spec_mentions:
|
||||
@@ -1752,6 +1772,20 @@ def score_marketplace_match(
|
||||
):
|
||||
score += 0.07
|
||||
reasons.append("shared_identity_anchor_exact_line")
|
||||
if (
|
||||
"無印乾爽止汗爽身乳液" in shared_anchor
|
||||
and {"nivea", "妮維雅"} & (left.brand_tokens | right.brand_tokens)
|
||||
and brand_score >= 0.95
|
||||
and not hard_veto
|
||||
and price_penalty == 0
|
||||
and type_score >= 0.95
|
||||
and spec_score >= 0.45
|
||||
and token_score >= 0.55
|
||||
and sequence_score >= 0.62
|
||||
and not variant_descriptor_conflict
|
||||
):
|
||||
score += 0.08
|
||||
reasons.append("shared_identity_anchor_nivea_dry_lotion")
|
||||
if (
|
||||
"多效提亮防曬霜" in shared_anchor
|
||||
and {"recipe", "box"} <= (left.brand_tokens | right.brand_tokens)
|
||||
@@ -1967,6 +2001,10 @@ def _extract_anchor_phrases(token: str) -> list[str]:
|
||||
phrases: list[str] = []
|
||||
if "經典旋轉眉筆" in cleaned:
|
||||
phrases.append("經典旋轉眉筆")
|
||||
if "無印乾爽" in cleaned and "止汗爽身乳液" in cleaned:
|
||||
phrases.append("無印乾爽止汗爽身乳液")
|
||||
if "智能光感應" in cleaned and "無線自動除臭芳香噴霧機" in cleaned:
|
||||
phrases.append("智能光感應無線自動除臭芳香噴霧機")
|
||||
if "悠斯晶" in normalized and "經典乳霜" in normalized:
|
||||
phrases.append("悠斯晶經典乳霜")
|
||||
if "經典乳霜" in normalized:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from pathlib import Path
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from types import SimpleNamespace
|
||||
@@ -42,6 +43,7 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
|
||||
source = (ROOT / "services/competitor_price_feeder.py").read_text(encoding="utf-8")
|
||||
migration = (ROOT / "migrations/023_competitor_match_attempts.sql").read_text(encoding="utf-8")
|
||||
diagnostics_migration = (ROOT / "migrations/041_competitor_match_diagnostics.sql").read_text(encoding="utf-8")
|
||||
browse_migration = (ROOT / "migrations/042_add_browse_diagnostics_to_match_attempts.sql").read_text(encoding="utf-8")
|
||||
|
||||
assert "attempts_written" in source
|
||||
assert "_ensure_competitor_match_attempts_table" in source
|
||||
@@ -56,6 +58,9 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
|
||||
assert 'attempt_status="no_match"' in source
|
||||
assert 'attempt_status="error"' in source
|
||||
assert "_search_pchome_candidates(crawler, momo_name, search_terms, momo_price=momo_price)" in source
|
||||
assert "_prepare_browse_diagnostic" in source
|
||||
assert "browse_diagnostic_json" in source
|
||||
assert "PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED" in source
|
||||
assert 'attempt_status="protected_existing_match"' in source
|
||||
assert "_should_upsert_competitor_price" in source
|
||||
assert "_classify_low_score_attempt" in source
|
||||
@@ -88,6 +93,8 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
|
||||
assert "match_diagnostic_json" in diagnostics_migration
|
||||
assert "comparison_mode" in diagnostics_migration
|
||||
assert "diagnostic_codes" in diagnostics_migration
|
||||
assert "browse_diagnostic_json" in browse_migration
|
||||
assert "idx_comp_match_attempts_browse_diag_time" in browse_migration
|
||||
assert "competitor_product_url" in source
|
||||
assert "competitor_image_url" in source
|
||||
assert "competitor_stock" in source
|
||||
@@ -95,6 +102,74 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
|
||||
assert "idx_comp_match_attempts_sku_source_time" in migration
|
||||
|
||||
|
||||
def test_competitor_feeder_records_browse_sh_plan_for_no_result(monkeypatch):
    """A no-result search must record a plan-only browse.sh diagnostic."""
    from services.competitor_price_feeder import CompetitorPriceFeeder

    class FakeCrawler:
        """Crawler stub whose search always succeeds with zero products."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", []

    # The feeder reads these flags from the environment at import time. Pin
    # them so the assertions below do not depend on the host environment and
    # the test can never launch a real browse.sh process.
    monkeypatch.setattr(
        "services.competitor_price_feeder.BROWSE_SH_DIAGNOSTIC_ENABLED", True
    )
    monkeypatch.setattr(
        "services.competitor_price_feeder.BROWSE_SH_EXECUTE_ENABLED", False
    )
    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    feeder = CompetitorPriceFeeder(engine=object())
    attempts = []
    monkeypatch.setattr(
        feeder,
        "_record_match_attempt",
        lambda *args, **kwargs: attempts.append(kwargs),
    )

    result = feeder._run_sku_items([{
        "sku": "BROWSE001",
        "name": "MOMO 稀有專櫃組合 50ml",
        "product_id": 901,
        "momo_price": 1280,
    }])

    assert result.matched == 0
    assert result.skipped_no_result == 1
    browse_plan = attempts[0]["browse_diagnostic"]
    assert browse_plan["tool"] == "browse.sh"
    assert browse_plan["mode"] == "plan_only"
    assert browse_plan["execute_enabled"] is False
    assert browse_plan["reason"] == "no_result"
    assert browse_plan["execution"]["status"] == "disabled"
    assert browse_plan["suggested_commands"][0]["args"][0] == "get"
    assert "ecshweb.pchome.com.tw/search" in browse_plan["urls"][0]
|
||||
|
||||
|
||||
def test_competitor_match_attempt_persists_browse_diagnostic_json():
    """The browse diagnostic dict must round-trip through the attempts table as JSON."""
    from sqlalchemy import create_engine, text
    from services.competitor_price_feeder import CompetitorPriceFeeder

    engine = create_engine("sqlite:///:memory:")
    try:
        feeder = CompetitorPriceFeeder(engine=engine)
        feeder._record_match_attempt(
            sku="BROWSE002",
            momo_name="MOMO 取證測試商品",
            search_terms=["取證 測試"],
            attempt_status="no_result",
            browse_diagnostic={
                "tool": "browse.sh",
                "mode": "plan_only",
                "urls": ["https://ecshweb.pchome.com.tw/search/v3.3/?q=test"],
            },
        )

        with engine.connect() as conn:
            row = conn.execute(text("""
                SELECT browse_diagnostic_json
                FROM competitor_match_attempts
                WHERE sku = 'BROWSE002'
            """)).scalar_one()
    finally:
        # Release pooled connections even when an assertion or insert fails.
        engine.dispose()

    payload = json.loads(row)
    assert payload["tool"] == "browse.sh"
    assert payload["mode"] == "plan_only"
|
||||
|
||||
|
||||
def test_match_diagnostics_payload_carries_professional_match_lanes():
|
||||
from services.competitor_price_feeder import _match_diagnostics_payload, _extend_match_tags
|
||||
from services.marketplace_product_matcher import score_marketplace_match
|
||||
@@ -591,6 +666,166 @@ def test_competitor_feeder_marks_weak_identity_as_true_low_confidence(monkeypatc
|
||||
assert attempts[0]["attempt_status"] == "true_low_confidence"
|
||||
|
||||
|
||||
def test_competitor_feeder_downgrades_variant_selection_gap_from_recoverable(monkeypatch):
    """Near-tied shade variants must be recorded as true_low_confidence.

    The source name offers "multiple colours to choose" without naming one,
    while every candidate pins a specific shade at almost the same score.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    # (shade code, shade name) — the three listings differ only in these.
    shade_variants = (
        ("08", "深杏色"),
        ("09", "灰褐棕"),
        ("11", "摩卡灰褐"),
    )
    products = [
        PChomeProduct(
            product_id=f"DDAB01-{code}",
            name=f"PERIPERA 雙頭旋轉極細眉筆 {code}{shade} 0.05g",
            price=180,
            original_price=220,
            discount=18,
            image_url="",
            product_url=f"https://24h.pchome.com.tw/prod/DDAB01-{code}",
            stock=20,
            store="24h",
            rating=4.7,
            review_count=8,
            is_on_sale=True,
            crawled_at=datetime.now(),
        )
        for code, shade in shade_variants
    ]

    class FakeCrawler:
        """Crawler stub that always returns the three shade listings."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", products

    def fake_score(_momo_name, competitor_name, **_kwargs):
        # Shade 09 wins by a hair; all three stay inside the same score band.
        winning = "09灰褐棕" in competitor_name
        return SimpleNamespace(
            score=0.734 if winning else 0.733,
            brand_score=1.0,
            token_score=0.74,
            spec_score=0.55,
            sequence_score=0.66,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=False,
            reasons=("shared_identity_anchor_packaging_variant",),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    attempts = []

    def capture_attempt(*_args, **kwargs):
        attempts.append(kwargs)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "P001",
        "name": "【peripera官方直營】雙頭旋轉極細眉筆_多色任選(1.5mm極細筆頭)",
        "product_id": 11,
        "momo_price": 180,
    }
    result = feeder._run_sku_items([sku_item])

    assert result.matched == 0
    assert result.skipped_low_score == 1
    assert attempts[0]["attempt_status"] == "true_low_confidence"
|
||||
|
||||
|
||||
def test_competitor_feeder_treats_choose_one_offer_as_missing_variant_signal(monkeypatch):
    """A "choose one" momo offer scored at 0.735 is recorded as true low confidence.

    The crawler stub returns a single multi-shade YSL blush listing and the
    scorer stub always returns 0.735.  The feeder is expected to match nothing,
    count one low-score skip, and record ``true_low_confidence``.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    listing_fields = {
        "product_id": "DDAB01-YSL",
        "name": "【YSL聖羅蘭】恆久完美透膚煙染腮紅 6g ( #12/ #57/ #93)",
        "price": 1650,
        "original_price": 1780,
        "discount": 7,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-YSL",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    product = PChomeProduct(**listing_fields)

    class StubCrawler:
        """Crawler stand-in that always returns the single YSL listing."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [product]

    def fake_score(*_args, **_kwargs):
        # Fixed diagnostics regardless of input.
        return SimpleNamespace(
            score=0.735,
            brand_score=1.0,
            token_score=0.74,
            spec_score=0.55,
            sequence_score=0.66,
            type_score=1.0,
            price_penalty=0.0,
            hard_veto=False,
            reasons=("shared_identity_anchor_packaging_variant",),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", StubCrawler)
    monkeypatch.setattr(
        "services.marketplace_product_matcher.score_marketplace_match",
        fake_score,
    )
    feeder = CompetitorPriceFeeder(engine=object())

    attempts = []

    def capture_attempt(*_args, **kwargs):
        # Keep only the keyword payload; the assertions read it by key.
        attempts.append(kwargs)

    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "Y001",
        "name": "【YSL】官方直營 恆久完美透膚煙染腮紅(腮紅/任選1款/新品上市)",
        "product_id": 12,
        "momo_price": 1650,
    }
    result = feeder._run_sku_items([sku_item])

    assert result.matched == 0
    assert result.skipped_low_score == 1
    assert attempts[0]["attempt_status"] == "true_low_confidence"
|
||||
|
||||
|
||||
def test_should_upsert_allows_same_identity_candidate_to_replace_lower_score():
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
|
||||
@@ -503,6 +503,49 @@ def test_marketplace_matcher_promotes_recipe_box_marketing_line_drift():
|
||||
assert "shared_identity_anchor_recipe_box_line" in diagnostics.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_st_deodorizer_with_brand_alias_and_line_anchor():
    """The ST deodorizer pair must score >= 0.76 with a shared-identity anchor reason.

    The two titles write the brand in a different order and describe the
    bundled refill differently; both listings are priced at 699.
    """
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = (
        "【日本雞仔牌ST】室內消臭力智能光感應3段定時無線自動除臭芳香噴霧機"
        "(內贈芳香劑39ml 衛浴精油擴香瓶棒組)"
    )
    competitor_title = (
        "日本ST雞仔牌-室內消臭力智能光感應3段定時無線自動除臭芳香噴霧機1入"
        "(含芳香劑39ml)"
    )

    diagnostics = score_marketplace_match(
        momo_title,
        competitor_title,
        momo_price=699,
        competitor_price=699,
    )

    assert diagnostics.score >= 0.76
    # Either anchor reason is acceptable for this pair.
    accepted_anchors = (
        "shared_identity_anchor_exact_line",
        "shared_identity_anchor_packaging_variant",
    )
    assert any(anchor in diagnostics.reasons for anchor in accepted_anchors)
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_nivea_dry_lotion_with_long_shared_anchor():
    """The NIVEA dry lotion pair must score >= 0.76 via the dedicated anchor reason.

    Both listings are priced at 129; the expected reason is
    ``shared_identity_anchor_nivea_dry_lotion``.
    """
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【NIVEA 妮維雅】男士無印乾爽止汗爽身乳液(無印止汗滾珠/德國妮維雅)"
    competitor_title = "【NIVEA 妮維雅】止汗爽身乳液 無印乾爽50ml"

    diagnostics = score_marketplace_match(
        momo_title,
        competitor_title,
        momo_price=129,
        competitor_price=129,
    )

    assert diagnostics.score >= 0.76
    assert "shared_identity_anchor_nivea_dry_lotion" in diagnostics.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_rejects_refill_core_vs_case_only_pack():
    """The KATE refill core and case-only listings must be hard-vetoed.

    Both listings are priced at 280.  The score must stay below 0.76,
    ``hard_veto`` must be set, and the reasons must name an accessory-case or
    refill-pack conflict.
    """
    from services.marketplace_product_matcher import score_marketplace_match

    refill_title = "【KATE 凱婷】3D造型眉彩餅補充芯(眉彩刷、眉餅盒分開販售)"
    case_title = "【KATE 凱婷】眉彩餅盒一入款(搭配3D造型眉彩餅補充芯)"

    diagnostics = score_marketplace_match(
        refill_title,
        case_title,
        momo_price=280,
        competitor_price=280,
    )

    assert diagnostics.score < 0.76
    assert diagnostics.hard_veto is True
    # Either conflict reason is acceptable for this pair.
    conflict_reasons = ("accessory_case_conflict", "refill_pack_conflict")
    assert any(reason in diagnostics.reasons for reason in conflict_reasons)
|
||||
|
||||
|
||||
def test_marketplace_matcher_suppresses_wide_price_penalty_for_exact_lip_product():
|
||||
from services.marketplace_product_matcher import score_marketplace_match
|
||||
|
||||
|
||||
Reference in New Issue
Block a user