This commit is contained in:
@@ -343,6 +343,7 @@ LEFT JOIN competitor_prices cp
|
||||
- `services/competitor_identity_revalidator.py` 可對既有 `competitor_prices` legacy row 離線重跑 `identity_v2`:只有新版 matcher 分數 `>= 0.76` 且無 hard veto 才補 `identity_v2` / `legacy_revalidated` tags;預設不刷新 `expires_at`,避免過期價格進入決策。
|
||||
- `CompetitorPriceFeeder.run_expired_identity_refresh()` 會優先刷新已通過 `identity_v2` 但 TTL 過期的 PChome row:直接用既有 `competitor_product_id` 批次呼叫 PChome 商品 API,再用新版 matcher 重新驗證名稱/規格/價格 sanity,通過後寫回 `competitor_prices` 與 `competitor_price_history`。這條路徑提升新鮮價格覆蓋率,但不降低 match threshold,也不讓過期價格直接進入決策。
|
||||
- `marketplace_product_matcher.py` 的擴充只能走「正向證據 + 反向 veto」:品牌一致、商品線/型號訊號強、價格合理且無 hard veto 時才允許 `strong_product_line_match` 加分;補充瓶/補充包/refill 與一般正裝不互相配對,分享組/加量組/明星組等組合包不得誤配單品。
|
||||
- PChome feeder 的外部 request timeout 由環境變數 `PCHOME_FEEDER_TIMEOUT` 控制(單位為秒,以 `float` 解析,可為小數),預設 12 秒;feeder 建立 `PChomeCrawler` 時一律傳入此值(`REQUEST_TIMEOUT`),取代原本寫死的 30 秒。排程不得因單一 PChome API 請求 timeout 被拖到數分鐘。
|
||||
- Dashboard 必須把「待比對」拆成可診斷狀態:`價格過期待刷新`、`舊版配對待重驗`、`低分配對待審`、`身份否決`、`找不到同款`、`抓取異常`、`尚未搜尋`。不可再用單一「待比對」掩蓋資料品質原因。
|
||||
|
||||
### 執行方式
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
@@ -41,6 +42,7 @@ MAX_SEARCH_TERMS = 3 # 每個 MOMO 商品最多嘗試幾組搜尋詞
|
||||
BATCH_SIZE = 30 # 每批 DB 寫入筆數
|
||||
RATE_DELAY = 0.8 # 每次 PChome 請求間隔(秒)
|
||||
TTL_HOURS = 6 # competitor_prices 快取有效期
|
||||
REQUEST_TIMEOUT = float(os.getenv("PCHOME_FEEDER_TIMEOUT", "12")) # 避免外部搜尋 API 長時間卡住排程
|
||||
|
||||
# ── Feeder 結果 ───────────────────────────────────────
|
||||
@dataclass
|
||||
@@ -721,7 +723,7 @@ class CompetitorPriceFeeder:
|
||||
return FeederResult(0, 0, 0, 0, 0, 0.0)
|
||||
|
||||
from services.pchome_crawler import PChomeCrawler
|
||||
crawler = PChomeCrawler(timeout=30, delay=RATE_DELAY)
|
||||
crawler = PChomeCrawler(timeout=REQUEST_TIMEOUT, delay=RATE_DELAY)
|
||||
|
||||
logger.info(f"[Feeder] 開始抓取 {len(skus)} 支商品的 {label}")
|
||||
|
||||
@@ -908,7 +910,7 @@ class CompetitorPriceFeeder:
|
||||
return FeederResult(0, 0, 0, 0, 0, 0.0)
|
||||
|
||||
from services.pchome_crawler import PChomeCrawler
|
||||
crawler = PChomeCrawler(timeout=30, delay=RATE_DELAY)
|
||||
crawler = PChomeCrawler(timeout=REQUEST_TIMEOUT, delay=RATE_DELAY)
|
||||
|
||||
requested_ids = [
|
||||
str(item.get("competitor_product_id") or "").strip()
|
||||
|
||||
@@ -28,6 +28,8 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
|
||||
assert "_fetch_expired_identity_skus" in source
|
||||
assert "run_expired_identity_refresh" in source
|
||||
assert "refresh_known_identity" in source
|
||||
assert 'PCHOME_FEEDER_TIMEOUT", "12"' in source
|
||||
assert "PChomeCrawler(timeout=REQUEST_TIMEOUT" in source
|
||||
|
||||
assert "CREATE TABLE IF NOT EXISTS competitor_match_attempts" in migration
|
||||
assert "attempt_status" in migration
|
||||
|
||||
Reference in New Issue
Block a user