V10.550 補安全搜尋召回詞
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
================================================================================
|
||||
|
||||
【已完成】
|
||||
- V10.550 補安全搜尋召回詞:`_build_variant_recall_search_plan()` 對低風險穩定品類新增 `品牌 + 品類` 的補搜尋詞,讓 `no_result / refresh_no_result` 更有機會找到 PChome 候選後再交給 matcher 安全判斷;美甲片、指甲油、唇彩、香氛/精油、粉底、防曬、任選/色號/款式等高 variant 風險商品不走通用召回,DASHING DIVA 仍只走既有 line-specific recall + sort fallback。此變更不改 `MIN_MATCH_SCORE`、hard veto、fresh-search write safety 或 stronger existing match 覆寫保護。
|
||||
- V10.549 收斂比價新鮮度 KPI 口徑:coverage cache 升到 v10,`expires_at IS NULL` 不再算進「可用比價 / decision ready」,改拆成 `unknown_freshness_matches` / `unknown_freshness_count`,避免沒有到期時間的舊資料被當成可直接決策的新鮮價格。Dashboard / daily / growth 同步顯示未知新鮮度與「未形成有效身份配對」,並把 PChome/MOMO 價格方向文案改成 `PChome 價格壓力` / `MOMO 價格優勢`,降低誤讀。
|
||||
- V10.548 接線更多 focused exact 舊候選回刷:把 matcher 已驗證可安全走 total-price 的 3W CLINIC 膠原蛋白粉底液 50ml x2、花美水 Moisture/Inclear 1.7g x3、KUSSEN 寶寶益菌屁屁膏 50ml 3 入、Lab52 齒妍堂嬰幼兒/汪汪隊牙刷 2 入接進 `_fetch_retryable_candidate_skus()` focused true-low / rescore 窄門。這只擴大「舊候選可被新版 matcher 重評」的入口,不改 `MIN_MATCH_SCORE`、hard veto、auto price write safety 或既有覆寫保護。
|
||||
- V10.547 強化單位價覆核洞察:`manual_unit_price_required` 不再只是人工狀態,覆核隊列與商品看板會重新帶出單位價換算、MOMO/PChome 單位價方向、差距百分比與處理建議;決策信封 / OpenClaw / PPT 摘要可讀到 `unit_price_insight`。人工覆核寫回也會保留原始 `match_diagnostic_json` / comparison mode / diagnostic codes,避免後續簡報、審計或 AI 策略只剩人工文案而失去 matcher 證據鏈。
|
||||
|
||||
@@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
||||
# ==========================================
|
||||
# 系統版本與路徑
|
||||
# ==========================================
|
||||
SYSTEM_VERSION = "V10.549"
|
||||
SYSTEM_VERSION = "V10.550"
|
||||
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
||||
public_url = PUBLIC_URL # 用於模板顯示
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
## 📅 詳細更新日誌 (考古存檔)
|
||||
|
||||
### 2026-06-01:PChome 比價新鮮度操作閉環
|
||||
- **V10.550 安全搜尋召回詞補強**: `competitor_price_feeder` 在既有精準搜尋詞之外,對低風險穩定品類補上一組 `品牌 + 品類` recall keyword,提升 `no_result / refresh_no_result` 找到候選的機率;高 variant 風險商品如美甲片、指甲油、唇彩、香氛/精油、粉底、防曬與含任選/色號/款式/香味的商品不走通用召回。DASHING DIVA 仍保留既有 line-specific recall 與 PChome sort fallback;本次不更動 `MIN_MATCH_SCORE`、hard veto、auto write safety 或 stronger existing match 保護。
|
||||
- **V10.549 比價新鮮度 KPI 口徑收斂**: `fetch_competitor_coverage()` cache 升到 v10,`expires_at IS NULL` 不再混入 `fresh_matches` / `decision_ready_rate`,改拆成 `unknown_freshness_matches` 與 `unknown_freshness_rate`,讓「可用比價覆蓋率」只代表有明確未過期時間的 identity 價格。Dashboard、daily、growth 同步顯示未知新鮮度與「未形成有效身份配對」,第一屏資料時間改看最新有效 PChome 價格抓取,並把價格方向文案改為 `PChome 價格壓力` / `MOMO 價格優勢`。
|
||||
- **V10.548 focused exact 舊候選回刷接線**: `_fetch_retryable_candidate_skus()` 的 focused true-low / rescore 窄門新增 3W CLINIC 膠原蛋白粉底液 50ml x2、花美水 Moisture/Inclear 1.7g x3、KUSSEN 寶寶益菌屁屁膏 50ml 3 入、Lab52 齒妍堂嬰幼兒 / 汪汪隊牙刷 2 入。這些品線在 matcher 測試中已是 `exact / total_price / price_alert_exact`,本次只讓舊 `true_low_confidence` / `rescore_accepted_current` 候選能被新版 matcher 重新判斷;仍不放寬 `MIN_MATCH_SCORE`、hard veto、auto write safety 與 stronger existing match 保護。
|
||||
- **V10.547 單位價覆核洞察與證據鏈保留**: `manual_unit_price_required` 現在會和 `unit_comparable` 一樣重新產生單位價比較,並轉成 `unit_price_insight`,明確標示 PChome 或 MOMO 哪邊單位價較低、差距百分比、嚴重度與操作建議;Dashboard 覆核卡、商品列、決策信封與 OpenClaw/PPT 摘要都可讀到這個訊號。人工覆核寫回 `competitor_match_attempts` 時也會在欄位存在時保留原始 `match_diagnostic_json`、`comparison_mode`、`hard_veto`、`diagnostic_codes`,`competitor_match_reviews.candidate_diagnostic` 同步附帶 JSON 證據,避免人工閉環後只剩狀態文字。
|
||||
|
||||
@@ -48,6 +48,52 @@ TTL_HOURS = int(os.getenv("PCHOME_FEEDER_TTL_HOURS", "48")) # competitor
|
||||
REQUEST_TIMEOUT = float(os.getenv("PCHOME_FEEDER_TIMEOUT", "12")) # 避免外部搜尋 API 長時間卡住排程
|
||||
VARIANT_RECALL_SORTS = ("sale/dc", "new/dc")
|
||||
RECOVERABLE_LOW_SCORE_FLOOR = max(MIN_MATCH_SCORE - 0.03, 0.72)
|
||||
# Product types allowed to receive the extra generic "brand + product type"
# recall keyword. Membership is tested against ``identity.product_type`` in
# ``_is_generic_recall_safe``; anything not listed here never gets the
# generic recall search.
GENERIC_RECALL_SAFE_PRODUCT_TYPES = {
    # personal care / devices
    "止汗噴霧", "潔膚露", "刮鬍刀", "電動牙刷", "洗手慕斯", "足膜",
    # skincare
    "化妝水", "乳液", "面霜", "洗面乳", "面膜", "眼霜", "卸妝",
}
|
||||
# Substrings that veto the generic recall keyword. ``_is_generic_recall_safe``
# rejects an identity as soon as any of these occurs in its
# ``searchable_name``, even when the product type itself is on the safe list.
GENERIC_RECALL_BLOCK_TERMS = (
    # pick-your-own / multi-variant wording
    "任選", "多款", "多色", "色號", "顏色", "款式",
    # scent and fragrance products
    "香味", "香調", "香氛", "精油", "擴香", "蠟燭", "融蠟", "融燭",
    # nails
    "美甲片", "指甲油", "指彩",
    # lip products
    "唇釉", "唇彩", "唇膏", "潤唇",
    # colour / base make-up
    "眼影", "腮紅", "粉底", "遮瑕", "定妝", "妝前",
    # other excluded categories
    "防曬", "護手霜",
)
|
||||
BROWSE_SH_DIAGNOSTIC_ENABLED = os.getenv("PCHOME_FEEDER_BROWSE_SH_DIAGNOSTIC_ENABLED", "true").lower() in {"1", "true", "yes", "on"}
|
||||
BROWSE_SH_EXECUTE_ENABLED = os.getenv("PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED", "false").lower() in {"1", "true", "yes", "on"}
|
||||
BROWSE_SH_TIMEOUT_SECONDS = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_TIMEOUT", "20"))
|
||||
@@ -377,6 +423,67 @@ def _build_search_keywords(momo_name: str) -> list:
|
||||
return _dedupe_terms(primary_terms)
|
||||
|
||||
|
||||
def _primary_brand_phrase(identity) -> str:
    """Return the single brand phrase used to build a generic recall keyword.

    Resolution order:

    1. Known multi-token brands: when every token of a known pair is present
       in ``identity.brand_tokens`` (compared lower-cased), a fixed phrase is
       returned, e.g. ``{"rom", "nd"}`` -> ``"romand"``.
    2. Tokens containing a CJK ideograph: the longest one (ties broken by
       string order), returned unchanged.
    3. Tokens containing a Latin letter with length >= 3, lower-cased.
       Tokens containing a space or hyphen rank first, then longer tokens,
       then string order.
    4. The same ranking applied to Latin tokens of length >= 2.

    Args:
        identity: Object exposing an iterable ``brand_tokens`` attribute of
            strings. A missing or ``None`` attribute is treated as empty.

    Returns:
        The chosen brand phrase, or ``""`` when no usable token exists.
    """
    # Read the attribute once; ``or ()`` also covers an explicit ``None``,
    # which would otherwise raise ``TypeError`` on iteration.
    tokens = tuple(getattr(identity, "brand_tokens", None) or ())
    lowered = {token.lower() for token in tokens}

    # Order matters: the first fully-contained pair wins.
    known_brands = (
        ({"3w", "clinic"}, "3w clinic"),
        ({"dashing", "diva"}, "dashing diva"),
        ({"rom", "nd"}, "romand"),
        ({"im", "meme"}, "im meme"),
        ({"recipe", "box"}, "recipe box"),
    )
    for required, phrase in known_brands:
        if required <= lowered:
            return phrase

    chinese = [token for token in tokens if re.search(r"[\u4e00-\u9fff]", token)]
    if chinese:
        return min(chinese, key=lambda token: (-len(token), token))

    def latin_rank(token):
        # ``False`` sorts first, so multi-word / hyphenated tokens are preferred.
        return (" " not in token and "-" not in token, -len(token), token)

    # Prefer tokens of at least 3 characters; fall back to 2-character ones.
    for min_length in (3, 2):
        latin = [
            token for token in tokens
            if re.search(r"[a-z]", token, re.I) and len(token) >= min_length
        ]
        if latin:
            return min(latin, key=latin_rank).lower()
    return ""
|
||||
|
||||
|
||||
def _is_generic_recall_safe(identity) -> bool:
    """Tell whether ``identity`` may receive the generic recall keyword.

    An identity qualifies only when both conditions hold:

    * its ``product_type`` is listed in ``GENERIC_RECALL_SAFE_PRODUCT_TYPES``;
    * its ``searchable_name`` contains none of ``GENERIC_RECALL_BLOCK_TERMS``.

    A missing or empty ``searchable_name`` is treated as ``""`` and therefore
    cannot trigger a block term.
    """
    if getattr(identity, "product_type", None) not in GENERIC_RECALL_SAFE_PRODUCT_TYPES:
        return False
    name = getattr(identity, "searchable_name", "") or ""
    # Plain substring match: a single block term anywhere in the name vetoes it.
    for blocked in GENERIC_RECALL_BLOCK_TERMS:
        if blocked in name:
            return False
    return True
|
||||
|
||||
|
||||
def _build_generic_recall_keywords(identity) -> list[str]:
    """Build the optional ``"<brand> <product type>"`` recall keyword list.

    Returns an empty list when the identity fails ``_is_generic_recall_safe``
    or when either the brand phrase or the product type is empty. Otherwise
    returns the single combined keyword passed through ``_dedupe_terms``.
    """
    if _is_generic_recall_safe(identity):
        brand_phrase = _primary_brand_phrase(identity)
        type_label = getattr(identity, "product_type", "") or ""
        if brand_phrase and type_label:
            return _dedupe_terms([f"{brand_phrase} {type_label}"])
    return []
|
||||
|
||||
|
||||
def _build_variant_recall_search_plan(momo_name: str, keywords: list[str]) -> list[tuple[str, str | None]]:
|
||||
plan = [(keyword, None) for keyword in (keywords or [])]
|
||||
try:
|
||||
@@ -388,6 +495,13 @@ def _build_variant_recall_search_plan(momo_name: str, keywords: list[str]) -> li
|
||||
|
||||
brand_tokens = {token.lower() for token in getattr(identity, "brand_tokens", set())}
|
||||
if not ({"dashing", "diva"} <= brand_tokens and "美甲片" in getattr(identity, "searchable_name", "")):
|
||||
seen = {(keyword.lower(), sort) for keyword, sort in plan}
|
||||
for keyword in _build_generic_recall_keywords(identity):
|
||||
key = (keyword.lower(), None)
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
plan.append((keyword, None))
|
||||
return plan
|
||||
|
||||
searchable_name = getattr(identity, "searchable_name", "")
|
||||
|
||||
@@ -1656,6 +1656,97 @@ def test_search_candidates_uses_line_specific_recall_for_dashing_diva_p_line(mon
|
||||
]
|
||||
|
||||
|
||||
def test_search_candidates_adds_safe_generic_brand_type_recall(monkeypatch):
    """The precise keyword is followed by one ``brand + product type`` search.

    The fake crawler records every ``(keyword, sort)`` it is asked for, and
    the test asserts the exact sequence of searches issued.
    """
    from services.competitor_price_feeder import _search_pchome_candidates
    from services.pchome_crawler import PChomeProduct

    candidate = PChomeProduct(
        product_id="DDAB01-B5CREAM",
        name="理膚寶水 B5 全面修復霜 40ml",
        price=699,
        original_price=799,
        discount=13,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-B5CREAM",
        stock=20,
        store="24h",
        rating=4.7,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        """Crawler stub: logs each search and always returns ``candidate``."""

        def __init__(self):
            self.calls = []

        def search_products(self, keyword, **kwargs):
            self.calls.append((keyword, kwargs.get("sort")))
            return True, "ok", [candidate]

    # NOTE(review): the fixed 0.72 score presumably keeps the search from
    # stopping at the first keyword — confirm against MIN_MATCH_SCORE.
    monkeypatch.setattr(
        "services.marketplace_product_matcher.score_marketplace_match",
        lambda *_args, **_kwargs: SimpleNamespace(score=0.72),
    )

    crawler = FakeCrawler()
    _search_pchome_candidates(
        crawler,
        "【理膚寶水】B5 全面修復霜 40ml",
        keywords=["理膚寶水 全面修復霜 b5 40ml"],
        momo_price=699,
    )

    assert crawler.calls == [
        ("理膚寶水 全面修復霜 b5 40ml", None),
        ("理膚寶水 面霜", None),
    ]
|
||||
|
||||
|
||||
def test_search_candidates_does_not_add_generic_recall_for_variant_sensitive_lip(monkeypatch):
    """A lip-glaze product is searched with its precise keyword only.

    The fake crawler records every ``(keyword, sort)`` it is asked for, and
    the test asserts that no extra generic recall search was issued.
    """
    from services.competitor_price_feeder import _search_pchome_candidates
    from services.pchome_crawler import PChomeProduct

    candidate = PChomeProduct(
        product_id="DDBH8E-ROMAND12",
        name="rom&nd 果汁唇釉 2.0 #12",
        price=299,
        original_price=399,
        discount=25,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDBH8E-ROMAND12",
        stock=20,
        store="24h",
        rating=4.7,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        """Crawler stub: logs each search and always returns ``candidate``."""

        def __init__(self):
            self.calls = []

        def search_products(self, keyword, **kwargs):
            self.calls.append((keyword, kwargs.get("sort")))
            return True, "ok", [candidate]

    # NOTE(review): the fixed 0.72 score presumably keeps the search from
    # stopping at the first keyword — confirm against MIN_MATCH_SCORE.
    monkeypatch.setattr(
        "services.marketplace_product_matcher.score_marketplace_match",
        lambda *_args, **_kwargs: SimpleNamespace(score=0.72),
    )

    crawler = FakeCrawler()
    _search_pchome_candidates(
        crawler,
        "【rom&nd】果汁唇釉 2.0 #12",
        keywords=["romand 果汁唇釉 12"],
        momo_price=299,
    )

    assert crawler.calls == [("romand 果汁唇釉 12", None)]
|
||||
|
||||
|
||||
def test_competitor_feeder_logs_keyword_parser_fallback(monkeypatch, caplog):
|
||||
from services import competitor_price_feeder
|
||||
from services import marketplace_product_matcher
|
||||
|
||||
Reference in New Issue
Block a user