V10.550 補安全搜尋召回詞

This commit is contained in:
OoO
2026-06-01 12:42:24 +08:00
parent 339bf68e14
commit 7cb523be18
5 changed files with 208 additions and 1 deletions

View File

@@ -4,6 +4,7 @@
================================================================================
【已完成】
- V10.550 補安全搜尋召回詞:`_build_variant_recall_search_plan()` 對低風險穩定品類新增 `品牌 + 品類` 的補搜尋詞,讓 `no_result / refresh_no_result` 更有機會找到 PChome 候選後再交給 matcher 安全判斷;美甲片、指甲油、唇彩、香氛/精油、粉底、防曬、任選/色號/款式等高 variant 風險商品不走通用召回;DASHING DIVA 仍只走既有 line-specific recall + sort fallback。此變更不改 `MIN_MATCH_SCORE`、hard veto、fresh-search write safety 或 stronger existing match 覆寫保護。
- V10.549 收斂比價新鮮度 KPI 口徑:coverage cache 升到 v10,`expires_at IS NULL` 不再算進「可用比價 / decision ready」,改拆成 `unknown_freshness_matches` / `unknown_freshness_count`,避免沒有到期時間的舊資料被當成可直接決策的新鮮價格。Dashboard / daily / growth 同步顯示未知新鮮度與「未形成有效身份配對」,並把 PChome/MOMO 價格方向文案改成 `PChome 價格壓力` / `MOMO 價格優勢`,降低誤讀。
- V10.548 接線更多 focused exact 舊候選回刷:把 matcher 已驗證可安全走 total-price 的 3W CLINIC 膠原蛋白粉底液 50ml x2、花美水 Moisture/Inclear 1.7g x3、KUSSEN 寶寶益菌屁屁膏 50ml 3 入、Lab52 齒妍堂嬰幼兒/汪汪隊牙刷 2 入接進 `_fetch_retryable_candidate_skus()` focused true-low / rescore 窄門。這只擴大「舊候選可被新版 matcher 重評」的入口,不改 `MIN_MATCH_SCORE`、hard veto、auto price write safety 或既有覆寫保護。
- V10.547 強化單位價覆核洞察:`manual_unit_price_required` 不再只是人工狀態;覆核隊列與商品看板會重新帶出單位價換算、MOMO/PChome 單位價方向、差距百分比與處理建議;決策信封 / OpenClaw / PPT 摘要可讀到 `unit_price_insight`。人工覆核寫回也會保留原始 `match_diagnostic_json` / comparison mode / diagnostic codes,避免後續簡報、審計或 AI 策略只剩人工文案而失去 matcher 證據鏈。

View File

@@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# 系統版本與路徑
# ==========================================
SYSTEM_VERSION = "V10.549"
SYSTEM_VERSION = "V10.550"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # 用於模板顯示

View File

@@ -13,6 +13,7 @@
## 📅 詳細更新日誌 (考古存檔)
### 2026-06-01:PChome 比價新鮮度操作閉環
- **V10.550 安全搜尋召回詞補強**: `competitor_price_feeder` 在既有精準搜尋詞之外,對低風險穩定品類補上一組 `品牌 + 品類` recall keyword,提升 `no_result / refresh_no_result` 找到候選的機率;高 variant 風險商品如美甲片、指甲油、唇彩、香氛/精油、粉底、防曬與含任選/色號/款式/香味的商品不走通用召回。DASHING DIVA 仍保留既有 line-specific recall 與 PChome sort fallback;本次不更動 `MIN_MATCH_SCORE`、hard veto、auto write safety 或 stronger existing match 保護。
- **V10.549 比價新鮮度 KPI 口徑收斂**: `fetch_competitor_coverage()` cache 升到 v10,`expires_at IS NULL` 不再混入 `fresh_matches` / `decision_ready_rate`,改拆成 `unknown_freshness_matches` / `unknown_freshness_rate`,讓「可用比價覆蓋率」只代表有明確未過期時間的 identity 價格。Dashboard、daily、growth 同步顯示未知新鮮度與「未形成有效身份配對」,第一屏資料時間改看最新有效 PChome 價格抓取,並把價格方向文案改為 `PChome 價格壓力` / `MOMO 價格優勢`。
- **V10.548 focused exact 舊候選回刷接線**: `_fetch_retryable_candidate_skus()` 的 focused true-low / rescore 窄門新增 3W CLINIC 膠原蛋白粉底液 50ml x2、花美水 Moisture/Inclear 1.7g x3、KUSSEN 寶寶益菌屁屁膏 50ml 3 入、Lab52 齒妍堂嬰幼兒 / 汪汪隊牙刷 2 入。這些品線在 matcher 測試中已是 `exact / total_price / price_alert_exact`,本次只讓舊 `true_low_confidence` / `rescore_accepted_current` 候選能被新版 matcher 重新判斷;仍不放寬 `MIN_MATCH_SCORE`、hard veto、auto write safety 與 stronger existing match 保護。
- **V10.547 單位價覆核洞察與證據鏈保留**: `manual_unit_price_required` 現在會和 `unit_comparable` 一樣重新產生單位價比較,並轉成 `unit_price_insight`,明確標示 PChome 或 MOMO 哪邊單位價較低、差距百分比、嚴重度與操作建議;Dashboard 覆核卡、商品列、決策信封與 OpenClaw/PPT 摘要都可讀到這個訊號。人工覆核寫回 `competitor_match_attempts` 時也會在欄位存在時保留原始 `match_diagnostic_json`、`comparison_mode`、`hard_veto`、`diagnostic_codes`;`competitor_match_reviews.candidate_diagnostic` 同步附帶 JSON 證據,避免人工閉環後只剩狀態文字。

View File

@@ -48,6 +48,52 @@ TTL_HOURS = int(os.getenv("PCHOME_FEEDER_TTL_HOURS", "48")) # competitor
# Request timeout read from the PCHOME_FEEDER_TIMEOUT env var (default "12"), parsed as a float.
REQUEST_TIMEOUT = float(os.getenv("PCHOME_FEEDER_TIMEOUT", "12")) # Keeps a slow external search API from stalling the scheduled job for long.
# Sort values used for variant recall searches — presumably PChome search sort options; confirm against the crawler.
VARIANT_RECALL_SORTS = ("sale/dc", "new/dc")
# Score floor set 0.03 below MIN_MATCH_SCORE, but never lower than 0.72.
RECOVERABLE_LOW_SCORE_FLOOR = max(MIN_MATCH_SCORE - 0.03, 0.72)
# Allow-list of product types that may receive the generic "brand + product type"
# recall keyword. `_is_generic_recall_safe()` returns False for any identity whose
# `product_type` is not a member of this set.
GENERIC_RECALL_SAFE_PRODUCT_TYPES = {
"止汗噴霧",
"潔膚露",
"刮鬍刀",
"電動牙刷",
"洗手慕斯",
"足膜",
"化妝水",
"乳液",
"面霜",
"洗面乳",
"面膜",
"眼霜",
"卸妝",
}
# Deny-list of substrings: `_is_generic_recall_safe()` returns False when any of
# these occurs in the identity's `searchable_name`, even if the product type is
# on the allow-list.
GENERIC_RECALL_BLOCK_TERMS = (
# Multi-option listing wording (any-pick / multiple styles / colours / shade numbers).
"任選",
"多款",
"多色",
"色號",
"顏色",
"款式",
# Scent wording and fragrance / candle products.
"香味",
"香調",
"香氛",
"精油",
"擴香",
"蠟燭",
"融蠟",
"融燭",
# Nail products.
"美甲片",
"指甲油",
"指彩",
# Lip products.
"唇釉",
"唇彩",
"唇膏",
"潤唇",
# Eye / face makeup.
"眼影",
"腮紅",
"粉底",
"遮瑕",
"定妝",
"妝前",
# Sunscreen and hand cream.
"防曬",
"護手霜",
)
# Boolean env flags: enabled when the value, lower-cased, is one of "1", "true", "yes", "on".
# The diagnostic flag defaults to "true" (on); the execute flag defaults to "false" (off).
BROWSE_SH_DIAGNOSTIC_ENABLED = os.getenv("PCHOME_FEEDER_BROWSE_SH_DIAGNOSTIC_ENABLED", "true").lower() in {"1", "true", "yes", "on"}
BROWSE_SH_EXECUTE_ENABLED = os.getenv("PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED", "false").lower() in {"1", "true", "yes", "on"}
# Integer timeout from PCHOME_FEEDER_BROWSE_SH_TIMEOUT (default "20"); seconds, going by the constant's name.
BROWSE_SH_TIMEOUT_SECONDS = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_TIMEOUT", "20"))
@@ -377,6 +423,67 @@ def _build_search_keywords(momo_name: str) -> list:
return _dedupe_terms(primary_terms)
def _primary_brand_phrase(identity) -> str:
    """Pick the one brand phrase to put in front of a recall search keyword.

    Selection order:

    1. A fixed phrase for brands whose name is split across several tokens
       (for example ``{"rom", "nd"}`` becomes ``"romand"``), matched
       case-insensitively.
    2. The longest token containing a CJK ideograph, returned as-is.
    3. The best Latin token, lower-cased. Tokens of three or more characters
       win over two-character tokens; within a tier, tokens containing a space
       or hyphen come first, then longer tokens, then lexicographic order.

    Args:
        identity: Object exposing a ``brand_tokens`` iterable of strings. A
            missing attribute, ``None`` or an empty collection are all treated
            as "no brand".

    Returns:
        The chosen brand phrase, or ``""`` when no usable token exists.
    """
    # Materialise once: tolerates ``brand_tokens=None`` and one-shot iterables,
    # both of which broke the previous repeated ``getattr`` + iterate pattern.
    tokens = tuple(getattr(identity, "brand_tokens", None) or ())
    lowered = {token.lower() for token in tokens}

    # Brands that the tokenizer splits into parts; order matters (first hit wins).
    split_brands = (
        ({"3w", "clinic"}, "3w clinic"),
        ({"dashing", "diva"}, "dashing diva"),
        ({"rom", "nd"}, "romand"),
        ({"im", "meme"}, "im meme"),
        ({"recipe", "box"}, "recipe box"),
    )
    for parts, phrase in split_brands:
        if parts <= lowered:
            return phrase

    chinese = [token for token in tokens if re.search(r"[\u4e00-\u9fff]", token)]
    if chinese:
        # Longest first, then lexicographic, so the result is deterministic.
        return min(chinese, key=lambda token: (-len(token), token))

    def latin_rank(token):
        # False sorts before True: multi-word / hyphenated names are preferred.
        return (" " not in token and "-" not in token, -len(token), token)

    latin = [
        token for token in tokens
        if len(token) >= 2 and re.search(r"[a-z]", token, re.I)
    ]
    # Two-character tokens are only a fallback when nothing longer exists.
    preferred = [token for token in latin if len(token) >= 3] or latin
    if not preferred:
        return ""
    return min(preferred, key=latin_rank).lower()
def _is_generic_recall_safe(identity) -> bool:
    """Tell whether *identity* may get a generic ``brand + product type`` keyword.

    The product type must be on the ``GENERIC_RECALL_SAFE_PRODUCT_TYPES``
    allow-list, and the searchable name must contain none of the
    ``GENERIC_RECALL_BLOCK_TERMS`` substrings.

    Args:
        identity: Object read for ``product_type`` and ``searchable_name``;
            missing or falsy values are treated as absent.

    Returns:
        ``True`` only when both conditions hold.
    """
    if getattr(identity, "product_type", None) not in GENERIC_RECALL_SAFE_PRODUCT_TYPES:
        return False
    name = getattr(identity, "searchable_name", "") or ""
    for blocked in GENERIC_RECALL_BLOCK_TERMS:
        if blocked in name:
            return False
    return True
def _build_generic_recall_keywords(identity) -> list[str]:
    """Build the generic ``"<brand> <product type>"`` recall keyword list.

    Args:
        identity: Object describing the product; it is passed to
            ``_is_generic_recall_safe`` and ``_primary_brand_phrase`` and read
            for ``product_type``.

    Returns:
        The single combined keyword run through ``_dedupe_terms``, or an empty
        list when the identity is not safe for generic recall or lacks either
        a brand phrase or a product type.
    """
    if _is_generic_recall_safe(identity):
        brand_phrase = _primary_brand_phrase(identity)
        type_label = getattr(identity, "product_type", "") or ""
        if brand_phrase and type_label:
            return _dedupe_terms([f"{brand_phrase} {type_label}"])
    return []
def _build_variant_recall_search_plan(momo_name: str, keywords: list[str]) -> list[tuple[str, str | None]]:
plan = [(keyword, None) for keyword in (keywords or [])]
try:
@@ -388,6 +495,13 @@ def _build_variant_recall_search_plan(momo_name: str, keywords: list[str]) -> li
brand_tokens = {token.lower() for token in getattr(identity, "brand_tokens", set())}
if not ({"dashing", "diva"} <= brand_tokens and "美甲片" in getattr(identity, "searchable_name", "")):
seen = {(keyword.lower(), sort) for keyword, sort in plan}
for keyword in _build_generic_recall_keywords(identity):
key = (keyword.lower(), None)
if key in seen:
continue
seen.add(key)
plan.append((keyword, None))
return plan
searchable_name = getattr(identity, "searchable_name", "")

View File

@@ -1656,6 +1656,97 @@ def test_search_candidates_uses_line_specific_recall_for_dashing_diva_p_line(mon
]
def test_search_candidates_adds_safe_generic_brand_type_recall(monkeypatch):
    """The search issues the supplied keyword, then a ``brand + product type`` one."""
    from services.competitor_price_feeder import _search_pchome_candidates
    from services.pchome_crawler import PChomeProduct

    listing_fields = {
        "product_id": "DDAB01-B5CREAM",
        "name": "理膚寶水 B5 全面修復霜 40ml",
        "price": 699,
        "original_price": 799,
        "discount": 13,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDAB01-B5CREAM",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    pchome_hit = PChomeProduct(**listing_fields)

    class RecordingCrawler:
        """Stand-in crawler that logs every (keyword, sort) it is asked for."""

        def __init__(self):
            self.calls = []

        def search_products(self, keyword, **kwargs):
            self.calls.append((keyword, kwargs.get("sort")))
            return True, "ok", [pchome_hit]

    def fixed_score(*_args, **_kwargs):
        # Every candidate scores 0.72, so each search returns the same mid score.
        return SimpleNamespace(score=0.72)

    monkeypatch.setattr(
        "services.marketplace_product_matcher.score_marketplace_match",
        fixed_score,
    )

    crawler = RecordingCrawler()
    _search_pchome_candidates(
        crawler,
        "【理膚寶水】B5 全面修復霜 40ml",
        keywords=["理膚寶水 全面修復霜 b5 40ml"],
        momo_price=699,
    )

    expected_searches = [
        ("理膚寶水 全面修復霜 b5 40ml", None),
        ("理膚寶水 面霜", None),
    ]
    assert crawler.calls == expected_searches
def test_search_candidates_does_not_add_generic_recall_for_variant_sensitive_lip(monkeypatch):
    """A lip-tint listing is searched with the supplied keyword only."""
    from services.competitor_price_feeder import _search_pchome_candidates
    from services.pchome_crawler import PChomeProduct

    listing_fields = {
        "product_id": "DDBH8E-ROMAND12",
        "name": "rom&nd 果汁唇釉 2.0 #12",
        "price": 299,
        "original_price": 399,
        "discount": 25,
        "image_url": "",
        "product_url": "https://24h.pchome.com.tw/prod/DDBH8E-ROMAND12",
        "stock": 20,
        "store": "24h",
        "rating": 4.7,
        "review_count": 8,
        "is_on_sale": True,
        "crawled_at": datetime.now(),
    }
    pchome_hit = PChomeProduct(**listing_fields)

    class RecordingCrawler:
        """Stand-in crawler that logs every (keyword, sort) it is asked for."""

        def __init__(self):
            self.calls = []

        def search_products(self, keyword, **kwargs):
            self.calls.append((keyword, kwargs.get("sort")))
            return True, "ok", [pchome_hit]

    def fixed_score(*_args, **_kwargs):
        # Every candidate scores 0.72, so each search returns the same mid score.
        return SimpleNamespace(score=0.72)

    monkeypatch.setattr(
        "services.marketplace_product_matcher.score_marketplace_match",
        fixed_score,
    )

    crawler = RecordingCrawler()
    _search_pchome_candidates(
        crawler,
        "【rom&nd】果汁唇釉 2.0 #12",
        keywords=["romand 果汁唇釉 12"],
        momo_price=299,
    )

    # No generic "brand + product type" search may follow the precise keyword.
    assert crawler.calls == [("romand 果汁唇釉 12", None)]
def test_competitor_feeder_logs_keyword_parser_fallback(monkeypatch, caplog):
from services import competitor_price_feeder
from services import marketplace_product_matcher