[V10.346] 補強 PChome identity anchor scorer
This commit is contained in:
@@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
||||
# ==========================================
|
||||
# 系統版本與路徑
|
||||
# ==========================================
|
||||
SYSTEM_VERSION = "V10.345"
|
||||
SYSTEM_VERSION = "V10.346"
|
||||
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
||||
public_url = PUBLIC_URL # 用於模板顯示
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
- 2026-05-20 追記:同步 PChome 近門檻候選重評與 matcher 系列/刀片數防錯配更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;此處只更新 inventory,不變更比價行為。
|
||||
- 2026-05-20 追記:同步 PChome 搜尋詞品質層、候選召回與 hard-veto 狀態分流更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;並補列背景市場情報 deployment readiness 大檔,僅更新 inventory。
|
||||
- 2026-05-20 追記:同步 PChome 搜尋詞特定品線優先級更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
- 2026-05-20 追記:同步 PChome 共享 identity anchor scorer 與市場情報 review report route 進入大檔門檻後的行數;此處只更新 inventory,不變更功能。
|
||||
|
||||
## 達到或超過 800 行檔案清單
|
||||
|
||||
@@ -54,7 +55,7 @@
|
||||
| 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders |
|
||||
| 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers |
|
||||
| 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting |
|
||||
| 1387 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
|
||||
| 1447 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
|
||||
| 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service |
|
||||
| 844 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing |
|
||||
| 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy |
|
||||
@@ -64,6 +65,7 @@
|
||||
| 1602 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / cache strategy |
|
||||
| 1120 | `services/competitor_intel_repository.py` | P2 competitor intel repository | review queue query / cache shaping / formatting helpers |
|
||||
| 805 | `routes/bot_api_routes.py` | P2 Bot API Blueprint | route glue / bot action service |
|
||||
| 804 | `routes/market_intel_review_report_routes.py` | P2 market intel review report Blueprint | review report route glue / export payload / phase handoff orchestration |
|
||||
| 811 | `services/market_intel/deployment_readiness.py` | P2 market intel deployment readiness | preflight gates / readiness payload / route contract helpers |
|
||||
|
||||
## 市場情報開發前置禁區
|
||||
|
||||
@@ -114,6 +114,8 @@ GENERIC_TOKENS = {
|
||||
SEARCH_NOISE_PHRASES = (
|
||||
"新品上市",
|
||||
"全新上市",
|
||||
"國際航空版",
|
||||
"超取免運",
|
||||
"任選一款",
|
||||
"任選1款",
|
||||
"任選一色",
|
||||
@@ -134,6 +136,10 @@ SEARCH_NOISE_PHRASES = (
|
||||
"卸除髒汙",
|
||||
"卸除防曬",
|
||||
"卸防曬",
|
||||
"防水眼線",
|
||||
"寶寶牙刷",
|
||||
"紗布牙刷",
|
||||
"調節亮度",
|
||||
"韓國彩妝",
|
||||
"水光感",
|
||||
"官方直營",
|
||||
@@ -161,6 +167,14 @@ SEARCH_NOISE_TOKENS = {
|
||||
"防曬",
|
||||
"彩妝",
|
||||
"水光感",
|
||||
"超取",
|
||||
"免運",
|
||||
"航空版",
|
||||
"國際版",
|
||||
"附燈泡",
|
||||
"定時",
|
||||
"眼妝",
|
||||
"滅菌",
|
||||
"保濕",
|
||||
"抗老",
|
||||
"超品日",
|
||||
@@ -178,6 +192,17 @@ SEARCH_IDENTITY_ANCHORS = (
|
||||
"青春敷面膜",
|
||||
"長效潤膚霜",
|
||||
"小黑瓶",
|
||||
"私密處護潔露",
|
||||
"私密護潔露",
|
||||
"口腔清潔棒",
|
||||
"含氟防蛀修護牙膏",
|
||||
"自然遮瑕素顏霜",
|
||||
"超持久細滑眼線筆",
|
||||
"香氛融蠟燈",
|
||||
"水晶香氛能量寶盒禮盒組",
|
||||
"零粉感超持久柔焦蜜粉餅",
|
||||
"私密肌潔淨露",
|
||||
"身體除毛器",
|
||||
"免用水潔淨液",
|
||||
"身體按摩精油",
|
||||
"按摩精油",
|
||||
@@ -487,6 +512,11 @@ def _leading_brand_tokens(original: str, normalized: str) -> set[str]:
|
||||
tokens.add(token)
|
||||
|
||||
leading = normalized[:48]
|
||||
leading_tokens = _tokenize(leading)
|
||||
if leading_tokens:
|
||||
first_token = leading_tokens[0]
|
||||
if re.fullmatch(r"[\u4e00-\u9fff]{2,6}", first_token) and first_token not in GENERIC_TOKENS:
|
||||
tokens.add(first_token)
|
||||
for token in _tokenize(leading):
|
||||
if re.fullmatch(r"[a-z][a-z0-9\-']{2,}", token):
|
||||
tokens.add(token)
|
||||
@@ -1183,6 +1213,17 @@ def score_marketplace_match(
|
||||
):
|
||||
score += 0.025
|
||||
reasons.append("strong_exact_spec_match")
|
||||
shared_anchor = _shared_identity_anchor(left, right)
|
||||
if (
|
||||
shared_anchor
|
||||
and brand_score >= 0.95
|
||||
and not hard_veto
|
||||
and price_penalty == 0
|
||||
and spec_score >= 0.85
|
||||
and (token_score >= 0.43 or sequence_score >= 0.58)
|
||||
):
|
||||
score += 0.03
|
||||
reasons.append("shared_identity_anchor")
|
||||
if (
|
||||
brand_score >= 0.95
|
||||
and not hard_veto
|
||||
@@ -1267,6 +1308,25 @@ def _extract_anchor_phrases(token: str) -> list[str]:
|
||||
return phrases
|
||||
|
||||
|
||||
def _shared_identity_anchor(left: ProductIdentity, right: ProductIdentity) -> str:
    """Return the longest identity anchor phrase shared by both products.

    Anchor phrases are gathered from every entry of each side's
    ``core_tokens`` through ``_extract_anchor_phrases``. A phrase qualifies
    only when it occurs on both sides, is at least five characters long once
    spaces are removed, and is not a member of ``SEARCH_BROAD_ANCHORS``.

    Returns:
        The qualifying phrase with the greatest space-stripped length; ties
        are resolved by ordinary string ordering. An empty string is returned
        when no phrase qualifies.
    """
    left_phrases = {
        phrase
        for token in left.core_tokens
        for phrase in _extract_anchor_phrases(token)
    }
    right_phrases = {
        phrase
        for token in right.core_tokens
        for phrase in _extract_anchor_phrases(token)
    }

    candidates = [
        phrase
        for phrase in left_phrases & right_phrases
        if len(phrase.replace(" ", "")) >= 5 and phrase not in SEARCH_BROAD_ANCHORS
    ]
    # The phrase itself is part of the key, so the minimum is unique and
    # matches the first element of the equivalent sorted sequence.
    return min(
        candidates,
        key=lambda phrase: (-len(phrase.replace(" ", "")), phrase),
        default="",
    )
|
||||
|
||||
|
||||
def _search_core_score(token: str, all_tokens: set[str]) -> tuple[int, int, str]:
|
||||
cleaned = _clean_search_phrase(token)
|
||||
if not cleaned:
|
||||
|
||||
@@ -426,6 +426,28 @@ def test_marketplace_matcher_does_not_promote_different_option_without_spec():
|
||||
assert "strong_exact_spec_match" not in diagnostics.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_shared_identity_anchor_near_threshold():
    """Listings sharing a long identity anchor get the anchor bonus reason.

    Each case scores a pair of titles at equal prices and expects a score of
    at least 0.76, no hard veto, and the ``shared_identity_anchor`` reason.
    """
    from services.marketplace_product_matcher import score_marketplace_match

    # (first title, second title, price passed as both momo and competitor price)
    cases = (
        ("【OBgE】自然遮瑕素顏霜 50g", "OBgE/自然遮瑕素顏霜50g", 699),
        (
            "【UNICAT 變臉貓】超持久細滑眼線筆1.5ml",
            "【UNICAT】超持久細滑眼線筆 1.5ml 新品搶先優惠",
            399,
        ),
    )

    # Score every pair before asserting, so all calls run even if one check fails.
    all_diagnostics = [
        score_marketplace_match(
            first_title,
            second_title,
            momo_price=price,
            competitor_price=price,
        )
        for first_title, second_title, price in cases
    ]

    for diagnostics in all_diagnostics:
        assert diagnostics.score >= 0.76
        assert diagnostics.hard_veto is False
        assert "shared_identity_anchor" in diagnostics.reasons
|
||||
|
||||
|
||||
def test_marketplace_search_terms_prefer_readable_brand_core_spec():
|
||||
from services.marketplace_product_matcher import build_search_terms
|
||||
|
||||
|
||||
Reference in New Issue
Block a user