[V10.345] 收斂 PChome 搜尋詞特定品線
All checks were successful
CD Pipeline / deploy (push) Successful in 1m34s

This commit is contained in:
OoO
2026-05-20 16:28:59 +08:00
parent e7ab333c58
commit 67cd35e2de
4 changed files with 54 additions and 2 deletions

View File

@@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# 系統版本與路徑
# ==========================================
SYSTEM_VERSION = "V10.344"
SYSTEM_VERSION = "V10.345"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # 用於模板顯示

View File

@@ -27,6 +27,7 @@
- 2026-05-20 追記:同步背景 PChome crawler 搜尋韌性擴充後的 `services/pchome_crawler.py` 行數;此處只更新 inventory,不變更 PChome crawler 行為。
- 2026-05-20 追記:同步 PChome 近門檻候選重評與 matcher 系列/刀片數防錯配更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;此處只更新 inventory,不變更比價行為。
- 2026-05-20 追記:同步 PChome 搜尋詞品質層、候選召回與 hard-veto 狀態分流更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;並補列背景市場情報 deployment readiness 大檔,僅更新 inventory。
- 2026-05-20 追記:同步 PChome 搜尋詞特定品線優先級更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
## 達到或超過 800 行檔案清單
@@ -53,7 +54,7 @@
| 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders |
| 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers |
| 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting |
| 1356 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
| 1387 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
| 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service |
| 844 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing |
| 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy |

View File

@@ -138,6 +138,7 @@ SEARCH_NOISE_PHRASES = (
"水光感",
"官方直營",
"官方",
"經典款",
)
SEARCH_NOISE_TOKENS = {
@@ -160,9 +161,23 @@ SEARCH_NOISE_TOKENS = {
"防曬",
"彩妝",
"水光感",
"保濕",
"抗老",
"超品日",
"經典款",
"",
"pdrn",
}
SEARCH_IDENTITY_ANCHORS = (
"絕對完美永生玫瑰逆齡乳霜",
"永生玫瑰逆齡乳霜",
"永生玫瑰霜",
"玫瑰精露",
"玫瑰霜",
"青春敷面膜",
"長效潤膚霜",
"小黑瓶",
"免用水潔淨液",
"身體按摩精油",
"按摩精油",
@@ -196,6 +211,15 @@ SEARCH_IDENTITY_ANCHORS = (
"香氛機",
)
# Generic skincare category words (cream, face cream, mask, essence, lotion,
# essential oil). In the visible scoring code of `_search_core_score`, a token
# whose compact form is exactly one of these has 28 subtracted from its score.
SEARCH_BROAD_ANCHORS = {
"乳霜",
"面霜",
"面膜",
"精華",
"乳液",
"精油",
}
SEARCH_AMBIGUOUS_PRODUCT_TERMS = {
"保護膜",
"保護貼",
@@ -1234,6 +1258,8 @@ def _extract_anchor_phrases(token: str) -> list[str]:
else:
phrase = anchor
phrase = _clean_search_phrase(phrase)
if phrase.startswith("") and len(phrase) > 2:
phrase = phrase[1:]
if any(existing in phrase and existing != phrase for existing in phrases):
continue
if len(phrase) >= 2 and phrase not in phrases:
@@ -1248,6 +1274,8 @@ def _search_core_score(token: str, all_tokens: set[str]) -> tuple[int, int, str]
compact = cleaned.replace(" ", "")
if compact in SEARCH_NOISE_TOKENS or compact in GENERIC_TOKENS:
return (-900, 0, cleaned)
if re.fullmatch(r"\d+(?:\.\d+)?(?:ml|g|mg|kg|l)x\d+", compact, re.I):
return (-900, 0, cleaned)
score = 0
if re.search(r"[a-z][a-z0-9-]{2,}", cleaned):
@@ -1258,8 +1286,11 @@ def _search_core_score(token: str, all_tokens: set[str]) -> tuple[int, int, str]
anchors = _extract_anchor_phrases(cleaned)
if anchors:
score += 90
score += min(24, len(anchors[0]) * 3)
if anchors[0] == compact:
score += 8
if compact in SEARCH_BROAD_ANCHORS:
score -= 28
else:
score += max(0, 24 - len(compact))

View File

@@ -468,6 +468,26 @@ def test_marketplace_search_terms_keep_professional_product_phrase():
assert not any("卸除防曬" in term or "外出清潔" in term for term in mustela_terms)
def test_marketplace_search_terms_prefer_specific_line_over_generic_usage_words():
    """Check that the leading search term names the specific product line.

    Each title mixes a product-line phrase with promotional or usage words.
    The first search term must be the brand plus the product line (and size
    where one is expected), and the usage words must not show up in the top
    three terms.
    """
    from services.marketplace_product_matcher import build_search_terms

    titles = (
        "【SK-II】官方直營 青春敷面膜10+6+6送12(保濕/面膜/超品日)",
        "【LANCOME 蘭蔻】官方直營 經典款小黑瓶100ml雙入組(LANCOME/經典款/抗老)",
        "【蘭蔻】官方直營 玫瑰霜60ml+玫瑰精露150ml(LANCOME/永生玫瑰霜/抗老/PDRN)",
    )
    # Build all three term lists up front, in title order, before asserting.
    mask_terms, bottle_terms, rose_terms = (
        build_search_terms(title, max_terms=5) for title in titles
    )

    # The specific product line leads each list.
    assert mask_terms[0] == "sk ii 青春敷面膜"
    assert bottle_terms[0] == "蘭蔻 小黑瓶 100ml"
    assert rose_terms[0] == "蘭蔻 永生玫瑰霜 60ml"

    # Generic usage words stay out of the top three terms.
    leading_terms = bottle_terms[:3] + rose_terms[:3]
    assert "抗老" not in " ".join(leading_terms)
    rose_leading_lower = " ".join(term.lower() for term in rose_terms[:3])
    assert "pdrn" not in rose_leading_lower
def test_batch_compare_top_uses_latest_momo_price_not_revenue(monkeypatch):
from services import pchome_crawler