[V10.345] 收斂 PChome 搜尋詞特定品線
All checks were successful
CD Pipeline / deploy (push) Successful in 1m34s
All checks were successful
CD Pipeline / deploy (push) Successful in 1m34s
This commit is contained in:
@@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
||||
# ==========================================
|
||||
# 系統版本與路徑
|
||||
# ==========================================
|
||||
-SYSTEM_VERSION = "V10.344"
|
||||
+SYSTEM_VERSION = "V10.345"
|
||||
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
||||
public_url = PUBLIC_URL # 用於模板顯示
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
- 2026-05-20 追記:同步背景 PChome crawler 搜尋韌性擴充後的 `services/pchome_crawler.py` 行數;此處只更新 inventory,不變更 PChome crawler 行為。
|
||||
- 2026-05-20 追記:同步 PChome 近門檻候選重評與 matcher 系列/刀片數防錯配更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;此處只更新 inventory,不變更比價行為。
|
||||
- 2026-05-20 追記:同步 PChome 搜尋詞品質層、候選召回與 hard-veto 狀態分流更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;並補列背景市場情報 deployment readiness 大檔,僅更新 inventory。
|
||||
- 2026-05-20 追記:同步 PChome 搜尋詞特定品線優先級更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
|
||||
## 達到或超過 800 行檔案清單
|
||||
|
||||
@@ -53,7 +54,7 @@
|
||||
| 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders |
|
||||
| 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers |
|
||||
| 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting |
|
||||
-| 1356 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
|
||||
+| 1387 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
|
||||
| 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service |
|
||||
| 844 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing |
|
||||
| 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy |
|
||||
|
||||
@@ -138,6 +138,7 @@ SEARCH_NOISE_PHRASES = (
|
||||
"水光感",
|
||||
"官方直營",
|
||||
"官方",
|
||||
"經典款",
|
||||
)
|
||||
|
||||
SEARCH_NOISE_TOKENS = {
|
||||
@@ -160,9 +161,23 @@ SEARCH_NOISE_TOKENS = {
|
||||
"防曬",
|
||||
"彩妝",
|
||||
"水光感",
|
||||
"保濕",
|
||||
"抗老",
|
||||
"超品日",
|
||||
"經典款",
|
||||
"款",
|
||||
"pdrn",
|
||||
}
|
||||
|
||||
SEARCH_IDENTITY_ANCHORS = (
|
||||
"絕對完美永生玫瑰逆齡乳霜",
|
||||
"永生玫瑰逆齡乳霜",
|
||||
"永生玫瑰霜",
|
||||
"玫瑰精露",
|
||||
"玫瑰霜",
|
||||
"青春敷面膜",
|
||||
"長效潤膚霜",
|
||||
"小黑瓶",
|
||||
"免用水潔淨液",
|
||||
"身體按摩精油",
|
||||
"按摩精油",
|
||||
@@ -196,6 +211,15 @@ SEARCH_IDENTITY_ANCHORS = (
|
||||
"香氛機",
|
||||
)
|
||||
|
||||
# Broad product-category words: cream, face cream, facial mask, essence,
# lotion, essential oil.
# NOTE(review): _search_core_score subtracts 28 points when the compact token
# is a member of this set; the exact nesting of that check should be confirmed
# against the full module, since indentation is not visible here.
SEARCH_BROAD_ANCHORS = {"乳霜", "面霜", "面膜", "精華", "乳液", "精油"}
|
||||
|
||||
SEARCH_AMBIGUOUS_PRODUCT_TERMS = {
|
||||
"保護膜",
|
||||
"保護貼",
|
||||
@@ -1234,6 +1258,8 @@ def _extract_anchor_phrases(token: str) -> list[str]:
|
||||
else:
|
||||
phrase = anchor
|
||||
phrase = _clean_search_phrase(phrase)
|
||||
if phrase.startswith("款") and len(phrase) > 2:
|
||||
phrase = phrase[1:]
|
||||
if any(existing in phrase and existing != phrase for existing in phrases):
|
||||
continue
|
||||
if len(phrase) >= 2 and phrase not in phrases:
|
||||
@@ -1248,6 +1274,8 @@ def _search_core_score(token: str, all_tokens: set[str]) -> tuple[int, int, str]
|
||||
compact = cleaned.replace(" ", "")
|
||||
if compact in SEARCH_NOISE_TOKENS or compact in GENERIC_TOKENS:
|
||||
return (-900, 0, cleaned)
|
||||
if re.fullmatch(r"\d+(?:\.\d+)?(?:ml|g|mg|kg|l)x\d+", compact, re.I):
|
||||
return (-900, 0, cleaned)
|
||||
|
||||
score = 0
|
||||
if re.search(r"[a-z][a-z0-9-]{2,}", cleaned):
|
||||
@@ -1258,8 +1286,11 @@ def _search_core_score(token: str, all_tokens: set[str]) -> tuple[int, int, str]
|
||||
anchors = _extract_anchor_phrases(cleaned)
|
||||
if anchors:
|
||||
score += 90
|
||||
score += min(24, len(anchors[0]) * 3)
|
||||
if anchors[0] == compact:
|
||||
score += 8
|
||||
if compact in SEARCH_BROAD_ANCHORS:
|
||||
score -= 28
|
||||
else:
|
||||
score += max(0, 24 - len(compact))
|
||||
|
||||
|
||||
@@ -468,6 +468,26 @@ def test_marketplace_search_terms_keep_professional_product_phrase():
|
||||
assert not any("卸除防曬" in term or "外出清潔" in term for term in mustela_terms)
|
||||
|
||||
|
||||
def test_marketplace_search_terms_prefer_specific_line_over_generic_usage_words():
    """Check that specific product-line phrases lead the generated search terms.

    Builds up to five search terms for three marketplace listing titles and
    asserts that the first term of each is the expected brand + product-line
    phrase, and that the generic words "抗老" and "pdrn" do not appear in the
    top three terms.
    """
    from services.marketplace_product_matcher import build_search_terms

    sk2_title = "【SK-II】官方直營 青春敷面膜10+6+6送12(保濕/面膜/超品日)"
    serum_title = "【LANCOME 蘭蔻】官方直營 經典款小黑瓶100ml雙入組(LANCOME/經典款/抗老)"
    cream_title = "【蘭蔻】官方直營 玫瑰霜60ml+玫瑰精露150ml(LANCOME/永生玫瑰霜/抗老/PDRN)"

    sk2_result = build_search_terms(sk2_title, max_terms=5)
    serum_result = build_search_terms(serum_title, max_terms=5)
    cream_result = build_search_terms(cream_title, max_terms=5)

    # The leading term must be the specific product line, not a usage word.
    assert sk2_result[0] == "sk ii 青春敷面膜"
    assert serum_result[0] == "蘭蔻 小黑瓶 100ml"
    assert cream_result[0] == "蘭蔻 永生玫瑰霜 60ml"

    # Generic benefit / ingredient words must stay out of the top three terms.
    top_three_joined = " ".join(serum_result[:3] + cream_result[:3])
    assert "抗老" not in top_three_joined
    cream_top_lowered = " ".join([entry.lower() for entry in cream_result[:3]])
    assert "pdrn" not in cream_top_lowered
|
||||
|
||||
|
||||
def test_batch_compare_top_uses_latest_momo_price_not_revenue(monkeypatch):
|
||||
from services import pchome_crawler
|
||||
|
||||
|
||||
Reference in New Issue
Block a user