3764 lines
171 KiB
Python
3764 lines
171 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
競品價格補給線 Worker (Competitor Price Feeder)
|
||
|
||
角色:獨立背景 Worker(生產者端)
|
||
架構位置:
|
||
[本 Worker — 每 4 小時跑一次] → competitor_prices DB 表(最新快取)
|
||
→ competitor_price_history DB 表(歷史快照)
|
||
↓
|
||
[AI Pipeline] → fetch_candidates() LEFT JOIN competitor_prices(消費者端)
|
||
|
||
設計原則:
|
||
- 與 AI Pipeline 完全解耦:本 Worker 掛了不影響核心大腦
|
||
- 自帶重試機制,不阻塞主排程
|
||
- 語意化標籤 (tags) 讓 Hermes 獲得更豐富的情境
|
||
|
||
爬取邏輯:
|
||
MOMO 商品名稱 → PChome 關鍵字搜尋 → 模糊比對最佳匹配 → 寫入 competitor_prices + competitor_price_history
|
||
|
||
依賴:
|
||
services/pchome_crawler.py — 搜尋 + 批量 API
|
||
services/price_comparison.py — ProductNameParser + 模糊比對
|
||
"""
|
||
|
||
import json
|
||
import logging
|
||
import os
|
||
import re
|
||
import time
|
||
from dataclasses import dataclass
|
||
from datetime import datetime, timedelta, timezone
|
||
from typing import Optional
|
||
from urllib.parse import quote_plus
|
||
|
||
from services.marketplace_product_matcher import FOCUSED_IDENTITY_TOTAL_PRICE_REASONS
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
# ── 比對參數 ─────────────────────────────────────────
|
||
MIN_MATCH_SCORE = 0.76 # 低於此分數不寫入;核心比價寧可待審也不能錯配
|
||
REPLACE_DIFFERENT_PRODUCT_SCORE = 0.84 # 已有不同 PChome 商品時,需超高信心才覆蓋
|
||
EARLY_STOP_MATCH_SCORE = 0.90 # 搜尋候選池只有強同款才提前停止,避免次佳候選卡住後續精準搜尋詞
|
||
SEARCH_LIMIT = int(os.getenv("PCHOME_FEEDER_SEARCH_LIMIT", "20")) # 每個搜尋詞取 PChome 前 N 筆
|
||
MAX_SEARCH_TERMS = int(os.getenv("PCHOME_FEEDER_MAX_SEARCH_TERMS", "6")) # 每個 MOMO 商品最多嘗試幾組搜尋詞
|
||
SEARCH_MAX_PAGES = int(os.getenv("PCHOME_FEEDER_SEARCH_MAX_PAGES", "2")) # 每個搜尋詞最多掃描 PChome 搜尋頁數
|
||
BATCH_SIZE = 30 # 每批 DB 寫入筆數
|
||
RATE_DELAY = float(os.getenv("PCHOME_FEEDER_RATE_DELAY", "1.0")) # 每次 PChome 請求間隔(秒)
|
||
TTL_HOURS = int(os.getenv("PCHOME_FEEDER_TTL_HOURS", "48")) # competitor_prices 價格新鮮度有效期
|
||
REQUEST_TIMEOUT = float(os.getenv("PCHOME_FEEDER_TIMEOUT", "12")) # 避免外部搜尋 API 長時間卡住排程
|
||
BACKFILL_REQUEST_TIMEOUT = float(os.getenv("PCHOME_FEEDER_BACKFILL_TIMEOUT", str(min(REQUEST_TIMEOUT, 6.0))))
|
||
BACKFILL_MAX_SEARCH_TERMS = int(os.getenv("PCHOME_FEEDER_BACKFILL_MAX_SEARCH_TERMS", "2"))
|
||
BACKFILL_SEARCH_MAX_PAGES = int(os.getenv("PCHOME_FEEDER_BACKFILL_SEARCH_MAX_PAGES", "1"))
|
||
BACKFILL_MAX_SECONDS_PER_SKU = float(os.getenv("PCHOME_FEEDER_BACKFILL_MAX_SECONDS_PER_SKU", "18"))
|
||
SEARCH_COVERAGE_RESCUE_ENABLED = os.getenv(
|
||
"PCHOME_FEEDER_SEARCH_COVERAGE_RESCUE_ENABLED",
|
||
"true",
|
||
).lower() in {"1", "true", "yes", "on"}
|
||
VARIANT_RECALL_SORTS = ("sale/dc", "new/dc")
|
||
RECOVERABLE_LOW_SCORE_FLOOR = max(MIN_MATCH_SCORE - 0.03, 0.72)
|
||
GENERIC_RECALL_SAFE_PRODUCT_TYPES = {
|
||
"止汗噴霧",
|
||
"潔膚露",
|
||
"刮鬍刀",
|
||
"電動牙刷",
|
||
"洗手慕斯",
|
||
"足膜",
|
||
"化妝水",
|
||
"乳液",
|
||
"面霜",
|
||
"洗面乳",
|
||
"面膜",
|
||
"眼霜",
|
||
"卸妝",
|
||
}
|
||
GENERIC_RECALL_BLOCK_TERMS = (
|
||
"任選",
|
||
"多款",
|
||
"多色",
|
||
"色號",
|
||
"顏色",
|
||
"款式",
|
||
"香味",
|
||
"香調",
|
||
"香氛",
|
||
"精油",
|
||
"擴香",
|
||
"蠟燭",
|
||
"融蠟",
|
||
"融燭",
|
||
"美甲片",
|
||
"指甲油",
|
||
"指彩",
|
||
"唇釉",
|
||
"唇彩",
|
||
"唇膏",
|
||
"潤唇",
|
||
"眼影",
|
||
"腮紅",
|
||
"粉底",
|
||
"遮瑕",
|
||
"定妝",
|
||
"妝前",
|
||
"防曬",
|
||
"護手霜",
|
||
)
|
||
COVERAGE_RESCUE_NOISE_TERMS = (
|
||
"外出清潔",
|
||
"卸除髒汙",
|
||
"卸除防曬",
|
||
"卸防曬",
|
||
"交換禮物",
|
||
"送禮",
|
||
"節日",
|
||
"推薦",
|
||
)
|
||
GENERIC_RECALL_BLOCK_NAME_PATTERN = "|".join(re.escape(term) for term in GENERIC_RECALL_BLOCK_TERMS)
|
||
BROWSE_SH_DIAGNOSTIC_ENABLED = os.getenv("PCHOME_FEEDER_BROWSE_SH_DIAGNOSTIC_ENABLED", "true").lower() in {"1", "true", "yes", "on"}
|
||
BROWSE_SH_EXECUTE_ENABLED = os.getenv("PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED", "false").lower() in {"1", "true", "yes", "on"}
|
||
BROWSE_SH_TIMEOUT_SECONDS = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_TIMEOUT", "20"))
|
||
BROWSE_SH_MAX_EXECUTIONS_PER_RUN = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_MAX_PER_RUN", "3"))
|
||
BROWSE_SH_OUTPUT_PREVIEW_CHARS = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_OUTPUT_PREVIEW_CHARS", "1200"))
|
||
RECOVERABLE_DIAGNOSTIC_REASONS = {
|
||
"strong_product_line_match",
|
||
"shared_identity_anchor",
|
||
"shared_identity_anchor_no_spec",
|
||
"shared_identity_anchor_packaging_variant",
|
||
"shared_identity_anchor_marketing_variant",
|
||
"shared_identity_anchor_core_line",
|
||
"shared_identity_anchor_variant_safe",
|
||
"shared_model_token",
|
||
"spec_name_alignment",
|
||
}
|
||
CATALOG_REVIEW_VARIANT_REASONS = {
|
||
"variant_selection_review",
|
||
"makeup_catalog_selection_gap",
|
||
}
|
||
CATALOG_REVIEW_UNIT_REASONS = {
|
||
"commercial_condition_gap",
|
||
"catalog_count_omission",
|
||
}
|
||
CATALOG_REVIEW_IDENTITY_REASONS = {
|
||
"strong_product_line_match",
|
||
"strong_exact_spec_match",
|
||
"shared_identity_anchor_exact_line",
|
||
"shared_identity_anchor_core_line",
|
||
"shared_identity_anchor_variant_safe",
|
||
"spec_name_alignment",
|
||
"shared_model_token",
|
||
}
|
||
CATALOG_REVIEW_BLOCK_REASONS = {
|
||
"brand_conflict",
|
||
"type_conflict",
|
||
"product_line_conflict",
|
||
"core_ingredient_line_conflict",
|
||
"variant_option_conflict",
|
||
"variant_descriptor_conflict",
|
||
"aroma_scent_variant_conflict",
|
||
"bath_additive_variant_gap",
|
||
"makeup_finish_conflict",
|
||
"makeup_usage_conflict",
|
||
"romand_lip_line_conflict",
|
||
"count_conflict",
|
||
"component_count_conflict",
|
||
"multi_component_conflict",
|
||
"multi_component_count_conflict",
|
||
"bundle_offer_conflict",
|
||
"refill_pack_conflict",
|
||
"accessory_case_conflict",
|
||
"named_component_quantity_conflict",
|
||
"price_ratio_extreme",
|
||
}
|
||
|
||
RECOVERABLE_SQL_REASON_LIST = ", ".join(f"'{reason}'" for reason in sorted(RECOVERABLE_DIAGNOSTIC_REASONS))
|
||
FOCUSED_TOTAL_PRICE_IDENTITY_DIAGNOSTIC_REASONS = {
|
||
f"focused_exact_identity_{reason}"
|
||
for reason in FOCUSED_IDENTITY_TOTAL_PRICE_REASONS
|
||
}
|
||
FOCUSED_TOTAL_PRICE_SAFE_DIAGNOSTIC_REASONS = {
|
||
"focused_exact_total_price_safe",
|
||
*FOCUSED_TOTAL_PRICE_IDENTITY_DIAGNOSTIC_REASONS,
|
||
}
|
||
FOCUSED_TOTAL_PRICE_IDENTITY_SQL_REASON_LIST = ", ".join(
|
||
f"'{reason}'" for reason in sorted(FOCUSED_TOTAL_PRICE_IDENTITY_DIAGNOSTIC_REASONS)
|
||
)
|
||
FOCUSED_TOTAL_PRICE_SAFE_SQL_REASON_LIST = ", ".join(
|
||
f"'{reason}'" for reason in sorted(FOCUSED_TOTAL_PRICE_SAFE_DIAGNOSTIC_REASONS)
|
||
)
|
||
REVALIDATABLE_REVIEW_GATE_REASONS = {
|
||
"strong_exact_spec_match",
|
||
"spec_name_alignment",
|
||
"shared_model_token",
|
||
}
|
||
REVALIDATABLE_REVIEW_BLOCK_REASONS = {
|
||
"commercial_condition_gap",
|
||
"variant_selection_review",
|
||
"variant_option_conflict",
|
||
"variant_descriptor_conflict",
|
||
"aroma_scent_variant_conflict",
|
||
"romand_lip_line_conflict",
|
||
"makeup_finish_conflict",
|
||
"makeup_usage_conflict",
|
||
"nail_tool_function_conflict",
|
||
"product_line_conflict",
|
||
"count_conflict",
|
||
"bundle_offer_conflict",
|
||
"multi_component_conflict",
|
||
"multi_component_count_conflict",
|
||
"refill_pack_conflict",
|
||
"accessory_case_conflict",
|
||
"named_component_quantity_conflict",
|
||
}
|
||
FOCUSED_REVALIDATABLE_REVIEW_BLOCK_REASONS = REVALIDATABLE_REVIEW_BLOCK_REASONS - {
|
||
"variant_selection_review",
|
||
}
|
||
REVALIDATABLE_REVIEW_SQL_REASON_LIST = ", ".join(
|
||
f"'{reason}'" for reason in sorted(REVALIDATABLE_REVIEW_GATE_REASONS)
|
||
)
|
||
REVALIDATABLE_REVIEW_BLOCK_SQL_REASON_LIST = ", ".join(
|
||
f"'{reason}'" for reason in sorted(REVALIDATABLE_REVIEW_BLOCK_REASONS)
|
||
)
|
||
FOCUSED_REVALIDATABLE_REVIEW_BLOCK_SQL_REASON_LIST = ", ".join(
|
||
f"'{reason}'" for reason in sorted(FOCUSED_REVALIDATABLE_REVIEW_BLOCK_REASONS)
|
||
)
|
||
STALE_IDENTITY_RECOVERY_BLOCK_REASONS = {
|
||
"accessory_case_conflict",
|
||
"aroma_lamp_style_selection_gap",
|
||
"aroma_scent_variant_conflict",
|
||
"bundle_offer_conflict",
|
||
"candle_catalog_selection_gap",
|
||
"catalog_count_omission",
|
||
"commercial_condition_gap",
|
||
"count_conflict",
|
||
"makeup_catalog_selection_gap",
|
||
"makeup_finish_conflict",
|
||
"makeup_usage_conflict",
|
||
"multi_component_conflict",
|
||
"multi_component_count_conflict",
|
||
"named_component_quantity_conflict",
|
||
"nail_tool_function_conflict",
|
||
"price_ratio_extreme",
|
||
"price_ratio_wide",
|
||
"product_line_conflict",
|
||
"refill_pack_conflict",
|
||
"romand_lip_line_conflict",
|
||
"unit_comparable",
|
||
"variant_descriptor_conflict",
|
||
"variant_option_conflict",
|
||
"variant_selection_review",
|
||
}
|
||
STALE_IDENTITY_RECOVERY_BLOCK_SQL_REASON_LIST = ", ".join(
|
||
f"'{reason}'" for reason in sorted(STALE_IDENTITY_RECOVERY_BLOCK_REASONS)
|
||
)
|
||
STALE_IDENTITY_RECOVERY_BLOCK_NAME_PATTERN = (
|
||
r"(任選|多款|色號|顏色|款式|香味|香調|即期|短效|航空版|"
|
||
r"融燭燈|融蠟燈|香氛蠟燭|精油蠟燭|蠟燭|限定|組合任選|"
|
||
r"\+|[xX]\s*\d|[**]\s*\d|湛藍|麋香|海洋|玫瑰|薰衣草|"
|
||
r"生理呵護|日用型|清爽|潤澤)"
|
||
)
|
||
|
||
|
||
def _json_list(value) -> list:
|
||
if isinstance(value, list):
|
||
return value
|
||
if isinstance(value, tuple):
|
||
return list(value)
|
||
if isinstance(value, str):
|
||
try:
|
||
parsed = json.loads(value)
|
||
return parsed if isinstance(parsed, list) else []
|
||
except Exception:
|
||
return []
|
||
return []
|
||
|
||
|
||
def _has_stale_identity_recovery_block(row: dict) -> bool:
|
||
reasons = set(str(reason) for reason in _json_list(row.get("diagnostic_reasons")))
|
||
if reasons & STALE_IDENTITY_RECOVERY_BLOCK_REASONS:
|
||
return True
|
||
haystack = f"{row.get('name') or ''} {row.get('competitor_product_name') or ''}"
|
||
return bool(re.search(STALE_IDENTITY_RECOVERY_BLOCK_NAME_PATTERN, haystack, flags=re.IGNORECASE))
|
||
|
||
# ── Feeder 結果 ───────────────────────────────────────
|
||
@dataclass
|
||
class FeederResult:
|
||
total_skus: int
|
||
matched: int
|
||
skipped_no_result: int
|
||
skipped_low_score: int
|
||
errors: int
|
||
duration_sec: float
|
||
history_written: int = 0
|
||
attempts_written: int = 0
|
||
|
||
|
||
def _has_recoverable_identity_signal(diagnostics) -> bool:
|
||
if not diagnostics:
|
||
return False
|
||
reasons = set(getattr(diagnostics, "reasons", ()) or ())
|
||
if reasons & RECOVERABLE_DIAGNOSTIC_REASONS:
|
||
return True
|
||
return (
|
||
getattr(diagnostics, "brand_score", 0) >= 0.95
|
||
and getattr(diagnostics, "token_score", 0) >= 0.56
|
||
and getattr(diagnostics, "sequence_score", 0) >= 0.50
|
||
and getattr(diagnostics, "comparison_mode", "exact_identity") == "exact_identity"
|
||
)
|
||
|
||
|
||
def _is_multi_variant_listing_name(name: str) -> bool:
|
||
return bool(
|
||
re.search(
|
||
r"(多款任選|多款可選|多色任選|多色可選|多種香味|多種香氣|香味任選|香味可選|味道可選)",
|
||
name or "",
|
||
)
|
||
)
|
||
|
||
|
||
def _classify_low_score_attempt(score: float, diagnostics) -> str:
|
||
if getattr(diagnostics, "hard_veto", False):
|
||
return "identity_veto"
|
||
if "variant_selection_review" in set(getattr(diagnostics, "reasons", ()) or ()):
|
||
return "true_low_confidence"
|
||
if score >= RECOVERABLE_LOW_SCORE_FLOOR and _has_recoverable_identity_signal(diagnostics):
|
||
return "recoverable_low_score"
|
||
return "true_low_confidence"
|
||
|
||
|
||
def _is_auto_price_write_safe(diagnostics) -> bool:
|
||
"""Only exact, total-price identities may update the formal comparison cache."""
|
||
if not diagnostics or getattr(diagnostics, "hard_veto", False):
|
||
return False
|
||
if getattr(diagnostics, "comparison_mode", "") != "exact_identity":
|
||
return False
|
||
if getattr(diagnostics, "match_type", "") != "exact":
|
||
return False
|
||
if getattr(diagnostics, "price_basis", "") != "total_price":
|
||
return False
|
||
if getattr(diagnostics, "alert_tier", "") != "price_alert_exact":
|
||
return False
|
||
if "variant_selection_review" in set(getattr(diagnostics, "reasons", ()) or ()):
|
||
return False
|
||
return True
|
||
|
||
|
||
def _classify_catalog_review_attempt(score: float, diagnostics) -> Optional[str]:
|
||
"""Split high-confidence manual gates into operational review lanes."""
|
||
if getattr(diagnostics, "hard_veto", False):
|
||
return None
|
||
if score < MIN_MATCH_SCORE:
|
||
return None
|
||
|
||
reasons = set(getattr(diagnostics, "reasons", ()) or ())
|
||
if reasons & CATALOG_REVIEW_BLOCK_REASONS:
|
||
return None
|
||
|
||
has_identity_signal = bool(reasons & CATALOG_REVIEW_IDENTITY_REASONS) or score >= 0.95
|
||
if not has_identity_signal:
|
||
return None
|
||
if reasons & CATALOG_REVIEW_VARIANT_REASONS:
|
||
return "catalog_variant_review"
|
||
if reasons & CATALOG_REVIEW_UNIT_REASONS:
|
||
return "catalog_unit_review"
|
||
if (
|
||
getattr(diagnostics, "comparison_mode", "") == "exact_identity"
|
||
and getattr(diagnostics, "match_type", "") in {"exact", "comparable"}
|
||
and getattr(diagnostics, "price_basis", "") in {"manual_review", "total_price"}
|
||
and getattr(diagnostics, "alert_tier", "") in {"identity_review", "price_alert_exact"}
|
||
):
|
||
return "catalog_identity_review"
|
||
return None
|
||
|
||
|
||
def _classify_auto_write_block_attempt(score: float, diagnostics) -> str:
|
||
if getattr(diagnostics, "hard_veto", False):
|
||
return "identity_veto"
|
||
catalog_status = _classify_catalog_review_attempt(score, diagnostics)
|
||
if catalog_status:
|
||
return catalog_status
|
||
if score >= MIN_MATCH_SCORE:
|
||
return "true_low_confidence"
|
||
return _classify_low_score_attempt(score, diagnostics)
|
||
|
||
|
||
def _has_variant_selection_gap(
|
||
momo_name: str,
|
||
ranked_matches: list[tuple],
|
||
best_score: float,
|
||
) -> bool:
|
||
"""True when source lacks explicit variant selection but top candidates require one."""
|
||
try:
|
||
from services.marketplace_product_matcher import (
|
||
_explicit_variant_option_tokens,
|
||
parse_product_identity,
|
||
)
|
||
except Exception:
|
||
return False
|
||
|
||
source_identity = parse_product_identity(momo_name)
|
||
source_options = set(_explicit_variant_option_tokens(source_identity))
|
||
source_multi_listing = _is_multi_variant_listing_name(momo_name)
|
||
if re.search(r"任選\s*[一二兩三四五六七八九十0-9]+\s*款", momo_name):
|
||
source_options -= {str(value) for value in range(1, 11)}
|
||
source_options -= {f"{value:02d}" for value in range(1, 11)}
|
||
if source_options and not source_multi_listing:
|
||
return False
|
||
|
||
threshold = max(best_score - 0.02, RECOVERABLE_LOW_SCORE_FLOOR)
|
||
option_buckets: set[str] = set()
|
||
for product, score, diagnostics in ranked_matches[:5]:
|
||
if getattr(diagnostics, "hard_veto", False) or score < threshold:
|
||
continue
|
||
candidate_name = getattr(product, "name", "") or ""
|
||
if source_multi_listing != _is_multi_variant_listing_name(candidate_name):
|
||
return True
|
||
candidate_identity = parse_product_identity(candidate_name)
|
||
options = _explicit_variant_option_tokens(candidate_identity)
|
||
if len(options) >= 2:
|
||
return True
|
||
option_buckets.update(options)
|
||
if len(option_buckets) >= 2:
|
||
return True
|
||
return False
|
||
|
||
|
||
def _extract_tags(pchome_product) -> list:
|
||
"""
|
||
從 PChomeProduct 物件提取語意標籤
|
||
|
||
標籤設計:
|
||
- "on_sale" — is_on_sale = True
|
||
- "discount_10pct" — 折扣 10~19%
|
||
- "discount_20pct" — 折扣 20~29%
|
||
- "discount_30pct" — 折扣 ≥ 30%
|
||
- "low_stock" — 庫存 < 10
|
||
- "high_rating" — 評分 ≥ 4.5
|
||
"""
|
||
tags = []
|
||
|
||
if pchome_product.is_on_sale:
|
||
tags.append("on_sale")
|
||
|
||
try:
|
||
disc = int(pchome_product.discount or 0)
|
||
except (ValueError, TypeError):
|
||
disc = 0
|
||
if disc >= 30:
|
||
tags.append("discount_30pct")
|
||
elif disc >= 20:
|
||
tags.append("discount_20pct")
|
||
elif disc >= 10:
|
||
tags.append("discount_10pct")
|
||
|
||
try:
|
||
stock = int(pchome_product.stock) if pchome_product.stock is not None else None
|
||
if stock is not None and 0 < stock < 10:
|
||
tags.append("low_stock")
|
||
except (ValueError, TypeError):
|
||
pass
|
||
|
||
try:
|
||
if pchome_product.rating and float(pchome_product.rating) >= 4.5:
|
||
tags.append("high_rating")
|
||
except (ValueError, TypeError):
|
||
pass
|
||
|
||
return tags
|
||
|
||
|
||
def _extend_match_tags(tags: list, diagnostics, extra: list[str] = None) -> list:
|
||
"""Attach matcher evidence tags in one place so all feeder lanes agree."""
|
||
tags = list(tags or [])
|
||
tags.extend(getattr(diagnostics, "tags", []) or [])
|
||
for reason in getattr(diagnostics, "reasons", ()) or ():
|
||
tags.append(f"match_{reason}")
|
||
if extra:
|
||
tags.extend(extra)
|
||
return list(dict.fromkeys(tag for tag in tags if tag))
|
||
|
||
|
||
def _clean_search_text(value: str) -> str:
|
||
value = re.sub(r'[()()]', ' ', value or '')
|
||
value = re.sub(r'[【】\[\]]', ' ', value)
|
||
value = re.sub(r"(?<=\d)\.(?=\d)", "DECIMALPOINT", value)
|
||
value = re.sub(r'[^\w\u4e00-\u9fff]+', ' ', value)
|
||
value = value.replace("DECIMALPOINT", ".").replace("decimalpoint", ".")
|
||
return re.sub(r'\s+', ' ', value).strip()
|
||
|
||
|
||
def _dedupe_terms(terms: list) -> list:
|
||
result = []
|
||
seen = set()
|
||
for term in terms:
|
||
cleaned = _clean_search_text(term)
|
||
if len(cleaned) < 2:
|
||
continue
|
||
key = cleaned.lower()
|
||
if key in seen:
|
||
continue
|
||
seen.add(key)
|
||
result.append(cleaned[:36])
|
||
if len(result) >= MAX_SEARCH_TERMS:
|
||
break
|
||
return result
|
||
|
||
|
||
def _identity_spec_terms(identity) -> list[str]:
|
||
terms: list[str] = []
|
||
for value in getattr(identity, "volumes_ml", ()) or ():
|
||
terms.append(f"{float(value):g}ml")
|
||
for value in getattr(identity, "weights_g", ()) or ():
|
||
terms.append(f"{float(value):g}g")
|
||
for value in getattr(identity, "dosages_mg", ()) or ():
|
||
terms.append(f"{float(value):g}mg")
|
||
total_piece_count = getattr(identity, "total_piece_count", None)
|
||
if total_piece_count:
|
||
terms.append(f"{int(total_piece_count)}入")
|
||
return list(dict.fromkeys(terms))
|
||
|
||
|
||
def _identity_core_rescue_phrases(identity, limit: int = 3) -> list[str]:
|
||
product_type = getattr(identity, "product_type", "") or ""
|
||
phrases: list[str] = []
|
||
for token in sorted(
|
||
(getattr(identity, "core_tokens", set()) or set()),
|
||
key=lambda value: (
|
||
product_type not in str(value),
|
||
not bool(re.search(r"[\u4e00-\u9fff]", str(value))),
|
||
-len(str(value)),
|
||
str(value),
|
||
),
|
||
):
|
||
phrase = _clean_search_text(str(token))
|
||
compact = phrase.replace(" ", "")
|
||
if len(compact) < 2:
|
||
continue
|
||
if product_type and compact == str(product_type).replace(" ", ""):
|
||
continue
|
||
if compact.isdigit() or re.fullmatch(r"\d+(?:\.\d+)?(?:ml|g|mg|kg|l|入|片|支|瓶|盒|包)?", compact, re.I):
|
||
continue
|
||
if (
|
||
any(term in compact for term in GENERIC_RECALL_BLOCK_TERMS)
|
||
and not _is_generic_recall_safe(identity)
|
||
):
|
||
continue
|
||
if any(term in compact for term in COVERAGE_RESCUE_NOISE_TERMS):
|
||
continue
|
||
if phrase.lower() in {existing.lower() for existing in phrases}:
|
||
continue
|
||
phrases.append(phrase)
|
||
if len(phrases) >= limit:
|
||
break
|
||
return phrases
|
||
|
||
|
||
def _build_coverage_rescue_keywords(momo_name: str) -> list[str]:
|
||
"""Add narrow identity fallback terms for pending/no-result coverage recovery."""
|
||
if not SEARCH_COVERAGE_RESCUE_ENABLED:
|
||
return []
|
||
try:
|
||
from services.marketplace_product_matcher import parse_product_identity
|
||
|
||
identity = parse_product_identity(momo_name)
|
||
except Exception:
|
||
return []
|
||
|
||
brand_phrases = _coverage_brand_phrases(identity)
|
||
product_type = getattr(identity, "product_type", "") or ""
|
||
spec_part = " ".join(_identity_spec_terms(identity))
|
||
core_phrases = _identity_core_rescue_phrases(identity, limit=3)
|
||
primary_core = core_phrases[0] if core_phrases else product_type
|
||
secondary_core = core_phrases[1] if len(core_phrases) > 1 else ""
|
||
|
||
terms: list[str] = []
|
||
for brand in brand_phrases:
|
||
terms.extend([
|
||
" ".join(part for part in (brand, primary_core, spec_part) if part),
|
||
" ".join(part for part in (brand, primary_core, secondary_core, spec_part) if part),
|
||
" ".join(part for part in (brand, product_type, spec_part) if part)
|
||
if _is_generic_recall_safe(identity)
|
||
else "",
|
||
])
|
||
terms.append(" ".join(part for part in (primary_core, spec_part) if part))
|
||
return _dedupe_terms(terms)
|
||
|
||
|
||
def _append_search_term(target: list[str], term: str, *, max_terms: int) -> None:
|
||
if len(target) >= max_terms:
|
||
return
|
||
cleaned_terms = _dedupe_terms([term])
|
||
if not cleaned_terms:
|
||
return
|
||
cleaned = cleaned_terms[0]
|
||
if cleaned.lower() in {existing.lower() for existing in target}:
|
||
return
|
||
target.append(cleaned)
|
||
|
||
|
||
def _build_search_keywords(momo_name: str) -> list:
|
||
"""
|
||
用多組商品身份線索搜尋 PChome,提高命中率,但仍交給身份比對門檻把關。
|
||
"""
|
||
try:
|
||
from services.marketplace_product_matcher import build_search_terms
|
||
terms = build_search_terms(momo_name, max_terms=MAX_SEARCH_TERMS)
|
||
terms.append(momo_name)
|
||
except Exception:
|
||
logger.debug(
|
||
"[Feeder] marketplace matcher failed while building search keywords; "
|
||
"fallback to cleaned product name",
|
||
exc_info=True,
|
||
)
|
||
cleaned = _clean_search_text(momo_name)
|
||
terms = [cleaned[:36], cleaned[:24]]
|
||
|
||
max_terms = max(1, MAX_SEARCH_TERMS)
|
||
primary_terms = _dedupe_terms(terms[: max(1, max_terms - 2)])
|
||
rescue_terms = _build_coverage_rescue_keywords(momo_name)
|
||
original_terms = _dedupe_terms([momo_name])
|
||
selected_terms: list[str] = []
|
||
for term in primary_terms:
|
||
_append_search_term(selected_terms, term, max_terms=max(1, max_terms - 1))
|
||
for term in rescue_terms:
|
||
_append_search_term(selected_terms, term, max_terms=max(1, max_terms - 1))
|
||
for term in original_terms:
|
||
if term.lower() not in {existing.lower() for existing in selected_terms}:
|
||
if len(selected_terms) >= max_terms:
|
||
selected_terms = selected_terms[: max(0, max_terms - 1)]
|
||
selected_terms.append(term)
|
||
break
|
||
return _dedupe_terms(selected_terms)
|
||
|
||
|
||
def _primary_brand_phrase(identity) -> str:
|
||
brand_tokens = {token.lower() for token in getattr(identity, "brand_tokens", set())}
|
||
if {"3w", "clinic"} <= brand_tokens:
|
||
return "3w clinic"
|
||
if {"dashing", "diva"} <= brand_tokens:
|
||
return "dashing diva"
|
||
if {"rom", "nd"} <= brand_tokens:
|
||
return "romand"
|
||
if {"im", "meme"} <= brand_tokens:
|
||
return "im meme"
|
||
if {"recipe", "box"} <= brand_tokens:
|
||
return "recipe box"
|
||
|
||
chinese = sorted(
|
||
(
|
||
token for token in getattr(identity, "brand_tokens", set())
|
||
if re.search(r"[\u4e00-\u9fff]", token)
|
||
),
|
||
key=lambda token: (-len(token), token),
|
||
)
|
||
if chinese:
|
||
return chinese[0]
|
||
|
||
latin = sorted(
|
||
(
|
||
token for token in getattr(identity, "brand_tokens", set())
|
||
if re.search(r"[a-z]", token, re.I) and len(token) >= 3
|
||
),
|
||
key=lambda token: (" " not in token and "-" not in token, -len(token), token),
|
||
)
|
||
if latin:
|
||
return latin[0].lower()
|
||
|
||
short_latin = sorted(
|
||
(
|
||
token for token in getattr(identity, "brand_tokens", set())
|
||
if re.search(r"[a-z]", token, re.I) and len(token) >= 2
|
||
),
|
||
key=lambda token: (" " not in token and "-" not in token, -len(token), token),
|
||
)
|
||
return short_latin[0].lower() if short_latin else ""
|
||
|
||
|
||
def _coverage_brand_phrases(identity) -> list[str]:
|
||
brand_tokens = {str(token).lower() for token in getattr(identity, "brand_tokens", set())}
|
||
chinese = sorted(
|
||
(
|
||
token for token in getattr(identity, "brand_tokens", set())
|
||
if re.search(r"[\u4e00-\u9fff]", str(token))
|
||
),
|
||
key=lambda token: (-len(str(token)), str(token)),
|
||
)
|
||
latin = sorted(
|
||
(
|
||
token for token in getattr(identity, "brand_tokens", set())
|
||
if re.search(r"[a-z]", str(token), re.I) and len(str(token)) >= 2
|
||
),
|
||
key=lambda token: (-len(str(token)), str(token)),
|
||
)
|
||
phrases: list[str] = []
|
||
if latin and chinese:
|
||
phrases.append(f"{str(latin[0]).lower()} {chinese[0]}")
|
||
primary = _primary_brand_phrase(identity)
|
||
if primary:
|
||
phrases.append(primary)
|
||
for token in (
|
||
"cerave",
|
||
"embryolisse",
|
||
"neogence",
|
||
"tunemakers",
|
||
"nivea",
|
||
"romand",
|
||
"lactacyd",
|
||
"pavaruni",
|
||
"solone",
|
||
):
|
||
if token in brand_tokens:
|
||
phrases.append(token)
|
||
return _dedupe_terms(phrases)
|
||
|
||
|
||
def _is_generic_recall_safe(identity) -> bool:
|
||
product_type = getattr(identity, "product_type", None)
|
||
if product_type not in GENERIC_RECALL_SAFE_PRODUCT_TYPES:
|
||
return False
|
||
searchable_name = getattr(identity, "searchable_name", "") or ""
|
||
return not any(term in searchable_name for term in GENERIC_RECALL_BLOCK_TERMS)
|
||
|
||
|
||
def _build_generic_recall_keywords(identity) -> list[str]:
|
||
if not _is_generic_recall_safe(identity):
|
||
return []
|
||
brand = _primary_brand_phrase(identity)
|
||
product_type = getattr(identity, "product_type", "") or ""
|
||
if not brand or not product_type:
|
||
return []
|
||
return _dedupe_terms([f"{brand} {product_type}"])
|
||
|
||
|
||
def _build_variant_recall_search_plan(momo_name: str, keywords: list[str]) -> list[tuple[str, str | None]]:
|
||
plan = [(keyword, None) for keyword in (keywords or [])]
|
||
try:
|
||
from services.marketplace_product_matcher import parse_product_identity
|
||
|
||
identity = parse_product_identity(momo_name)
|
||
except Exception:
|
||
return plan
|
||
|
||
brand_tokens = {token.lower() for token in getattr(identity, "brand_tokens", set())}
|
||
if not ({"dashing", "diva"} <= brand_tokens and "美甲片" in getattr(identity, "searchable_name", "")):
|
||
seen = {(keyword.lower(), sort) for keyword, sort in plan}
|
||
for keyword in _build_generic_recall_keywords(identity):
|
||
key = (keyword.lower(), None)
|
||
if key in seen:
|
||
continue
|
||
seen.add(key)
|
||
plan.append((keyword, None))
|
||
return plan
|
||
|
||
searchable_name = getattr(identity, "searchable_name", "")
|
||
broad_keywords = []
|
||
if "足部時尚潮流美甲片" in searchable_name:
|
||
broad_keywords.append("dashing diva 足部時尚潮流美甲片")
|
||
elif "頂級璀燦美甲片" in searchable_name:
|
||
broad_keywords.append("dashing diva 頂級璀燦美甲片")
|
||
elif "時尚潮流美甲片" in searchable_name:
|
||
broad_keywords.append("dashing diva 時尚潮流美甲片")
|
||
elif "薄型經典美甲片" in searchable_name:
|
||
broad_keywords.append("dashing diva 薄型經典美甲片")
|
||
|
||
broad_keywords.extend((
|
||
"dashing diva magicpress",
|
||
"dashing diva 美甲片",
|
||
))
|
||
|
||
seen = {(keyword.lower(), sort) for keyword, sort in plan}
|
||
|
||
def append(keyword: str, sort: str | None = None) -> None:
|
||
key = (keyword.lower(), sort)
|
||
if key in seen:
|
||
return
|
||
seen.add(key)
|
||
plan.append((keyword, sort))
|
||
|
||
for broad_keyword in broad_keywords:
|
||
append(broad_keyword, None)
|
||
|
||
if broad_keywords:
|
||
for sort in VARIANT_RECALL_SORTS:
|
||
append(broad_keywords[0], sort)
|
||
return plan
|
||
|
||
|
||
def _format_match_diagnostics(diagnostics) -> str:
|
||
if not diagnostics:
|
||
return ""
|
||
reasons = ",".join(getattr(diagnostics, "reasons", ()) or ())
|
||
return (
|
||
f"score={diagnostics.score}; brand={diagnostics.brand_score}; "
|
||
f"token={diagnostics.token_score}; spec={diagnostics.spec_score}; "
|
||
f"seq={diagnostics.sequence_score}; type={diagnostics.type_score}; "
|
||
f"penalty={diagnostics.price_penalty}; veto={diagnostics.hard_veto}; "
|
||
f"mode={getattr(diagnostics, 'comparison_mode', 'exact_identity')}; "
|
||
f"match_type={getattr(diagnostics, 'match_type', '')}; "
|
||
f"price_basis={getattr(diagnostics, 'price_basis', '')}; "
|
||
f"alert_tier={getattr(diagnostics, 'alert_tier', '')}; "
|
||
f"reasons={reasons}"
|
||
)
|
||
|
||
|
||
def _match_diagnostics_payload(diagnostics) -> dict:
|
||
"""Serialize matcher diagnostics for review/report consumers."""
|
||
if not diagnostics:
|
||
return {}
|
||
return {
|
||
"score": getattr(diagnostics, "score", None),
|
||
"brand_score": getattr(diagnostics, "brand_score", None),
|
||
"token_score": getattr(diagnostics, "token_score", None),
|
||
"spec_score": getattr(diagnostics, "spec_score", None),
|
||
"sequence_score": getattr(diagnostics, "sequence_score", None),
|
||
"type_score": getattr(diagnostics, "type_score", None),
|
||
"price_penalty": getattr(diagnostics, "price_penalty", None),
|
||
"hard_veto": bool(getattr(diagnostics, "hard_veto", False)),
|
||
"comparison_mode": getattr(diagnostics, "comparison_mode", "exact_identity"),
|
||
"match_type": getattr(diagnostics, "match_type", None),
|
||
"price_basis": getattr(diagnostics, "price_basis", None),
|
||
"alert_tier": getattr(diagnostics, "alert_tier", None),
|
||
"evidence_flags": list(getattr(diagnostics, "evidence_flags", ()) or ()),
|
||
"reasons": list(getattr(diagnostics, "reasons", ()) or ()),
|
||
"identity_evidence": getattr(diagnostics, "identity_evidence", None) or {},
|
||
"offer_evidence": getattr(diagnostics, "offer_evidence", None) or {},
|
||
}
|
||
|
||
|
||
def _pchome_search_url(keyword: str) -> str:
|
||
return f"https://ecshweb.pchome.com.tw/search/v3.3/?q={quote_plus(keyword or '')}"
|
||
|
||
|
||
def _build_browse_sh_diagnostic_payload(
|
||
momo_name: str,
|
||
search_terms: list[str] = None,
|
||
reason: str = "unknown",
|
||
best_product=None,
|
||
best_score: float = None,
|
||
diagnostics=None,
|
||
candidate_count: int = 0,
|
||
) -> dict:
|
||
"""Build a read-only browse.sh probe plan for low-confidence PChome cases."""
|
||
if not BROWSE_SH_DIAGNOSTIC_ENABLED:
|
||
return {}
|
||
|
||
terms = _dedupe_terms(search_terms or _build_search_keywords(momo_name))[:3]
|
||
urls = [_pchome_search_url(term) for term in terms]
|
||
product_url = getattr(best_product, "product_url", None)
|
||
if product_url:
|
||
urls.append(product_url)
|
||
urls = list(dict.fromkeys(url for url in urls if url))
|
||
primary_url = urls[0] if urls else _pchome_search_url(momo_name)
|
||
|
||
diagnostic_payload = _match_diagnostics_payload(diagnostics)
|
||
return {
|
||
"tool": "browse.sh",
|
||
"mode": "execute_on_demand" if BROWSE_SH_EXECUTE_ENABLED else "plan_only",
|
||
"reason": reason,
|
||
"execute_enabled": BROWSE_SH_EXECUTE_ENABLED,
|
||
"timeout_seconds": BROWSE_SH_TIMEOUT_SECONDS,
|
||
"candidate_count": int(candidate_count or 0),
|
||
"momo_name": (momo_name or "")[:300],
|
||
"search_terms": terms,
|
||
"urls": urls,
|
||
"suggested_commands": [
|
||
{
|
||
"purpose": "static_fetch_first_page",
|
||
"args": ["get", primary_url],
|
||
},
|
||
{
|
||
"purpose": "manual_browser_probe",
|
||
"args": ["open", primary_url],
|
||
},
|
||
],
|
||
"best_candidate": {
|
||
"product_id": getattr(best_product, "product_id", None),
|
||
"name": (getattr(best_product, "name", None) or "")[:300] or None,
|
||
"price": getattr(best_product, "price", None),
|
||
"url": product_url,
|
||
"score": best_score,
|
||
} if best_product else None,
|
||
"diagnostic_codes": diagnostic_payload.get("reasons") or [],
|
||
"comparison_mode": diagnostic_payload.get("comparison_mode"),
|
||
"hard_veto": diagnostic_payload.get("hard_veto"),
|
||
"execution": {"status": "disabled"},
|
||
}
|
||
|
||
|
||
def _product_snapshot_payload(product) -> dict:
|
||
payload = {
|
||
"competitor_product_url": None,
|
||
"competitor_image_url": None,
|
||
"competitor_stock": None,
|
||
}
|
||
if not product:
|
||
return payload
|
||
payload.update({
|
||
"competitor_product_url": getattr(product, "product_url", None),
|
||
"competitor_image_url": getattr(product, "image_url", None),
|
||
"competitor_stock": getattr(product, "stock", None),
|
||
})
|
||
return payload
|
||
|
||
|
||
def _product_id_key(product_id: str) -> str:
|
||
"""Normalize PChome IDs for comparing cached IDs with API-returned IDs."""
|
||
return re.sub(r"[^A-Z0-9]", "", str(product_id or "").upper())
|
||
|
||
|
||
def _candidate_match_name(product) -> str:
|
||
"""Return the identity-rich PChome text used for scoring, falling back to display name."""
|
||
return (getattr(product, "match_name", None) or getattr(product, "name", None) or "").strip()
|
||
|
||
|
||
def _find_best_match_detail(
|
||
momo_name: str,
|
||
pchome_products: list,
|
||
momo_price: float = None,
|
||
) -> Optional[tuple]:
|
||
"""
|
||
從 PChome 搜尋結果中找出與 MOMO 商品名稱最接近的一筆
|
||
|
||
Args:
|
||
momo_name: MOMO 商品名稱
|
||
pchome_products: PChomeProduct 列表
|
||
|
||
Returns:
|
||
(PChomeProduct, score, diagnostics) or None
|
||
"""
|
||
ranked = _rank_match_details(momo_name, pchome_products, momo_price=momo_price)
|
||
return ranked[0] if ranked else None
|
||
|
||
|
||
def _rank_match_details(
|
||
momo_name: str,
|
||
pchome_products: list,
|
||
momo_price: float = None,
|
||
) -> list[tuple]:
|
||
"""Score all PChome candidates and return them from strongest to weakest."""
|
||
from services.marketplace_product_matcher import score_marketplace_match
|
||
|
||
ranked = []
|
||
for p in pchome_products:
|
||
diagnostics = score_marketplace_match(
|
||
momo_name,
|
||
_candidate_match_name(p),
|
||
momo_price=momo_price,
|
||
competitor_price=getattr(p, "price", None),
|
||
)
|
||
ranked.append((p, diagnostics.score, diagnostics))
|
||
return sorted(ranked, key=lambda item: item[1], reverse=True)
|
||
|
||
|
||
def _find_best_match(momo_name: str, pchome_products: list) -> Optional[tuple]:
|
||
"""Backward-compatible helper for smoke scripts."""
|
||
result = _find_best_match_detail(momo_name, pchome_products)
|
||
if not result:
|
||
return None
|
||
best, score, _diagnostics = result
|
||
return best, score
|
||
|
||
|
||
def _search_pchome_candidates(
|
||
crawler,
|
||
momo_name: str,
|
||
keywords: list = None,
|
||
momo_price: float = None,
|
||
max_terms: int | None = None,
|
||
max_pages: int | None = None,
|
||
max_seconds: float | None = None,
|
||
) -> list:
|
||
"""以多組搜尋詞擴大 PChome 候選池,只在強同款時提前停止。"""
|
||
candidates = []
|
||
seen_ids = set()
|
||
page_cap = max(1, int(max_pages or SEARCH_MAX_PAGES))
|
||
search_limit = SEARCH_LIMIT * page_cap
|
||
active_keywords = keywords or _build_search_keywords(momo_name)
|
||
search_plan = _build_variant_recall_search_plan(momo_name, active_keywords)
|
||
if max_terms is not None:
|
||
search_plan = search_plan[:max(1, int(max_terms))]
|
||
deadline = (time.monotonic() + max_seconds) if max_seconds and max_seconds > 0 else None
|
||
for keyword, sort in search_plan:
|
||
if deadline and time.monotonic() >= deadline:
|
||
logger.info("[Feeder] PChome search budget exhausted before keyword=%s", keyword)
|
||
break
|
||
if sort:
|
||
ok, _, products = crawler.search_products(
|
||
keyword,
|
||
limit=search_limit,
|
||
max_pages=page_cap,
|
||
sort=sort,
|
||
)
|
||
else:
|
||
ok, _, products = crawler.search_products(keyword, limit=search_limit, max_pages=page_cap)
|
||
if not ok or not products:
|
||
continue
|
||
for product in products:
|
||
if product.product_id in seen_ids:
|
||
continue
|
||
seen_ids.add(product.product_id)
|
||
candidates.append(product)
|
||
best = _find_best_match_detail(momo_name, candidates, momo_price=momo_price)
|
||
if best and best[1] >= EARLY_STOP_MATCH_SCORE:
|
||
break
|
||
return candidates
|
||
|
||
|
||
def _recover_low_score_with_fresh_search(
|
||
crawler,
|
||
momo_name: str,
|
||
momo_price: float = None,
|
||
existing_product_id: str = "",
|
||
max_terms: int | None = None,
|
||
max_pages: int | None = None,
|
||
max_seconds: float | None = None,
|
||
) -> tuple[Optional[tuple], list[str], int]:
|
||
"""
|
||
當 legacy / known-id 候選重評仍低分時,再跑一次 fresh keyword search,
|
||
嘗試把舊錯配洗成新的真同款。
|
||
"""
|
||
keywords = _build_search_keywords(momo_name)
|
||
candidates = _search_pchome_candidates(
|
||
crawler,
|
||
momo_name,
|
||
keywords=keywords,
|
||
momo_price=momo_price,
|
||
max_terms=max_terms,
|
||
max_pages=max_pages,
|
||
max_seconds=max_seconds,
|
||
)
|
||
if existing_product_id:
|
||
existing_key = _product_id_key(existing_product_id)
|
||
fresh_candidates = [
|
||
candidate
|
||
for candidate in candidates
|
||
if _product_id_key(getattr(candidate, "product_id", "")) != existing_key
|
||
]
|
||
if fresh_candidates:
|
||
candidates = fresh_candidates
|
||
best = _find_best_match_detail(momo_name, candidates, momo_price=momo_price)
|
||
return best, keywords, len(candidates)
|
||
|
||
|
||
def _structural_similarity(momo_p, pchome_p) -> float:
|
||
"""
|
||
結構化相似度計算(品牌 + 規格 + 關鍵字)
|
||
|
||
權重:品牌匹配 0.4 + 規格匹配 0.3 + 關鍵字相似 0.3
|
||
"""
|
||
from difflib import SequenceMatcher
|
||
|
||
score = 0.0
|
||
|
||
# 品牌比對 (0.4)
|
||
if momo_p.brand and pchome_p.brand:
|
||
if momo_p.brand == pchome_p.brand:
|
||
score += 0.4
|
||
elif momo_p.brand in pchome_p.brand or pchome_p.brand in momo_p.brand:
|
||
score += 0.2
|
||
elif not momo_p.brand and not pchome_p.brand:
|
||
score += 0.1 # 都沒有品牌,不扣分
|
||
|
||
# 規格比對 (0.3) — 容量/克重
|
||
momo_specs = momo_p.specs or {}
|
||
pchome_specs = pchome_p.specs or {}
|
||
if momo_specs and pchome_specs:
|
||
matching_specs = sum(
|
||
1 for k, v in momo_specs.items()
|
||
if pchome_specs.get(k) == v
|
||
)
|
||
total_specs = max(len(momo_specs), len(pchome_specs), 1)
|
||
score += 0.3 * (matching_specs / total_specs)
|
||
elif not momo_specs and not pchome_specs:
|
||
score += 0.15
|
||
|
||
# 關鍵字相似度 (0.3)
|
||
momo_kws = " ".join(momo_p.keywords or [])
|
||
pchome_kws = " ".join(pchome_p.keywords or [])
|
||
if momo_kws and pchome_kws:
|
||
kw_sim = SequenceMatcher(None, momo_kws.lower(), pchome_kws.lower()).ratio()
|
||
score += 0.3 * kw_sim
|
||
|
||
return round(score, 3)
|
||
|
||
|
||
class CompetitorPriceFeeder:
|
||
"""
|
||
競品價格補給線 Worker
|
||
|
||
用法:
|
||
feeder = CompetitorPriceFeeder(engine=db_engine)
|
||
result = feeder.run(source="pchome")
|
||
"""
|
||
|
||
def __init__(self, engine=None):
|
||
self.engine = engine
|
||
self._history_table_ready = False
|
||
self._attempt_table_ready = False
|
||
self._price_table_columns_ready = False
|
||
self._browse_sh_executions = 0
|
||
|
||
def _ensure_table_columns(self, conn, table: str, column_specs: list[tuple[str, str]]) -> None:
|
||
"""補齊既有表欄位;避免正式端舊表在新 INSERT 時炸掉。"""
|
||
from sqlalchemy import inspect, text
|
||
|
||
inspector = inspect(conn)
|
||
if not inspector.has_table(table):
|
||
return
|
||
existing = {column["name"] for column in inspector.get_columns(table)}
|
||
for column_name, column_type in column_specs:
|
||
if column_name in existing:
|
||
continue
|
||
conn.execute(text(f"ALTER TABLE {table} ADD COLUMN {column_name} {column_type}"))
|
||
existing.add(column_name)
|
||
|
||
def _ensure_competitor_prices_columns(self, conn) -> None:
|
||
if self._price_table_columns_ready:
|
||
return
|
||
self._ensure_table_columns(conn, "competitor_prices", [
|
||
("competitor_product_url", "TEXT"),
|
||
("competitor_image_url", "TEXT"),
|
||
("competitor_stock", "INTEGER"),
|
||
("match_diagnostic_json", "JSONB" if conn.dialect.name == "postgresql" else "TEXT"),
|
||
("comparison_mode", "VARCHAR(40)"),
|
||
("hard_veto", "BOOLEAN"),
|
||
("diagnostic_codes", "JSONB" if conn.dialect.name == "postgresql" else "TEXT"),
|
||
])
|
||
self._price_table_columns_ready = True
|
||
|
||
def _ensure_competitor_price_history_table(self, conn):
|
||
"""確保競品價格歷史表存在;排程可自癒補表,不依賴手動 migration。"""
|
||
if self._history_table_ready:
|
||
return
|
||
|
||
from sqlalchemy import text
|
||
if conn.dialect.name == "postgresql":
|
||
conn.execute(text("""
|
||
CREATE TABLE IF NOT EXISTS competitor_price_history (
|
||
id BIGSERIAL PRIMARY KEY,
|
||
sku VARCHAR(50) NOT NULL,
|
||
source VARCHAR(30) NOT NULL DEFAULT 'pchome',
|
||
momo_product_id INTEGER,
|
||
momo_price NUMERIC(10,2),
|
||
price NUMERIC(10,2) NOT NULL,
|
||
original_price NUMERIC(10,2),
|
||
discount_pct INTEGER,
|
||
competitor_product_id VARCHAR(100),
|
||
competitor_product_name TEXT,
|
||
competitor_product_url TEXT,
|
||
competitor_image_url TEXT,
|
||
competitor_stock INTEGER,
|
||
match_score NUMERIC(4,3),
|
||
tags JSONB DEFAULT '[]'::jsonb,
|
||
match_diagnostic_json JSONB,
|
||
comparison_mode VARCHAR(40),
|
||
hard_veto BOOLEAN,
|
||
diagnostic_codes JSONB,
|
||
crawled_at TIMESTAMP NOT NULL DEFAULT NOW()
|
||
)
|
||
"""))
|
||
conn.execute(text("""
|
||
CREATE INDEX IF NOT EXISTS idx_comp_price_history_sku_source_time
|
||
ON competitor_price_history (sku, source, crawled_at DESC)
|
||
"""))
|
||
conn.execute(text("""
|
||
CREATE INDEX IF NOT EXISTS idx_comp_price_history_competitor_id
|
||
ON competitor_price_history (competitor_product_id)
|
||
"""))
|
||
else:
|
||
conn.execute(text("""
|
||
CREATE TABLE IF NOT EXISTS competitor_price_history (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
sku VARCHAR(50) NOT NULL,
|
||
source VARCHAR(30) NOT NULL DEFAULT 'pchome',
|
||
momo_product_id INTEGER,
|
||
momo_price NUMERIC(10,2),
|
||
price NUMERIC(10,2) NOT NULL,
|
||
original_price NUMERIC(10,2),
|
||
discount_pct INTEGER,
|
||
competitor_product_id VARCHAR(100),
|
||
competitor_product_name TEXT,
|
||
competitor_product_url TEXT,
|
||
competitor_image_url TEXT,
|
||
competitor_stock INTEGER,
|
||
match_score NUMERIC(4,3),
|
||
tags TEXT DEFAULT '[]',
|
||
match_diagnostic_json TEXT,
|
||
comparison_mode VARCHAR(40),
|
||
hard_veto BOOLEAN,
|
||
diagnostic_codes TEXT,
|
||
crawled_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
)
|
||
"""))
|
||
conn.execute(text("""
|
||
CREATE INDEX IF NOT EXISTS idx_comp_price_history_sku_source_time
|
||
ON competitor_price_history (sku, source, crawled_at DESC)
|
||
"""))
|
||
conn.execute(text("""
|
||
CREATE INDEX IF NOT EXISTS idx_comp_price_history_competitor_id
|
||
ON competitor_price_history (competitor_product_id)
|
||
"""))
|
||
|
||
self._ensure_table_columns(conn, "competitor_price_history", [
|
||
("competitor_product_url", "TEXT"),
|
||
("competitor_image_url", "TEXT"),
|
||
("competitor_stock", "INTEGER"),
|
||
("match_diagnostic_json", "JSONB" if conn.dialect.name == "postgresql" else "TEXT"),
|
||
("comparison_mode", "VARCHAR(40)"),
|
||
("hard_veto", "BOOLEAN"),
|
||
("diagnostic_codes", "JSONB" if conn.dialect.name == "postgresql" else "TEXT"),
|
||
])
|
||
self._history_table_ready = True
|
||
|
||
def _ensure_competitor_match_attempts_table(self, conn):
|
||
"""確保 PChome 比對嘗試表存在;成功、低分、無結果與錯誤都要留痕。"""
|
||
if self._attempt_table_ready:
|
||
return
|
||
|
||
from sqlalchemy import text
|
||
if conn.dialect.name == "postgresql":
|
||
conn.execute(text("""
|
||
CREATE TABLE IF NOT EXISTS competitor_match_attempts (
|
||
id BIGSERIAL PRIMARY KEY,
|
||
sku VARCHAR(50) NOT NULL,
|
||
source VARCHAR(30) NOT NULL DEFAULT 'pchome',
|
||
momo_product_id INTEGER,
|
||
momo_product_name TEXT,
|
||
momo_price NUMERIC(10,2),
|
||
search_terms JSONB DEFAULT '[]'::jsonb,
|
||
candidate_count INTEGER DEFAULT 0,
|
||
attempt_status VARCHAR(30) NOT NULL,
|
||
best_competitor_product_id VARCHAR(100),
|
||
best_competitor_product_name TEXT,
|
||
competitor_product_url TEXT,
|
||
competitor_image_url TEXT,
|
||
competitor_stock INTEGER,
|
||
best_competitor_price NUMERIC(10,2),
|
||
best_match_score NUMERIC(4,3),
|
||
match_diagnostic_json JSONB,
|
||
comparison_mode VARCHAR(40),
|
||
hard_veto BOOLEAN,
|
||
diagnostic_codes JSONB,
|
||
browse_diagnostic_json JSONB,
|
||
error_message TEXT,
|
||
attempted_at TIMESTAMP NOT NULL DEFAULT NOW()
|
||
)
|
||
"""))
|
||
conn.execute(text("""
|
||
CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_sku_source_time
|
||
ON competitor_match_attempts (sku, source, attempted_at DESC)
|
||
"""))
|
||
conn.execute(text("""
|
||
CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_status_time
|
||
ON competitor_match_attempts (attempt_status, attempted_at DESC)
|
||
"""))
|
||
else:
|
||
conn.execute(text("""
|
||
CREATE TABLE IF NOT EXISTS competitor_match_attempts (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
sku VARCHAR(50) NOT NULL,
|
||
source VARCHAR(30) NOT NULL DEFAULT 'pchome',
|
||
momo_product_id INTEGER,
|
||
momo_product_name TEXT,
|
||
momo_price NUMERIC(10,2),
|
||
search_terms TEXT DEFAULT '[]',
|
||
candidate_count INTEGER DEFAULT 0,
|
||
attempt_status VARCHAR(30) NOT NULL,
|
||
best_competitor_product_id VARCHAR(100),
|
||
best_competitor_product_name TEXT,
|
||
competitor_product_url TEXT,
|
||
competitor_image_url TEXT,
|
||
competitor_stock INTEGER,
|
||
best_competitor_price NUMERIC(10,2),
|
||
best_match_score NUMERIC(4,3),
|
||
match_diagnostic_json TEXT,
|
||
comparison_mode VARCHAR(40),
|
||
hard_veto BOOLEAN,
|
||
diagnostic_codes TEXT,
|
||
browse_diagnostic_json TEXT,
|
||
error_message TEXT,
|
||
attempted_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
)
|
||
"""))
|
||
conn.execute(text("""
|
||
CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_sku_source_time
|
||
ON competitor_match_attempts (sku, source, attempted_at DESC)
|
||
"""))
|
||
conn.execute(text("""
|
||
CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_status_time
|
||
ON competitor_match_attempts (attempt_status, attempted_at DESC)
|
||
"""))
|
||
|
||
self._ensure_table_columns(conn, "competitor_match_attempts", [
|
||
("search_terms", "JSONB" if conn.dialect.name == "postgresql" else "TEXT"),
|
||
("competitor_product_url", "TEXT"),
|
||
("competitor_image_url", "TEXT"),
|
||
("competitor_stock", "INTEGER"),
|
||
("match_diagnostic_json", "JSONB" if conn.dialect.name == "postgresql" else "TEXT"),
|
||
("comparison_mode", "VARCHAR(40)"),
|
||
("hard_veto", "BOOLEAN"),
|
||
("diagnostic_codes", "JSONB" if conn.dialect.name == "postgresql" else "TEXT"),
|
||
("browse_diagnostic_json", "JSONB" if conn.dialect.name == "postgresql" else "TEXT"),
|
||
("error_message", "TEXT"),
|
||
])
|
||
self._attempt_table_ready = True
|
||
|
||
def _prepare_browse_diagnostic(
|
||
self,
|
||
momo_name: str,
|
||
search_terms: list = None,
|
||
reason: str = "unknown",
|
||
best_product=None,
|
||
best_score: float = None,
|
||
diagnostics=None,
|
||
candidate_count: int = 0,
|
||
) -> dict:
|
||
"""Return browse.sh diagnostic evidence; CLI execution remains opt-in and rate-limited."""
|
||
payload = _build_browse_sh_diagnostic_payload(
|
||
momo_name,
|
||
search_terms=search_terms,
|
||
reason=reason,
|
||
best_product=best_product,
|
||
best_score=best_score,
|
||
diagnostics=diagnostics,
|
||
candidate_count=candidate_count,
|
||
)
|
||
if not payload or not BROWSE_SH_EXECUTE_ENABLED:
|
||
return payload
|
||
if self._browse_sh_executions >= BROWSE_SH_MAX_EXECUTIONS_PER_RUN:
|
||
payload["execution"] = {"status": "rate_limited"}
|
||
return payload
|
||
|
||
command_args = tuple((payload.get("suggested_commands") or [{}])[0].get("args") or ())
|
||
if not command_args:
|
||
payload["execution"] = {"status": "missing_command"}
|
||
return payload
|
||
|
||
try:
|
||
from services.browse_sh_tool import BrowseShTool
|
||
|
||
self._browse_sh_executions += 1
|
||
result = BrowseShTool(timeout_seconds=BROWSE_SH_TIMEOUT_SECONDS).run(
|
||
command_args,
|
||
timeout_seconds=BROWSE_SH_TIMEOUT_SECONDS,
|
||
)
|
||
payload["execution"] = {
|
||
"status": "ok" if result.ok else "failed",
|
||
"returncode": result.returncode,
|
||
"timed_out": result.timed_out,
|
||
"unavailable_reason": result.unavailable_reason,
|
||
"stdout_preview": (result.stdout or "")[:BROWSE_SH_OUTPUT_PREVIEW_CHARS],
|
||
"stderr_preview": (result.stderr or "")[:BROWSE_SH_OUTPUT_PREVIEW_CHARS],
|
||
}
|
||
except Exception as exc:
|
||
payload["execution"] = {
|
||
"status": "error",
|
||
"error": str(exc)[:500],
|
||
}
|
||
return payload
|
||
|
||
def _record_match_attempt(
|
||
self,
|
||
sku: str,
|
||
momo_name: str,
|
||
momo_product_id: int = None,
|
||
momo_price: float = None,
|
||
search_terms: list = None,
|
||
candidate_count: int = 0,
|
||
attempt_status: str = "unknown",
|
||
best_product=None,
|
||
best_score: float = None,
|
||
diagnostics=None,
|
||
browse_diagnostic: dict = None,
|
||
error_message: str = None,
|
||
source: str = "pchome",
|
||
) -> None:
|
||
"""追加一筆 PChome 比對嘗試紀錄,讓待比對/低信心也能回溯。"""
|
||
from sqlalchemy import text
|
||
|
||
with self.engine.begin() as conn:
|
||
self._ensure_competitor_match_attempts_table(conn)
|
||
search_terms_expr = "CAST(:search_terms AS jsonb)" if conn.dialect.name == "postgresql" else ":search_terms"
|
||
json_cast = "CAST(:match_diagnostic_json AS jsonb)" if conn.dialect.name == "postgresql" else ":match_diagnostic_json"
|
||
codes_cast = "CAST(:diagnostic_codes AS jsonb)" if conn.dialect.name == "postgresql" else ":diagnostic_codes"
|
||
browse_cast = "CAST(:browse_diagnostic_json AS jsonb)" if conn.dialect.name == "postgresql" else ":browse_diagnostic_json"
|
||
diagnostic_payload = _match_diagnostics_payload(diagnostics)
|
||
diagnostic_codes = diagnostic_payload.get("reasons") or []
|
||
product_payload = _product_snapshot_payload(best_product)
|
||
browse_diagnostic_json = (
|
||
json.dumps(browse_diagnostic, ensure_ascii=False)
|
||
if browse_diagnostic
|
||
else None
|
||
)
|
||
conn.execute(text(f"""
|
||
INSERT INTO competitor_match_attempts
|
||
(sku, source, momo_product_id, momo_product_name, momo_price,
|
||
search_terms, candidate_count, attempt_status,
|
||
best_competitor_product_id, best_competitor_product_name,
|
||
competitor_product_url, competitor_image_url, competitor_stock,
|
||
best_competitor_price, best_match_score,
|
||
match_diagnostic_json, comparison_mode, hard_veto, diagnostic_codes,
|
||
browse_diagnostic_json,
|
||
error_message,
|
||
attempted_at)
|
||
VALUES
|
||
(:sku, :source, :momo_product_id, :momo_product_name, :momo_price,
|
||
{search_terms_expr}, :candidate_count, :attempt_status,
|
||
:best_id, :best_name,
|
||
:competitor_product_url, :competitor_image_url, :competitor_stock,
|
||
:best_price, :best_score,
|
||
{json_cast}, :comparison_mode, :hard_veto, {codes_cast},
|
||
{browse_cast},
|
||
:error_message,
|
||
CURRENT_TIMESTAMP)
|
||
"""), {
|
||
"sku": sku,
|
||
"source": source,
|
||
"momo_product_id": momo_product_id,
|
||
"momo_product_name": momo_name,
|
||
"momo_price": momo_price,
|
||
"search_terms": json.dumps(search_terms or [], ensure_ascii=False),
|
||
"candidate_count": candidate_count,
|
||
"attempt_status": attempt_status,
|
||
"best_id": getattr(best_product, "product_id", None),
|
||
"best_name": (getattr(best_product, "name", None) or "")[:300] or None,
|
||
**product_payload,
|
||
"best_price": getattr(best_product, "price", None),
|
||
"best_score": best_score,
|
||
"match_diagnostic_json": json.dumps(diagnostic_payload, ensure_ascii=False) if diagnostic_payload else None,
|
||
"comparison_mode": diagnostic_payload.get("comparison_mode"),
|
||
"hard_veto": diagnostic_payload.get("hard_veto"),
|
||
"diagnostic_codes": json.dumps(diagnostic_codes, ensure_ascii=False) if diagnostic_codes else None,
|
||
"browse_diagnostic_json": browse_diagnostic_json,
|
||
"error_message": (error_message or "")[:1000] or None,
|
||
})
|
||
|
||
def _fetch_active_skus(self) -> list:
|
||
"""
|
||
從 products 表取得待監控的 ACTIVE 商品清單
|
||
|
||
Returns:
|
||
list of {"sku": str, "name": str, "category": str}
|
||
"""
|
||
if self.engine is None:
|
||
raise RuntimeError("需要注入 SQLAlchemy engine")
|
||
|
||
from sqlalchemy import text
|
||
sql = text("""
|
||
SELECT
|
||
p.id AS product_id,
|
||
p.i_code AS sku,
|
||
p.name,
|
||
p.category,
|
||
(
|
||
SELECT pr.price
|
||
FROM price_records pr
|
||
WHERE pr.product_id = p.id
|
||
ORDER BY pr.timestamp DESC
|
||
LIMIT 1
|
||
) AS momo_price
|
||
FROM products p
|
||
WHERE p.status = 'ACTIVE'
|
||
AND EXISTS (
|
||
SELECT 1
|
||
FROM price_records pr
|
||
WHERE pr.product_id = p.id
|
||
)
|
||
ORDER BY p.i_code
|
||
""")
|
||
with self.engine.connect() as conn:
|
||
rows = conn.execute(sql).fetchall()
|
||
return [dict(r._mapping) for r in rows]
|
||
|
||
def _fetch_unmatched_priority_skus(self, limit: int = 80) -> list:
|
||
"""
|
||
取得目前沒有有效 PChome 配對的高價 ACTIVE 商品,供補強流程優先處理。
|
||
"""
|
||
if self.engine is None:
|
||
raise RuntimeError("需要注入 SQLAlchemy engine")
|
||
|
||
from sqlalchemy import text
|
||
sql = text("""
|
||
WITH latest_attempt AS (
|
||
SELECT DISTINCT ON (cma.sku)
|
||
cma.sku,
|
||
cma.attempt_status,
|
||
cma.attempted_at
|
||
FROM competitor_match_attempts cma
|
||
WHERE cma.source = 'pchome'
|
||
ORDER BY cma.sku, cma.attempted_at DESC, cma.id DESC
|
||
)
|
||
SELECT
|
||
p.id AS product_id,
|
||
p.i_code AS sku,
|
||
p.name,
|
||
p.category,
|
||
latest_price.price AS momo_price,
|
||
COALESCE(la.attempt_status, 'never_attempted') AS latest_attempt_status
|
||
FROM products p
|
||
JOIN LATERAL (
|
||
SELECT pr.price
|
||
FROM price_records pr
|
||
WHERE pr.product_id = p.id
|
||
ORDER BY pr.timestamp DESC, pr.id DESC
|
||
LIMIT 1
|
||
) latest_price ON TRUE
|
||
LEFT JOIN latest_attempt la
|
||
ON la.sku = p.i_code
|
||
LEFT JOIN competitor_prices cp
|
||
ON cp.sku = p.i_code
|
||
AND cp.source = 'pchome'
|
||
AND cp.expires_at > CURRENT_TIMESTAMP
|
||
AND COALESCE(cp.match_score, 0) >= :match_score_floor
|
||
AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2'
|
||
WHERE p.status = 'ACTIVE'
|
||
AND cp.sku IS NULL
|
||
ORDER BY
|
||
CASE
|
||
WHEN la.attempt_status IN ('no_result', 'refresh_no_result')
|
||
AND p.name !~* :generic_recall_block_pattern THEN 0
|
||
WHEN la.sku IS NULL THEN 1
|
||
WHEN la.attempt_status IN ('no_result', 'refresh_no_result') THEN 2
|
||
WHEN la.attempt_status IN ('low_score', 'refresh_low_score', 'recoverable_low_score') THEN 3
|
||
ELSE 4
|
||
END,
|
||
latest_price.price DESC NULLS LAST,
|
||
p.i_code
|
||
LIMIT :limit
|
||
""")
|
||
with self.engine.connect() as conn:
|
||
rows = conn.execute(
|
||
sql,
|
||
{
|
||
"limit": max(1, min(int(limit), 300)),
|
||
"match_score_floor": MIN_MATCH_SCORE,
|
||
"generic_recall_block_pattern": GENERIC_RECALL_BLOCK_NAME_PATTERN,
|
||
},
|
||
).fetchall()
|
||
return [dict(r._mapping) for r in rows]
|
||
|
||
def _fetch_retryable_candidate_skus(self, limit: int = 80, min_score: float = 0.70) -> list:
|
||
"""
|
||
取得近門檻候選,供 matcher 升級後重新評分。
|
||
|
||
這條路徑優先用前次留下的 PChome product_id 批次查詢最新商品資料;
|
||
若 product_id 過期或重評仍低分,才走受控 fresh search recovery。
|
||
自動隊列只收「近門檻、無 hard veto、仍在 exact_identity 軌道、已有同品線證據」
|
||
的候選;identity_veto、unit_comparable 不在自動回刷主戰場。
|
||
true_low_confidence 只允許目前已補 focused exact 規則的品線進入窄門重評,
|
||
且必須沒有 commercial / variant / count / bundle 類阻擋理由。
|
||
最後仍由現行 matcher 重新判斷,不因舊 attempt_status 自動寫入正式比價。
|
||
"""
|
||
if self.engine is None:
|
||
raise RuntimeError("需要注入 SQLAlchemy engine")
|
||
|
||
from sqlalchemy import text
|
||
sql = text(f"""
|
||
WITH latest_attempt AS (
|
||
SELECT DISTINCT ON (cma.sku)
|
||
cma.sku,
|
||
cma.best_competitor_product_id,
|
||
cma.best_competitor_product_name,
|
||
cma.best_match_score,
|
||
cma.attempt_status,
|
||
cma.hard_veto,
|
||
cma.match_diagnostic_json,
|
||
cma.attempted_at
|
||
FROM competitor_match_attempts cma
|
||
WHERE cma.source = 'pchome'
|
||
ORDER BY cma.sku, cma.attempted_at DESC, cma.id DESC
|
||
),
|
||
legacy_unmasked_attempt AS (
|
||
SELECT DISTINCT ON (cma.sku, cma.best_competitor_product_id)
|
||
cma.sku,
|
||
cma.best_competitor_product_id,
|
||
cma.best_competitor_product_name,
|
||
cma.best_match_score,
|
||
cma.attempt_status,
|
||
cma.hard_veto,
|
||
cma.match_diagnostic_json,
|
||
cma.attempted_at
|
||
FROM competitor_match_attempts cma
|
||
JOIN latest_attempt current_la
|
||
ON current_la.sku = cma.sku
|
||
WHERE cma.source = 'pchome'
|
||
AND current_la.attempt_status IN (
|
||
'refresh_no_result',
|
||
'no_result',
|
||
'expired_match'
|
||
)
|
||
AND cma.attempt_status IN (
|
||
'low_score',
|
||
'refresh_low_score',
|
||
'recoverable_low_score',
|
||
'true_low_confidence',
|
||
'rescore_accepted_current'
|
||
)
|
||
AND cma.attempted_at < current_la.attempted_at
|
||
ORDER BY cma.sku, cma.best_competitor_product_id, cma.attempted_at DESC, cma.id DESC
|
||
),
|
||
candidate_attempt AS (
|
||
SELECT DISTINCT ON (la.sku, la.best_competitor_product_id)
|
||
la.*
|
||
FROM (
|
||
SELECT * FROM latest_attempt
|
||
UNION ALL
|
||
SELECT * FROM legacy_unmasked_attempt
|
||
) la
|
||
WHERE la.best_competitor_product_id IS NOT NULL
|
||
AND la.best_competitor_product_id <> ''
|
||
AND COALESCE(la.best_match_score, 0) >= :min_score
|
||
AND COALESCE(la.hard_veto, false) = false
|
||
AND COALESCE(la.match_diagnostic_json->>'comparison_mode', 'exact_identity') = 'exact_identity'
|
||
ORDER BY la.sku, la.best_competitor_product_id, la.attempted_at DESC
|
||
)
|
||
SELECT
|
||
p.id AS product_id,
|
||
p.i_code AS sku,
|
||
p.name,
|
||
p.category,
|
||
latest_price.price AS momo_price,
|
||
la.best_competitor_product_id AS competitor_product_id,
|
||
la.best_competitor_product_name AS competitor_product_name,
|
||
la.best_match_score,
|
||
la.attempt_status
|
||
FROM candidate_attempt la
|
||
JOIN products p
|
||
ON p.i_code = la.sku
|
||
AND p.status = 'ACTIVE'
|
||
JOIN LATERAL (
|
||
SELECT pr.price
|
||
FROM price_records pr
|
||
WHERE pr.product_id = p.id
|
||
ORDER BY pr.timestamp DESC, pr.id DESC
|
||
LIMIT 1
|
||
) latest_price ON TRUE
|
||
LEFT JOIN competitor_prices cp
|
||
ON cp.sku = p.i_code
|
||
AND cp.source = 'pchome'
|
||
AND cp.expires_at > CURRENT_TIMESTAMP
|
||
AND COALESCE(cp.match_score, 0) >= :match_score_floor
|
||
AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2'
|
||
WHERE cp.sku IS NULL
|
||
AND (
|
||
(
|
||
la.attempt_status IN (
|
||
'low_score',
|
||
'refresh_low_score',
|
||
'recoverable_low_score'
|
||
)
|
||
AND (
|
||
la.attempt_status = 'recoverable_low_score'
|
||
OR COALESCE(la.match_diagnostic_json->'reasons', '[]'::jsonb) ?| array[{RECOVERABLE_SQL_REASON_LIST}]
|
||
)
|
||
)
|
||
OR (
|
||
la.attempt_status IN (
|
||
'low_score',
|
||
'refresh_low_score',
|
||
'true_low_confidence',
|
||
'rescore_accepted_current'
|
||
)
|
||
AND COALESCE(la.best_match_score, 0) >= :min_score
|
||
AND COALESCE(la.hard_veto, false) = false
|
||
AND COALESCE(la.match_diagnostic_json->>'comparison_mode', 'exact_identity') = 'exact_identity'
|
||
AND (
|
||
(
|
||
COALESCE(la.match_diagnostic_json->'reasons', '[]'::jsonb) ? 'focused_exact_total_price_safe'
|
||
AND COALESCE(la.match_diagnostic_json->'reasons', '[]'::jsonb) ?| array[{FOCUSED_TOTAL_PRICE_IDENTITY_SQL_REASON_LIST}]
|
||
)
|
||
OR (
|
||
COALESCE(la.best_match_score, 0) >= :match_score_floor
|
||
AND COALESCE(la.match_diagnostic_json->'reasons', '[]'::jsonb) ?| array[{FOCUSED_TOTAL_PRICE_IDENTITY_SQL_REASON_LIST}]
|
||
)
|
||
)
|
||
AND NOT (
|
||
COALESCE(la.match_diagnostic_json->'reasons', '[]'::jsonb)
|
||
?| array[{REVALIDATABLE_REVIEW_BLOCK_SQL_REASON_LIST}]
|
||
)
|
||
)
|
||
OR (
|
||
la.attempt_status = 'true_low_confidence'
|
||
AND COALESCE(la.best_match_score, 0) >= 0.95
|
||
AND COALESCE(la.match_diagnostic_json->>'match_type', '') = 'exact'
|
||
AND COALESCE(la.match_diagnostic_json->>'price_basis', '') = 'manual_review'
|
||
AND COALESCE(la.match_diagnostic_json->'reasons', '[]'::jsonb) ? 'strong_product_line_match'
|
||
AND NOT (
|
||
COALESCE(la.match_diagnostic_json->'reasons', '[]'::jsonb)
|
||
?| array[{REVALIDATABLE_REVIEW_BLOCK_SQL_REASON_LIST}]
|
||
)
|
||
AND (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%bioneo%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%bioneo%'
|
||
AND COALESCE(p.name, '') LIKE '%微煥膚藻晶去角質凝膠%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%微煥膚藻晶去角質凝膠%'
|
||
AND lower(COALESCE(p.name, '')) ~* '150\\s*ml\\s*[x**]\\s*2'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) ~* '150\\s*ml\\s*[x**]\\s*2'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%cetaphil%'
|
||
OR COALESCE(p.name, '') LIKE '%舒特膚%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%cetaphil%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%舒特膚%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%舒新雪潤安撫水%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%舒新雪潤安撫水%'
|
||
AND lower(COALESCE(p.name, '')) ~* '150\\s*ml\\s*[x**]\\s*2'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) ~* '150\\s*ml\\s*[x**]\\s*2'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%avene%'
|
||
OR COALESCE(p.name, '') LIKE '%雅漾%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%avene%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%雅漾%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%舒護活泉水%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%舒護活泉水%'
|
||
AND lower(COALESCE(p.name, '')) ~* '300\\s*ml\\s*[x**]\\s*4'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) ~* '300\\s*ml\\s*[x**]\\s*4'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%schick%'
|
||
OR COALESCE(p.name, '') LIKE '%舒適牌%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%schick%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%舒適牌%'
|
||
)
|
||
AND (
|
||
(
|
||
COALESCE(p.name, '') LIKE '%超捍輕便刀%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%超捍輕便刀%'
|
||
)
|
||
OR (
|
||
COALESCE(p.name, '') LIKE '%高級防滑輕便刀%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%高級防滑輕便刀%'
|
||
AND COALESCE(p.name, '') LIKE '%滋潤型%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%滋潤型%'
|
||
)
|
||
)
|
||
AND lower(COALESCE(p.name, '')) ~* '2\\s*\\+\\s*1\\s*入'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) ~* '2\\s*\\+\\s*1\\s*入'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%kose%'
|
||
OR COALESCE(p.name, '') LIKE '%高絲%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%kose%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%高絲%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%雪肌精限量豪華禮盒%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%雪肌精限量豪華禮盒%'
|
||
AND lower(COALESCE(p.name, '')) ~* '500\\s*ml\\s*[x**]\\s*2'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) ~* '500\\s*ml\\s*[x**]\\s*2'
|
||
)
|
||
)
|
||
)
|
||
OR (
|
||
la.attempt_status = 'true_low_confidence'
|
||
AND COALESCE(la.best_match_score, 0) >= 0.95
|
||
AND COALESCE(la.match_diagnostic_json->'reasons', '[]'::jsonb) ? 'strong_exact_spec_match'
|
||
AND COALESCE(la.match_diagnostic_json->'reasons', '[]'::jsonb) ?| array[{REVALIDATABLE_REVIEW_SQL_REASON_LIST}]
|
||
AND NOT (
|
||
COALESCE(la.match_diagnostic_json->'reasons', '[]'::jsonb)
|
||
?| array[{REVALIDATABLE_REVIEW_BLOCK_SQL_REASON_LIST}]
|
||
)
|
||
AND (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%beauty foot%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%beauty foot%'
|
||
AND COALESCE(p.name, '') LIKE '%足膜%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%足膜%'
|
||
)
|
||
OR (
|
||
COALESCE(p.name, '') LIKE '%足足稱奇%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%足足稱奇%'
|
||
AND COALESCE(p.name, '') LIKE '%足膜%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%足膜%'
|
||
)
|
||
OR (
|
||
lower(COALESCE(p.name, '')) LIKE '%ts6%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%ts6%'
|
||
AND (
|
||
COALESCE(p.name, '') LIKE '%蜜愛潤滑液%'
|
||
OR COALESCE(p.name, '') LIKE '%蜜桃煥白凝膠%'
|
||
OR COALESCE(p.name, '') LIKE '%極淨白私密潔膚露%'
|
||
)
|
||
AND (
|
||
COALESCE(la.best_competitor_product_name, '') LIKE '%蜜愛潤滑液%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%蜜桃煥白凝膠%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%極淨白私密潔膚露%'
|
||
)
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%vaseline%'
|
||
OR COALESCE(p.name, '') LIKE '%凡士林%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%vaseline%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%凡士林%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%嬰兒高純修護凝膠%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%嬰兒高純修護凝膠%'
|
||
)
|
||
)
|
||
)
|
||
OR (
|
||
la.attempt_status IN (
|
||
'low_score',
|
||
'refresh_low_score',
|
||
'true_low_confidence'
|
||
)
|
||
AND COALESCE(la.best_match_score, 0) >= :min_score
|
||
AND COALESCE(la.hard_veto, false) = false
|
||
AND COALESCE(la.match_diagnostic_json->>'comparison_mode', 'exact_identity') = 'exact_identity'
|
||
AND NOT (
|
||
COALESCE(la.match_diagnostic_json->'reasons', '[]'::jsonb)
|
||
?| array[{REVALIDATABLE_REVIEW_BLOCK_SQL_REASON_LIST}]
|
||
)
|
||
AND (
|
||
(
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%herb24%'
|
||
OR COALESCE(p.name, '') LIKE '%草本24%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%herb24%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%草本24%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%晨霧純精油擴香儀%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%晨霧純精油擴香儀%'
|
||
AND COALESCE(p.name, '') LIKE '%黑%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%黑%'
|
||
)
|
||
OR (
|
||
lower(COALESCE(p.name, '')) LIKE '%pavaruni%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%pavaruni%'
|
||
AND COALESCE(p.name, '') LIKE '%天然植物%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%天然植物%'
|
||
AND COALESCE(p.name, '') LIKE '%精油%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%精油%'
|
||
AND COALESCE(p.name, '') LIKE '%40%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%40%'
|
||
AND lower(COALESCE(p.name, '')) ~* '10\\s*ml'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) ~* '10\\s*ml'
|
||
)
|
||
OR (
|
||
lower(COALESCE(p.name, '')) LIKE '%pavaruni%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%pavaruni%'
|
||
AND COALESCE(p.name, '') LIKE '%香氛蠟燭%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%香氛蠟燭%'
|
||
AND COALESCE(p.name, '') LIKE '%20%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%20%'
|
||
AND lower(COALESCE(p.name, '')) ~* '450\\s*g'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) ~* '450\\s*g'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%derma%'
|
||
OR COALESCE(p.name, '') LIKE '%德瑪%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%derma%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%德瑪%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%大地%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%大地%'
|
||
AND COALESCE(p.name, '') LIKE '%有機植萃%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%有機植萃%'
|
||
AND COALESCE(p.name, '') LIKE '%護膚油%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%護膚油%'
|
||
AND lower(COALESCE(p.name, '')) ~* '150\\s*ml'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) ~* '150\\s*ml'
|
||
AND NOT (
|
||
COALESCE(p.name, '') ~* '(2\\s*入|2\\s*瓶|x\\s*2|\\*\\s*2|*\\s*2)'
|
||
OR COALESCE(la.best_competitor_product_name, '') ~* '(2\\s*入|2\\s*瓶|x\\s*2|\\*\\s*2|*\\s*2)'
|
||
)
|
||
)
|
||
)
|
||
)
|
||
OR (
|
||
(
|
||
(
|
||
la.attempt_status = 'true_low_confidence'
|
||
AND COALESCE(la.best_match_score, 0) >= 0.95
|
||
)
|
||
OR (
|
||
la.attempt_status = 'rescore_accepted_current'
|
||
AND COALESCE(la.best_match_score, 0) >= 0.76
|
||
)
|
||
)
|
||
AND COALESCE(la.hard_veto, false) = false
|
||
AND COALESCE(la.match_diagnostic_json->>'comparison_mode', 'exact_identity') = 'exact_identity'
|
||
AND NOT (
|
||
COALESCE(la.match_diagnostic_json->'reasons', '[]'::jsonb)
|
||
?| array[{FOCUSED_REVALIDATABLE_REVIEW_BLOCK_SQL_REASON_LIST}]
|
||
)
|
||
AND (
|
||
(
|
||
COALESCE(p.name, '') LIKE '%花美水%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%花美水%'
|
||
AND lower(COALESCE(p.name, '')) LIKE '%relax%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%relax%'
|
||
AND COALESCE(p.name, '') LIKE '%薰衣草%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%薰衣草%'
|
||
AND COALESCE(p.name, '') LIKE '%潤滑凝膠%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%潤滑凝膠%'
|
||
)
|
||
OR (
|
||
COALESCE(p.name, '') LIKE '%聖克萊爾%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%聖克萊爾%'
|
||
AND COALESCE(p.name, '') LIKE '%私密呼呼溫和潔淨慕斯%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%私密呼呼溫和潔淨慕斯%'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%biopeutic%'
|
||
OR COALESCE(p.name, '') LIKE '%葆療美%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%biopeutic%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%葆療美%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%果酸煥膚水凝乳%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%果酸煥膚水凝乳%'
|
||
)
|
||
OR (
|
||
COALESCE(p.name, '') LIKE '%台塑生醫%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%台塑生醫%'
|
||
AND COALESCE(p.name, '') LIKE '%嬰兒沐浴洗髮%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%嬰兒沐浴洗髮%'
|
||
AND COALESCE(p.name, '') LIKE '%嬰兒沐浴精%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%嬰兒沐浴精%'
|
||
AND COALESCE(p.name, '') LIKE '%嬰幼童洗髮精%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%嬰幼童洗髮精%'
|
||
)
|
||
OR (
|
||
COALESCE(p.name, '') LIKE '%雅頓%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%雅頓%'
|
||
AND COALESCE(p.name, '') LIKE '%八小時潤澤護唇膏%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%八小時潤澤護唇膏%'
|
||
AND lower(COALESCE(p.name, '')) LIKE '%spf15%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%spf15%'
|
||
)
|
||
OR (
|
||
COALESCE(p.name, '') LIKE '%理膚寶水%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%理膚寶水%'
|
||
AND COALESCE(p.name, '') LIKE '%全面修復潤唇膏%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%全面修復潤唇膏%'
|
||
)
|
||
OR (
|
||
lower(COALESCE(p.name, '')) LIKE '%3w%'
|
||
AND lower(COALESCE(p.name, '')) LIKE '%clinic%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%3w%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%clinic%'
|
||
AND COALESCE(p.name, '') LIKE '%膠原蛋白粉底液%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%膠原蛋白粉底液%'
|
||
AND COALESCE(p.name, '') ~* '50\\s*ml'
|
||
AND COALESCE(la.best_competitor_product_name, '') ~* '50\\s*ml'
|
||
)
|
||
OR (
|
||
COALESCE(p.name, '') LIKE '%花美水%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%花美水%'
|
||
AND lower(COALESCE(p.name, '')) LIKE '%moisture%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%moisture%'
|
||
AND COALESCE(p.name, '') LIKE '%保濕修護%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%保濕修護%'
|
||
AND COALESCE(p.name, '') LIKE '%精華凝膠%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%精華凝膠%'
|
||
AND COALESCE(p.name, '') LIKE '%原黃金%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%原黃金%'
|
||
)
|
||
OR (
|
||
COALESCE(p.name, '') LIKE '%花美水%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%花美水%'
|
||
AND lower(COALESCE(p.name, '')) LIKE '%inclear%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%inclear%'
|
||
AND COALESCE(p.name, '') LIKE '%櫻克麗兒%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%櫻克麗兒%'
|
||
AND COALESCE(p.name, '') LIKE '%私密淨化凝膠%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%私密淨化凝膠%'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%kussen%'
|
||
OR COALESCE(p.name, '') LIKE '%葵森%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%kussen%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%葵森%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%寶寶益菌屁屁膏%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%寶寶益菌屁屁膏%'
|
||
AND COALESCE(p.name, '') LIKE '%50%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%50%'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%lab52%'
|
||
OR COALESCE(p.name, '') LIKE '%齒妍堂%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%lab52%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%齒妍堂%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%牙刷%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%牙刷%'
|
||
AND (
|
||
COALESCE(p.name, '') LIKE '%嬰幼兒%'
|
||
OR COALESCE(p.name, '') LIKE '%幼兒%'
|
||
OR COALESCE(p.name, '') LIKE '%汪汪隊%'
|
||
)
|
||
AND (
|
||
COALESCE(la.best_competitor_product_name, '') LIKE '%嬰幼兒%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%幼兒%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%汪汪隊%'
|
||
)
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%flortte%'
|
||
OR COALESCE(p.name, '') LIKE '%花洛莉亞%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%flortte%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%花洛莉亞%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%水果沙拉系列彩色防水眼線液筆%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%水果沙拉系列彩色防水眼線液筆%'
|
||
AND COALESCE(p.name, '') LIKE '%色號任選%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%色號任選%'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%neutrogena%'
|
||
OR COALESCE(p.name, '') LIKE '%露得清%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%neutrogena%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%露得清%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%護手霜%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%護手霜%'
|
||
AND COALESCE(p.name, '') LIKE '%無香%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%無香%'
|
||
AND COALESCE(p.name, '') LIKE '%有香%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%有香%'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%kanebo%'
|
||
OR COALESCE(p.name, '') LIKE '%佳麗寶%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%kanebo%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%佳麗寶%'
|
||
)
|
||
AND lower(COALESCE(p.name, '')) LIKE '%allie%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%allie%'
|
||
AND COALESCE(p.name, '') LIKE '%持采亮化UV防曬水凝乳%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%持采亮化UV防曬水凝乳%'
|
||
AND COALESCE(p.name, '') LIKE '%任選%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%任選%'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%o.p.i%'
|
||
OR lower(COALESCE(p.name, '')) LIKE '%opi%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%o.p.i%'
|
||
OR lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%opi%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%類光繚%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%類光繚%'
|
||
AND (
|
||
COALESCE(p.name, '') LIKE '%指甲油%'
|
||
OR COALESCE(p.name, '') LIKE '%指彩%'
|
||
)
|
||
AND (
|
||
COALESCE(la.best_competitor_product_name, '') LIKE '%指甲油%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%指彩%'
|
||
)
|
||
AND COALESCE(p.name, '') ~* 'isl[a-z0-9]*[0-9]{2,3}'
|
||
AND COALESCE(la.best_competitor_product_name, '') ~* 'isl[a-z0-9]*[0-9]{2,3}'
|
||
)
|
||
OR (
|
||
lower(COALESCE(p.name, '')) LIKE '%ordinary%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%ordinary%'
|
||
AND COALESCE(p.name, '') LIKE '%咖啡因%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%咖啡因%'
|
||
AND lower(COALESCE(p.name, '')) LIKE '%egcg%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%egcg%'
|
||
AND COALESCE(p.name, '') LIKE '%兒茶眼部配方%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%兒茶眼部配方%'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%sk-ii%'
|
||
OR lower(COALESCE(p.name, '')) LIKE '%skii%'
|
||
OR lower(COALESCE(p.name, '')) LIKE '%sk2%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%sk-ii%'
|
||
OR lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%skii%'
|
||
OR lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%sk2%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%青春露%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%青春露%'
|
||
AND COALESCE(p.name, '') LIKE '%330%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%330%'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%amiino%'
|
||
OR COALESCE(p.name, '') LIKE '%安美諾%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%amiino%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%安美諾%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%美白修護霜%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%美白修護霜%'
|
||
AND COALESCE(p.name, '') LIKE '%30%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%30%'
|
||
)
|
||
OR (
|
||
lower(COALESCE(p.name, '')) LIKE '%natures%'
|
||
AND lower(COALESCE(p.name, '')) LIKE '%care%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%natures%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%care%'
|
||
AND COALESCE(p.name, '') LIKE '%綿羊油%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%綿羊油%'
|
||
)
|
||
OR (
|
||
lower(COALESCE(p.name, '')) LIKE '%tomoon%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%tomoon%'
|
||
AND COALESCE(p.name, '') LIKE '%德國奔月%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%德國奔月%'
|
||
AND COALESCE(p.name, '') LIKE '%豪華套裝組%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%豪華套裝組%'
|
||
AND COALESCE(p.name, '') LIKE '%指甲%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%指甲%'
|
||
)
|
||
OR (
|
||
lower(COALESCE(p.name, '')) LIKE '%hh%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%hh%'
|
||
AND COALESCE(p.name, '') LIKE '%私密植萃抗菌潔淨露%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%私密植萃抗菌潔淨露%'
|
||
AND COALESCE(p.name, '') LIKE '%私密衣物抗菌手洗精%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%私密衣物抗菌手洗精%'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%sebamed%'
|
||
OR COALESCE(p.name, '') LIKE '%施巴%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%sebamed%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%施巴%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%護潔露%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%護潔露%'
|
||
AND COALESCE(p.name, '') LIKE '%200%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%200%'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%yes%'
|
||
OR COALESCE(p.name, '') LIKE '%德悅氏%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%yes%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%德悅氏%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%鋒利窄弧型剪刀%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%鋒利窄弧型剪刀%'
|
||
AND lower(COALESCE(p.name, '')) LIKE '%9cm%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%9cm%'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%yes%'
|
||
OR COALESCE(p.name, '') LIKE '%德悅氏%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%yes%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%德悅氏%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%腳指甲剪刀%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%腳指甲剪刀%'
|
||
AND lower(COALESCE(p.name, '')) LIKE '%10.5cm%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%10.5cm%'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%yes%'
|
||
OR COALESCE(p.name, '') LIKE '%德悅氏%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%yes%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%德悅氏%'
|
||
)
|
||
AND COALESCE(p.name, '') LIKE '%極細指甲緣硬皮剪刀%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%極細指甲緣硬皮剪刀%'
|
||
AND lower(COALESCE(p.name, '')) LIKE '%9cm%'
|
||
AND lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%9cm%'
|
||
)
|
||
OR (
|
||
(
|
||
lower(COALESCE(p.name, '')) LIKE '%yes%'
|
||
OR COALESCE(p.name, '') LIKE '%德悅氏%'
|
||
)
|
||
AND (
|
||
lower(COALESCE(la.best_competitor_product_name, '')) LIKE '%yes%'
|
||
OR COALESCE(la.best_competitor_product_name, '') LIKE '%德悅氏%'
|
||
)
|
||
AND (
|
||
(
|
||
COALESCE(p.name, '') LIKE '%指甲剪附除垢銼刀%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%指甲剪附除垢銼刀%'
|
||
)
|
||
OR (
|
||
COALESCE(p.name, '') LIKE '%腳皮銼腳板%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%腳皮銼腳板%'
|
||
)
|
||
OR (
|
||
COALESCE(p.name, '') LIKE '%藍寶石銼刀%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%藍寶石銼刀%'
|
||
)
|
||
OR (
|
||
COALESCE(p.name, '') LIKE '%指甲拋光棒%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%指甲拋光棒%'
|
||
)
|
||
OR (
|
||
COALESCE(p.name, '') LIKE '%指甲剪%'
|
||
AND COALESCE(la.best_competitor_product_name, '') LIKE '%指甲剪%'
|
||
AND COALESCE(p.name, '') NOT LIKE '%附除垢%'
|
||
AND COALESCE(la.best_competitor_product_name, '') NOT LIKE '%附除垢%'
|
||
AND COALESCE(p.name, '') NOT LIKE '%腳指甲%'
|
||
AND COALESCE(la.best_competitor_product_name, '') NOT LIKE '%腳指甲%'
|
||
)
|
||
)
|
||
)
|
||
)
|
||
)
|
||
)
|
||
ORDER BY la.best_match_score DESC NULLS LAST, latest_price.price DESC NULLS LAST, p.i_code
|
||
LIMIT :limit
|
||
""")
|
||
with self.engine.connect() as conn:
|
||
rows = conn.execute(
|
||
sql,
|
||
{
|
||
"limit": max(1, min(int(limit), 300)),
|
||
"min_score": float(min_score),
|
||
"match_score_floor": MIN_MATCH_SCORE,
|
||
},
|
||
).fetchall()
|
||
return [dict(r._mapping) for r in rows]
|
||
|
||
def preview_retryable_candidate_revalidation(self, limit: int = 80, min_score: float = 0.70) -> dict:
|
||
"""
|
||
只讀預覽下一批可重評候選。
|
||
|
||
這個方法刻意復用正式重評的候選 SQL,但只回傳受 limit 限制的摘要;
|
||
不啟動 PChome 搜尋、不呼叫 LLM、不寫 competitor_match_attempts / competitor_prices。
|
||
"""
|
||
preview_limit = max(1, min(int(limit), 300))
|
||
rows = self._fetch_retryable_candidate_skus(limit=preview_limit, min_score=min_score)
|
||
status_counts: dict[str, int] = {}
|
||
examples: list[dict] = []
|
||
review_gated_count = 0
|
||
for row in rows:
|
||
status = str(row.get("attempt_status") or "unknown")
|
||
status_counts[status] = status_counts.get(status, 0) + 1
|
||
if status == "true_low_confidence":
|
||
review_gated_count += 1
|
||
if len(examples) < 5:
|
||
score = row.get("best_match_score")
|
||
examples.append({
|
||
"sku": row.get("sku"),
|
||
"momo_name": row.get("name"),
|
||
"competitor_product_id": row.get("competitor_product_id"),
|
||
"competitor_product_name": row.get("competitor_product_name"),
|
||
"best_match_score": float(score) if score is not None else None,
|
||
"attempt_status": status,
|
||
})
|
||
|
||
return {
|
||
"available": True,
|
||
"preview_limit": preview_limit,
|
||
"candidate_count": len(rows),
|
||
"has_more": len(rows) >= preview_limit,
|
||
"review_gated_count": review_gated_count,
|
||
"status_counts": status_counts,
|
||
"examples": examples,
|
||
"min_score": float(min_score),
|
||
"boundary": "read_only_no_crawl_no_llm_no_db_write",
|
||
}
|
||
|
||
def preview_expired_identity_recovery(self, limit: int = 40) -> dict:
|
||
"""
|
||
只讀預覽過期 identity_v2 搜尋救援候選。
|
||
|
||
這裡只查本地 DB 中已建立 identity_v2、但價格已過期的 SKU;
|
||
不查 PChome、不重新比對、不寫 attempts / prices。
|
||
"""
|
||
preview_limit = max(1, min(int(limit), 120))
|
||
rows = self._fetch_expired_identity_recovery_skus(limit=preview_limit)
|
||
examples: list[dict] = []
|
||
for row in rows[:5]:
|
||
examples.append({
|
||
"sku": row.get("sku"),
|
||
"momo_name": row.get("name"),
|
||
"competitor_product_id": row.get("competitor_product_id"),
|
||
"competitor_product_name": row.get("competitor_product_name"),
|
||
"match_score": float(row.get("match_score")) if row.get("match_score") is not None else None,
|
||
"expires_at": str(row.get("expires_at")) if row.get("expires_at") is not None else None,
|
||
})
|
||
|
||
return {
|
||
"available": True,
|
||
"preview_limit": preview_limit,
|
||
"candidate_count": len(rows),
|
||
"has_more": len(rows) >= preview_limit,
|
||
"examples": examples,
|
||
"boundary": "read_only_no_crawl_no_llm_no_db_write",
|
||
}
|
||
|
||
def _fetch_expired_identity_recovery_skus(self, limit: int = 40) -> list:
|
||
"""
|
||
取得適合 fresh-search recovery 的過期 identity_v2 商品。
|
||
|
||
這比一般 expired refresh 更窄:只收過去已是 exact / total_price /
|
||
price_alert_exact 的正式配對,且排除款式、香味、型態、入數、商業狀態等
|
||
高風險診斷或名稱訊號。避免把本來應該人工覆核的 stale pair 送進慢速搜尋。
|
||
"""
|
||
if self.engine is None:
|
||
raise RuntimeError("需要注入 SQLAlchemy engine")
|
||
|
||
from sqlalchemy import text
|
||
candidate_limit = max(1, min(int(limit) * 6, 400))
|
||
target_limit = max(1, min(int(limit), 120))
|
||
sql = text(f"""
|
||
WITH expired_competitor AS (
|
||
SELECT
|
||
cp.sku,
|
||
cp.competitor_product_id,
|
||
cp.competitor_product_name,
|
||
cp.match_score,
|
||
cp.expires_at,
|
||
COALESCE(cp.match_diagnostic_json->'reasons', '[]'::jsonb) AS diagnostic_reasons
|
||
FROM competitor_prices cp
|
||
WHERE cp.source = 'pchome'
|
||
AND cp.competitor_product_id IS NOT NULL
|
||
AND cp.competitor_product_id <> ''
|
||
AND (cp.expires_at IS NULL OR cp.expires_at <= CURRENT_TIMESTAMP)
|
||
AND COALESCE(cp.match_score, 0) >= :match_score_floor
|
||
AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2'
|
||
AND (
|
||
COALESCE(cp.tags, '[]'::jsonb) ? 'price_basis_total_price'
|
||
OR cp.match_diagnostic_json->>'price_basis' = 'total_price'
|
||
)
|
||
AND (
|
||
COALESCE(cp.tags, '[]'::jsonb) ? 'alert_tier_price_alert_exact'
|
||
OR cp.match_diagnostic_json->>'alert_tier' = 'price_alert_exact'
|
||
)
|
||
AND COALESCE(cp.match_diagnostic_json->>'comparison_mode', 'exact_identity') = 'exact_identity'
|
||
AND COALESCE(cp.hard_veto, false) = false
|
||
ORDER BY
|
||
CASE WHEN cp.expires_at IS NULL THEN 0 ELSE 1 END,
|
||
cp.expires_at ASC NULLS FIRST,
|
||
cp.sku
|
||
LIMIT :candidate_limit
|
||
)
|
||
SELECT
|
||
p.id AS product_id,
|
||
p.i_code AS sku,
|
||
p.name,
|
||
p.category,
|
||
latest_price.price AS momo_price,
|
||
ec.competitor_product_id,
|
||
ec.competitor_product_name,
|
||
ec.match_score,
|
||
ec.expires_at,
|
||
ec.diagnostic_reasons
|
||
FROM expired_competitor ec
|
||
JOIN products p
|
||
ON p.i_code = ec.sku
|
||
AND p.status = 'ACTIVE'
|
||
JOIN LATERAL (
|
||
SELECT pr.price
|
||
FROM price_records pr
|
||
WHERE pr.product_id = p.id
|
||
ORDER BY pr.timestamp DESC, pr.id DESC
|
||
LIMIT 1
|
||
) latest_price ON TRUE
|
||
ORDER BY
|
||
CASE WHEN ec.expires_at IS NULL THEN 0 ELSE 1 END,
|
||
ec.expires_at ASC NULLS FIRST,
|
||
latest_price.price DESC NULLS LAST,
|
||
p.i_code
|
||
""")
|
||
with self.engine.connect() as conn:
|
||
rows = conn.execute(
|
||
sql,
|
||
{
|
||
"candidate_limit": candidate_limit,
|
||
"match_score_floor": MIN_MATCH_SCORE,
|
||
},
|
||
).fetchall()
|
||
filtered = []
|
||
for row in rows:
|
||
payload = dict(row._mapping)
|
||
if _has_stale_identity_recovery_block(payload):
|
||
continue
|
||
payload.pop("diagnostic_reasons", None)
|
||
filtered.append(payload)
|
||
if len(filtered) >= target_limit:
|
||
break
|
||
return filtered
|
||
|
||
def _fetch_expired_identity_skus(self, limit: int = 120) -> list:
|
||
"""
|
||
取得 identity_v2 已確認、但 PChome 價格快取過期的商品。
|
||
這些商品不需重新 keyword search,先用既有 PChome product_id 批次刷新價格。
|
||
"""
|
||
if self.engine is None:
|
||
raise RuntimeError("需要注入 SQLAlchemy engine")
|
||
|
||
from sqlalchemy import text
|
||
sql = text("""
|
||
SELECT
|
||
p.id AS product_id,
|
||
p.i_code AS sku,
|
||
p.name,
|
||
p.category,
|
||
latest_price.price AS momo_price,
|
||
cp.competitor_product_id,
|
||
cp.competitor_product_name,
|
||
cp.match_score,
|
||
cp.expires_at
|
||
FROM competitor_prices cp
|
||
JOIN products p
|
||
ON p.i_code = cp.sku
|
||
AND p.status = 'ACTIVE'
|
||
JOIN LATERAL (
|
||
SELECT pr.price
|
||
FROM price_records pr
|
||
WHERE pr.product_id = p.id
|
||
ORDER BY pr.timestamp DESC, pr.id DESC
|
||
LIMIT 1
|
||
) latest_price ON TRUE
|
||
WHERE cp.source = 'pchome'
|
||
AND cp.competitor_product_id IS NOT NULL
|
||
AND cp.competitor_product_id <> ''
|
||
AND (cp.expires_at IS NULL OR cp.expires_at <= CURRENT_TIMESTAMP)
|
||
AND COALESCE(cp.match_score, 0) >= :match_score_floor
|
||
AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2'
|
||
ORDER BY
|
||
CASE
|
||
WHEN cp.expires_at IS NULL THEN -1
|
||
WHEN (
|
||
COALESCE(cp.tags, '[]'::jsonb) ? 'price_basis_total_price'
|
||
OR cp.match_diagnostic_json->>'price_basis' = 'total_price'
|
||
)
|
||
AND (
|
||
COALESCE(cp.tags, '[]'::jsonb) ? 'alert_tier_price_alert_exact'
|
||
OR cp.match_diagnostic_json->>'alert_tier' = 'price_alert_exact'
|
||
)
|
||
THEN 0
|
||
ELSE 1
|
||
END,
|
||
cp.expires_at ASC NULLS FIRST,
|
||
latest_price.price DESC NULLS LAST,
|
||
p.i_code
|
||
LIMIT :limit
|
||
""")
|
||
with self.engine.connect() as conn:
|
||
rows = conn.execute(
|
||
sql,
|
||
{"limit": max(1, min(int(limit), 500)), "match_score_floor": MIN_MATCH_SCORE},
|
||
).fetchall()
|
||
return [dict(r._mapping) for r in rows]
|
||
|
||
def _upsert_competitor_price(
|
||
self,
|
||
sku: str,
|
||
product, # PChomeProduct
|
||
match_score: float,
|
||
tags: list,
|
||
momo_product_id: int = None,
|
||
momo_price: float = None,
|
||
diagnostics=None,
|
||
source: str = "pchome",
|
||
):
|
||
"""單筆寫入/更新最新快取,並追加一筆歷史快照。"""
|
||
from sqlalchemy import text
|
||
_taipei = timezone(timedelta(hours=8))
|
||
expires_at = (datetime.now(_taipei) + timedelta(hours=TTL_HOURS)).strftime("%Y-%m-%d %H:%M:%S")
|
||
tags_json = json.dumps(tags, ensure_ascii=False)
|
||
diagnostic_payload = _match_diagnostics_payload(diagnostics)
|
||
diagnostic_codes = diagnostic_payload.get("reasons") or []
|
||
product_payload = _product_snapshot_payload(product)
|
||
with self.engine.begin() as conn:
|
||
self._ensure_competitor_price_history_table(conn)
|
||
self._ensure_competitor_prices_columns(conn)
|
||
json_expr = "CAST(:match_diagnostic_json AS jsonb)" if conn.dialect.name == "postgresql" else ":match_diagnostic_json"
|
||
codes_expr = "CAST(:diagnostic_codes AS jsonb)" if conn.dialect.name == "postgresql" else ":diagnostic_codes"
|
||
conn.execute(text("""
|
||
INSERT INTO competitor_prices
|
||
(sku, source, price, original_price, discount_pct,
|
||
competitor_product_id, competitor_product_name,
|
||
competitor_product_url, competitor_image_url, competitor_stock,
|
||
match_score, tags, match_diagnostic_json,
|
||
comparison_mode, hard_veto, diagnostic_codes,
|
||
crawled_at, expires_at)
|
||
VALUES
|
||
(:sku, :source, :price, :original_price, :discount_pct,
|
||
:comp_id, :comp_name,
|
||
:competitor_product_url, :competitor_image_url, :competitor_stock,
|
||
:match_score, :tags, {json_expr},
|
||
:comparison_mode, :hard_veto, {codes_expr},
|
||
CURRENT_TIMESTAMP, :expires_at)
|
||
ON CONFLICT (sku, source) DO UPDATE
|
||
SET price = EXCLUDED.price,
|
||
original_price = EXCLUDED.original_price,
|
||
discount_pct = EXCLUDED.discount_pct,
|
||
competitor_product_id = EXCLUDED.competitor_product_id,
|
||
competitor_product_name = EXCLUDED.competitor_product_name,
|
||
competitor_product_url = EXCLUDED.competitor_product_url,
|
||
competitor_image_url = EXCLUDED.competitor_image_url,
|
||
competitor_stock = EXCLUDED.competitor_stock,
|
||
match_score = EXCLUDED.match_score,
|
||
tags = EXCLUDED.tags,
|
||
match_diagnostic_json = EXCLUDED.match_diagnostic_json,
|
||
comparison_mode = EXCLUDED.comparison_mode,
|
||
hard_veto = EXCLUDED.hard_veto,
|
||
diagnostic_codes = EXCLUDED.diagnostic_codes,
|
||
crawled_at = CURRENT_TIMESTAMP,
|
||
expires_at = :expires_at
|
||
""".format(json_expr=json_expr, codes_expr=codes_expr)), {
|
||
"sku": sku,
|
||
"source": source,
|
||
"price": product.price,
|
||
"original_price":product.original_price,
|
||
"discount_pct": product.discount,
|
||
"comp_id": product.product_id,
|
||
"comp_name": product.name[:200],
|
||
**product_payload,
|
||
"match_score": match_score,
|
||
"tags": tags_json,
|
||
"match_diagnostic_json": json.dumps(diagnostic_payload, ensure_ascii=False) if diagnostic_payload else None,
|
||
"comparison_mode": diagnostic_payload.get("comparison_mode"),
|
||
"hard_veto": diagnostic_payload.get("hard_veto"),
|
||
"diagnostic_codes": json.dumps(diagnostic_codes, ensure_ascii=False) if diagnostic_codes else None,
|
||
"expires_at": expires_at,
|
||
})
|
||
conn.execute(text("""
|
||
INSERT INTO competitor_price_history
|
||
(sku, source, momo_product_id, momo_price,
|
||
price, original_price, discount_pct,
|
||
competitor_product_id, competitor_product_name,
|
||
competitor_product_url, competitor_image_url, competitor_stock,
|
||
match_score, tags, match_diagnostic_json,
|
||
comparison_mode, hard_veto, diagnostic_codes,
|
||
crawled_at)
|
||
VALUES
|
||
(:sku, :source, :momo_product_id, :momo_price,
|
||
:price, :original_price, :discount_pct,
|
||
:comp_id, :comp_name,
|
||
:competitor_product_url, :competitor_image_url, :competitor_stock,
|
||
:match_score, :tags, {json_expr},
|
||
:comparison_mode, :hard_veto, {codes_expr},
|
||
CURRENT_TIMESTAMP)
|
||
""".format(json_expr=json_expr, codes_expr=codes_expr)), {
|
||
"sku": sku,
|
||
"source": source,
|
||
"momo_product_id": momo_product_id,
|
||
"momo_price": momo_price,
|
||
"price": product.price,
|
||
"original_price": product.original_price,
|
||
"discount_pct": product.discount,
|
||
"comp_id": product.product_id,
|
||
"comp_name": product.name[:200],
|
||
**product_payload,
|
||
"match_score": match_score,
|
||
"tags": tags_json,
|
||
"match_diagnostic_json": json.dumps(diagnostic_payload, ensure_ascii=False) if diagnostic_payload else None,
|
||
"comparison_mode": diagnostic_payload.get("comparison_mode"),
|
||
"hard_veto": diagnostic_payload.get("hard_veto"),
|
||
"diagnostic_codes": json.dumps(diagnostic_codes, ensure_ascii=False) if diagnostic_codes else None,
|
||
})
|
||
|
||
def _should_upsert_competitor_price(
|
||
self,
|
||
sku: str,
|
||
product,
|
||
match_score: float,
|
||
momo_name: str | None = None,
|
||
source: str = "pchome",
|
||
) -> tuple[bool, str]:
|
||
"""
|
||
保護正式 competitor_prices:若既有配對是不同 PChome 商品,
|
||
只有超高信心才允許覆蓋,避免新 matcher 一次污染核心比價資料。
|
||
"""
|
||
from sqlalchemy import text
|
||
|
||
with self.engine.connect() as conn:
|
||
row = conn.execute(text("""
|
||
SELECT competitor_product_id, competitor_product_name, match_score, tags
|
||
FROM competitor_prices
|
||
WHERE sku = :sku
|
||
AND source = :source
|
||
LIMIT 1
|
||
"""), {"sku": sku, "source": source}).mappings().first()
|
||
|
||
if not row:
|
||
return True, "new_match"
|
||
|
||
existing_id = str(row.get("competitor_product_id") or "")
|
||
existing_name = str(row.get("competitor_product_name") or "")
|
||
incoming_id = str(getattr(product, "product_id", "") or "")
|
||
incoming_name = str(getattr(product, "name", "") or "")
|
||
try:
|
||
existing_score = float(row.get("match_score") or 0)
|
||
except (TypeError, ValueError):
|
||
existing_score = 0.0
|
||
existing_tags = row.get("tags") or []
|
||
if isinstance(existing_tags, str):
|
||
try:
|
||
existing_tags = json.loads(existing_tags)
|
||
except Exception:
|
||
existing_tags = []
|
||
if "identity_v2" not in existing_tags:
|
||
return True, "replace_legacy_unverified"
|
||
|
||
if not existing_id or existing_id == incoming_id:
|
||
return True, "same_or_empty_existing"
|
||
if existing_score < MIN_MATCH_SCORE:
|
||
return True, f"replace_low_existing_score={existing_score:.3f}"
|
||
if momo_name and existing_name:
|
||
try:
|
||
from services.marketplace_product_matcher import score_marketplace_match
|
||
|
||
existing_current = score_marketplace_match(momo_name, existing_name)
|
||
except Exception:
|
||
existing_current = None
|
||
if (
|
||
existing_current
|
||
and (
|
||
existing_current.hard_veto
|
||
or existing_current.score < MIN_MATCH_SCORE
|
||
)
|
||
and match_score >= MIN_MATCH_SCORE
|
||
):
|
||
return (
|
||
True,
|
||
"replace_stale_existing_identity="
|
||
f"{existing_score:.3f}->{existing_current.score:.3f}->{match_score:.3f}",
|
||
)
|
||
if existing_name and incoming_name:
|
||
try:
|
||
from services.marketplace_product_matcher import score_marketplace_match
|
||
|
||
candidate_equivalence = score_marketplace_match(existing_name, incoming_name)
|
||
except Exception:
|
||
candidate_equivalence = None
|
||
if (
|
||
candidate_equivalence
|
||
and candidate_equivalence.score >= 0.88
|
||
and not candidate_equivalence.hard_veto
|
||
and candidate_equivalence.comparison_mode == "exact_identity"
|
||
and match_score >= max(MIN_MATCH_SCORE, existing_score + 0.015)
|
||
):
|
||
return (
|
||
True,
|
||
"replace_same_identity_better_score="
|
||
f"{existing_score:.3f}->{match_score:.3f}",
|
||
)
|
||
if existing_score >= match_score:
|
||
return (
|
||
False,
|
||
f"existing_match_conflict;stronger_existing;existing_id={existing_id};"
|
||
f"incoming_id={incoming_id};existing_score={existing_score:.3f};"
|
||
f"incoming_score={match_score:.3f}",
|
||
)
|
||
if match_score >= REPLACE_DIFFERENT_PRODUCT_SCORE:
|
||
return True, f"replace_high_confidence_score={match_score:.3f}"
|
||
return (
|
||
False,
|
||
f"existing_match_conflict;existing_id={existing_id};"
|
||
f"incoming_id={incoming_id};existing_score={existing_score:.3f};"
|
||
f"incoming_score={match_score:.3f}",
|
||
)
|
||
|
||
def _fetch_latest_manual_review_for_candidate(
|
||
self,
|
||
sku: str,
|
||
competitor_product_id: str,
|
||
source: str = "pchome",
|
||
) -> Optional[dict]:
|
||
"""Read the latest human review for this exact candidate, if the table exists."""
|
||
if not competitor_product_id:
|
||
return None
|
||
from sqlalchemy import text
|
||
|
||
try:
|
||
with self.engine.connect() as conn:
|
||
row = conn.execute(text("""
|
||
SELECT review_action, review_reason, reviewer_identity, reviewed_at
|
||
FROM competitor_match_reviews
|
||
WHERE sku = :sku
|
||
AND source = :source
|
||
AND candidate_product_id = :candidate_id
|
||
ORDER BY reviewed_at DESC, id DESC
|
||
LIMIT 1
|
||
"""), {
|
||
"sku": sku,
|
||
"source": source,
|
||
"candidate_id": competitor_product_id,
|
||
}).mappings().first()
|
||
except Exception:
|
||
return None
|
||
|
||
return dict(row) if row else None
|
||
|
||
def _run_sku_items(
|
||
self,
|
||
skus: list,
|
||
source: str = "pchome",
|
||
label: str = "PChome 競品價格",
|
||
bounded_search: bool = False,
|
||
) -> FeederResult:
|
||
start = time.time()
|
||
|
||
if source != "pchome":
|
||
logger.warning(f"[Feeder] 尚未支援 source={source},跳過")
|
||
return FeederResult(0, 0, 0, 0, 0, 0.0)
|
||
|
||
from services.pchome_crawler import PChomeCrawler
|
||
crawler_timeout = BACKFILL_REQUEST_TIMEOUT if bounded_search else REQUEST_TIMEOUT
|
||
crawler = PChomeCrawler(timeout=crawler_timeout, delay=RATE_DELAY)
|
||
|
||
logger.info(f"[Feeder] 開始抓取 {len(skus)} 支商品的 {label}")
|
||
|
||
matched = 0
|
||
skipped_no = 0
|
||
skipped_low = 0
|
||
errors = 0
|
||
history_written = 0
|
||
attempts_written = 0
|
||
|
||
for item in skus:
|
||
sku = item["sku"]
|
||
momo_name = item["name"]
|
||
momo_product_id = item.get("product_id")
|
||
momo_price = item.get("momo_price")
|
||
search_terms = _build_search_keywords(momo_name)
|
||
|
||
try:
|
||
if bounded_search:
|
||
products = _search_pchome_candidates(
|
||
crawler,
|
||
momo_name,
|
||
search_terms,
|
||
momo_price=momo_price,
|
||
max_terms=BACKFILL_MAX_SEARCH_TERMS,
|
||
max_pages=BACKFILL_SEARCH_MAX_PAGES,
|
||
max_seconds=BACKFILL_MAX_SECONDS_PER_SKU,
|
||
)
|
||
else:
|
||
products = _search_pchome_candidates(crawler, momo_name, search_terms, momo_price=momo_price)
|
||
if not products:
|
||
logger.debug(f"[Feeder] {sku} 無搜尋結果,跳過")
|
||
browse_diagnostic = self._prepare_browse_diagnostic(
|
||
momo_name,
|
||
search_terms=search_terms,
|
||
reason="no_result",
|
||
candidate_count=0,
|
||
)
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=0,
|
||
attempt_status="no_result",
|
||
browse_diagnostic=browse_diagnostic,
|
||
source=source,
|
||
)
|
||
attempts_written += 1
|
||
skipped_no += 1
|
||
continue
|
||
|
||
ranked_matches = _rank_match_details(momo_name, products, momo_price=momo_price)
|
||
if not ranked_matches:
|
||
browse_diagnostic = self._prepare_browse_diagnostic(
|
||
momo_name,
|
||
search_terms=search_terms,
|
||
reason="no_match",
|
||
candidate_count=len(products),
|
||
)
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=len(products),
|
||
attempt_status="no_match",
|
||
browse_diagnostic=browse_diagnostic,
|
||
source=source,
|
||
)
|
||
attempts_written += 1
|
||
skipped_no += 1
|
||
continue
|
||
|
||
selected_match = None
|
||
manually_rejected_ids: list[str] = []
|
||
for candidate_product, candidate_score, candidate_diagnostics in ranked_matches:
|
||
candidate_review = self._fetch_latest_manual_review_for_candidate(
|
||
sku,
|
||
getattr(candidate_product, "product_id", None),
|
||
source=source,
|
||
)
|
||
if (candidate_review or {}).get("review_action") == "reject_identity":
|
||
manually_rejected_ids.append(str(getattr(candidate_product, "product_id", "") or ""))
|
||
continue
|
||
selected_match = (
|
||
candidate_product,
|
||
candidate_score,
|
||
candidate_diagnostics,
|
||
candidate_review,
|
||
)
|
||
break
|
||
|
||
if not selected_match:
|
||
best_product, score, diagnostics = ranked_matches[0]
|
||
rejected_note = ",".join(product_id for product_id in manually_rejected_ids if product_id)
|
||
logger.info(
|
||
f"[Feeder] {sku} 所有可信候選都已被人工否決,跳過正式寫入 | "
|
||
f"rejected_candidates={rejected_note}"
|
||
)
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=len(products),
|
||
attempt_status="manual_rejected",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
error_message=(
|
||
f"manual_review_rejected; rejected_candidates={rejected_note}; "
|
||
f"{_format_match_diagnostics(diagnostics)}"
|
||
),
|
||
source=source,
|
||
)
|
||
attempts_written += 1
|
||
skipped_low += 1
|
||
continue
|
||
|
||
best_product, score, diagnostics, manual_review = selected_match
|
||
manual_action = (manual_review or {}).get("review_action")
|
||
if manual_action == "unit_price_required":
|
||
logger.info(
|
||
f"[Feeder] {sku} 候選已被人工標記為單位價比較,不寫正式總價差 | "
|
||
f"candidate={getattr(best_product, 'product_id', None)}"
|
||
)
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=len(products),
|
||
attempt_status="manual_unit_price_required",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
error_message=f"manual_review_unit_price_required; {_format_match_diagnostics(diagnostics)}",
|
||
source=source,
|
||
)
|
||
attempts_written += 1
|
||
skipped_low += 1
|
||
continue
|
||
|
||
manual_accept_override = manual_action == "accept_identity"
|
||
if getattr(diagnostics, "comparison_mode", "") == "unit_comparable" and not manual_accept_override:
|
||
logger.info(
|
||
f"[Feeder] {sku} 候選屬單位價可比但非同販售組合,"
|
||
f"不寫入正式價差 | {_format_match_diagnostics(diagnostics)}"
|
||
)
|
||
browse_diagnostic = self._prepare_browse_diagnostic(
|
||
momo_name,
|
||
search_terms=search_terms,
|
||
reason="unit_comparable",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
candidate_count=len(products),
|
||
)
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=len(products),
|
||
attempt_status="unit_comparable",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
browse_diagnostic=browse_diagnostic,
|
||
error_message=_format_match_diagnostics(diagnostics),
|
||
source=source,
|
||
)
|
||
attempts_written += 1
|
||
skipped_low += 1
|
||
continue
|
||
|
||
if score < MIN_MATCH_SCORE and not manual_accept_override:
|
||
attempt_status = _classify_low_score_attempt(score, diagnostics)
|
||
if (
|
||
attempt_status == "recoverable_low_score"
|
||
and _has_variant_selection_gap(momo_name, ranked_matches, score)
|
||
):
|
||
attempt_status = "true_low_confidence"
|
||
logger.debug(
|
||
f"[Feeder] {sku} 比對分數過低 ({score:.3f} < {MIN_MATCH_SCORE}),"
|
||
f"{_format_match_diagnostics(diagnostics)}"
|
||
)
|
||
browse_diagnostic = self._prepare_browse_diagnostic(
|
||
momo_name,
|
||
search_terms=search_terms,
|
||
reason=attempt_status,
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
candidate_count=len(products),
|
||
)
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=len(products),
|
||
attempt_status=attempt_status,
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
browse_diagnostic=browse_diagnostic,
|
||
error_message=_format_match_diagnostics(diagnostics),
|
||
source=source,
|
||
)
|
||
attempts_written += 1
|
||
skipped_low += 1
|
||
continue
|
||
|
||
if manual_accept_override:
|
||
score = max(score, MIN_MATCH_SCORE)
|
||
if not manual_accept_override and not _is_auto_price_write_safe(diagnostics):
|
||
attempt_status = _classify_auto_write_block_attempt(score, diagnostics)
|
||
browse_diagnostic = self._prepare_browse_diagnostic(
|
||
momo_name,
|
||
search_terms=search_terms,
|
||
reason=attempt_status,
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
candidate_count=len(products),
|
||
)
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=len(products),
|
||
attempt_status=attempt_status,
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
browse_diagnostic=browse_diagnostic,
|
||
error_message=f"auto_price_write_blocked; {_format_match_diagnostics(diagnostics)}",
|
||
source=source,
|
||
)
|
||
attempts_written += 1
|
||
skipped_low += 1
|
||
continue
|
||
tags = _extend_match_tags(_extract_tags(best_product), diagnostics)
|
||
if manual_accept_override:
|
||
tags.extend(["manual_review", "manual_accept"])
|
||
tags = [tag for tag in tags if tag != "identity_veto"]
|
||
tags = list(dict.fromkeys(tags))
|
||
should_write, write_reason = self._should_upsert_competitor_price(
|
||
sku,
|
||
best_product,
|
||
score,
|
||
momo_name=momo_name,
|
||
source=source,
|
||
)
|
||
if manual_accept_override and not should_write:
|
||
should_write = True
|
||
write_reason = "manual_accept_override"
|
||
if not should_write:
|
||
logger.info(f"[Feeder] {sku} 進入人工覆核,不覆蓋既有配對 | {write_reason}")
|
||
browse_diagnostic = self._prepare_browse_diagnostic(
|
||
momo_name,
|
||
search_terms=search_terms,
|
||
reason="protected_existing_match",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
candidate_count=len(products),
|
||
)
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=len(products),
|
||
attempt_status="protected_existing_match",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
browse_diagnostic=browse_diagnostic,
|
||
error_message=f"{write_reason}; {_format_match_diagnostics(diagnostics)}",
|
||
source=source,
|
||
)
|
||
attempts_written += 1
|
||
skipped_low += 1
|
||
continue
|
||
|
||
tags.append(write_reason)
|
||
self._upsert_competitor_price(
|
||
sku,
|
||
best_product,
|
||
score,
|
||
tags,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
diagnostics=diagnostics,
|
||
source=source,
|
||
)
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=len(products),
|
||
attempt_status="matched",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
source=source,
|
||
)
|
||
matched += 1
|
||
history_written += 1
|
||
attempts_written += 1
|
||
logger.debug(
|
||
f"[Feeder] {sku} → PChome ${best_product.price} "
|
||
f"score={score:.3f} tags={tags}"
|
||
)
|
||
|
||
except Exception as e:
|
||
logger.error(f"[Feeder] {sku} 處理失敗: {e}")
|
||
try:
|
||
browse_diagnostic = self._prepare_browse_diagnostic(
|
||
momo_name,
|
||
search_terms=search_terms,
|
||
reason="crawler_error",
|
||
candidate_count=0,
|
||
)
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
attempt_status="error",
|
||
browse_diagnostic=browse_diagnostic,
|
||
error_message=str(e),
|
||
source=source,
|
||
)
|
||
attempts_written += 1
|
||
except Exception as attempt_error:
|
||
logger.warning(f"[Feeder] {sku} 比對嘗試紀錄寫入失敗: {attempt_error}")
|
||
errors += 1
|
||
|
||
duration = round(time.time() - start, 2)
|
||
logger.info(
|
||
f"[Feeder] 完成 matched={matched} skipped_no={skipped_no} "
|
||
f"skipped_low={skipped_low} errors={errors} "
|
||
f"history_written={history_written} attempts_written={attempts_written} 耗時={duration}s"
|
||
)
|
||
return FeederResult(
|
||
total_skus=len(skus),
|
||
matched=matched,
|
||
skipped_no_result=skipped_no,
|
||
skipped_low_score=skipped_low,
|
||
errors=errors,
|
||
duration_sec=duration,
|
||
history_written=history_written,
|
||
attempts_written=attempts_written,
|
||
)
|
||
|
||
def _run_known_identity_refresh_items(
|
||
self,
|
||
skus: list,
|
||
source: str = "pchome",
|
||
label: str = "已確認身份價格刷新",
|
||
allow_missing_recovery: bool = True,
|
||
bounded_recovery: bool = False,
|
||
) -> FeederResult:
|
||
start = time.time()
|
||
|
||
if source != "pchome":
|
||
logger.warning(f"[Feeder] 尚未支援 source={source},跳過")
|
||
return FeederResult(0, 0, 0, 0, 0, 0.0)
|
||
|
||
if not skus:
|
||
return FeederResult(0, 0, 0, 0, 0, 0.0)
|
||
|
||
from services.pchome_crawler import PChomeCrawler
|
||
crawler_timeout = BACKFILL_REQUEST_TIMEOUT if bounded_recovery else REQUEST_TIMEOUT
|
||
crawler = PChomeCrawler(timeout=crawler_timeout, delay=RATE_DELAY)
|
||
recovery_kwargs = (
|
||
{
|
||
"max_terms": BACKFILL_MAX_SEARCH_TERMS,
|
||
"max_pages": BACKFILL_SEARCH_MAX_PAGES,
|
||
"max_seconds": BACKFILL_MAX_SECONDS_PER_SKU,
|
||
}
|
||
if bounded_recovery
|
||
else {}
|
||
)
|
||
|
||
requested_ids = [
|
||
str(item.get("competitor_product_id") or "").strip()
|
||
for item in skus
|
||
if str(item.get("competitor_product_id") or "").strip()
|
||
]
|
||
ok, message, products = crawler.fetch_product_details(requested_ids, batch_size=20)
|
||
product_map = {_product_id_key(product.product_id): product for product in products} if ok else {}
|
||
logger.info(
|
||
f"[Feeder] {label} product_id 批次查詢 | requested={len(requested_ids)} "
|
||
f"returned={len(product_map)} msg={message}"
|
||
)
|
||
|
||
matched = 0
|
||
skipped_no = 0
|
||
skipped_low = 0
|
||
errors = 0
|
||
history_written = 0
|
||
attempts_written = 0
|
||
|
||
for item in skus:
|
||
sku = item["sku"]
|
||
momo_name = item["name"]
|
||
momo_product_id = item.get("product_id")
|
||
momo_price = item.get("momo_price")
|
||
competitor_product_id = str(item.get("competitor_product_id") or "").strip()
|
||
search_terms = [f"known_product_id:{competitor_product_id}"] if competitor_product_id else []
|
||
|
||
try:
|
||
product = product_map.get(_product_id_key(competitor_product_id))
|
||
if not product:
|
||
if not allow_missing_recovery:
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=0,
|
||
attempt_status="refresh_no_result",
|
||
error_message=(
|
||
f"PChome product_id not returned: {competitor_product_id}; "
|
||
"fresh_search_recovery_deferred"
|
||
),
|
||
source=source,
|
||
)
|
||
skipped_no += 1
|
||
attempts_written += 1
|
||
continue
|
||
|
||
recovered, recovery_terms, recovery_candidate_count = _recover_low_score_with_fresh_search(
|
||
crawler,
|
||
momo_name,
|
||
momo_price=momo_price,
|
||
existing_product_id=competitor_product_id,
|
||
**recovery_kwargs,
|
||
)
|
||
if recovered:
|
||
best_product, score, diagnostics = recovered
|
||
if getattr(diagnostics, "comparison_mode", "") == "unit_comparable":
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms + [term for term in recovery_terms if term not in search_terms],
|
||
candidate_count=max(1, recovery_candidate_count),
|
||
attempt_status="refresh_unit_comparable",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
error_message=_format_match_diagnostics(diagnostics),
|
||
source=source,
|
||
)
|
||
skipped_low += 1
|
||
attempts_written += 1
|
||
continue
|
||
|
||
if score >= MIN_MATCH_SCORE and not getattr(diagnostics, "hard_veto", False):
|
||
if not _is_auto_price_write_safe(diagnostics):
|
||
attempt_terms = search_terms + [term for term in recovery_terms if term not in search_terms]
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=attempt_terms,
|
||
candidate_count=max(1, recovery_candidate_count),
|
||
attempt_status=_classify_auto_write_block_attempt(score, diagnostics),
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
error_message=f"auto_price_write_blocked; {_format_match_diagnostics(diagnostics)}",
|
||
source=source,
|
||
)
|
||
skipped_low += 1
|
||
attempts_written += 1
|
||
continue
|
||
tags = _extend_match_tags(
|
||
_extract_tags(best_product),
|
||
diagnostics,
|
||
["refresh_known_identity", "fresh_search_recovery", "missing_known_product_id"],
|
||
)
|
||
|
||
should_write, write_reason = self._should_upsert_competitor_price(
|
||
sku,
|
||
best_product,
|
||
score,
|
||
momo_name=momo_name,
|
||
source=source,
|
||
)
|
||
attempt_terms = search_terms + [term for term in recovery_terms if term not in search_terms]
|
||
if not should_write:
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=attempt_terms,
|
||
candidate_count=max(1, recovery_candidate_count),
|
||
attempt_status="protected_existing_match",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
error_message=f"{write_reason}; {_format_match_diagnostics(diagnostics)}",
|
||
source=source,
|
||
)
|
||
skipped_low += 1
|
||
attempts_written += 1
|
||
continue
|
||
|
||
tags.append(write_reason)
|
||
self._upsert_competitor_price(
|
||
sku,
|
||
best_product,
|
||
score,
|
||
tags,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
diagnostics=diagnostics,
|
||
source=source,
|
||
)
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=attempt_terms,
|
||
candidate_count=max(1, recovery_candidate_count),
|
||
attempt_status="matched",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
source=source,
|
||
)
|
||
matched += 1
|
||
history_written += 1
|
||
attempts_written += 1
|
||
continue
|
||
|
||
attempt_status = _classify_low_score_attempt(score, diagnostics)
|
||
if (
|
||
attempt_status == "recoverable_low_score"
|
||
and _has_variant_selection_gap(momo_name, [(best_product, score, diagnostics)], score)
|
||
):
|
||
attempt_status = "true_low_confidence"
|
||
attempt_terms = search_terms + [term for term in recovery_terms if term not in search_terms]
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=attempt_terms,
|
||
candidate_count=max(1, recovery_candidate_count),
|
||
attempt_status=attempt_status,
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
error_message=(
|
||
"missing_known_product_id_fresh_search_low_confidence; "
|
||
f"PChome product_id not returned: {competitor_product_id}; "
|
||
f"{_format_match_diagnostics(diagnostics)}"
|
||
),
|
||
source=source,
|
||
)
|
||
skipped_low += 1
|
||
attempts_written += 1
|
||
continue
|
||
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms + [term for term in recovery_terms if term not in search_terms],
|
||
candidate_count=max(0, recovery_candidate_count),
|
||
attempt_status="refresh_no_result",
|
||
error_message=f"PChome product_id not returned: {competitor_product_id}",
|
||
source=source,
|
||
)
|
||
skipped_no += 1
|
||
attempts_written += 1
|
||
continue
|
||
|
||
result = _find_best_match_detail(momo_name, [product], momo_price=momo_price)
|
||
if not result:
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=1,
|
||
attempt_status="refresh_no_match",
|
||
source=source,
|
||
)
|
||
skipped_no += 1
|
||
attempts_written += 1
|
||
continue
|
||
|
||
best_product, score, diagnostics = result
|
||
if getattr(diagnostics, "comparison_mode", "") == "unit_comparable":
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=1,
|
||
attempt_status="refresh_unit_comparable",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
error_message=_format_match_diagnostics(diagnostics),
|
||
source=source,
|
||
)
|
||
skipped_low += 1
|
||
attempts_written += 1
|
||
continue
|
||
|
||
if score < MIN_MATCH_SCORE:
|
||
recovery_terms: list[str] = []
|
||
recovery_candidate_count = 0
|
||
if allow_missing_recovery:
|
||
recovered, recovery_terms, recovery_candidate_count = _recover_low_score_with_fresh_search(
|
||
crawler,
|
||
momo_name,
|
||
momo_price=momo_price,
|
||
existing_product_id=competitor_product_id,
|
||
**recovery_kwargs,
|
||
)
|
||
if recovered:
|
||
recovered_product, recovered_score, recovered_diagnostics = recovered
|
||
if (
|
||
recovered_score > score
|
||
or getattr(diagnostics, "hard_veto", False)
|
||
and not getattr(recovered_diagnostics, "hard_veto", False)
|
||
):
|
||
best_product, score, diagnostics = recovered_product, recovered_score, recovered_diagnostics
|
||
|
||
if score >= MIN_MATCH_SCORE:
|
||
extras = ["refresh_known_identity"]
|
||
if recovery_terms:
|
||
extras.append("fresh_search_recovery")
|
||
if not _is_auto_price_write_safe(diagnostics):
|
||
candidate_count = max(1, recovery_candidate_count or 1)
|
||
attempt_terms = search_terms + [term for term in recovery_terms if term not in search_terms]
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=attempt_terms,
|
||
candidate_count=candidate_count,
|
||
attempt_status=_classify_auto_write_block_attempt(score, diagnostics),
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
error_message=f"auto_price_write_blocked; {_format_match_diagnostics(diagnostics)}",
|
||
source=source,
|
||
)
|
||
skipped_low += 1
|
||
attempts_written += 1
|
||
continue
|
||
tags = _extend_match_tags(_extract_tags(best_product), diagnostics, extras)
|
||
|
||
should_write, write_reason = self._should_upsert_competitor_price(
|
||
sku,
|
||
best_product,
|
||
score,
|
||
momo_name=momo_name,
|
||
source=source,
|
||
)
|
||
candidate_count = max(1, recovery_candidate_count or 1)
|
||
attempt_terms = search_terms + [term for term in recovery_terms if term not in search_terms]
|
||
if not should_write:
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=attempt_terms,
|
||
candidate_count=candidate_count,
|
||
attempt_status="protected_existing_match",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
error_message=f"{write_reason}; {_format_match_diagnostics(diagnostics)}",
|
||
source=source,
|
||
)
|
||
skipped_low += 1
|
||
attempts_written += 1
|
||
continue
|
||
|
||
tags.append(write_reason)
|
||
self._upsert_competitor_price(
|
||
sku,
|
||
best_product,
|
||
score,
|
||
tags,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
diagnostics=diagnostics,
|
||
source=source,
|
||
)
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=attempt_terms,
|
||
candidate_count=candidate_count,
|
||
attempt_status="matched",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
source=source,
|
||
)
|
||
matched += 1
|
||
history_written += 1
|
||
attempts_written += 1
|
||
continue
|
||
|
||
attempt_status = _classify_low_score_attempt(score, diagnostics)
|
||
if (
|
||
attempt_status == "recoverable_low_score"
|
||
and _has_variant_selection_gap(momo_name, [(best_product, score, diagnostics)], score)
|
||
):
|
||
attempt_status = "true_low_confidence"
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms + [term for term in recovery_terms if term not in search_terms],
|
||
candidate_count=max(1, recovery_candidate_count or 1),
|
||
attempt_status=attempt_status,
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
error_message=(
|
||
_format_match_diagnostics(diagnostics)
|
||
if allow_missing_recovery
|
||
else f"fresh_search_recovery_deferred; {_format_match_diagnostics(diagnostics)}"
|
||
),
|
||
source=source,
|
||
)
|
||
skipped_low += 1
|
||
attempts_written += 1
|
||
continue
|
||
|
||
if not _is_auto_price_write_safe(diagnostics):
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=1,
|
||
attempt_status=_classify_auto_write_block_attempt(score, diagnostics),
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
error_message=f"auto_price_write_blocked; {_format_match_diagnostics(diagnostics)}",
|
||
source=source,
|
||
)
|
||
skipped_low += 1
|
||
attempts_written += 1
|
||
continue
|
||
|
||
tags = _extend_match_tags(_extract_tags(best_product), diagnostics, ["refresh_known_identity"])
|
||
|
||
should_write, write_reason = self._should_upsert_competitor_price(
|
||
sku,
|
||
best_product,
|
||
score,
|
||
momo_name=momo_name,
|
||
source=source,
|
||
)
|
||
if not should_write:
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=1,
|
||
attempt_status="protected_existing_match",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
error_message=f"{write_reason}; {_format_match_diagnostics(diagnostics)}",
|
||
source=source,
|
||
)
|
||
skipped_low += 1
|
||
attempts_written += 1
|
||
continue
|
||
|
||
tags.append(write_reason)
|
||
self._upsert_competitor_price(
|
||
sku,
|
||
best_product,
|
||
score,
|
||
tags,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
diagnostics=diagnostics,
|
||
source=source,
|
||
)
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
candidate_count=1,
|
||
attempt_status="matched",
|
||
best_product=best_product,
|
||
best_score=score,
|
||
diagnostics=diagnostics,
|
||
source=source,
|
||
)
|
||
matched += 1
|
||
history_written += 1
|
||
attempts_written += 1
|
||
except Exception as e:
|
||
logger.error(f"[Feeder] {sku} 已確認身份刷新失敗: {e}")
|
||
try:
|
||
self._record_match_attempt(
|
||
sku,
|
||
momo_name,
|
||
momo_product_id=momo_product_id,
|
||
momo_price=momo_price,
|
||
search_terms=search_terms,
|
||
attempt_status="refresh_error",
|
||
error_message=str(e),
|
||
source=source,
|
||
)
|
||
attempts_written += 1
|
||
except Exception as attempt_error:
|
||
logger.warning(f"[Feeder] {sku} 刷新嘗試紀錄寫入失敗: {attempt_error}")
|
||
errors += 1
|
||
|
||
duration = round(time.time() - start, 2)
|
||
logger.info(
|
||
f"[Feeder] {label} 完成 matched={matched}/{len(skus)} "
|
||
f"skip_no={skipped_no} skip_low={skipped_low} errors={errors} "
|
||
f"history_written={history_written} attempts_written={attempts_written} 耗時={duration}s"
|
||
)
|
||
return FeederResult(
|
||
total_skus=len(skus),
|
||
matched=matched,
|
||
skipped_no_result=skipped_no,
|
||
skipped_low_score=skipped_low,
|
||
errors=errors,
|
||
duration_sec=duration,
|
||
history_written=history_written,
|
||
attempts_written=attempts_written,
|
||
)
|
||
|
||
def run(self, source: str = "pchome") -> FeederResult:
|
||
"""
|
||
執行一輪競品價格抓取與寫入
|
||
|
||
Args:
|
||
source: 競品來源代碼(目前支援 'pchome')
|
||
|
||
Returns:
|
||
FeederResult
|
||
"""
|
||
try:
|
||
skus = self._fetch_active_skus()
|
||
except Exception as e:
|
||
logger.error(f"[Feeder] 讀取商品清單失敗: {e}")
|
||
return FeederResult(0, 0, 0, 0, 1, 0.0)
|
||
|
||
return self._run_sku_items(skus, source=source, label="PChome 競品價格")
|
||
|
||
def run_expired_identity_refresh(self, limit: int = 120, source: str = "pchome") -> FeederResult:
|
||
"""刷新已通過 identity_v2、但 PChome 價格快取過期的商品。"""
|
||
try:
|
||
skus = self._fetch_expired_identity_skus(limit=limit)
|
||
except Exception as e:
|
||
logger.error(f"[Feeder] 讀取過期 identity_v2 商品失敗: {e}")
|
||
return FeederResult(0, 0, 0, 0, 1, 0.0)
|
||
|
||
return self._run_known_identity_refresh_items(
|
||
skus,
|
||
source=source,
|
||
label="identity_v2 過期價格刷新",
|
||
allow_missing_recovery=False,
|
||
)
|
||
|
||
def run_expired_identity_search_recovery(self, limit: int = 40, source: str = "pchome") -> FeederResult:
|
||
"""
|
||
搜尋救援已建立 identity_v2、但價格過期或舊 PChome ID 失效的商品。
|
||
|
||
與一般 refresh-stale 不同,這條路徑允許在 known product_id 缺失或重評低分時,
|
||
走受控 fresh_search_recovery;最後仍會經過 hard veto、auto price write
|
||
safety、overwrite protection 才能寫入正式 competitor_prices。
|
||
"""
|
||
try:
|
||
skus = self._fetch_expired_identity_recovery_skus(limit=max(1, min(int(limit), 120)))
|
||
except Exception as e:
|
||
logger.error(f"[Feeder] 讀取過期 identity_v2 搜尋救援商品失敗: {e}")
|
||
return FeederResult(0, 0, 0, 0, 1, 0.0)
|
||
|
||
return self._run_known_identity_refresh_items(
|
||
skus,
|
||
source=source,
|
||
label="identity_v2 過期價格搜尋救援",
|
||
allow_missing_recovery=True,
|
||
bounded_recovery=True,
|
||
)
|
||
|
||
def run_retryable_candidate_revalidation(
|
||
self,
|
||
limit: int = 80,
|
||
min_score: float = 0.70,
|
||
source: str = "pchome",
|
||
) -> FeederResult:
|
||
"""重新評分近門檻候選,避免 matcher 升級後仍卡在舊的 low_score。"""
|
||
try:
|
||
skus = self._fetch_retryable_candidate_skus(limit=limit, min_score=min_score)
|
||
except Exception as e:
|
||
logger.error(f"[Feeder] 讀取近門檻候選失敗: {e}")
|
||
return FeederResult(0, 0, 0, 0, 1, 0.0)
|
||
|
||
return self._run_known_identity_refresh_items(
|
||
skus,
|
||
source=source,
|
||
label="近門檻候選重新評分",
|
||
)
|
||
|
||
def run_unmatched_priority(self, limit: int = 80, source: str = "pchome") -> FeederResult:
|
||
"""優先補抓尚未有有效 PChome 配對的高價商品。"""
|
||
try:
|
||
skus = self._fetch_unmatched_priority_skus(limit=limit)
|
||
except Exception as e:
|
||
logger.error(f"[Feeder] 讀取待比對優先商品失敗: {e}")
|
||
return FeederResult(0, 0, 0, 0, 1, 0.0)
|
||
|
||
return self._run_sku_items(
|
||
skus,
|
||
source=source,
|
||
label="待比對優先補抓",
|
||
bounded_search=True,
|
||
)
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# CLI 測試(不依賴 DB,直接測試爬蟲 + 比對邏輯)
|
||
# python3 services/competitor_price_feeder.py
|
||
# ─────────────────────────────────────────────
|
||
if __name__ == "__main__":
|
||
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
|
||
|
||
from services.pchome_crawler import PChomeCrawler
|
||
|
||
test_items = [
|
||
{"sku": "A003", "name": "舒特膚AD乳液200ml"},
|
||
{"sku": "A001", "name": "玻尿酸面膜10片裝"},
|
||
{"sku": "A009", "name": "美白化妝水150ml"},
|
||
]
|
||
|
||
crawler = PChomeCrawler(delay=0.8)
|
||
|
||
print("=== Competitor Price Feeder CLI 測試 ===\n")
|
||
for item in test_items:
|
||
keyword = item["name"][:20]
|
||
ok, msg, products = crawler.search_products(keyword, limit=10)
|
||
|
||
if not ok or not products:
|
||
print(f"[{item['sku']}] 無結果: {msg}")
|
||
continue
|
||
|
||
result = _find_best_match(item["name"], products)
|
||
if not result:
|
||
print(f"[{item['sku']}] 無法比對")
|
||
continue
|
||
|
||
best, score = result
|
||
tags = _extract_tags(best)
|
||
symbol = "✅" if score >= MIN_MATCH_SCORE else "⚠️ 低分"
|
||
print(
|
||
f"{symbol} [{item['sku']}] {item['name'][:25]}\n"
|
||
f" → PChome: {best.name[:40]}\n"
|
||
f" → 售價 ${best.price} | 分數 {score:.3f} | 標籤 {tags}\n"
|
||
)
|