ewoooc/services/competitor_price_feeder.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
競品價格補給線 Worker (Competitor Price Feeder)

角色：獨立背景 Worker（生產者端）
架構位置：
  [本 Worker — 每 4 小時跑一次] → competitor_prices DB 表（最新快取）
                              → competitor_price_history DB 表（歷史快照）
                                        ↓
  [AI Pipeline] → fetch_candidates() LEFT JOIN competitor_prices（消費者端）

設計原則：
- 與 AI Pipeline 完全解耦：本 Worker 掛了不影響核心大腦
- 自帶重試機制，不阻塞主排程
- 語意化標籤 (tags) 讓 Hermes 獲得更豐富的情境

爬取邏輯：
  MOMO 商品名稱 → PChome 關鍵字搜尋 → 模糊比對最佳匹配 → 寫入 competitor_prices + competitor_price_history

依賴：
  services/pchome_crawler.py   — 搜尋 + 批量 API
  services/price_comparison.py — ProductNameParser + 模糊比對
"""

import json
import logging
import re
import time
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import Optional

logger = logging.getLogger(__name__)

# ── 比對參數 ─────────────────────────────────────────
MIN_MATCH_SCORE  = 0.76  # 低於此分數不寫入；核心比價寧可待審也不能錯配
REPLACE_DIFFERENT_PRODUCT_SCORE = 0.84  # 已有不同 PChome 商品時，需超高信心才覆蓋
SEARCH_LIMIT     = 12    # 每個搜尋詞取 PChome 前 N 筆
MAX_SEARCH_TERMS  = 3     # 每個 MOMO 商品最多嘗試幾組搜尋詞
BATCH_SIZE       = 30    # 每批 DB 寫入筆數
RATE_DELAY       = 0.8   # 每次 PChome 請求間隔（秒）
TTL_HOURS        = 6     # competitor_prices 快取有效期

# ── Feeder 結果 ───────────────────────────────────────
@dataclass
class FeederResult:
    total_skus: int
    matched: int
    skipped_no_result: int
    skipped_low_score: int
    errors: int
    duration_sec: float
    history_written: int = 0
    attempts_written: int = 0


def _extract_tags(pchome_product) -> list:
    """
    從 PChomeProduct 物件提取語意標籤

    標籤設計：
      - "on_sale"        — is_on_sale = True
      - "discount_10pct" — 折扣 10~19%
      - "discount_20pct" — 折扣 20~29%
      - "discount_30pct" — 折扣 ≥ 30%
      - "low_stock"      — 庫存 < 10
      - "high_rating"    — 評分 ≥ 4.5
    """
    tags = []

    if pchome_product.is_on_sale:
        tags.append("on_sale")

    try:
        disc = int(pchome_product.discount or 0)
    except (ValueError, TypeError):
        disc = 0
    if disc >= 30:
        tags.append("discount_30pct")
    elif disc >= 20:
        tags.append("discount_20pct")
    elif disc >= 10:
        tags.append("discount_10pct")

    try:
        stock = int(pchome_product.stock) if pchome_product.stock is not None else None
        if stock is not None and 0 < stock < 10:
            tags.append("low_stock")
    except (ValueError, TypeError):
        pass

    try:
        if pchome_product.rating and float(pchome_product.rating) >= 4.5:
            tags.append("high_rating")
    except (ValueError, TypeError):
        pass

    return tags


def _clean_search_text(value: str) -> str:
    value = re.sub(r'[（(][^)）]*[)）]', ' ', value or '')
    value = re.sub(r'[【\[].*?[】\]]', ' ', value)
    value = re.sub(r'[^\w\u4e00-\u9fff]+', ' ', value)
    return re.sub(r'\s+', ' ', value).strip()


def _dedupe_terms(terms: list) -> list:
    result = []
    seen = set()
    for term in terms:
        cleaned = _clean_search_text(term)
        if len(cleaned) < 2:
            continue
        key = cleaned.lower()
        if key in seen:
            continue
        seen.add(key)
        result.append(cleaned[:36])
        if len(result) >= MAX_SEARCH_TERMS:
            break
    return result


def _build_search_keywords(momo_name: str) -> list:
    """
    用多組商品身份線索搜尋 PChome，提高命中率，但仍交給身份比對門檻把關。
    """
    try:
        from services.marketplace_product_matcher import build_search_terms
        terms = build_search_terms(momo_name, max_terms=MAX_SEARCH_TERMS)
    except Exception:
        logger.debug(
            "[Feeder] marketplace matcher failed while building search keywords; "
            "fallback to cleaned product name",
            exc_info=True,
        )
        cleaned = _clean_search_text(momo_name)
        terms = [cleaned[:36], cleaned[:24]]

    return _dedupe_terms(terms)


def _format_match_diagnostics(diagnostics) -> str:
    if not diagnostics:
        return ""
    reasons = ",".join(getattr(diagnostics, "reasons", ()) or ())
    return (
        f"score={diagnostics.score}; brand={diagnostics.brand_score}; "
        f"token={diagnostics.token_score}; spec={diagnostics.spec_score}; "
        f"seq={diagnostics.sequence_score}; type={diagnostics.type_score}; "
        f"penalty={diagnostics.price_penalty}; veto={diagnostics.hard_veto}; "
        f"reasons={reasons}"
    )


def _find_best_match_detail(
    momo_name: str,
    pchome_products: list,
    momo_price: float = None,
) -> Optional[tuple]:
    """
    從 PChome 搜尋結果中找出與 MOMO 商品名稱最接近的一筆

    Args:
        momo_name:       MOMO 商品名稱
        pchome_products: PChomeProduct 列表

    Returns:
        (PChomeProduct, score, diagnostics) or None
    """
    from services.marketplace_product_matcher import score_marketplace_match

    best, best_score, best_diagnostics = None, 0.0, None
    for p in pchome_products:
        diagnostics = score_marketplace_match(
            momo_name,
            p.name,
            momo_price=momo_price,
            competitor_price=getattr(p, "price", None),
        )
        score = diagnostics.score
        if score > best_score:
            best, best_score, best_diagnostics = p, score, diagnostics

    return (best, best_score, best_diagnostics) if best else None


def _find_best_match(momo_name: str, pchome_products: list) -> Optional[tuple]:
    """Backward-compatible helper for smoke scripts."""
    result = _find_best_match_detail(momo_name, pchome_products)
    if not result:
        return None
    best, score, _diagnostics = result
    return best, score


def _search_pchome_candidates(crawler, momo_name: str, keywords: list = None, momo_price: float = None) -> list:
    """以多組搜尋詞擴大 PChome 候選池，找到可信候選後提早停止。"""
    candidates = []
    seen_ids = set()
    for keyword in keywords or _build_search_keywords(momo_name):
        ok, _, products = crawler.search_products(keyword, limit=SEARCH_LIMIT)
        if not ok or not products:
            continue
        for product in products:
            if product.product_id in seen_ids:
                continue
            seen_ids.add(product.product_id)
            candidates.append(product)
        best = _find_best_match_detail(momo_name, candidates, momo_price=momo_price)
        if best and best[1] >= 0.76:
            break
    return candidates


def _structural_similarity(momo_p, pchome_p) -> float:
    """
    結構化相似度計算（品牌 + 規格 + 關鍵字）

    權重：品牌匹配 0.4 + 規格匹配 0.3 + 關鍵字相似 0.3
    """
    from difflib import SequenceMatcher

    score = 0.0

    # 品牌比對 (0.4)
    if momo_p.brand and pchome_p.brand:
        if momo_p.brand == pchome_p.brand:
            score += 0.4
        elif momo_p.brand in pchome_p.brand or pchome_p.brand in momo_p.brand:
            score += 0.2
    elif not momo_p.brand and not pchome_p.brand:
        score += 0.1  # 都沒有品牌，不扣分

    # 規格比對 (0.3) — 容量/克重
    momo_specs  = momo_p.specs  or {}
    pchome_specs = pchome_p.specs or {}
    if momo_specs and pchome_specs:
        matching_specs = sum(
            1 for k, v in momo_specs.items()
            if pchome_specs.get(k) == v
        )
        total_specs = max(len(momo_specs), len(pchome_specs), 1)
        score += 0.3 * (matching_specs / total_specs)
    elif not momo_specs and not pchome_specs:
        score += 0.15

    # 關鍵字相似度 (0.3)
    momo_kws  = " ".join(momo_p.keywords or [])
    pchome_kws = " ".join(pchome_p.keywords or [])
    if momo_kws and pchome_kws:
        kw_sim = SequenceMatcher(None, momo_kws.lower(), pchome_kws.lower()).ratio()
        score += 0.3 * kw_sim

    return round(score, 3)


class CompetitorPriceFeeder:
    """
    競品價格補給線 Worker

    用法：
        feeder = CompetitorPriceFeeder(engine=db_engine)
        result = feeder.run(source="pchome")
    """

    def __init__(self, engine=None):
        self.engine = engine
        self._history_table_ready = False
        self._attempt_table_ready = False

    def _ensure_competitor_price_history_table(self, conn):
        """確保競品價格歷史表存在；排程可自癒補表，不依賴手動 migration。"""
        if self._history_table_ready:
            return

        from sqlalchemy import text
        if conn.dialect.name == "postgresql":
            conn.execute(text("""
                CREATE TABLE IF NOT EXISTS competitor_price_history (
                    id BIGSERIAL PRIMARY KEY,
                    sku VARCHAR(50) NOT NULL,
                    source VARCHAR(30) NOT NULL DEFAULT 'pchome',
                    momo_product_id INTEGER,
                    momo_price NUMERIC(10,2),
                    price NUMERIC(10,2) NOT NULL,
                    original_price NUMERIC(10,2),
                    discount_pct INTEGER,
                    competitor_product_id VARCHAR(100),
                    competitor_product_name TEXT,
                    match_score NUMERIC(4,3),
                    tags JSONB DEFAULT '[]'::jsonb,
                    crawled_at TIMESTAMP NOT NULL DEFAULT NOW()
                )
            """))
            conn.execute(text("""
                CREATE INDEX IF NOT EXISTS idx_comp_price_history_sku_source_time
                ON competitor_price_history (sku, source, crawled_at DESC)
            """))
            conn.execute(text("""
                CREATE INDEX IF NOT EXISTS idx_comp_price_history_competitor_id
                ON competitor_price_history (competitor_product_id)
            """))
        else:
            conn.execute(text("""
                CREATE TABLE IF NOT EXISTS competitor_price_history (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    sku VARCHAR(50) NOT NULL,
                    source VARCHAR(30) NOT NULL DEFAULT 'pchome',
                    momo_product_id INTEGER,
                    momo_price NUMERIC(10,2),
                    price NUMERIC(10,2) NOT NULL,
                    original_price NUMERIC(10,2),
                    discount_pct INTEGER,
                    competitor_product_id VARCHAR(100),
                    competitor_product_name TEXT,
                    match_score NUMERIC(4,3),
                    tags TEXT DEFAULT '[]',
                    crawled_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
                )
            """))
            conn.execute(text("""
                CREATE INDEX IF NOT EXISTS idx_comp_price_history_sku_source_time
                ON competitor_price_history (sku, source, crawled_at DESC)
            """))
            conn.execute(text("""
                CREATE INDEX IF NOT EXISTS idx_comp_price_history_competitor_id
                ON competitor_price_history (competitor_product_id)
            """))

        self._history_table_ready = True

    def _ensure_competitor_match_attempts_table(self, conn):
        """確保 PChome 比對嘗試表存在；成功、低分、無結果與錯誤都要留痕。"""
        if self._attempt_table_ready:
            return

        from sqlalchemy import text
        if conn.dialect.name == "postgresql":
            conn.execute(text("""
                CREATE TABLE IF NOT EXISTS competitor_match_attempts (
                    id BIGSERIAL PRIMARY KEY,
                    sku VARCHAR(50) NOT NULL,
                    source VARCHAR(30) NOT NULL DEFAULT 'pchome',
                    momo_product_id INTEGER,
                    momo_product_name TEXT,
                    momo_price NUMERIC(10,2),
                    search_terms JSONB DEFAULT '[]'::jsonb,
                    candidate_count INTEGER DEFAULT 0,
                    attempt_status VARCHAR(30) NOT NULL,
                    best_competitor_product_id VARCHAR(100),
                    best_competitor_product_name TEXT,
                    best_competitor_price NUMERIC(10,2),
                    best_match_score NUMERIC(4,3),
                    error_message TEXT,
                    attempted_at TIMESTAMP NOT NULL DEFAULT NOW()
                )
            """))
            conn.execute(text("""
                CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_sku_source_time
                ON competitor_match_attempts (sku, source, attempted_at DESC)
            """))
            conn.execute(text("""
                CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_status_time
                ON competitor_match_attempts (attempt_status, attempted_at DESC)
            """))
        else:
            conn.execute(text("""
                CREATE TABLE IF NOT EXISTS competitor_match_attempts (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    sku VARCHAR(50) NOT NULL,
                    source VARCHAR(30) NOT NULL DEFAULT 'pchome',
                    momo_product_id INTEGER,
                    momo_product_name TEXT,
                    momo_price NUMERIC(10,2),
                    search_terms TEXT DEFAULT '[]',
                    candidate_count INTEGER DEFAULT 0,
                    attempt_status VARCHAR(30) NOT NULL,
                    best_competitor_product_id VARCHAR(100),
                    best_competitor_product_name TEXT,
                    best_competitor_price NUMERIC(10,2),
                    best_match_score NUMERIC(4,3),
                    error_message TEXT,
                    attempted_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
                )
            """))
            conn.execute(text("""
                CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_sku_source_time
                ON competitor_match_attempts (sku, source, attempted_at DESC)
            """))
            conn.execute(text("""
                CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_status_time
                ON competitor_match_attempts (attempt_status, attempted_at DESC)
            """))

        self._attempt_table_ready = True

    def _record_match_attempt(
        self,
        sku: str,
        momo_name: str,
        momo_product_id: int = None,
        momo_price: float = None,
        search_terms: list = None,
        candidate_count: int = 0,
        attempt_status: str = "unknown",
        best_product=None,
        best_score: float = None,
        error_message: str = None,
        source: str = "pchome",
    ) -> None:
        """追加一筆 PChome 比對嘗試紀錄，讓待比對/低信心也能回溯。"""
        from sqlalchemy import text

        with self.engine.begin() as conn:
            self._ensure_competitor_match_attempts_table(conn)
            search_terms_expr = "CAST(:search_terms AS jsonb)" if conn.dialect.name == "postgresql" else ":search_terms"
            conn.execute(text(f"""
                INSERT INTO competitor_match_attempts
                    (sku, source, momo_product_id, momo_product_name, momo_price,
                     search_terms, candidate_count, attempt_status,
                     best_competitor_product_id, best_competitor_product_name,
                     best_competitor_price, best_match_score, error_message,
                     attempted_at)
                VALUES
                    (:sku, :source, :momo_product_id, :momo_product_name, :momo_price,
                     {search_terms_expr}, :candidate_count, :attempt_status,
                     :best_id, :best_name,
                     :best_price, :best_score, :error_message,
                     CURRENT_TIMESTAMP)
            """), {
                "sku": sku,
                "source": source,
                "momo_product_id": momo_product_id,
                "momo_product_name": momo_name,
                "momo_price": momo_price,
                "search_terms": json.dumps(search_terms or [], ensure_ascii=False),
                "candidate_count": candidate_count,
                "attempt_status": attempt_status,
                "best_id": getattr(best_product, "product_id", None),
                "best_name": (getattr(best_product, "name", None) or "")[:300] or None,
                "best_price": getattr(best_product, "price", None),
                "best_score": best_score,
                "error_message": (error_message or "")[:1000] or None,
            })

    def _fetch_active_skus(self) -> list:
        """
        從 products 表取得待監控的 ACTIVE 商品清單

        Returns:
            list of {"sku": str, "name": str, "category": str}
        """
        if self.engine is None:
            raise RuntimeError("需要注入 SQLAlchemy engine")

        from sqlalchemy import text
        sql = text("""
            SELECT
                p.id AS product_id,
                p.i_code AS sku,
                p.name,
                p.category,
                (
                    SELECT pr.price
                    FROM price_records pr
                    WHERE pr.product_id = p.id
                    ORDER BY pr.timestamp DESC
                    LIMIT 1
                ) AS momo_price
            FROM products p
            WHERE p.status = 'ACTIVE'
              AND EXISTS (
                  SELECT 1
                  FROM price_records pr
                  WHERE pr.product_id = p.id
              )
            ORDER BY p.i_code
        """)
        with self.engine.connect() as conn:
            rows = conn.execute(sql).fetchall()
        return [dict(r._mapping) for r in rows]

    def _fetch_unmatched_priority_skus(self, limit: int = 80) -> list:
        """
        取得目前沒有有效 PChome 配對的高價 ACTIVE 商品，供補強流程優先處理。
        """
        if self.engine is None:
            raise RuntimeError("需要注入 SQLAlchemy engine")

        from sqlalchemy import text
        sql = text("""
            WITH latest_momo AS (
                SELECT
                    p.id AS product_id,
                    p.i_code AS sku,
                    p.name,
                    p.category,
                    pr.price AS momo_price,
                    ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC) AS rn
                FROM products p
                JOIN price_records pr ON pr.product_id = p.id
                WHERE p.status = 'ACTIVE'
            )
            SELECT
                lm.product_id,
                lm.sku,
                lm.name,
                lm.category,
                lm.momo_price
            FROM latest_momo lm
            LEFT JOIN competitor_prices cp
             ON cp.sku = lm.sku
             AND cp.source = 'pchome'
             AND (cp.expires_at IS NULL OR cp.expires_at > CURRENT_TIMESTAMP)
             AND COALESCE(cp.match_score, 0) >= :match_score_floor
             AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2'
            WHERE lm.rn = 1
              AND cp.sku IS NULL
            ORDER BY lm.momo_price DESC NULLS LAST, lm.sku
            LIMIT :limit
        """)
        with self.engine.connect() as conn:
            rows = conn.execute(
                sql,
                {"limit": max(1, min(int(limit), 300)), "match_score_floor": MIN_MATCH_SCORE},
            ).fetchall()
        return [dict(r._mapping) for r in rows]

    def _upsert_competitor_price(
        self,
        sku: str,
        product,           # PChomeProduct
        match_score: float,
        tags: list,
        momo_product_id: int = None,
        momo_price: float = None,
        source: str = "pchome",
    ):
        """單筆寫入/更新最新快取，並追加一筆歷史快照。"""
        from sqlalchemy import text
        _taipei = timezone(timedelta(hours=8))
        expires_at = (datetime.now(_taipei) + timedelta(hours=TTL_HOURS)).strftime("%Y-%m-%d %H:%M:%S")
        tags_json = json.dumps(tags, ensure_ascii=False)
        with self.engine.begin() as conn:
            self._ensure_competitor_price_history_table(conn)
            conn.execute(text("""
                INSERT INTO competitor_prices
                    (sku, source, price, original_price, discount_pct,
                     competitor_product_id, competitor_product_name,
                     match_score, tags, crawled_at, expires_at)
                VALUES
                    (:sku, :source, :price, :original_price, :discount_pct,
                     :comp_id, :comp_name,
                     :match_score, :tags, CURRENT_TIMESTAMP, :expires_at)
                ON CONFLICT (sku, source) DO UPDATE
                    SET price                   = EXCLUDED.price,
                        original_price          = EXCLUDED.original_price,
                        discount_pct            = EXCLUDED.discount_pct,
                        competitor_product_id   = EXCLUDED.competitor_product_id,
                        competitor_product_name = EXCLUDED.competitor_product_name,
                        match_score             = EXCLUDED.match_score,
                        tags                    = EXCLUDED.tags,
                        crawled_at              = CURRENT_TIMESTAMP,
                        expires_at              = :expires_at
            """), {
                "sku":           sku,
                "source":        source,
                "price":         product.price,
                "original_price":product.original_price,
                "discount_pct":  product.discount,
                "comp_id":       product.product_id,
                "comp_name":     product.name[:200],
                "match_score":   match_score,
                "tags":          tags_json,
                "expires_at":    expires_at,
            })
            conn.execute(text("""
                INSERT INTO competitor_price_history
                    (sku, source, momo_product_id, momo_price,
                     price, original_price, discount_pct,
                     competitor_product_id, competitor_product_name,
                     match_score, tags, crawled_at)
                VALUES
                    (:sku, :source, :momo_product_id, :momo_price,
                     :price, :original_price, :discount_pct,
                     :comp_id, :comp_name,
                     :match_score, :tags, CURRENT_TIMESTAMP)
            """), {
                "sku":             sku,
                "source":          source,
                "momo_product_id": momo_product_id,
                "momo_price":      momo_price,
                "price":           product.price,
                "original_price":  product.original_price,
                "discount_pct":    product.discount,
                "comp_id":         product.product_id,
                "comp_name":       product.name[:200],
                "match_score":     match_score,
                "tags":            tags_json,
            })

    def _should_upsert_competitor_price(
        self,
        sku: str,
        product,
        match_score: float,
        source: str = "pchome",
    ) -> tuple[bool, str]:
        """
        保護正式 competitor_prices：若既有配對是不同 PChome 商品，
        只有超高信心才允許覆蓋，避免新 matcher 一次污染核心比價資料。
        """
        from sqlalchemy import text

        with self.engine.connect() as conn:
            row = conn.execute(text("""
                SELECT competitor_product_id, match_score, tags
                FROM competitor_prices
                WHERE sku = :sku
                  AND source = :source
                LIMIT 1
            """), {"sku": sku, "source": source}).mappings().first()

        if not row:
            return True, "new_match"

        existing_id = str(row.get("competitor_product_id") or "")
        incoming_id = str(getattr(product, "product_id", "") or "")
        try:
            existing_score = float(row.get("match_score") or 0)
        except (TypeError, ValueError):
            existing_score = 0.0
        existing_tags = row.get("tags") or []
        if isinstance(existing_tags, str):
            try:
                existing_tags = json.loads(existing_tags)
            except Exception:
                existing_tags = []
        if "identity_v2" not in existing_tags:
            return True, "replace_legacy_unverified"

        if not existing_id or existing_id == incoming_id:
            return True, "same_or_empty_existing"
        if existing_score < MIN_MATCH_SCORE:
            return True, f"replace_low_existing_score={existing_score:.3f}"
        if match_score >= REPLACE_DIFFERENT_PRODUCT_SCORE:
            return True, f"replace_high_confidence_score={match_score:.3f}"
        return (
            False,
            f"existing_match_conflict;existing_id={existing_id};"
            f"incoming_id={incoming_id};existing_score={existing_score:.3f};"
            f"incoming_score={match_score:.3f}",
        )

    def _run_sku_items(self, skus: list, source: str = "pchome", label: str = "PChome 競品價格") -> FeederResult:
        start = time.time()

        if source != "pchome":
            logger.warning(f"[Feeder] 尚未支援 source={source}，跳過")
            return FeederResult(0, 0, 0, 0, 0, 0.0)

        from services.pchome_crawler import PChomeCrawler
        crawler = PChomeCrawler(timeout=30, delay=RATE_DELAY)

        logger.info(f"[Feeder] 開始抓取 {len(skus)} 支商品的 {label}")

        matched = 0
        skipped_no = 0
        skipped_low = 0
        errors = 0
        history_written = 0
        attempts_written = 0

        for item in skus:
            sku      = item["sku"]
            momo_name = item["name"]
            momo_product_id = item.get("product_id")
            momo_price = item.get("momo_price")
            search_terms = _build_search_keywords(momo_name)

            try:
                products = _search_pchome_candidates(crawler, momo_name, search_terms, momo_price=momo_price)
                if not products:
                    logger.debug(f"[Feeder] {sku} 無搜尋結果，跳過")
                    self._record_match_attempt(
                        sku,
                        momo_name,
                        momo_product_id=momo_product_id,
                        momo_price=momo_price,
                        search_terms=search_terms,
                        candidate_count=0,
                        attempt_status="no_result",
                        source=source,
                    )
                    attempts_written += 1
                    skipped_no += 1
                    continue

                result = _find_best_match_detail(momo_name, products, momo_price=momo_price)
                if not result:
                    self._record_match_attempt(
                        sku,
                        momo_name,
                        momo_product_id=momo_product_id,
                        momo_price=momo_price,
                        search_terms=search_terms,
                        candidate_count=len(products),
                        attempt_status="no_match",
                        source=source,
                    )
                    attempts_written += 1
                    skipped_no += 1
                    continue

                best_product, score, diagnostics = result

                if score < MIN_MATCH_SCORE:
                    logger.debug(
                        f"[Feeder] {sku} 比對分數過低 ({score:.3f} < {MIN_MATCH_SCORE})，"
                        f"{_format_match_diagnostics(diagnostics)}"
                    )
                    self._record_match_attempt(
                        sku,
                        momo_name,
                        momo_product_id=momo_product_id,
                        momo_price=momo_price,
                        search_terms=search_terms,
                        candidate_count=len(products),
                        attempt_status="low_score",
                        best_product=best_product,
                        best_score=score,
                        error_message=_format_match_diagnostics(diagnostics),
                        source=source,
                    )
                    attempts_written += 1
                    skipped_low += 1
                    continue

                tags = _extract_tags(best_product)
                tags.extend(getattr(diagnostics, "tags", []))
                for reason in getattr(diagnostics, "reasons", ()) or ():
                    tags.append(f"match_{reason}")
                tags = list(dict.fromkeys(tags))
                should_write, write_reason = self._should_upsert_competitor_price(
                    sku,
                    best_product,
                    score,
                    source=source,
                )
                if not should_write:
                    logger.info(f"[Feeder] {sku} 進入人工覆核，不覆蓋既有配對 | {write_reason}")
                    self._record_match_attempt(
                        sku,
                        momo_name,
                        momo_product_id=momo_product_id,
                        momo_price=momo_price,
                        search_terms=search_terms,
                        candidate_count=len(products),
                        attempt_status="needs_review",
                        best_product=best_product,
                        best_score=score,
                        error_message=f"{write_reason}; {_format_match_diagnostics(diagnostics)}",
                        source=source,
                    )
                    attempts_written += 1
                    skipped_low += 1
                    continue

                tags.append(write_reason)
                self._upsert_competitor_price(
                    sku,
                    best_product,
                    score,
                    tags,
                    momo_product_id=momo_product_id,
                    momo_price=momo_price,
                    source=source,
                )
                self._record_match_attempt(
                    sku,
                    momo_name,
                    momo_product_id=momo_product_id,
                    momo_price=momo_price,
                    search_terms=search_terms,
                    candidate_count=len(products),
                    attempt_status="matched",
                    best_product=best_product,
                    best_score=score,
                    source=source,
                )
                matched += 1
                history_written += 1
                attempts_written += 1
                logger.debug(
                    f"[Feeder] {sku} → PChome ${best_product.price} "
                    f"score={score:.3f} tags={tags}"
                )

            except Exception as e:
                logger.error(f"[Feeder] {sku} 處理失敗: {e}")
                try:
                    self._record_match_attempt(
                        sku,
                        momo_name,
                        momo_product_id=momo_product_id,
                        momo_price=momo_price,
                        search_terms=search_terms,
                        attempt_status="error",
                        error_message=str(e),
                        source=source,
                    )
                    attempts_written += 1
                except Exception as attempt_error:
                    logger.warning(f"[Feeder] {sku} 比對嘗試紀錄寫入失敗: {attempt_error}")
                errors += 1

        duration = round(time.time() - start, 2)
        logger.info(
            f"[Feeder] 完成 matched={matched} skipped_no={skipped_no} "
            f"skipped_low={skipped_low} errors={errors} "
            f"history_written={history_written} attempts_written={attempts_written} 耗時={duration}s"
        )
        return FeederResult(
            total_skus=len(skus),
            matched=matched,
            skipped_no_result=skipped_no,
            skipped_low_score=skipped_low,
            errors=errors,
            duration_sec=duration,
            history_written=history_written,
            attempts_written=attempts_written,
        )

    def run(self, source: str = "pchome") -> FeederResult:
        """
        執行一輪競品價格抓取與寫入

        Args:
            source: 競品來源代碼（目前支援 'pchome'）

        Returns:
            FeederResult
        """
        try:
            skus = self._fetch_active_skus()
        except Exception as e:
            logger.error(f"[Feeder] 讀取商品清單失敗: {e}")
            return FeederResult(0, 0, 0, 0, 1, 0.0)

        return self._run_sku_items(skus, source=source, label="PChome 競品價格")

    def run_unmatched_priority(self, limit: int = 80, source: str = "pchome") -> FeederResult:
        """優先補抓尚未有有效 PChome 配對的高價商品。"""
        try:
            skus = self._fetch_unmatched_priority_skus(limit=limit)
        except Exception as e:
            logger.error(f"[Feeder] 讀取待比對優先商品失敗: {e}")
            return FeederResult(0, 0, 0, 0, 1, 0.0)

        return self._run_sku_items(skus, source=source, label="待比對優先補抓")


# ─────────────────────────────────────────────
# CLI 測試（不依賴 DB，直接測試爬蟲 + 比對邏輯）
# python3 services/competitor_price_feeder.py
# ─────────────────────────────────────────────
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

    from services.pchome_crawler import PChomeCrawler

    test_items = [
        {"sku": "A003", "name": "舒特膚AD乳液200ml"},
        {"sku": "A001", "name": "玻尿酸面膜10片裝"},
        {"sku": "A009", "name": "美白化妝水150ml"},
    ]

    crawler = PChomeCrawler(delay=0.8)

    print("=== Competitor Price Feeder CLI 測試 ===\n")
    for item in test_items:
        keyword = item["name"][:20]
        ok, msg, products = crawler.search_products(keyword, limit=10)

        if not ok or not products:
            print(f"[{item['sku']}] 無結果: {msg}")
            continue

        result = _find_best_match(item["name"], products)
        if not result:
            print(f"[{item['sku']}] 無法比對")
            continue

        best, score = result
        tags = _extract_tags(best)
        symbol = "✅" if score >= MIN_MATCH_SCORE else "⚠️ 低分"
        print(
            f"{symbol} [{item['sku']}] {item['name'][:25]}\n"
            f"   → PChome: {best.name[:40]}\n"
            f"   → 售價 ${best.price} | 分數 {score:.3f} | 標籤 {tags}\n"
        )