ewoooc/services/competitor_intel_repository.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""PChome / MOMO 競價情報共用資料出口。

短期 canonical source：
- competitor_prices：目前有效配對
- competitor_price_history：價格歷史趨勢
- competitor_match_attempts：未配對與低信心診斷
"""

from __future__ import annotations

import os
import pickle
import time
from datetime import date, datetime, timedelta
from pathlib import Path
from threading import Lock
from typing import Any, Optional, Union

from sqlalchemy import inspect, text


# Minimum match_score a PChome pairing must reach before the queries in this
# module count it as a valid match.
PCHOME_MATCH_SCORE_FLOOR = 0.76
# Attempt statuses for which a unit-price comparison is attached to review items.
UNIT_COMPARABLE_STATUSES = {"unit_comparable", "refresh_unit_comparable"}
# Attempt statuses that appear in the review queue and are summed into
# "actionable_review_count" in the coverage payload.
ACTIONABLE_ATTEMPT_STATUSES = {
    "unit_comparable",
    "refresh_unit_comparable",
    "identity_veto",
    "low_score",
    "expired_match",
    "refresh_no_result",
    "no_result",
}
# Review-page filter key -> raw attempt statuses that the filter expands to.
# Any filter value that is not a key here is treated as "no filter".
REVIEW_STATUS_FILTER_GROUPS = {
    "unit_comparable": ("unit_comparable", "refresh_unit_comparable"),
    "identity_veto": ("identity_veto",),
    "low_score": ("low_score",),
    "expired_match": ("expired_match",),
    "no_result": ("no_result", "refresh_no_result"),
}
# Attempt status -> display label (Traditional Chinese) used as "status_label"
# on review items. Unknown statuses fall back to the raw status string.
ATTEMPT_STATUS_LABELS = {
    "unit_comparable": "需單位價比較",
    "refresh_unit_comparable": "需單位價比較",
    "identity_veto": "身份否決",
    "low_score": "低信心待審",
    "expired_match": "價格過期待刷新",
    "refresh_no_result": "刷新找不到商品",
    "no_result": "找不到同款",
    "never_attempted": "尚未搜尋",
    "manual_accepted": "人工已採用",
    "manual_rejected": "人工已否決",
    "manual_unit_price_required": "人工標記單位價",
    "manual_needs_research": "人工要求補搜尋",
}
# Attempt status -> suggested next action used as "action_label" on review items.
ATTEMPT_ACTION_LABELS = {
    "unit_comparable": "人工確認檔期、贈品與單位價",
    "refresh_unit_comparable": "人工確認檔期、贈品與單位價",
    "identity_veto": "確認是否為不同商品線或規格",
    "low_score": "人工審核候選商品身份",
    "expired_match": "重新刷新 PChome 價格",
    "refresh_no_result": "調整搜尋詞後重抓",
    "no_result": "補充搜尋詞或品牌關鍵字",
    "never_attempted": "排入 PChome 補抓",
    "manual_accepted": "已寫入正式 PChome 同款配對",
    "manual_rejected": "已關閉此候選，等待下一輪新候選",
    "manual_unit_price_required": "維持單位價比較，不寫入正式總價差",
    "manual_needs_research": "補搜尋詞或重新抓取後再判斷",
}
# Matcher reason code (parsed from the "reasons=" segment of a diagnostic
# string) -> short display label. Unknown codes are shown with "_" replaced by " ".
MATCH_DIAGNOSTIC_REASON_LABELS = {
    "brand_conflict": "品牌不符",
    "product_line_conflict": "商品線不符",
    "type_conflict": "品類不符",
    "volume_conflict": "容量差異",
    "weight_conflict": "重量差異",
    "count_conflict": "件數差異",
    "component_count_conflict": "入數差異",
    "multi_component_conflict": "組合差異",
    "refill_pack_conflict": "補充包差異",
    "unit_comparable": "需單位價",
    "price_ratio_extreme": "價差極端",
    "price_ratio_wide": "價差過大",
}
# Default cache lifetime in seconds; read once at import time from the
# environment, falling back to 1800 (30 minutes).
COMPETITOR_INTEL_CACHE_TTL_SECONDS = int(os.getenv("COMPETITOR_INTEL_CACHE_TTL_SECONDS", "1800"))
# Directory one level above the directory that contains this file.
_BASE_DIR = Path(__file__).resolve().parents[1]
# On-disk pickle that holds the cache entries written by _write_shared_cache.
_CACHE_FILE = _BASE_DIR / "data" / "competitor_intel_cache.pkl"
# Serialises access to _MEM_CACHE and to the cache file between threads of
# this process (a threading.Lock does not coordinate separate processes).
_CACHE_LOCK = Lock()
# In-process cache: cache key -> {"time": <epoch seconds>, "value": <payload>}.
_MEM_CACHE: dict[str, dict[str, Any]] = {}


def _num(value: Any) -> float:
    try:
        return float(value or 0)
    except (TypeError, ValueError):
        return 0.0


def _date_label(value: Any) -> str:
    if hasattr(value, "strftime"):
        return value.strftime("%Y-%m-%d")
    return str(value or "")


def _month_label(value: Any) -> str:
    if hasattr(value, "strftime"):
        return value.strftime("%Y-%m")
    return str(value or "")[:7]


def _attempt_status_label(status: Any) -> str:
    """Return the display label for an attempt status.

    Unknown statuses are shown as-is; a falsy status falls back to a generic
    "pending comparison" label.
    """
    lookup_key = str(status or "")
    fallback = str(status or "待比對")
    return ATTEMPT_STATUS_LABELS.get(lookup_key, fallback)


def _attempt_action_label(status: Any) -> str:
    """Return the suggested next action for an attempt status.

    Statuses without a dedicated entry get a generic "manually confirm the
    matching evidence" action.
    """
    lookup_key = str(status or "")
    default_action = "人工確認比對證據"
    return ATTEMPT_ACTION_LABELS.get(lookup_key, default_action)


def _extract_match_diagnostic_reasons(diagnostic_text: Any) -> list[dict[str, str]]:
    """Translate matcher diagnostics into short operator-facing reason chips."""
    text_value = str(diagnostic_text or "")
    if not text_value:
        return []

    reason_blob = ""
    for part in text_value.split(";"):
        key, _, value = part.strip().partition("=")
        if key.strip() == "reasons":
            reason_blob = value.strip()
            break
    if not reason_blob:
        return []

    reasons: list[dict[str, str]] = []
    seen: set[str] = set()
    for raw_reason in reason_blob.replace("|", ",").split(","):
        code = raw_reason.strip()
        if not code or code in seen:
            continue
        seen.add(code)
        reasons.append({
            "code": code,
            "label": MATCH_DIAGNOSTIC_REASON_LABELS.get(code, code.replace("_", " ")),
        })
    return reasons


def _build_unit_comparison_for_attempt(row: dict[str, Any]) -> Optional[dict[str, Any]]:
    """Build a unit-price comparison for rows whose status calls for one.

    Returns ``None`` when the row's attempt status is not unit-comparable.
    Any failure while importing or running the matcher is reported as
    ``{"comparable": False, "reason": "build_error"}`` instead of raising.
    """
    if str(row.get("attempt_status") or "") not in UNIT_COMPARABLE_STATUSES:
        return None
    try:
        # Imported lazily, inside the guarded region, so an import failure is
        # reported the same way as a comparison failure.
        from services.marketplace_product_matcher import build_unit_price_comparison

        momo_name = row.get("name") or row.get("momo_product_name") or ""
        pchome_name = row.get("best_competitor_product_name") or ""
        comparison = build_unit_price_comparison(
            momo_name,
            pchome_name,
            row.get("momo_price"),
            row.get("best_competitor_price"),
        )
    except Exception:
        return {"comparable": False, "reason": "build_error"}
    return comparison


def _format_competitor_review_item(row: dict[str, Any]) -> dict[str, Any]:
    """Shape one review-queue row into the payload used by consumers.

    Numeric fields are coerced with ``_num``/``int``, the attempt status is
    expanded into display labels, the matcher diagnostic (stored in
    ``error_message``) is parsed into reason chips, and a unit-price
    comparison is attached when the status asks for one.
    """
    item = dict(row)
    status = item.get("attempt_status")
    diagnostic = item.get("error_message") or ""
    reasons = _extract_match_diagnostic_reasons(diagnostic)
    reason_text = "、".join(reason["label"] for reason in reasons)

    formatted: dict[str, Any] = {
        "sku": str(item.get("sku") or ""),
        "name": item.get("name") or "",
        "category": item.get("category") or "",
        "momo_price": _num(item.get("momo_price")),
        "attempt_status": status or "",
        "status_label": _attempt_status_label(status),
        "action_label": _attempt_action_label(status),
        "candidate_count": int(item.get("candidate_count") or 0),
        "candidate_pc_id": item.get("best_competitor_product_id"),
        "candidate_pc_name": item.get("best_competitor_product_name") or "",
        "candidate_pc_price": _num(item.get("best_competitor_price")),
        "best_match_score": _num(item.get("best_match_score")),
        "match_diagnostic": diagnostic,
        "diagnostic_reasons": reasons,
        "diagnostic_reason_text": reason_text,
        "attempted_at": _date_label(item.get("attempted_at")),
    }
    formatted["unit_comparison"] = _build_unit_comparison_for_attempt(item)
    return formatted


def clear_competitor_intel_cache() -> None:
    """Drop every cached payload, both in memory and on disk.

    Removal of the cache file is best-effort: a missing file or any other
    ``OSError`` is ignored.
    """
    with _CACHE_LOCK:
        _MEM_CACHE.clear()
        try:
            # A missing file raises FileNotFoundError, which is an OSError.
            _CACHE_FILE.unlink()
        except OSError:
            pass


def _load_shared_cache() -> dict[str, dict[str, Any]]:
    """Read the on-disk cache, returning ``{}`` when it is missing or unusable.

    Any error while opening or unpickling the file, and any payload that is
    not a dict, is treated as an empty cache.
    """
    if not _CACHE_FILE.exists():
        return {}
    try:
        with _CACHE_FILE.open("rb") as handle:
            # NOTE(review): unpickling can execute arbitrary code; this is only
            # safe while the cache file is writable solely by this application.
            # Confirm the deployment's file permissions.
            payload = pickle.load(handle)
    except Exception:
        return {}
    if isinstance(payload, dict):
        return payload
    return {}


def _write_shared_cache(payload: dict[str, dict[str, Any]]) -> None:
    """Persist *payload* to the cache file, best-effort.

    The data is pickled into a per-process temporary file and then moved over
    the real cache file with ``os.replace``. Every failure is swallowed; a
    leftover temporary file is removed when possible.
    """
    tmp_file = None
    try:
        _CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
        tmp_file = _CACHE_FILE.with_suffix(f".{os.getpid()}.tmp")
        with tmp_file.open("wb") as handle:
            pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)
        os.replace(tmp_file, _CACHE_FILE)
    except Exception:
        # Nothing to clean up if we failed before the temp path was chosen.
        if tmp_file is None:
            return
        try:
            if tmp_file.exists():
                tmp_file.unlink()
        except OSError:
            pass


def _cache_entry_timestamp(entry: Any) -> Optional[float]:
    """Return a cache entry's timestamp, or ``None`` when the entry is unusable.

    An entry is unusable when it is empty, is not a dict, or carries a
    ``"time"`` value that cannot be converted to ``float``.
    """
    if not entry or not isinstance(entry, dict):
        return None
    try:
        return float(entry.get("time", 0))
    except (TypeError, ValueError):
        return None


def _cached_payload(cache_key: str, producer, ttl_seconds: int = COMPETITOR_INTEL_CACHE_TTL_SECONDS):
    """Return the cached value for *cache_key*, calling *producer* on a miss.

    Lookup order is the in-process cache, then the on-disk cache; a hit on
    disk is copied into memory. On a miss, ``producer()`` runs outside the
    lock and its result is stored in both caches, stamped with the time this
    call started.

    Args:
        cache_key: Key identifying the payload.
        producer: Zero-argument callable that builds the payload.
        ttl_seconds: Entry lifetime in seconds; ``<= 0`` bypasses caching.

    Returns:
        The cached or freshly produced payload.

    Malformed entries (for example from a damaged cache file) are treated as
    misses instead of raising. After every store, entries older than
    ``max(ttl_seconds * 4, 3600)`` seconds are evicted from both the file and
    the in-process cache, so keys that embed free-form search text cannot
    accumulate indefinitely.
    """
    if ttl_seconds <= 0:
        return producer()

    now = time.time()

    def _is_fresh(candidate: Any) -> bool:
        stamp = _cache_entry_timestamp(candidate)
        return stamp is not None and now - stamp < ttl_seconds

    with _CACHE_LOCK:
        entry = _MEM_CACHE.get(cache_key)
        if _is_fresh(entry):
            return entry.get("value")
        shared = _load_shared_cache()
        entry = shared.get(cache_key)
        if _is_fresh(entry):
            _MEM_CACHE[cache_key] = entry
            return entry.get("value")

    # Produce outside the lock so a slow query does not block other readers.
    value = producer()
    entry = {"time": now, "value": value}
    stale_before = now - max(ttl_seconds * 4, 3600)

    def _worth_keeping(candidate: Any) -> bool:
        stamp = _cache_entry_timestamp(candidate)
        return stamp is not None and stamp >= stale_before

    with _CACHE_LOCK:
        _MEM_CACHE[cache_key] = entry
        expired_keys = [key for key, item in _MEM_CACHE.items() if not _worth_keeping(item)]
        for key in expired_keys:
            del _MEM_CACHE[key]

        shared = _load_shared_cache()
        shared[cache_key] = entry
        shared = {key: item for key, item in shared.items() if _worth_keeping(item)}
        _write_shared_cache(shared)
    return value


def fetch_competitor_coverage(engine) -> dict:
    """Cached entry point for the PChome coverage summary.

    The cache key includes the match-score floor, so changing the floor never
    serves a payload computed with the previous one.
    """
    cache_key = f"coverage:v2:floor={PCHOME_MATCH_SCORE_FLOOR}"

    def _produce() -> dict:
        return _fetch_competitor_coverage_uncached(engine)

    return _cached_payload(cache_key, _produce)


def _fetch_competitor_coverage_uncached(engine) -> dict:
    """Read current PChome price-comparison coverage and the pending breakdown.

    Returns a dict with:
        active_with_price: ACTIVE products that have at least one price record.
        valid_matches: SKUs with a valid PChome match (unexpired, positive
            price, score at or above the floor, tagged ``identity_v2``).
        pending: active products without a valid match.
        match_rate: ``valid_matches / active_with_price`` as a percentage.
        attempt_status: pending products counted by latest attempt status
            (``never_attempted`` when no attempt exists).
        unit_comparable_count / actionable_review_count: sums over the
            corresponding status sets.
        match_score_floor: the floor used by the query.

    The totals are computed in their own single-row CTE and left-joined to the
    per-status counts. This keeps the totals available when no product is
    pending; attaching them to the grouped rows would yield an empty result
    and report zero coverage exactly when coverage is complete.
    """
    inspector = inspect(engine)
    if not inspector.has_table("competitor_prices"):
        return {
            "active_with_price": 0,
            "valid_matches": 0,
            "pending": 0,
            "match_rate": 0,
            "attempt_status": {},
            "unit_comparable_count": 0,
            "actionable_review_count": 0,
            "match_score_floor": PCHOME_MATCH_SCORE_FLOOR,
        }

    has_match_attempts = inspector.has_table("competitor_match_attempts")
    # Empty stand-in so the main query keeps one shape when the attempts table
    # does not exist; every pending product then counts as never_attempted.
    attempt_cte = """
        latest_attempt AS (
            SELECT
                NULL AS sku,
                NULL AS attempt_status
            WHERE FALSE
        )
    """
    if has_match_attempts:
        attempt_cte = """
        latest_attempt AS (
            SELECT DISTINCT ON (sku)
                sku,
                attempt_status
            FROM competitor_match_attempts
            WHERE source = 'pchome'
            ORDER BY sku, attempted_at DESC NULLS LAST
        )
        """

    sql = text(f"""
        WITH latest_momo AS (
            SELECT
                p.id AS product_id,
                p.i_code AS sku,
                pr.price AS momo_price,
                ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC, pr.id DESC) AS rn
            FROM products p
            JOIN price_records pr ON pr.product_id = p.id
            WHERE p.status = 'ACTIVE'
        ),
        valid_competitor AS (
            SELECT DISTINCT ON (cp.sku)
                cp.sku
            FROM competitor_prices cp
            WHERE cp.source = 'pchome'
              AND (cp.expires_at IS NULL OR cp.expires_at > CURRENT_TIMESTAMP)
              AND cp.price IS NOT NULL
              AND cp.price > 0
              AND COALESCE(cp.match_score, 0) >= {PCHOME_MATCH_SCORE_FLOOR}
              AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2'
            ORDER BY cp.sku, cp.crawled_at DESC NULLS LAST
        ),
        {attempt_cte.strip()},
        totals AS (
            SELECT
                (SELECT COUNT(*) FROM latest_momo WHERE rn = 1) AS active_with_price,
                (SELECT COUNT(*) FROM valid_competitor) AS valid_matches,
                (SELECT COUNT(*)
                 FROM latest_momo lm
                 LEFT JOIN valid_competitor vc ON vc.sku = lm.sku
                 WHERE lm.rn = 1 AND vc.sku IS NULL) AS pending
        ),
        status_counts AS (
            SELECT
                COALESCE(la.attempt_status, 'never_attempted') AS attempt_status,
                COUNT(*) AS status_count
            FROM latest_momo lm
            LEFT JOIN valid_competitor vc ON vc.sku = lm.sku
            LEFT JOIN latest_attempt la ON la.sku = lm.sku
            WHERE lm.rn = 1
              AND vc.sku IS NULL
            GROUP BY COALESCE(la.attempt_status, 'never_attempted')
        )
        SELECT
            t.active_with_price,
            t.valid_matches,
            t.pending,
            sc.attempt_status,
            sc.status_count
        FROM totals t
        LEFT JOIN status_counts sc ON TRUE
    """)
    with engine.connect() as conn:
        rows = conn.execute(sql).mappings().all()

    # The totals repeat on every row; read them from the first one.
    head = rows[0] if rows else {}
    active = int(head.get("active_with_price") or 0)
    valid = int(head.get("valid_matches") or 0)
    pending = int(head.get("pending") or 0)
    # When nothing is pending the left join yields one row with a NULL status;
    # it carries totals only and must not become a status bucket.
    statuses = {
        str(row.get("attempt_status")): int(row.get("status_count") or 0)
        for row in rows
        if row.get("attempt_status") is not None
    }
    unit_count = sum(statuses.get(status, 0) for status in UNIT_COMPARABLE_STATUSES)
    actionable_count = sum(statuses.get(status, 0) for status in ACTIONABLE_ATTEMPT_STATUSES)
    return {
        "active_with_price": active,
        "valid_matches": valid,
        "pending": pending,
        "match_rate": round(valid / max(active, 1) * 100, 1),
        "attempt_status": statuses,
        "unit_comparable_count": unit_count,
        "actionable_review_count": actionable_count,
        "match_score_floor": PCHOME_MATCH_SCORE_FLOOR,
    }


def fetch_competitor_gap_trend(engine, days: int = 30) -> dict:
    """Cached entry point for the daily PChome price-gap trend.

    ``days`` is clamped to the range 7..120 (falsy values mean 30) before it
    becomes part of the cache key.
    """
    window = min(max(int(days or 30), 7), 120)
    cache_key = f"gap_trend:v2:days={window}:floor={PCHOME_MATCH_SCORE_FLOOR}"

    def _produce() -> dict:
        return _fetch_competitor_gap_trend_uncached(engine, days=window)

    return _cached_payload(cache_key, _produce)


def _fetch_competitor_gap_trend_uncached(engine, days: int = 30) -> dict:
    """PChome price-gap pressure trend over the last N days.

    For each day, the latest history row per SKU is kept, and the result
    reports the number of matched SKUs, the average gap percentage
    ``(momo - pchome) / pchome * 100``, how many SKUs have MOMO more than 5%
    above PChome (risk) and how many have MOMO more than 5% below (advantage).
    Returns parallel lists keyed by series name; all lists are empty when the
    history table does not exist.
    """
    series_names = ("labels", "avg_gap_pct", "risk_count", "momo_advantage_count", "match_count")
    if not inspect(engine).has_table("competitor_price_history"):
        return {name: [] for name in series_names}

    window_days = max(7, min(int(days or 30), 120))
    query = text(f"""
        WITH latest_history AS (
            SELECT
                date_trunc('day', cph.crawled_at)::date AS bucket_date,
                cph.sku,
                cph.momo_price,
                cph.price AS pchome_price,
                ROW_NUMBER() OVER (
                    PARTITION BY date_trunc('day', cph.crawled_at)::date, cph.sku
                    ORDER BY cph.crawled_at DESC
                ) AS rn
            FROM competitor_price_history cph
            WHERE cph.source = 'pchome'
              AND cph.crawled_at >= CURRENT_DATE - (:days * INTERVAL '1 day')
              AND cph.momo_price IS NOT NULL
              AND cph.momo_price > 0
              AND cph.price IS NOT NULL
              AND cph.price > 0
              AND COALESCE(cph.match_score, 0) >= {PCHOME_MATCH_SCORE_FLOOR}
              AND COALESCE(cph.tags, '[]'::jsonb) ? 'identity_v2'
        )
        SELECT
            bucket_date,
            COUNT(*) AS match_count,
            ROUND(AVG((momo_price - pchome_price) / pchome_price * 100)::numeric, 2) AS avg_gap_pct,
            SUM(CASE WHEN momo_price > pchome_price * 1.05 THEN 1 ELSE 0 END) AS risk_count,
            SUM(CASE WHEN momo_price < pchome_price * 0.95 THEN 1 ELSE 0 END) AS momo_advantage_count
        FROM latest_history
        WHERE rn = 1
        GROUP BY bucket_date
        ORDER BY bucket_date
    """)
    with engine.connect() as conn:
        rows = conn.execute(query, {"days": window_days}).mappings().all()

    trend: dict = {name: [] for name in series_names}
    for row in rows:
        trend["labels"].append(_date_label(row.get("bucket_date")))
        trend["avg_gap_pct"].append(_num(row.get("avg_gap_pct")))
        trend["risk_count"].append(int(row.get("risk_count") or 0))
        trend["momo_advantage_count"].append(int(row.get("momo_advantage_count") or 0))
        trend["match_count"].append(int(row.get("match_count") or 0))
    return trend


def fetch_competitor_monthly_pressure(engine, months: int = 12) -> dict:
    """Cached entry point for the monthly competitor price-pressure series.

    ``months`` is clamped to the range 3..36 (falsy values mean 12) before it
    becomes part of the cache key.
    """
    span = min(max(int(months or 12), 3), 36)
    cache_key = f"monthly_pressure:v2:months={span}:floor={PCHOME_MATCH_SCORE_FLOOR}"

    def _produce() -> dict:
        return _fetch_competitor_monthly_pressure_uncached(engine, months=span)

    return _cached_payload(cache_key, _produce)


def _fetch_competitor_monthly_pressure_uncached(engine, months: int = 12) -> dict:
    """Monthly competitor price pressure, used by growth analysis.

    For each month, the latest history row per SKU is kept, and the result
    reports the number of matched SKUs, the average gap percentage and how
    many SKUs have MOMO more than 5% above PChome. Returns parallel lists
    keyed by series name; all lists are empty when the history table does not
    exist.
    """
    series_names = ("labels", "avg_gap_pct", "risk_count", "match_count")
    if not inspect(engine).has_table("competitor_price_history"):
        return {name: [] for name in series_names}

    span_months = max(3, min(int(months or 12), 36))
    query = text(f"""
        WITH latest_history AS (
            SELECT
                date_trunc('month', cph.crawled_at)::date AS bucket_month,
                cph.sku,
                cph.momo_price,
                cph.price AS pchome_price,
                ROW_NUMBER() OVER (
                    PARTITION BY date_trunc('month', cph.crawled_at)::date, cph.sku
                    ORDER BY cph.crawled_at DESC
                ) AS rn
            FROM competitor_price_history cph
            WHERE cph.source = 'pchome'
              AND cph.crawled_at >= date_trunc('month', CURRENT_DATE) - (:months * INTERVAL '1 month')
              AND cph.momo_price IS NOT NULL
              AND cph.momo_price > 0
              AND cph.price IS NOT NULL
              AND cph.price > 0
              AND COALESCE(cph.match_score, 0) >= {PCHOME_MATCH_SCORE_FLOOR}
              AND COALESCE(cph.tags, '[]'::jsonb) ? 'identity_v2'
        )
        SELECT
            bucket_month,
            COUNT(*) AS match_count,
            ROUND(AVG((momo_price - pchome_price) / pchome_price * 100)::numeric, 2) AS avg_gap_pct,
            SUM(CASE WHEN momo_price > pchome_price * 1.05 THEN 1 ELSE 0 END) AS risk_count
        FROM latest_history
        WHERE rn = 1
        GROUP BY bucket_month
        ORDER BY bucket_month
    """)
    with engine.connect() as conn:
        rows = conn.execute(query, {"months": span_months}).mappings().all()

    pressure: dict = {name: [] for name in series_names}
    for row in rows:
        pressure["labels"].append(_month_label(row.get("bucket_month")))
        pressure["avg_gap_pct"].append(_num(row.get("avg_gap_pct")))
        pressure["risk_count"].append(int(row.get("risk_count") or 0))
        pressure["match_count"].append(int(row.get("match_count") or 0))
    return pressure


def fetch_top_competitor_risks(engine, limit: int = 10) -> list[dict]:
    """Cached entry point for the highest-risk products list.

    ``limit`` is clamped to the range 1..50 (falsy values mean 10) before it
    becomes part of the cache key.
    """
    capped = min(max(int(limit or 10), 1), 50)
    cache_key = f"top_risks:v2:limit={capped}:floor={PCHOME_MATCH_SCORE_FLOOR}"

    def _produce() -> list[dict]:
        return _fetch_top_competitor_risks_uncached(engine, limit=capped)

    return _cached_payload(cache_key, _produce)


def _fetch_top_competitor_risks_uncached(engine, limit: int = 10) -> list[dict]:
    """High-risk products where MOMO is currently priced above PChome.

    Joins each SKU's most recently crawled valid PChome match to the ACTIVE
    product's latest MOMO price and keeps rows where MOMO is more than 5%
    above PChome, ordered by gap percentage and then gap amount, both
    descending. Returns ``[]`` when ``competitor_prices`` does not exist.
    """
    if not inspect(engine).has_table("competitor_prices"):
        return []

    row_cap = max(1, min(int(limit or 10), 50))
    query = text(f"""
        WITH valid_competitor AS (
            SELECT DISTINCT ON (cp.sku)
                cp.sku,
                cp.price AS pchome_price,
                cp.competitor_product_id,
                cp.competitor_product_name,
                cp.match_score,
                cp.crawled_at
            FROM competitor_prices cp
            WHERE cp.source = 'pchome'
              AND (cp.expires_at IS NULL OR cp.expires_at > CURRENT_TIMESTAMP)
              AND cp.price IS NOT NULL
              AND cp.price > 0
              AND COALESCE(cp.match_score, 0) >= {PCHOME_MATCH_SCORE_FLOOR}
              AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2'
            ORDER BY cp.sku, cp.crawled_at DESC NULLS LAST
        )
        SELECT
            p.i_code AS sku,
            p.name,
            p.category,
            latest_price.momo_price,
            vc.pchome_price,
            vc.competitor_product_id,
            vc.competitor_product_name,
            vc.match_score,
            vc.crawled_at,
            (latest_price.momo_price - vc.pchome_price) AS gap_amount,
            ((latest_price.momo_price - vc.pchome_price) / vc.pchome_price * 100) AS gap_pct
        FROM valid_competitor vc
        JOIN products p
          ON p.i_code = vc.sku
         AND p.status = 'ACTIVE'
        JOIN LATERAL (
            SELECT pr.price AS momo_price
            FROM price_records pr
            WHERE pr.product_id = p.id
            ORDER BY pr.timestamp DESC, pr.id DESC
            LIMIT 1
        ) latest_price ON TRUE
        WHERE latest_price.momo_price > vc.pchome_price * 1.05
        ORDER BY gap_pct DESC NULLS LAST, gap_amount DESC NULLS LAST
        LIMIT :limit
    """)
    with engine.connect() as conn:
        rows = conn.execute(query, {"limit": row_cap}).mappings().all()

    return [
        {
            "sku": str(row.get("sku") or ""),
            "name": row.get("name") or "",
            "category": row.get("category") or "",
            "momo_price": _num(row.get("momo_price")),
            "pchome_price": _num(row.get("pchome_price")),
            "gap_amount": _num(row.get("gap_amount")),
            "gap_pct": _num(row.get("gap_pct")),
            "match_score": _num(row.get("match_score")),
            "pchome_id": row.get("competitor_product_id"),
            "pchome_name": row.get("competitor_product_name") or "",
            "crawled_at": _date_label(row.get("crawled_at")),
        }
        for row in rows
    ]


def fetch_competitor_review_queue(engine, limit: int = 12) -> list[dict]:
    """Actionable PChome match-review queue shared by Dashboard / AI / PPT.

    ``limit`` is clamped to the range 1..50 (falsy values mean 12) before it
    becomes part of the cache key.
    """
    capped = min(max(int(limit or 12), 1), 50)
    cache_key = f"review_queue:v1:limit={capped}:floor={PCHOME_MATCH_SCORE_FLOOR}"

    def _produce() -> list[dict]:
        return _fetch_competitor_review_queue_uncached(engine, limit=capped)

    return _cached_payload(cache_key, _produce)


def fetch_competitor_review_queue_page(
    engine,
    page: int = 1,
    per_page: int = 50,
    search_query: str = "",
    category: str = "",
    status_filter: str = "",
) -> dict:
    """Cached, paginated PChome review queue for operator-facing pages.

    Arguments are normalised first: ``page`` is at least 1, ``per_page`` is
    clamped to 1..100, text filters are stripped, and a ``status_filter`` that
    is not a known filter group is dropped. The result is cached for at most
    300 seconds (less if the module TTL is shorter).
    """
    page = max(1, int(page or 1))
    per_page = min(max(int(per_page or 50), 1), 100)
    search_query = (search_query or "").strip()
    category = (category or "").strip()
    status_filter = (status_filter or "").strip()
    if status_filter not in REVIEW_STATUS_FILTER_GROUPS:
        status_filter = ""

    # The search text is lower-cased in the key only; the query lower-cases it
    # again, so differently-cased searches share one cache entry.
    cache_key = ":".join(
        (
            "review_queue_page:v1",
            f"page={page}",
            f"per={per_page}",
            f"q={search_query.lower()}",
            f"cat={category}",
            f"status={status_filter}",
            f"floor={PCHOME_MATCH_SCORE_FLOOR}",
        )
    )

    def _produce() -> dict:
        return _fetch_competitor_review_queue_page_uncached(
            engine,
            page=page,
            per_page=per_page,
            search_query=search_query,
            category=category,
            status_filter=status_filter,
        )

    page_ttl = min(COMPETITOR_INTEL_CACHE_TTL_SECONDS, 300)
    return _cached_payload(cache_key, _produce, ttl_seconds=page_ttl)


def _review_queue_cte_and_filter(
    search_query: str = "",
    category: str = "",
    status_filter: str = "",
) -> tuple[str, dict[str, Any]]:
    """Build the shared ``WITH ...`` prefix and bind parameters for the queue.

    The returned SQL defines a ``review_rows`` CTE: active products (latest
    price) that have no valid PChome match and whose latest attempt status is
    in the selected group, plus a ``priority_rank`` column for ordering.
    Callers append their own ``SELECT ... FROM review_rows``.

    Status values are inlined as literals; they always come from the module's
    own constants, never from caller input. Search text and category are
    passed as bind parameters.
    """
    filter_key = (status_filter or "").strip()
    statuses = REVIEW_STATUS_FILTER_GROUPS.get(filter_key) or tuple(ACTIONABLE_ATTEMPT_STATUSES)
    quoted_statuses = ", ".join("'" + status + "'" for status in statuses)

    bind_params: dict[str, Any] = {}
    conditions = [
        "lm.rn = 1",
        "vc.sku IS NULL",
        "la.attempt_status IN (" + quoted_statuses + ")",
    ]
    if search_query:
        bind_params["search_like"] = "%" + search_query.lower() + "%"
        conditions.append("(LOWER(lm.name) LIKE :search_like OR LOWER(lm.sku) LIKE :search_like)")
    if category:
        bind_params["category"] = category
        conditions.append("lm.category = :category")

    where_sql = "\n          AND ".join(conditions)
    cte = f"""
        WITH latest_momo AS (
            SELECT
                p.id AS product_id,
                p.i_code AS sku,
                p.name,
                p.category,
                pr.price AS momo_price,
                ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC, pr.id DESC) AS rn
            FROM products p
            JOIN price_records pr ON pr.product_id = p.id
            WHERE p.status = 'ACTIVE'
        ),
        valid_competitor AS (
            SELECT DISTINCT ON (cp.sku)
                cp.sku
            FROM competitor_prices cp
            WHERE cp.source = 'pchome'
              AND (cp.expires_at IS NULL OR cp.expires_at > CURRENT_TIMESTAMP)
              AND cp.price IS NOT NULL
              AND cp.price > 0
              AND COALESCE(cp.match_score, 0) >= {PCHOME_MATCH_SCORE_FLOOR}
              AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2'
            ORDER BY cp.sku, cp.crawled_at DESC NULLS LAST
        ),
        latest_attempt AS (
            SELECT DISTINCT ON (cma.sku)
                cma.sku,
                cma.attempt_status,
                cma.candidate_count,
                cma.best_competitor_product_id,
                cma.best_competitor_product_name,
                cma.best_competitor_price,
                cma.best_match_score,
                cma.error_message,
                cma.attempted_at
            FROM competitor_match_attempts cma
            WHERE cma.source = 'pchome'
            ORDER BY cma.sku, cma.attempted_at DESC NULLS LAST
        ),
        review_rows AS (
            SELECT
                lm.sku,
                lm.name,
                lm.category,
                lm.momo_price,
                la.attempt_status,
                la.candidate_count,
                la.best_competitor_product_id,
                la.best_competitor_product_name,
                la.best_competitor_price,
                la.best_match_score,
                la.error_message,
                la.attempted_at,
                CASE
                    WHEN la.attempt_status IN ('unit_comparable', 'refresh_unit_comparable') THEN 0
                    WHEN la.attempt_status = 'identity_veto' THEN 1
                    WHEN la.attempt_status = 'low_score' THEN 2
                    WHEN la.attempt_status = 'expired_match' THEN 3
                    ELSE 4
                END AS priority_rank
            FROM latest_momo lm
            JOIN latest_attempt la ON la.sku = lm.sku
            LEFT JOIN valid_competitor vc ON vc.sku = lm.sku
            WHERE {where_sql}
        )
    """
    return cte, bind_params


def _fetch_competitor_review_queue_page_uncached(
    engine,
    page: int = 1,
    per_page: int = 50,
    search_query: str = "",
    category: str = "",
    status_filter: str = "",
) -> dict:
    """Fetch one page of the PChome review queue straight from the database.

    Args:
        engine: SQLAlchemy engine used for inspection and querying.
        page: 1-based page number; values below 1 are raised to 1.
        per_page: Page size, clamped to 1..100.
        search_query: Optional text matched against product name or SKU.
        category: Optional exact category filter.
        status_filter: Optional review filter group key.

    Returns:
        ``{"items", "total", "page", "per_page", "status_filter"}`` where
        ``total`` counts all rows matching the filters. When any required
        table is missing, the same shape is returned with no items.

    ``page`` and ``per_page`` are normalised before the table check so the
    empty payload reports the same values a populated payload would.
    """
    page = max(1, int(page or 1))
    per_page = max(1, min(int(per_page or 50), 100))

    inspector = inspect(engine)
    required_tables = ("products", "price_records", "competitor_prices", "competitor_match_attempts")
    if not all(inspector.has_table(table) for table in required_tables):
        return {
            "items": [],
            "total": 0,
            "page": page,
            "per_page": per_page,
            "status_filter": status_filter,
        }

    cte, params = _review_queue_cte_and_filter(
        search_query=search_query,
        category=category,
        status_filter=status_filter,
    )
    page_params = {
        **params,
        "limit": per_page,
        "offset": (page - 1) * per_page,
    }
    # The count query takes only the filter parameters; limit/offset belong to
    # the page query alone.
    count_sql = text(cte + " SELECT COUNT(*) AS total FROM review_rows")
    page_sql = text(cte + """
        SELECT *
        FROM review_rows
        ORDER BY
            priority_rank ASC,
            momo_price DESC NULLS LAST,
            best_match_score DESC NULLS LAST,
            attempted_at DESC NULLS LAST
        LIMIT :limit OFFSET :offset
    """)

    with engine.connect() as conn:
        total = int(conn.execute(count_sql, params).scalar() or 0)
        rows = conn.execute(page_sql, page_params).mappings().all()

    return {
        "items": [_format_competitor_review_item(dict(row)) for row in rows],
        "total": total,
        "page": page,
        "per_page": per_page,
        "status_filter": status_filter,
    }


def _fetch_competitor_review_queue_uncached(engine, limit: int = 12) -> list[dict]:
    """Fetch the top of the actionable PChome review queue from the database.

    Args:
        engine: SQLAlchemy engine used for inspection and querying.
        limit: Maximum number of items, clamped to 1..50.

    Returns:
        Formatted review items ordered by status priority, then MOMO price,
        best match score and attempt time (all descending). Returns ``[]``
        when any required table is missing.

    The query reuses the CTE built by ``_review_queue_cte_and_filter`` with no
    filters, which selects every actionable status. Sharing it keeps this
    queue and the paginated queue on one definition of "reviewable row" and
    one priority ranking.
    """
    inspector = inspect(engine)
    required_tables = ("products", "price_records", "competitor_prices", "competitor_match_attempts")
    if not all(inspector.has_table(table) for table in required_tables):
        return []

    limit = max(1, min(int(limit or 12), 50))
    cte, params = _review_queue_cte_and_filter()
    sql = text(cte + """
        SELECT
            sku,
            name,
            category,
            momo_price,
            attempt_status,
            candidate_count,
            best_competitor_product_id,
            best_competitor_product_name,
            best_competitor_price,
            best_match_score,
            error_message,
            attempted_at
        FROM review_rows
        ORDER BY
            priority_rank ASC,
            momo_price DESC NULLS LAST,
            best_match_score DESC NULLS LAST,
            attempted_at DESC NULLS LAST
        LIMIT :limit
    """)
    with engine.connect() as conn:
        rows = conn.execute(sql, {**params, "limit": limit}).mappings().all()

    return [_format_competitor_review_item(dict(row)) for row in rows]


def fetch_competitor_comparison_results(
    engine,
    start_date: Optional[Union[date, datetime, str]] = None,
    end_date: Optional[Union[date, datetime, str]] = None,
    limit: int = 30,
) -> list[dict]:
    """Return stored MOMO vs PChome comparison rows in the legacy competitor PPT shape.

    Everything is read from database tables; no live crawl is performed.

    Args:
        engine: SQLAlchemy engine used both for table inspection and for the query.
        start_date: Optional lower bound on ``daily_sales.date`` when summing
            revenue. Only used when the ``daily_sales`` table exists.
        end_date: Optional upper bound on ``daily_sales.date``, same condition.
        limit: Maximum number of rows. Falsy values fall back to 30 and the
            result is clamped to the range 1..100.

    Returns:
        One dict per ACTIVE product whose latest MOMO price is positive. Each
        dict carries the valid PChome match (if any), the latest match-attempt
        diagnostics, and the summed revenue. An empty list is returned when any
        of ``products``, ``price_records`` or ``competitor_prices`` is missing.
    """
    row_limit = min(max(int(limit or 30), 1), 100)

    inspector = inspect(engine)
    required_tables = ("products", "price_records", "competitor_prices")
    if not all(inspector.has_table(table) for table in required_tables):
        return []

    sales_available = inspector.has_table("daily_sales")
    attempts_available = inspector.has_table("competitor_match_attempts")
    bind_params: dict[str, Any] = {"limit": row_limit}

    # Placeholder CTE with zero rows so the main query can always LEFT JOIN
    # latest_attempt, even when the attempts table does not exist.
    attempts_cte = """
        latest_attempt AS (
            SELECT
                NULL AS sku,
                NULL AS attempt_status,
                NULL AS candidate_count,
                NULL AS best_competitor_product_id,
                NULL AS best_competitor_product_name,
                NULL AS best_competitor_price,
                NULL AS best_match_score,
                NULL AS error_message,
                NULL AS attempted_at
            WHERE FALSE
        )
    """
    if attempts_available:
        # Newest PChome attempt per SKU.
        attempts_cte = """
        latest_attempt AS (
            SELECT DISTINCT ON (cma.sku)
                cma.sku,
                cma.attempt_status,
                cma.candidate_count,
                cma.best_competitor_product_id,
                cma.best_competitor_product_name,
                cma.best_competitor_price,
                cma.best_match_score,
                cma.error_message,
                cma.attempted_at
            FROM competitor_match_attempts cma
            WHERE cma.source = 'pchome'
            ORDER BY cma.sku, cma.attempted_at DESC NULLS LAST
        )
        """

    if sales_available:
        # Each entry: bind name, caller-supplied value, SQL predicate.
        date_filters = (
            ("start_date", start_date, "DATE(s.date) >= DATE(:start_date)"),
            ("end_date", end_date, "DATE(s.date) <= DATE(:end_date)"),
        )
        predicates = []
        for bind_name, raw_value, predicate in date_filters:
            if raw_value:
                predicates.append(predicate)
                # Accept "YYYY/MM/DD" and datetime values; keep the date part only.
                bind_params[bind_name] = str(raw_value).replace("/", "-")[:10]
        revenue_filter = ""
        if predicates:
            revenue_filter = "WHERE " + " AND ".join(predicates)
        revenue_cte = f""",
        sales_rank AS (
            SELECT
                s.product_id,
                SUM(COALESCE(s.revenue, 0)) AS momo_revenue
            FROM daily_sales s
            {revenue_filter}
            GROUP BY s.product_id
        )
        """
        revenue_join = "LEFT JOIN sales_rank sr ON sr.product_id = lm.product_id"
        revenue_column = "COALESCE(sr.momo_revenue, 0) AS momo_revenue,"
        # Highest revenue first, then matched rows, then largest relative gap.
        ordering = (
            "COALESCE(sr.momo_revenue, 0) DESC, "
            "(vc.pchome_price IS NULL), "
            "ABS((lm.momo_price - vc.pchome_price) / vc.pchome_price * 100) DESC NULLS LAST"
        )
    else:
        revenue_cte = ""
        revenue_join = ""
        revenue_column = "0 AS momo_revenue,"
        # Without sales data, the MOMO price stands in for the primary sort key.
        ordering = (
            "lm.momo_price DESC NULLS LAST, "
            "(vc.pchome_price IS NULL), "
            "ABS((lm.momo_price - vc.pchome_price) / vc.pchome_price * 100) DESC NULLS LAST"
        )

    # NOTE(review): the percentage expressions divide inside SQL; if both price
    # columns are integer types PostgreSQL would truncate the quotient before
    # the "* 100" -- confirm the column types.
    query = text(f"""
        WITH latest_momo AS (
            SELECT
                p.id AS product_id,
                p.i_code AS sku,
                p.name,
                pr.price AS momo_price,
                ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC, pr.id DESC) AS rn
            FROM products p
            JOIN price_records pr ON pr.product_id = p.id
            WHERE p.status = 'ACTIVE'
        ),
        valid_competitor AS (
            SELECT DISTINCT ON (cp.sku)
                cp.sku,
                cp.price AS pchome_price,
                cp.competitor_product_id,
                cp.competitor_product_name,
                cp.match_score
            FROM competitor_prices cp
            WHERE cp.source = 'pchome'
              AND (cp.expires_at IS NULL OR cp.expires_at > CURRENT_TIMESTAMP)
              AND cp.price IS NOT NULL
              AND cp.price > 0
              AND COALESCE(cp.match_score, 0) >= {PCHOME_MATCH_SCORE_FLOOR}
              AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2'
            ORDER BY cp.sku, cp.crawled_at DESC NULLS LAST
        ),
        {attempts_cte}
        {revenue_cte}
        SELECT
            lm.sku,
            lm.name,
            lm.momo_price,
            vc.pchome_price,
            vc.competitor_product_id,
            vc.competitor_product_name,
            vc.match_score,
            la.attempt_status,
            la.candidate_count,
            la.best_competitor_product_id,
            la.best_competitor_product_name,
            la.best_competitor_price,
            la.best_match_score,
            la.error_message,
            la.attempted_at,
            {revenue_column}
            (vc.pchome_price - lm.momo_price) AS price_diff,
            ((vc.pchome_price - lm.momo_price) / lm.momo_price * 100) AS price_diff_pct
        FROM latest_momo lm
        LEFT JOIN valid_competitor vc ON vc.sku = lm.sku
        LEFT JOIN latest_attempt la ON la.sku = lm.sku
        {revenue_join}
        WHERE lm.rn = 1
          AND lm.momo_price > 0
        ORDER BY {ordering}
        LIMIT :limit
    """)

    with engine.connect() as conn:
        cursor = conn.execute(query, bind_params)
        rows = cursor.mappings().all()

    def _to_result(row) -> dict:
        """Convert one row mapping into the legacy comparison dict."""
        pchome_price = row.get("pchome_price")
        pchome_id = row.get("competitor_product_id")
        momo_name = row.get("name") or ""
        candidate_name = row.get("best_competitor_product_name") or ""

        # A row counts as matched only when the valid_competitor join produced a price.
        is_found = bool(pchome_price)
        if is_found:
            status = "matched"
        else:
            status = row.get("attempt_status") or "no_valid_match"

        unit_comparison = _build_unit_comparison_for_attempt({
            "attempt_status": status,
            "name": momo_name,
            "best_competitor_product_name": candidate_name,
            "momo_price": row.get("momo_price"),
            "best_competitor_price": row.get("best_competitor_price"),
        })

        if pchome_id:
            pc_url = f"https://24h.pchome.com.tw/prod/{pchome_id}"
        else:
            pc_url = ""

        return {
            "found": is_found,
            "momo_icode": str(row.get("sku") or ""),
            "momo_name": momo_name,
            "momo_price": _num(row.get("momo_price")),
            "pc_name": row.get("competitor_product_name") or "",
            "pc_price": _num(pchome_price),
            "pc_url": pc_url,
            "candidate_pc_id": row.get("best_competitor_product_id"),
            "candidate_pc_name": candidate_name,
            "candidate_pc_price": _num(row.get("best_competitor_price")),
            "price_diff": _num(row.get("price_diff")),
            "price_diff_pct": _num(row.get("price_diff_pct")),
            "match_score": _num(row.get("match_score")),
            "momo_revenue": _num(row.get("momo_revenue")),
            "match_status": status,
            "match_status_label": _attempt_status_label(status),
            "action_label": _attempt_action_label(status),
            "candidate_count": int(row.get("candidate_count") or 0),
            "best_match_score": _num(row.get("best_match_score")),
            "match_diagnostic": row.get("error_message") or "",
            "unit_comparison": unit_comparison,
        }

    return [_to_result(row) for row in rows]


def build_competitor_intel_payload(engine, days: int = 30) -> dict:
    """Assemble the summary payload shared by the page, AI and PPT consumers.

    Args:
        engine: Database engine handed unchanged to each fetch helper.
        days: Window passed to the gap-trend fetch as ``days``.

    Returns:
        A dict with ``coverage``, ``trend``, ``top_risks`` (limit 10),
        ``review_queue`` (limit 12) and ``match_score_floor``.
    """
    payload: dict = {}
    payload["coverage"] = fetch_competitor_coverage(engine)
    payload["trend"] = fetch_competitor_gap_trend(engine, days=days)
    payload["top_risks"] = fetch_top_competitor_risks(engine, limit=10)
    payload["review_queue"] = fetch_competitor_review_queue(engine, limit=12)
    payload["match_score_floor"] = PCHOME_MATCH_SCORE_FLOOR
    return payload