#!/usr/bin/env python3 # -*- coding: utf-8 -*- """PChome / MOMO 競價情報共用資料出口。 短期 canonical source: - competitor_prices:目前有效配對 - competitor_price_history:價格歷史趨勢 - competitor_match_attempts:未配對與低信心診斷 """ from __future__ import annotations import os import pickle import time from datetime import date, datetime, timedelta from pathlib import Path from threading import Lock from typing import Any, Optional, Union from sqlalchemy import inspect, text PCHOME_MATCH_SCORE_FLOOR = 0.76 COMPETITOR_INTEL_CACHE_TTL_SECONDS = int(os.getenv("COMPETITOR_INTEL_CACHE_TTL_SECONDS", "1800")) _BASE_DIR = Path(__file__).resolve().parents[1] _CACHE_FILE = _BASE_DIR / "data" / "competitor_intel_cache.pkl" _CACHE_LOCK = Lock() _MEM_CACHE: dict[str, dict[str, Any]] = {} def _num(value: Any) -> float: try: return float(value or 0) except (TypeError, ValueError): return 0.0 def _date_label(value: Any) -> str: if hasattr(value, "strftime"): return value.strftime("%Y-%m-%d") return str(value or "") def _month_label(value: Any) -> str: if hasattr(value, "strftime"): return value.strftime("%Y-%m") return str(value or "")[:7] def clear_competitor_intel_cache() -> None: """Clear cached PChome/MOMO intelligence after crawler/import updates.""" with _CACHE_LOCK: _MEM_CACHE.clear() try: if _CACHE_FILE.exists(): _CACHE_FILE.unlink() except OSError: pass def _load_shared_cache() -> dict[str, dict[str, Any]]: if not _CACHE_FILE.exists(): return {} try: with _CACHE_FILE.open("rb") as handle: payload = pickle.load(handle) return payload if isinstance(payload, dict) else {} except Exception: return {} def _write_shared_cache(payload: dict[str, dict[str, Any]]) -> None: try: _CACHE_FILE.parent.mkdir(parents=True, exist_ok=True) tmp_file = _CACHE_FILE.with_suffix(f".{os.getpid()}.tmp") with tmp_file.open("wb") as handle: pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL) os.replace(tmp_file, _CACHE_FILE) except Exception: try: if "tmp_file" in locals() and tmp_file.exists(): tmp_file.unlink() except OSError: pass def _cached_payload(cache_key: str, producer, ttl_seconds: int = COMPETITOR_INTEL_CACHE_TTL_SECONDS): if ttl_seconds <= 0: return producer() now = time.time() with _CACHE_LOCK: entry = _MEM_CACHE.get(cache_key) if entry and now - float(entry.get("time", 0)) < ttl_seconds: return entry.get("value") shared = _load_shared_cache() entry = shared.get(cache_key) if entry and now - float(entry.get("time", 0)) < ttl_seconds: _MEM_CACHE[cache_key] = entry return entry.get("value") value = producer() entry = {"time": now, "value": value} with _CACHE_LOCK: _MEM_CACHE[cache_key] = entry shared = _load_shared_cache() shared[cache_key] = entry stale_before = now - max(ttl_seconds * 4, 3600) shared = { key: item for key, item in shared.items() if isinstance(item, dict) and float(item.get("time", 0)) >= stale_before } _write_shared_cache(shared) return value def fetch_competitor_coverage(engine) -> dict: return _cached_payload( f"coverage:v2:floor={PCHOME_MATCH_SCORE_FLOOR}", lambda: _fetch_competitor_coverage_uncached(engine), ) def _fetch_competitor_coverage_uncached(engine) -> dict: """讀取目前 PChome 比價覆蓋率與待審分類。""" if not inspect(engine).has_table("competitor_prices"): return { "active_with_price": 0, "valid_matches": 0, "pending": 0, "match_rate": 0, "attempt_status": {}, } sql = text(f""" WITH latest_momo AS ( SELECT p.id AS product_id, p.i_code AS sku, pr.price AS momo_price, ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC, pr.id DESC) AS rn FROM products p JOIN price_records pr ON pr.product_id = p.id WHERE p.status = 'ACTIVE' ), valid_competitor AS ( SELECT DISTINCT ON (cp.sku) cp.sku FROM competitor_prices cp WHERE cp.source = 'pchome' AND (cp.expires_at IS NULL OR cp.expires_at > CURRENT_TIMESTAMP) AND cp.price IS NOT NULL AND cp.price > 0 AND COALESCE(cp.match_score, 0) >= {PCHOME_MATCH_SCORE_FLOOR} AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2' ORDER BY cp.sku, cp.crawled_at DESC NULLS LAST ), latest_attempt AS ( SELECT DISTINCT ON (sku) sku, attempt_status FROM competitor_match_attempts WHERE source = 'pchome' ORDER BY sku, attempted_at DESC NULLS LAST ) SELECT (SELECT COUNT(*) FROM latest_momo WHERE rn = 1) AS active_with_price, (SELECT COUNT(*) FROM valid_competitor) AS valid_matches, (SELECT COUNT(*) FROM latest_momo lm LEFT JOIN valid_competitor vc ON vc.sku = lm.sku WHERE lm.rn = 1 AND vc.sku IS NULL) AS pending, COALESCE(la.attempt_status, 'never_attempted') AS attempt_status, COUNT(*) AS status_count FROM latest_momo lm LEFT JOIN valid_competitor vc ON vc.sku = lm.sku LEFT JOIN latest_attempt la ON la.sku = lm.sku WHERE lm.rn = 1 AND vc.sku IS NULL GROUP BY COALESCE(la.attempt_status, 'never_attempted') """) with engine.connect() as conn: rows = conn.execute(sql).mappings().all() active = int(rows[0].get("active_with_price") or 0) if rows else 0 valid = int(rows[0].get("valid_matches") or 0) if rows else 0 pending = int(rows[0].get("pending") or 0) if rows else 0 statuses = { str(row.get("attempt_status")): int(row.get("status_count") or 0) for row in rows } return { "active_with_price": active, "valid_matches": valid, "pending": pending, "match_rate": round(valid / max(active, 1) * 100, 1), "attempt_status": statuses, "match_score_floor": PCHOME_MATCH_SCORE_FLOOR, } def fetch_competitor_gap_trend(engine, days: int = 30) -> dict: days = max(7, min(int(days or 30), 120)) return _cached_payload( f"gap_trend:v2:days={days}:floor={PCHOME_MATCH_SCORE_FLOOR}", lambda: _fetch_competitor_gap_trend_uncached(engine, days=days), ) def _fetch_competitor_gap_trend_uncached(engine, days: int = 30) -> dict: """近 N 天 PChome 價差壓力趨勢。""" if not inspect(engine).has_table("competitor_price_history"): return {"labels": [], "avg_gap_pct": [], "risk_count": [], "momo_advantage_count": [], "match_count": []} days = max(7, min(int(days or 30), 120)) sql = text(f""" WITH latest_history AS ( SELECT date_trunc('day', cph.crawled_at)::date AS bucket_date, cph.sku, cph.momo_price, cph.price AS pchome_price, ROW_NUMBER() OVER ( PARTITION BY date_trunc('day', cph.crawled_at)::date, cph.sku ORDER BY cph.crawled_at DESC ) AS rn FROM competitor_price_history cph WHERE cph.source = 'pchome' AND cph.crawled_at >= CURRENT_DATE - (:days * INTERVAL '1 day') AND cph.momo_price IS NOT NULL AND cph.momo_price > 0 AND cph.price IS NOT NULL AND cph.price > 0 AND COALESCE(cph.match_score, 0) >= {PCHOME_MATCH_SCORE_FLOOR} AND COALESCE(cph.tags, '[]'::jsonb) ? 'identity_v2' ) SELECT bucket_date, COUNT(*) AS match_count, ROUND(AVG((momo_price - pchome_price) / pchome_price * 100)::numeric, 2) AS avg_gap_pct, SUM(CASE WHEN momo_price > pchome_price * 1.05 THEN 1 ELSE 0 END) AS risk_count, SUM(CASE WHEN momo_price < pchome_price * 0.95 THEN 1 ELSE 0 END) AS momo_advantage_count FROM latest_history WHERE rn = 1 GROUP BY bucket_date ORDER BY bucket_date """) with engine.connect() as conn: rows = conn.execute(sql, {"days": days}).mappings().all() return { "labels": [_date_label(row.get("bucket_date")) for row in rows], "avg_gap_pct": [_num(row.get("avg_gap_pct")) for row in rows], "risk_count": [int(row.get("risk_count") or 0) for row in rows], "momo_advantage_count": [int(row.get("momo_advantage_count") or 0) for row in rows], "match_count": [int(row.get("match_count") or 0) for row in rows], } def fetch_competitor_monthly_pressure(engine, months: int = 12) -> dict: months = max(3, min(int(months or 12), 36)) return _cached_payload( f"monthly_pressure:v2:months={months}:floor={PCHOME_MATCH_SCORE_FLOOR}", lambda: _fetch_competitor_monthly_pressure_uncached(engine, months=months), ) def _fetch_competitor_monthly_pressure_uncached(engine, months: int = 12) -> dict: """月度競品價格壓力,用於 growth analysis。""" if not inspect(engine).has_table("competitor_price_history"): return {"labels": [], "avg_gap_pct": [], "risk_count": [], "match_count": []} months = max(3, min(int(months or 12), 36)) sql = text(f""" WITH latest_history AS ( SELECT date_trunc('month', cph.crawled_at)::date AS bucket_month, cph.sku, cph.momo_price, cph.price AS pchome_price, ROW_NUMBER() OVER ( PARTITION BY date_trunc('month', cph.crawled_at)::date, cph.sku ORDER BY cph.crawled_at DESC ) AS rn FROM competitor_price_history cph WHERE cph.source = 'pchome' AND cph.crawled_at >= date_trunc('month', CURRENT_DATE) - (:months * INTERVAL '1 month') AND cph.momo_price IS NOT NULL AND cph.momo_price > 0 AND cph.price IS NOT NULL AND cph.price > 0 AND COALESCE(cph.match_score, 0) >= {PCHOME_MATCH_SCORE_FLOOR} AND COALESCE(cph.tags, '[]'::jsonb) ? 'identity_v2' ) SELECT bucket_month, COUNT(*) AS match_count, ROUND(AVG((momo_price - pchome_price) / pchome_price * 100)::numeric, 2) AS avg_gap_pct, SUM(CASE WHEN momo_price > pchome_price * 1.05 THEN 1 ELSE 0 END) AS risk_count FROM latest_history WHERE rn = 1 GROUP BY bucket_month ORDER BY bucket_month """) with engine.connect() as conn: rows = conn.execute(sql, {"months": months}).mappings().all() return { "labels": [_month_label(row.get("bucket_month")) for row in rows], "avg_gap_pct": [_num(row.get("avg_gap_pct")) for row in rows], "risk_count": [int(row.get("risk_count") or 0) for row in rows], "match_count": [int(row.get("match_count") or 0) for row in rows], } def fetch_top_competitor_risks(engine, limit: int = 10) -> list[dict]: limit = max(1, min(int(limit or 10), 50)) return _cached_payload( f"top_risks:v2:limit={limit}:floor={PCHOME_MATCH_SCORE_FLOOR}", lambda: _fetch_top_competitor_risks_uncached(engine, limit=limit), ) def _fetch_top_competitor_risks_uncached(engine, limit: int = 10) -> list[dict]: """目前 MOMO 比 PChome 貴的高風險商品。""" if not inspect(engine).has_table("competitor_prices"): return [] limit = max(1, min(int(limit or 10), 50)) sql = text(f""" WITH valid_competitor AS ( SELECT DISTINCT ON (cp.sku) cp.sku, cp.price AS pchome_price, cp.competitor_product_id, cp.competitor_product_name, cp.match_score, cp.crawled_at FROM competitor_prices cp WHERE cp.source = 'pchome' AND (cp.expires_at IS NULL OR cp.expires_at > CURRENT_TIMESTAMP) AND cp.price IS NOT NULL AND cp.price > 0 AND COALESCE(cp.match_score, 0) >= {PCHOME_MATCH_SCORE_FLOOR} AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2' ORDER BY cp.sku, cp.crawled_at DESC NULLS LAST ) SELECT p.i_code AS sku, p.name, p.category, latest_price.momo_price, vc.pchome_price, vc.competitor_product_id, vc.competitor_product_name, vc.match_score, vc.crawled_at, (latest_price.momo_price - vc.pchome_price) AS gap_amount, ((latest_price.momo_price - vc.pchome_price) / vc.pchome_price * 100) AS gap_pct FROM valid_competitor vc JOIN products p ON p.i_code = vc.sku AND p.status = 'ACTIVE' JOIN LATERAL ( SELECT pr.price AS momo_price FROM price_records pr WHERE pr.product_id = p.id ORDER BY pr.timestamp DESC, pr.id DESC LIMIT 1 ) latest_price ON TRUE WHERE latest_price.momo_price > vc.pchome_price * 1.05 ORDER BY gap_pct DESC NULLS LAST, gap_amount DESC NULLS LAST LIMIT :limit """) with engine.connect() as conn: rows = conn.execute(sql, {"limit": limit}).mappings().all() result = [] for row in rows: result.append({ "sku": str(row.get("sku") or ""), "name": row.get("name") or "", "category": row.get("category") or "", "momo_price": _num(row.get("momo_price")), "pchome_price": _num(row.get("pchome_price")), "gap_amount": _num(row.get("gap_amount")), "gap_pct": _num(row.get("gap_pct")), "match_score": _num(row.get("match_score")), "pchome_id": row.get("competitor_product_id"), "pchome_name": row.get("competitor_product_name") or "", "crawled_at": _date_label(row.get("crawled_at")), }) return result def fetch_competitor_comparison_results( engine, start_date: Optional[Union[date, datetime, str]] = None, end_date: Optional[Union[date, datetime, str]] = None, limit: int = 30, ) -> list[dict]: """輸出與 legacy competitor PPT 相容的比價結果,不再 live crawl。""" limit = max(1, min(int(limit or 30), 100)) inspector = inspect(engine) if not ( inspector.has_table("products") and inspector.has_table("price_records") and inspector.has_table("competitor_prices") ): return [] has_daily_sales = inspector.has_table("daily_sales") has_match_attempts = inspector.has_table("competitor_match_attempts") sales_cte = "" sales_join = "" sales_select = "0 AS momo_revenue," attempt_cte = """ latest_attempt AS ( SELECT NULL AS sku, NULL AS attempt_status, NULL AS candidate_count, NULL AS best_match_score, NULL AS error_message, NULL AS attempted_at WHERE FALSE ) """ order_expr = ( "lm.momo_price DESC NULLS LAST, " "(vc.pchome_price IS NULL), " "ABS((lm.momo_price - vc.pchome_price) / vc.pchome_price * 100) DESC NULLS LAST" ) params: dict[str, Any] = {"limit": limit} if has_daily_sales: where = [] if start_date: where.append("DATE(s.date) >= DATE(:start_date)") params["start_date"] = str(start_date).replace("/", "-")[:10] if end_date: where.append("DATE(s.date) <= DATE(:end_date)") params["end_date"] = str(end_date).replace("/", "-")[:10] sales_where = "WHERE " + " AND ".join(where) if where else "" sales_cte = f""", sales_rank AS ( SELECT s.product_id, SUM(COALESCE(s.revenue, 0)) AS momo_revenue FROM daily_sales s {sales_where} GROUP BY s.product_id ) """ sales_join = "LEFT JOIN sales_rank sr ON sr.product_id = lm.product_id" sales_select = "COALESCE(sr.momo_revenue, 0) AS momo_revenue," order_expr = ( "COALESCE(sr.momo_revenue, 0) DESC, " "(vc.pchome_price IS NULL), " "ABS((lm.momo_price - vc.pchome_price) / vc.pchome_price * 100) DESC NULLS LAST" ) if has_match_attempts: attempt_cte = """ latest_attempt AS ( SELECT DISTINCT ON (cma.sku) cma.sku, cma.attempt_status, cma.candidate_count, cma.best_match_score, cma.error_message, cma.attempted_at FROM competitor_match_attempts cma WHERE cma.source = 'pchome' ORDER BY cma.sku, cma.attempted_at DESC NULLS LAST ) """ sql = text(f""" WITH latest_momo AS ( SELECT p.id AS product_id, p.i_code AS sku, p.name, pr.price AS momo_price, ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC, pr.id DESC) AS rn FROM products p JOIN price_records pr ON pr.product_id = p.id WHERE p.status = 'ACTIVE' ), valid_competitor AS ( SELECT DISTINCT ON (cp.sku) cp.sku, cp.price AS pchome_price, cp.competitor_product_id, cp.competitor_product_name, cp.match_score FROM competitor_prices cp WHERE cp.source = 'pchome' AND (cp.expires_at IS NULL OR cp.expires_at > CURRENT_TIMESTAMP) AND cp.price IS NOT NULL AND cp.price > 0 AND COALESCE(cp.match_score, 0) >= {PCHOME_MATCH_SCORE_FLOOR} AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2' ORDER BY cp.sku, cp.crawled_at DESC NULLS LAST ), {attempt_cte} {sales_cte} SELECT lm.sku, lm.name, lm.momo_price, vc.pchome_price, vc.competitor_product_id, vc.competitor_product_name, vc.match_score, la.attempt_status, la.candidate_count, la.best_match_score, la.error_message, la.attempted_at, {sales_select} (vc.pchome_price - lm.momo_price) AS price_diff, ((vc.pchome_price - lm.momo_price) / lm.momo_price * 100) AS price_diff_pct FROM latest_momo lm LEFT JOIN valid_competitor vc ON vc.sku = lm.sku LEFT JOIN latest_attempt la ON la.sku = lm.sku {sales_join} WHERE lm.rn = 1 AND lm.momo_price > 0 ORDER BY {order_expr} LIMIT :limit """) with engine.connect() as conn: rows = conn.execute(sql, params).mappings().all() results = [] for row in rows: pchome_id = row.get("competitor_product_id") found = bool(row.get("pchome_price")) results.append({ "found": found, "momo_icode": str(row.get("sku") or ""), "momo_name": row.get("name") or "", "momo_price": _num(row.get("momo_price")), "pc_name": row.get("competitor_product_name") or "", "pc_price": _num(row.get("pchome_price")), "pc_url": f"https://24h.pchome.com.tw/prod/{pchome_id}" if pchome_id else "", "price_diff": _num(row.get("price_diff")), "price_diff_pct": _num(row.get("price_diff_pct")), "match_score": _num(row.get("match_score")), "momo_revenue": _num(row.get("momo_revenue")), "match_status": "matched" if found else (row.get("attempt_status") or "no_valid_match"), "candidate_count": int(row.get("candidate_count") or 0), "best_match_score": _num(row.get("best_match_score")), "match_diagnostic": row.get("error_message") or "", }) return results def build_competitor_intel_payload(engine, days: int = 30) -> dict: """頁面、AI、PPT 可共用的摘要 payload。""" return { "coverage": fetch_competitor_coverage(engine), "trend": fetch_competitor_gap_trend(engine, days=days), "top_risks": fetch_top_competitor_risks(engine, limit=10), "match_score_floor": PCHOME_MATCH_SCORE_FLOOR, }