390 lines
14 KiB
Python
390 lines
14 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""外部市場來源與報價的正規化服務。
|
||
|
||
第一版只做資料規格、來源狀態與只讀統計,不主動抓資料、不寫 DB。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import logging
|
||
from dataclasses import dataclass, field
|
||
from datetime import datetime
|
||
from typing import Any
|
||
|
||
from sqlalchemy import inspect, text
|
||
|
||
logger = logging.getLogger(__name__)


# Static catalogue of the external market sources this module reports on.
# Only the MOMO reference source is marked "active"; Shopee and Coupang are
# "paused" placeholders that differ only in code and display name, so they
# are generated from one template (each gets its own dict and list objects).
SOURCE_CONTRACTS = [
    {
        "code": "momo_reference",
        "display_name": "MOMO 外部價格參考",
        "platform_code": "momo",
        "status_code": "active",
        "status_label": "正在使用",
        "source_kind": "legacy_bridge",
        "input_methods": ["既有比價快取", "手動 CSV", "供應商 API"],
        "data_quality_label": "只採用已確認同款",
        "plain_note": "目前用已確認同款的 MOMO 參考價,協助判斷 PChome 商品是否需要調整售價或曝光。",
    },
    *(
        {
            "code": paused_code,
            "display_name": paused_name,
            "platform_code": paused_code,
            "status_code": "paused",
            "status_label": "先暫停",
            "source_kind": "connector_contract",
            "input_methods": ["官方 API", "供應商 API", "手動 CSV"],
            "data_quality_label": "暫不進告警",
            "plain_note": "先保留資料接口,等有穩定合法來源後再啟用,不會影響目前作戰清單。",
        }
        for paused_code, paused_name in (("shopee", "蝦皮"), ("coupang", "酷澎"))
    ),
]
|
||
|
||
# Field specification shared by every connector and by manual CSV uploads.
# Each row is (name, label, required, plain_note); the `required` flag is what
# build_connector_contracts() uses to split CSV headers into required/optional.
NORMALIZED_OFFER_FIELDS = [
    {
        "name": field_name,
        "label": field_label,
        "required": is_required,
        "plain_note": field_note,
    }
    for field_name, field_label, is_required, field_note in (
        ("source_code", "資料來源", True, "例如 momo_reference、shopee、coupang。"),
        ("source_product_id", "外部商品 ID", True, "外部平台或資料供應商給的商品編號。"),
        ("title", "商品名稱", True, "用來做人工確認與名稱比對。"),
        ("price", "售價", True, "只填可直接比較的成交或頁面售價。"),
        ("observed_at", "資料時間", True, "這筆價格看到的時間。"),
        (
            "ingestion_method",
            "取得方式",
            True,
            "official_api、provider_api、manual_csv 或 legacy_competitor_cache。",
        ),
        ("pchome_product_id", "PChome 商品 ID", False, "若已確認同款才填,未確認就留空。"),
        ("quality_score", "資料可信度", False, "0 到 100,低於 76 不進自動告警。"),
    )
]
|
||
|
||
|
||
@dataclass(frozen=True)
class ExternalOfferPayload:
    """Immutable external-market offer in the normalized field layout.

    The first seven fields have no default and must always be supplied; the
    remaining fields are optional enrichment and quality metadata.
    """

    # --- fields without defaults ---
    source_code: str
    platform_code: str
    source_product_id: str
    title: str
    price: float | None
    observed_at: str
    ingestion_method: str

    # --- optional descriptive fields ---
    currency: str = "TWD"
    original_price: float | None = None
    product_url: str | None = None
    brand: str | None = None
    category_text: str | None = None

    # --- matching and quality metadata ---
    pchome_product_id: str | None = None
    momo_sku: str | None = None
    match_status: str = "unmatched"
    quality_score: float = 0.0
    data_quality_status: str = "needs_review"
    quality_notes: list[str] = field(default_factory=list)

    def to_record(self) -> dict[str, Any]:
        """Return the offer as a flat dict.

        Two entries are derived rather than copied: ``source_offer_key`` is
        ``"<source_code>:<source_product_id>"`` and ``quality_notes_json`` is
        the notes list serialized as JSON text (non-ASCII kept verbatim).
        An empty ``currency`` falls back to ``"TWD"``.
        """
        offer_key = f"{self.source_code}:{self.source_product_id}"
        currency_code = self.currency if self.currency else "TWD"
        notes_as_json = json.dumps(self.quality_notes, ensure_ascii=False)

        record: dict[str, Any] = {
            "source_code": self.source_code,
            "platform_code": self.platform_code,
            "source_product_id": self.source_product_id,
            "source_offer_key": offer_key,
            "title": self.title,
            "price": self.price,
            "currency": currency_code,
            "original_price": self.original_price,
            "product_url": self.product_url,
            "brand": self.brand,
            "category_text": self.category_text,
            "observed_at": self.observed_at,
            "ingestion_method": self.ingestion_method,
            "pchome_product_id": self.pchome_product_id,
            "momo_sku": self.momo_sku,
            "match_status": self.match_status,
            "quality_score": self.quality_score,
            "data_quality_status": self.data_quality_status,
            "quality_notes_json": notes_as_json,
        }
        return record
|
||
|
||
|
||
def _to_float(value: Any) -> float | None:
|
||
if value is None or value == "":
|
||
return None
|
||
try:
|
||
return float(value)
|
||
except (TypeError, ValueError):
|
||
return None
|
||
|
||
|
||
def _load_json_list(value: Any) -> list[Any]:
|
||
if not value:
|
||
return []
|
||
if isinstance(value, list):
|
||
return value
|
||
try:
|
||
parsed = json.loads(value)
|
||
return parsed if isinstance(parsed, list) else []
|
||
except Exception:
|
||
return []
|
||
|
||
|
||
def _has_table(conn, table_name: str) -> bool:
    """Report whether *table_name* exists on the given connection.

    Any exception raised while inspecting is logged as a warning (with
    traceback) and treated as "table not present".
    """
    try:
        table_exists = inspect(conn).has_table(table_name)
    except Exception:
        logger.warning("[ExternalMarket] table probe failed: %s", table_name, exc_info=True)
        return False
    return table_exists
|
||
|
||
|
||
def normalize_external_offer_payload(payload: dict[str, Any]) -> tuple[ExternalOfferPayload | None, list[str]]:
    """Convert an official-API / provider-API / manual-CSV row into the shared offer shape.

    Args:
        payload: Raw mapping. ``title`` falls back to ``name`` and
            ``platform_code`` falls back to ``source_code``. ``quality_notes``
            may be a list or JSON text; a single ``quality_note`` is used when
            no notes list is present.

    Returns:
        ``(record, [])`` when the payload is valid, otherwise
        ``(None, errors)`` where ``errors`` holds the user-facing messages.
        ``quality_score`` is clamped to the 0-100 range; a missing,
        unparseable or non-finite score becomes 0.
    """
    import math  # function-scope import keeps this block self-contained

    errors: list[str] = []
    source_code = str(payload.get("source_code") or "").strip()
    platform_code = str(payload.get("platform_code") or source_code or "").strip()
    source_product_id = str(payload.get("source_product_id") or "").strip()
    title = str(payload.get("title") or payload.get("name") or "").strip()
    ingestion_method = str(payload.get("ingestion_method") or "").strip()
    observed_at = str(payload.get("observed_at") or "").strip()
    price = _to_float(payload.get("price"))

    required_values = {
        "資料來源": source_code,
        "外部商品 ID": source_product_id,
        "商品名稱": title,
        "售價": price,
        "資料時間": observed_at,
        "取得方式": ingestion_method,
    }
    for label, value in required_values.items():
        if value is None or value == "":
            errors.append(f"缺少{label}")

    # NaN compares False against everything, so `price <= 0` alone would let a
    # "nan" price through; infinities are not comparable prices either.
    if price is not None and not (math.isfinite(price) and price > 0):
        errors.append("售價必須大於 0")

    if observed_at:
        try:
            # fromisoformat() before Python 3.11 rejects a trailing "Z".
            datetime.fromisoformat(observed_at.replace("Z", "+00:00"))
        except ValueError:
            errors.append("資料時間格式需為 ISO 格式,例如 2026-06-15T10:00:00")

    if errors:
        return None, errors

    quality_score = _to_float(payload.get("quality_score"))
    # min(100.0, nan) evaluates to 100.0, so an unguarded NaN would be promoted
    # to the maximum score. Treat every non-finite score as "no confidence".
    if quality_score is None or not math.isfinite(quality_score):
        quality_score = 0.0

    original_price = _to_float(payload.get("original_price"))
    if original_price is not None and not math.isfinite(original_price):
        original_price = None

    quality_notes = _load_json_list(payload.get("quality_notes"))
    if not quality_notes and payload.get("quality_note"):
        quality_notes = [str(payload.get("quality_note"))]

    record = ExternalOfferPayload(
        source_code=source_code,
        platform_code=platform_code,
        source_product_id=source_product_id,
        title=title,
        price=price,
        observed_at=observed_at,
        ingestion_method=ingestion_method,
        currency=str(payload.get("currency") or "TWD").strip() or "TWD",
        original_price=original_price,
        product_url=payload.get("product_url"),
        brand=payload.get("brand"),
        category_text=payload.get("category_text"),
        pchome_product_id=payload.get("pchome_product_id"),
        momo_sku=payload.get("momo_sku"),
        match_status=str(payload.get("match_status") or "unmatched"),
        quality_score=max(0.0, min(100.0, quality_score)),
        data_quality_status=str(payload.get("data_quality_status") or "needs_review"),
        quality_notes=[str(item) for item in quality_notes],
    )
    return record, []
|
||
|
||
|
||
def _legacy_momo_reference_stats(conn) -> dict[str, Any]:
    """Summarize usable rows in the legacy ``competitor_prices`` table.

    Returns a dict with ``usable_offer_count`` (int) and ``last_seen_at``
    (the latest ``crawled_at`` as a string, or ``None``). When the table is
    missing the result is a zero count with no timestamp.

    NOTE(review): only the PostgreSQL query excludes expired rows via
    ``expires_at``; the fallback query has no such filter. Confirm whether
    that difference is intentional.
    """
    if not _has_table(conn, "competitor_prices"):
        return {"usable_offer_count": 0, "last_seen_at": None}

    on_postgres = conn.dialect.name == "postgresql"
    if on_postgres:
        # JSONB containment operator for the identity_v2 tag.
        query = """
            SELECT COUNT(*) AS usable_offer_count, MAX(crawled_at) AS last_seen_at
            FROM competitor_prices
            WHERE source = 'pchome'
              AND competitor_product_id IS NOT NULL
              AND price IS NOT NULL
              AND price > 0
              AND COALESCE(match_score, 0) >= 0.76
              AND (expires_at IS NULL OR expires_at > CURRENT_TIMESTAMP)
              AND COALESCE(tags, '[]'::jsonb) ? 'identity_v2'
        """
    else:
        # Other dialects match the tag with a plain substring search.
        query = """
            SELECT COUNT(*) AS usable_offer_count, MAX(crawled_at) AS last_seen_at
            FROM competitor_prices
            WHERE source = 'pchome'
              AND competitor_product_id IS NOT NULL
              AND price IS NOT NULL
              AND price > 0
              AND COALESCE(match_score, 0) >= 0.76
              AND COALESCE(tags, '') LIKE '%identity_v2%'
        """

    first_row = conn.execute(text(query)).mappings().first()
    summary = first_row or {}
    usable_total = int(summary.get("usable_offer_count") or 0)
    latest_seen = str(summary.get("last_seen_at") or "") or None
    return {
        "usable_offer_count": usable_total,
        "last_seen_at": latest_seen,
    }
|
||
|
||
|
||
def _normalized_offer_stats(conn) -> dict[str, dict[str, Any]]:
    """Aggregate per-source offer statistics from the normalized tables.

    Returns a mapping of source code to ``status``, ``enabled``,
    ``offer_count``, ``usable_offer_count`` and ``last_seen_at``. An offer is
    counted as usable when its ``data_quality_status`` is verified / usable /
    reviewed and its ``quality_score`` is at least 76. Returns an empty dict
    if either ``external_market_sources`` or ``external_offers`` is missing.
    """
    required_tables = {"external_market_sources", "external_offers"}
    if not all(_has_table(conn, table) for table in required_tables):
        return {}

    statement = text("""
        SELECT
            s.code,
            s.status,
            s.enabled,
            COUNT(o.id) AS offer_count,
            SUM(CASE
                WHEN o.data_quality_status IN ('verified', 'usable', 'reviewed')
                 AND COALESCE(o.quality_score, 0) >= 76
                THEN 1 ELSE 0 END
            ) AS usable_offer_count,
            MAX(o.observed_at) AS last_seen_at
        FROM external_market_sources s
        LEFT JOIN external_offers o ON o.source_code = s.code
        GROUP BY s.code, s.status, s.enabled
    """)
    rows = conn.execute(statement).mappings().all()

    stats_by_code: dict[str, dict[str, Any]] = {}
    for row in rows:
        stats_by_code[str(row["code"])] = {
            "status": row.get("status"),
            "enabled": bool(row.get("enabled")),
            "offer_count": int(row.get("offer_count") or 0),
            "usable_offer_count": int(row.get("usable_offer_count") or 0),
            "last_seen_at": str(row.get("last_seen_at") or "") or None,
        }
    return stats_by_code
|
||
|
||
|
||
def build_connector_contracts() -> dict[str, Any]:
    """Return the field contract shared by connectors and manual CSV uploads.

    The ``sources`` and ``normalized_offer_fields`` entries are deep copies of
    the module-level tables, so a caller that edits the response cannot alter
    the contract seen by later calls. The manual-CSV header lists are derived
    from each field's ``required`` flag.
    """
    from copy import deepcopy  # function-scope import keeps this block self-contained

    required_headers = [spec["name"] for spec in NORMALIZED_OFFER_FIELDS if spec["required"]]
    optional_headers = [spec["name"] for spec in NORMALIZED_OFFER_FIELDS if not spec["required"]]

    return {
        "success": True,
        "version": "2026-06-15",
        "plain_summary": "所有外部市場資料都先轉成同一份商品報價格式,再進作戰清單。",
        "sources": deepcopy(SOURCE_CONTRACTS),
        "normalized_offer_fields": deepcopy(NORMALIZED_OFFER_FIELDS),
        "manual_csv": {
            "encoding": "utf-8-sig",
            "required_headers": required_headers,
            "optional_headers": optional_headers,
            "plain_rule": "低可信度或未確認同款的資料只進待補資料清單,不自動發告警。",
        },
    }
|
||
|
||
|
||
def build_external_source_readiness(engine=None) -> dict[str, Any]:
    """Build the per-source readiness summary for display.

    Args:
        engine: Optional object exposing ``connect()`` as a context manager.
            When ``None`` no database is touched and all counts are zero.

    Returns:
        A dict with overall counters, a copy of every source contract
        annotated with ``schema_ready``, ``usable_offer_count``,
        ``last_seen_at``, ``can_alert`` and ``plain_state``, and the
        connector contract. Database failures are logged as a warning and
        leave whatever values were gathered before the failure.
    """
    sources = [dict(contract) for contract in SOURCE_CONTRACTS]
    normalized_stats: dict[str, dict[str, Any]] = {}
    legacy_stats: dict[str, Any] = {"usable_offer_count": 0, "last_seen_at": None}
    schema_ready = False

    if engine is not None:
        try:
            with engine.connect() as conn:
                schema_ready = all(
                    _has_table(conn, table)
                    for table in {"external_market_sources", "external_offers"}
                )
                normalized_stats = _normalized_offer_stats(conn)
                legacy_stats = _legacy_momo_reference_stats(conn)
        except Exception:
            logger.warning("[ExternalMarket] source readiness failed", exc_info=True)

    for entry in sources:
        per_source = normalized_stats.get(entry["code"], {})
        usable = int(per_source.get("usable_offer_count") or 0)
        last_seen_at = per_source.get("last_seen_at")

        if entry["code"] == "momo_reference":
            # MOMO also counts the legacy cache: take the larger usable count
            # and fall back to the legacy timestamp when none is recorded.
            usable = max(usable, int(legacy_stats.get("usable_offer_count") or 0))
            last_seen_at = last_seen_at or legacy_stats.get("last_seen_at")

        is_active = entry["status_code"] == "active"
        if not is_active:
            plain_state = "先保留接口,不進告警"
        elif usable:
            plain_state = "已接入,可進作戰清單"
        else:
            plain_state = "已接入,等待可用資料"

        entry.update(
            schema_ready=schema_ready,
            usable_offer_count=usable,
            last_seen_at=last_seen_at,
            can_alert=is_active and usable > 0,
            plain_state=plain_state,
        )

    status_codes = [entry["status_code"] for entry in sources]
    active_count = sum(1 for code in status_codes if code == "active")
    paused_count = sum(1 for code in status_codes if code == "paused")
    usable_count = sum(int(entry["usable_offer_count"]) for entry in sources)

    return {
        "success": True,
        "schema_ready": schema_ready,
        "active_count": active_count,
        "paused_count": paused_count,
        "usable_offer_count": usable_count,
        "sources": sources,
        "connector_contract": build_connector_contracts(),
        "plain_summary": "MOMO 先用;蝦皮與酷澎先保留接口,暫不進告警。",
    }
|