Files
ewoooc/services/external_market_offer_service.py
OoO 9260cc1740
All checks were successful
CD Pipeline / deploy (push) Successful in 1m4s
V10.607 建立外部市場來源正規化層
2026-06-15 16:19:03 +08:00

390 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""外部市場來源與報價的正規化服務。
第一版只做資料規格、來源狀態與只讀統計,不主動抓資料、不寫 DB。
"""
from __future__ import annotations
import json
import logging
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any
from sqlalchemy import inspect, text
logger = logging.getLogger(__name__)
def _paused_connector_contract(code: str, display_name: str) -> dict[str, Any]:
    """Return the contract entry for a connector that is declared but paused.

    The paused connectors in this module differ only in their code and display
    name; the platform code equals the source code. A fresh ``input_methods``
    list is created on every call so entries never share a list object.
    """
    return {
        "code": code,
        "display_name": display_name,
        "platform_code": code,
        "status_code": "paused",
        "status_label": "先暫停",
        "source_kind": "connector_contract",
        "input_methods": ["官方 API", "供應商 API", "手動 CSV"],
        "data_quality_label": "暫不進告警",
        "plain_note": "先保留資料接口,等有穩定合法來源後再啟用,不會影響目前作戰清單。",
    }


# Static catalogue of the external market sources known to this module.
# Each entry carries a machine code, display labels, a status code
# ("active" or "paused"), the accepted input methods and a plain-language note.
SOURCE_CONTRACTS = [
    {
        "code": "momo_reference",
        "display_name": "MOMO 外部價格參考",
        "platform_code": "momo",
        "status_code": "active",
        "status_label": "正在使用",
        "source_kind": "legacy_bridge",
        "input_methods": ["既有比價快取", "手動 CSV", "供應商 API"],
        "data_quality_label": "只採用已確認同款",
        "plain_note": "目前用已確認同款的 MOMO 參考價,協助判斷 PChome 商品是否需要調整售價或曝光。",
    },
    _paused_connector_contract("shopee", "蝦皮"),
    _paused_connector_contract("coupang", "酷澎"),
]
# Field specification shared by every connector and by the manual CSV import.
# Each row below is (name, label, required, plain_note); the comprehension
# expands it into the dict shape the rest of the module reads.
NORMALIZED_OFFER_FIELDS = [
    {
        "name": name,
        "label": label,
        "required": required,
        "plain_note": plain_note,
    }
    for name, label, required, plain_note in (
        ("source_code", "資料來源", True, "例如 momo_reference、shopee、coupang。"),
        ("source_product_id", "外部商品 ID", True, "外部平台或資料供應商給的商品編號。"),
        ("title", "商品名稱", True, "用來做人工確認與名稱比對。"),
        ("price", "售價", True, "只填可直接比較的成交或頁面售價。"),
        ("observed_at", "資料時間", True, "這筆價格看到的時間。"),
        (
            "ingestion_method",
            "取得方式",
            True,
            "official_api、provider_api、manual_csv 或 legacy_competitor_cache。",
        ),
        ("pchome_product_id", "PChome 商品 ID", False, "若已確認同款才填,未確認就留空。"),
        ("quality_score", "資料可信度", False, "0 到 100低於 76 不進自動告警。"),
    )
]
@dataclass(frozen=True)
class ExternalOfferPayload:
    """Immutable, normalized representation of one external price offer.

    The first seven attributes have no default and must be supplied; the
    remaining ones fall back to the defaults declared below.
    """

    # Required identification and pricing data.
    source_code: str
    platform_code: str
    source_product_id: str
    title: str
    price: float | None
    observed_at: str
    ingestion_method: str
    # Optional descriptive data.
    currency: str = "TWD"
    original_price: float | None = None
    product_url: str | None = None
    brand: str | None = None
    category_text: str | None = None
    # Optional matching and quality data.
    pchome_product_id: str | None = None
    momo_sku: str | None = None
    match_status: str = "unmatched"
    quality_score: float = 0.0
    data_quality_status: str = "needs_review"
    quality_notes: list[str] = field(default_factory=list)

    def to_record(self) -> dict[str, Any]:
        """Return the offer as a flat dict.

        Two keys are derived rather than copied: ``source_offer_key`` joins
        the source code and the source product id with a colon, and
        ``quality_notes_json`` holds the notes serialized as JSON with
        non-ASCII characters kept as-is. An empty currency becomes "TWD".
        """
        offer_key = f"{self.source_code}:{self.source_product_id}"
        notes_json = json.dumps(self.quality_notes, ensure_ascii=False)
        currency = self.currency if self.currency else "TWD"
        return dict(
            source_code=self.source_code,
            platform_code=self.platform_code,
            source_product_id=self.source_product_id,
            source_offer_key=offer_key,
            title=self.title,
            price=self.price,
            currency=currency,
            original_price=self.original_price,
            product_url=self.product_url,
            brand=self.brand,
            category_text=self.category_text,
            observed_at=self.observed_at,
            ingestion_method=self.ingestion_method,
            pchome_product_id=self.pchome_product_id,
            momo_sku=self.momo_sku,
            match_status=self.match_status,
            quality_score=self.quality_score,
            data_quality_status=self.data_quality_status,
            quality_notes_json=notes_json,
        )
def _to_float(value: Any) -> float | None:
if value is None or value == "":
return None
try:
return float(value)
except (TypeError, ValueError):
return None
def _load_json_list(value: Any) -> list[Any]:
if not value:
return []
if isinstance(value, list):
return value
try:
parsed = json.loads(value)
return parsed if isinstance(parsed, list) else []
except Exception:
return []
def _has_table(conn, table_name: str) -> bool:
    """Report whether *table_name* exists according to the inspector for *conn*.

    Any exception raised while probing is logged as a warning (with
    traceback) and reported as "table absent" instead of propagating.
    """
    try:
        inspector = inspect(conn)
        exists = inspector.has_table(table_name)
    except Exception:
        logger.warning("[ExternalMarket] table probe failed: %s", table_name, exc_info=True)
        return False
    return exists
def normalize_external_offer_payload(payload: dict[str, Any]) -> tuple[ExternalOfferPayload | None, list[str]]:
    """Convert official API / provider API / manual CSV data into one field layout.

    Args:
        payload: Raw offer mapping. ``title`` falls back to ``name`` and
            ``platform_code`` falls back to ``source_code`` when missing.

    Returns:
        ``(record, [])`` when the payload is valid, otherwise ``(None, errors)``
        where ``errors`` lists every problem found: missing required values,
        a non-positive price, or an ``observed_at`` that is not ISO formatted.
    """

    def _text(key: str) -> str:
        # Missing or falsy values become "", everything else is stringified and trimmed.
        return str(payload.get(key) or "").strip()

    source_code = _text("source_code")
    platform_code = str(payload.get("platform_code") or source_code or "").strip()
    source_product_id = _text("source_product_id")
    title = str(payload.get("title") or payload.get("name") or "").strip()
    ingestion_method = _text("ingestion_method")
    observed_at = _text("observed_at")
    price = _to_float(payload.get("price"))

    # (label, value) pairs in the order the error messages must appear.
    required_checks = (
        ("資料來源", source_code),
        ("外部商品 ID", source_product_id),
        ("商品名稱", title),
        ("售價", price),
        ("資料時間", observed_at),
        ("取得方式", ingestion_method),
    )
    problems: list[str] = [
        f"缺少{label}"
        for label, value in required_checks
        if value is None or value == ""
    ]

    if price is not None and price <= 0:
        problems.append("售價必須大於 0")

    if observed_at:
        # A trailing "Z" is rewritten to an explicit UTC offset before parsing.
        iso_text = observed_at.replace("Z", "+00:00")
        try:
            datetime.fromisoformat(iso_text)
        except ValueError:
            problems.append("資料時間格式需為 ISO 格式,例如 2026-06-15T10:00:00")

    if problems:
        return None, problems

    score = _to_float(payload.get("quality_score"))
    if score is None:
        score = 0.0

    # Prefer the list-valued "quality_notes"; fall back to a single "quality_note".
    notes = _load_json_list(payload.get("quality_notes"))
    if not notes and payload.get("quality_note"):
        notes = [str(payload.get("quality_note"))]

    currency = str(payload.get("currency") or "TWD").strip() or "TWD"
    match_status = str(payload.get("match_status") or "unmatched")
    quality_status = str(payload.get("data_quality_status") or "needs_review")

    normalized = ExternalOfferPayload(
        source_code=source_code,
        platform_code=platform_code,
        source_product_id=source_product_id,
        title=title,
        price=price,
        observed_at=observed_at,
        ingestion_method=ingestion_method,
        currency=currency,
        original_price=_to_float(payload.get("original_price")),
        product_url=payload.get("product_url"),
        brand=payload.get("brand"),
        category_text=payload.get("category_text"),
        pchome_product_id=payload.get("pchome_product_id"),
        momo_sku=payload.get("momo_sku"),
        match_status=match_status,
        # Clamp the score into the 0..100 range.
        quality_score=max(0.0, min(100.0, score)),
        data_quality_status=quality_status,
        quality_notes=[str(note) for note in notes],
    )
    return normalized, []
def _legacy_momo_reference_stats(conn) -> dict[str, Any]:
    """Count usable rows in the ``competitor_prices`` table.

    Returns a dict with ``usable_offer_count`` (int) and ``last_seen_at``
    (string form of the newest ``crawled_at``, or None). When the table does
    not exist the count is 0 and the timestamp is None.

    NOTE(review): only the PostgreSQL branch filters on ``expires_at``; confirm
    whether leaving that filter out for other dialects is intentional.
    """
    if not _has_table(conn, "competitor_prices"):
        return {"usable_offer_count": 0, "last_seen_at": None}

    # Clauses common to every dialect; the leading "" keeps the statement's
    # opening newline.
    shared_clauses = [
        "",
        "SELECT COUNT(*) AS usable_offer_count, MAX(crawled_at) AS last_seen_at",
        "FROM competitor_prices",
        "WHERE source = 'pchome'",
        "AND competitor_product_id IS NOT NULL",
        "AND price IS NOT NULL",
        "AND price > 0",
        "AND COALESCE(match_score, 0) >= 0.76",
    ]
    if conn.dialect.name == "postgresql":
        # PostgreSQL: skip expired rows and test the tag with the jsonb "?" operator.
        dialect_clauses = [
            "AND (expires_at IS NULL OR expires_at > CURRENT_TIMESTAMP)",
            "AND COALESCE(tags, '[]'::jsonb) ? 'identity_v2'",
        ]
    else:
        # Other dialects: match the tag with a plain substring search.
        dialect_clauses = ["AND COALESCE(tags, '') LIKE '%identity_v2%'"]
    statement = "\n".join([*shared_clauses, *dialect_clauses, ""])

    found = conn.execute(text(statement)).mappings().first()
    row = found or {}
    usable_total = int(row.get("usable_offer_count") or 0)
    newest = str(row.get("last_seen_at") or "") or None
    return {"usable_offer_count": usable_total, "last_seen_at": newest}
def _normalized_offer_stats(conn) -> dict[str, dict[str, Any]]:
    """Aggregate per-source offer statistics from the normalized tables.

    Returns an empty dict unless both ``external_market_sources`` and
    ``external_offers`` exist. Otherwise maps each source code to its status,
    enabled flag, total offer count, usable offer count and newest
    ``observed_at`` (as a string, or None). An offer counts as usable when its
    data quality status is 'verified', 'usable' or 'reviewed' and its quality
    score is at least 76.
    """
    for required_table in ("external_market_sources", "external_offers"):
        if not _has_table(conn, required_table):
            return {}

    statement = "\n".join(
        [
            "",
            "SELECT",
            "s.code,",
            "s.status,",
            "s.enabled,",
            "COUNT(o.id) AS offer_count,",
            "SUM(CASE",
            "WHEN o.data_quality_status IN ('verified', 'usable', 'reviewed')",
            "AND COALESCE(o.quality_score, 0) >= 76",
            "THEN 1 ELSE 0 END",
            ") AS usable_offer_count,",
            "MAX(o.observed_at) AS last_seen_at",
            "FROM external_market_sources s",
            "LEFT JOIN external_offers o ON o.source_code = s.code",
            "GROUP BY s.code, s.status, s.enabled",
            "",
        ]
    )
    result_rows = conn.execute(text(statement)).mappings().all()

    stats_by_code: dict[str, dict[str, Any]] = {}
    for row in result_rows:
        stats_by_code[str(row["code"])] = {
            "status": row.get("status"),
            "enabled": bool(row.get("enabled")),
            # NULL aggregates (sources without offers) are reported as 0.
            "offer_count": int(row.get("offer_count") or 0),
            "usable_offer_count": int(row.get("usable_offer_count") or 0),
            "last_seen_at": str(row.get("last_seen_at") or "") or None,
        }
    return stats_by_code
def build_connector_contracts() -> dict[str, Any]:
    """Return the field specification shared by connectors and the manual CSV import.

    The ``sources`` and ``normalized_offer_fields`` entries are deep copies of
    the module-level ``SOURCE_CONTRACTS`` and ``NORMALIZED_OFFER_FIELDS``, so a
    caller that mutates the returned structure cannot alter the contract seen
    by later calls.

    Returns:
        A dict with ``success``, ``version``, ``plain_summary``, ``sources``,
        ``normalized_offer_fields`` and ``manual_csv``; the last one lists the
        required and optional CSV headers derived from the field specification.
    """
    import copy  # local import: the module header does not import copy

    # "spec" rather than "field" to avoid shadowing dataclasses.field.
    required_headers = [spec["name"] for spec in NORMALIZED_OFFER_FIELDS if spec["required"]]
    optional_headers = [spec["name"] for spec in NORMALIZED_OFFER_FIELDS if not spec["required"]]
    return {
        "success": True,
        "version": "2026-06-15",
        "plain_summary": "所有外部市場資料都先轉成同一份商品報價格式,再進作戰清單。",
        "sources": copy.deepcopy(SOURCE_CONTRACTS),
        "normalized_offer_fields": copy.deepcopy(NORMALIZED_OFFER_FIELDS),
        "manual_csv": {
            "encoding": "utf-8-sig",
            "required_headers": required_headers,
            "optional_headers": optional_headers,
            "plain_rule": "低可信度或未確認同款的資料只進待補資料清單,不自動發告警。",
        },
    }
def build_external_source_readiness(engine=None) -> dict[str, Any]:
    """Build the external data source status payload used for display.

    Args:
        engine: Optional object exposing ``connect()`` as a context manager.
            When None, no database is consulted and every count is 0.

    Returns:
        A dict with the overall ``schema_ready`` flag, counts of active and
        paused sources, the total usable offer count, the per-source entries
        and the connector contract.

    NOTE(review): the schema probe and both statistics queries share one
    try block, so a failure in any of them is logged and leaves the defaults
    in place for whatever had not been assigned yet.
    """
    normalized_stats: dict[str, dict[str, Any]] = {}
    legacy_stats: dict[str, Any] = {"usable_offer_count": 0, "last_seen_at": None}
    schema_ready = False

    if engine is not None:
        try:
            with engine.connect() as conn:
                schema_ready = all(
                    _has_table(conn, table)
                    for table in {"external_market_sources", "external_offers"}
                )
                normalized_stats = _normalized_offer_stats(conn)
                legacy_stats = _legacy_momo_reference_stats(conn)
        except Exception:
            logger.warning("[ExternalMarket] source readiness failed", exc_info=True)

    sources: list[dict[str, Any]] = []
    active_count = 0
    paused_count = 0
    usable_count = 0
    for contract in SOURCE_CONTRACTS:
        # Work on a copy so the module-level contract entry is not modified.
        entry = dict(contract)
        stats = normalized_stats.get(entry["code"], {})
        usable = int(stats.get("usable_offer_count") or 0)
        last_seen_at = stats.get("last_seen_at")
        if entry["code"] == "momo_reference":
            # The MOMO reference also draws on the legacy table: take the
            # larger count and fall back to the legacy timestamp.
            usable = max(usable, int(legacy_stats.get("usable_offer_count") or 0))
            last_seen_at = last_seen_at or legacy_stats.get("last_seen_at")

        is_active = entry["status_code"] == "active"
        entry["schema_ready"] = schema_ready
        entry["usable_offer_count"] = usable
        entry["last_seen_at"] = last_seen_at
        entry["can_alert"] = is_active and usable > 0
        if not is_active:
            entry["plain_state"] = "先保留接口,不進告警"
        elif usable:
            entry["plain_state"] = "已接入,可進作戰清單"
        else:
            entry["plain_state"] = "已接入,等待可用資料"

        if is_active:
            active_count += 1
        if entry["status_code"] == "paused":
            paused_count += 1
        usable_count += int(entry["usable_offer_count"])
        sources.append(entry)

    return {
        "success": True,
        "schema_ready": schema_ready,
        "active_count": active_count,
        "paused_count": paused_count,
        "usable_offer_count": usable_count,
        "sources": sources,
        "connector_contract": build_connector_contracts(),
        "plain_summary": "MOMO 先用;蝦皮與酷澎先保留接口,暫不進告警。",
    }