225 lines
8.7 KiB
Python
225 lines
8.7 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""跨平台市場活動情報 ORM models。"""
|
||
|
||
from datetime import datetime, timedelta, timezone
|
||
|
||
from sqlalchemy import (
|
||
Boolean,
|
||
Column,
|
||
DateTime,
|
||
Float,
|
||
ForeignKey,
|
||
Index,
|
||
Integer,
|
||
String,
|
||
Text,
|
||
UniqueConstraint,
|
||
)
|
||
from sqlalchemy.orm import relationship
|
||
|
||
from database.models import Base
|
||
|
||
|
||
TAIPEI_TZ = timezone(timedelta(hours=8))
|
||
|
||
|
||
def taipei_now():
|
||
"""取得台北時間 naive datetime,符合專案 DB 時間規範。"""
|
||
return datetime.now(TAIPEI_TZ).replace(tzinfo=None)
|
||
|
||
|
||
class MarketPlatform(Base):
|
||
"""市場平台設定,例如 MOMO / PChome / Coupang / Shopee。"""
|
||
|
||
__tablename__ = "market_platforms"
|
||
|
||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||
code = Column(String(50), unique=True, nullable=False, index=True)
|
||
name = Column(String(120), nullable=False)
|
||
base_url = Column(String(500))
|
||
enabled = Column(Boolean, default=False, nullable=False)
|
||
crawl_policy_json = Column(Text)
|
||
created_at = Column(DateTime, default=taipei_now, nullable=False)
|
||
updated_at = Column(DateTime, default=taipei_now, onupdate=taipei_now, nullable=False)
|
||
|
||
campaigns = relationship("MarketCampaign", back_populates="platform")
|
||
|
||
|
||
class MarketCampaign(Base):
|
||
"""跨平台活動檔期。"""
|
||
|
||
__tablename__ = "market_campaigns"
|
||
|
||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||
platform_code = Column(String(50), ForeignKey("market_platforms.code"), nullable=False, index=True)
|
||
campaign_key = Column(String(200), nullable=False)
|
||
campaign_name = Column(String(500), nullable=False)
|
||
campaign_type = Column(String(80), index=True)
|
||
campaign_url = Column(Text)
|
||
start_at = Column(DateTime)
|
||
end_at = Column(DateTime)
|
||
status = Column(String(30), default="unknown", nullable=False, index=True)
|
||
discovered_at = Column(DateTime, default=taipei_now, nullable=False)
|
||
last_seen_at = Column(DateTime, default=taipei_now, nullable=False)
|
||
metadata_json = Column(Text)
|
||
|
||
platform = relationship("MarketPlatform", back_populates="campaigns")
|
||
snapshots = relationship("MarketCampaignSnapshot", back_populates="campaign")
|
||
products = relationship("MarketCampaignProduct", back_populates="campaign")
|
||
|
||
__table_args__ = (
|
||
UniqueConstraint("platform_code", "campaign_key", name="uq_market_campaign_platform_key"),
|
||
Index("idx_market_campaign_status_time", "status", "start_at", "end_at"),
|
||
)
|
||
|
||
|
||
class MarketCampaignSnapshot(Base):
|
||
"""活動頁每次爬取快照。"""
|
||
|
||
__tablename__ = "market_campaign_snapshots"
|
||
|
||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||
campaign_id = Column(Integer, ForeignKey("market_campaigns.id"), nullable=False, index=True)
|
||
batch_id = Column(String(80), nullable=False, index=True)
|
||
crawled_at = Column(DateTime, default=taipei_now, nullable=False, index=True)
|
||
title = Column(String(500))
|
||
hero_text = Column(Text)
|
||
coupon_text = Column(Text)
|
||
raw_discount_text = Column(Text)
|
||
page_hash = Column(String(128), index=True)
|
||
raw_snapshot_path = Column(Text)
|
||
status = Column(String(30), default="success", nullable=False, index=True)
|
||
error_message = Column(Text)
|
||
metadata_json = Column(Text)
|
||
|
||
campaign = relationship("MarketCampaign", back_populates="snapshots")
|
||
|
||
__table_args__ = (
|
||
Index("idx_market_campaign_snapshot_campaign_time", "campaign_id", "crawled_at"),
|
||
)
|
||
|
||
|
||
class MarketCampaignProduct(Base):
|
||
"""活動頁中的平台商品快照主檔。"""
|
||
|
||
__tablename__ = "market_campaign_products"
|
||
|
||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||
campaign_id = Column(Integer, ForeignKey("market_campaigns.id"), nullable=False, index=True)
|
||
platform_code = Column(String(50), nullable=False, index=True)
|
||
platform_product_id = Column(String(200), nullable=False, index=True)
|
||
product_url = Column(Text)
|
||
name = Column(String(500), nullable=False)
|
||
brand = Column(String(200), index=True)
|
||
image_url = Column(Text)
|
||
category_text = Column(String(300), index=True)
|
||
price = Column(Float)
|
||
original_price = Column(Float)
|
||
discount_text = Column(String(200))
|
||
discount_rate = Column(Float)
|
||
coupon_text = Column(Text)
|
||
stock_text = Column(String(200))
|
||
sold_count = Column(Integer)
|
||
rating = Column(Float)
|
||
review_count = Column(Integer)
|
||
rank_position = Column(Integer)
|
||
is_active = Column(Boolean, default=True, nullable=False, index=True)
|
||
first_seen_at = Column(DateTime, default=taipei_now, nullable=False)
|
||
last_seen_at = Column(DateTime, default=taipei_now, nullable=False, index=True)
|
||
metadata_json = Column(Text)
|
||
|
||
campaign = relationship("MarketCampaign", back_populates="products")
|
||
price_history = relationship("MarketProductPriceHistory", back_populates="market_product")
|
||
matches = relationship("MarketProductMatch", back_populates="market_product")
|
||
|
||
__table_args__ = (
|
||
UniqueConstraint(
|
||
"campaign_id",
|
||
"platform_code",
|
||
"platform_product_id",
|
||
name="uq_market_campaign_product",
|
||
),
|
||
Index("idx_market_product_platform_seen", "platform_code", "last_seen_at"),
|
||
Index("idx_market_product_discount", "discount_rate", "price"),
|
||
)
|
||
|
||
|
||
class MarketProductPriceHistory(Base):
|
||
"""市場商品價格歷史快照。"""
|
||
|
||
__tablename__ = "market_product_price_history"
|
||
|
||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||
market_product_id = Column(Integer, ForeignKey("market_campaign_products.id"), nullable=False, index=True)
|
||
campaign_id = Column(Integer, ForeignKey("market_campaigns.id"), nullable=False, index=True)
|
||
platform_code = Column(String(50), nullable=False, index=True)
|
||
platform_product_id = Column(String(200), nullable=False, index=True)
|
||
price = Column(Float)
|
||
original_price = Column(Float)
|
||
discount_rate = Column(Float)
|
||
stock_text = Column(String(200))
|
||
sold_count = Column(Integer)
|
||
rank_position = Column(Integer)
|
||
crawled_at = Column(DateTime, default=taipei_now, nullable=False, index=True)
|
||
batch_id = Column(String(80), nullable=False, index=True)
|
||
metadata_json = Column(Text)
|
||
|
||
market_product = relationship("MarketCampaignProduct", back_populates="price_history")
|
||
|
||
__table_args__ = (
|
||
Index("idx_market_price_platform_time", "platform_code", "platform_product_id", "crawled_at"),
|
||
Index("idx_market_price_campaign_time", "campaign_id", "crawled_at"),
|
||
)
|
||
|
||
|
||
class MarketProductMatch(Base):
|
||
"""市場商品與我方 MOMO 商品的比對審核結果。"""
|
||
|
||
__tablename__ = "market_product_matches"
|
||
|
||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||
market_product_id = Column(Integer, ForeignKey("market_campaign_products.id"), nullable=False, index=True)
|
||
momo_product_id = Column(Integer, ForeignKey("products.id"), index=True)
|
||
momo_i_code = Column(String(50), index=True)
|
||
match_score = Column(Float, default=0.0, nullable=False)
|
||
match_status = Column(String(30), default="needs_review", nullable=False, index=True)
|
||
match_reason_json = Column(Text)
|
||
created_at = Column(DateTime, default=taipei_now, nullable=False)
|
||
reviewed_at = Column(DateTime)
|
||
reviewed_by = Column(String(120))
|
||
|
||
market_product = relationship("MarketCampaignProduct", back_populates="matches")
|
||
|
||
__table_args__ = (
|
||
UniqueConstraint("market_product_id", "momo_i_code", name="uq_market_product_momo_match"),
|
||
Index("idx_market_match_status_score", "match_status", "match_score"),
|
||
)
|
||
|
||
|
||
class MarketCrawlerRun(Base):
|
||
"""市場情報爬蟲執行紀錄。"""
|
||
|
||
__tablename__ = "market_crawler_runs"
|
||
|
||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||
platform_code = Column(String(50), index=True)
|
||
crawler_name = Column(String(120), nullable=False, index=True)
|
||
campaign_id = Column(Integer, ForeignKey("market_campaigns.id"), index=True)
|
||
batch_id = Column(String(80), nullable=False, index=True)
|
||
started_at = Column(DateTime, default=taipei_now, nullable=False, index=True)
|
||
finished_at = Column(DateTime)
|
||
status = Column(String(30), default="started", nullable=False, index=True)
|
||
dry_run = Column(Boolean, default=True, nullable=False)
|
||
pages_found = Column(Integer, default=0, nullable=False)
|
||
products_found = Column(Integer, default=0, nullable=False)
|
||
products_changed = Column(Integer, default=0, nullable=False)
|
||
error_count = Column(Integer, default=0, nullable=False)
|
||
error_message = Column(Text)
|
||
metadata_json = Column(Text)
|
||
|
||
__table_args__ = (
|
||
Index("idx_market_crawler_run_platform_time", "platform_code", "started_at"),
|
||
Index("idx_market_crawler_run_status_time", "status", "started_at"),
|
||
)
|