# ewoooc/database/trend_models.py

"""
趨勢資料庫模型

包含:
- TrendRecord: 趨勢記錄表
- TrendKeyword: 趨勢關鍵字表
- TrendAnalysis: AI 趨勢分析報告表
- WebSearchCache: Web Search 結果快取表
- TelegramUser: Telegram 用戶綁定表
"""

from sqlalchemy import (
    Column, Integer, String, Text, Float, Boolean, DateTime, Date,
    ForeignKey, Index, UniqueConstraint, BigInteger
)
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
from datetime import datetime, date, timedelta
import hashlib
import json

# 使用與其他模型相同的 Base
from database.models import Base


class TrendRecord(Base):
    """Trend data record holding the raw crawled content.

    Each row is one item (article, post, video, ...) pulled from a source
    platform. ``(source, source_id)`` is declared unique for de-duplication.
    """
    __tablename__ = 'trend_records'

    id = Column(Integer, primary_key=True)

    # --- Source identification ---

    # Originating platform. Documented values: 'google_news', 'ptt', 'dcard',
    # 'youtube', 'weather', 'ollama_web_search'.
    source = Column(String(50), nullable=False, index=True)

    # PTT/Dcard board name, e.g. 'Gossiping', '網路購物'.
    source_board = Column(String(100))

    # Link to the original item.
    source_url = Column(String(500))

    # Unique identifier assigned by the source platform (used for de-duplication).
    source_id = Column(String(100))

    # --- Content ---

    title = Column(String(500), nullable=False)

    # Full text or a summary of the item.
    content = Column(Text)

    # Author or media outlet name.
    author = Column(String(100))

    # --- Engagement metrics ---

    # Popularity score (push count, likes, views, ... depending on the source).
    popularity_score = Column(Integer, default=0)

    # Number of comments.
    comment_count = Column(Integer, default=0)

    # --- Classification ---

    # Product category this item maps to, e.g. '美妝', '3C', '家電', '服飾'.
    category = Column(String(100), index=True)

    # JSON-encoded list of tags (decoded by to_dict()).
    tags = Column(Text)

    # --- Timestamps ---

    # Original publication time on the source platform.
    published_at = Column(DateTime)

    # Date the trend belongs to (used for aggregation queries).
    trend_date = Column(Date, nullable=False, index=True)

    # Crawl time; defaults to datetime.now (naive local time) at insert.
    created_at = Column(DateTime, default=datetime.now)

    # --- AI analysis results ---

    # Sentiment label: 'positive', 'negative', 'neutral'.
    sentiment = Column(String(20))

    # Summary generated by Ollama.
    ai_summary = Column(Text)

    # Relevance to product sales, documented range 0-1.
    relevance_score = Column(Float, default=0.0)

    # Composite indexes for the common "filter by X within a date" queries,
    # plus the de-duplication constraint.
    # NOTE(review): source_id is nullable; most databases treat NULLs as
    # distinct inside a unique constraint, so rows without a source_id would
    # not be de-duplicated by uq_source_record -- confirm this is intended.
    __table_args__ = (
        Index('idx_trend_source_date', 'source', 'trend_date'),
        Index('idx_trend_category_date', 'category', 'trend_date'),
        Index('idx_trend_popularity', 'popularity_score', 'trend_date'),
        UniqueConstraint('source', 'source_id', name='uq_source_record'),
    )

    def to_dict(self):
        """Return a plain-dict view of the record.

        ``content`` is cut to its first 200 characters, ``tags`` is decoded
        from JSON (empty list when unset) and date/time values are rendered
        with ``isoformat()``. ``source_id`` and ``created_at`` are not part
        of the output.
        """
        content_preview = self.content[:200] if self.content else None
        tag_list = json.loads(self.tags) if self.tags else []

        published_iso = None
        if self.published_at:
            published_iso = self.published_at.isoformat()

        trend_date_iso = None
        if self.trend_date:
            trend_date_iso = self.trend_date.isoformat()

        return dict(
            id=self.id,
            source=self.source,
            source_board=self.source_board,
            source_url=self.source_url,
            title=self.title,
            content=content_preview,
            author=self.author,
            popularity_score=self.popularity_score,
            comment_count=self.comment_count,
            category=self.category,
            tags=tag_list,
            published_at=published_iso,
            trend_date=trend_date_iso,
            sentiment=self.sentiment,
            ai_summary=self.ai_summary,
            relevance_score=self.relevance_score,
        )


class TrendKeyword(Base):
    """Trending keyword: a popular term extracted from collected articles.

    ``(keyword, source, trend_date)`` is declared unique, i.e. one row per
    keyword per source per day.
    """
    __tablename__ = 'trend_keywords'

    id = Column(Integer, primary_key=True)

    # The keyword itself.
    keyword = Column(String(100), nullable=False, index=True)

    # Kind of keyword: 'product', 'brand', 'event' or 'general'.
    keyword_type = Column(String(50), default='general')

    # Source platform.
    source = Column(String(50), nullable=False)

    # Product category.
    category = Column(String(100), index=True)

    # Number of mentions.
    mention_count = Column(Integer, default=1)

    # Date the trend belongs to.
    trend_date = Column(Date, nullable=False, index=True)

    # Average sentiment score, documented range -1 to 1.
    sentiment_avg = Column(Float, default=0.0)

    # JSON-encoded related keywords (decoded by to_dict()).
    related_keywords = Column(Text)

    # Both timestamps default to datetime.now (naive local time);
    # updated_at is also refreshed on UPDATE via onupdate.
    created_at = Column(DateTime, default=datetime.now)
    updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now)

    __table_args__ = (
        Index('idx_keyword_date_count', 'trend_date', 'mention_count'),
        UniqueConstraint('keyword', 'source', 'trend_date', name='uq_keyword_source_date'),
    )

    def to_dict(self):
        """Return a plain-dict view of the keyword row.

        ``trend_date`` is rendered with ``isoformat()`` and
        ``related_keywords`` is decoded from JSON (empty list when unset).
        The ``created_at`` / ``updated_at`` timestamps are not included.
        """
        if self.trend_date:
            trend_date_iso = self.trend_date.isoformat()
        else:
            trend_date_iso = None

        if self.related_keywords:
            related = json.loads(self.related_keywords)
        else:
            related = []

        return dict(
            id=self.id,
            keyword=self.keyword,
            keyword_type=self.keyword_type,
            source=self.source,
            category=self.category,
            mention_count=self.mention_count,
            trend_date=trend_date_iso,
            sentiment_avg=self.sentiment_avg,
            related_keywords=related,
        )


class TrendAnalysis(Base):
    """Trend analysis report: analysis results generated by Ollama.

    ``(analysis_date, category, analysis_type)`` is declared unique.
    """
    __tablename__ = 'trend_analysis'

    id = Column(Integer, primary_key=True)

    # Date the analysis covers.
    analysis_date = Column(Date, nullable=False, index=True)

    # Product category analysed; NULL means "all categories".
    category = Column(String(100), index=True)

    # Analysis type: 'daily_summary', 'weekly_trend', 'hot_topic',
    # 'marketing_insight'.
    analysis_type = Column(String(50), nullable=False)

    # --- AI analysis content ---

    # Summary text.
    summary = Column(Text, nullable=False)

    # JSON: list of hot keywords.
    hot_keywords = Column(Text)

    # JSON: list of hot topics.
    hot_topics = Column(Text)

    # JSON: consumer insights.
    consumer_insights = Column(Text)

    # JSON: marketing suggestions.
    marketing_suggestions = Column(Text)

    # JSON: copywriting hints.
    copywriting_hints = Column(Text)

    # --- Source statistics ---

    # JSON: per-source data statistics.
    source_stats = Column(Text)

    # Number of records covered by the analysis.
    record_count = Column(Integer, default=0)

    # --- Ollama metadata ---

    # Model used for generation.
    model_used = Column(String(50))

    # Generation time in seconds.
    generation_time = Column(Float)

    created_at = Column(DateTime, default=datetime.now)

    # NOTE(review): category is nullable and part of this constraint; most
    # databases treat NULLs as distinct in unique constraints, so multiple
    # "all categories" rows for the same date/type may be accepted --
    # confirm whether callers de-duplicate those themselves.
    __table_args__ = (
        UniqueConstraint('analysis_date', 'category', 'analysis_type', name='uq_analysis'),
    )

    def to_dict(self):
        """Return a plain-dict view of the report.

        JSON text columns are decoded; unset list-type columns become ``[]``
        and unset ``source_stats`` becomes ``{}``. Date/time values are
        rendered with ``isoformat()`` (``None`` when unset).
        """
        payload = {
            'id': self.id,
            'analysis_date': self.analysis_date.isoformat() if self.analysis_date else None,
            'category': self.category,
            'analysis_type': self.analysis_type,
            'summary': self.summary,
        }

        # The five list-shaped JSON columns share the same decode rule.
        list_columns = (
            'hot_keywords',
            'hot_topics',
            'consumer_insights',
            'marketing_suggestions',
            'copywriting_hints',
        )
        for column_name in list_columns:
            raw_json = getattr(self, column_name)
            payload[column_name] = json.loads(raw_json) if raw_json else []

        raw_stats = self.source_stats
        payload['source_stats'] = json.loads(raw_stats) if raw_stats else {}

        payload['record_count'] = self.record_count
        payload['model_used'] = self.model_used
        payload['generation_time'] = self.generation_time
        payload['created_at'] = self.created_at.isoformat() if self.created_at else None
        return payload


class WebSearchCache(Base):
    """Cache of web search results, used to avoid repeating identical queries."""
    __tablename__ = 'web_search_cache'

    id = Column(Integer, primary_key=True)

    # --- Query identification ---

    # MD5 hex digest of "{query}:{search_type}" (see generate_hash()).
    query_hash = Column(String(64), nullable=False, unique=True, index=True)

    # Original query string.
    query = Column(String(500), nullable=False)

    # Search type: general, news, shopping, trends.
    search_type = Column(String(50), default='general')

    # --- Result ---

    # Complete result, JSON-encoded.
    result_json = Column(Text, nullable=False)

    # AI-generated summary.
    summary = Column(Text)

    # Number of results.
    result_count = Column(Integer, default=0)

    # --- Metadata ---

    model_used = Column(String(50))
    generation_time = Column(Float)

    # --- Timestamps ---

    created_at = Column(DateTime, default=datetime.now, index=True)

    # Cache expiry time. No column default is defined here; the original note
    # mentions a 24-hour default -- presumably applied by the code that
    # writes the row; verify against the caller.
    expires_at = Column(DateTime)

    __table_args__ = (
        Index('idx_cache_query_type', 'query', 'search_type'),
        Index('idx_cache_expires', 'expires_at'),
    )

    @staticmethod
    def generate_hash(query: str, search_type: str) -> str:
        """Build the cache key for a query.

        Returns the MD5 hex digest of ``"{query}:{search_type}"``. MD5 is
        requested with ``usedforsecurity=False``: it serves as a lookup key,
        not as a security primitive.
        """
        key_material = f"{query}:{search_type}".encode()
        digest = hashlib.md5(key_material, usedforsecurity=False)
        return digest.hexdigest()

    def is_expired(self) -> bool:
        """Tell whether the cache entry has expired.

        An entry without ``expires_at`` counts as expired; otherwise the
        expiry is compared against ``datetime.now()``.
        """
        deadline = self.expires_at
        if deadline:
            return datetime.now() > deadline
        return True

    def to_dict(self):
        """Return a plain-dict view of the cache entry.

        ``result_json`` is decoded and exposed under ``'result'`` (``None``
        when empty), timestamps are rendered with ``isoformat()``, and the
        current expiry state is included as ``'is_expired'``. ``query_hash``
        is not part of the output.
        """
        decoded_result = json.loads(self.result_json) if self.result_json else None
        created_iso = self.created_at.isoformat() if self.created_at else None
        expires_iso = self.expires_at.isoformat() if self.expires_at else None

        return dict(
            id=self.id,
            query=self.query,
            search_type=self.search_type,
            result=decoded_result,
            summary=self.summary,
            result_count=self.result_count,
            model_used=self.model_used,
            generation_time=self.generation_time,
            created_at=created_iso,
            expires_at=expires_iso,
            is_expired=self.is_expired(),
        )


class TelegramUser(Base):
    """Telegram user binding table."""
    __tablename__ = 'telegram_users'

    id = Column(Integer, primary_key=True)

    # Telegram user ID.
    telegram_id = Column(BigInteger, unique=True, nullable=False, index=True)

    # Telegram username.
    telegram_username = Column(String(100))

    # ID of the bound system user (optional); references users.id.
    user_id = Column(Integer, ForeignKey('users.id'))

    # Display name.
    display_name = Column(String(100))

    # Whether the binding is enabled.
    is_active = Column(Boolean, default=True)

    # Whether the user is an administrator.
    is_admin = Column(Boolean, default=False)

    # --- Preferences ---

    # Receive trend notifications.
    notify_trends = Column(Boolean, default=True)

    # Receive the daily summary.
    notify_daily_summary = Column(Boolean, default=True)

    # JSON: list of preferred categories (decoded by to_dict()).
    preferred_categories = Column(Text)

    # Both default to datetime.now at insert. last_active_at has no onupdate
    # hook, so later updates are presumably written by the caller -- verify.
    created_at = Column(DateTime, default=datetime.now)
    last_active_at = Column(DateTime, default=datetime.now)

    def to_dict(self):
        """Return a plain-dict view of the binding.

        ``preferred_categories`` is decoded from JSON (empty list when
        unset) and both timestamps are rendered with ``isoformat()``
        (``None`` when unset).
        """
        def iso_or_none(moment):
            # Same rule as the other models: falsy timestamp -> None.
            return moment.isoformat() if moment else None

        if self.preferred_categories:
            categories = json.loads(self.preferred_categories)
        else:
            categories = []

        return dict(
            id=self.id,
            telegram_id=self.telegram_id,
            telegram_username=self.telegram_username,
            user_id=self.user_id,
            display_name=self.display_name,
            is_active=self.is_active,
            is_admin=self.is_admin,
            notify_trends=self.notify_trends,
            notify_daily_summary=self.notify_daily_summary,
            preferred_categories=categories,
            created_at=iso_or_none(self.created_at),
            last_active_at=iso_or_none(self.last_active_at),
        )


# Target PTT boards (each entry is annotated with the board and why it is tracked)
PTT_BOARDS = [
    # Gossip board -- hot topics
    'Gossiping',
    # Money-saving board -- deals and discount info
    'Lifeismoney',
    # Online-shopping board -- e-commerce trends
    'e-shopping',
    # Beauty board -- beauty trends
    'Beauty',
    # Makeup board -- colour-cosmetics trends
    'MakeUp',
    # Women's board -- female consumer trends
    'WomenTalk',
    # Housing board -- reference for home goods
    'home-sale',
    # Mother-and-baby board -- maternal/infant market
    'BabyMother',
    # Tech industry board -- 3C (consumer electronics) purchasing power
    'Tech_Job',
]

# Target Dcard boards (board names are the Chinese forum names used at runtime)
DCARD_BOARDS = [
    # "Online shopping" -- e-commerce discussion
    '網路購物',
    # "Beauty" -- beauty trends
    '美妝',
    # "Outfits" -- apparel trends
    '穿搭',
    # "3C" -- tech products
    '3C',
    # "Saving money" -- deals and discount info
    '省錢',
    # "Life" -- lifestyle trends
    '生活',
    # "Food" -- dining trends
    '美食',
]

# Board name -> product category (trailing notes gloss the category value)
BOARD_CATEGORY_MAPPING = {
    # --- PTT boards ---
    'Beauty': '美妝',        # beauty
    'MakeUp': '美妝',        # beauty
    'e-shopping': '電商',    # e-commerce
    'Lifeismoney': '優惠',   # deals
    'home-sale': '居家',     # home
    'BabyMother': '母嬰',    # mother & baby
    'Gossiping': '熱門',     # trending
    'WomenTalk': '生活',     # lifestyle
    'Tech_Job': '3C',        # consumer electronics
    # --- Dcard boards ---
    '美妝': '美妝',          # beauty
    '穿搭': '服飾',          # apparel
    '3C': '3C',              # consumer electronics
    '網路購物': '電商',      # e-commerce
    '省錢': '優惠',          # deals
    '生活': '生活',          # lifestyle
    '美食': '美食',          # food
}


def get_category_for_board(board: str) -> str:
    """Return the product category mapped to a board name.

    Looks ``board`` up in ``BOARD_CATEGORY_MAPPING``; boards that are not
    listed there fall back to '其他' ("other").
    """
    try:
        return BOARD_CATEGORY_MAPPING[board]
    except KeyError:
        return '其他'