#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Crawler configuration loader.

Provides helpers to read and manage the crawler configuration stored in
``data/crawler_config.json`` next to this module. Every accessor re-reads the
file on each call, so edits to the file are picked up without a restart.
"""

import os
import json
from datetime import datetime
from typing import Dict, Any, Optional

CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'data', 'crawler_config.json')


def load_crawler_config() -> Dict[str, Any]:
    """Load the crawler configuration file.

    Returns:
        The parsed configuration dictionary. The default configuration (see
        ``get_default_config``) is returned instead when the file does not
        exist, is not valid JSON, or its top-level value is not a JSON object.
    """
    try:
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        # A missing config file is an expected situation: use the defaults.
        return get_default_config()
    except json.JSONDecodeError as e:
        print(f"❌ 配置文件格式錯誤: {e}")
        return get_default_config()

    if not isinstance(config, dict):
        # Callers use ``config.get(...)``; a list/string/number at the top
        # level would crash them, so treat it like a malformed file.
        print("❌ 配置文件格式錯誤: 頂層必須是 JSON 物件")
        return get_default_config()
    return config


def get_default_config() -> Dict[str, Any]:
    """Return the built-in default configuration.

    A fresh dictionary is built on every call, so callers may mutate the
    result freely.
    """
    return {
        "crawlers": {
            "momo_main": {"enabled": True, "schedule_hours": 4},
            "edm_promo": {"enabled": True, "schedule_hours": 4},
            "festival_11": {"enabled": False, "schedule_hours": 6},
            "mothers_day_2026": {
                "enabled": False,
                "schedule_hours": 4,
                "lpn_code": "",
                "activity_name": "母親節超值限時購",
                "page_type": "mothers_day",
            },
            "valentine_520_2026": {
                "enabled": False,
                "schedule_hours": 4,
                "lpn_code": "",
                "activity_name": "520情人節限定購物",
                "page_type": "valentine_520",
            },
            "labor_day_2026": {
                "enabled": False,
                "schedule_hours": 4,
                "lpn_code": "",
                "activity_name": "勞動節購物優惠",
                "page_type": "labor_day",
            },
        },
        "settings": {
            "auto_import_enabled": True,
            "auto_import_interval_minutes": 30,
        },
    }


def is_crawler_enabled(crawler_key: str) -> bool:
    """Check whether the given crawler is enabled.

    Args:
        crawler_key: Crawler identifier (e.g. ``momo_main``, ``edm_promo``,
            ``festival_11``).

    Returns:
        The crawler's ``enabled`` value; ``False`` when the crawler or the
        flag is missing from the configuration.
    """
    config = load_crawler_config()
    crawler = config.get('crawlers', {}).get(crawler_key, {})
    return crawler.get('enabled', False)


def get_crawler_schedule(crawler_key: str) -> Optional[int]:
    """Return how often the crawler runs, in hours.

    Args:
        crawler_key: Crawler identifier.

    Returns:
        The crawler's ``schedule_hours`` (4 when the key is absent), or
        ``None`` when the crawler is disabled or unknown.
    """
    # Read the file once and derive both the enabled flag and the schedule
    # from the same snapshot instead of parsing the config twice.
    config = load_crawler_config()
    crawler = config.get('crawlers', {}).get(crawler_key, {})
    if not crawler.get('enabled', False):
        return None
    return crawler.get('schedule_hours', 4)


def get_crawler_info(crawler_key: str) -> Optional[Dict[str, Any]]:
    """Return the full configuration entry of a crawler.

    Args:
        crawler_key: Crawler identifier.

    Returns:
        The crawler's entry, or ``None`` when the crawler does not exist.
    """
    config = load_crawler_config()
    return config.get('crawlers', {}).get(crawler_key)


def get_enabled_crawlers() -> Dict[str, Dict[str, Any]]:
    """Return all enabled crawlers.

    Returns:
        A ``{crawler_key: crawler_info}`` dictionary of the crawlers whose
        ``enabled`` value is truthy.
    """
    config = load_crawler_config()
    crawlers = config.get('crawlers', {})
    return {key: info for key, info in crawlers.items() if info.get('enabled', False)}


def get_paused_crawlers() -> Dict[str, Dict[str, Any]]:
    """Return all paused (disabled) crawlers.

    Returns:
        A ``{crawler_key: crawler_info}`` dictionary of the crawlers whose
        ``enabled`` value is missing or falsy.
    """
    config = load_crawler_config()
    crawlers = config.get('crawlers', {})
    return {key: info for key, info in crawlers.items() if not info.get('enabled', False)}


def update_crawler_status(crawler_key: str, enabled: bool, reason: str = "") -> bool:
    """Enable or disable a crawler and persist the change to the config file.

    Args:
        crawler_key: Crawler identifier.
        enabled: ``True`` to enable the crawler, ``False`` to disable it.
        reason: Optional pause reason, stored as ``pause_reason`` when the
            crawler is being disabled.

    Returns:
        ``True`` when the configuration was written successfully; ``False``
        when the crawler does not exist or the update failed. Failures are
        reported on stdout rather than raised.
    """
    try:
        config = load_crawler_config()
        crawlers = config.get('crawlers', {})
        if crawler_key not in crawlers:
            print(f"❌ 爬蟲 {crawler_key} 不存在")
            return False

        crawler = crawlers[crawler_key]
        crawler['enabled'] = enabled
        if enabled:
            crawler['status'] = 'active'
            crawler.pop('pause_reason', None)
        else:
            # Keep ``status`` in sync with ``enabled`` even when no reason is
            # given; only the reason itself is optional.
            crawler['status'] = 'paused'
            if reason:
                crawler['pause_reason'] = reason

        # The default configuration has no "metadata" section, so create it
        # on demand instead of failing with KeyError.
        config.setdefault('metadata', {})['last_updated'] = datetime.now().isoformat()

        # Serialize before opening the file: opening with 'w' truncates it,
        # so a serialization error must not be able to wipe the existing
        # configuration.
        serialized = json.dumps(config, ensure_ascii=False, indent=2)

        config_dir = os.path.dirname(CONFIG_PATH)
        if config_dir:
            # The data directory may not exist yet when running on defaults.
            os.makedirs(config_dir, exist_ok=True)

        with open(CONFIG_PATH, 'w', encoding='utf-8') as f:
            f.write(serialized)

        return True
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on a boolean result,
        # so any failure is reported and turned into ``False``.
        print(f"❌ 更新配置失敗: {e}")
        return False


def print_crawler_status() -> None:
    """Print the status of every configured crawler (for manual inspection)."""
    config = load_crawler_config()
    crawlers = config.get('crawlers', {})

    print("\n" + "=" * 60)
    print("爬蟲配置狀態")
    print("=" * 60)

    for key, info in crawlers.items():
        status_icon = "✅" if info.get('enabled') else "⏸️"
        print(f"\n{status_icon} [{key}] {info.get('name', 'N/A')}")
        print(f" 狀態: {info.get('status', 'unknown')}")
        print(f" 啟用: {info.get('enabled', False)}")

        if not info.get('enabled'):
            print(f" 暫停原因: {info.get('pause_reason', 'N/A')}")
            print(f" 暫停日期: {info.get('paused_date', 'N/A')}")
        else:
            print(f" 執行頻率: 每 {info.get('schedule_hours', 'N/A')} 小時")

        if 'notes' in info:
            print(f" 備註: {info.get('notes')}")

    print("\n" + "=" * 60 + "\n")


if __name__ == "__main__":
    # Manual check: print the current configuration.
    print_crawler_status()