Files
ewoooc/services/crawler_config_loader.py
OoO af260c4a01
All checks were successful
CD Pipeline / deploy (push) Successful in 1m12s
feat: 新增三個促銷活動爬蟲支援(母親節、520情人節、勞動節)
- 新增通用促銷活動爬蟲函式 run_promo_event_task()
- 更新 crawler_config_loader.py 新增三個活動配置
- 更新 run_scheduler.py 動態註冊促銷活動爬蟲
- 新增 API 端點 /api/run_promo_event_task
- 新增三個前端儀表板路由(/edm/mothers_day, /edm/valentine_520, /edm/labor_day)
- 更新所有儀表板頁籤列表
- 新增配置檔案 services/data/crawler_config.json
- 新增使用文件 docs/guides/promo_event_crawler_guide.md
- 更新 agent_actions.py 允許重試列表
2026-04-28 13:57:44 +08:00

183 lines
5.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
爬蟲配置載入器
提供動態讀取和管理爬蟲配置的功能
"""
import os
import json
from typing import Dict, Any, Optional
# Location of the JSON configuration file: the "data/crawler_config.json"
# file next to this module (resolved from this module's own directory).
CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'data', 'crawler_config.json')
def load_crawler_config() -> Dict[str, Any]:
    """Load the crawler configuration from ``CONFIG_PATH``.

    The built-in defaults from ``get_default_config()`` are returned instead
    when the file does not exist, when it is not valid JSON, or when its
    top-level JSON value is not an object.

    Returns:
        dict: The complete configuration dictionary.
    """
    try:
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        # No config file yet: fall back to the built-in defaults.
        return get_default_config()
    except json.JSONDecodeError as e:
        print(f"❌ 配置文件格式錯誤: {e}")
        return get_default_config()

    if not isinstance(config, dict):
        # Syntactically valid JSON whose root is a list/string/number/null
        # would make every caller's ``config.get(...)`` raise AttributeError,
        # so treat it like any other malformed config file.
        print(f"❌ 配置文件格式錯誤: 頂層必須是 JSON 物件,實際為 {type(config).__name__}")
        return get_default_config()
    return config
def get_default_config() -> Dict[str, Any]:
    """Build the built-in fallback configuration.

    A brand-new dictionary is constructed on every call, so callers may
    mutate the result freely.

    Returns:
        dict: A config with a "crawlers" section and a "settings" section.
    """

    def _promo_event(activity_name: str, page_type: str) -> Dict[str, Any]:
        # Every promo-event crawler defaults to disabled, a 4-hour schedule
        # and an empty lpn_code; only the name and page type differ.
        return {
            "enabled": False,
            "schedule_hours": 4,
            "lpn_code": "",
            "activity_name": activity_name,
            "page_type": page_type,
        }

    crawlers: Dict[str, Any] = {}
    crawlers["momo_main"] = {"enabled": True, "schedule_hours": 4}
    crawlers["edm_promo"] = {"enabled": True, "schedule_hours": 4}
    crawlers["festival_11"] = {"enabled": False, "schedule_hours": 6}
    crawlers["mothers_day_2026"] = _promo_event("母親節超值限時購", "mothers_day")
    crawlers["valentine_520_2026"] = _promo_event("520情人節限定購物", "valentine_520")
    crawlers["labor_day_2026"] = _promo_event("勞動節購物優惠", "labor_day")

    settings = {
        "auto_import_enabled": True,
        "auto_import_interval_minutes": 30,
    }

    return {"crawlers": crawlers, "settings": settings}
def is_crawler_enabled(crawler_key: str) -> bool:
    """Report whether a crawler is switched on in the current configuration.

    Args:
        crawler_key: Crawler identifier, i.e. a key of the config's
            "crawlers" section (e.g. momo_main, edm_promo, festival_11).

    Returns:
        bool: The crawler's stored "enabled" value; False when the crawler
        or its "enabled" flag is missing.
    """
    all_crawlers = load_crawler_config().get('crawlers', {})
    entry = all_crawlers.get(crawler_key, {})
    return entry.get('enabled', False)
def get_crawler_schedule(crawler_key: str) -> Optional[int]:
    """Return how often a crawler should run, in hours.

    The configuration is loaded exactly once, so the "enabled" check and the
    schedule value always come from the same snapshot of the config file.

    Args:
        crawler_key: Crawler identifier (a key of the config's "crawlers"
            section).

    Returns:
        int: The crawler's "schedule_hours" value (4 when the key is absent),
        or None when the crawler is disabled or not configured.
    """
    crawler = load_crawler_config().get('crawlers', {}).get(crawler_key, {})
    # Same rule as is_crawler_enabled(): a missing crawler or a missing
    # "enabled" flag counts as disabled.
    if not crawler.get('enabled', False):
        return None
    return crawler.get('schedule_hours', 4)
def get_crawler_info(crawler_key: str) -> Optional[Dict[str, Any]]:
    """Look up the full configuration entry of a single crawler.

    Args:
        crawler_key: Crawler identifier (a key of the config's "crawlers"
            section).

    Returns:
        dict: The crawler's entry, or None when no such crawler is configured.
    """
    crawlers = load_crawler_config().get('crawlers', {})
    return crawlers.get(crawler_key)
def get_enabled_crawlers() -> Dict[str, Dict[str, Any]]:
    """Collect every crawler whose "enabled" flag is truthy.

    Returns:
        dict: Mapping of ``{crawler_key: crawler_info}`` for enabled crawlers,
        in the same order as they appear in the configuration.
    """
    enabled: Dict[str, Dict[str, Any]] = {}
    for key, info in load_crawler_config().get('crawlers', {}).items():
        if info.get('enabled', False):
            enabled[key] = info
    return enabled
def get_paused_crawlers() -> Dict[str, Dict[str, Any]]:
    """Collect every crawler whose "enabled" flag is falsy or missing.

    Returns:
        dict: Mapping of ``{crawler_key: crawler_info}`` for paused crawlers,
        in the same order as they appear in the configuration.
    """
    paused: Dict[str, Dict[str, Any]] = {}
    for key, info in load_crawler_config().get('crawlers', {}).items():
        if info.get('enabled', False):
            continue  # enabled crawlers are not part of the result
        paused[key] = info
    return paused
def update_crawler_status(crawler_key: str, enabled: bool, reason: str = "") -> bool:
    """Enable or disable a crawler and persist the change to ``CONFIG_PATH``.

    Enabling sets the crawler's "status" to "active" and removes any stored
    "pause_reason". Disabling sets "status" to "paused" and, when ``reason``
    is non-empty, records it as "pause_reason". The config's
    ``metadata.last_updated`` timestamp is refreshed on every successful
    update; the "metadata" section is created if the config has none.

    Args:
        crawler_key: Crawler identifier (a key of the config's "crawlers"
            section).
        enabled: True to enable the crawler, False to disable it.
        reason: Optional reason for disabling the crawler.

    Returns:
        bool: True when the change was written; False when the crawler does
        not exist or any error occurred (the error is printed, not raised).
    """
    from datetime import datetime

    try:
        config = load_crawler_config()
        crawlers = config.get('crawlers', {})
        if crawler_key not in crawlers:
            print(f"❌ 爬蟲 {crawler_key} 不存在")
            return False

        crawler = crawlers[crawler_key]
        crawler['enabled'] = enabled
        if enabled:
            crawler['status'] = 'active'
            crawler.pop('pause_reason', None)
        else:
            # Keep "status" in sync with "enabled" even when no reason is
            # given; otherwise a disabled crawler could still read "active".
            crawler['status'] = 'paused'
            if reason:
                crawler['pause_reason'] = reason

        # Refresh the timestamp. The default config (and possibly older
        # config files) has no "metadata" section, so create it on demand
        # instead of failing with KeyError.
        config.setdefault('metadata', {})['last_updated'] = datetime.now().isoformat()

        # Serialize BEFORE opening the file: opening with 'w' truncates it,
        # so a serialization error must not leave an empty/partial config.
        payload = json.dumps(config, ensure_ascii=False, indent=2)
        with open(CONFIG_PATH, 'w', encoding='utf-8') as f:
            f.write(payload)
        return True
    except Exception as e:
        # Best-effort API: report the failure and signal it via the return
        # value rather than propagating the exception to the caller.
        print(f"❌ 更新配置失敗: {e}")
        return False
def print_crawler_status():
    """Print a human-readable status report for every configured crawler.

    For each crawler the report shows its key, name, status and enabled
    flag, followed by either the run frequency (enabled crawlers) or the
    pause reason and pause date (disabled crawlers), plus optional notes.
    """
    divider = "=" * 60
    crawlers = load_crawler_config().get('crawlers', {})

    print("\n" + divider)
    print("爬蟲配置狀態")
    print(divider)

    for key, info in crawlers.items():
        # NOTE(review): the icon for enabled crawlers is an empty string in
        # this copy of the file; possibly an emoji lost in transit - confirm.
        if info.get('enabled'):
            status_icon = ""
        else:
            status_icon = "⏸️"

        print(f"\n{status_icon} [{key}] {info.get('name', 'N/A')}")
        print(f" 狀態: {info.get('status', 'unknown')}")
        print(f" 啟用: {info.get('enabled', False)}")

        if info.get('enabled'):
            print(f" 執行頻率: 每 {info.get('schedule_hours', 'N/A')} 小時")
        else:
            print(f" 暫停原因: {info.get('pause_reason', 'N/A')}")
            print(f" 暫停日期: {info.get('paused_date', 'N/A')}")

        if 'notes' in info:
            print(f" 備註: {info.get('notes')}")

    print("\n" + divider + "\n")
if __name__ == "__main__":
    # Manual check: when run as a script, print the current configuration.
    print_crawler_status()