All checks were successful
CD Pipeline / deploy (push) Successful in 1m12s
- 新增通用促銷活動爬蟲函式 run_promo_event_task() - 更新 crawler_config_loader.py 新增三個活動配置 - 更新 run_scheduler.py 動態註冊促銷活動爬蟲 - 新增 API 端點 /api/run_promo_event_task - 新增三個前端儀表板路由(/edm/mothers_day, /edm/valentine_520, /edm/labor_day) - 更新所有儀表板頁籤列表 - 新增配置檔案 services/data/crawler_config.json - 新增使用文件 docs/guides/promo_event_crawler_guide.md - 更新 agent_actions.py 允許重試列表
183 lines
5.8 KiB
Python
183 lines
5.8 KiB
Python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Crawler configuration loader.

Provides helpers to read and manage the crawler configuration dynamically.
"""
|
||
import os
|
||
import json
|
||
from typing import Dict, Any, Optional
|
||
|
||
# Location of the JSON config file: data/crawler_config.json next to this module,
# so the path does not depend on the process's working directory.
CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'data', 'crawler_config.json')
|
||
|
||
def load_crawler_config() -> Dict[str, Any]:
    """
    Load the crawler configuration from ``CONFIG_PATH``.

    Falls back to ``get_default_config()`` when the file does not exist,
    cannot be decoded as UTF-8 / parsed as JSON, or parses to something other
    than a JSON object.

    Returns:
        dict: The complete configuration dictionary.
    """
    try:
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        # No config file yet: use the built-in defaults.
        return get_default_config()
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"❌ 配置文件格式錯誤: {e}")
        return get_default_config()

    # json.load happily returns lists, strings, numbers or None for valid JSON.
    # Every caller uses dict methods on the result, so anything that is not a
    # JSON object is treated as a malformed config instead of crashing later.
    if not isinstance(config, dict):
        print(f"❌ 配置文件格式錯誤: 頂層必須是 JSON 物件,實際為 {type(config).__name__}")
        return get_default_config()

    return config
|
||
|
||
def get_default_config() -> Dict[str, Any]:
    """
    Build the fallback configuration.

    A fresh dictionary is created on every call, so callers may mutate the
    result freely.

    Returns:
        dict: A config with a ``crawlers`` section and a ``settings`` section.
    """

    def promo_event(activity_name: str, page_type: str) -> Dict[str, Any]:
        # Promo-event crawlers share the same shape and are disabled by default.
        return {
            "enabled": False,
            "schedule_hours": 4,
            "lpn_code": "",
            "activity_name": activity_name,
            "page_type": page_type,
        }

    crawlers: Dict[str, Any] = {}
    crawlers["momo_main"] = {"enabled": True, "schedule_hours": 4}
    crawlers["edm_promo"] = {"enabled": True, "schedule_hours": 4}
    crawlers["festival_11"] = {"enabled": False, "schedule_hours": 6}
    crawlers["mothers_day_2026"] = promo_event("母親節超值限時購", "mothers_day")
    crawlers["valentine_520_2026"] = promo_event("520情人節限定購物", "valentine_520")
    crawlers["labor_day_2026"] = promo_event("勞動節購物優惠", "labor_day")

    settings = {
        "auto_import_enabled": True,
        "auto_import_interval_minutes": 30,
    }

    return {"crawlers": crawlers, "settings": settings}
|
||
|
||
def is_crawler_enabled(crawler_key: str) -> bool:
    """
    Report whether the given crawler is switched on in the current config.

    Args:
        crawler_key: Crawler identifier, i.e. a key of the config's
            ``crawlers`` section (e.g. ``momo_main``, ``edm_promo``).

    Returns:
        The crawler's stored ``enabled`` value; ``False`` when the crawler or
        its ``enabled`` flag is missing.
    """
    registry = load_crawler_config().get('crawlers', {})
    entry = registry.get(crawler_key, {})
    return entry.get('enabled', False)
|
||
|
||
def get_crawler_schedule(crawler_key: str) -> Optional[int]:
    """
    Return how often a crawler should run, in hours.

    The config is loaded once, so the enabled check and the schedule lookup
    both see the same snapshot of the file.

    Args:
        crawler_key: Crawler identifier (a key of the ``crawlers`` section).

    Returns:
        The crawler's ``schedule_hours`` (4 when the key is absent), or
        ``None`` when the crawler is disabled or not configured.
    """
    crawler = load_crawler_config().get('crawlers', {}).get(crawler_key, {})

    # Same rule as is_crawler_enabled(): a missing flag counts as disabled.
    if not crawler.get('enabled', False):
        return None

    return crawler.get('schedule_hours', 4)
|
||
|
||
def get_crawler_info(crawler_key: str) -> Optional[Dict[str, Any]]:
    """
    Look up the full configuration entry of a single crawler.

    Args:
        crawler_key: Crawler identifier (a key of the ``crawlers`` section).

    Returns:
        The crawler's entry as stored in the config, or ``None`` when no such
        crawler is configured.
    """
    all_crawlers = load_crawler_config().get('crawlers', {})
    return all_crawlers.get(crawler_key)
|
||
|
||
def get_enabled_crawlers() -> Dict[str, Dict[str, Any]]:
    """
    Collect every crawler whose ``enabled`` flag is truthy.

    Returns:
        dict: Mapping ``{crawler_key: crawler_info}`` of the enabled crawlers,
        in config order.
    """
    active: Dict[str, Dict[str, Any]] = {}
    for name, entry in load_crawler_config().get('crawlers', {}).items():
        if entry.get('enabled', False):
            active[name] = entry
    return active
|
||
|
||
def get_paused_crawlers() -> Dict[str, Dict[str, Any]]:
    """
    Collect every crawler whose ``enabled`` flag is falsy or missing.

    Returns:
        dict: Mapping ``{crawler_key: crawler_info}`` of the paused crawlers,
        in config order.
    """
    paused: Dict[str, Dict[str, Any]] = {}
    for name, entry in load_crawler_config().get('crawlers', {}).items():
        if entry.get('enabled', False):
            continue
        paused[name] = entry
    return paused
|
||
|
||
def update_crawler_status(crawler_key: str, enabled: bool, reason: str = "") -> bool:
    """
    Enable or pause a crawler and write the change back to ``CONFIG_PATH``.

    Enabling sets ``status`` to ``'active'`` and drops any stored
    ``pause_reason``. Disabling sets ``status`` to ``'paused'`` and, when a
    reason is given, records it as ``pause_reason``. In both cases
    ``metadata.last_updated`` is refreshed with the current local time.

    Args:
        crawler_key: Crawler identifier (a key of the ``crawlers`` section).
        enabled: True to enable the crawler, False to pause it.
        reason: Optional explanation stored when pausing.

    Returns:
        bool: True when the config was updated and saved; False when the
        crawler does not exist or any error occurred (the error is printed).
    """
    try:
        config = load_crawler_config()
        crawlers = config.get('crawlers', {})

        if crawler_key not in crawlers:
            print(f"❌ 爬蟲 {crawler_key} 不存在")
            return False

        crawler = crawlers[crawler_key]
        crawler['enabled'] = enabled

        if enabled:
            crawler['status'] = 'active'
            crawler.pop('pause_reason', None)
        else:
            # Mark as paused even when no reason is supplied; otherwise a
            # disabled crawler could keep reporting status 'active'.
            crawler['status'] = 'paused'
            if reason:
                crawler['pause_reason'] = reason

        # Refresh the timestamp. The 'metadata' section may be absent (the
        # default config has none), so create it on demand.
        from datetime import datetime
        config.setdefault('metadata', {})['last_updated'] = datetime.now().isoformat()

        # Persist the updated configuration.
        with open(CONFIG_PATH, 'w', encoding='utf-8') as f:
            json.dump(config, f, ensure_ascii=False, indent=2)

        return True
    except Exception as e:
        # Best-effort API: report the failure and signal it via the return value.
        print(f"❌ 更新配置失敗: {e}")
        return False
|
||
|
||
def print_crawler_status():
    """Print a status report for every configured crawler (for manual checks)."""
    registry = load_crawler_config().get('crawlers', {})
    rule = "=" * 60

    # Report header.
    print("\n" + rule)
    print("爬蟲配置狀態")
    print(rule)

    for key, info in registry.items():
        is_on = info.get('enabled')
        if is_on:
            status_icon = "✅"
        else:
            status_icon = "⏸️"

        print(f"\n{status_icon} [{key}] {info.get('name', 'N/A')}")
        print(f" 狀態: {info.get('status', 'unknown')}")
        print(f" 啟用: {info.get('enabled', False)}")

        if is_on:
            # Enabled crawlers show their schedule.
            print(f" 執行頻率: 每 {info.get('schedule_hours', 'N/A')} 小時")
        else:
            # Paused crawlers show why and since when.
            print(f" 暫停原因: {info.get('pause_reason', 'N/A')}")
            print(f" 暫停日期: {info.get('paused_date', 'N/A')}")

        if 'notes' in info:
            print(f" 備註: {info['notes']}")

    # Report footer.
    print("\n" + rule + "\n")
|
||
|
||
if __name__ == "__main__":
    # Manual check: print the current configuration when run as a script.
    print_crawler_status()
|