Files

94 lines
2.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Base classes for market-intelligence adapters.

Phase 3 only provides a read-only discovery plan; it does not issue HTTP requests.
"""
from dataclasses import asdict, dataclass
from typing import Iterable, Sequence
@dataclass(frozen=True)
class CampaignSource:
    """Describe a public campaign entry point.

    An instance is only a description of where a campaign listing lives;
    it does not mean the entry point has actually been crawled.
    Instances are immutable (``frozen=True``).
    """

    source_key: str
    name: str
    url: str
    campaign_type: str
    # Optional free-form notes; empty when not supplied.
    notes: str = ""

    def to_dict(self) -> dict:
        """Return the fields as a plain ``dict`` keyed by field name."""
        return asdict(self)
@dataclass(frozen=True)
class AdapterSafetyPolicy:
    """Crawl safety policy for a platform.

    The three numeric limits are required; the three ``allow_*`` switches
    all default to ``False``. Instances are immutable (``frozen=True``).
    """

    # Interval between requests and per-request timeout, in seconds
    # (per the ``_sec`` suffix).
    request_interval_sec: float
    timeout_sec: int
    # Page limit for a single run.
    max_pages_per_run: int
    # Permission switches; every one is off unless explicitly enabled.
    allow_login: bool = False
    allow_database_write: bool = False
    allow_scheduler_attach: bool = False

    def to_dict(self) -> dict:
        """Return the fields as a plain ``dict`` keyed by field name."""
        return asdict(self)
class MarketIntelAdapter:
    """Base class for market-intelligence platform adapters.

    The defaults defined here are empty (blank identifiers, no keywords,
    no campaign sources); as a base class, concrete adapters are expected
    to override them.
    """

    platform_code: str = ""
    platform_name: str = ""
    base_url: str = ""
    # Keywords matched against a link's URL / visible text by
    # ``score_campaign_link``.
    campaign_url_keywords: Sequence[str] = ()
    campaign_text_keywords: Sequence[str] = ()
    # Default policy: 2 s between requests, 20 s timeout, at most 5 pages
    # per run, and every ``allow_*`` switch left at its ``False`` default.
    safety_policy: AdapterSafetyPolicy = AdapterSafetyPolicy(
        request_interval_sec=2.0,
        timeout_sec=20,
        max_pages_per_run=5,
    )

    def campaign_sources(self) -> Sequence[CampaignSource]:
        """Return the campaign entry points known to this adapter.

        The base implementation returns an empty tuple.
        """
        return ()

    def summary(self) -> dict:
        """Return a dict summarising the adapter.

        Contains the platform identifiers, the number of campaign sources,
        the safety policy as a dict, and a fixed ``phase`` marker.
        """
        return {
            "platform_code": self.platform_code,
            "platform_name": self.platform_name,
            "base_url": self.base_url,
            "source_count": len(self.campaign_sources()),
            "safety_policy": self.safety_policy.to_dict(),
            "phase": "read_only_adapter_skeleton",
        }

    def build_discovery_plan(self) -> dict:
        """Build the discovery plan without issuing requests or writing to a DB.

        The plan is ``summary()`` extended with three permission flags that
        are always ``False`` here, plus every campaign source as a dict.
        """
        return {
            **self.summary(),
            "network_request_allowed": False,
            "database_write_allowed": False,
            "scheduler_attach_allowed": False,
            "sources": [source.to_dict() for source in self.campaign_sources()],
        }

    def discover_campaigns(self, *, dry_run: bool = True) -> Iterable[CampaignSource]:
        """Return the campaign entry-point descriptions (dry-run only).

        Phase 3 only permits a dry run.

        Raises:
            RuntimeError: if ``dry_run`` is falsy.
        """
        if not dry_run:
            raise RuntimeError("市場情報 adapter 尚未允許正式 discovery")
        return self.campaign_sources()

    @staticmethod
    def _count_keyword_hits(keywords, haystack: str) -> int:
        """Count the keywords that occur (case-insensitively) in *haystack*.

        *haystack* must already be lower-cased.
        """
        # A bare string (e.g. ``("sale")`` without the trailing comma) would
        # otherwise be iterated character by character; treat it as a single
        # keyword.
        if isinstance(keywords, str):
            keywords = (keywords,)
        hits = 0
        for keyword in keywords:
            needle = str(keyword).lower()
            # ``"" in s`` is always True, so an empty keyword would match
            # every link; skip it.
            if needle and needle in haystack:
                hits += 1
        return hits

    def score_campaign_link(self, href, text) -> int:
        """Score a link against the platform's campaign keywords.

        The score is used only for diagnostic ranking. Each URL keyword
        found in *href* adds 4; each text keyword found in *text* adds 3.
        Matching is case-insensitive, and falsy *href* / *text* values
        (such as ``None``) are treated as empty strings.

        Returns:
            The accumulated integer score (0 when nothing matches).
        """
        # ``str()`` keeps truthy non-string values from failing on ``.lower()``.
        url_text = str(href or "").lower()
        link_text = str(text or "").lower()
        url_hits = self._count_keyword_hits(self.campaign_url_keywords, url_text)
        text_hits = self._count_keyword_hits(self.campaign_text_keywords, link_text)
        return 4 * url_hits + 3 * text_hits