"""Base classes for market-intelligence adapters.

Phase 3 only provides a read-only discovery plan; it does not send HTTP requests.
"""
from dataclasses import asdict, dataclass
from typing import Any, Dict, Iterable, Sequence


@dataclass(frozen=True)
class CampaignSource:
    """Describe a public campaign entry point.

    An instance is only a description; it does not mean the page has
    actually been crawled. Instances are immutable (``frozen=True``).
    All fields are strings; ``notes`` is optional and defaults to an
    empty string.
    """

    source_key: str
    name: str
    url: str
    campaign_type: str
    notes: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a new ``dict`` keyed by field name."""
        return asdict(self)


@dataclass(frozen=True)
class AdapterSafetyPolicy:
    """Safety policy for crawling a platform.

    Instances are immutable (``frozen=True``). The three ``allow_*``
    switches all default to ``False``, so a policy grants none of them
    unless the caller opts in explicitly.
    """

    request_interval_sec: float
    timeout_sec: int
    max_pages_per_run: int
    allow_login: bool = False
    allow_database_write: bool = False
    allow_scheduler_attach: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a new ``dict`` keyed by field name."""
        return asdict(self)


class MarketIntelAdapter:
    """Base class for market-intelligence platform adapters.

    The class-level defaults below are empty; presumably each platform
    subclass overrides them together with ``campaign_sources``. Nothing
    in this base class sends a network request or writes to a database.
    """

    platform_code = ""
    platform_name = ""
    base_url = ""
    # Keywords matched against a link's URL / visible text by
    # ``score_campaign_link``.
    campaign_url_keywords = ()
    campaign_text_keywords = ()
    # Default policy; the instance is frozen, so sharing it at class
    # level cannot lead to accidental mutation.
    safety_policy = AdapterSafetyPolicy(
        request_interval_sec=2.0,
        timeout_sec=20,
        max_pages_per_run=5,
    )

    def campaign_sources(self) -> Sequence[CampaignSource]:
        """Return the known campaign entry points (none in the base class)."""
        return ()

    def summary(self) -> Dict[str, Any]:
        """Return a plain-dict overview of this adapter.

        Includes the platform identifiers, the number of campaign
        sources, the safety policy as a dict, and a fixed ``phase``
        marker.
        """
        return {
            "platform_code": self.platform_code,
            "platform_name": self.platform_name,
            "base_url": self.base_url,
            "source_count": len(self.campaign_sources()),
            "safety_policy": self.safety_policy.to_dict(),
            "phase": "read_only_adapter_skeleton",
        }

    def build_discovery_plan(self) -> Dict[str, Any]:
        """Build the discovery plan without sending requests or writing to a DB.

        The plan is ``summary()`` extended with the serialised sources
        and three permission flags. The flags are hard-coded to
        ``False`` here rather than read from ``safety_policy``.
        """
        return {
            **self.summary(),
            "network_request_allowed": False,
            "database_write_allowed": False,
            "scheduler_attach_allowed": False,
            "sources": [source.to_dict() for source in self.campaign_sources()],
        }

    def discover_campaigns(self, *, dry_run=True) -> Iterable[CampaignSource]:
        """Return the entry-point descriptions; Phase 3 only permits dry runs.

        Raises:
            RuntimeError: if ``dry_run`` is falsy.
        """
        if not dry_run:
            raise RuntimeError("市場情報 adapter 尚未允許正式 discovery")
        return self.campaign_sources()

    @staticmethod
    def _keyword_hits(keywords, haystack) -> int:
        """Count the usable keywords that occur in the lower-cased *haystack*."""
        # A bare string (e.g. a one-element "tuple" written without its
        # trailing comma) is one keyword, not a sequence of characters.
        if isinstance(keywords, str):
            keywords = (keywords,)
        hits = 0
        for keyword in keywords or ():
            if keyword is None:
                continue
            needle = str(keyword).lower()
            # "" is a substring of every string, so an empty keyword
            # would otherwise add weight to every link.
            if needle and needle in haystack:
                hits += 1
        return hits

    def score_campaign_link(self, href, text) -> int:
        """Score a link for diagnostic ranking using the platform keywords.

        Each URL keyword found in ``href`` adds 4 and each text keyword
        found in ``text`` adds 3; matching is case-insensitive substring
        matching. Empty or ``None`` keywords are ignored, and falsy
        ``href`` / ``text`` values are treated as empty strings.

        Args:
            href: the link target; coerced with ``str()``.
            text: the link's visible text; coerced with ``str()``.

        Returns:
            The accumulated integer score (0 when nothing matches).
        """
        # Coerce the inputs the same way the keywords are coerced so a
        # non-string value cannot raise AttributeError on ``.lower()``.
        url_text = str(href or "").lower()
        link_text = str(text or "").lower()
        url_hits = self._keyword_hits(self.campaign_url_keywords, url_text)
        text_hits = self._keyword_hits(self.campaign_text_keywords, link_text)
        return 4 * url_hits + 3 * text_hits