Files
ewoooc/services/market_intel/manual_sample_plan.py
OoO 774f1b4b45
All checks were successful
CD Pipeline / deploy (push) Successful in 1m3s
新增市場情報人工樣本抓取計畫
2026-05-19 00:09:56 +08:00

168 lines
6.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""市場情報第一次人工 sample fetch 計畫。
本模組只組裝操作員檢查表與樣本範圍,不抓外部網站、不寫 DB、不掛排程。
"""
# Platform codes in the order they are prioritised when adapters are sorted
# for the manual sample; codes not listed here sort after all listed ones.
SAMPLE_PLATFORM_ORDER = ("pchome", "momo", "coupang", "shopee")
# Upper bound on how many campaign sources are kept per platform in a sample.
MAX_SAMPLE_SOURCES_PER_PLATFORM = 1
def _status_value(runtime_status, name, default=False):
if isinstance(runtime_status, dict):
return runtime_status.get(name, default)
return getattr(runtime_status, name, default)
def _sort_adapters(adapters):
    """Return a new list of adapters ordered by ``SAMPLE_PLATFORM_ORDER``.

    Adapters whose ``platform_code`` is not listed share the lowest priority
    and keep their relative input order, because the sort is stable.
    """
    rank_by_code = {}
    for position, code in enumerate(SAMPLE_PLATFORM_ORDER):
        rank_by_code[code] = position
    unlisted_rank = len(rank_by_code)

    def _rank(adapter):
        return rank_by_code.get(adapter.platform_code, unlisted_rank)

    ordered = list(adapters)
    ordered.sort(key=_rank)
    return ordered
def _build_platform_sample(adapter, priority):
    """Describe one platform's manual sample as a plain dict.

    Only the first ``MAX_SAMPLE_SOURCES_PER_PLATFORM`` entries returned by
    ``adapter.campaign_sources()`` are kept. Nothing is fetched or written
    here; the result always reports ``blocked_preview_only`` and
    ``not_executed``.

    Args:
        adapter: Platform adapter providing ``platform_code``,
            ``platform_name``, ``base_url``, ``safety_policy`` and
            ``campaign_sources()``.
        priority: Position of this platform in the sample order.

    Returns:
        Dict with platform identity, pacing settings, the selected sources
        (each via ``to_dict()``) and the diagnostics expected from a fetch.
    """
    chosen = list(adapter.campaign_sources())[:MAX_SAMPLE_SOURCES_PER_PLATFORM]

    sample = {
        "priority": priority,
        "platform_code": adapter.platform_code,
        "platform_name": adapter.platform_name,
        "base_url": adapter.base_url,
        "sample_source_count": len(chosen),
        "max_sources_per_manual_sample": MAX_SAMPLE_SOURCES_PER_PLATFORM,
    }

    policy = adapter.safety_policy
    sample["request_interval_sec"] = policy.request_interval_sec
    sample["timeout_sec"] = policy.timeout_sec

    serialized_sources = []
    for source in chosen:
        serialized_sources.append(source.to_dict())
    sample["selected_sources"] = serialized_sources

    sample["expected_diagnostics"] = list(
        (
            "status_code",
            "content_length",
            "page_hash",
            "title",
            "campaign_link_candidates",
        )
    )
    sample["write_status"] = "blocked_preview_only"
    sample["network_status"] = "not_executed"
    return sample
def build_manual_sample_fetch_plan_preview(
    *,
    runtime_status,
    adapters,
    mcp_fetch_gate,
    live_db_inventory,
):
    """Build the preview of the first manual sample-fetch plan; never fetches.

    Args:
        runtime_status: Dict or attribute-style object read through
            ``_status_value`` for ``enabled``, ``crawler_enabled``,
            ``database_write_allowed`` and ``scheduler_attached``.
        adapters: Iterable of platform adapters; ordered with
            ``_sort_adapters`` and summarised with ``_build_platform_sample``.
        mcp_fetch_gate: Object exposing ``.get``; only
            ``manual_fetch_gate_open`` is read.
        live_db_inventory: Object exposing ``.get``; ``mode``,
            ``read_only_query_executed`` and ``database_write_executed``
            are read.

    Returns:
        Dict describing the plan. ``ready_for_manual_sample_fetch`` and every
        "executed" flag are hard-coded to ``False``. ``blocked_reasons`` lists
        each failed gate check and always ends with
        ``sample_fetch_not_executed_by_api``.
    """
    ordered_adapters = _sort_adapters(list(adapters))
    sample_platforms = []
    for rank, adapter in enumerate(ordered_adapters, start=1):
        sample_platforms.append(_build_platform_sample(adapter, priority=rank))

    # Safe only when the inventory is still in "planned" mode and neither a
    # read-only query nor a write has been executed.
    inventory_safe = bool(
        live_db_inventory.get("mode") == "live_db_inventory_planned"
        and not (
            live_db_inventory.get("read_only_query_executed")
            or live_db_inventory.get("database_write_executed")
        )
    )

    def _flag(flag_name):
        return bool(_status_value(runtime_status, flag_name))

    gate_checks = {
        "platform_adapters_registered": bool(sample_platforms),
        "market_intel_enabled": _flag("enabled"),
        "market_intel_crawler_enabled": _flag("crawler_enabled"),
        "database_write_still_blocked": not _flag("database_write_allowed"),
        "scheduler_detached": not _flag("scheduler_attached"),
        "mcp_fetch_gate_open": bool(mcp_fetch_gate.get("manual_fetch_gate_open")),
        "live_inventory_preview_safe": inventory_safe,
        # Approval is never granted by this preview, so the plan stays blocked.
        "manual_operator_approval": False,
    }

    blocked_reasons = []
    for check_name, passed in gate_checks.items():
        if not passed:
            blocked_reasons.append(check_name)
    blocked_reasons.append("sample_fetch_not_executed_by_api")

    sample_source_total = 0
    for platform in sample_platforms:
        sample_source_total += platform["sample_source_count"]

    first_batch_platform = None
    if sample_platforms:
        first_batch_platform = sample_platforms[0]["platform_code"]

    operator_steps = (
        (
            "confirm_live_inventory_baseline",
            "先以人工只讀庫存 smoke 確認 market_* 表與 seed 基準",
            "required",
        ),
        (
            "enable_fetch_flags_temporarily",
            "只在操作窗口暫時開啟 MARKET_INTEL_ENABLED 與 MARKET_INTEL_CRAWLER_ENABLED",
            "manual_required",
        ),
        (
            "verify_mcp_fetch_gate",
            "MCP readiness、router、external health 與 tool contract 必須全過",
            "required",
        ),
        (
            "run_one_platform_one_source",
            "第一批只跑 1 個平台的 1 個公開活動入口",
            "manual_required",
        ),
        (
            "review_parser_diagnostics",
            "只審查 title、page_hash、候選活動連結與錯誤不寫 market_* 商品資料",
            "required",
        ),
    )
    fallback_steps = (
        (
            "turn_flags_off",
            "關閉 MARKET_INTEL_ENABLED 與 MARKET_INTEL_CRAWLER_ENABLED",
        ),
        (
            "stay_preview_only",
            "保留所有市場情報 API/UI 預覽,不掛 scheduler、不寫 DB",
        ),
        (
            "do_not_retry_fast",
            "若平台回應異常,不做密集重試,改回人工檢查 adapter 入口",
        ),
    )

    plan = {"mode": "manual_sample_fetch_plan_preview"}
    # Every readiness / side-effect flag is reported as False in a preview.
    for inert_flag in (
        "ready_for_manual_sample_fetch",
        "sample_fetch_executed",
        "external_network_executed",
        "database_connection_opened",
        "database_session_created",
        "database_write_executed",
        "database_commit_executed",
        "scheduler_attached",
        "writes_executed",
        "would_write_database",
    ):
        plan[inert_flag] = False

    plan["platform_count"] = len(sample_platforms)
    plan["sample_source_total"] = sample_source_total
    plan["sample_platform_order"] = list(SAMPLE_PLATFORM_ORDER)
    plan["sample_platforms"] = sample_platforms
    plan["gate_checks"] = gate_checks
    plan["blocked_reasons"] = blocked_reasons
    plan["sample_policy"] = {
        "max_platforms_per_manual_batch": 1,
        "max_sources_per_platform": MAX_SAMPLE_SOURCES_PER_PLATFORM,
        "first_batch_platform": first_batch_platform,
        "write_policy": "no_database_write_until_operator_approves_import",
        "network_policy": "public_pages_only_mcp_gate_required",
    }
    plan["operator_sequence"] = [
        {"key": key, "label": label, "status": status}
        for key, label, status in operator_steps
    ]
    plan["fallback_plan"] = [
        {"key": key, "label": label} for key, label in fallback_steps
    ]
    plan["safe_boundaries"] = [
        "do_not_fetch_from_api_preview",
        "do_not_write_market_tables",
        "do_not_attach_scheduler",
        "do_not_use_login_or_member_pages",
        "do_not_bypass_anti_bot",
        "do_not_touch_momo_db_lifecycle",
    ]
    return plan