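"""Acceptance contract for manual market-intelligence sample fetch results.

This module only defines the acceptance fields, thresholds, and manual
decisions that apply after the first sample fetch comes back; it does not
read external websites, query the DB, write to the DB, or attach a scheduler.
"""

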
# Field names required on a sample fetch result. The acceptance preview
# exposes them (as a list copy) under its "required_result_fields" key.
# Kept as a tuple so the module-level contract cannot be mutated in place.
REQUIRED_RESULT_FIELDS = (
    "batch_id",
    "platform_code",
    "source_key",
    "source_url",
    "status",
    "status_code",
    "content_length",
    "page_hash",
    "title",
    "diagnostics",
)

# Field names required in the diagnostics of a sample fetch result. The
# acceptance preview exposes them (as a list copy) under its
# "required_diagnostic_fields" key.
REQUIRED_DIAGNOSTIC_FIELDS = (
    "link_count",
    "same_host_link_count",
    "campaign_link_candidates",
)


def build_manual_sample_acceptance_preview(*, runtime_status, manual_sample_plan) -> dict:
    """Build the acceptance-contract preview for a manual sample fetch result.

    The function only assembles a description of the contract (required
    fields, thresholds, checks, reject conditions, operator decisions) plus a
    few gate checks derived from its two arguments. It never loads a sample
    result: every execution flag in the returned dict is a hard-coded
    ``False`` and every acceptance check is reported as ``"not_evaluated"``.

    Args:
        runtime_status: Object whose ``database_write_allowed`` and
            ``scheduler_attached`` attributes are read with ``getattr``;
            a missing attribute is treated as ``False``.
        manual_sample_plan: Mapping queried with ``.get`` for the
            ``sample_fetch_executed`` and ``external_network_executed``
            flags. ``None`` is treated as an empty plan, so the
            ``manual_sample_plan_present`` gate fails instead of the call
            raising ``AttributeError``.

    Returns:
        A freshly built dict. ``gate_checks`` maps each gate name to a bool;
        ``blocked_reasons`` lists the failed gates in gate order followed by
        ``"sample_result_not_loaded"`` and
        ``"manual_review_required_before_import"``, which are always present.
    """
    # A missing plan must show up as a failed gate below, not as a crash on
    # the first ``.get`` call.
    plan = {} if manual_sample_plan is None else manual_sample_plan

    # Insertion order matters: blocked_reasons is derived from it.
    gate_checks = {
        "manual_sample_plan_present": bool(manual_sample_plan),
        "sample_fetch_not_executed_by_api": not plan.get("sample_fetch_executed"),
        "external_network_blocked_in_preview": not plan.get(
            "external_network_executed"
        ),
        "database_write_still_blocked": not getattr(
            runtime_status, "database_write_allowed", False
        ),
        "scheduler_detached": not getattr(
            runtime_status, "scheduler_attached", False
        ),
    }

    blocked_reasons = [name for name, passed in gate_checks.items() if not passed]
    # These two reasons apply unconditionally: the preview never loads a
    # sample result and never replaces manual review.
    blocked_reasons.extend(
        [
            "sample_result_not_loaded",
            "manual_review_required_before_import",
        ]
    )

    return {
        "mode": "manual_sample_acceptance_preview",
        "contract_ready": True,
        "sample_result_loaded": False,
        "sample_result_accepted": False,
        "candidate_import_allowed": False,
        "external_network_executed": False,
        "database_connection_opened": False,
        "database_session_created": False,
        "database_write_executed": False,
        "database_commit_executed": False,
        "scheduler_attached": False,
        "writes_executed": False,
        "would_write_database": False,
        "gate_checks": gate_checks,
        "blocked_reasons": blocked_reasons,
        # Copy into lists so callers cannot mutate the module-level tuples.
        "required_result_fields": list(REQUIRED_RESULT_FIELDS),
        "required_diagnostic_fields": list(REQUIRED_DIAGNOSTIC_FIELDS),
        "acceptance_thresholds": {
            "http_status_min": 200,
            "http_status_max": 299,
            "minimum_content_length": 500,
            "page_hash_length": 64,
            "minimum_title_length": 2,
            "minimum_link_count": 1,
            "minimum_campaign_candidates": 1,
            "accepted_candidate_bands": ["high", "medium"],
        },
        "acceptance_checks": [
            {
                "key": "http_status_ok",
                "label": "HTTP status 必須為 2xx,redirect / 403 / 429 / 5xx 都不得進入候選導入",
                "status": "not_evaluated",
            },
            {
                "key": "content_has_body",
                "label": "content_length 必須超過最低門檻,避免只拿到 bot challenge 或空頁",
                "status": "not_evaluated",
            },
            {
                "key": "page_fingerprint_present",
                "label": "page_hash 必須存在,後續才能追蹤同頁變化",
                "status": "not_evaluated",
            },
            {
                # NOTE(review): the label mentions `title`, which is listed in
                # REQUIRED_RESULT_FIELDS rather than REQUIRED_DIAGNOSTIC_FIELDS
                # — confirm which location is intended.
                "key": "parser_diagnostics_present",
                "label": "diagnostics 必須包含 title、link_count 與 campaign_link_candidates",
                "status": "not_evaluated",
            },
            {
                "key": "candidate_quality_reviewed",
                "label": "至少 1 筆 high/medium 候選需經人工確認,才可進入候選活動審核",
                "status": "not_evaluated",
            },
        ],
        "reject_conditions": [
            {
                "key": "anti_bot_or_login_wall",
                "label": "頁面出現登入牆、驗證碼、bot challenge、會員頁或購物車內容",
            },
            {
                "key": "unexpected_personal_data",
                "label": "頁面含非公開個資、帳號資料或訂單資訊",
            },
            {
                "key": "too_many_failures",
                "label": "同平台樣本連續失敗時,不做快速重試,先回頭審查 adapter 入口",
            },
            {
                "key": "no_campaign_candidates",
                "label": "沒有任何 high/medium campaign candidate 時,不建立活動候選",
            },
        ],
        # Every operator decision keeps writes blocked.
        "operator_decisions": [
            {
                "key": "approve_candidate_preview",
                "label": "樣本可進入候選活動人工審核,但仍不得寫 market_campaigns",
                "write_status": "blocked",
            },
            {
                "key": "revise_adapter_source",
                "label": "入口不適合時,調整 adapter source 後重新跑單一樣本",
                "write_status": "blocked",
            },
            {
                "key": "reject_platform_for_now",
                "label": "平台風險或阻擋過高時,暫停該平台,只保留其他平台預覽",
                "write_status": "blocked",
            },
        ],
        "promotion_sequence": [
            "先把 sample result 與 diagnostics 以人工方式審核",
            "通過後只開 candidate preview,不建立正式 campaign/product",
            "累積至少 2 個平台樣本通過後,再設計候選活動審核資料流",
            "任何寫入 market_* 前仍需獨立 approval 與 rollback plan",
        ],
        "safe_boundaries": [
            "do_not_accept_login_or_member_pages",
            "do_not_accept_anti_bot_challenge_pages",
            "do_not_import_candidates_without_human_review",
            "do_not_write_market_tables_from_acceptance_preview",
            "do_not_attach_scheduler_from_sample_result",
            "do_not_touch_momo_db_lifecycle",
        ],
    }