From 0c9f9278f14cd317931c6b235d606a95e37dac22 Mon Sep 17 00:00:00 2001 From: OoO Date: Wed, 13 May 2026 12:06:02 +0800 Subject: [PATCH] =?UTF-8?q?=E5=95=9F=E7=94=A8=20Market=20Intel=20seed=20wr?= =?UTF-8?q?iter=20CLI=20gate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...s-platform-market-campaign-intelligence.md | 29 +- routes/README.md | 2 +- scripts/market_intel_seed_writer.py | 20 +- services/market_intel/migration_blueprint.py | 9 +- services/market_intel/seed_writer_cli.py | 259 ++++++++++++++++-- services/market_intel/service.py | 14 +- tests/test_market_intel_skeleton.py | 88 +++++- 7 files changed, 372 insertions(+), 49 deletions(-) diff --git a/docs/adr/ADR-035-cross-platform-market-campaign-intelligence.md b/docs/adr/ADR-035-cross-platform-market-campaign-intelligence.md index 21db785..e2a89c3 100644 --- a/docs/adr/ADR-035-cross-platform-market-campaign-intelligence.md +++ b/docs/adr/ADR-035-cross-platform-market-campaign-intelligence.md @@ -126,11 +126,36 @@ EwoooC 目前已有 MOMO EDM / 節慶活動資料、`promo_products`、PChome - 先接成本最低且已有脈絡的平台。 - 只抓公開活動入口與活動商品。 - 建立活動與商品正規化規則。 +- 2026-05-06 起先落地 read-only adapter skeleton:只註冊平台入口與安全策略,不發 HTTP request、不寫 DB、不掛 scheduler。 +- 2026-05-06 追加手動 dry-run discovery runner:`fetch=false` 只回 planned;`fetch=true` 需 `MARKET_INTEL_ENABLED` 與 `MARKET_INTEL_CRAWLER_ENABLED` 同時開啟,且仍禁止 DB write 與 scheduler attach。 +- 2026-05-06 追加 parser 診斷層:成功手動 fetch 後只輸出 title、page_hash、link counts 與 campaign link candidates;不得建立正式 campaign/product。 +- 2026-05-06 追加平台別 scorer:MOMO/PChome adapter 可對 URL/text 加權,diagnostics 需保留 `generic_score` 與 `platform_score` 供人工判讀,不得自動建檔期。 +- 2026-05-06 追加 confidence bands:候選連結只可標記 `high` / `medium` / `low` 與 `confidence_reason` 作為人工審核提示,不得自動建立 campaign。 +- 2026-05-06 追加 candidate preview API:只聚合本次 diagnostics 的候選連結並支援 `min_band` / `limit`,不得入庫或自動建立活動。 +- 2026-05-06 追加 UI preview panel:市場情報頁只讀同站 `/api/market_intel/candidate_preview?fetch=false` 顯示安全空狀態,不得自動外部 fetch。 +- 2026-05-06 追加 platform seed plan:`/api/market_intel/platform_seed_plan` 只把 adapter registry 轉為可審核 seed rows;正式 upsert `market_platforms` 仍需 migration、schema smoke、feature flag 與人工批准。 +- 2026-05-06 追加 Coupang read-only adapter:以官方台灣站 `https://www.tw.coupang.com/` 與 `https://www.tw.coupang.com/np/coupangglobal` 作為公開 discovery 起點;預設仍不發 request、不寫 DB、不掛 scheduler。 +- 2026-05-07 追加 Shopee read-only adapter:以 `https://shopee.tw/` 與 `https://shopee.tw/mall` 作為公開 discovery 起點;不得登入、處理會員券/購物車、使用帳號池或繞過反爬。 +- 2026-05-07 追加 Phase 12 UI QA:本機 harness 載入 `/market_intel`,確認 4 adapter planned、console error 0、窄版 in-app browser 未水平爆版;真 390px 截圖需後續使用可設定 viewport 的 runner 補驗。 +- 2026-05-07 追加 platform seed write guard:`/api/market_intel/platform_seed_write_guard` 只回報 feature flag、migration、schema smoke、人工批准 gate 狀態,預設永遠不寫 DB。 +- 2026-05-07 追加 platform seed writer dry-run:`/api/market_intel/schema_smoke` 實際檢查 ORM metadata;`/api/market_intel/platform_seed_writer_plan` 只產生 parameterized `market_platforms` upsert preview,不建立 session、不 commit。 +- 2026-05-07 追加 writer preview panel:市場情報頁讀取同站 `/api/market_intel/platform_seed_writer_plan`,顯示 schema smoke、upsert preview 與 blocked reasons;仍不得寫 DB。 +- 2026-05-07 追加 deployment readiness panel:`/api/market_intel/deployment_readiness` 與 UI 明確回報尚未正式推版、尚未 commit/push、部署 SOP 與 production smoke 尚待人工執行。 +- 2026-05-07 追加 deployment handoff checklist:readiness API/UI 顯示人工推版步驟、備援方案、安全部署邊界、production smoke targets 與 `python backup_system.py` 備份要求;此階段仍不執行 git、部署、DB 或外部爬取動作。 +- 2026-05-07 追加 write approval runbook:`/api/market_intel/write_approval_runbook` 與 UI 顯示正式 seed write 前 gate、operator sequence、rollback plan 與硬性安全邊界;預設不建立 DB session、不 commit、不連外、不掛 scheduler。 +- 2026-05-07 追加 migration blueprint:`/api/market_intel/migration_blueprint` 與 UI 顯示 `migrations/032_market_intel_core_schema.sql` 草案、migration apply command shape 與 seed writer command design;此階段仍不建立 migration 檔、不執行 SQL、不寫 seed rows。 +- 2026-05-07 追加 migration file draft:建立本地 `migrations/032_market_intel_core_schema.sql` 草稿檔;blueprint API 會檢查檔案存在且內容與草案相符,但仍不執行 SQL、不建立 DB session、不寫 seed rows。 +- 2026-05-12 追加 seed writer CLI skeleton:`scripts/market_intel_seed_writer.py` 與 `/api/market_intel/seed_writer_cli_status` 只輸出 blocked plan;即使收到 `--execute` 與一次性 approval token 也不建立 DB session、不 commit、不寫 seed rows。 +- 2026-05-12 追加 app-only release gate:`/api/market_intel/deployment_readiness` 區分「安全檢查可進 app-only 推版」與「API 不執行部署」;即使 `ready_for_production_deploy=true`,仍不得由 API 執行 git、備份、scp、ssh、container recreate、migration 或 DB write。 +- 2026-05-12 追加 seed transaction preview:`/api/market_intel/seed_writer_cli_status` 會輸出 `transaction_preview`,包含 idempotent upsert SQL template、parameter payload hash 與 diff 狀態;此預覽不得載入既有 DB rows、不得開 transaction、不得 commit。 +- 2026-05-12 追加 read-only DB schema probe:`/api/market_intel/schema_db_probe` 預設只回 planned,不連 DB;人工 smoke 才能以明確參數查正式 DB catalog。探針不得使用 `DatabaseManager()`,避免觸發 metadata `create_all()`;不得建立 ORM session、不得寫入、不得 commit。 +- 2026-05-12 追加 platform seed DB diff probe:`/api/market_intel/platform_seed_db_diff` 預設只回 planned,不連 DB;人工 smoke 才能以明確參數只讀查詢 `market_platforms`,比對 adapter seed 是否 missing / differs / matches。探針不得使用 `DatabaseManager()`、不得建立 ORM session、不得寫入、不得 commit。 +- 2026-05-13 追加 platform seed CLI writer:`scripts/market_intel_seed_writer.py` 可在 CLI 明確帶入 `--execute`、`--apply-real-write` 與確認 token 時,以 SQLAlchemy Core 短 transaction upsert `market_platforms`;API 仍不得替使用者執行 DB 寫入,不建立 ORM session、不連外、不掛 scheduler。 ### Phase 4:Coupang / Shopee Adapter -- Coupang 先做保守 adapter。 -- Shopee 因動態資料與反爬風險較高,最後做,並維持更嚴格節流。 +- Coupang 先做保守 adapter,初始版本只註冊官方公開入口。 +- Shopee 因動態資料與反爬風險較高,初始版本只註冊公開入口並維持更嚴格節流。 ### Phase 5:Product Matching + HITL diff --git a/routes/README.md b/routes/README.md index d30c6d9..093192a 100644 --- a/routes/README.md +++ b/routes/README.md @@ -19,7 +19,7 @@ | `edm_routes.py` | EDM 與節慶儀表板 | `/edm`, `/festival` | | `monthly_routes.py` | 月結分析 | `/monthly_summary_analysis`, `/api/monthly_summary_data` | | `daily_sales_routes.py` | 當日業績 | `/daily_sales`, `/daily_sales/export*` | -| `market_intel_routes.py` | 市場情報 Phase 2 schema-ready 安全骨架 | `/market_intel`, `/market_intel/*`, `/api/market_intel/status`, `/api/market_intel/schema`, `/api/market_intel/dry_run_plan` | +| `market_intel_routes.py` | 市場情報 Phase 26 platform seed CLI writer | `/market_intel`, `/market_intel/*`, `/api/market_intel/status`, `/api/market_intel/schema`, `/api/market_intel/schema_smoke`, `/api/market_intel/schema_db_probe`, `/api/market_intel/platform_seed_db_diff`, `/api/market_intel/adapters`, `/api/market_intel/dry_run_plan`, `/api/market_intel/discovery_plan`, `/api/market_intel/manual_discovery`, `/api/market_intel/candidate_preview`, `/api/market_intel/platform_seed_plan`, `/api/market_intel/platform_seed_write_guard`, `/api/market_intel/platform_seed_writer_plan`, `/api/market_intel/migration_blueprint`, `/api/market_intel/seed_writer_cli_status`, `/api/market_intel/write_approval_runbook`, `/api/market_intel/deployment_readiness` | | `api_routes.py` | 通用任務與查詢 API | `/api/run_task`, `/api/history/*` | | `export_routes.py` | 匯出功能 | `/api/export/*` | | `import_routes.py` | 匯入功能 | `/api/import_excel`, `/api/import/monthly_summary` | diff --git a/scripts/market_intel_seed_writer.py b/scripts/market_intel_seed_writer.py index 1204016..ceb3fe0 100644 --- a/scripts/market_intel_seed_writer.py +++ b/scripts/market_intel_seed_writer.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -"""Market intelligence seed writer CLI skeleton. +"""Market intelligence seed writer CLI. -This script intentionally refuses real writes in the current phase. It prints a -JSON execution plan and never creates a DB session or commits seed rows. +By default this script prints a JSON execution plan. Real writes require +--execute, --apply-real-write, and a confirmation token. """ import argparse @@ -35,24 +35,32 @@ def parse_args(argv=None): parser.add_argument( "--execute", action="store_true", - help="Request real execution. This skeleton will still block it.", + help="Request real execution.", + ) + parser.add_argument( + "--apply-real-write", + action="store_true", + help="Allow the guarded market_platforms seed upsert transaction.", ) parser.add_argument( "--approval-token", default=None, - help=f"One-time approval token. May also be set via {APPROVAL_ENV_VAR}.", + help=f"One-time approval token. Defaults to {APPROVAL_ENV_VAR} when omitted.", ) return parser.parse_args(argv) def main(argv=None): args = parse_args(argv) - approval_token = args.approval_token or os.getenv(APPROVAL_ENV_VAR) + approval_token_secret = os.getenv(APPROVAL_ENV_VAR) + approval_token = args.approval_token or approval_token_secret service = MarketIntelService() plan = service.build_seed_writer_cli_status( platform_code=args.platform, execute_requested=args.execute, + apply_real_write=args.apply_real_write, approval_token=approval_token, + approval_token_secret=approval_token_secret, ) print(json.dumps(plan, ensure_ascii=False, indent=2, sort_keys=True)) return int(plan.get("exit_code", 2)) diff --git a/services/market_intel/migration_blueprint.py b/services/market_intel/migration_blueprint.py index 412785b..ba7e2e7 100644 --- a/services/market_intel/migration_blueprint.py +++ b/services/market_intel/migration_blueprint.py @@ -302,7 +302,7 @@ def build_migration_blueprint(expected_tables): "backup_not_verified", "operator_approval_missing", "production_maintenance_window_required", - "seed_writer_real_write_not_implemented", + "seed_writer_real_write_requires_cli_apply_flag", ] if not migration_file_exists: blocked_reasons.insert(0, "migration_file_not_created") @@ -356,17 +356,16 @@ def build_migration_blueprint(expected_tables): }, "seed_writer_command": { "command": ( - "MARKET_INTEL_ENABLED=true MARKET_INTEL_WRITE_ENABLED=true " "MARKET_INTEL_CRAWLER_ENABLED=false " "MARKET_INTEL_SEED_WRITE_APPROVAL= " - f"python {SEED_WRITER_SCRIPT} --execute --platform all" + f"python {SEED_WRITER_SCRIPT} --execute --apply-real-write --platform all" ), "executed": False, "script_created": seed_writer_script_exists, "script_path": SEED_WRITER_SCRIPT, "requires_new_approval_token": True, "notes": ( - "Seed writer skeleton 已存在,但真寫入仍未實作;不要為了 " + "Seed writer 真寫入只限 CLI、確認 token 與 apply flag;不要為了 " "seed upsert 而打開 crawler/manual fetch 權限。" ), }, @@ -376,6 +375,6 @@ def build_migration_blueprint(expected_tables): "does_not_touch_momo_db_container": True, "does_not_attach_scheduler": True, "does_not_enable_external_crawling": True, - "does_not_write_seed_rows": True, + "writes_seed_rows_only_with_cli_apply_flag": True, }, } diff --git a/services/market_intel/seed_writer_cli.py b/services/market_intel/seed_writer_cli.py index fdc4e2b..ad4d46a 100644 --- a/services/market_intel/seed_writer_cli.py +++ b/services/market_intel/seed_writer_cli.py @@ -1,33 +1,43 @@ -"""市場情報 seed writer CLI skeleton。 +"""市場情報 seed writer CLI。 -本階段只回報 CLI 執行計畫,不建立 DB session、不寫入、不 commit。 +預設只回報 CLI 執行計畫。只有 CLI 明確帶入 execute、apply-real-write 與確認 token +時,才會以短 transaction upsert market_platforms seed rows。 """ import hashlib +import hmac import json +import os + +from sqlalchemy import bindparam, create_engine, text APPROVAL_ENV_VAR = "MARKET_INTEL_SEED_WRITE_APPROVAL" +MIN_APPROVAL_TOKEN_LENGTH = 16 PLATFORM_UPSERT_SQL = """ INSERT INTO market_platforms ( code, name, base_url, enabled, - crawl_policy_json + crawl_policy_json, + created_at, + updated_at ) VALUES ( :code, :name, :base_url, :enabled, - :crawl_policy_json + :crawl_policy_json, + CURRENT_TIMESTAMP, + CURRENT_TIMESTAMP ) ON CONFLICT (code) DO UPDATE SET name = EXCLUDED.name, base_url = EXCLUDED.base_url, enabled = EXCLUDED.enabled, crawl_policy_json = EXCLUDED.crawl_policy_json, - updated_at = NOW() + updated_at = CURRENT_TIMESTAMP """.strip() @@ -84,7 +94,7 @@ def build_seed_transaction_preview(writer_plan, migration_blueprint): "schema_smoke_passed", "feature_flags_reviewed", "one_time_approval_token_verified", - "real_write_implementation_enabled", + "apply_real_write_flag_verified", ], "safety_contract": { "idempotent_upsert_preview_only": True, @@ -95,23 +105,167 @@ def build_seed_transaction_preview(writer_plan, migration_blueprint): } +def _query_existing_platform_rows(conn, platform_codes): + if not platform_codes: + return {} + + rows = conn.execute( + text( + """ + SELECT code, name, base_url, enabled, crawl_policy_json + FROM market_platforms + WHERE code IN :platform_codes + ORDER BY code + """ + ).bindparams(bindparam("platform_codes", expanding=True)), + {"platform_codes": tuple(platform_codes)}, + ).fetchall() + return { + row._mapping["code"]: dict(row._mapping) + for row in rows + } + + +def _operation_values(operation): + values = dict(operation.get("values", {})) + values["enabled"] = bool(values.get("enabled", False)) + return values + + +def _approval_token_valid(approval_token, approval_token_secret): + if not approval_token or not approval_token_secret: + return False + if len(str(approval_token_secret)) < MIN_APPROVAL_TOKEN_LENGTH: + return False + return hmac.compare_digest(str(approval_token), str(approval_token_secret)) + + +def execute_seed_writer_transaction( + *, + writer_plan, + database_url=None, + database_type=None, + engine=None, +): + """執行 market_platforms seed upsert;不建立 ORM session。""" + operations = list(writer_plan.get("operations", [])) + platform_codes = [ + operation.get("lookup", {}).get("code") or operation.get("values", {}).get("code") + for operation in operations + ] + effective_database_type = (database_type or "").lower() + effective_database_url = database_url + created_engine = False + connection_opened = False + transaction_opened = False + + try: + if engine is None: + if not effective_database_url: + from config import DATABASE_PATH, DATABASE_TYPE + + effective_database_url = DATABASE_PATH + effective_database_type = (database_type or DATABASE_TYPE or "").lower() + connect_args = {} + if effective_database_type == "postgresql": + connect_args = { + "connect_timeout": 8, + "options": "-c statement_timeout=15000", + } + engine = create_engine( + effective_database_url, + pool_pre_ping=True, + connect_args=connect_args, + ) + created_engine = True + + with engine.begin() as conn: + connection_opened = True + transaction_opened = True + before_by_code = _query_existing_platform_rows(conn, platform_codes) + for operation in operations: + conn.execute(text(PLATFORM_UPSERT_SQL), _operation_values(operation)) + after_by_code = _query_existing_platform_rows(conn, platform_codes) + + inserted_codes = [ + code for code in platform_codes + if code and code not in before_by_code and code in after_by_code + ] + updated_codes = [ + code for code in platform_codes + if code and code in before_by_code and code in after_by_code + ] + return { + "mode": "seed_writer_cli_executed", + "database_connection_opened": connection_opened, + "database_session_created": False, + "explicit_transaction_opened": transaction_opened, + "writes_executed": True, + "would_write_database": True, + "database_write_executed": True, + "database_commit_executed": True, + "database_rollback_executed": False, + "external_network_executed": False, + "scheduler_attached": False, + "operation_count": len(operations), + "existing_rows_seen": len(before_by_code), + "rows_after": len(after_by_code), + "inserted_codes": inserted_codes, + "updated_codes": updated_codes, + "affected_codes": [code for code in platform_codes if code], + "rollback_note": ( + "若需回退且尚未建立 campaign 關聯,可人工刪除本次 affected_codes;" + "此 CLI 不自動刪資料。" + ), + } + except Exception as exc: + return { + "mode": "seed_writer_cli_execute_error", + "database_connection_opened": connection_opened, + "database_session_created": False, + "explicit_transaction_opened": transaction_opened, + "writes_executed": False, + "would_write_database": True, + "database_write_executed": False, + "database_commit_executed": False, + "database_rollback_executed": bool(transaction_opened), + "external_network_executed": False, + "scheduler_attached": False, + "operation_count": len(operations), + "error_message": str(exc), + } + finally: + if created_engine: + engine.dispose() + + def build_seed_writer_cli_plan( *, platform_code, execute_requested, approval_token, + approval_token_secret=None, + apply_real_write=False, seed_plan, write_guard, writer_plan, migration_blueprint, + engine=None, + database_url=None, + database_type=None, ): - """建立 seed writer CLI blocked plan。""" + """建立 seed writer CLI plan,必要時執行受控 seed upsert。""" approval_token_present = bool(approval_token) + approval_token_secret = approval_token_secret or os.getenv(APPROVAL_ENV_VAR) + approval_token_secret_configured = bool(approval_token_secret) + approval_token_valid = _approval_token_valid(approval_token, approval_token_secret) migration_ready = bool( migration_blueprint.get("file_created") and migration_blueprint.get("file_matches_blueprint") and not migration_blueprint.get("migration_executed") ) + schema_smoke_passed = bool(writer_plan.get("schema_smoke", {}).get("passed")) + seed_rows_present = bool(seed_plan.get("seed_count")) gates = [ { "key": "script_created", @@ -134,48 +288,102 @@ def build_seed_writer_cli_plan( "passed": approval_token_present, }, { - "key": "database_write_allowed", - "label": "runtime database_write_allowed gate is true", - "passed": bool(write_guard.get("database_write_allowed")), + "key": "approval_token_secret_configured", + "label": f"{APPROVAL_ENV_VAR} environment token is configured", + "passed": approval_token_secret_configured, + }, + { + "key": "approval_token_valid", + "label": "approval token matches the configured one-time environment token", + "passed": approval_token_valid, + }, + { + "key": "apply_real_write_requested", + "label": "--apply-real-write flag was explicitly provided", + "passed": bool(apply_real_write), + }, + { + "key": "schema_smoke_passed", + "label": "market_intel ORM schema smoke passed", + "passed": schema_smoke_passed, + }, + { + "key": "seed_rows_present", + "label": "platform seed rows are present", + "passed": seed_rows_present, }, { "key": "manual_operator_approval", - "label": "operator approval has been verified out-of-band", - "passed": False, + "label": "operator approval confirmed through CLI token and apply flag", + "passed": bool(execute_requested and apply_real_write and approval_token_valid), }, { - "key": "real_write_implementation_enabled", - "label": "CLI real write implementation has been enabled", - "passed": False, + "key": "crawler_stays_disabled", + "label": "crawler remains detached from this seed writer", + "passed": not bool(seed_plan.get("status", {}).get("crawler_enabled")), }, ] blocked_reasons = [gate["key"] for gate in gates if not gate["passed"]] - if execute_requested: - blocked_reasons.insert(0, "execute_request_blocked_by_skeleton") + ready_for_real_write = bool(execute_requested and not blocked_reasons) transaction_preview = build_seed_transaction_preview( writer_plan=writer_plan, migration_blueprint=migration_blueprint, ) + execution_result = None + if ready_for_real_write: + execution_result = execute_seed_writer_transaction( + writer_plan=writer_plan, + database_url=database_url, + database_type=database_type, + engine=engine, + ) + if execution_result["mode"] == "seed_writer_cli_execute_error": + blocked_reasons = ["seed_writer_execute_error"] + writes_executed = bool(execution_result and execution_result.get("writes_executed")) return { - "mode": "seed_writer_cli_blocked_skeleton", + "mode": ( + execution_result["mode"] + if execution_result + else "seed_writer_cli_ready" + if ready_for_real_write + else "seed_writer_cli_blocked" + ), "platform_code": platform_code or "all", "execute_requested": bool(execute_requested), + "apply_real_write_requested": bool(apply_real_write), "approval_token_present": approval_token_present, + "approval_token_valid": approval_token_valid, "approval_env_var": APPROVAL_ENV_VAR, - "ready_for_real_write": False, - "writes_executed": False, - "would_write_database": False, + "approval_token_secret_configured": approval_token_secret_configured, + "ready_for_real_write": ready_for_real_write, + "writes_executed": writes_executed, + "would_write_database": bool(ready_for_real_write), "database_session_created": False, - "database_commit_executed": False, + "database_connection_opened": bool( + execution_result and execution_result.get("database_connection_opened") + ), + "explicit_transaction_opened": bool( + execution_result and execution_result.get("explicit_transaction_opened") + ), + "database_write_executed": bool( + execution_result and execution_result.get("database_write_executed") + ), + "database_commit_executed": bool( + execution_result and execution_result.get("database_commit_executed") + ), + "database_rollback_executed": bool( + execution_result and execution_result.get("database_rollback_executed") + ), "external_network_executed": False, "scheduler_attached": False, - "exit_code": 2 if execute_requested else 0, + "exit_code": 0 if writes_executed else 2 if execute_requested else 0, "blocked_reasons": blocked_reasons, "approval_gates": gates, "seed_count": int(seed_plan.get("seed_count") or 0), "writer_operation_count": int(writer_plan.get("operation_count") or 0), "transaction_preview": transaction_preview, + "execution_result": execution_result, "write_guard_summary": { "ready_to_write": bool(write_guard.get("ready_to_write")), "would_write_database": bool(write_guard.get("would_write_database")), @@ -189,9 +397,10 @@ def build_seed_writer_cli_plan( "migration_executed": bool(migration_blueprint.get("migration_executed")), }, "safety_contract": { - "refuses_execute_in_this_phase": True, + "refuses_execute_without_apply_flag": True, "requires_independent_approval_token": True, "keeps_crawler_disabled_for_seed_write": True, - "no_db_session_in_skeleton": True, + "uses_core_connection_not_orm_session": True, + "target_table": "market_platforms", }, } diff --git a/services/market_intel/service.py b/services/market_intel/service.py index ec0622c..4169639 100644 --- a/services/market_intel/service.py +++ b/services/market_intel/service.py @@ -62,7 +62,7 @@ class MarketIntelRuntimeStatus: class MarketIntelService: """市場情報入口服務,先集中 feature gate 與安全狀態。""" - phase = "phase_25_platform_seed_db_diff" + phase = "phase_26_platform_seed_cli_writer" def get_runtime_status(self) -> MarketIntelRuntimeStatus: return MarketIntelRuntimeStatus( @@ -322,8 +322,13 @@ class MarketIntelService: *, execute_requested=False, approval_token=None, + approval_token_secret=None, + apply_real_write=False, + engine=None, + database_url=None, + database_type=None, ): - """建立 seed writer CLI blocked status;不建立 DB session、不寫入。""" + """建立 seed writer CLI status;只有 CLI 明確批准時才寫入。""" seed_plan = self.build_platform_seed_plan(platform_code=platform_code) write_guard = self.build_platform_seed_write_guard(platform_code=platform_code) writer_plan = self.build_platform_seed_writer_plan(platform_code=platform_code) @@ -332,10 +337,15 @@ class MarketIntelService: platform_code=platform_code or "all", execute_requested=execute_requested, approval_token=approval_token, + approval_token_secret=approval_token_secret, + apply_real_write=apply_real_write, seed_plan=seed_plan, write_guard=write_guard, writer_plan=writer_plan, migration_blueprint=migration_blueprint, + engine=engine, + database_url=database_url, + database_type=database_type, ) status["phase"] = self.phase return status diff --git a/tests/test_market_intel_skeleton.py b/tests/test_market_intel_skeleton.py index d20ae3c..5c3bd45 100644 --- a/tests/test_market_intel_skeleton.py +++ b/tests/test_market_intel_skeleton.py @@ -17,6 +17,9 @@ from services.market_intel.platform_seed_db_diff import build_platform_seed_db_d from services.market_intel.schema_db_probe import build_schema_db_probe_plan +TEST_APPROVAL_TOKEN = "test-market-intel-approval-token" + + def test_market_intel_defaults_are_safe(): service = MarketIntelService() status = service.get_runtime_status().to_dict() @@ -725,10 +728,10 @@ def test_migration_blueprint_is_additive_preview_only(): assert blueprint["table_count"] == 7 assert blueprint["forward_has_destructive_sql"] is False assert blueprint["safety_checks"]["forward_sql_additive_only"] is True - assert blueprint["safety_checks"]["does_not_write_seed_rows"] is True + assert blueprint["safety_checks"]["writes_seed_rows_only_with_cli_apply_flag"] is True assert "migration_not_executed" in blueprint["blocked_reasons"] assert "migration_file_not_created" not in blueprint["blocked_reasons"] - assert "seed_writer_real_write_not_implemented" in blueprint["blocked_reasons"] + assert "seed_writer_real_write_requires_cli_apply_flag" in blueprint["blocked_reasons"] assert migration_file.exists() assert migration_file.read_text(encoding="utf-8").strip() == blueprint["forward_sql"] assert "CREATE TABLE IF NOT EXISTS market_platforms".lower() in forward_sql_lower @@ -746,22 +749,29 @@ def test_seed_writer_cli_status_blocks_real_write(): status = MarketIntelService().build_seed_writer_cli_status( platform_code="all", execute_requested=True, - approval_token="test-token", + approval_token=TEST_APPROVAL_TOKEN, + approval_token_secret=TEST_APPROVAL_TOKEN, ) - assert status["mode"] == "seed_writer_cli_blocked_skeleton" + assert status["mode"] == "seed_writer_cli_blocked" assert status["execute_requested"] is True + assert status["apply_real_write_requested"] is False assert status["approval_token_present"] is True + assert status["approval_token_valid"] is True + assert status["approval_token_secret_configured"] is True + assert "approval_token_hint" not in status assert status["ready_for_real_write"] is False assert status["writes_executed"] is False assert status["would_write_database"] is False assert status["database_session_created"] is False + assert status["explicit_transaction_opened"] is False + assert status["database_write_executed"] is False assert status["database_commit_executed"] is False assert status["external_network_executed"] is False assert status["scheduler_attached"] is False assert status["exit_code"] == 2 - assert "execute_request_blocked_by_skeleton" in status["blocked_reasons"] - assert "real_write_implementation_enabled" in status["blocked_reasons"] + assert "apply_real_write_requested" in status["blocked_reasons"] + assert "approval_token_valid" not in status["blocked_reasons"] preview = status["transaction_preview"] assert preview["mode"] == "seed_transaction_preview_no_session" assert preview["statement_count"] == 4 @@ -773,8 +783,69 @@ def test_seed_writer_cli_status_blocks_real_write(): assert preview["statements"][0]["diff_status"] == "not_loaded_no_db_session" assert "ON CONFLICT (code) DO UPDATE SET" in preview["statements"][0]["sql_template"] assert preview["statements"][0]["parameter_payload_hash"] - assert status["safety_contract"]["refuses_execute_in_this_phase"] is True + assert status["safety_contract"]["refuses_execute_without_apply_flag"] is True assert status["safety_contract"]["keeps_crawler_disabled_for_seed_write"] is True + assert status["safety_contract"]["uses_core_connection_not_orm_session"] is True + + +def test_seed_writer_cli_real_write_sqlite_upserts_seed_rows(): + engine = create_engine("sqlite:///:memory:") + with engine.begin() as conn: + conn.execute( + text( + """ + CREATE TABLE market_platforms ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + code TEXT NOT NULL UNIQUE, + name TEXT NOT NULL, + base_url TEXT, + enabled BOOLEAN NOT NULL DEFAULT 0, + crawl_policy_json TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL + ) + """ + ) + ) + + status = MarketIntelService().build_seed_writer_cli_status( + platform_code="all", + execute_requested=True, + apply_real_write=True, + approval_token=TEST_APPROVAL_TOKEN, + approval_token_secret=TEST_APPROVAL_TOKEN, + engine=engine, + database_type="sqlite", + ) + + with engine.connect() as conn: + rows = conn.execute( + text("SELECT code, enabled FROM market_platforms ORDER BY code") + ).fetchall() + + assert status["mode"] == "seed_writer_cli_executed" + assert status["ready_for_real_write"] is True + assert status["writes_executed"] is True + assert status["would_write_database"] is True + assert status["database_connection_opened"] is True + assert status["database_session_created"] is False + assert status["explicit_transaction_opened"] is True + assert status["database_write_executed"] is True + assert status["database_commit_executed"] is True + assert status["database_rollback_executed"] is False + assert status["external_network_executed"] is False + assert status["scheduler_attached"] is False + assert status["exit_code"] == 0 + assert status["blocked_reasons"] == [] + assert status["execution_result"]["inserted_codes"] == [ + "momo", + "pchome", + "coupang", + "shopee", + ] + assert status["execution_result"]["updated_codes"] == [] + assert [row[0] for row in rows] == ["coupang", "momo", "pchome", "shopee"] + assert all(row[1] in (False, 0) for row in rows) def test_seed_writer_cli_script_outputs_blocked_plan(): @@ -794,8 +865,9 @@ def test_seed_writer_cli_script_outputs_blocked_plan(): data = json.loads(result.stdout) assert result.returncode == 0 - assert data["mode"] == "seed_writer_cli_blocked_skeleton" + assert data["mode"] == "seed_writer_cli_blocked" assert data["execute_requested"] is False + assert data["apply_real_write_requested"] is False assert data["writes_executed"] is False assert data["database_session_created"] is False assert data["database_commit_executed"] is False