From fccc80858d989f1963060d55fbd095bca0f3826c Mon Sep 17 00:00:00 2001 From: OoO Date: Tue, 12 May 2026 22:49:56 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=BE=A9=20Wave=200=20=E9=98=BB?= =?UTF-8?q?=E5=A1=9E=E8=88=87=20market=20intel=20=E5=85=A5=E5=BA=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 5 + migrations/032_market_intel_core_schema.sql | 237 +++++ .../033_fix_host_health_probe_labels.sql | 38 + routes/market_intel_routes.py | 128 ++- routes/openclaw_bot_routes.py | 85 ++ run_scheduler.py | 98 +- scripts/market_intel_seed_writer.py | 62 ++ services/cache_service.py | 6 +- services/market_intel/adapters/__init__.py | 9 + services/market_intel/adapters/base.py | 93 ++ .../market_intel/adapters/coupang_adapter.py | 54 ++ .../market_intel/adapters/momo_adapter.py | 54 ++ .../market_intel/adapters/pchome_adapter.py | 54 ++ services/market_intel/adapters/registry.py | 29 + .../market_intel/adapters/shopee_adapter.py | 57 ++ services/market_intel/candidate_preview.py | 81 ++ services/market_intel/discovery_runner.py | 215 +++++ services/market_intel/html_diagnostics.py | 172 ++++ services/market_intel/migration_blueprint.py | 381 ++++++++ services/market_intel/platform_seed.py | 43 + .../market_intel/platform_seed_db_diff.py | 238 +++++ services/market_intel/platform_seed_writer.py | 89 ++ services/market_intel/schema_db_probe.py | 172 ++++ services/market_intel/seed_writer_cli.py | 197 ++++ services/market_intel/service.py | 425 ++++++++- .../market_intel/write_approval_runbook.py | 134 +++ templates/market_intel/disabled.html | 881 ++++++++++++++++++ tests/test_market_intel_skeleton.py | 804 ++++++++++++++++ tests/test_openclaw_bot_routes_webhook.py | 56 ++ 29 files changed, 4888 insertions(+), 9 deletions(-) create mode 100644 migrations/032_market_intel_core_schema.sql create mode 100644 migrations/033_fix_host_health_probe_labels.sql create mode 100644 scripts/market_intel_seed_writer.py create mode 100644 services/market_intel/adapters/__init__.py create mode 100644 services/market_intel/adapters/base.py create mode 100644 services/market_intel/adapters/coupang_adapter.py create mode 100644 services/market_intel/adapters/momo_adapter.py create mode 100644 services/market_intel/adapters/pchome_adapter.py create mode 100644 services/market_intel/adapters/registry.py create mode 100644 services/market_intel/adapters/shopee_adapter.py create mode 100644 services/market_intel/candidate_preview.py create mode 100644 services/market_intel/discovery_runner.py create mode 100644 services/market_intel/html_diagnostics.py create mode 100644 services/market_intel/migration_blueprint.py create mode 100644 services/market_intel/platform_seed.py create mode 100644 services/market_intel/platform_seed_db_diff.py create mode 100644 services/market_intel/platform_seed_writer.py create mode 100644 services/market_intel/schema_db_probe.py create mode 100644 services/market_intel/seed_writer_cli.py create mode 100644 services/market_intel/write_approval_runbook.py create mode 100644 tests/test_market_intel_skeleton.py diff --git a/.gitignore b/.gitignore index 18f2aa8..d504fda 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,8 @@ # Claude Code 本機設定(含 allow list / Secret,不可 commit) .claude/settings.local.json +.claude/worktrees/ +.tmp_* # Python __pycache__/ @@ -63,6 +65,7 @@ data/*.db-wal data/*.sqlite data/*.sqlite3 data/*.lock +data/*.pkl database/*.db database/*.db-journal database/*.db-shm @@ -85,6 +88,8 @@ data/excel_exports/ # 上傳檔案 web/static/uploads/ web/static/screenshots/ +MOMO Pro/uploads/ +MOMO Pro/screenshots/ templates/__init__.py # 測試與覆蓋率報告 diff --git a/migrations/032_market_intel_core_schema.sql b/migrations/032_market_intel_core_schema.sql new file mode 100644 index 0000000..d6e8156 --- /dev/null +++ b/migrations/032_market_intel_core_schema.sql @@ -0,0 +1,237 @@ +-- ============================================================================= +-- Migration 032: market_intel core schema +-- MOMO PRO - Cross-platform market campaign intelligence +-- 2026-05-07 Taipei +-- ============================================================================= +-- Notes: +-- Creates the ADR-035 market_* schema. This migration is additive only: +-- it creates tables, indexes, and grants. It does not drop or alter existing +-- sales tables, and it does not touch the momo-db container lifecycle. +-- ============================================================================= + +CREATE TABLE IF NOT EXISTS market_platforms ( + id BIGSERIAL PRIMARY KEY, + code VARCHAR(50) NOT NULL UNIQUE, + name VARCHAR(120) NOT NULL, + base_url VARCHAR(500), + enabled BOOLEAN NOT NULL DEFAULT FALSE, + crawl_policy_json TEXT, + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_market_platforms_code + ON market_platforms (code); + +CREATE TABLE IF NOT EXISTS market_campaigns ( + id BIGSERIAL PRIMARY KEY, + platform_code VARCHAR(50) NOT NULL REFERENCES market_platforms(code), + campaign_key VARCHAR(200) NOT NULL, + campaign_name VARCHAR(500) NOT NULL, + campaign_type VARCHAR(80), + campaign_url TEXT, + start_at TIMESTAMP, + end_at TIMESTAMP, + status VARCHAR(30) NOT NULL DEFAULT 'unknown', + discovered_at TIMESTAMP NOT NULL DEFAULT NOW(), + last_seen_at TIMESTAMP NOT NULL DEFAULT NOW(), + metadata_json TEXT, + CONSTRAINT uq_market_campaign_platform_key + UNIQUE (platform_code, campaign_key) +); + +CREATE INDEX IF NOT EXISTS idx_market_campaigns_platform_code + ON market_campaigns (platform_code); +CREATE INDEX IF NOT EXISTS idx_market_campaigns_campaign_type + ON market_campaigns (campaign_type); +CREATE INDEX IF NOT EXISTS idx_market_campaigns_status + ON market_campaigns (status); +CREATE INDEX IF NOT EXISTS idx_market_campaign_status_time + ON market_campaigns (status, start_at, end_at); + +CREATE TABLE IF NOT EXISTS market_campaign_snapshots ( + id BIGSERIAL PRIMARY KEY, + campaign_id BIGINT NOT NULL REFERENCES market_campaigns(id), + batch_id VARCHAR(80) NOT NULL, + crawled_at TIMESTAMP NOT NULL DEFAULT NOW(), + title VARCHAR(500), + hero_text TEXT, + coupon_text TEXT, + raw_discount_text TEXT, + page_hash VARCHAR(128), + raw_snapshot_path TEXT, + status VARCHAR(30) NOT NULL DEFAULT 'success', + error_message TEXT, + metadata_json TEXT +); + +CREATE INDEX IF NOT EXISTS idx_market_campaign_snapshots_campaign_id + ON market_campaign_snapshots (campaign_id); +CREATE INDEX IF NOT EXISTS idx_market_campaign_snapshots_batch_id + ON market_campaign_snapshots (batch_id); +CREATE INDEX IF NOT EXISTS idx_market_campaign_snapshots_crawled_at + ON market_campaign_snapshots (crawled_at); +CREATE INDEX IF NOT EXISTS idx_market_campaign_snapshots_page_hash + ON market_campaign_snapshots (page_hash); +CREATE INDEX IF NOT EXISTS idx_market_campaign_snapshots_status + ON market_campaign_snapshots (status); +CREATE INDEX IF NOT EXISTS idx_market_campaign_snapshot_campaign_time + ON market_campaign_snapshots (campaign_id, crawled_at); + +CREATE TABLE IF NOT EXISTS market_campaign_products ( + id BIGSERIAL PRIMARY KEY, + campaign_id BIGINT NOT NULL REFERENCES market_campaigns(id), + platform_code VARCHAR(50) NOT NULL, + platform_product_id VARCHAR(200) NOT NULL, + product_url TEXT, + name VARCHAR(500) NOT NULL, + brand VARCHAR(200), + image_url TEXT, + category_text VARCHAR(300), + price DOUBLE PRECISION, + original_price DOUBLE PRECISION, + discount_text VARCHAR(200), + discount_rate DOUBLE PRECISION, + coupon_text TEXT, + stock_text VARCHAR(200), + sold_count INTEGER, + rating DOUBLE PRECISION, + review_count INTEGER, + rank_position INTEGER, + is_active BOOLEAN NOT NULL DEFAULT TRUE, + first_seen_at TIMESTAMP NOT NULL DEFAULT NOW(), + last_seen_at TIMESTAMP NOT NULL DEFAULT NOW(), + metadata_json TEXT, + CONSTRAINT uq_market_campaign_product + UNIQUE (campaign_id, platform_code, platform_product_id) +); + +CREATE INDEX IF NOT EXISTS idx_market_campaign_products_campaign_id + ON market_campaign_products (campaign_id); +CREATE INDEX IF NOT EXISTS idx_market_campaign_products_platform_code + ON market_campaign_products (platform_code); +CREATE INDEX IF NOT EXISTS idx_market_campaign_products_platform_product_id + ON market_campaign_products (platform_product_id); +CREATE INDEX IF NOT EXISTS idx_market_campaign_products_brand + ON market_campaign_products (brand); +CREATE INDEX IF NOT EXISTS idx_market_campaign_products_category_text + ON market_campaign_products (category_text); +CREATE INDEX IF NOT EXISTS idx_market_campaign_products_is_active + ON market_campaign_products (is_active); +CREATE INDEX IF NOT EXISTS idx_market_campaign_products_last_seen_at + ON market_campaign_products (last_seen_at); +CREATE INDEX IF NOT EXISTS idx_market_product_platform_seen + ON market_campaign_products (platform_code, last_seen_at); +CREATE INDEX IF NOT EXISTS idx_market_product_discount + ON market_campaign_products (discount_rate, price); + +CREATE TABLE IF NOT EXISTS market_product_price_history ( + id BIGSERIAL PRIMARY KEY, + market_product_id BIGINT NOT NULL REFERENCES market_campaign_products(id), + campaign_id BIGINT NOT NULL REFERENCES market_campaigns(id), + platform_code VARCHAR(50) NOT NULL, + platform_product_id VARCHAR(200) NOT NULL, + price DOUBLE PRECISION, + original_price DOUBLE PRECISION, + discount_rate DOUBLE PRECISION, + stock_text VARCHAR(200), + sold_count INTEGER, + rank_position INTEGER, + crawled_at TIMESTAMP NOT NULL DEFAULT NOW(), + batch_id VARCHAR(80) NOT NULL, + metadata_json TEXT +); + +CREATE INDEX IF NOT EXISTS idx_market_product_price_history_market_product_id + ON market_product_price_history (market_product_id); +CREATE INDEX IF NOT EXISTS idx_market_product_price_history_campaign_id + ON market_product_price_history (campaign_id); +CREATE INDEX IF NOT EXISTS idx_market_product_price_history_platform_code + ON market_product_price_history (platform_code); +CREATE INDEX IF NOT EXISTS idx_market_product_price_history_platform_product_id + ON market_product_price_history (platform_product_id); +CREATE INDEX IF NOT EXISTS idx_market_product_price_history_crawled_at + ON market_product_price_history (crawled_at); +CREATE INDEX IF NOT EXISTS idx_market_product_price_history_batch_id + ON market_product_price_history (batch_id); +CREATE INDEX IF NOT EXISTS idx_market_price_platform_time + ON market_product_price_history (platform_code, platform_product_id, crawled_at); +CREATE INDEX IF NOT EXISTS idx_market_price_campaign_time + ON market_product_price_history (campaign_id, crawled_at); + +CREATE TABLE IF NOT EXISTS market_product_matches ( + id BIGSERIAL PRIMARY KEY, + market_product_id BIGINT NOT NULL REFERENCES market_campaign_products(id), + momo_product_id INTEGER REFERENCES products(id), + momo_i_code VARCHAR(50), + match_score DOUBLE PRECISION NOT NULL DEFAULT 0.0, + match_status VARCHAR(30) NOT NULL DEFAULT 'needs_review', + match_reason_json TEXT, + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + reviewed_at TIMESTAMP, + reviewed_by VARCHAR(120), + CONSTRAINT uq_market_product_momo_match + UNIQUE (market_product_id, momo_i_code) +); + +CREATE INDEX IF NOT EXISTS idx_market_product_matches_market_product_id + ON market_product_matches (market_product_id); +CREATE INDEX IF NOT EXISTS idx_market_product_matches_momo_product_id + ON market_product_matches (momo_product_id); +CREATE INDEX IF NOT EXISTS idx_market_product_matches_momo_i_code + ON market_product_matches (momo_i_code); +CREATE INDEX IF NOT EXISTS idx_market_product_matches_match_status + ON market_product_matches (match_status); +CREATE INDEX IF NOT EXISTS idx_market_match_status_score + ON market_product_matches (match_status, match_score); + +CREATE TABLE IF NOT EXISTS market_crawler_runs ( + id BIGSERIAL PRIMARY KEY, + platform_code VARCHAR(50), + crawler_name VARCHAR(120) NOT NULL, + campaign_id BIGINT REFERENCES market_campaigns(id), + batch_id VARCHAR(80) NOT NULL, + started_at TIMESTAMP NOT NULL DEFAULT NOW(), + finished_at TIMESTAMP, + status VARCHAR(30) NOT NULL DEFAULT 'started', + dry_run BOOLEAN NOT NULL DEFAULT TRUE, + pages_found INTEGER NOT NULL DEFAULT 0, + products_found INTEGER NOT NULL DEFAULT 0, + products_changed INTEGER NOT NULL DEFAULT 0, + error_count INTEGER NOT NULL DEFAULT 0, + error_message TEXT, + metadata_json TEXT +); + +CREATE INDEX IF NOT EXISTS idx_market_crawler_runs_platform_code + ON market_crawler_runs (platform_code); +CREATE INDEX IF NOT EXISTS idx_market_crawler_runs_crawler_name + ON market_crawler_runs (crawler_name); +CREATE INDEX IF NOT EXISTS idx_market_crawler_runs_campaign_id + ON market_crawler_runs (campaign_id); +CREATE INDEX IF NOT EXISTS idx_market_crawler_runs_batch_id + ON market_crawler_runs (batch_id); +CREATE INDEX IF NOT EXISTS idx_market_crawler_runs_started_at + ON market_crawler_runs (started_at); +CREATE INDEX IF NOT EXISTS idx_market_crawler_runs_status + ON market_crawler_runs (status); +CREATE INDEX IF NOT EXISTS idx_market_crawler_run_platform_time + ON market_crawler_runs (platform_code, started_at); +CREATE INDEX IF NOT EXISTS idx_market_crawler_run_status_time + ON market_crawler_runs (status, started_at); + +GRANT ALL PRIVILEGES ON market_platforms TO momo; +GRANT ALL PRIVILEGES ON market_campaigns TO momo; +GRANT ALL PRIVILEGES ON market_campaign_snapshots TO momo; +GRANT ALL PRIVILEGES ON market_campaign_products TO momo; +GRANT ALL PRIVILEGES ON market_product_price_history TO momo; +GRANT ALL PRIVILEGES ON market_product_matches TO momo; +GRANT ALL PRIVILEGES ON market_crawler_runs TO momo; + +GRANT USAGE, SELECT ON SEQUENCE market_platforms_id_seq TO momo; +GRANT USAGE, SELECT ON SEQUENCE market_campaigns_id_seq TO momo; +GRANT USAGE, SELECT ON SEQUENCE market_campaign_snapshots_id_seq TO momo; +GRANT USAGE, SELECT ON SEQUENCE market_campaign_products_id_seq TO momo; +GRANT USAGE, SELECT ON SEQUENCE market_product_price_history_id_seq TO momo; +GRANT USAGE, SELECT ON SEQUENCE market_product_matches_id_seq TO momo; +GRANT USAGE, SELECT ON SEQUENCE market_crawler_runs_id_seq TO momo; diff --git a/migrations/033_fix_host_health_probe_labels.sql b/migrations/033_fix_host_health_probe_labels.sql new file mode 100644 index 0000000..20c38c9 --- /dev/null +++ b/migrations/033_fix_host_health_probe_labels.sql @@ -0,0 +1,38 @@ +-- ============================================================================= +-- Migration 033: host_health_probes host_label CHECK alignment +-- 2026-05-12 Taipei +-- ============================================================================= +-- Context: +-- Migration 029 only allowed the original dashboard labels: +-- Primary (GCP), Secondary (GCP), Fallback (111) +-- services/ollama_service.py::get_host_label() later added operational labels: +-- GCP-SSD, GCP-SSD-2, 111 備援, GCP-SSD(via Nginx 110), +-- GCP-SSD-2(via Nginx 110) +-- +-- Keep the existing constraint name to avoid downstream drift, but widen the +-- allowed set. NOT VALID avoids failing on any historical probe rows while still +-- enforcing the widened CHECK for future inserts. +-- ============================================================================= + +ALTER TABLE IF EXISTS host_health_probes + DROP CONSTRAINT IF EXISTS chk_host_label_029; + +ALTER TABLE IF EXISTS host_health_probes + ADD CONSTRAINT chk_host_label_029 + CHECK ( + host_label IN ( + 'Primary (GCP)', + 'Secondary (GCP)', + 'Fallback (111)', + 'GCP-SSD', + 'GCP-SSD-2', + '111 備援', + 'GCP-SSD(via Nginx 110)', + 'GCP-SSD-2(via Nginx 110)', + '188 本地', + '未知' + ) + ) NOT VALID; + +COMMENT ON CONSTRAINT chk_host_label_029 ON host_health_probes IS + 'Accepted labels from migration 029 dashboard probes and services/ollama_service.py::get_host_label()'; diff --git a/routes/market_intel_routes.py b/routes/market_intel_routes.py index 7081cbb..3cecd87 100644 --- a/routes/market_intel_routes.py +++ b/routes/market_intel_routes.py @@ -23,13 +23,16 @@ def _service(): @market_intel_bp.route("/market_intel/campaigns") @login_required def campaigns(): - status = _service().get_runtime_status() + service = _service() + status = service.get_runtime_status() return render_template( "market_intel/disabled.html", active_page="market_intel", datetime_now=datetime.now(TAIPEI_TZ).strftime("%Y-%m-%d %H:%M:%S"), system_version=SYSTEM_VERSION, status=status, + adapter_count=len(service.get_adapter_summaries()), + manual_fetch_allowed=service.manual_fetch_allowed(), current_section="campaigns", ) @@ -39,13 +42,16 @@ def campaigns(): @market_intel_bp.route("/market_intel/opportunities") @login_required def disabled_section(): - status = _service().get_runtime_status() + service = _service() + status = service.get_runtime_status() return render_template( "market_intel/disabled.html", active_page="market_intel", datetime_now=datetime.now(TAIPEI_TZ).strftime("%Y-%m-%d %H:%M:%S"), system_version=SYSTEM_VERSION, status=status, + adapter_count=len(service.get_adapter_summaries()), + manual_fetch_allowed=service.manual_fetch_allowed(), current_section="disabled", ) @@ -62,8 +68,126 @@ def market_intel_schema(): return jsonify({"tables": _service().get_schema_tables()}) +@market_intel_bp.route("/api/market_intel/schema_smoke") +@login_required +def market_intel_schema_smoke(): + return jsonify(_service().build_schema_smoke()) + + +@market_intel_bp.route("/api/market_intel/schema_db_probe") +@login_required +def market_intel_schema_db_probe(): + execute_requested = request.args.get("execute", "false").lower() == "true" + return jsonify(_service().build_schema_db_probe(execute_requested=execute_requested)) + + +@market_intel_bp.route("/api/market_intel/platform_seed_db_diff") +@login_required +def market_intel_platform_seed_db_diff(): + platform_code = request.args.get("platform", "all") + execute_requested = request.args.get("execute", "false").lower() == "true" + return jsonify( + _service().build_platform_seed_db_diff( + platform_code=platform_code, + execute_requested=execute_requested, + ) + ) + + +@market_intel_bp.route("/api/market_intel/adapters") +@login_required +def market_intel_adapters(): + return jsonify({"adapters": _service().get_adapter_summaries()}) + + @market_intel_bp.route("/api/market_intel/dry_run_plan") @login_required def market_intel_dry_run_plan(): platform_code = request.args.get("platform", "all") return jsonify(_service().build_dry_run_plan(platform_code=platform_code)) + + +@market_intel_bp.route("/api/market_intel/discovery_plan") +@login_required +def market_intel_discovery_plan(): + platform_code = request.args.get("platform", "all") + return jsonify(_service().build_discovery_plan(platform_code=platform_code)) + + +@market_intel_bp.route("/api/market_intel/manual_discovery") +@login_required +def market_intel_manual_discovery(): + platform_code = request.args.get("platform", "all") + fetch = request.args.get("fetch", "false").lower() == "true" + return jsonify(_service().run_manual_discovery(platform_code=platform_code, fetch=fetch)) + + +@market_intel_bp.route("/api/market_intel/candidate_preview") +@login_required +def market_intel_candidate_preview(): + platform_code = request.args.get("platform", "all") + fetch = request.args.get("fetch", "false").lower() == "true" + min_band = request.args.get("min_band", "all") + limit = request.args.get("limit", default=50, type=int) + limit = max(1, min(limit or 50, 200)) + return jsonify( + _service().build_candidate_preview( + platform_code=platform_code, + fetch=fetch, + min_band=min_band, + limit=limit, + ) + ) + + +@market_intel_bp.route("/api/market_intel/platform_seed_plan") +@login_required +def market_intel_platform_seed_plan(): + platform_code = request.args.get("platform", "all") + return jsonify(_service().build_platform_seed_plan(platform_code=platform_code)) + + +@market_intel_bp.route("/api/market_intel/platform_seed_write_guard") +@login_required +def market_intel_platform_seed_write_guard(): + platform_code = request.args.get("platform", "all") + return jsonify(_service().build_platform_seed_write_guard(platform_code=platform_code)) + + +@market_intel_bp.route("/api/market_intel/platform_seed_writer_plan") +@login_required +def market_intel_platform_seed_writer_plan(): + platform_code = request.args.get("platform", "all") + return jsonify(_service().build_platform_seed_writer_plan(platform_code=platform_code)) + + +@market_intel_bp.route("/api/market_intel/migration_blueprint") +@login_required +def market_intel_migration_blueprint(): + return jsonify(_service().build_migration_blueprint()) + + +@market_intel_bp.route("/api/market_intel/seed_writer_cli_status") +@login_required +def market_intel_seed_writer_cli_status(): + platform_code = request.args.get("platform", "all") + execute_requested = request.args.get("execute", "false").lower() == "true" + return jsonify( + _service().build_seed_writer_cli_status( + platform_code=platform_code, + execute_requested=execute_requested, + ) + ) + + +@market_intel_bp.route("/api/market_intel/write_approval_runbook") +@login_required +def market_intel_write_approval_runbook(): + platform_code = request.args.get("platform", "all") + return jsonify(_service().build_write_approval_runbook(platform_code=platform_code)) + + +@market_intel_bp.route("/api/market_intel/deployment_readiness") +@login_required +def market_intel_deployment_readiness(): + return jsonify(_service().build_deployment_readiness()) diff --git a/routes/openclaw_bot_routes.py b/routes/openclaw_bot_routes.py index acda14e..92a6944 100644 --- a/routes/openclaw_bot_routes.py +++ b/routes/openclaw_bot_routes.py @@ -20,6 +20,8 @@ v5 新增(2026-04-16): • 完整報表 PDF 下載(fpdf2 → reportlab → CSV) """ +from __future__ import annotations + import os import json import re @@ -8595,6 +8597,85 @@ def handle_cmd(cmd, arg, chat_id, reply_to): send_message(chat_id, txt, reply_to, kb) +def _write_event_ignore_audit(event_id: str, user_label: str, ts_label: str) -> None: + """將 EA HITL 忽略決策寫入 ai_insights,供 webhook / polling 共用語意。""" + from database.manager import get_session + + session = get_session() + try: + session.execute( + text(""" + INSERT INTO ai_insights + (insight_type, content, confidence, created_by, status, metadata_json) + VALUES (:type, :content, :conf, :by, :status, :meta) + """), + { + "type": "human_review", + "content": f"[EA HITL] 事件 {event_id} 由 {user_label} 忽略", + "conf": 1.0, + "by": f"telegram:{user_label}", + "status": "ignored", + "meta": json.dumps({ + "event_id": event_id, + "decided_by": user_label, + "decided_at": ts_label, + "decision": "ignored", + }, ensure_ascii=False), + }, + ) + session.commit() + finally: + session.close() + + +def _handle_event_ignore_callback(data: str, cq: dict, chat_id, message_id) -> None: + """處理 `momo:eig:` webhook callback,避免 HITL 按鈕無反應。""" + from html import escape as _html_escape + + parts = data.split(':', 2) + event_id = parts[2].strip() if len(parts) >= 3 else '' + if not event_id: + send_message(chat_id, "⚠️ event_id 缺失,忽略動作未生效", None, None, parse_mode=None) + sys_log.warning("[EA HITL] empty event_id callback rejected: %r", data) + return + + user = cq.get('from') or {} + user_label_raw = ( + user.get('username') + or user.get('first_name') + or str(user.get('id') or '?') + ) + ts_label_raw = datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d %H:%M') + + try: + _write_event_ignore_audit(event_id, user_label_raw, ts_label_raw) + except Exception as audit_err: + sys_log.warning(f"[EA HITL] ai_insights audit 寫入失敗(不阻斷 UI): {audit_err}") + + user_label_safe = _html_escape(str(user_label_raw)) + ts_label_safe = _html_escape(ts_label_raw) + original = (cq.get('message') or {}).get('text') or (cq.get('message') or {}).get('caption') or '事件已忽略' + text = ( + _html_escape(str(original))[:3400] + + f"\n\n🛑 已忽略 by {user_label_safe} @ {ts_label_safe}" + ) + + edited = False + if message_id: + result = edit_message_text(chat_id, message_id, text, keyboard=None, parse_mode="HTML") + edited = bool(isinstance(result, dict) and result.get("ok")) + if not edited: + send_message( + chat_id, + f"🛑 已忽略事件 {event_id} by {user_label_raw} @ {ts_label_raw}", + None, + None, + parse_mode=None, + ) + + sys_log.info(f"[EA HITL] event_ignore event_id={event_id} by={user_label_raw}") + + # ── Webhook ─────────────────────────────────────────────────── @openclaw_bot_bp.route('/bot/telegram/webhook', methods=['POST']) def telegram_webhook(): @@ -8647,6 +8728,10 @@ def telegram_webhook(): answer_callback(cq_id) send_typing(chat_id) + if data.startswith('momo:eig:'): + _handle_event_ignore_callback(data, cq, chat_id, cq_message_id) + return jsonify({'ok': True}) + # ── Phase 11 RAG 反饋(v5.0 護欄 #1)───────────────────── # rag_fb:{log_id}:{score} → 寫回 rag_query_log.feedback_score # pg_ok:{episode_id} → PromotionGate 人工通過 diff --git a/run_scheduler.py b/run_scheduler.py index 2492a77..683acf5 100644 --- a/run_scheduler.py +++ b/run_scheduler.py @@ -50,6 +50,35 @@ logger = logging.getLogger(__name__) _AI_CALLS_ERROR_SPIKE_LAST_PUSH_TS = 0.0 +def _notify_scheduler_failure( + task_name: str, + error: Exception, + *, + source: str, + event_type: str, + priority: str = "P2", + title: str = None, + dedup_ttl_sec: int = 3600, +) -> None: + """Best-effort EventRouter failure notification for scheduler wrapper tasks.""" + try: + import traceback + from services.event_router import notify_failure + + notify_failure( + task_name=task_name, + error=error, + source=source, + event_type=event_type, + priority=priority, + title=title or f"{task_name} 失敗", + trace=traceback.format_exc(), + dedup_ttl_sec=dedup_ttl_sec, + ) + except Exception as router_error: + logger.error("[%s] event_router notify failed: %s", task_name, router_error) + + def _register_schedules(): schedule.every(30).minutes.do(run_auto_import_task) logger.info("📅 每 30 分鐘:auto_import") @@ -143,9 +172,9 @@ def _register_schedules(): schedule.every().day.at("00:05").do(run_cost_throttle_reset_if_new_month) logger.info("📅 每日 00:05:cost_throttle_reset_if_new_month") - # Phase 24: ROI 月報(每日 09:00 跑,內部判斷是否月初第 1 日) - schedule.every().day.at("09:00").do(run_roi_monthly_report_if_new_month) - logger.info("📅 每日 09:00:roi_monthly_report(月初第 1 日才送)") + # Phase 24: ROI 月報(每日 09:05 跑,內部判斷是否月初第 1 日;避開 09:00 daily_report) + schedule.every().day.at("09:05").do(run_roi_monthly_report_if_new_month) + logger.info("📅 每日 09:05:roi_monthly_report(月初第 1 日才送)") # Phase 26: PPT 視覺審核(每日 22:00 掃當天新生 .pptx,有 issues 才推 Telegram) schedule.every().day.at("22:00").do(run_ppt_vision_audit) @@ -207,6 +236,13 @@ def run_daily_token_report_task(): ) except Exception as e: logger.error(f"[TokenReport] task failed: {e}", exc_info=True) + _notify_scheduler_failure( + "run_daily_token_report_task", + e, + source="Scheduler.TokenReport", + event_type="daily_token_report_failure", + title="LLM Token 日報失敗", + ) # ───────────────────────────────────────────────────────────────────────────── @@ -230,6 +266,13 @@ def run_promotion_gate_worker(): ) except Exception as e: logger.error(f"[PromotionWorker] task failed: {e}", exc_info=True) + _notify_scheduler_failure( + "run_promotion_gate_worker", + e, + source="Scheduler.PromotionGate", + event_type="promotion_gate_worker_failure", + title="PromotionGate worker 失敗", + ) def run_awaiting_review_push(): @@ -244,6 +287,13 @@ def run_awaiting_review_push(): logger.info("[AwaitingReviewPush] pushed=%d episodes", pushed) except Exception as e: logger.error(f"[AwaitingReviewPush] task failed: {e}", exc_info=True) + _notify_scheduler_failure( + "run_awaiting_review_push", + e, + source="Scheduler.PromotionGate", + event_type="awaiting_review_push_failure", + title="PromotionGate 審查推播失敗", + ) def run_expire_stale_reviews(): @@ -258,6 +308,13 @@ def run_expire_stale_reviews(): logger.info("[ExpireStale] expired %d awaiting_review episodes (24h timeout)", n) except Exception as e: logger.error(f"[ExpireStale] task failed: {e}", exc_info=True) + _notify_scheduler_failure( + "run_expire_stale_reviews", + e, + source="Scheduler.PromotionGate", + event_type="expire_stale_reviews_failure", + title="PromotionGate 過期審查清理失敗", + ) def run_cost_throttle_evaluate(): @@ -280,6 +337,13 @@ def run_cost_throttle_evaluate(): logger.debug("[CostThrottle] no provider throttled") except Exception as e: logger.error(f"[CostThrottle] task failed: {e}", exc_info=True) + _notify_scheduler_failure( + "run_cost_throttle_evaluate", + e, + source="Scheduler.CostThrottle", + event_type="cost_throttle_evaluate_failure", + title="成本節流評估失敗", + ) def run_host_health_probe(): @@ -713,6 +777,13 @@ def run_cost_throttle_reset_if_new_month(): logger.info("[CostThrottle] new month detected, state reset") except Exception as e: logger.error(f"[CostThrottle] reset failed: {e}", exc_info=True) + _notify_scheduler_failure( + "run_cost_throttle_reset_if_new_month", + e, + source="Scheduler.CostThrottle", + event_type="cost_throttle_reset_failure", + title="成本節流跨月重置失敗", + ) def run_ppt_vision_audit(): @@ -735,6 +806,13 @@ def run_ppt_vision_audit(): logger.debug("[PPTVisionAudit] no issues found") except Exception as e: logger.error(f"[PPTVisionAudit] task failed: {e}", exc_info=True) + _notify_scheduler_failure( + "run_ppt_vision_audit", + e, + source="Scheduler.PPTVision", + event_type="ppt_vision_audit_failure", + title="PPT 視覺審核失敗", + ) def run_roi_monthly_report_if_new_month(): @@ -753,6 +831,13 @@ def run_roi_monthly_report_if_new_month(): result.get('sent'), result.get('period', '?')) except Exception as e: logger.error(f"[ROIReport] task failed: {e}", exc_info=True) + _notify_scheduler_failure( + "run_roi_monthly_report_if_new_month", + e, + source="Scheduler.ROIReport", + event_type="roi_monthly_report_failure", + title="ROI 月報失敗", + ) def run_embed_consistency_check(): @@ -776,6 +861,13 @@ def run_embed_consistency_check(): ) except Exception as e: logger.error(f"[EmbedConsistency] task failed: {e}", exc_info=True) + _notify_scheduler_failure( + "run_embed_consistency_check", + e, + source="Scheduler.RAG", + event_type="embed_consistency_check_failure", + title="BGE-M3 一致性檢查失敗", + ) def run_cleanup_agent_context(): diff --git a/scripts/market_intel_seed_writer.py b/scripts/market_intel_seed_writer.py new file mode 100644 index 0000000..1204016 --- /dev/null +++ b/scripts/market_intel_seed_writer.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +"""Market intelligence seed writer CLI skeleton. + +This script intentionally refuses real writes in the current phase. It prints a +JSON execution plan and never creates a DB session or commits seed rows. +""" + +import argparse +import contextlib +import json +import os +import sys +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[1] +if str(REPO_ROOT) not in sys.path: + sys.path.insert(0, str(REPO_ROOT)) + +with contextlib.redirect_stdout(sys.stderr): + from services.market_intel import MarketIntelService # noqa: E402 + from services.market_intel.seed_writer_cli import APPROVAL_ENV_VAR # noqa: E402 + + +def parse_args(argv=None): + parser = argparse.ArgumentParser( + description="Preview market_intel platform seed writer execution." + ) + parser.add_argument( + "--platform", + default="all", + help="Platform code to preview, or all. Default: all.", + ) + parser.add_argument( + "--execute", + action="store_true", + help="Request real execution. This skeleton will still block it.", + ) + parser.add_argument( + "--approval-token", + default=None, + help=f"One-time approval token. May also be set via {APPROVAL_ENV_VAR}.", + ) + return parser.parse_args(argv) + + +def main(argv=None): + args = parse_args(argv) + approval_token = args.approval_token or os.getenv(APPROVAL_ENV_VAR) + service = MarketIntelService() + plan = service.build_seed_writer_cli_status( + platform_code=args.platform, + execute_requested=args.execute, + approval_token=approval_token, + ) + print(json.dumps(plan, ensure_ascii=False, indent=2, sort_keys=True)) + return int(plan.get("exit_code", 2)) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/services/cache_service.py b/services/cache_service.py index a96089f..d57a7b1 100644 --- a/services/cache_service.py +++ b/services/cache_service.py @@ -11,6 +11,7 @@ from services.cache_manager import ( _SALES_PROCESSED_CACHE, _SALES_OPTIONS_CACHE, _SALES_ANALYSIS_RESULT_CACHE, + _SALES_CACHE_TTL, _DASHBOARD_DATA_CACHE, _DASHBOARD_CACHE_TTL, clear_sales_cache, @@ -20,10 +21,9 @@ from services.cache_manager import ( # 台北時區 TAIPEI_TZ = timezone(timedelta(hours=8)) -# 快取 TTL 設定 -_SALES_CACHE_TTL = 3600 # 業績分析快取: 60 分鐘 +# 快取 TTL 設定:sales TTL 以 cache_manager 為單一來源 _SALES_OPTIONS_TTL = 21600 # 選項快取: 6 小時 -_SALES_RESULT_TTL = 3600 # 結果快取: 60 分鐘 +_SALES_RESULT_TTL = _SALES_CACHE_TTL # ========================================== # 成長分析快取 diff --git a/services/market_intel/adapters/__init__.py b/services/market_intel/adapters/__init__.py new file mode 100644 index 0000000..53cc26e --- /dev/null +++ b/services/market_intel/adapters/__init__.py @@ -0,0 +1,9 @@ +"""市場情報平台 adapter registry。""" + +from services.market_intel.adapters.registry import ( + get_adapter, + get_adapter_registry, + get_adapter_summaries, +) + +__all__ = ["get_adapter", "get_adapter_registry", "get_adapter_summaries"] diff --git a/services/market_intel/adapters/base.py b/services/market_intel/adapters/base.py new file mode 100644 index 0000000..ec3b7e9 --- /dev/null +++ b/services/market_intel/adapters/base.py @@ -0,0 +1,93 @@ +"""市場情報 adapter 基礎類別。 + +Phase 3 只提供 read-only discovery plan,不發 HTTP request。 +""" + +from dataclasses import asdict, dataclass +from typing import Iterable, Sequence + + +@dataclass(frozen=True) +class CampaignSource: + """公開活動入口的描述,不代表已實際爬取。""" + + source_key: str + name: str + url: str + campaign_type: str + notes: str = "" + + def to_dict(self): + return asdict(self) + + +@dataclass(frozen=True) +class AdapterSafetyPolicy: + """平台爬取安全策略。""" + + request_interval_sec: float + timeout_sec: int + max_pages_per_run: int + allow_login: bool = False + allow_database_write: bool = False + allow_scheduler_attach: bool = False + + def to_dict(self): + return asdict(self) + + +class MarketIntelAdapter: + """市場情報平台 adapter base class。""" + + platform_code = "" + platform_name = "" + base_url = "" + campaign_url_keywords = () + campaign_text_keywords = () + safety_policy = AdapterSafetyPolicy( + request_interval_sec=2.0, + timeout_sec=20, + max_pages_per_run=5, + ) + + def campaign_sources(self) -> Sequence[CampaignSource]: + return () + + def summary(self): + return { + "platform_code": self.platform_code, + "platform_name": self.platform_name, + "base_url": self.base_url, + "source_count": len(self.campaign_sources()), + "safety_policy": self.safety_policy.to_dict(), + "phase": "read_only_adapter_skeleton", + } + + def build_discovery_plan(self): + """建立 discovery plan,不發 request、不寫 DB。""" + return { + **self.summary(), + "network_request_allowed": False, + "database_write_allowed": False, + "scheduler_attach_allowed": False, + "sources": [source.to_dict() for source in self.campaign_sources()], + } + + def discover_campaigns(self, *, dry_run=True) -> Iterable[CampaignSource]: + """Phase 3 僅允許 dry-run 回傳入口描述。""" + if not dry_run: + raise RuntimeError("市場情報 adapter 尚未允許正式 discovery") + return self.campaign_sources() + + def score_campaign_link(self, href, text): + """平台別活動連結加權,只用於診斷排序。""" + url_text = (href or "").lower() + link_text = (text or "").lower() + score = 0 + for keyword in self.campaign_url_keywords: + if str(keyword).lower() in url_text: + score += 4 + for keyword in self.campaign_text_keywords: + if str(keyword).lower() in link_text: + score += 3 + return score diff --git a/services/market_intel/adapters/coupang_adapter.py b/services/market_intel/adapters/coupang_adapter.py new file mode 100644 index 0000000..5a2136c --- /dev/null +++ b/services/market_intel/adapters/coupang_adapter.py @@ -0,0 +1,54 @@ +"""Coupang 市場活動 read-only adapter skeleton。""" + +from services.market_intel.adapters.base import ( + AdapterSafetyPolicy, + CampaignSource, + MarketIntelAdapter, +) + + +class CoupangMarketAdapter(MarketIntelAdapter): + platform_code = "coupang" + platform_name = "Coupang 酷澎" + base_url = "https://www.tw.coupang.com" + campaign_url_keywords = ( + "tw.coupang.com", + "/np/", + "coupangglobal", + "promotion", + "event", + "sale", + ) + campaign_text_keywords = ( + "coupang", + "酷澎", + "活動", + "優惠", + "折扣", + "特價", + "今日精選", + "火箭", + ) + safety_policy = AdapterSafetyPolicy( + request_interval_sec=2.5, + timeout_sec=25, + max_pages_per_run=5, + ) + + def campaign_sources(self): + return ( + CampaignSource( + source_key="coupang_home", + name="Coupang 酷澎首頁入口", + url="https://www.tw.coupang.com/", + campaign_type="homepage_campaign", + notes="只保存官方公開首頁入口;正式抓取需另開 feature flag。", + ), + CampaignSource( + source_key="coupang_global", + name="Coupang 火箭跨境入口", + url="https://www.tw.coupang.com/np/coupangglobal", + campaign_type="cross_border_sale", + notes="先作為跨境/火箭相關活動 discovery 起點,不代表已建立活動。", + ), + ) diff --git a/services/market_intel/adapters/momo_adapter.py b/services/market_intel/adapters/momo_adapter.py new file mode 100644 index 0000000..f95b75f --- /dev/null +++ b/services/market_intel/adapters/momo_adapter.py @@ -0,0 +1,54 @@ +"""MOMO 市場活動 read-only adapter skeleton。""" + +from services.market_intel.adapters.base import ( + AdapterSafetyPolicy, + CampaignSource, + MarketIntelAdapter, +) + + +class MomoMarketAdapter(MarketIntelAdapter): + platform_code = "momo" + platform_name = "MOMO 購物網" + base_url = "https://www.momoshop.com.tw" + campaign_url_keywords = ( + "edm", + "cmmedm", + "lgrpcategory", + "category", + "promo", + "event", + ) + campaign_text_keywords = ( + "momo", + "活動", + "優惠", + "限時", + "限時搶購", + "品牌日", + "滿額", + "折價券", + ) + safety_policy = AdapterSafetyPolicy( + request_interval_sec=2.0, + timeout_sec=25, + max_pages_per_run=8, + ) + + def campaign_sources(self): + return ( + CampaignSource( + source_key="momo_edm", + name="MOMO EDM 活動入口", + url="https://www.momoshop.com.tw/edm/cmmedm.jsp", + campaign_type="festival", + notes="先接既有 EDM / festival 爬蟲語意,正式抓取需另開 feature flag。", + ), + CampaignSource( + source_key="momo_flash_sale", + name="MOMO 限時搶購入口", + url="https://www.momoshop.com.tw/category/LgrpCategory.jsp?l_code=2140000000", + campaign_type="flash_sale", + notes="只保存公開入口描述;Phase 3 不發 request。", + ), + ) diff --git a/services/market_intel/adapters/pchome_adapter.py b/services/market_intel/adapters/pchome_adapter.py new file mode 100644 index 0000000..fe5dc08 --- /dev/null +++ b/services/market_intel/adapters/pchome_adapter.py @@ -0,0 +1,54 @@ +"""PChome 市場活動 read-only adapter skeleton。""" + +from services.market_intel.adapters.base import ( + AdapterSafetyPolicy, + CampaignSource, + MarketIntelAdapter, +) + + +class PChomeMarketAdapter(MarketIntelAdapter): + platform_code = "pchome" + platform_name = "PChome 24h" + base_url = "https://24h.pchome.com.tw" + campaign_url_keywords = ( + "24h.pchome.com.tw", + "/activity", + "/campaign", + "/region/", + "promo", + "event", + ) + campaign_text_keywords = ( + "pchome", + "24h", + "活動", + "優惠", + "限時", + "品牌日", + "特價", + "折扣", + ) + safety_policy = AdapterSafetyPolicy( + request_interval_sec=1.5, + timeout_sec=20, + max_pages_per_run=8, + ) + + def campaign_sources(self): + return ( + CampaignSource( + source_key="pchome_home", + name="PChome 24h 首頁活動入口", + url="https://24h.pchome.com.tw/", + campaign_type="homepage_campaign", + notes="可作為活動 banner / 分類入口 discovery 起點。", + ), + CampaignSource( + source_key="pchome_region_beauty", + name="PChome 美妝保養館別入口", + url="https://24h.pchome.com.tw/region/DDAB", + campaign_type="category_sale", + notes="延續既有 PChome crawler 的 region page 低成本入口。", + ), + ) diff --git a/services/market_intel/adapters/registry.py b/services/market_intel/adapters/registry.py new file mode 100644 index 0000000..53e4c9e --- /dev/null +++ b/services/market_intel/adapters/registry.py @@ -0,0 +1,29 @@ +"""市場情報 adapter 註冊表。""" + +from services.market_intel.adapters.coupang_adapter import CoupangMarketAdapter +from services.market_intel.adapters.momo_adapter import MomoMarketAdapter +from services.market_intel.adapters.pchome_adapter import PChomeMarketAdapter +from services.market_intel.adapters.shopee_adapter import ShopeeMarketAdapter + + +ADAPTER_CLASSES = { + "momo": MomoMarketAdapter, + "pchome": PChomeMarketAdapter, + "coupang": CoupangMarketAdapter, + "shopee": ShopeeMarketAdapter, +} + + +def get_adapter(platform_code): + adapter_class = ADAPTER_CLASSES.get((platform_code or "").lower()) + if not adapter_class: + return None + return adapter_class() + + +def get_adapter_registry(): + return {code: adapter_class() for code, adapter_class in ADAPTER_CLASSES.items()} + + +def get_adapter_summaries(): + return [adapter.summary() for adapter in get_adapter_registry().values()] diff --git a/services/market_intel/adapters/shopee_adapter.py b/services/market_intel/adapters/shopee_adapter.py new file mode 100644 index 0000000..1531cac --- /dev/null +++ b/services/market_intel/adapters/shopee_adapter.py @@ -0,0 +1,57 @@ +"""Shopee 市場活動 read-only adapter skeleton。""" + +from services.market_intel.adapters.base import ( + AdapterSafetyPolicy, + CampaignSource, + MarketIntelAdapter, +) + + +class ShopeeMarketAdapter(MarketIntelAdapter): + platform_code = "shopee" + platform_name = "Shopee 蝦皮購物" + base_url = "https://shopee.tw" + campaign_url_keywords = ( + "shopee.tw", + "/mall", + "campaign", + "event", + "flash_sale", + "promotion", + "sale", + ) + campaign_text_keywords = ( + "shopee", + "蝦皮", + "商城", + "活動", + "優惠", + "折扣", + "特價", + "限時", + "品牌", + "免運", + ) + safety_policy = AdapterSafetyPolicy( + request_interval_sec=4.0, + timeout_sec=25, + max_pages_per_run=3, + ) + + def campaign_sources(self): + return ( + CampaignSource( + source_key="shopee_home", + name="Shopee 蝦皮首頁入口", + url="https://shopee.tw/", + campaign_type="homepage_campaign", + notes="只保存公開首頁入口;不得登入、碰會員券、購物車或反爬繞過。", + ), + CampaignSource( + source_key="shopee_mall", + name="Shopee Mall 商城入口", + url="https://shopee.tw/mall", + campaign_type="brand_mall", + notes="只作為商城/品牌活動 discovery 起點;正式抓取需另開 feature flag。", + ), + ) diff --git a/services/market_intel/candidate_preview.py b/services/market_intel/candidate_preview.py new file mode 100644 index 0000000..e05e9d9 --- /dev/null +++ b/services/market_intel/candidate_preview.py @@ -0,0 +1,81 @@ +"""市場情報候選連結 preview 聚合。 + +只整理本次 diagnostics 結果供人工審核,不建立 campaign/product,不寫 DB。 +""" + + +BAND_RANK = { + "high": 3, + "medium": 2, + "low": 1, +} + + +def _band_allowed(candidate, min_band): + if not min_band or min_band == "all": + return True + candidate_rank = BAND_RANK.get(candidate.get("confidence_band"), 0) + min_rank = BAND_RANK.get(min_band, 0) + return candidate_rank >= min_rank + + +def build_candidate_preview_from_discovery(discovery_result, *, min_band="all", limit=50): + """把 manual discovery diagnostics 整理成人工審核 preview。""" + candidates = [] + run_statuses = [] + + for run in discovery_result.get("runs", []): + platform_code = run.get("platform_code") + run_statuses.append({ + "platform_code": platform_code, + "status": run.get("status"), + "sources_planned": run.get("sources_planned", 0), + "sources_fetched": run.get("sources_fetched", 0), + "errors": run.get("errors", 0), + }) + + for source_result in run.get("results", []): + diagnostics = source_result.get("diagnostics") or {} + for candidate in diagnostics.get("campaign_link_candidates", []): + if not _band_allowed(candidate, min_band): + continue + candidates.append({ + "platform_code": platform_code, + "source_key": source_result.get("source_key"), + "source_name": source_result.get("name"), + "source_url": source_result.get("url"), + "source_status": source_result.get("status"), + "page_title": diagnostics.get("title"), + "page_hash": diagnostics.get("page_hash"), + "href": candidate.get("href"), + "text": candidate.get("text"), + "is_same_host": candidate.get("is_same_host"), + "score": candidate.get("score", 0), + "generic_score": candidate.get("generic_score", 0), + "platform_score": candidate.get("platform_score", 0), + "confidence_band": candidate.get("confidence_band"), + "confidence_reason": candidate.get("confidence_reason"), + }) + + candidates = sorted( + candidates, + key=lambda item: ( + BAND_RANK.get(item.get("confidence_band"), 0), + item.get("score", 0), + item.get("platform_score", 0), + ), + reverse=True, + ) + + return { + "platform_code": discovery_result.get("platform_code", "all"), + "fetch_requested": bool(discovery_result.get("fetch_requested")), + "manual_fetch_allowed": bool(discovery_result.get("manual_fetch_allowed")), + "min_band": min_band or "all", + "limit": limit, + "candidate_count": len(candidates), + "candidates": candidates[:limit], + "run_statuses": run_statuses, + "database_write_allowed": False, + "scheduler_attached": False, + } diff --git a/services/market_intel/discovery_runner.py b/services/market_intel/discovery_runner.py new file mode 100644 index 0000000..7ab79f4 --- /dev/null +++ b/services/market_intel/discovery_runner.py @@ -0,0 +1,215 @@ +"""市場情報手動 discovery dry-run runner。 + +預設不發 HTTP request;即使手動 fetch,也只做公開頁面探測與摘要,不寫 DB。 +""" + +import hashlib +import time +from dataclasses import asdict, dataclass +from datetime import datetime, timedelta, timezone +from typing import Callable, Optional +from uuid import uuid4 + +import requests + +from services.market_intel.html_diagnostics import parse_html_diagnostics + + +TAIPEI_TZ = timezone(timedelta(hours=8)) +DEFAULT_HEADERS = { + "User-Agent": "EwoooC-MarketIntel-DryRun/1.0 (+https://mo.wooo.work)", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "zh-TW,zh;q=0.9,en;q=0.8", +} + + +@dataclass(frozen=True) +class DiscoverySourceResult: + """單一活動入口的 dry-run 結果。""" + + source_key: str + name: str + url: str + campaign_type: str + status: str + network_requested: bool + network_executed: bool + status_code: Optional[int] = None + content_length: int = 0 + page_hash: Optional[str] = None + title: Optional[str] = None + diagnostics: Optional[dict] = None + error_message: Optional[str] = None + + def to_dict(self): + return asdict(self) + + +@dataclass(frozen=True) +class ManualDiscoveryRunResult: + """手動 discovery dry-run 的整體結果。""" + + batch_id: str + platform_code: str + started_at: str + finished_at: str + status: str + fetch_requested: bool + network_allowed: bool + database_write_allowed: bool + scheduler_attached: bool + sources_planned: int + sources_fetched: int + errors: int + results: list + error_message: Optional[str] = None + + def to_dict(self): + return asdict(self) + + +def _now_iso(): + return datetime.now(TAIPEI_TZ).replace(tzinfo=None).isoformat() + + +class ManualDiscoveryRunner: + """手動 discovery runner,透過 feature gate 控制是否允許網路探測。""" + + def __init__(self, *, runtime_status, http_get: Optional[Callable] = None): + self.runtime_status = runtime_status + self.http_get = http_get or requests.get + + def _network_allowed(self): + return bool(self.runtime_status.enabled and self.runtime_status.crawler_enabled) + + def run(self, adapter, *, fetch=False): + started_at = _now_iso() + sources = list(adapter.campaign_sources()) + network_allowed = self._network_allowed() + + if fetch and not network_allowed: + return ManualDiscoveryRunResult( + batch_id=f"market-manual-{uuid4().hex[:12]}", + platform_code=adapter.platform_code, + started_at=started_at, + finished_at=_now_iso(), + status="blocked", + fetch_requested=True, + network_allowed=False, + database_write_allowed=False, + scheduler_attached=False, + sources_planned=len(sources), + sources_fetched=0, + errors=0, + results=[ + self._source_result(source, "blocked", True, False).to_dict() + for source in sources + ], + error_message="MARKET_INTEL_ENABLED 與 MARKET_INTEL_CRAWLER_ENABLED 必須同時開啟才允許手動 fetch", + ) + + capped_sources = sources[:adapter.safety_policy.max_pages_per_run] + results = [] + errors = 0 + fetched = 0 + last_request_at = 0.0 + + for source in capped_sources: + if not fetch: + results.append(self._source_result(source, "planned", False, False).to_dict()) + continue + + elapsed = time.time() - last_request_at + if last_request_at and elapsed < adapter.safety_policy.request_interval_sec: + time.sleep(adapter.safety_policy.request_interval_sec - elapsed) + + try: + response = self.http_get( + source.url, + headers=DEFAULT_HEADERS, + timeout=adapter.safety_policy.timeout_sec, + ) + last_request_at = time.time() + text = response.text or "" + fetched += 1 + results.append( + self._source_result( + source, + "fetched", + True, + True, + status_code=getattr(response, "status_code", None), + content=text, + score_link=adapter.score_campaign_link, + ).to_dict() + ) + except Exception as exc: + last_request_at = time.time() + errors += 1 + results.append( + self._source_result( + source, + "failed", + True, + True, + error_message=str(exc), + ).to_dict() + ) + + status = "planned" + if fetch: + status = "success" if errors == 0 else "partial_failed" + + return ManualDiscoveryRunResult( + batch_id=f"market-manual-{uuid4().hex[:12]}", + platform_code=adapter.platform_code, + started_at=started_at, + finished_at=_now_iso(), + status=status, + fetch_requested=bool(fetch), + network_allowed=network_allowed, + database_write_allowed=False, + scheduler_attached=False, + sources_planned=len(sources), + sources_fetched=fetched, + errors=errors, + results=results, + ) + + def _source_result( + self, + source, + status, + network_requested, + network_executed, + *, + status_code=None, + content=None, + score_link=None, + error_message=None, + ): + content = content or "" + diagnostics = ( + parse_html_diagnostics( + content, + base_url=source.url, + score_link=score_link, + ).to_dict() + if content + else None + ) + return DiscoverySourceResult( + source_key=source.source_key, + name=source.name, + url=source.url, + campaign_type=source.campaign_type, + status=status, + network_requested=network_requested, + network_executed=network_executed, + status_code=status_code, + content_length=len(content), + page_hash=hashlib.sha256(content.encode("utf-8", errors="ignore")).hexdigest() if content else None, + title=diagnostics.get("title") if diagnostics else None, + diagnostics=diagnostics, + error_message=error_message, + ) diff --git a/services/market_intel/html_diagnostics.py b/services/market_intel/html_diagnostics.py new file mode 100644 index 0000000..7905acc --- /dev/null +++ b/services/market_intel/html_diagnostics.py @@ -0,0 +1,172 @@ +"""市場情報 HTML 診斷解析工具。 + +只萃取頁面標題、連結候選與內容指紋,不建立正式 campaign/product。 +""" + +import hashlib +import re +from dataclasses import asdict, dataclass +from html.parser import HTMLParser +from urllib.parse import urljoin, urlparse + + +@dataclass(frozen=True) +class LinkCandidate: + """活動頁中可疑連結候選。""" + + href: str + text: str + is_same_host: bool + score: int + generic_score: int + platform_score: int + confidence_band: str + confidence_reason: str + + def to_dict(self): + return asdict(self) + + +@dataclass(frozen=True) +class HtmlDiagnostics: + """HTML 診斷摘要。""" + + content_length: int + page_hash: str + title: str + link_count: int + same_host_link_count: int + campaign_link_candidates: list + + def to_dict(self): + return asdict(self) + + +class _DiagnosticHtmlParser(HTMLParser): + def __init__(self): + super().__init__(convert_charrefs=True) + self.title_parts = [] + self.links = [] + self._in_title = False + self._active_href = None + self._active_text = [] + + def handle_starttag(self, tag, attrs): + attrs_map = dict(attrs or []) + if tag.lower() == "title": + self._in_title = True + if tag.lower() == "a": + self._active_href = attrs_map.get("href") + self._active_text = [] + + def handle_data(self, data): + if self._in_title: + self.title_parts.append(data) + if self._active_href is not None: + self._active_text.append(data) + + def handle_endtag(self, tag): + tag = tag.lower() + if tag == "title": + self._in_title = False + if tag == "a" and self._active_href is not None: + self.links.append((self._active_href, " ".join(self._active_text))) + self._active_href = None + self._active_text = [] + + +def _clean_text(value, limit=160): + text = re.sub(r"\s+", " ", value or "").strip() + return text[:limit] + + +def _score_link(href, text): + haystack = f"{href} {text}".lower() + score = 0 + for keyword in ("edm", "event", "promo", "campaign", "sale", "activity"): + if keyword in haystack: + score += 2 + for keyword in ("活動", "優惠", "折扣", "檔期", "品牌日", "限時", "促銷"): + if keyword in haystack: + score += 3 + return score + + +def _confidence_band(score, *, is_same_host, platform_score, generic_score): + """把診斷分數轉成人工審核用信心帶。""" + reasons = [] + reasons.append("same_host" if is_same_host else "external_host") + if platform_score > 0: + reasons.append(f"platform_score={platform_score}") + if generic_score > 0: + reasons.append(f"generic_score={generic_score}") + + if score >= 12 and is_same_host: + return "high", ", ".join(reasons) + if score >= 6: + return "medium", ", ".join(reasons) + return "low", ", ".join(reasons) + + +def parse_html_diagnostics(html, base_url="", candidate_limit=12, score_link=None): + """解析 HTML 診斷資訊,不做商業資料入庫。""" + html = html or "" + parser = _DiagnosticHtmlParser() + parser.feed(html) + + base_host = urlparse(base_url or "").netloc + candidates = [] + same_host_count = 0 + + for raw_href, raw_text in parser.links: + if not raw_href: + continue + href = urljoin(base_url, raw_href) + parsed = urlparse(href) + if parsed.scheme not in {"http", "https"}: + continue + is_same_host = bool(base_host and parsed.netloc == base_host) + if is_same_host: + same_host_count += 1 + text = _clean_text(raw_text) + generic_score = _score_link(href, text) + platform_score = int(score_link(href, text)) if score_link else 0 + score = generic_score + platform_score + if score <= 0: + continue + confidence_band, confidence_reason = _confidence_band( + score, + is_same_host=is_same_host, + platform_score=platform_score, + generic_score=generic_score, + ) + candidates.append( + LinkCandidate( + href=href, + text=text, + is_same_host=is_same_host, + score=score, + generic_score=generic_score, + platform_score=platform_score, + confidence_band=confidence_band, + confidence_reason=confidence_reason, + ) + ) + + band_rank = {"high": 3, "medium": 2, "low": 1} + candidates = sorted( + candidates, + key=lambda item: (band_rank.get(item.confidence_band, 0), item.score), + reverse=True, + )[:candidate_limit] + page_hash = hashlib.sha256(html.encode("utf-8", errors="ignore")).hexdigest() if html else "" + title = _clean_text(" ".join(parser.title_parts), limit=200) + + return HtmlDiagnostics( + content_length=len(html), + page_hash=page_hash, + title=title, + link_count=len(parser.links), + same_host_link_count=same_host_count, + campaign_link_candidates=[candidate.to_dict() for candidate in candidates], + ) diff --git a/services/market_intel/migration_blueprint.py b/services/market_intel/migration_blueprint.py new file mode 100644 index 0000000..412785b --- /dev/null +++ b/services/market_intel/migration_blueprint.py @@ -0,0 +1,381 @@ +"""市場情報 migration 與正式 seed writer 命令草案。 + +本模組只產生可審核的 SQL / command preview,不連線 DB、不執行命令。 +""" + +from pathlib import Path + + +MIGRATION_NUMBER = "032" +MIGRATION_FILENAME = "migrations/032_market_intel_core_schema.sql" +SEED_WRITER_SCRIPT = "scripts/market_intel_seed_writer.py" + + +FORWARD_SQL = """ +-- ============================================================================= +-- Migration 032: market_intel core schema +-- MOMO PRO - Cross-platform market campaign intelligence +-- 2026-05-07 Taipei +-- ============================================================================= +-- Notes: +-- Creates the ADR-035 market_* schema. This migration is additive only: +-- it creates tables, indexes, and grants. It does not drop or alter existing +-- sales tables, and it does not touch the momo-db container lifecycle. +-- ============================================================================= + +CREATE TABLE IF NOT EXISTS market_platforms ( + id BIGSERIAL PRIMARY KEY, + code VARCHAR(50) NOT NULL UNIQUE, + name VARCHAR(120) NOT NULL, + base_url VARCHAR(500), + enabled BOOLEAN NOT NULL DEFAULT FALSE, + crawl_policy_json TEXT, + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_market_platforms_code + ON market_platforms (code); + +CREATE TABLE IF NOT EXISTS market_campaigns ( + id BIGSERIAL PRIMARY KEY, + platform_code VARCHAR(50) NOT NULL REFERENCES market_platforms(code), + campaign_key VARCHAR(200) NOT NULL, + campaign_name VARCHAR(500) NOT NULL, + campaign_type VARCHAR(80), + campaign_url TEXT, + start_at TIMESTAMP, + end_at TIMESTAMP, + status VARCHAR(30) NOT NULL DEFAULT 'unknown', + discovered_at TIMESTAMP NOT NULL DEFAULT NOW(), + last_seen_at TIMESTAMP NOT NULL DEFAULT NOW(), + metadata_json TEXT, + CONSTRAINT uq_market_campaign_platform_key + UNIQUE (platform_code, campaign_key) +); + +CREATE INDEX IF NOT EXISTS idx_market_campaigns_platform_code + ON market_campaigns (platform_code); +CREATE INDEX IF NOT EXISTS idx_market_campaigns_campaign_type + ON market_campaigns (campaign_type); +CREATE INDEX IF NOT EXISTS idx_market_campaigns_status + ON market_campaigns (status); +CREATE INDEX IF NOT EXISTS idx_market_campaign_status_time + ON market_campaigns (status, start_at, end_at); + +CREATE TABLE IF NOT EXISTS market_campaign_snapshots ( + id BIGSERIAL PRIMARY KEY, + campaign_id BIGINT NOT NULL REFERENCES market_campaigns(id), + batch_id VARCHAR(80) NOT NULL, + crawled_at TIMESTAMP NOT NULL DEFAULT NOW(), + title VARCHAR(500), + hero_text TEXT, + coupon_text TEXT, + raw_discount_text TEXT, + page_hash VARCHAR(128), + raw_snapshot_path TEXT, + status VARCHAR(30) NOT NULL DEFAULT 'success', + error_message TEXT, + metadata_json TEXT +); + +CREATE INDEX IF NOT EXISTS idx_market_campaign_snapshots_campaign_id + ON market_campaign_snapshots (campaign_id); +CREATE INDEX IF NOT EXISTS idx_market_campaign_snapshots_batch_id + ON market_campaign_snapshots (batch_id); +CREATE INDEX IF NOT EXISTS idx_market_campaign_snapshots_crawled_at + ON market_campaign_snapshots (crawled_at); +CREATE INDEX IF NOT EXISTS idx_market_campaign_snapshots_page_hash + ON market_campaign_snapshots (page_hash); +CREATE INDEX IF NOT EXISTS idx_market_campaign_snapshots_status + ON market_campaign_snapshots (status); +CREATE INDEX IF NOT EXISTS idx_market_campaign_snapshot_campaign_time + ON market_campaign_snapshots (campaign_id, crawled_at); + +CREATE TABLE IF NOT EXISTS market_campaign_products ( + id BIGSERIAL PRIMARY KEY, + campaign_id BIGINT NOT NULL REFERENCES market_campaigns(id), + platform_code VARCHAR(50) NOT NULL, + platform_product_id VARCHAR(200) NOT NULL, + product_url TEXT, + name VARCHAR(500) NOT NULL, + brand VARCHAR(200), + image_url TEXT, + category_text VARCHAR(300), + price DOUBLE PRECISION, + original_price DOUBLE PRECISION, + discount_text VARCHAR(200), + discount_rate DOUBLE PRECISION, + coupon_text TEXT, + stock_text VARCHAR(200), + sold_count INTEGER, + rating DOUBLE PRECISION, + review_count INTEGER, + rank_position INTEGER, + is_active BOOLEAN NOT NULL DEFAULT TRUE, + first_seen_at TIMESTAMP NOT NULL DEFAULT NOW(), + last_seen_at TIMESTAMP NOT NULL DEFAULT NOW(), + metadata_json TEXT, + CONSTRAINT uq_market_campaign_product + UNIQUE (campaign_id, platform_code, platform_product_id) +); + +CREATE INDEX IF NOT EXISTS idx_market_campaign_products_campaign_id + ON market_campaign_products (campaign_id); +CREATE INDEX IF NOT EXISTS idx_market_campaign_products_platform_code + ON market_campaign_products (platform_code); +CREATE INDEX IF NOT EXISTS idx_market_campaign_products_platform_product_id + ON market_campaign_products (platform_product_id); +CREATE INDEX IF NOT EXISTS idx_market_campaign_products_brand + ON market_campaign_products (brand); +CREATE INDEX IF NOT EXISTS idx_market_campaign_products_category_text + ON market_campaign_products (category_text); +CREATE INDEX IF NOT EXISTS idx_market_campaign_products_is_active + ON market_campaign_products (is_active); +CREATE INDEX IF NOT EXISTS idx_market_campaign_products_last_seen_at + ON market_campaign_products (last_seen_at); +CREATE INDEX IF NOT EXISTS idx_market_product_platform_seen + ON market_campaign_products (platform_code, last_seen_at); +CREATE INDEX IF NOT EXISTS idx_market_product_discount + ON market_campaign_products (discount_rate, price); + +CREATE TABLE IF NOT EXISTS market_product_price_history ( + id BIGSERIAL PRIMARY KEY, + market_product_id BIGINT NOT NULL REFERENCES market_campaign_products(id), + campaign_id BIGINT NOT NULL REFERENCES market_campaigns(id), + platform_code VARCHAR(50) NOT NULL, + platform_product_id VARCHAR(200) NOT NULL, + price DOUBLE PRECISION, + original_price DOUBLE PRECISION, + discount_rate DOUBLE PRECISION, + stock_text VARCHAR(200), + sold_count INTEGER, + rank_position INTEGER, + crawled_at TIMESTAMP NOT NULL DEFAULT NOW(), + batch_id VARCHAR(80) NOT NULL, + metadata_json TEXT +); + +CREATE INDEX IF NOT EXISTS idx_market_product_price_history_market_product_id + ON market_product_price_history (market_product_id); +CREATE INDEX IF NOT EXISTS idx_market_product_price_history_campaign_id + ON market_product_price_history (campaign_id); +CREATE INDEX IF NOT EXISTS idx_market_product_price_history_platform_code + ON market_product_price_history (platform_code); +CREATE INDEX IF NOT EXISTS idx_market_product_price_history_platform_product_id + ON market_product_price_history (platform_product_id); +CREATE INDEX IF NOT EXISTS idx_market_product_price_history_crawled_at + ON market_product_price_history (crawled_at); +CREATE INDEX IF NOT EXISTS idx_market_product_price_history_batch_id + ON market_product_price_history (batch_id); +CREATE INDEX IF NOT EXISTS idx_market_price_platform_time + ON market_product_price_history (platform_code, platform_product_id, crawled_at); +CREATE INDEX IF NOT EXISTS idx_market_price_campaign_time + ON market_product_price_history (campaign_id, crawled_at); + +CREATE TABLE IF NOT EXISTS market_product_matches ( + id BIGSERIAL PRIMARY KEY, + market_product_id BIGINT NOT NULL REFERENCES market_campaign_products(id), + momo_product_id INTEGER REFERENCES products(id), + momo_i_code VARCHAR(50), + match_score DOUBLE PRECISION NOT NULL DEFAULT 0.0, + match_status VARCHAR(30) NOT NULL DEFAULT 'needs_review', + match_reason_json TEXT, + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + reviewed_at TIMESTAMP, + reviewed_by VARCHAR(120), + CONSTRAINT uq_market_product_momo_match + UNIQUE (market_product_id, momo_i_code) +); + +CREATE INDEX IF NOT EXISTS idx_market_product_matches_market_product_id + ON market_product_matches (market_product_id); +CREATE INDEX IF NOT EXISTS idx_market_product_matches_momo_product_id + ON market_product_matches (momo_product_id); +CREATE INDEX IF NOT EXISTS idx_market_product_matches_momo_i_code + ON market_product_matches (momo_i_code); +CREATE INDEX IF NOT EXISTS idx_market_product_matches_match_status + ON market_product_matches (match_status); +CREATE INDEX IF NOT EXISTS idx_market_match_status_score + ON market_product_matches (match_status, match_score); + +CREATE TABLE IF NOT EXISTS market_crawler_runs ( + id BIGSERIAL PRIMARY KEY, + platform_code VARCHAR(50), + crawler_name VARCHAR(120) NOT NULL, + campaign_id BIGINT REFERENCES market_campaigns(id), + batch_id VARCHAR(80) NOT NULL, + started_at TIMESTAMP NOT NULL DEFAULT NOW(), + finished_at TIMESTAMP, + status VARCHAR(30) NOT NULL DEFAULT 'started', + dry_run BOOLEAN NOT NULL DEFAULT TRUE, + pages_found INTEGER NOT NULL DEFAULT 0, + products_found INTEGER NOT NULL DEFAULT 0, + products_changed INTEGER NOT NULL DEFAULT 0, + error_count INTEGER NOT NULL DEFAULT 0, + error_message TEXT, + metadata_json TEXT +); + +CREATE INDEX IF NOT EXISTS idx_market_crawler_runs_platform_code + ON market_crawler_runs (platform_code); +CREATE INDEX IF NOT EXISTS idx_market_crawler_runs_crawler_name + ON market_crawler_runs (crawler_name); +CREATE INDEX IF NOT EXISTS idx_market_crawler_runs_campaign_id + ON market_crawler_runs (campaign_id); +CREATE INDEX IF NOT EXISTS idx_market_crawler_runs_batch_id + ON market_crawler_runs (batch_id); +CREATE INDEX IF NOT EXISTS idx_market_crawler_runs_started_at + ON market_crawler_runs (started_at); +CREATE INDEX IF NOT EXISTS idx_market_crawler_runs_status + ON market_crawler_runs (status); +CREATE INDEX IF NOT EXISTS idx_market_crawler_run_platform_time + ON market_crawler_runs (platform_code, started_at); +CREATE INDEX IF NOT EXISTS idx_market_crawler_run_status_time + ON market_crawler_runs (status, started_at); + +GRANT ALL PRIVILEGES ON market_platforms TO momo; +GRANT ALL PRIVILEGES ON market_campaigns TO momo; +GRANT ALL PRIVILEGES ON market_campaign_snapshots TO momo; +GRANT ALL PRIVILEGES ON market_campaign_products TO momo; +GRANT ALL PRIVILEGES ON market_product_price_history TO momo; +GRANT ALL PRIVILEGES ON market_product_matches TO momo; +GRANT ALL PRIVILEGES ON market_crawler_runs TO momo; + +GRANT USAGE, SELECT ON SEQUENCE market_platforms_id_seq TO momo; +GRANT USAGE, SELECT ON SEQUENCE market_campaigns_id_seq TO momo; +GRANT USAGE, SELECT ON SEQUENCE market_campaign_snapshots_id_seq TO momo; +GRANT USAGE, SELECT ON SEQUENCE market_campaign_products_id_seq TO momo; +GRANT USAGE, SELECT ON SEQUENCE market_product_price_history_id_seq TO momo; +GRANT USAGE, SELECT ON SEQUENCE market_product_matches_id_seq TO momo; +GRANT USAGE, SELECT ON SEQUENCE market_crawler_runs_id_seq TO momo; +""".strip() + + +ROLLBACK_SQL = """ +-- Manual rollback draft only. Do not run without operator approval and backup verification. +DROP TABLE IF EXISTS market_crawler_runs; +DROP TABLE IF EXISTS market_product_matches; +DROP TABLE IF EXISTS market_product_price_history; +DROP TABLE IF EXISTS market_campaign_products; +DROP TABLE IF EXISTS market_campaign_snapshots; +DROP TABLE IF EXISTS market_campaigns; +DROP TABLE IF EXISTS market_platforms; +""".strip() + + +def _statement_count(sql_text): + return len([part for part in sql_text.split(";") if part.strip()]) + + +def _contains_destructive_forward_sql(sql_text): + lowered = sql_text.lower() + destructive_markers = ( + "drop table", + "truncate ", + "delete from", + "alter table products", + "alter table daily_sales", + "alter table monthly_sales", + ) + return any(marker in lowered for marker in destructive_markers) + + +def build_migration_blueprint(expected_tables): + """建立 market_intel migration 與真寫入命令草案;不執行任何 SQL。""" + expected_tables = list(expected_tables) + migration_path = Path(MIGRATION_FILENAME) + seed_writer_path = Path(SEED_WRITER_SCRIPT) + migration_file_exists = migration_path.exists() + seed_writer_script_exists = seed_writer_path.exists() + migration_file_text = ( + migration_path.read_text(encoding="utf-8").strip() + if migration_file_exists + else "" + ) + migration_file_matches_blueprint = ( + migration_file_exists and migration_file_text == FORWARD_SQL + ) + forward_has_destructive_sql = _contains_destructive_forward_sql(FORWARD_SQL) + blocked_reasons = [ + "migration_not_executed", + "backup_not_verified", + "operator_approval_missing", + "production_maintenance_window_required", + "seed_writer_real_write_not_implemented", + ] + if not migration_file_exists: + blocked_reasons.insert(0, "migration_file_not_created") + elif not migration_file_matches_blueprint: + blocked_reasons.insert(0, "migration_file_differs_from_blueprint") + + return { + "mode": "migration_file_draft_read_only", + "migration_number": MIGRATION_NUMBER, + "suggested_filename": MIGRATION_FILENAME, + "file_created": migration_file_exists, + "file_matches_blueprint": migration_file_matches_blueprint, + "file_status": ( + "local_draft_matches_blueprint" + if migration_file_matches_blueprint + else "local_draft_differs_from_blueprint" + if migration_file_exists + else "not_created" + ), + "migration_executed": False, + "database_session_created": False, + "database_commit_executed": False, + "external_network_executed": False, + "scheduler_attached": False, + "expected_tables": expected_tables, + "table_count": len(expected_tables), + "forward_sql": FORWARD_SQL, + "forward_statement_count": _statement_count(FORWARD_SQL), + "forward_has_destructive_sql": forward_has_destructive_sql, + "rollback_sql": ROLLBACK_SQL, + "rollback_requires_manual_approval": True, + "rollback_statement_count": _statement_count(ROLLBACK_SQL), + "blocked_reasons": blocked_reasons, + "table_operations": [ + { + "table": table_name, + "operation": "CREATE TABLE IF NOT EXISTS", + "write_status": "preview_only_not_executed", + } + for table_name in expected_tables + ], + "command_plan": { + "migration_apply_command": { + "command": ( + "psql \"$DATABASE_URL\" -v ON_ERROR_STOP=1 " + f"-f {MIGRATION_FILENAME}" + ), + "executed": False, + "requires_backup": True, + "requires_adr011_deploy_boundary_review": True, + }, + "seed_writer_command": { + "command": ( + "MARKET_INTEL_ENABLED=true MARKET_INTEL_WRITE_ENABLED=true " + "MARKET_INTEL_CRAWLER_ENABLED=false " + "MARKET_INTEL_SEED_WRITE_APPROVAL= " + f"python {SEED_WRITER_SCRIPT} --execute --platform all" + ), + "executed": False, + "script_created": seed_writer_script_exists, + "script_path": SEED_WRITER_SCRIPT, + "requires_new_approval_token": True, + "notes": ( + "Seed writer skeleton 已存在,但真寫入仍未實作;不要為了 " + "seed upsert 而打開 crawler/manual fetch 權限。" + ), + }, + }, + "safety_checks": { + "forward_sql_additive_only": not forward_has_destructive_sql, + "does_not_touch_momo_db_container": True, + "does_not_attach_scheduler": True, + "does_not_enable_external_crawling": True, + "does_not_write_seed_rows": True, + }, + } diff --git a/services/market_intel/platform_seed.py b/services/market_intel/platform_seed.py new file mode 100644 index 0000000..8ef3ef8 --- /dev/null +++ b/services/market_intel/platform_seed.py @@ -0,0 +1,43 @@ +"""市場情報平台種子資料規劃。 + +本模組只產生可審核的 seed plan,不執行資料庫寫入。 +""" + +from services.market_intel.adapters import get_adapter, get_adapter_registry + + +def _adapter_to_seed(adapter): + policy = adapter.safety_policy.to_dict() + sources = [source.to_dict() for source in adapter.campaign_sources()] + return { + "code": adapter.platform_code, + "name": adapter.platform_name, + "base_url": adapter.base_url, + "enabled": False, + "crawl_policy_json": { + "request_interval_sec": policy["request_interval_sec"], + "timeout_sec": policy["timeout_sec"], + "max_pages_per_run": policy["max_pages_per_run"], + "allow_login": policy["allow_login"], + "allow_database_write": policy["allow_database_write"], + "allow_scheduler_attach": policy["allow_scheduler_attach"], + "seed_source_keys": [source["source_key"] for source in sources], + }, + "source_count": len(sources), + "sources": sources, + "write_action": "upsert_market_platform_after_gate_approval", + } + + +def build_platform_seed_rows(platform_code="all"): + """根據已註冊 adapter 建立平台 seed rows,不碰 DB。""" + if platform_code and platform_code != "all": + adapter = get_adapter(platform_code) + if not adapter: + return [] + return [_adapter_to_seed(adapter)] + + return [ + _adapter_to_seed(adapter) + for adapter in get_adapter_registry().values() + ] diff --git a/services/market_intel/platform_seed_db_diff.py b/services/market_intel/platform_seed_db_diff.py new file mode 100644 index 0000000..769e9ee --- /dev/null +++ b/services/market_intel/platform_seed_db_diff.py @@ -0,0 +1,238 @@ +"""市場情報平台 seed 與正式 DB 的只讀差異探針。 + +本模組只查詢 market_platforms 既有 rows,不使用 DatabaseManager、不建立 ORM session、 +不開 explicit transaction、不寫入、不 commit。 +""" + +import json + +from sqlalchemy import bindparam, create_engine, text + + +def _normalize_policy(value): + """把 seed policy 與 DB text policy 正規化成可比較字串。""" + if value in (None, ""): + return "" + if isinstance(value, str): + try: + value = json.loads(value) + except json.JSONDecodeError: + return value.strip() + return json.dumps(value, ensure_ascii=False, sort_keys=True, separators=(",", ":")) + + +def _build_planned_seed_diffs(seed_rows): + return [ + { + "code": seed["code"], + "exists": False, + "expected_enabled": bool(seed.get("enabled", False)), + "actual_enabled": None, + "name_matches": False, + "base_url_matches": False, + "policy_matches": False, + "diff_status": "planned_no_db_connection", + } + for seed in seed_rows + ] + + +def _build_seed_diff(seed, actual_row): + if not actual_row: + return { + "code": seed["code"], + "exists": False, + "expected_enabled": bool(seed.get("enabled", False)), + "actual_enabled": None, + "name_matches": False, + "base_url_matches": False, + "policy_matches": False, + "diff_status": "missing", + } + + expected_policy = _normalize_policy(seed.get("crawl_policy_json")) + actual_policy = _normalize_policy(actual_row.get("crawl_policy_json")) + name_matches = (actual_row.get("name") or "") == (seed.get("name") or "") + base_url_matches = (actual_row.get("base_url") or "") == (seed.get("base_url") or "") + expected_enabled = bool(seed.get("enabled", False)) + actual_enabled = bool(actual_row.get("enabled", False)) + policy_matches = expected_policy == actual_policy + diff_status = ( + "matches_expected" + if name_matches and base_url_matches and policy_matches and expected_enabled == actual_enabled + else "differs_from_expected" + ) + return { + "code": seed["code"], + "exists": True, + "expected_enabled": expected_enabled, + "actual_enabled": actual_enabled, + "name_matches": name_matches, + "base_url_matches": base_url_matches, + "policy_matches": policy_matches, + "diff_status": diff_status, + } + + +def _query_platform_rows(conn, expected_codes): + if not expected_codes: + return {} + + rows = conn.execute( + text( + """ + SELECT code, name, base_url, enabled, crawl_policy_json + FROM market_platforms + WHERE code IN :platform_codes + ORDER BY code + """ + ).bindparams(bindparam("platform_codes", expanding=True)), + {"platform_codes": tuple(expected_codes)}, + ).fetchall() + return { + row._mapping["code"]: dict(row._mapping) + for row in rows + } + + +def build_platform_seed_db_diff_plan( + seed_plan, + *, + execute_requested=False, + database_url=None, + database_type=None, + engine=None, +): + """建立平台 seed DB 差異探針結果;預設只回 planned,不連 DB。""" + seed_rows = list(seed_plan.get("seeds", [])) + expected_codes = [seed["code"] for seed in seed_rows] + + if not execute_requested: + return { + "mode": "platform_seed_db_diff_planned", + "execute_requested": False, + "read_only_query_executed": False, + "database_connection_opened": False, + "database_session_created": False, + "explicit_transaction_opened": False, + "database_write_executed": False, + "database_commit_executed": False, + "migration_executed": False, + "seed_write_executed": False, + "expected_seed_count": len(seed_rows), + "existing_seed_count": 0, + "expected_codes": expected_codes, + "existing_codes": [], + "missing_codes": expected_codes, + "changed_codes": [], + "matching_codes": [], + "seed_rows_ready": False, + "seed_diffs": _build_planned_seed_diffs(seed_rows), + "blocked_reasons": [ + "execute_false_planned_only", + "seed_db_diff_not_loaded", + "seed_write_still_blocked", + ], + } + + from config import DATABASE_PATH, DATABASE_TYPE + + effective_database_type = (database_type or DATABASE_TYPE or "").lower() + effective_database_url = database_url or DATABASE_PATH + created_engine = False + connection_opened = False + + try: + if engine is None: + connect_args = {} + if effective_database_type == "postgresql": + connect_args = { + "connect_timeout": 8, + "options": "-c statement_timeout=15000", + } + engine = create_engine( + effective_database_url, + isolation_level="AUTOCOMMIT", + pool_pre_ping=True, + connect_args=connect_args, + ) + created_engine = True + + with engine.connect() as conn: + connection_opened = True + existing_by_code = _query_platform_rows(conn, expected_codes) + + seed_diffs = [ + _build_seed_diff(seed, existing_by_code.get(seed["code"])) + for seed in seed_rows + ] + missing_codes = [ + item["code"] for item in seed_diffs + if item["diff_status"] == "missing" + ] + changed_codes = [ + item["code"] for item in seed_diffs + if item["diff_status"] == "differs_from_expected" + ] + matching_codes = [ + item["code"] for item in seed_diffs + if item["diff_status"] == "matches_expected" + ] + blocked_reasons = ["seed_write_still_blocked"] + if missing_codes: + blocked_reasons.insert(0, "seed_rows_missing") + if changed_codes: + blocked_reasons.insert(0, "seed_rows_differ") + + return { + "mode": "platform_seed_db_diff_read_only", + "execute_requested": True, + "read_only_query_executed": True, + "database_connection_opened": connection_opened, + "database_session_created": False, + "explicit_transaction_opened": False, + "database_write_executed": False, + "database_commit_executed": False, + "migration_executed": False, + "seed_write_executed": False, + "expected_seed_count": len(seed_rows), + "existing_seed_count": len(existing_by_code), + "expected_codes": expected_codes, + "existing_codes": sorted(existing_by_code), + "missing_codes": missing_codes, + "changed_codes": changed_codes, + "matching_codes": matching_codes, + "seed_rows_ready": not missing_codes and not changed_codes, + "seed_diffs": seed_diffs, + "blocked_reasons": blocked_reasons, + } + except Exception as exc: + return { + "mode": "platform_seed_db_diff_error", + "execute_requested": True, + "read_only_query_executed": False, + "database_connection_opened": connection_opened, + "database_session_created": False, + "explicit_transaction_opened": False, + "database_write_executed": False, + "database_commit_executed": False, + "migration_executed": False, + "seed_write_executed": False, + "expected_seed_count": len(seed_rows), + "existing_seed_count": 0, + "expected_codes": expected_codes, + "existing_codes": [], + "missing_codes": expected_codes, + "changed_codes": [], + "matching_codes": [], + "seed_rows_ready": False, + "seed_diffs": _build_planned_seed_diffs(seed_rows), + "blocked_reasons": [ + "platform_seed_db_diff_error", + "seed_write_still_blocked", + ], + "error_message": str(exc), + } + finally: + if created_engine: + engine.dispose() diff --git a/services/market_intel/platform_seed_writer.py b/services/market_intel/platform_seed_writer.py new file mode 100644 index 0000000..43b438f --- /dev/null +++ b/services/market_intel/platform_seed_writer.py @@ -0,0 +1,89 @@ +"""市場情報平台種子資料寫入 dry-run 規劃。 + +本模組只建立 upsert preview 與 schema smoke 結果,不執行資料庫寫入。 +""" + +import json + +from database.manager import Base + + +MARKET_PLATFORM_REQUIRED_COLUMNS = ( + "code", + "name", + "base_url", + "enabled", + "crawl_policy_json", +) + + +def build_schema_smoke(expected_tables, metadata=None): + """檢查 ORM metadata 是否具備 market_* 表與 platform upsert 必要欄位。""" + metadata = metadata or Base.metadata + metadata_tables = metadata.tables + table_names = set(metadata_tables) + missing_tables = [ + table_name for table_name in expected_tables + if table_name not in table_names + ] + market_platform = metadata_tables.get("market_platforms") + platform_columns = ( + set(market_platform.columns.keys()) + if market_platform is not None + else set() + ) + missing_platform_columns = [ + column_name for column_name in MARKET_PLATFORM_REQUIRED_COLUMNS + if column_name not in platform_columns + ] + + return { + "passed": not missing_tables and not missing_platform_columns, + "expected_table_count": len(expected_tables), + "metadata_table_count": len(table_names), + "missing_tables": missing_tables, + "market_platform_required_columns": list(MARKET_PLATFORM_REQUIRED_COLUMNS), + "missing_market_platform_columns": missing_platform_columns, + } + + +def build_platform_seed_writer_plan(seed_plan, write_guard, schema_smoke): + """建立 market_platforms upsert dry-run plan,不建立 session、不 commit。""" + operations = [] + for seed in seed_plan.get("seeds", []): + operations.append( + { + "operation": "upsert", + "table": "market_platforms", + "lookup": {"code": seed["code"]}, + "values": { + "code": seed["code"], + "name": seed["name"], + "base_url": seed["base_url"], + "enabled": False, + "crawl_policy_json": json.dumps( + seed["crawl_policy_json"], + ensure_ascii=False, + sort_keys=True, + ), + }, + "sql_shape": ( + "INSERT INTO market_platforms (...) VALUES (...) " + "ON CONFLICT(code) DO UPDATE SET ..." + ), + "write_status": "blocked_dry_run_only", + } + ) + + return { + "mode": "dry_run", + "operation_count": len(operations), + "operations": operations, + "schema_smoke": schema_smoke, + "ready_to_write": False, + "writes_executed": False, + "would_write_database": False, + "database_write_allowed": bool(write_guard.get("database_write_allowed")), + "blocked_reasons": write_guard.get("blocked_reasons", []), + "write_action": "preview_only_no_session_no_commit", + } diff --git a/services/market_intel/schema_db_probe.py b/services/market_intel/schema_db_probe.py new file mode 100644 index 0000000..d7cbc05 --- /dev/null +++ b/services/market_intel/schema_db_probe.py @@ -0,0 +1,172 @@ +"""市場情報正式 DB schema 只讀探針。 + +本模組只查詢系統 catalog,不使用 DatabaseManager、不呼叫 create_all、不寫入。 +""" + +from sqlalchemy import bindparam, create_engine, text + + +def _build_table_status(expected_tables, existing_tables): + existing_set = set(existing_tables) + return [ + { + "table": table_name, + "exists": table_name in existing_set, + } + for table_name in expected_tables + ] + + +def _probe_postgresql(conn, expected_tables): + existing_tables = [] + for table_name in expected_tables: + exists = conn.execute( + text( + """ + SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = ANY (current_schemas(false)) + AND table_name = :table_name + ) + """ + ), + {"table_name": table_name}, + ).scalar() + if exists: + existing_tables.append(table_name) + return existing_tables + + +def _probe_sqlite(conn, expected_tables): + rows = conn.execute( + text( + """ + SELECT name + FROM sqlite_master + WHERE type = 'table' + AND name IN :table_names + """ + ).bindparams(bindparam("table_names", expanding=True)), + {"table_names": tuple(expected_tables)}, + ).fetchall() + return [row[0] for row in rows] + + +def build_schema_db_probe_plan( + expected_tables, + *, + execute_requested=False, + database_url=None, + database_type=None, + engine=None, +): + """建立 DB schema 探針結果;預設只回 planned,不連 DB。""" + expected_tables = list(expected_tables) + if not execute_requested: + return { + "mode": "schema_db_probe_planned", + "execute_requested": False, + "read_only_query_executed": False, + "database_connection_opened": False, + "database_session_created": False, + "explicit_transaction_opened": False, + "database_write_executed": False, + "database_commit_executed": False, + "migration_executed": False, + "schema_tables_exist": False, + "expected_tables": expected_tables, + "existing_tables": [], + "missing_tables": expected_tables, + "table_statuses": _build_table_status(expected_tables, []), + "blocked_reasons": [ + "execute_false_planned_only", + "migration_not_executed_by_this_probe", + "seed_write_still_blocked", + ], + } + + from config import DATABASE_PATH, DATABASE_TYPE + + effective_database_type = (database_type or DATABASE_TYPE or "").lower() + effective_database_url = database_url or DATABASE_PATH + created_engine = False + connection_opened = False + + try: + if engine is None: + connect_args = {} + if effective_database_type == "postgresql": + connect_args = { + "connect_timeout": 8, + "options": "-c statement_timeout=15000", + } + engine = create_engine( + effective_database_url, + isolation_level="AUTOCOMMIT", + pool_pre_ping=True, + connect_args=connect_args, + ) + created_engine = True + + with engine.connect() as conn: + connection_opened = True + if effective_database_type == "postgresql": + existing_tables = _probe_postgresql(conn, expected_tables) + else: + existing_tables = _probe_sqlite(conn, expected_tables) + + missing_tables = [ + table_name for table_name in expected_tables + if table_name not in set(existing_tables) + ] + schema_tables_exist = not missing_tables + blocked_reasons = [ + "migration_not_executed_by_this_probe", + "seed_write_still_blocked", + ] + if not schema_tables_exist: + blocked_reasons.insert(0, "market_tables_missing") + + return { + "mode": "schema_db_probe_read_only", + "execute_requested": True, + "read_only_query_executed": True, + "database_connection_opened": connection_opened, + "database_session_created": False, + "explicit_transaction_opened": False, + "database_write_executed": False, + "database_commit_executed": False, + "migration_executed": False, + "schema_tables_exist": schema_tables_exist, + "expected_tables": expected_tables, + "existing_tables": existing_tables, + "missing_tables": missing_tables, + "table_statuses": _build_table_status(expected_tables, existing_tables), + "blocked_reasons": blocked_reasons, + } + except Exception as exc: + return { + "mode": "schema_db_probe_error", + "execute_requested": True, + "read_only_query_executed": False, + "database_connection_opened": connection_opened, + "database_session_created": False, + "explicit_transaction_opened": False, + "database_write_executed": False, + "database_commit_executed": False, + "migration_executed": False, + "schema_tables_exist": False, + "expected_tables": expected_tables, + "existing_tables": [], + "missing_tables": expected_tables, + "table_statuses": _build_table_status(expected_tables, []), + "blocked_reasons": [ + "schema_db_probe_error", + "seed_write_still_blocked", + ], + "error_message": str(exc), + } + finally: + if created_engine: + engine.dispose() diff --git a/services/market_intel/seed_writer_cli.py b/services/market_intel/seed_writer_cli.py new file mode 100644 index 0000000..fdc4e2b --- /dev/null +++ b/services/market_intel/seed_writer_cli.py @@ -0,0 +1,197 @@ +"""市場情報 seed writer CLI skeleton。 + +本階段只回報 CLI 執行計畫,不建立 DB session、不寫入、不 commit。 +""" + +import hashlib +import json + + +APPROVAL_ENV_VAR = "MARKET_INTEL_SEED_WRITE_APPROVAL" +PLATFORM_UPSERT_SQL = """ +INSERT INTO market_platforms ( + code, + name, + base_url, + enabled, + crawl_policy_json +) VALUES ( + :code, + :name, + :base_url, + :enabled, + :crawl_policy_json +) +ON CONFLICT (code) DO UPDATE SET + name = EXCLUDED.name, + base_url = EXCLUDED.base_url, + enabled = EXCLUDED.enabled, + crawl_policy_json = EXCLUDED.crawl_policy_json, + updated_at = NOW() +""".strip() + + +def _payload_hash(payload): + encoded = json.dumps(payload, ensure_ascii=False, sort_keys=True).encode("utf-8") + return hashlib.sha256(encoded).hexdigest()[:16] + + +def build_seed_transaction_preview(writer_plan, migration_blueprint): + """建立 seed writer transaction preview;不建立 DB session。""" + statements = [] + for index, operation in enumerate(writer_plan.get("operations", []), start=1): + values = operation.get("values", {}) + lookup_code = operation.get("lookup", {}).get("code") or values.get("code") + statements.append( + { + "index": index, + "operation": operation.get("operation", "upsert"), + "table": operation.get("table", "market_platforms"), + "lookup": {"code": lookup_code}, + "sql_template": PLATFORM_UPSERT_SQL, + "parameter_keys": sorted(values), + "parameter_payload_hash": _payload_hash(values), + "idempotency_key": f"market_platforms:{lookup_code}", + "diff_status": "not_loaded_no_db_session", + "write_status": "blocked_transaction_preview_only", + } + ) + + migration_ready = bool( + migration_blueprint.get("file_created") + and migration_blueprint.get("file_matches_blueprint") + and not migration_blueprint.get("migration_executed") + ) + return { + "mode": "seed_transaction_preview_no_session", + "target_table": "market_platforms", + "statement_count": len(statements), + "statements": statements, + "migration_draft_ready": migration_ready, + "database_snapshot_loaded": False, + "existing_rows_seen": 0, + "database_session_created": False, + "transaction_opened": False, + "writes_executed": False, + "would_write_database": False, + "database_commit_executed": False, + "database_rollback_executed": False, + "external_network_executed": False, + "scheduler_attached": False, + "required_runtime_order": [ + "backup_verified", + "migration_applied_by_operator", + "schema_smoke_passed", + "feature_flags_reviewed", + "one_time_approval_token_verified", + "real_write_implementation_enabled", + ], + "safety_contract": { + "idempotent_upsert_preview_only": True, + "does_not_load_existing_rows": True, + "does_not_open_transaction": True, + "does_not_commit": True, + }, + } + + +def build_seed_writer_cli_plan( + *, + platform_code, + execute_requested, + approval_token, + seed_plan, + write_guard, + writer_plan, + migration_blueprint, +): + """建立 seed writer CLI blocked plan。""" + approval_token_present = bool(approval_token) + migration_ready = bool( + migration_blueprint.get("file_created") + and migration_blueprint.get("file_matches_blueprint") + and not migration_blueprint.get("migration_executed") + ) + gates = [ + { + "key": "script_created", + "label": "scripts/market_intel_seed_writer.py exists", + "passed": True, + }, + { + "key": "migration_file_matches_blueprint", + "label": "migration draft exists and matches the reviewed blueprint", + "passed": migration_ready, + }, + { + "key": "execute_requested", + "label": "--execute flag was explicitly provided", + "passed": bool(execute_requested), + }, + { + "key": "approval_token_present", + "label": f"{APPROVAL_ENV_VAR} or --approval-token was provided", + "passed": approval_token_present, + }, + { + "key": "database_write_allowed", + "label": "runtime database_write_allowed gate is true", + "passed": bool(write_guard.get("database_write_allowed")), + }, + { + "key": "manual_operator_approval", + "label": "operator approval has been verified out-of-band", + "passed": False, + }, + { + "key": "real_write_implementation_enabled", + "label": "CLI real write implementation has been enabled", + "passed": False, + }, + ] + blocked_reasons = [gate["key"] for gate in gates if not gate["passed"]] + if execute_requested: + blocked_reasons.insert(0, "execute_request_blocked_by_skeleton") + transaction_preview = build_seed_transaction_preview( + writer_plan=writer_plan, + migration_blueprint=migration_blueprint, + ) + + return { + "mode": "seed_writer_cli_blocked_skeleton", + "platform_code": platform_code or "all", + "execute_requested": bool(execute_requested), + "approval_token_present": approval_token_present, + "approval_env_var": APPROVAL_ENV_VAR, + "ready_for_real_write": False, + "writes_executed": False, + "would_write_database": False, + "database_session_created": False, + "database_commit_executed": False, + "external_network_executed": False, + "scheduler_attached": False, + "exit_code": 2 if execute_requested else 0, + "blocked_reasons": blocked_reasons, + "approval_gates": gates, + "seed_count": int(seed_plan.get("seed_count") or 0), + "writer_operation_count": int(writer_plan.get("operation_count") or 0), + "transaction_preview": transaction_preview, + "write_guard_summary": { + "ready_to_write": bool(write_guard.get("ready_to_write")), + "would_write_database": bool(write_guard.get("would_write_database")), + "database_write_allowed": bool(write_guard.get("database_write_allowed")), + "blocked_reasons": write_guard.get("blocked_reasons", []), + }, + "migration_file_summary": { + "suggested_filename": migration_blueprint.get("suggested_filename"), + "file_created": bool(migration_blueprint.get("file_created")), + "file_matches_blueprint": bool(migration_blueprint.get("file_matches_blueprint")), + "migration_executed": bool(migration_blueprint.get("migration_executed")), + }, + "safety_contract": { + "refuses_execute_in_this_phase": True, + "requires_independent_approval_token": True, + "keeps_crawler_disabled_for_seed_write": True, + "no_db_session_in_skeleton": True, + }, + } diff --git a/services/market_intel/service.py b/services/market_intel/service.py index af2e07f..ec0622c 100644 --- a/services/market_intel/service.py +++ b/services/market_intel/service.py @@ -12,6 +12,23 @@ from config import ( MARKET_INTEL_ENABLED, MARKET_INTEL_WRITE_ENABLED, ) +from services.market_intel.adapters import ( + get_adapter, + get_adapter_registry, + get_adapter_summaries, +) +from services.market_intel.candidate_preview import build_candidate_preview_from_discovery +from services.market_intel.discovery_runner import ManualDiscoveryRunner +from services.market_intel.migration_blueprint import build_migration_blueprint +from services.market_intel.platform_seed import build_platform_seed_rows +from services.market_intel.platform_seed_db_diff import build_platform_seed_db_diff_plan +from services.market_intel.platform_seed_writer import ( + build_platform_seed_writer_plan, + build_schema_smoke, +) +from services.market_intel.seed_writer_cli import build_seed_writer_cli_plan +from services.market_intel.schema_db_probe import build_schema_db_probe_plan +from services.market_intel.write_approval_runbook import build_write_approval_runbook TAIPEI_TZ = timezone(timedelta(hours=8)) @@ -45,7 +62,7 @@ class MarketIntelRuntimeStatus: class MarketIntelService: """市場情報入口服務,先集中 feature gate 與安全狀態。""" - phase = "phase_2_schema_ready_disabled" + phase = "phase_25_platform_seed_db_diff" def get_runtime_status(self) -> MarketIntelRuntimeStatus: return MarketIntelRuntimeStatus( @@ -62,13 +79,22 @@ class MarketIntelService: ), ) + def manual_fetch_allowed(self): + status = self.get_runtime_status() + return bool(status.enabled and status.crawler_enabled) + def get_schema_tables(self): """回傳 ADR-035 定義的 market_* schema 名稱。""" return list(MARKET_INTEL_TABLES) + def get_adapter_summaries(self): + """回傳目前已註冊 adapter,不觸發網路。""" + return get_adapter_summaries() + def build_dry_run_plan(self, platform_code="all"): """建立 dry-run 計畫,不執行爬蟲、不寫 DB。""" status = self.get_runtime_status() + adapter_registry = get_adapter_registry() return { "batch_id": f"market-dry-run-{uuid4().hex[:12]}", "platform_code": platform_code, @@ -77,6 +103,403 @@ class MarketIntelService: "would_discover_campaigns": bool(status.enabled and status.crawler_enabled), "would_write_database": bool(status.database_write_allowed), "scheduler_attached": status.scheduler_attached, + "manual_fetch_allowed": self.manual_fetch_allowed(), "schema_tables": self.get_schema_tables(), + "adapter_count": len(adapter_registry), + "adapters": [adapter.summary() for adapter in adapter_registry.values()], "status": status.to_dict(), } + + def build_discovery_plan(self, platform_code="all"): + """建立平台 discovery dry-run plan,不發 request、不寫 DB。""" + if platform_code and platform_code != "all": + adapter = get_adapter(platform_code) + if not adapter: + return { + "platform_code": platform_code, + "found": False, + "plans": [], + "error": "未知平台 adapter", + } + return { + "platform_code": platform_code, + "found": True, + "plans": [adapter.build_discovery_plan()], + } + + return { + "platform_code": "all", + "found": True, + "plans": [ + adapter.build_discovery_plan() + for adapter in get_adapter_registry().values() + ], + } + + def run_manual_discovery(self, platform_code="all", *, fetch=False, http_get=None): + """手動執行 discovery dry-run;預設不發 request,永遠不寫 DB。""" + registry = get_adapter_registry() + adapters = [] + + if platform_code and platform_code != "all": + adapter = get_adapter(platform_code) + if not adapter: + return { + "platform_code": platform_code, + "found": False, + "runs": [], + "error": "未知平台 adapter", + } + adapters = [adapter] + else: + adapters = list(registry.values()) + + runner = ManualDiscoveryRunner( + runtime_status=self.get_runtime_status(), + http_get=http_get, + ) + return { + "platform_code": platform_code or "all", + "found": True, + "fetch_requested": bool(fetch), + "manual_fetch_allowed": self.manual_fetch_allowed(), + "runs": [ + runner.run(adapter, fetch=fetch).to_dict() + for adapter in adapters + ], + } + + def build_candidate_preview( + self, + platform_code="all", + *, + fetch=False, + min_band="all", + limit=50, + http_get=None, + ): + """聚合候選連結 preview,只供人工審核,不寫 DB。""" + discovery_result = self.run_manual_discovery( + platform_code=platform_code, + fetch=fetch, + http_get=http_get, + ) + preview = build_candidate_preview_from_discovery( + discovery_result, + min_band=min_band, + limit=limit, + ) + preview["discovery_found"] = bool(discovery_result.get("found")) + preview["error"] = discovery_result.get("error") + return preview + + def build_platform_seed_plan(self, platform_code="all"): + """建立 market_platforms 初始資料計畫,不寫入 DB。""" + status = self.get_runtime_status() + seed_rows = build_platform_seed_rows(platform_code=platform_code) + found = bool(seed_rows) or platform_code in (None, "", "all") + return { + "platform_code": platform_code or "all", + "found": found, + "phase": self.phase, + "seed_count": len(seed_rows), + "seeds": seed_rows, + "would_write_database": False, + "database_write_allowed": bool(status.database_write_allowed), + "required_gates": { + "market_intel_enabled": bool(status.enabled), + "market_intel_write_enabled": bool(status.write_enabled), + "schema_smoke_required": True, + "migration_required": True, + "manual_operator_approval_required": True, + }, + "status": status.to_dict(), + "error": None if found else "未知平台 adapter", + } + + def build_platform_seed_write_guard(self, platform_code="all"): + """回報 platform seed 寫入前置 gate;本方法不執行寫入。""" + status = self.get_runtime_status() + seed_plan = self.build_platform_seed_plan(platform_code=platform_code) + schema_smoke = build_schema_smoke(MARKET_INTEL_TABLES) + guard_checks = { + "seed_plan_found": bool(seed_plan["found"]), + "has_seed_rows": bool(seed_plan["seed_count"]), + "market_intel_enabled": bool(status.enabled), + "market_intel_write_enabled": bool(status.write_enabled), + "database_write_allowed": bool(status.database_write_allowed), + "migration_confirmed": False, + "schema_smoke_confirmed": bool(schema_smoke["passed"]), + "manual_operator_approval": False, + } + blocked_reasons = [ + name for name, passed in guard_checks.items() + if not passed + ] + return { + "platform_code": platform_code or "all", + "phase": self.phase, + "seed_count": seed_plan["seed_count"], + "ready_to_write": False, + "would_write_database": False, + "database_write_allowed": bool(status.database_write_allowed), + "guard_checks": guard_checks, + "blocked_reasons": blocked_reasons, + "write_action": "blocked_dry_run_only", + "schema_smoke": schema_smoke, + "seed_plan": seed_plan, + } + + def build_schema_smoke(self): + """回報 market_intel ORM metadata smoke 結果,不查詢 DB。""" + return { + "phase": self.phase, + "schema_smoke": build_schema_smoke(MARKET_INTEL_TABLES), + "expected_tables": self.get_schema_tables(), + } + + def build_schema_db_probe(self, *, execute_requested=False): + """回報正式 DB schema 只讀探針;預設不連 DB。""" + probe = build_schema_db_probe_plan( + MARKET_INTEL_TABLES, + execute_requested=execute_requested, + ) + probe["phase"] = self.phase + return probe + + def build_platform_seed_db_diff(self, platform_code="all", *, execute_requested=False): + """回報 platform seed 與 DB 的只讀差異;預設不連 DB。""" + seed_plan = self.build_platform_seed_plan(platform_code=platform_code) + diff = build_platform_seed_db_diff_plan( + seed_plan, + execute_requested=execute_requested, + ) + diff["phase"] = self.phase + diff["platform_code"] = platform_code or "all" + diff["seed_plan_found"] = bool(seed_plan["found"]) + return diff + + def build_platform_seed_writer_plan(self, platform_code="all"): + """建立 platform seed writer dry-run plan,不建立 DB session。""" + seed_plan = self.build_platform_seed_plan(platform_code=platform_code) + write_guard = self.build_platform_seed_write_guard(platform_code=platform_code) + schema_smoke = write_guard["schema_smoke"] + writer_plan = build_platform_seed_writer_plan( + seed_plan=seed_plan, + write_guard=write_guard, + schema_smoke=schema_smoke, + ) + writer_plan["phase"] = self.phase + writer_plan["platform_code"] = platform_code or "all" + writer_plan["seed_plan_found"] = bool(seed_plan["found"]) + writer_plan["seed_count"] = seed_plan["seed_count"] + return writer_plan + + def build_write_approval_runbook(self, platform_code="all"): + """建立正式 seed writer 前的人工批准 runbook;本方法不執行寫入。""" + status = self.get_runtime_status() + seed_plan = self.build_platform_seed_plan(platform_code=platform_code) + write_guard = self.build_platform_seed_write_guard(platform_code=platform_code) + writer_plan = self.build_platform_seed_writer_plan(platform_code=platform_code) + return build_write_approval_runbook( + phase=self.phase, + status=status, + schema_smoke=write_guard["schema_smoke"], + seed_plan=seed_plan, + write_guard=write_guard, + writer_plan=writer_plan, + ) + + def build_migration_blueprint(self): + """建立 market_intel migration 與 seed writer 命令草案;不執行 SQL。""" + blueprint = build_migration_blueprint(self.get_schema_tables()) + blueprint["phase"] = self.phase + return blueprint + + def build_seed_writer_cli_status( + self, + platform_code="all", + *, + execute_requested=False, + approval_token=None, + ): + """建立 seed writer CLI blocked status;不建立 DB session、不寫入。""" + seed_plan = self.build_platform_seed_plan(platform_code=platform_code) + write_guard = self.build_platform_seed_write_guard(platform_code=platform_code) + writer_plan = self.build_platform_seed_writer_plan(platform_code=platform_code) + migration_blueprint = self.build_migration_blueprint() + status = build_seed_writer_cli_plan( + platform_code=platform_code or "all", + execute_requested=execute_requested, + approval_token=approval_token, + seed_plan=seed_plan, + write_guard=write_guard, + writer_plan=writer_plan, + migration_blueprint=migration_blueprint, + ) + status["phase"] = self.phase + return status + + def build_deployment_readiness(self): + """建立市場情報推版準備狀態;不執行 git、部署或遠端操作。""" + status = self.get_runtime_status() + schema_smoke = build_schema_smoke(MARKET_INTEL_TABLES) + writer_plan = self.build_platform_seed_writer_plan() + checks = { + "schema_smoke_passed": bool(schema_smoke["passed"]), + "feature_flags_default_safe": bool( + not status.enabled + and not status.crawler_enabled + and not status.write_enabled + ), + "database_write_blocked": bool(not status.database_write_allowed), + "scheduler_detached": bool(not status.scheduler_attached), + "manual_fetch_disabled": bool(not self.manual_fetch_allowed()), + "writer_plan_dry_run_only": bool( + writer_plan["mode"] == "dry_run" + and not writer_plan["writes_executed"] + and not writer_plan["would_write_database"] + ), + "registered_adapters_present": bool(len(self.get_adapter_summaries()) >= 4), + "schema_db_probe_planned_safe": bool( + not self.build_schema_db_probe()["read_only_query_executed"] + ), + "platform_seed_db_diff_planned_safe": bool( + not self.build_platform_seed_db_diff()["read_only_query_executed"] + ), + } + ready_for_production_deploy = all(checks.values()) + blocked_reasons = [ + reason for reason, blocked in ( + ("readiness_checks_not_all_passed", not ready_for_production_deploy), + ("production_deploy_not_executed_by_api", True), + ("git_commit_not_created_by_api", True), + ("git_push_not_executed_by_api", True), + ("backup_must_be_verified_by_operator", True), + ("production_smoke_must_be_verified_by_operator", True), + ) + if blocked + ] + required_manual_steps = [ + { + "key": "review_worktree_scope", + "label": "審核 worktree,只納入市場情報相關變更,排除 unrelated dirty files", + "status": "required", + }, + { + "key": "run_backup_system", + "label": "重大更新前執行 python backup_system.py", + "status": "required", + }, + { + "key": "commit_market_intel_changes_only", + "label": "只 commit 市場情報模組、ADR/TODO 與必要測試", + "status": "operator_optional", + }, + { + "key": "push_reviewed_branch_or_main", + "label": "推送已審核分支或 main,再進入部署 SOP", + "status": "operator_optional", + }, + { + "key": "run_deployment_sop", + "label": "依 deployment SOP app-only 部署,不碰 momo-db", + "status": "required", + }, + { + "key": "verify_health_endpoint", + "label": "部署後先驗證 /health,不使用首頁作為探測", + "status": "required", + }, + { + "key": "verify_market_intel_page_after_deploy", + "label": "驗證 /market_intel 與市場情報 API 仍維持 blocked dry-run", + "status": "required", + }, + ] + fallback_plan = [ + { + "key": "feature_flag_kill_switch", + "label": "MARKET_INTEL_ENABLED、MARKET_INTEL_CRAWLER_ENABLED、MARKET_INTEL_WRITE_ENABLED 保持全關,可立即停用新功能面", + "trigger": "任何 UI/API 異常或非預期連外行為", + }, + { + "key": "app_only_rollback", + "label": "回退到上一個已知正常版本後,只 recreate momo-app,避免影響 momo-db 資料生命週期", + "trigger": "部署後 /health 或 /market_intel smoke 失敗", + }, + { + "key": "scheduler_detached", + "label": "市場情報 scheduler 尚未掛載;異常時不需停爬蟲排程,因為本階段沒有排程入口", + "trigger": "排程或外部流量疑慮", + }, + { + "key": "database_write_blocked", + "label": "writer 仍是 preview_only_no_session_no_commit;異常時不需要 DB rollback", + "trigger": "seed writer 或 schema smoke 異常", + }, + ] + safe_deploy_boundaries = [ + { + "key": "no_remove_orphans", + "label": "禁止使用 docker compose --remove-orphans", + }, + { + "key": "no_momo_db_lifecycle_change", + "label": "禁止 stop/rm/recreate momo-db 或變更資料生命週期", + }, + { + "key": "health_probe_only", + "label": "HTTP health / blackbox / CD 探測只打 /health", + }, + { + "key": "flags_default_off", + "label": "市場情報三個 feature flags 預設維持 OFF", + }, + ] + return { + "phase": self.phase, + "mode": "app_only_release_gate", + "production_deployed": False, + "git_committed": False, + "git_pushed": False, + "ready_for_production_deploy": ready_for_production_deploy, + "deployment_actions_executed": False, + "execution_boundary": { + "api_executes_git": False, + "api_executes_backup": False, + "api_executes_scp": False, + "api_executes_ssh": False, + "api_recreates_container": False, + "api_runs_migration": False, + "api_writes_database": False, + }, + "checks": checks, + "blocked_reasons": blocked_reasons, + "requires_backup_before_major_update": True, + "backup_command": "python backup_system.py", + "required_manual_steps": required_manual_steps, + "fallback_plan": fallback_plan, + "safe_deploy_boundaries": safe_deploy_boundaries, + "production_smoke_targets": [ + "/health", + "/market_intel", + "/api/market_intel/status", + "/api/market_intel/deployment_readiness", + "/api/market_intel/schema_smoke", + "/api/market_intel/schema_db_probe", + "/api/market_intel/platform_seed_db_diff", + ], + "status": status.to_dict(), + "schema_smoke": schema_smoke, + "writer_plan_summary": { + "operation_count": writer_plan["operation_count"], + "writes_executed": writer_plan["writes_executed"], + "would_write_database": writer_plan["would_write_database"], + }, + "write_approval_runbook": self.build_write_approval_runbook(), + "migration_blueprint": self.build_migration_blueprint(), + "seed_writer_cli_status": self.build_seed_writer_cli_status(), + "schema_db_probe": self.build_schema_db_probe(), + "platform_seed_db_diff": self.build_platform_seed_db_diff(), + } diff --git a/services/market_intel/write_approval_runbook.py b/services/market_intel/write_approval_runbook.py new file mode 100644 index 0000000..ec63f51 --- /dev/null +++ b/services/market_intel/write_approval_runbook.py @@ -0,0 +1,134 @@ +"""市場情報正式寫入前的人工批准 runbook。 + +本模組只產生 gate 與操作順序,不建立 DB session、不執行 migration、不寫入資料。 +""" + + +def _status_value(status, key, default=False): + return bool(getattr(status, key, default)) + + +def build_write_approval_runbook( + *, + phase, + status, + schema_smoke, + seed_plan, + write_guard, + writer_plan, +): + """建立正式 seed write 前的 read-only runbook。""" + gates = [ + { + "key": "schema_smoke_passed", + "label": "ORM metadata smoke 已通過,七張 market_* table 與 market_platforms 欄位完整", + "passed": bool(schema_smoke.get("passed")), + }, + { + "key": "backup_completed", + "label": "已在正式推版前執行 python backup_system.py", + "passed": False, + }, + { + "key": "migration_file_reviewed", + "label": "market_* schema migration 已人工審核,且不 drop/alter 既有業績資料表", + "passed": False, + }, + { + "key": "feature_flags_enabled_for_write_window", + "label": "寫入窗口才可同時啟用 MARKET_INTEL_ENABLED 與 MARKET_INTEL_WRITE_ENABLED", + "passed": bool(_status_value(status, "enabled") and _status_value(status, "write_enabled")), + }, + { + "key": "database_write_allowed", + "label": "runtime database_write_allowed 為 true", + "passed": bool(_status_value(status, "database_write_allowed")), + }, + { + "key": "manual_operator_approval", + "label": "操作者已明確批准一次性 market_platforms seed upsert", + "passed": False, + }, + { + "key": "rollback_plan_reviewed", + "label": "已確認回復策略:關閉 flags、app-only rollback、必要時清理 seed rows", + "passed": False, + }, + { + "key": "production_smoke_targets_defined", + "label": "已定義 /health 與市場情報 API smoke targets", + "passed": True, + }, + ] + blocked_reasons = [gate["key"] for gate in gates if not gate["passed"]] + + return { + "phase": phase, + "mode": "approval_runbook_read_only", + "ready_for_real_write": False, + "writes_executed": False, + "would_write_database": False, + "database_session_created": False, + "database_commit_executed": False, + "external_network_executed": False, + "scheduler_attached": False, + "approval_required": True, + "approval_token_present": False, + "blocked_reasons": blocked_reasons, + "approval_gates": gates, + "seed_count": int(seed_plan.get("seed_count") or 0), + "writer_operation_count": int(writer_plan.get("operation_count") or 0), + "schema_smoke": schema_smoke, + "write_guard_summary": { + "ready_to_write": bool(write_guard.get("ready_to_write")), + "would_write_database": bool(write_guard.get("would_write_database")), + "blocked_reasons": write_guard.get("blocked_reasons", []), + }, + "operator_sequence": [ + { + "key": "scope_review", + "label": "確認 git diff 只包含 market_intel、ADR/TODO、版本與測試", + }, + { + "key": "backup", + "label": "執行 python backup_system.py,保存正式環境回復點", + }, + { + "key": "migration_apply_window", + "label": "在維護窗口套用 market_* migration,不觸碰 momo-db 容器生命週期", + }, + { + "key": "seed_preview_compare", + "label": "比對 platform_seed_writer_plan 的 4 筆 upsert preview", + }, + { + "key": "one_time_seed_write", + "label": "另開明確批准後才允許一次性 seed writer 真寫入", + }, + { + "key": "post_write_smoke", + "label": "驗證 /health、/market_intel、schema_smoke 與 deployment_readiness", + }, + ], + "rollback_plan": [ + { + "key": "disable_flags", + "label": "立刻關閉 MARKET_INTEL_ENABLED、MARKET_INTEL_CRAWLER_ENABLED、MARKET_INTEL_WRITE_ENABLED", + }, + { + "key": "app_only_rollback", + "label": "回退 momo-app 程式版本,只 recreate momo-app,不碰 momo-db", + }, + { + "key": "seed_rows_cleanup", + "label": "若唯一異常來自 platform seed,可在人工審核後刪除或停用 market_platforms seed rows", + }, + ], + "hard_safety_boundaries": [ + "no_external_crawling_during_seed_write", + "no_scheduler_attach_during_seed_write", + "no_momo_db_container_lifecycle_change", + "no_remove_orphans", + "health_probe_uses_health_endpoint_only", + ], + } diff --git a/templates/market_intel/disabled.html b/templates/market_intel/disabled.html index 8a92be1..dbfce58 100644 --- a/templates/market_intel/disabled.html +++ b/templates/market_intel/disabled.html @@ -58,6 +58,187 @@ font-family: "JetBrains Mono", monospace; font-size: 1rem; } + + .market-intel-preview-head { + align-items: center; + display: flex; + gap: 0.75rem; + justify-content: space-between; + margin-bottom: 1rem; + } + + .market-intel-preview-title { + color: var(--momo-ink, #30251b); + font-size: 1rem; + font-weight: 800; + margin: 0; + } + + .market-intel-icon-button { + align-items: center; + background: rgba(255, 250, 241, 0.9); + border: 1px solid var(--momo-border, #d8c8aa); + border-radius: 8px; + color: var(--momo-ink, #30251b); + display: inline-flex; + height: 2.25rem; + justify-content: center; + width: 2.25rem; + } + + .market-intel-icon-button:hover { + background: rgba(201, 117, 45, 0.12); + color: var(--momo-accent-700, #8f4530); + } + + .market-intel-preview-meta { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; + margin-bottom: 1rem; + } + + .market-intel-pill { + background: rgba(255, 250, 241, 0.82); + border: 1px solid rgba(120, 83, 44, 0.14); + border-radius: 8px; + color: var(--momo-muted, #756a5b); + font-family: "JetBrains Mono", monospace; + font-size: 0.78rem; + font-weight: 700; + padding: 0.35rem 0.5rem; + } + + .market-intel-empty { + background: rgba(255, 250, 241, 0.72); + border: 1px dashed rgba(120, 83, 44, 0.28); + border-radius: 8px; + color: var(--momo-muted, #756a5b); + padding: 1rem; + } + + .market-intel-candidate-list { + display: grid; + gap: 0.75rem; + } + + .market-intel-candidate { + background: rgba(255, 250, 241, 0.82); + border: 1px solid rgba(120, 83, 44, 0.14); + border-left: 3px solid var(--momo-accent, #c8752d); + border-radius: 8px; + padding: 0.8rem 0.9rem; + } + + .market-intel-candidate a { + color: var(--momo-ink, #30251b); + font-weight: 800; + text-decoration: none; + word-break: break-word; + } + + .market-intel-candidate small { + color: var(--momo-muted, #756a5b); + display: block; + margin-top: 0.35rem; + } + + .market-intel-operation-list { + display: grid; + gap: 0.75rem; + } + + .market-intel-operation { + background: rgba(255, 250, 241, 0.82); + border: 1px solid rgba(120, 83, 44, 0.14); + border-left: 3px solid var(--momo-accent, #c8752d); + border-radius: 8px; + padding: 0.8rem 0.9rem; + } + + .market-intel-operation strong { + color: var(--momo-ink, #30251b); + display: block; + font-family: "JetBrains Mono", monospace; + font-size: 0.92rem; + word-break: break-word; + } + + .market-intel-operation small { + color: var(--momo-muted, #756a5b); + display: block; + margin-top: 0.35rem; + } + + .market-intel-check-list { + display: grid; + gap: 0.6rem; + } + + .market-intel-deploy-grid { + display: grid; + gap: 0.9rem; + grid-template-columns: repeat(auto-fit, minmax(240px, 1fr)); + margin-top: 1rem; + } + + .market-intel-deploy-section-title { + color: var(--momo-muted, #756a5b); + font-family: "JetBrains Mono", monospace; + font-size: 0.78rem; + font-weight: 800; + margin: 0 0 0.55rem; + } + + .market-intel-check { + align-items: center; + background: rgba(255, 250, 241, 0.82); + border: 1px solid rgba(120, 83, 44, 0.14); + border-radius: 8px; + color: var(--momo-muted, #756a5b); + display: flex; + gap: 0.65rem; + justify-content: space-between; + padding: 0.65rem 0.75rem; + } + + .market-intel-check div { + min-width: 0; + } + + .market-intel-check strong { + color: var(--momo-ink, #30251b); + font-family: "JetBrains Mono", monospace; + font-size: 0.84rem; + word-break: break-word; + } + + .market-intel-check small { + color: var(--momo-muted, #756a5b); + display: block; + line-height: 1.45; + margin-top: 0.25rem; + } + + .market-intel-check span { + color: var(--momo-muted, #756a5b); + font-family: "JetBrains Mono", monospace; + font-size: 0.78rem; + font-weight: 800; + white-space: nowrap; + } + + @media (max-width: 520px) { + .market-intel-preview-head, + .market-intel-check { + align-items: flex-start; + flex-direction: column; + } + + .market-intel-check span { + white-space: normal; + } + } {% endblock %} @@ -89,7 +270,707 @@ DB 寫入許可 {{ 'ON' if status.database_write_allowed else 'OFF' }} +
+ 已註冊 Adapter + {{ adapter_count|default(0) }} +
+
+ 手動 Fetch + {{ 'ON' if manual_fetch_allowed|default(false) else 'OFF' }} +
+ + + +
+
+
+

CANDIDATE PREVIEW / SAFE

+

候選預覽

+
+ +
+
+ loading +
+
+
讀取候選預覽中...
+
+
+ +
+
+
+

SEED WRITER / DRY RUN

+

平台種子寫入預覽

+
+ +
+
+ loading +
+
+
讀取寫入預覽中...
+
+
+ +
+
+
+

SEED CLI / TRANSACTION PREVIEW

+

Seed CLI 交易預覽

+
+ +
+
+ loading +
+
+
讀取交易預覽中...
+
+
+ +
+
+
+

DB SCHEMA / READ ONLY PROBE

+

正式 DB Schema 探針

+
+ +
+
+ loading +
+
+
讀取 DB 探針中...
+
+
+ +
+
+
+

PLATFORM SEED / READ ONLY DIFF

+

平台 Seed DB 差異探針

+
+ +
+
+ loading +
+
+
讀取 Seed 差異探針中...
+
+
+ +
+
+
+

MIGRATION / BLUEPRINT

+

Schema migration 草案

+
+ +
+
+ loading +
+
+
讀取 migration 草案中...
+
+
+ +
+
+
+

WRITE APPROVAL / RUNBOOK

+

正式寫入批准檢查

+
+ +
+
+ loading +
+
+
讀取批准檢查中...
+
+
+ +
+
+
+

DEPLOYMENT / READINESS

+

推版準備檢查

+
+ +
+
+ loading +
+
+
讀取推版準備中...
{% endblock %} + +{% block extra_js %} + +{% endblock %} diff --git a/tests/test_market_intel_skeleton.py b/tests/test_market_intel_skeleton.py new file mode 100644 index 0000000..d20ae3c --- /dev/null +++ b/tests/test_market_intel_skeleton.py @@ -0,0 +1,804 @@ +import json +import os +import subprocess +import sys +from pathlib import Path + +from sqlalchemy import create_engine, text + +from database.manager import Base +from services.market_intel import MarketIntelService +from services.market_intel.service import MARKET_INTEL_TABLES +from services.market_intel.adapters import get_adapter, get_adapter_summaries +from services.market_intel.candidate_preview import build_candidate_preview_from_discovery +from services.market_intel.discovery_runner import ManualDiscoveryRunner +from services.market_intel.html_diagnostics import parse_html_diagnostics +from services.market_intel.platform_seed_db_diff import build_platform_seed_db_diff_plan +from services.market_intel.schema_db_probe import build_schema_db_probe_plan + + +def test_market_intel_defaults_are_safe(): + service = MarketIntelService() + status = service.get_runtime_status().to_dict() + plan = service.build_dry_run_plan("momo") + + assert status["enabled"] is False + assert status["crawler_enabled"] is False + assert status["write_enabled"] is False + assert status["database_write_allowed"] is False + assert plan["would_discover_campaigns"] is False + assert plan["would_write_database"] is False + + +def test_market_intel_adapter_registry_is_read_only(): + summaries = get_adapter_summaries() + codes = {item["platform_code"] for item in summaries} + + assert {"momo", "pchome", "coupang", "shopee"} <= codes + for summary in summaries: + policy = summary["safety_policy"] + assert policy["allow_login"] is False + assert policy["allow_database_write"] is False + assert policy["allow_scheduler_attach"] is False + + +def test_market_intel_discovery_plan_does_not_allow_network_or_write(): + service = MarketIntelService() + plan = service.build_discovery_plan("momo") + + assert plan["found"] is True + assert len(plan["plans"]) == 1 + momo_plan = plan["plans"][0] + assert momo_plan["network_request_allowed"] is False + assert momo_plan["database_write_allowed"] is False + assert momo_plan["scheduler_attach_allowed"] is False + assert momo_plan["sources"] + + +def test_unknown_adapter_returns_diagnostic_error(): + assert get_adapter("unknown") is None + plan = MarketIntelService().build_discovery_plan("unknown") + assert plan["found"] is False + assert plan["error"] == "未知平台 adapter" + + +def test_manual_discovery_default_does_not_call_network(): + called = {"count": 0} + + def fake_get(*args, **kwargs): + called["count"] += 1 + raise AssertionError("預設 dry-run 不應發 HTTP request") + + result = MarketIntelService().run_manual_discovery("momo", fetch=False, http_get=fake_get) + + assert called["count"] == 0 + assert result["found"] is True + assert result["runs"][0]["status"] == "planned" + assert result["runs"][0]["sources_fetched"] == 0 + assert all(item["network_executed"] is False for item in result["runs"][0]["results"]) + + +def test_manual_discovery_fetch_is_blocked_when_flags_are_off(): + called = {"count": 0} + + def fake_get(*args, **kwargs): + called["count"] += 1 + raise AssertionError("flags 關閉時不應發 HTTP request") + + result = MarketIntelService().run_manual_discovery("momo", fetch=True, http_get=fake_get) + + assert called["count"] == 0 + assert result["runs"][0]["status"] == "blocked" + assert result["runs"][0]["network_allowed"] is False + assert result["runs"][0]["database_write_allowed"] is False + + +def test_manual_runner_fetch_uses_injected_http_get_when_allowed(): + class RuntimeStatus: + enabled = True + crawler_enabled = True + + class Response: + status_code = 200 + text = "活動頁OK" + + called = {"count": 0} + + def fake_get(url, **kwargs): + called["count"] += 1 + return Response() + + adapter = get_adapter("momo") + runner = ManualDiscoveryRunner(runtime_status=RuntimeStatus(), http_get=fake_get) + result = runner.run(adapter, fetch=True).to_dict() + + assert called["count"] == len(adapter.campaign_sources()) + assert result["status"] == "success" + assert result["database_write_allowed"] is False + assert result["scheduler_attached"] is False + assert result["sources_fetched"] == len(adapter.campaign_sources()) + assert result["results"][0]["title"] == "活動頁" + assert result["results"][0]["diagnostics"]["title"] == "活動頁" + + +def test_html_diagnostics_extracts_campaign_link_candidates(): + html = """ + + 五月品牌日 + + 品牌日活動 + 客服中心 + 外部 promo + + + """ + + diagnostics = parse_html_diagnostics(html, base_url="https://shop.example").to_dict() + + assert diagnostics["title"] == "五月品牌日" + assert diagnostics["link_count"] == 3 + assert diagnostics["same_host_link_count"] == 1 + assert diagnostics["campaign_link_candidates"][0]["href"] == "https://shop.example/event/brand-day" + assert diagnostics["campaign_link_candidates"][0]["score"] > 0 + assert "generic_score" in diagnostics["campaign_link_candidates"][0] + assert "platform_score" in diagnostics["campaign_link_candidates"][0] + assert "confidence_band" in diagnostics["campaign_link_candidates"][0] + assert "confidence_reason" in diagnostics["campaign_link_candidates"][0] + + +def test_manual_runner_returns_parser_diagnostics_when_fetch_succeeds(): + class RuntimeStatus: + enabled = True + crawler_enabled = True + + class Response: + status_code = 200 + text = """ + + PChome 優惠活動 + 限時優惠 + + """ + + adapter = get_adapter("pchome") + runner = ManualDiscoveryRunner(runtime_status=RuntimeStatus(), http_get=lambda *args, **kwargs: Response()) + result = runner.run(adapter, fetch=True).to_dict() + diagnostics = result["results"][0]["diagnostics"] + + assert diagnostics["title"] == "PChome 優惠活動" + assert diagnostics["campaign_link_candidates"] + assert diagnostics["campaign_link_candidates"][0]["is_same_host"] is True + + +def test_momo_platform_scorer_prioritizes_momo_campaign_links(): + adapter = get_adapter("momo") + html = """ + MOMO 活動 + 外部 promo + MOMO 品牌日活動 + + """ + + diagnostics = parse_html_diagnostics( + html, + base_url=adapter.base_url, + score_link=adapter.score_campaign_link, + ).to_dict() + + first = diagnostics["campaign_link_candidates"][0] + assert first["href"] == "https://www.momoshop.com.tw/edm/cmmedm.jsp" + assert first["platform_score"] > 0 + assert first["confidence_band"] == "high" + + +def test_pchome_platform_scorer_prioritizes_region_campaign_links(): + adapter = get_adapter("pchome") + html = """ + PChome 活動 + 外部 event + PChome 24h 美妝優惠 + + """ + + diagnostics = parse_html_diagnostics( + html, + base_url=adapter.base_url, + score_link=adapter.score_campaign_link, + ).to_dict() + + first = diagnostics["campaign_link_candidates"][0] + assert first["href"] == "https://24h.pchome.com.tw/region/DDAB" + assert first["platform_score"] > 0 + assert first["confidence_band"] == "high" + + +def test_coupang_platform_scorer_prioritizes_official_campaign_links(): + adapter = get_adapter("coupang") + html = """ + Coupang 活動 + 外部 event + 酷澎 火箭跨境優惠 + + """ + + diagnostics = parse_html_diagnostics( + html, + base_url=adapter.base_url, + score_link=adapter.score_campaign_link, + ).to_dict() + + first = diagnostics["campaign_link_candidates"][0] + assert first["href"] == "https://www.tw.coupang.com/np/coupangglobal" + assert first["platform_score"] > 0 + assert first["confidence_band"] == "high" + + +def test_shopee_platform_scorer_prioritizes_mall_campaign_links(): + adapter = get_adapter("shopee") + html = """ + Shopee 活動 + 外部 event + 蝦皮商城 品牌限時優惠 + + """ + + diagnostics = parse_html_diagnostics( + html, + base_url=adapter.base_url, + score_link=adapter.score_campaign_link, + ).to_dict() + + first = diagnostics["campaign_link_candidates"][0] + assert first["href"] == "https://shopee.tw/mall" + assert first["platform_score"] > 0 + assert first["confidence_band"] == "high" + + +def test_confidence_bands_cover_high_medium_low(): + adapter = get_adapter("momo") + html = """ + 信心帶測試 + MOMO 限時品牌日活動優惠 + 活動 + 清單 + + """ + + diagnostics = parse_html_diagnostics( + html, + base_url=adapter.base_url, + score_link=adapter.score_campaign_link, + ).to_dict() + bands = {item["href"]: item["confidence_band"] for item in diagnostics["campaign_link_candidates"]} + + assert bands["https://www.momoshop.com.tw/edm/cmmedm.jsp"] == "high" + assert bands["https://neutral.example/event-light"] == "medium" + assert bands["https://other.example/sale"] == "low" + for item in diagnostics["campaign_link_candidates"]: + assert item["confidence_reason"] + + +def test_candidate_preview_default_is_empty_and_does_not_call_network(): + called = {"count": 0} + + def fake_get(*args, **kwargs): + called["count"] += 1 + raise AssertionError("candidate preview 預設不應發 HTTP request") + + preview = MarketIntelService().build_candidate_preview("momo", fetch=False, http_get=fake_get) + + assert called["count"] == 0 + assert preview["candidate_count"] == 0 + assert preview["database_write_allowed"] is False + assert preview["scheduler_attached"] is False + assert preview["run_statuses"][0]["status"] == "planned" + + +def test_candidate_preview_fetch_is_blocked_when_flags_are_off(): + called = {"count": 0} + + def fake_get(*args, **kwargs): + called["count"] += 1 + raise AssertionError("flags 關閉時 candidate preview 不應發 HTTP request") + + preview = MarketIntelService().build_candidate_preview("momo", fetch=True, http_get=fake_get) + + assert called["count"] == 0 + assert preview["candidate_count"] == 0 + assert preview["run_statuses"][0]["status"] == "blocked" + + +def test_candidate_preview_aggregates_and_filters_by_band(): + discovery = { + "platform_code": "all", + "fetch_requested": True, + "manual_fetch_allowed": True, + "runs": [ + { + "platform_code": "momo", + "status": "success", + "sources_planned": 1, + "sources_fetched": 1, + "errors": 0, + "results": [ + { + "source_key": "momo_edm", + "name": "MOMO EDM", + "url": "https://www.momoshop.com.tw/edm/cmmedm.jsp", + "status": "fetched", + "diagnostics": { + "title": "MOMO 活動", + "page_hash": "hash-a", + "campaign_link_candidates": [ + { + "href": "https://www.momoshop.com.tw/edm/a", + "text": "品牌日", + "is_same_host": True, + "score": 20, + "generic_score": 6, + "platform_score": 14, + "confidence_band": "high", + "confidence_reason": "same_host", + }, + { + "href": "https://other.example/sale", + "text": "清單", + "is_same_host": False, + "score": 2, + "generic_score": 2, + "platform_score": 0, + "confidence_band": "low", + "confidence_reason": "external_host", + }, + ], + }, + } + ], + } + ], + } + + preview = build_candidate_preview_from_discovery(discovery, min_band="medium", limit=10) + + assert preview["candidate_count"] == 1 + assert preview["candidates"][0]["confidence_band"] == "high" + assert preview["candidates"][0]["platform_code"] == "momo" + assert preview["candidates"][0]["source_key"] == "momo_edm" + + +def test_market_intel_preview_template_uses_safe_fetch_false_endpoint(): + template = Path("templates/market_intel/disabled.html").read_text(encoding="utf-8") + + assert "data-market-intel-preview" in template + assert "data-market-intel-writer" in template + assert "data-market-intel-cli" in template + assert "data-market-intel-cli-body" in template + assert "data-market-intel-db-probe" in template + assert "data-market-intel-db-probe-body" in template + assert "data-market-intel-seed-diff" in template + assert "data-market-intel-seed-diff-body" in template + assert "data-market-intel-migration" in template + assert "data-market-intel-migration-tables" in template + assert "data-market-intel-approval" in template + assert "data-market-intel-approval-gates" in template + assert "data-market-intel-deploy" in template + assert "data-market-intel-deploy-steps" in template + assert "data-market-intel-deploy-fallback" in template + assert "market_intel.market_intel_candidate_preview" in template + assert "market_intel.market_intel_platform_seed_writer_plan" in template + assert "market_intel.market_intel_seed_writer_cli_status" in template + assert "market_intel.market_intel_schema_db_probe" in template + assert "market_intel.market_intel_platform_seed_db_diff" in template + assert "market_intel.market_intel_migration_blueprint" in template + assert "market_intel.market_intel_write_approval_runbook" in template + assert "market_intel.market_intel_deployment_readiness" in template + assert "required_manual_steps" in template + assert "fallback_plan" in template + assert "approval_gates" in template + assert "備援方案" in template + assert "fetch=false" in template + assert "fetch=true" not in template + assert "execute=false" in template + assert "execute=true" not in template + assert "writes=executed" not in template + assert "API 不執行推版" in template + + +def test_market_intel_schema_metadata_contains_all_market_tables(): + metadata_tables = set(Base.metadata.tables) + + assert set(MARKET_INTEL_TABLES) <= metadata_tables + + +def test_market_intel_schema_smoke_checks_platform_columns(): + smoke = MarketIntelService().build_schema_smoke()["schema_smoke"] + + assert smoke["passed"] is True + assert smoke["missing_tables"] == [] + assert smoke["missing_market_platform_columns"] == [] + assert "crawl_policy_json" in smoke["market_platform_required_columns"] + + +def test_platform_seed_plan_is_read_only_and_adapter_derived(): + plan = MarketIntelService().build_platform_seed_plan() + seed_codes = {seed["code"] for seed in plan["seeds"]} + + assert {"momo", "pchome", "coupang", "shopee"} <= seed_codes + assert plan["would_write_database"] is False + assert plan["required_gates"]["schema_smoke_required"] is True + assert plan["required_gates"]["migration_required"] is True + assert plan["required_gates"]["manual_operator_approval_required"] is True + + for seed in plan["seeds"]: + policy = seed["crawl_policy_json"] + assert seed["enabled"] is False + assert seed["source_count"] == len(seed["sources"]) + assert policy["allow_login"] is False + assert policy["allow_database_write"] is False + assert policy["allow_scheduler_attach"] is False + + +def test_platform_seed_plan_unknown_adapter_returns_diagnostic_error(): + plan = MarketIntelService().build_platform_seed_plan("unknown") + + assert plan["found"] is False + assert plan["seed_count"] == 0 + assert plan["error"] == "未知平台 adapter" + + +def test_platform_seed_write_guard_blocks_default_environment(): + guard = MarketIntelService().build_platform_seed_write_guard() + + assert guard["ready_to_write"] is False + assert guard["would_write_database"] is False + assert guard["database_write_allowed"] is False + assert guard["seed_count"] == 4 + assert "market_intel_enabled" in guard["blocked_reasons"] + assert "market_intel_write_enabled" in guard["blocked_reasons"] + assert "database_write_allowed" in guard["blocked_reasons"] + assert "migration_confirmed" in guard["blocked_reasons"] + assert "manual_operator_approval" in guard["blocked_reasons"] + assert guard["schema_smoke"]["passed"] is True + assert "schema_smoke_confirmed" not in guard["blocked_reasons"] + + +def test_platform_seed_writer_plan_is_dry_run_only(): + writer_plan = MarketIntelService().build_platform_seed_writer_plan() + + assert writer_plan["mode"] == "dry_run" + assert writer_plan["ready_to_write"] is False + assert writer_plan["writes_executed"] is False + assert writer_plan["would_write_database"] is False + assert writer_plan["operation_count"] == 4 + assert writer_plan["schema_smoke"]["passed"] is True + + first_operation = writer_plan["operations"][0] + assert first_operation["operation"] == "upsert" + assert first_operation["table"] == "market_platforms" + assert first_operation["write_status"] == "blocked_dry_run_only" + assert first_operation["values"]["enabled"] is False + assert "ON CONFLICT(code)" in first_operation["sql_shape"] + + +def test_schema_db_probe_planned_does_not_connect_or_write(): + probe = MarketIntelService().build_schema_db_probe() + + assert probe["mode"] == "schema_db_probe_planned" + assert probe["execute_requested"] is False + assert probe["read_only_query_executed"] is False + assert probe["database_connection_opened"] is False + assert probe["database_session_created"] is False + assert probe["explicit_transaction_opened"] is False + assert probe["database_write_executed"] is False + assert probe["database_commit_executed"] is False + assert probe["migration_executed"] is False + assert probe["missing_tables"] == list(MARKET_INTEL_TABLES) + assert "execute_false_planned_only" in probe["blocked_reasons"] + + +def test_schema_db_probe_sqlite_read_only_reports_existing_and_missing_tables(): + engine = create_engine("sqlite:///:memory:") + with engine.begin() as conn: + conn.execute(text("CREATE TABLE market_platforms (id INTEGER PRIMARY KEY)")) + + probe = build_schema_db_probe_plan( + ["market_platforms", "market_campaigns"], + execute_requested=True, + database_type="sqlite", + engine=engine, + ) + + assert probe["mode"] == "schema_db_probe_read_only" + assert probe["execute_requested"] is True + assert probe["read_only_query_executed"] is True + assert probe["database_connection_opened"] is True + assert probe["database_session_created"] is False + assert probe["explicit_transaction_opened"] is False + assert probe["database_write_executed"] is False + assert probe["database_commit_executed"] is False + assert probe["existing_tables"] == ["market_platforms"] + assert probe["missing_tables"] == ["market_campaigns"] + assert probe["schema_tables_exist"] is False + assert "market_tables_missing" in probe["blocked_reasons"] + + +def test_platform_seed_db_diff_planned_does_not_connect_or_write(): + diff = MarketIntelService().build_platform_seed_db_diff() + + assert diff["mode"] == "platform_seed_db_diff_planned" + assert diff["execute_requested"] is False + assert diff["read_only_query_executed"] is False + assert diff["database_connection_opened"] is False + assert diff["database_session_created"] is False + assert diff["explicit_transaction_opened"] is False + assert diff["database_write_executed"] is False + assert diff["database_commit_executed"] is False + assert diff["seed_write_executed"] is False + assert diff["expected_seed_count"] == 4 + assert set(diff["missing_codes"]) == {"momo", "pchome", "coupang", "shopee"} + assert "execute_false_planned_only" in diff["blocked_reasons"] + assert "seed_write_still_blocked" in diff["blocked_reasons"] + + +def test_platform_seed_db_diff_sqlite_read_only_reports_missing_and_matching_seed_rows(): + seed_plan = { + "seeds": [ + { + "code": "momo", + "name": "MOMO", + "base_url": "https://momo.example", + "enabled": False, + "crawl_policy_json": { + "allow_login": False, + "allow_database_write": False, + "seed_source_keys": ["momo_edm"], + }, + }, + { + "code": "pchome", + "name": "PChome", + "base_url": "https://pchome.example", + "enabled": False, + "crawl_policy_json": { + "allow_login": False, + "allow_database_write": False, + "seed_source_keys": ["pchome_region"], + }, + }, + ] + } + engine = create_engine("sqlite:///:memory:") + with engine.begin() as conn: + conn.execute( + text( + """ + CREATE TABLE market_platforms ( + code TEXT PRIMARY KEY, + name TEXT, + base_url TEXT, + enabled BOOLEAN, + crawl_policy_json TEXT + ) + """ + ) + ) + conn.execute( + text( + """ + INSERT INTO market_platforms + (code, name, base_url, enabled, crawl_policy_json) + VALUES + (:code, :name, :base_url, :enabled, :crawl_policy_json) + """ + ), + { + "code": "momo", + "name": "MOMO", + "base_url": "https://momo.example", + "enabled": False, + "crawl_policy_json": json.dumps( + seed_plan["seeds"][0]["crawl_policy_json"], + ensure_ascii=False, + sort_keys=True, + ), + }, + ) + + diff = build_platform_seed_db_diff_plan( + seed_plan, + execute_requested=True, + database_type="sqlite", + engine=engine, + ) + + assert diff["mode"] == "platform_seed_db_diff_read_only" + assert diff["execute_requested"] is True + assert diff["read_only_query_executed"] is True + assert diff["database_connection_opened"] is True + assert diff["database_session_created"] is False + assert diff["explicit_transaction_opened"] is False + assert diff["database_write_executed"] is False + assert diff["database_commit_executed"] is False + assert diff["seed_write_executed"] is False + assert diff["expected_seed_count"] == 2 + assert diff["existing_seed_count"] == 1 + assert diff["existing_codes"] == ["momo"] + assert diff["missing_codes"] == ["pchome"] + assert diff["changed_codes"] == [] + assert diff["matching_codes"] == ["momo"] + assert diff["seed_rows_ready"] is False + assert {item["code"]: item["diff_status"] for item in diff["seed_diffs"]} == { + "momo": "matches_expected", + "pchome": "missing", + } + assert "seed_rows_missing" in diff["blocked_reasons"] + assert "seed_write_still_blocked" in diff["blocked_reasons"] + + +def test_deployment_readiness_reports_app_only_release_gate(): + readiness = MarketIntelService().build_deployment_readiness() + + step_keys = {step["key"] for step in readiness["required_manual_steps"]} + fallback_keys = {item["key"] for item in readiness["fallback_plan"]} + boundary_keys = {item["key"] for item in readiness["safe_deploy_boundaries"]} + + assert readiness["mode"] == "app_only_release_gate" + assert readiness["production_deployed"] is False + assert readiness["git_committed"] is False + assert readiness["git_pushed"] is False + assert readiness["ready_for_production_deploy"] is True + assert readiness["deployment_actions_executed"] is False + assert readiness["execution_boundary"]["api_executes_scp"] is False + assert readiness["execution_boundary"]["api_recreates_container"] is False + assert readiness["execution_boundary"]["api_runs_migration"] is False + assert readiness["execution_boundary"]["api_writes_database"] is False + assert readiness["requires_backup_before_major_update"] is True + assert readiness["backup_command"] == "python backup_system.py" + assert readiness["checks"]["schema_smoke_passed"] is True + assert readiness["checks"]["schema_db_probe_planned_safe"] is True + assert readiness["checks"]["platform_seed_db_diff_planned_safe"] is True + assert readiness["checks"]["writer_plan_dry_run_only"] is True + assert readiness["writer_plan_summary"]["writes_executed"] is False + assert "readiness_checks_not_all_passed" not in readiness["blocked_reasons"] + assert "production_deploy_not_executed_by_api" in readiness["blocked_reasons"] + assert "backup_must_be_verified_by_operator" in readiness["blocked_reasons"] + assert "run_backup_system" in step_keys + assert "verify_health_endpoint" in step_keys + assert "feature_flag_kill_switch" in fallback_keys + assert "database_write_blocked" in fallback_keys + assert "no_remove_orphans" in boundary_keys + assert "no_momo_db_lifecycle_change" in boundary_keys + assert "/health" in readiness["production_smoke_targets"] + assert "/api/market_intel/deployment_readiness" in readiness["production_smoke_targets"] + assert "/api/market_intel/platform_seed_db_diff" in readiness["production_smoke_targets"] + assert readiness["write_approval_runbook"]["ready_for_real_write"] is False + assert readiness["write_approval_runbook"]["writes_executed"] is False + assert readiness["migration_blueprint"]["migration_executed"] is False + assert readiness["migration_blueprint"]["file_created"] is True + assert readiness["migration_blueprint"]["file_matches_blueprint"] is True + assert readiness["schema_db_probe"]["read_only_query_executed"] is False + assert readiness["platform_seed_db_diff"]["read_only_query_executed"] is False + + +def test_write_approval_runbook_is_read_only_and_blocks_real_write(): + runbook = MarketIntelService().build_write_approval_runbook() + gate_keys = {gate["key"] for gate in runbook["approval_gates"]} + + assert runbook["mode"] == "approval_runbook_read_only" + assert runbook["ready_for_real_write"] is False + assert runbook["writes_executed"] is False + assert runbook["would_write_database"] is False + assert runbook["database_session_created"] is False + assert runbook["database_commit_executed"] is False + assert runbook["external_network_executed"] is False + assert runbook["scheduler_attached"] is False + assert runbook["approval_required"] is True + assert runbook["approval_token_present"] is False + assert runbook["seed_count"] == 4 + assert runbook["writer_operation_count"] == 4 + assert runbook["schema_smoke"]["passed"] is True + assert "schema_smoke_passed" in gate_keys + assert "backup_completed" in gate_keys + assert "migration_file_reviewed" in gate_keys + assert "manual_operator_approval" in gate_keys + assert "database_write_allowed" in runbook["blocked_reasons"] + assert "manual_operator_approval" in runbook["blocked_reasons"] + assert "no_momo_db_container_lifecycle_change" in runbook["hard_safety_boundaries"] + assert "no_remove_orphans" in runbook["hard_safety_boundaries"] + + +def test_migration_blueprint_is_additive_preview_only(): + blueprint = MarketIntelService().build_migration_blueprint() + forward_sql_lower = blueprint["forward_sql"].lower() + migration_file = Path(blueprint["suggested_filename"]) + + assert blueprint["mode"] == "migration_file_draft_read_only" + assert blueprint["suggested_filename"] == "migrations/032_market_intel_core_schema.sql" + assert blueprint["file_created"] is True + assert blueprint["file_matches_blueprint"] is True + assert blueprint["file_status"] == "local_draft_matches_blueprint" + assert blueprint["migration_executed"] is False + assert blueprint["database_session_created"] is False + assert blueprint["database_commit_executed"] is False + assert blueprint["external_network_executed"] is False + assert blueprint["scheduler_attached"] is False + assert blueprint["table_count"] == 7 + assert blueprint["forward_has_destructive_sql"] is False + assert blueprint["safety_checks"]["forward_sql_additive_only"] is True + assert blueprint["safety_checks"]["does_not_write_seed_rows"] is True + assert "migration_not_executed" in blueprint["blocked_reasons"] + assert "migration_file_not_created" not in blueprint["blocked_reasons"] + assert "seed_writer_real_write_not_implemented" in blueprint["blocked_reasons"] + assert migration_file.exists() + assert migration_file.read_text(encoding="utf-8").strip() == blueprint["forward_sql"] + assert "CREATE TABLE IF NOT EXISTS market_platforms".lower() in forward_sql_lower + assert "CREATE TABLE IF NOT EXISTS market_crawler_runs".lower() in forward_sql_lower + assert "drop table" not in forward_sql_lower + assert "truncate " not in forward_sql_lower + assert "delete from" not in forward_sql_lower + assert "MARKET_INTEL_CRAWLER_ENABLED=false" in blueprint["command_plan"]["seed_writer_command"]["command"] + assert blueprint["command_plan"]["seed_writer_command"]["script_created"] is True + assert blueprint["command_plan"]["seed_writer_command"]["script_path"] == "scripts/market_intel_seed_writer.py" + assert blueprint["rollback_requires_manual_approval"] is True + + +def test_seed_writer_cli_status_blocks_real_write(): + status = MarketIntelService().build_seed_writer_cli_status( + platform_code="all", + execute_requested=True, + approval_token="test-token", + ) + + assert status["mode"] == "seed_writer_cli_blocked_skeleton" + assert status["execute_requested"] is True + assert status["approval_token_present"] is True + assert status["ready_for_real_write"] is False + assert status["writes_executed"] is False + assert status["would_write_database"] is False + assert status["database_session_created"] is False + assert status["database_commit_executed"] is False + assert status["external_network_executed"] is False + assert status["scheduler_attached"] is False + assert status["exit_code"] == 2 + assert "execute_request_blocked_by_skeleton" in status["blocked_reasons"] + assert "real_write_implementation_enabled" in status["blocked_reasons"] + preview = status["transaction_preview"] + assert preview["mode"] == "seed_transaction_preview_no_session" + assert preview["statement_count"] == 4 + assert preview["database_session_created"] is False + assert preview["transaction_opened"] is False + assert preview["database_commit_executed"] is False + assert preview["database_snapshot_loaded"] is False + assert preview["statements"][0]["table"] == "market_platforms" + assert preview["statements"][0]["diff_status"] == "not_loaded_no_db_session" + assert "ON CONFLICT (code) DO UPDATE SET" in preview["statements"][0]["sql_template"] + assert preview["statements"][0]["parameter_payload_hash"] + assert status["safety_contract"]["refuses_execute_in_this_phase"] is True + assert status["safety_contract"]["keeps_crawler_disabled_for_seed_write"] is True + + +def test_seed_writer_cli_script_outputs_blocked_plan(): + env = { + **os.environ, + "MOMO_ALLOW_INSECURE_CONFIG_FOR_TESTS": "true", + "SECRET_KEY": "test", + "LOGIN_PASSWORD": "test", + } + result = subprocess.run( + [sys.executable, "scripts/market_intel_seed_writer.py", "--platform", "all"], + capture_output=True, + check=False, + env=env, + text=True, + ) + data = json.loads(result.stdout) + + assert result.returncode == 0 + assert data["mode"] == "seed_writer_cli_blocked_skeleton" + assert data["execute_requested"] is False + assert data["writes_executed"] is False + assert data["database_session_created"] is False + assert data["database_commit_executed"] is False + assert data["exit_code"] == 0 + assert data["transaction_preview"]["statement_count"] == 4 + assert data["transaction_preview"]["transaction_opened"] is False diff --git a/tests/test_openclaw_bot_routes_webhook.py b/tests/test_openclaw_bot_routes_webhook.py index bc33037..627b03e 100644 --- a/tests/test_openclaw_bot_routes_webhook.py +++ b/tests/test_openclaw_bot_routes_webhook.py @@ -179,6 +179,62 @@ def test_webhook_menu_callback_edits_existing_message(monkeypatch): assert parse_mode == "Markdown" +def test_webhook_event_ignore_callback_edits_and_audits(monkeypatch): + from routes import openclaw_bot_routes as bot + + edited = [] + sent = [] + audits = [] + + def fake_edit_message_text(chat_id, message_id, text, keyboard=None, parse_mode="Markdown"): + edited.append((chat_id, message_id, text, keyboard, parse_mode)) + return {"ok": True} + + def fake_send_message(chat_id, text, reply_to=None, keyboard=None, parse_mode="Markdown"): + sent.append((chat_id, text, reply_to, keyboard, parse_mode)) + return {"ok": True} + + def fake_audit(event_id, user_label, ts_label): + audits.append((event_id, user_label, ts_label)) + + monkeypatch.setattr(bot, "edit_message_text", fake_edit_message_text) + monkeypatch.setattr(bot, "send_message", fake_send_message) + monkeypatch.setattr(bot, "_write_event_ignore_audit", fake_audit) + monkeypatch.setattr(bot, "_is_authorized", lambda _chat_type, _chat_id, _uid: True) + monkeypatch.setattr(bot, "answer_callback", lambda _cq_id, text="": None) + monkeypatch.setattr(bot, "send_typing", lambda _chat_id: None) + + app = _build_request_app() + payload = { + "update_id": 10103, + "callback_query": { + "id": "cb-eig-1", + "from": {"id": 777, "username": "alice"}, + "message": { + "message_id": 79, + "chat": {"id": -200, "type": "supergroup"}, + "text": "待處理事件", + }, + "data": "momo:eig:event-123", + }, + } + + with app.test_request_context( + "/bot/telegram/webhook", method="POST", json=payload + ): + bot.telegram_webhook() + + assert audits and audits[0][0:2] == ("event-123", "alice") + assert len(edited) == 1 + _, _, text, keyboard, parse_mode = edited[0] + assert "<b>待處理事件</b>" in text + assert "已忽略" in text + assert "alice" in text + assert keyboard is None + assert parse_mode == "HTML" + assert sent == [] + + def test_webhook_legacy_menu_callback_normalizes_prefix(monkeypatch): from routes import openclaw_bot_routes as bot