diff --git a/.env.example b/.env.example index fa5aae9..e6531c5 100644 --- a/.env.example +++ b/.env.example @@ -371,6 +371,9 @@ OPENCLAW_IMAGE_GEMINI_MODEL=gemini-1.5-flash NEMOTRON_OLLAMA_FIRST=true NEMOTRON_OLLAMA_MODEL=qwen3:14b NEMOTRON_OLLAMA_TIMEOUT=180 +OPENCLAW_STRATEGY_OLLAMA_MODEL=qwen3:14b +OPENCLAW_STRATEGY_OLLAMA_TIMEOUT=90 +OPENCLAW_STRATEGY_OLLAMA_NUM_PREDICT=2048 # [預設 OFF] MCP Router;需先部署 docker-compose.mcp.yml 並完成健康檢查再開 MCP_ROUTER_ENABLED=false @@ -400,15 +403,45 @@ PASSWORD_EXPIRY_DAYS=90 # 備份 / 報表 / 同步 # ────────────────────────────────────────────────────────────────────────── +DATA_DIR=/app/data BACKUP_DIR=/app/data/db_backups BACKUP_RETENTION_DAYS=7 DB_CONTAINER=momo-db REPORTS_DIR=/app/data/reports DATABASE_PATH=data/momo_database.db +SQLITE_PATH=/app/data/momo_database.db PG_SYNC_ENABLED=false PG_SYNC_INTERVAL=300 +# PChome 競品比價與補抓產線 +COMPETITOR_INTEL_CACHE_TTL_SECONDS=21600 +PCHOME_FEEDER_TIMEOUT=12 +PCHOME_FEEDER_RATE_DELAY=1.0 +PCHOME_FEEDER_SEARCH_LIMIT=20 +PCHOME_FEEDER_MAX_SEARCH_TERMS=5 +PCHOME_FEEDER_SEARCH_MAX_PAGES=2 +PCHOME_BACKFILL_STATUS_PATH=/app/data/pchome_match_backfill_status.json +PCHOME_BACKFILL_ACTIVE_TTL_SECONDS=7200 + +# PPT 預覽與視覺 QA +PPT_PREVIEW_CACHE_DIR=/app/data/ppt_previews +PPT_VISION_STATE_PATH=/app/data/ppt_vision_audit_status.json +PPT_VISION_ACTIVE_TTL_SECONDS=7200 +PPT_VISION_IMAGE_MAX_EDGE=1280 +PPT_VISION_IMAGE_QUALITY=82 + +# Action plan hygiene / ElephantAlpha resource policy +ACTION_PLAN_HYGIENE_STALE_HOURS=24 +ACTION_PLAN_HYGIENE_MAX_UPDATES=200 +ELEPHANT_ALPHA_RESOURCE_QUEUE_THRESHOLD=10 +ELEPHANT_ALPHA_RESOURCE_LOAD_THRESHOLD_PCT=80 +ELEPHANT_ALPHA_RESOURCE_HIGH_PRIORITY_THRESHOLD=5 +ELEPHANT_ALPHA_RESOURCE_STALE_THRESHOLD=5 +ELEPHANT_ALPHA_RESOURCE_STALE_HOURS=24 +ELEPHANT_ALPHA_RESOURCE_HYGIENE_ENABLED=true +ELEPHANT_ALPHA_HERMES_LLM_PREFETCH_ENABLED=false + # [選填] 外部 BI 連結(模板全域變數) METABASE_URL=https://mo.wooo.work/metabase GRIST_URL=https://grist.wooo.work diff --git a/config.py b/config.py index 52877ba..4af757d 100644 --- a/config.py +++ b/config.py @@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.333" +SYSTEM_VERSION = "V10.335" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index a2928be..e1d1b71 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -58,7 +58,7 @@ SQL漏斗(~300筆) - 比對覆蓋率補強入口:`POST /api/ai/pchome-match/backfill`,優先補抓仍無有效 PChome 配對的高價 ACTIVE 商品,完成後自動重算 AI 挑品清單。 - 排程閉環:`run_pchome_match_backfill_task` 每日 10:30 執行,補抓 PChome 待比對商品、寫入歷史價格,再重算 `strategy='product_pick'` 清單。 - PChome / MOMO 競價摘要出口 `services/competitor_intel_repository.py` 使用 30 分鐘共享快取(`COMPETITOR_INTEL_CACHE_TTL_SECONDS` 可調),避免 `/growth_analysis`、`/daily_sales`、PPT/AI 報表每次請求重跑昂貴覆蓋率與價差趨勢查詢;`run_competitor_price_feeder_task` 與 PChome backfill 完成後會主動清除快取。快取只包摘要輸出,不改 matcher 的高信心門檻與 identity_v2 準確性規則。 -- 商品看板第一屏:`/` 的 V2 看板直接以 `products`、`price_records`、`competitor_prices`、`competitor_match_attempts`、`competitor_match_reviews`、`ai_price_recommendations` 顯示比對覆蓋率、PChome 優勢、MOMO 威脅、AI 挑品、待比對優先清單與 PChome 覆核隊列;`filter=ai_picks` 可查看 50 品 AI 挑品列表,`filter=pchome_review` 可直接查看需人工處理的比價覆核 SKU,並以 DB 分頁支援 search/category/status 後的完整隊列,不得只截前 50 筆。覆核狀態篩選必須至少包含全部、需單位價、身份否決、低信心、價格過期與找不到同款,讓人工可依 matcher 診斷類型分批處理。列內顯示候選 PChome 商品、候選價、match score、單位價換算摘要、人工動作與 matcher 診斷原因標籤(品牌不符、商品線不符、容量差異、組合差異、需單位價、價差極端等),不得只顯示籠統「待比對」。`/api/export/excel/pchome-review` 必須匯出同一套覆核隊列、人工處置、候選 PChome、單位價比較與原始診斷,讓人工覆核、簡報與後續 AI 分析共用同一份證據。`/api/pchome-review//decision` 是人工閉環入口:`accept_identity` 才可把候選寫入 `competitor_prices` 與 `competitor_price_history` 並打上 `manual_review/manual_accept/identity_v2`;`reject_identity` 與 `unit_price_required` 只寫 `competitor_match_reviews` 並追加 manual attempt,不得把不同販售組合或否決候選灌入正式價差。PChome feeder 後續搜尋同一候選時必須讀取 `competitor_match_reviews`:已否決候選寫 `manual_rejected` 並跳過正式寫入,且必須繼續評估下一個候選,不能讓已否決候選長期阻塞同 SKU;已標記單位價候選寫 `manual_unit_price_required`;已採用候選可保守補到最低門檻並保留 `manual_review/manual_accept` 標籤。搜尋候選池只有強同款分數達 `0.90` 才可提前停止,避免 0.76 灰區候選卡掉後續更精準搜尋詞。人工 `reject_identity`、`unit_price_required`、`needs_research` 若命中當前正式候選,必須將同候選 `competitor_prices` 過期,不得繼續顯示正式總價差。商品列表必須將 `manual_rejected`、`manual_unit_price_required`、`manual_needs_research` 顯示為明確人工閉環狀態,不可回落成籠統「待比對」。`fetch_competitor_coverage()` 必須輸出人工採用、人工否決、人工單位價與採用率,daily/growth/PPT 共用 payload 必須顯示人工閉環成效,避免只呈現待審數。商品看板深度快取同時寫入 `data/dashboard_full_cache.pkl`,供多個 Gunicorn worker 共用,避免部署後各 worker 重複重建 7,000+ 商品統計造成開頁變慢;所有資料異動與 AI 挑品重算都透過 `clear_dashboard_cache()` 同步清除記憶體與共享快取,手動重算 API 會立即預熱商品看板快取,避免第一位使用者承擔重建成本。 +- 商品看板第一屏:`/` 的 V2 看板直接以 `products`、`price_records`、`competitor_prices`、`competitor_match_attempts`、`competitor_match_reviews`、`ai_price_recommendations` 顯示比對覆蓋率、PChome 優勢、MOMO 威脅、AI 挑品、待比對優先清單與 PChome 覆核隊列;`filter=ai_picks` 可查看 50 品 AI 挑品列表,`filter=pchome_review` 可直接查看需人工處理的比價覆核 SKU,並以 DB 分頁支援 search/category/status 後的完整隊列,不得只截前 50 筆。覆核狀態篩選必須至少包含全部、需單位價、已排除、低信心、價格過期與找不到同款,讓人工可依 matcher 診斷類型分批處理。列內顯示候選 PChome 商品、候選價、match score、單位價換算摘要、人工動作與 matcher 診斷原因標籤(品牌不符、商品線不符、容量差異、組合差異、需單位價、價差極端等),不得只顯示籠統「待比對」。`/api/export/excel/pchome-review` 必須匯出同一套覆核隊列、人工處置、候選 PChome、單位價比較與原始診斷,讓人工覆核、簡報與後續 AI 分析共用同一份證據。`/api/pchome-review//decision` 是人工閉環入口:`accept_identity` 才可把候選寫入 `competitor_prices` 與 `competitor_price_history` 並打上 `manual_review/manual_accept/identity_v2`;`reject_identity` 與 `unit_price_required` 只寫 `competitor_match_reviews` 並追加 manual attempt,不得把不同販售組合或否決候選灌入正式價差。PChome feeder 後續搜尋同一候選時必須讀取 `competitor_match_reviews`:已否決候選寫 `manual_rejected` 並跳過正式寫入,且必須繼續評估下一個候選,不能讓已否決候選長期阻塞同 SKU;已標記單位價候選寫 `manual_unit_price_required`;已採用候選可保守補到最低門檻並保留 `manual_review/manual_accept` 標籤。搜尋候選池只有強同款分數達 `0.90` 才可提前停止,避免 0.76 灰區候選卡掉後續更精準搜尋詞。人工 `reject_identity`、`unit_price_required`、`needs_research` 若命中當前正式候選,必須將同候選 `competitor_prices` 過期,不得繼續顯示正式總價差。商品列表必須將 `manual_rejected`、`manual_unit_price_required`、`manual_needs_research` 顯示為明確人工閉環狀態,不可回落成籠統「待比對」。`fetch_competitor_coverage()` 必須輸出人工採用、人工否決、人工單位價與採用率,daily/growth/PPT 共用 payload 必須顯示人工閉環成效,避免只呈現待審數。商品看板深度快取同時寫入 `data/dashboard_full_cache.pkl`,供多個 Gunicorn worker 共用,避免部署後各 worker 重複重建 7,000+ 商品統計造成開頁變慢;所有資料異動與 AI 挑品重算都透過 `clear_dashboard_cache()` 同步清除記憶體與共享快取,手動重算 API 會立即預熱商品看板快取,避免第一位使用者承擔重建成本。 | 角色 | 模型 | 主機 | 成本 | 每日限額 | |------|------|------|------|---------| @@ -349,8 +349,8 @@ LEFT JOIN competitor_prices cp - `marketplace_product_matcher.py` 的擴充只能走「正向證據 + 反向 veto」:品牌一致、商品線/型號訊號強、價格合理且無 hard veto 時才允許 `strong_product_line_match` 加分;補充瓶/補充包/refill 與一般正裝不互相配對,分享組/加量組/明星組等組合包不得誤配單品。 - 套組/買送/件數不同但品牌、核心商品線與單一基礎規格一致時,matcher 必須回傳 `comparison_mode='unit_comparable'` 與 `unit_comparable` reason;Feeder 只能寫入 `competitor_match_attempts.attempt_status='unit_comparable'` 或 `refresh_unit_comparable`,不得寫入 `competitor_prices`。Dashboard 與 `competitor_intel_repository` 必須用 `build_unit_price_comparison()` 產生每 ml / 每 g / 每入單位價證據,讓 PPT / AI 報表可說明「需單位價比較」而不是把總價當同款價差。商品看板在正式配對尚未成立時,仍必須顯示最佳候選 PChome 商品名稱、候選價與「候選價,需單位換算」說明,讓人工覆核可直接看見下一步;daily/growth、PPT 與 OpenClaw 摘要不得自建查詢,需消費 `fetch_competitor_review_queue()` 與 coverage 的 `unit_comparable_count`。若任一側含多個不同容量/重量規格,視為多品項套組,不可進 `unit_comparable`。 - PChome feeder 的外部 request timeout 由 `PCHOME_FEEDER_TIMEOUT` 控制,預設 12 秒;排程不得因單一 PChome 搜尋 API timeout 被拖到數分鐘。 -- 商品看板的 PChome 狀態必須把 matcher 診斷原因翻成可行動語意:品牌衝突、規格衝突、補充包差異、組合差異、商品線不符等,不可只顯示籠統「待比對」或「身份否決」。 -- Dashboard 必須把「待比對」拆成可診斷狀態:`價格過期待刷新`、`舊版配對待重驗`、`低分配對待審`、`身份否決`、`需單位價比較`、`找不到同款`、`抓取異常`、`尚未搜尋`。不可再用單一「待比對」掩蓋資料品質原因。 +- 商品看板的 PChome 狀態必須把 matcher 診斷原因翻成可行動語意:品牌不符已排除、規格不符已排除、補充包不相容、組合規格不相容、系列不符已排除、需單位價比較、低信心待補強等,不可只顯示籠統「待比對」或「身份否決」。 +- Dashboard 必須把「待比對」拆成可診斷狀態:`價格過期待刷新`、`舊版配對待重驗`、`低分配對待補強`、`已排除`、`需單位價比較`、`找不到同款`、`抓取異常`、`尚未搜尋`。硬性不相容候選應顯示為已排除/不相容,不得讓使用者誤以為每筆都需要人工待審。 ### 執行方式 diff --git a/docs/memory/code_modularization_inventory_20260430.md b/docs/memory/code_modularization_inventory_20260430.md index ec50fd3..9f347bf 100644 --- a/docs/memory/code_modularization_inventory_20260430.md +++ b/docs/memory/code_modularization_inventory_20260430.md @@ -25,6 +25,7 @@ - 2026-05-20 追記:同步背景 PChome 比價人工覆核閉環後的 `services/competitor_intel_repository.py` 行數;此處只更新 inventory,不變更競品情報 repository 行為。 - 2026-05-20 追記:同步背景 PChome identity / price direction 更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更商品比對行為。 - 2026-05-20 追記:同步背景 PChome crawler 搜尋韌性擴充後的 `services/pchome_crawler.py` 行數;此處只更新 inventory,不變更 PChome crawler 行為。 +- 2026-05-20 追記:同步 PChome 近門檻候選重評與 matcher 系列/刀片數防錯配更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;此處只更新 inventory,不變更比價行為。 ## 達到或超過 800 行檔案清單 @@ -51,14 +52,14 @@ | 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders | | 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers | | 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting | -| 972 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / persistence normalization | +| 1128 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / persistence normalization | | 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service | | 844 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing | | 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy | | 1042 | `services/code_review_pipeline_service.py` | P2 Code review pipeline service | scan orchestration / finding normalization / persistence adapter | | 953 | `routes/export_routes.py` | P2 Export flow | export command/router glue / file path / download orchestration | | 816 | `services/ppt_vision_service.py` | P2 PPT vision QA service | runtime state / queue status / model probe / audit execution 分離 | -| 1292 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / cache strategy | +| 1592 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / cache strategy | | 1120 | `services/competitor_intel_repository.py` | P2 competitor intel repository | review queue query / cache shaping / formatting helpers | | 805 | `routes/bot_api_routes.py` | P2 Bot API Blueprint | route glue / bot action service | diff --git a/routes/ai_routes.py b/routes/ai_routes.py index 2060943..98c06a7 100644 --- a/routes/ai_routes.py +++ b/routes/ai_routes.py @@ -1712,7 +1712,68 @@ def api_pchome_match_backfill(): import threading payload = request.get_json(silent=True) or {} - limit = max(5, min(int(payload.get('limit', 60)), 160)) + try: + limit = max(5, min(int(payload.get('limit', 60)), 160)) + except (TypeError, ValueError): + limit = 60 + + from services.pchome_backfill_status import ( + PchomeBackfillAlreadyRunning, + fail_pchome_backfill_run, + finish_pchome_backfill_run, + get_pchome_backfill_status, + start_pchome_backfill_run, + update_pchome_backfill_run, + ) + + def _feeder_result_payload(result): + return { + 'total_skus': int(getattr(result, 'total_skus', 0) or 0), + 'matched': int(getattr(result, 'matched', 0) or 0), + 'skipped_no_result': int(getattr(result, 'skipped_no_result', 0) or 0), + 'skipped_low_score': int(getattr(result, 'skipped_low_score', 0) or 0), + 'errors': int(getattr(result, 'errors', 0) or 0), + 'history_written': int(getattr(result, 'history_written', 0) or 0), + 'attempts_written': int(getattr(result, 'attempts_written', 0) or 0), + 'duration_sec': round(float(getattr(result, 'duration_sec', 0) or 0), 2), + } + + def _pick_result_payload(result): + return { + 'candidates': int(getattr(result, 'candidates', 0) or 0), + 'written': int(getattr(result, 'written', 0) or 0), + 'generated_at': getattr(result, 'generated_at', None), + } + + def _combined_feeder_payload(revalidation_result, feeder_result): + revalidation_payload = _feeder_result_payload(revalidation_result) + feeder_payload = _feeder_result_payload(feeder_result) + return { + 'total_skus': revalidation_payload['total_skus'] + feeder_payload['total_skus'], + 'matched': revalidation_payload['matched'] + feeder_payload['matched'], + 'skipped_no_result': revalidation_payload['skipped_no_result'] + feeder_payload['skipped_no_result'], + 'skipped_low_score': revalidation_payload['skipped_low_score'] + feeder_payload['skipped_low_score'], + 'errors': revalidation_payload['errors'] + feeder_payload['errors'], + 'history_written': revalidation_payload['history_written'] + feeder_payload['history_written'], + 'attempts_written': revalidation_payload['attempts_written'] + feeder_payload['attempts_written'], + 'duration_sec': round(revalidation_payload['duration_sec'] + feeder_payload['duration_sec'], 2), + 'retryable_candidate_revalidation': revalidation_payload, + 'unmatched_priority_backfill': feeder_payload, + } + + try: + run = start_pchome_backfill_run( + limit=limit, + operator=session.get('username') or 'web', + ) + except PchomeBackfillAlreadyRunning as exc: + return jsonify({ + 'success': False, + 'message': 'PChome 補抓已在執行中,請稍後查看進度', + 'data': exc.status, + }), 409 + + run_id = run['run_id'] def _run_backfill(): try: @@ -1723,24 +1784,67 @@ def api_pchome_match_backfill(): from services.competitor_price_feeder import CompetitorPriceFeeder engine = create_engine(DATABASE_PATH) - result = CompetitorPriceFeeder(engine=engine).run_unmatched_priority(limit=limit) + feeder = CompetitorPriceFeeder(engine=engine) + revalidation_limit = min(limit, 80) + update_pchome_backfill_run( + run_id, + stage='revalidating', + message=f'正在重新評分 {revalidation_limit} 筆近門檻 PChome 候選', + ) + revalidation_result = feeder.run_retryable_candidate_revalidation( + limit=revalidation_limit, + min_score=0.70, + ) + unmatched_limit = max(5, min(limit, limit - int(getattr(revalidation_result, 'total_skus', 0) or 0))) + update_pchome_backfill_run( + run_id, + stage='matching', + message=f'正在補抓 {unmatched_limit} 筆高優先待比對商品', + ) + result = feeder.run_unmatched_priority(limit=unmatched_limit) + result_payload = _combined_feeder_payload(revalidation_result, result) + update_pchome_backfill_run( + run_id, + stage='generating_picks', + message='PChome 補抓完成,正在重算 AI 挑品清單', + result=result_payload, + ) pick_result = generate_product_pick_list(engine, limit=50) + pick_payload = _pick_result_payload(pick_result) + update_pchome_backfill_run( + run_id, + stage='clearing_cache', + message='AI 挑品已重算,正在清除看板快取', + result=result_payload, + pick_result=pick_payload, + ) from services.cache_manager import clear_dashboard_cache clear_dashboard_cache() clear_competitor_intel_cache() + finish_pchome_backfill_run( + run_id, + result=result_payload, + pick_result=pick_payload, + message=( + f"PChome 補抓完成:比對 {result_payload['total_skus']} 筆、" + f"新增/更新 {result_payload['matched']} 筆、" + f"AI 挑品寫入 {pick_payload['written']} 筆" + ), + ) logger.info( "[PChomeBackfill] done total=%s matched=%s no=%s low=%s errors=%s history=%s duration=%ss pick_written=%s", - result.total_skus, - result.matched, - result.skipped_no_result, - result.skipped_low_score, - result.errors, - result.history_written, - result.duration_sec, + result_payload['total_skus'], + result_payload['matched'], + result_payload['skipped_no_result'], + result_payload['skipped_low_score'], + result_payload['errors'], + result_payload['history_written'], + result_payload['duration_sec'], pick_result.written, ) except Exception as exc: - logger.error(f"[PChomeBackfill] 背景補抓失敗: {exc}") + fail_pchome_backfill_run(run_id, str(exc)) + logger.error(f"[PChomeBackfill] 背景補抓失敗: {exc}", exc_info=True) thread = threading.Thread(target=_run_backfill, daemon=True) thread.start() @@ -1749,9 +1853,22 @@ def api_pchome_match_backfill(): 'success': True, 'message': f'已啟動 PChome 待比對補抓,優先處理 {limit} 筆高價未配對商品;完成後會重算 AI 挑品清單', 'limit': limit, + 'data': get_pchome_backfill_status(), }), 202 +@ai_bp.route('/api/ai/pchome-match/backfill/status', methods=['GET']) +@login_required +def api_pchome_match_backfill_status(): + """取得 PChome 待比對補抓的背景執行狀態。""" + from services.pchome_backfill_status import get_pchome_backfill_status + + return jsonify({ + 'success': True, + 'data': get_pchome_backfill_status(), + }) + + @ai_bp.route('/api/ai/icaim/trigger', methods=['POST']) @login_required def api_icaim_trigger(): diff --git a/routes/dashboard_routes.py b/routes/dashboard_routes.py index f41503f..53497d3 100644 --- a/routes/dashboard_routes.py +++ b/routes/dashboard_routes.py @@ -60,7 +60,7 @@ REVIEW_STATUS_OPTIONS = [ 'label': '需單位價', 'statuses': ('unit_comparable', 'refresh_unit_comparable'), }, - {'key': 'identity_veto', 'label': '身份否決', 'statuses': ('identity_veto',)}, + {'key': 'identity_veto', 'label': '已排除', 'statuses': ('identity_veto',)}, {'key': 'low_score', 'label': '低信心', 'statuses': ('low_score',)}, {'key': 'expired_match', 'label': '價格過期', 'statuses': ('expired_match',)}, {'key': 'no_result', 'label': '找不到同款', 'statuses': ('no_result', 'refresh_no_result')}, @@ -88,22 +88,22 @@ def _to_float(value): def _diagnostic_match_rejection_label(diagnostic_text, score_text, *, blocked=True): diagnostic_text = diagnostic_text or '' - suffix = '已停止自動採用' if blocked else '不自動採用以避免錯配' + suffix = '已排除,不進入價格比較' if blocked else '暫不採用,等待補搜尋或人工補證據' if 'unit_comparable' in diagnostic_text: return '需單位價比較', f'{score_text},同核心商品但販售組合不同,需轉換每 ml / 每入後再判讀' if 'refill_pack_conflict' in diagnostic_text: - return '補充包差異待審', f'{score_text},補充瓶/補充包與一般正裝不同,{suffix}' + return '補充包不相容', f'{score_text},補充瓶/補充包與一般正裝不同,{suffix}' if any(token in diagnostic_text for token in ('bundle_offer_conflict', 'multi_component_conflict')): - return '組合差異待審', f'{score_text},組合包/多件組與單品不同,{suffix}' + return '組合規格不相容', f'{score_text},組合包/多件組與單品不同,{suffix}' if 'brand_conflict' in diagnostic_text: - return '品牌衝突待審', f'{score_text},品牌不一致,{suffix}' + return '品牌不符已排除', f'{score_text},品牌不一致,{suffix}' if any(token in diagnostic_text for token in ('volume_conflict', 'weight_conflict', 'count_conflict', 'component_count_conflict')): - return '規格衝突待審', f'{score_text},容量/件數不一致,{suffix}' + return '規格不符已排除', f'{score_text},容量/件數不一致,{suffix}' if 'type_conflict' in diagnostic_text: - return '品類衝突待審', f'{score_text},品類不一致,{suffix}' - if 'product_line_conflict' in diagnostic_text: - return '商品線不符待審', f'{score_text},商品線訊號不足,{suffix}' - return '身份否決' if blocked else '低信心待審', f'{score_text},{suffix}' + return '品類不符已排除', f'{score_text},品類不一致,{suffix}' + if any(token in diagnostic_text for token in ('product_line_conflict', 'model_line_conflict')): + return '系列不符已排除', f'{score_text},商品線/型號不一致,{suffix}' + return '身份不符已排除' if blocked else '低信心待補強', f'{score_text},{suffix}' def _build_pchome_match_status(attempt=None, ineligible=None): @@ -218,7 +218,7 @@ def _build_pchome_match_status(attempt=None, ineligible=None): } if reason == 'below_score_floor': return { - 'label': '低分配對待審', + 'label': '低分配對待補強', 'tone': 'neutral', 'summary': '已有候選但低於高信心門檻,避免錯配所以暫不採用', 'detail': score_text, diff --git a/scheduler.py b/scheduler.py index 15579cb..6681bc4 100644 --- a/scheduler.py +++ b/scheduler.py @@ -2210,6 +2210,7 @@ def run_pchome_match_backfill_task(): ) feeder = CompetitorPriceFeeder(engine=engine) refresh_result = feeder.run_expired_identity_refresh(limit=240) + retryable_result = feeder.run_retryable_candidate_revalidation(limit=160, min_score=0.70) feeder_result = feeder.run_unmatched_priority(limit=120) pick_result = generate_product_pick_list(engine, limit=50) clear_dashboard_cache() @@ -2232,6 +2233,10 @@ def run_pchome_match_backfill_task(): "expired_identity_refresh_no_result": refresh_result.skipped_no_result, "expired_identity_refresh_low_score": refresh_result.skipped_low_score, "expired_identity_refresh_errors": refresh_result.errors, + "retryable_candidate_revalidation_total": retryable_result.total_skus, + "retryable_candidate_revalidation_matched": retryable_result.matched, + "retryable_candidate_revalidation_low_score": retryable_result.skipped_low_score, + "retryable_candidate_revalidation_errors": retryable_result.errors, "pick_candidates": pick_result.candidates, "pick_written": pick_result.written, "status": "Success", @@ -2240,11 +2245,12 @@ def run_pchome_match_backfill_task(): f"[Scheduler] [PChomeBackfill] ✅ 完成 | " f"revalidated={revalidation_result.promoted_fresh}+{revalidation_result.promoted_expired} " f"refreshed={refresh_result.matched}/{refresh_result.total_skus} " + f"retryable={retryable_result.matched}/{retryable_result.total_skus} " f"matched={feeder_result.matched}/{feeder_result.total_skus} " f"history_written={feeder_result.history_written} " f"pick_written={pick_result.written} " - f"errors={feeder_result.errors + refresh_result.errors} " - f"耗時={feeder_result.duration_sec + refresh_result.duration_sec}s" + f"errors={feeder_result.errors + refresh_result.errors + retryable_result.errors} " + f"耗時={feeder_result.duration_sec + refresh_result.duration_sec + retryable_result.duration_sec}s" ) _save_stats('pchome_match_backfill', stats) diff --git a/services/competitor_price_feeder.py b/services/competitor_price_feeder.py index 1dce3ec..0f8412a 100644 --- a/services/competitor_price_feeder.py +++ b/services/competitor_price_feeder.py @@ -673,6 +673,83 @@ class CompetitorPriceFeeder: ).fetchall() return [dict(r._mapping) for r in rows] + def _fetch_retryable_candidate_skus(self, limit: int = 80, min_score: float = 0.70) -> list: + """ + 取得近門檻且非 hard veto 的候選,供 matcher 升級後重新評分。 + + 這條路徑不重新搜尋,只用前次留下的 PChome product_id 批次查詢最新商品資料, + 適合把舊 scorer 卡在 0.70~0.759 的真同款重新推進正式比價。 + """ + if self.engine is None: + raise RuntimeError("需要注入 SQLAlchemy engine") + + from sqlalchemy import text + sql = text(""" + WITH latest_momo AS ( + SELECT + p.id AS product_id, + p.i_code AS sku, + p.name, + p.category, + pr.price AS momo_price, + ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC) AS rn + FROM products p + JOIN price_records pr ON pr.product_id = p.id + WHERE p.status = 'ACTIVE' + ), + latest_attempt AS ( + SELECT DISTINCT ON (cma.sku) + cma.sku, + cma.best_competitor_product_id, + cma.best_competitor_product_name, + cma.best_match_score, + cma.attempt_status, + cma.hard_veto, + cma.attempted_at + FROM competitor_match_attempts cma + WHERE cma.source = 'pchome' + AND cma.best_competitor_product_id IS NOT NULL + AND cma.best_competitor_product_id <> '' + ORDER BY cma.sku, cma.attempted_at DESC, cma.id DESC + ) + SELECT + lm.product_id, + lm.sku, + lm.name, + lm.category, + lm.momo_price, + la.best_competitor_product_id AS competitor_product_id, + la.best_competitor_product_name AS competitor_product_name, + la.best_match_score, + la.attempt_status + FROM latest_momo lm + JOIN latest_attempt la + ON la.sku = lm.sku + LEFT JOIN competitor_prices cp + ON cp.sku = lm.sku + AND cp.source = 'pchome' + AND (cp.expires_at IS NULL OR cp.expires_at > CURRENT_TIMESTAMP) + AND COALESCE(cp.match_score, 0) >= :match_score_floor + AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2' + WHERE lm.rn = 1 + AND cp.sku IS NULL + AND la.attempt_status IN ('low_score', 'refresh_low_score') + AND COALESCE(la.best_match_score, 0) >= :min_score + AND COALESCE(la.hard_veto, false) = false + ORDER BY la.best_match_score DESC NULLS LAST, lm.momo_price DESC NULLS LAST, lm.sku + LIMIT :limit + """) + with self.engine.connect() as conn: + rows = conn.execute( + sql, + { + "limit": max(1, min(int(limit), 300)), + "min_score": float(min_score), + "match_score_floor": MIN_MATCH_SCORE, + }, + ).fetchall() + return [dict(r._mapping) for r in rows] + def _fetch_expired_identity_skus(self, limit: int = 120) -> list: """ 取得 identity_v2 已確認、但 PChome 價格快取過期的商品。 @@ -1444,6 +1521,25 @@ class CompetitorPriceFeeder: return self._run_known_identity_refresh_items(skus, source=source, label="identity_v2 過期價格刷新") + def run_retryable_candidate_revalidation( + self, + limit: int = 80, + min_score: float = 0.70, + source: str = "pchome", + ) -> FeederResult: + """重新評分近門檻候選,避免 matcher 升級後仍卡在舊的 low_score。""" + try: + skus = self._fetch_retryable_candidate_skus(limit=limit, min_score=min_score) + except Exception as e: + logger.error(f"[Feeder] 讀取近門檻候選失敗: {e}") + return FeederResult(0, 0, 0, 0, 1, 0.0) + + return self._run_known_identity_refresh_items( + skus, + source=source, + label="近門檻候選重新評分", + ) + def run_unmatched_priority(self, limit: int = 80, source: str = "pchome") -> FeederResult: """優先補抓尚未有有效 PChome 配對的高價商品。""" try: diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index 48d783d..e38b39e 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -53,6 +53,13 @@ NOISE_PHRASES = ( "優惠組", "分享包", "組合", + "多款可選", + "多款任選", + "任選多款", + "多色可選", + "色號可選", + "平行輸入", + "大容量", ) GENERIC_TOKENS = { @@ -91,6 +98,17 @@ GENERIC_TOKENS = { "mg", "la", "paris", + "多款", + "可選", + "任選", + "平行輸入", + "大容量", + "日本", + "韓國", + "澳洲", + "法國", + "英國", + "美國", } BRAND_ALIAS_OVERRIDES = { @@ -102,6 +120,23 @@ BRAND_ALIAS_OVERRIDES = { "sisley": ("希思黎", "sisley"), "gennies": ("奇妮", "gennies"), "uruhimemomoko": ("潤姬桃子", "uruhimemomoko", "uruhime momoko"), + "arau baby": ("arau baby", "arau", "愛樂寶", "saraya"), + "sebamed": ("sebamed", "施巴"), + "shu uemura": ("shu uemura", "shuuemura", "植村秀"), + "johnsons": ("johnsons", "johnson's", "johnson", "嬌生"), + "gillette": ("gillette", "吉列"), + "schick": ("schick", "舒適牌"), + "obge": ("obge",), + "vaseline": ("vaseline", "凡士林"), + "eaoron": ("eaoron",), + "kameria": ("kameria", "凱蜜菈"), + "cocodor": ("cocodor",), + "peripera": ("peripera",), + "solone": ("solone",), + "im meme": ("im meme", "i'm meme", "i’m meme"), + "febreze": ("febreze", "風倍清"), + "jo malone": ("jo malone",), + "prada": ("prada", "普拉達"), } PRODUCT_TYPES = { @@ -120,9 +155,15 @@ PRODUCT_TYPES = { "保健": ("錠", "膠囊", "粉", "飲", "包", "健康食品"), } -COUNT_UNITS = {"入", "組", "瓶", "支", "條", "盒", "包", "袋", "片", "顆", "粒", "錠", "枚", "件", "罐", "杯", "本"} +COUNT_UNITS = {"入", "組", "瓶", "支", "條", "盒", "包", "袋", "片", "顆", "粒", "錠", "枚", "件", "罐", "杯", "本", "刀把", "刀片", "刀頭", "蕊"} +COUNT_UNIT_PATTERN = r"(?:刀把|刀片|刀頭|入|組|瓶|支|條|盒|包|袋|片|顆|粒|錠|枚|件|罐|杯|本|蕊)" PIECE_UNITS = {"包", "袋", "片", "顆", "粒", "錠", "枚"} CONTAINER_UNITS = {"入", "組", "盒", "罐", "杯", "本", "瓶", "支", "條", "件"} +COUNT_UNIT_FAMILIES = { + "刀片": "blade", + "刀頭": "blade", + "蕊": "refill", +} ENGLISH_COUNT_UNIT_RE = r"(?:pcs?|pieces?|capsules?|caps?|tablets?|tabs?|packs?|sachets?|bottles?|boxes?)" BUNDLE_OFFER_PHRASES = ( "囤貨組", @@ -366,6 +407,10 @@ def _convert_volume(value: str, unit: str) -> Optional[tuple[str, float]]: return None +def _count_unit_family(unit: str) -> str: + return COUNT_UNIT_FAMILIES.get(unit, unit) + + def _extract_specs( text: str, ) -> tuple[tuple[float, ...], tuple[float, ...], tuple[float, ...], tuple[tuple[int, str], ...], Optional[int]]: @@ -385,11 +430,11 @@ def _extract_specs( dosages_mg.append(number) counts: list[tuple[int, str]] = [] - for match in re.finditer(r"(\d+)\s*([入組瓶支條盒包袋片顆粒錠枚件罐杯本])", text): + for match in re.finditer(rf"(\d+)\s*({COUNT_UNIT_PATTERN})", text): counts.append((int(match.group(1)), match.group(2))) - for match in re.finditer(r"([一二兩雙三四五六七八九十])\s*([入組瓶支條盒包袋片顆粒錠枚件罐杯本])", text): + for match in re.finditer(rf"([一二兩雙三四五六七八九十])\s*({COUNT_UNIT_PATTERN})", text): counts.append((CHINESE_COUNT[match.group(1)], match.group(2))) - for match in re.finditer(r"(?:x|乘)\s*(\d+)\s*([入組瓶支條盒包袋片顆粒錠枚件罐杯本])?", text, re.I): + for match in re.finditer(rf"(?:x|乘)\s*(\d+)\s*({COUNT_UNIT_PATTERN})?", text, re.I): unit = match.group(2) or "入" counts.append((int(match.group(1)), unit)) for match in re.finditer(rf"(\d+)\s*{ENGLISH_COUNT_UNIT_RE}", text, re.I): @@ -429,7 +474,8 @@ def parse_product_identity(name: str) -> ProductIdentity: searchable = _strip_noise(normalized) tokens = set(_tokenize(searchable)) product_type = _extract_product_type(searchable) - brand_tokens = _known_brand_tokens(searchable) | _leading_brand_tokens(name, normalized) + known_brand_tokens = _known_brand_tokens(searchable) + brand_tokens = known_brand_tokens or _leading_brand_tokens(name, normalized) core_tokens = { token @@ -539,9 +585,9 @@ def _has_hard_count_unit_conflict(left: ProductIdentity, right: ProductIdentity) left_by_count: dict[int, set[str]] = {} right_by_count: dict[int, set[str]] = {} for count, unit in left.counts: - left_by_count.setdefault(count, set()).add(unit) + left_by_count.setdefault(count, set()).add(_count_unit_family(unit)) for count, unit in right.counts: - right_by_count.setdefault(count, set()).add(unit) + right_by_count.setdefault(count, set()).add(_count_unit_family(unit)) for count in set(left_by_count) & set(right_by_count): left_units = left_by_count[count] @@ -559,11 +605,22 @@ def _has_hard_count_unit_conflict(left: ProductIdentity, right: ProductIdentity) def _count_score(left: ProductIdentity, right: ProductIdentity) -> tuple[float, bool]: left_counts = [count for count, _unit in left.counts] right_counts = [count for count, _unit in right.counts] + left_by_unit: dict[str, set[int]] = {} + right_by_unit: dict[str, set[int]] = {} + for count, unit in left.counts: + left_by_unit.setdefault(_count_unit_family(unit), set()).add(count) + for count, unit in right.counts: + right_by_unit.setdefault(_count_unit_family(unit), set()).add(count) + if left.total_piece_count and right.total_piece_count: if left.total_piece_count == right.total_piece_count: return 1.0, False ratio = max(left.total_piece_count, right.total_piece_count) / max(min(left.total_piece_count, right.total_piece_count), 1) return (0.0, True) if ratio >= 1.5 else (0.45, False) + + for unit in set(left_by_unit) & set(right_by_unit): + if left_by_unit[unit] != right_by_unit[unit]: + return 0.0, True if left.counts and right.counts: if set(left.counts) & set(right.counts): return 0.85, False @@ -852,6 +909,28 @@ def _has_strong_product_line_signal( return token_score >= 0.56 and chinese_name_score >= 0.45 +def _model_line_tokens(identity: ProductIdentity) -> set[str]: + tokens: set[str] = set() + for token in identity.core_tokens: + if token in GENERIC_TOKENS: + continue + if re.fullmatch(r"[a-z][a-z0-9-]{2,}", token): + tokens.add(token) + for match in re.finditer(r"([\u4e00-\u9fff]{2,})(?:系列)", token): + value = match.group(1) + if value not in GENERIC_TOKENS: + tokens.add(value) + return tokens + + +def _has_model_line_conflict(left: ProductIdentity, right: ProductIdentity) -> bool: + left_tokens = _model_line_tokens(left) + right_tokens = _model_line_tokens(right) + if not left_tokens or not right_tokens: + return False + return not bool(left_tokens & right_tokens) + + def score_marketplace_match( momo_name: str, competitor_name: str, @@ -877,6 +956,9 @@ def score_marketplace_match( reasons.extend(spec_reasons) if left.product_type and right.product_type and left.product_type != right.product_type: reasons.append("type_conflict") + model_line_conflict = _has_model_line_conflict(left, right) + if model_line_conflict: + reasons.append("model_line_conflict") bundle_offer_conflict = ( _has_bundle_offer(left) != _has_bundle_offer(right) and not ( @@ -905,6 +987,8 @@ def score_marketplace_match( hard_veto = True if _has_refill_pack(left) != _has_refill_pack(right): hard_veto = True + if model_line_conflict: + hard_veto = True if left_spec_mentions and right_spec_mentions and left_spec_mentions != right_spec_mentions: hard_veto = True if chinese_name_score < 0.16 and token_score < 0.72: diff --git a/services/pchome_backfill_status.py b/services/pchome_backfill_status.py new file mode 100644 index 0000000..98d6c3e --- /dev/null +++ b/services/pchome_backfill_status.py @@ -0,0 +1,344 @@ +"""Persist lightweight PChome match backfill run status. + +The PChome backfill endpoint runs in a background thread. A tiny JSON status +file gives operators progress, last result, and failure context without adding +new schema or blocking the dashboard request path. +""" + +from __future__ import annotations + +import json +import os +import time +import uuid +from copy import deepcopy +from datetime import datetime +from typing import Any + +try: + from config import BASE_DIR +except Exception: # pragma: no cover - import fallback for isolated scripts + BASE_DIR = os.getcwd() + + +RECENT_RUN_LIMIT = 8 +ACTIVE_TTL_SECONDS = int(os.getenv("PCHOME_BACKFILL_ACTIVE_TTL_SECONDS", "7200")) + +STAGE_ORDER = ( + "queued", + "revalidating", + "matching", + "generating_picks", + "clearing_cache", + "completed", +) + +STAGE_LABELS = { + "idle": "尚未執行", + "queued": "已排入背景補抓", + "revalidating": "重新評分近門檻候選", + "matching": "比對高優先未配對商品", + "generating_picks": "重算 AI 挑品清單", + "clearing_cache": "清除看板與競價快取", + "completed": "補抓完成", + "failed": "補抓失敗", + "stale": "執行狀態逾時", +} + + +class PchomeBackfillAlreadyRunning(RuntimeError): + """Raised when a fresh backfill run is already active.""" + + def __init__(self, status: dict[str, Any]): + super().__init__("PChome backfill is already running") + self.status = status + + +def _status_path() -> str: + return os.getenv( + "PCHOME_BACKFILL_STATUS_PATH", + os.path.join( + os.getenv("DATA_DIR", os.path.join(str(BASE_DIR), "data")), + "pchome_match_backfill_status.json", + ), + ) + + +def _now_label() -> str: + return datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + +def _run_id() -> str: + return f"pchome-backfill-{datetime.now().strftime('%Y%m%d%H%M%S')}-{uuid.uuid4().hex[:8]}" + + +def _read_payload() -> dict[str, Any]: + path = _status_path() + try: + if not os.path.isfile(path): + return _default_status() + with open(path, "r", encoding="utf-8") as handle: + payload = json.load(handle) + if not isinstance(payload, dict): + return _default_status() + return _normalize_status(payload) + except Exception: + return _default_status() + + +def _write_payload(payload: dict[str, Any]) -> None: + path = _status_path() + directory = os.path.dirname(path) + tmp_path = f"{path}.{os.getpid()}.tmp" + os.makedirs(directory, exist_ok=True) + with open(tmp_path, "w", encoding="utf-8") as handle: + json.dump(_normalize_status(payload), handle, ensure_ascii=False, indent=2, default=str) + os.replace(tmp_path, path) + + +def _default_status() -> dict[str, Any]: + return { + "status": "idle", + "stage": "idle", + "stage_label": STAGE_LABELS["idle"], + "progress_pct": 0, + "running": False, + "current_run": None, + "recent_runs": [], + "last_result": None, + "last_error": None, + "updated_at": None, + } + + +def _age_seconds(value: str | None) -> float | None: + if not value: + return None + try: + parsed = datetime.strptime(value, "%Y-%m-%d %H:%M:%S") + return max(0.0, time.time() - parsed.timestamp()) + except Exception: + return None + + +def _progress_for_stage(stage: str) -> int: + if stage == "failed": + return 100 + if stage not in STAGE_ORDER: + return 0 + return int(round((STAGE_ORDER.index(stage) + 1) / len(STAGE_ORDER) * 100)) + + +def _normalize_run(run: Any) -> dict[str, Any] | None: + if not isinstance(run, dict): + return None + normalized = dict(run) + stage = str(normalized.get("stage") or normalized.get("status") or "idle") + status = str(normalized.get("status") or "idle") + normalized["stage"] = stage + normalized["status"] = status + normalized["stage_label"] = STAGE_LABELS.get(stage, stage) + normalized["progress_pct"] = int(normalized.get("progress_pct") or _progress_for_stage(stage)) + normalized.setdefault("result", None) + normalized.setdefault("pick_result", None) + normalized.setdefault("last_error", None) + normalized.setdefault("message", None) + return normalized + + +def _normalize_status(payload: dict[str, Any]) -> dict[str, Any]: + normalized = dict(_default_status()) + normalized.update({key: value for key, value in payload.items() if key in normalized}) + current = _normalize_run(payload.get("current_run")) + recent_runs = [ + run for run in (_normalize_run(item) for item in payload.get("recent_runs", [])) if run + ][:RECENT_RUN_LIMIT] + + if current: + age = _age_seconds(current.get("updated_at") or current.get("started_at")) + is_fresh_running = current.get("status") == "running" and ( + age is None or age <= ACTIVE_TTL_SECONDS + ) + if current.get("status") == "running" and not is_fresh_running: + current["status"] = "stale" + current["stage"] = "stale" + current["stage_label"] = STAGE_LABELS["stale"] + current["running"] = False + current["progress_pct"] = 100 + current["last_error"] = current.get("last_error") or "active run exceeded ttl" + else: + current["running"] = is_fresh_running + normalized["current_run"] = current + normalized["status"] = current["status"] + normalized["stage"] = current["stage"] + normalized["stage_label"] = current["stage_label"] + normalized["progress_pct"] = current["progress_pct"] + normalized["running"] = bool(current.get("running")) + normalized["last_result"] = current.get("result") or payload.get("last_result") + normalized["last_error"] = current.get("last_error") or payload.get("last_error") + + normalized["recent_runs"] = recent_runs + normalized["updated_at"] = payload.get("updated_at") or ( + current.get("updated_at") if current else None + ) + return normalized + + +def get_pchome_backfill_status() -> dict[str, Any]: + """Return the latest persisted PChome backfill status.""" + + return _read_payload() + + +def start_pchome_backfill_run(limit: int, operator: str | None = None) -> dict[str, Any]: + """Create a fresh running status or raise when another run is active.""" + + status = _read_payload() + if status.get("running"): + raise PchomeBackfillAlreadyRunning(status) + + now = _now_label() + run = { + "run_id": _run_id(), + "status": "running", + "stage": "queued", + "stage_label": STAGE_LABELS["queued"], + "progress_pct": _progress_for_stage("queued"), + "running": True, + "limit": int(limit), + "operator": operator or "web", + "started_at": now, + "updated_at": now, + "finished_at": None, + "message": "等待背景執行緒啟動", + "result": None, + "pick_result": None, + "last_error": None, + } + status["current_run"] = run + status["recent_runs"] = status.get("recent_runs", [])[:RECENT_RUN_LIMIT] + status["last_error"] = None + status["updated_at"] = now + _write_payload(status) + return deepcopy(run) + + +def update_pchome_backfill_run( + run_id: str, + *, + stage: str, + message: str | None = None, + result: dict[str, Any] | None = None, + pick_result: dict[str, Any] | None = None, +) -> dict[str, Any]: + """Update an active run stage.""" + + status = _read_payload() + current = _normalize_run(status.get("current_run")) or {} + if current.get("run_id") != run_id: + current = {"run_id": run_id, "status": "running", "started_at": _now_label()} + + now = _now_label() + current.update( + { + "status": "running", + "stage": stage, + "stage_label": STAGE_LABELS.get(stage, stage), + "progress_pct": _progress_for_stage(stage), + "running": True, + "updated_at": now, + } + ) + if message is not None: + current["message"] = message + if result is not None: + current["result"] = result + if pick_result is not None: + current["pick_result"] = pick_result + + status["current_run"] = current + status["updated_at"] = now + _write_payload(status) + return deepcopy(current) + + +def finish_pchome_backfill_run( + run_id: str, + *, + result: dict[str, Any] | None = None, + pick_result: dict[str, Any] | None = None, + message: str | None = None, +) -> dict[str, Any]: + """Mark a run completed and append it to recent history.""" + + return _finish_run( + run_id, + status_value="completed", + stage="completed", + message=message or "PChome 補抓完成", + result=result, + pick_result=pick_result, + error=None, + ) + + +def fail_pchome_backfill_run(run_id: str, error: str) -> dict[str, Any]: + """Mark a run failed and append it to recent history.""" + + return _finish_run( + run_id, + status_value="failed", + stage="failed", + message="PChome 補抓失敗", + result=None, + pick_result=None, + error=error, + ) + + +def _finish_run( + run_id: str, + *, + status_value: str, + stage: str, + message: str, + result: dict[str, Any] | None, + pick_result: dict[str, Any] | None, + error: str | None, +) -> dict[str, Any]: + status = _read_payload() + current = _normalize_run(status.get("current_run")) or {} + now = _now_label() + if current.get("run_id") != run_id: + current = {"run_id": run_id, "started_at": now} + + current.update( + { + "status": status_value, + "stage": stage, + "stage_label": STAGE_LABELS.get(stage, stage), + "progress_pct": _progress_for_stage(stage), + "running": False, + "updated_at": now, + "finished_at": now, + "message": message, + "last_error": error, + } + ) + if result is not None: + current["result"] = result + if pick_result is not None: + current["pick_result"] = pick_result + + recent_runs = [current] + for run in status.get("recent_runs", []): + normalized_run = _normalize_run(run) + if normalized_run and normalized_run.get("run_id") != run_id: + recent_runs.append(normalized_run) + status["current_run"] = current + status["recent_runs"] = recent_runs[:RECENT_RUN_LIMIT] + status["last_result"] = current.get("result") + status["last_error"] = error + status["updated_at"] = now + _write_payload(status) + return deepcopy(current) diff --git a/templates/dashboard_v2.html b/templates/dashboard_v2.html index c654c2a..135f0e5 100644 --- a/templates/dashboard_v2.html +++ b/templates/dashboard_v2.html @@ -52,6 +52,32 @@
{{ overview.last_pchome_crawled or '尚無 PChome 抓取紀錄' }}
+
+
+
PCHOME MATCH BACKFILL
+
待比對補抓產線
+
+ 待補抓 {{ overview.pending_match_count | default(0) | number_format }} · 覆核 {{ overview.review_queue_count | default(0) | number_format }} · 單位價 {{ overview.unit_comparable_count | default(0) | number_format }} +
+
+ +
+ 讀取狀態中 + -- +
+ +
diff --git a/templates/edm_dashboard.html b/templates/edm_dashboard.html index 869c6e3..c80ebc9 100644 --- a/templates/edm_dashboard.html +++ b/templates/edm_dashboard.html @@ -19,4 +19,11 @@ {% block content %} {% include 'components/_legacy_bridge_panel.html' %} +{% set legacy_current = (promo_pages | selectattr('id', 'equalto', current_promo_page) | list | first) if promo_pages is defined else none %} +{% set legacy_current_url = legacy_current.url if legacy_current else legacy_bridge_target %} + {% endblock %} diff --git a/tests/test_competitor_identity_revalidator.py b/tests/test_competitor_identity_revalidator.py index cde50f7..c8309a2 100644 --- a/tests/test_competitor_identity_revalidator.py +++ b/tests/test_competitor_identity_revalidator.py @@ -79,9 +79,9 @@ def test_dashboard_match_status_explains_identity_veto_reason(): "error_message": "score=0.32; reasons=refill_pack_conflict", }) - assert bundle["label"] == "組合差異待審" + assert bundle["label"] == "組合規格不相容" assert "組合包/多件組" in bundle["summary"] - assert refill["label"] == "補充包差異待審" + assert refill["label"] == "補充包不相容" assert "補充瓶/補充包" in refill["summary"] diff --git a/tests/test_competitor_match_attempts_persistence.py b/tests/test_competitor_match_attempts_persistence.py index c8c49bb..f2381fc 100644 --- a/tests/test_competitor_match_attempts_persistence.py +++ b/tests/test_competitor_match_attempts_persistence.py @@ -29,6 +29,8 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes(): assert "identity_v2" in source assert "_fetch_expired_identity_skus" in source assert "run_expired_identity_refresh" in source + assert "_fetch_retryable_candidate_skus" in source + assert "run_retryable_candidate_revalidation" in source assert "refresh_known_identity" in source assert 'attempt_status="unit_comparable"' in source assert 'attempt_status="refresh_unit_comparable"' in source diff --git a/tests/test_frontend_v2_assets.py b/tests/test_frontend_v2_assets.py index b023faf..fc599ca 100644 --- a/tests/test_frontend_v2_assets.py +++ b/tests/test_frontend_v2_assets.py @@ -383,8 +383,8 @@ def test_dashboard_v2_shows_pchome_competitor_pricing_and_links(): assert "候選:{{ item.pchome_match_attempt.best_competitor_product_name }}" in dashboard assert "候選價,需單位換算" in dashboard assert "_load_pchome_match_attempt_map" in route_source - assert "低信心待審" in route_source - assert "規格衝突待審" in route_source + assert "低信心待補強" in route_source + assert "規格不符已排除" in route_source assert "dashboard-review-reasons" in dashboard assert "series.pchome" in page_js assert "label: 'PChome'" in page_js @@ -454,8 +454,12 @@ def test_ai_product_pick_agent_uses_real_competitor_data_and_dashboard_action(): assert "@ai_bp.route('/api/ai/product-picks/generate', methods=['POST'])" in route_source assert "generate_product_pick_list(engine" in route_source assert "@ai_bp.route('/api/ai/pchome-match/backfill', methods=['POST'])" in route_source - assert "run_unmatched_priority(limit=limit)" in route_source + assert "@ai_bp.route('/api/ai/pchome-match/backfill/status', methods=['GET'])" in route_source + assert "run_unmatched_priority(limit=unmatched_limit)" in route_source + assert "run_retryable_candidate_revalidation" in route_source assert "generate_product_pick_list(engine, limit=50)" in route_source + assert "start_pchome_backfill_run" in route_source + assert "finish_pchome_backfill_run" in route_source assert "payload.get('limit', 50)" in route_source assert "JSON.stringify({ limit: 50 })" in template assert "完成後會重算 AI 挑品清單" in route_source @@ -476,6 +480,7 @@ def test_ai_product_pick_agent_uses_real_competitor_data_and_dashboard_action(): agent_actions_source = (ROOT / "services/agent_actions.py").read_text(encoding="utf-8") assert "def run_pchome_match_backfill_task" in scheduler_source assert "_save_stats('pchome_match_backfill'" in scheduler_source + assert "retryable_candidate_revalidation_total" in scheduler_source assert "run_pchome_match_backfill_task" in run_scheduler_source assert "每日 10:30:pchome_match_backfill" in run_scheduler_source assert '"run_pchome_match_backfill_task"' in agent_actions_source @@ -486,6 +491,12 @@ def test_ai_product_pick_agent_uses_real_competitor_data_and_dashboard_action(): assert "backfillPchomeMatches" in template assert "/api/ai/product-picks/generate" in template assert "/api/ai/pchome-match/backfill" in template + assert "/api/ai/pchome-match/backfill/status" in dashboard_template + assert "PCHOME MATCH BACKFILL" in dashboard_template + assert "data-pchome-backfill-trigger" in dashboard_template + dashboard_js = (ROOT / "web/static/js/page-dashboard-v2.js").read_text(encoding="utf-8") + assert "loadPchomeBackfillStatus" in dashboard_js + assert "window.backfillPchomeMatches" in dashboard_js assert "'product_pick':['bg-success'" in template assert "kpiMatchRate" in template diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index 590f530..854ed27 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -337,6 +337,53 @@ def test_marketplace_matcher_accepts_strong_multi_component_line_without_full_sp assert "strong_component_line_match" in diagnostics.reasons +def test_marketplace_matcher_accepts_known_brand_alias_and_option_copy(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【OBgE】韓國 OBgE 男士自然遮瑕粉底棒13g", + "OBgE/自然遮瑕粉底棒13g - 多款可選", + momo_price=765, + competitor_price=1099, + ) + + assert diagnostics.score >= 0.76 + assert diagnostics.hard_veto is False + assert diagnostics.comparison_mode == "exact_identity" + + +def test_marketplace_matcher_accepts_same_pack_with_chinese_count_wording(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【SEBAMED】潔膚露1000ml共2入(大容量 平行輸入)", + "Sebamed施巴 潔膚露1000ml 兩入組", + momo_price=799, + competitor_price=899, + ) + + assert diagnostics.score >= 0.76 + assert diagnostics.hard_veto is False + assert "brand_match" in diagnostics.tags + + +def test_marketplace_matcher_rejects_razor_series_and_blade_count_conflict(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【Gillette 吉列】SkinGuard 紳適系列刮鬍刀頭(4刀頭)", + "【Gillette 吉列 】Fusion鋒隱系列刮鬍刀頭(8刀頭)", + momo_price=499, + competitor_price=906, + ) + + assert diagnostics.score < 0.76 + assert diagnostics.hard_veto is True + assert diagnostics.comparison_mode == "not_comparable" + assert "count_conflict" in diagnostics.reasons + assert "model_line_conflict" in diagnostics.reasons + + def test_marketplace_search_terms_prefer_readable_brand_core_spec(): from services.marketplace_product_matcher import build_search_terms diff --git a/tests/test_pchome_backfill_status.py b/tests/test_pchome_backfill_status.py new file mode 100644 index 0000000..adb0476 --- /dev/null +++ b/tests/test_pchome_backfill_status.py @@ -0,0 +1,57 @@ +from services.pchome_backfill_status import ( + PchomeBackfillAlreadyRunning, + fail_pchome_backfill_run, + finish_pchome_backfill_run, + get_pchome_backfill_status, + start_pchome_backfill_run, + update_pchome_backfill_run, +) + + +def test_pchome_backfill_status_tracks_running_and_completion(tmp_path, monkeypatch): + monkeypatch.setenv("PCHOME_BACKFILL_STATUS_PATH", str(tmp_path / "status.json")) + + run = start_pchome_backfill_run(limit=12, operator="tester") + running = get_pchome_backfill_status() + + assert running["running"] is True + assert running["status"] == "running" + assert running["current_run"]["run_id"] == run["run_id"] + assert running["current_run"]["limit"] == 12 + + try: + start_pchome_backfill_run(limit=5) + except PchomeBackfillAlreadyRunning as exc: + assert exc.status["running"] is True + else: + raise AssertionError("expected active PChome backfill guard") + + update_pchome_backfill_run( + run["run_id"], + stage="matching", + result={"total_skus": 12, "matched": 3}, + ) + finish_pchome_backfill_run( + run["run_id"], + result={"total_skus": 12, "matched": 3}, + pick_result={"written": 9}, + ) + completed = get_pchome_backfill_status() + + assert completed["running"] is False + assert completed["status"] == "completed" + assert completed["last_result"]["matched"] == 3 + assert completed["recent_runs"][0]["pick_result"]["written"] == 9 + + +def test_pchome_backfill_status_records_failure(tmp_path, monkeypatch): + monkeypatch.setenv("PCHOME_BACKFILL_STATUS_PATH", str(tmp_path / "status.json")) + + run = start_pchome_backfill_run(limit=8) + fail_pchome_backfill_run(run["run_id"], "crawler timeout") + failed = get_pchome_backfill_status() + + assert failed["running"] is False + assert failed["status"] == "failed" + assert failed["last_error"] == "crawler timeout" + assert failed["recent_runs"][0]["last_error"] == "crawler timeout" diff --git a/web/static/css/page-dashboard-v2.css b/web/static/css/page-dashboard-v2.css index d270455..96993e9 100644 --- a/web/static/css/page-dashboard-v2.css +++ b/web/static/css/page-dashboard-v2.css @@ -163,6 +163,87 @@ color: var(--momo-accent-strong); } + .dashboard-backfill-card { + display: grid; + grid-template-columns: minmax(220px, 1fr) minmax(160px, 280px) minmax(240px, 1fr) auto; + gap: 14px; + align-items: center; + min-width: 0; + margin-top: 12px; + padding: 14px 16px; + overflow: hidden; + background: var(--momo-bg-surface); + border: 1px solid var(--momo-border-light); + border-radius: 8px; + } + + .dashboard-backfill-card[data-status="running"] { + border-color: rgba(190, 106, 45, 0.36); + box-shadow: inset 3px 0 0 var(--momo-warm-caramel); + } + + .dashboard-backfill-card[data-status="failed"], + .dashboard-backfill-card[data-status="stale"] { + border-color: rgba(188, 75, 49, 0.32); + box-shadow: inset 3px 0 0 var(--momo-danger); + } + + .dashboard-backfill-main { + min-width: 0; + } + + .dashboard-backfill-label { + margin-bottom: 3px; + color: var(--momo-text-tertiary); + font-size: 10px; + font-weight: 800; + letter-spacing: 0.10em; + } + + .dashboard-backfill-title { + color: var(--momo-text-primary); + font-size: 15px; + font-weight: 800; + line-height: 1.25; + } + + .dashboard-backfill-meta, + .dashboard-backfill-status { + min-width: 0; + color: var(--momo-text-secondary); + font-size: 11px; + line-height: 1.45; + overflow-wrap: anywhere; + } + + .dashboard-backfill-progress { + position: relative; + width: 100%; + height: 8px; + overflow: hidden; + background: rgba(42, 37, 32, 0.08); + border: 1px solid rgba(42, 37, 32, 0.08); + border-radius: 999px; + } + + .dashboard-backfill-progress span { + position: absolute; + inset: 0 auto 0 0; + width: 0%; + background: linear-gradient(90deg, var(--momo-warm-caramel), var(--momo-success)); + transition: width 240ms ease; + } + + .dashboard-backfill-card[data-status="failed"] .dashboard-backfill-progress span, + .dashboard-backfill-card[data-status="stale"] .dashboard-backfill-progress span { + background: linear-gradient(90deg, var(--momo-danger), var(--momo-warm-rust)); + } + + .dashboard-backfill-status { + display: grid; + gap: 2px; + } + .dashboard-focus-grid { display: grid; grid-template-columns: repeat(3, minmax(0, 1fr)); @@ -1151,6 +1232,16 @@ line-height: 1.35; } + .dashboard-backfill-card { + grid-template-columns: 1fr; + gap: 10px; + padding: 14px; + } + + .dashboard-backfill-card .dashboard-action-button { + width: 100%; + } + .dashboard-search, .dashboard-select, .dashboard-segmented { diff --git a/web/static/js/page-dashboard-v2.js b/web/static/js/page-dashboard-v2.js index e89ddd2..a3aaead 100644 --- a/web/static/js/page-dashboard-v2.js +++ b/web/static/js/page-dashboard-v2.js @@ -280,6 +280,142 @@ let priceChartInstance = null; button.addEventListener('click', () => runDashboardTask(button.dataset.dashboardTask)); }); + let pchomeBackfillPollTimer = null; + + function getPchomeBackfillElements() { + const card = document.querySelector('[data-pchome-backfill-card]'); + return { + card, + trigger: document.querySelector('[data-pchome-backfill-trigger]'), + status: document.querySelector('[data-pchome-backfill-status]'), + result: document.querySelector('[data-pchome-backfill-result]'), + progress: document.querySelector('[data-pchome-backfill-progress]'), + backfillEndpoint: card ? card.dataset.backfillEndpoint : '/api/ai/pchome-match/backfill', + statusEndpoint: card ? card.dataset.statusEndpoint : '/api/ai/pchome-match/backfill/status' + }; + } + + function formatBackfillCount(value) { + return Number(value || 0).toLocaleString(); + } + + function schedulePchomeBackfillPoll() { + if (pchomeBackfillPollTimer) { + clearTimeout(pchomeBackfillPollTimer); + } + pchomeBackfillPollTimer = setTimeout(loadPchomeBackfillStatus, 5000); + } + + function renderPchomeBackfillStatus(payload) { + const status = payload && payload.data ? payload.data : (payload || {}); + const elements = getPchomeBackfillElements(); + if (!elements.card) return; + + const currentRun = status.current_run || {}; + const result = currentRun.result || status.last_result || {}; + const pickResult = currentRun.pick_result || {}; + const running = Boolean(status.running || currentRun.running); + const progressPct = Math.max(0, Math.min(Number(status.progress_pct || currentRun.progress_pct || 0), 100)); + const statusKey = status.status || currentRun.status || 'idle'; + const stageLabel = status.stage_label || currentRun.stage_label || '尚未執行'; + const updatedAt = status.updated_at || currentRun.updated_at || currentRun.finished_at || ''; + + elements.card.dataset.status = statusKey; + if (elements.progress) { + elements.progress.style.width = `${progressPct}%`; + } + if (elements.status) { + elements.status.textContent = updatedAt ? `${stageLabel} · ${updatedAt}` : stageLabel; + } + if (elements.result) { + if (status.last_error || currentRun.last_error) { + elements.result.textContent = status.last_error || currentRun.last_error; + } else if (result && Object.keys(result).length > 0) { + const pickWritten = pickResult.written !== undefined ? ` · 挑品 ${formatBackfillCount(pickResult.written)}` : ''; + elements.result.textContent = ( + `比對 ${formatBackfillCount(result.total_skus)} · 成功 ${formatBackfillCount(result.matched)}` + + ` · 待覆核 ${formatBackfillCount(result.skipped_low_score)}` + + ` · 無結果 ${formatBackfillCount(result.skipped_no_result)}` + + pickWritten + ); + } else { + elements.result.textContent = running ? '正在累積結果' : '尚無最近結果'; + } + } + if (elements.trigger) { + elements.trigger.disabled = running; + elements.trigger.classList.toggle('is-loading', running); + elements.trigger.innerHTML = running + ? ' 補抓中' + : ' 補抓 60 筆'; + } + + if (running) { + schedulePchomeBackfillPoll(); + } else if (pchomeBackfillPollTimer) { + clearTimeout(pchomeBackfillPollTimer); + pchomeBackfillPollTimer = null; + } + } + + function loadPchomeBackfillStatus() { + const elements = getPchomeBackfillElements(); + if (!elements.card) return Promise.resolve(); + return fetch(elements.statusEndpoint, { + headers: { 'Accept': 'application/json' } + }) + .then(response => response.json()) + .then(renderPchomeBackfillStatus) + .catch(error => { + console.warn('[DashboardV2] PChome backfill status load failed:', error); + if (elements.status) { + elements.status.textContent = '狀態讀取失敗'; + } + }); + } + + function backfillPchomeMatches() { + const elements = getPchomeBackfillElements(); + if (!elements.card || !elements.trigger) return; + const limit = Number(elements.trigger.dataset.limit || 60); + if (!confirm(`啟動 PChome 待比對補抓 ${limit} 筆?`)) return; + + elements.trigger.disabled = true; + if (elements.status) { + elements.status.textContent = '正在送出補抓任務'; + } + fetch(elements.backfillEndpoint, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-CSRFToken': getCSRFToken() + }, + body: JSON.stringify({ limit }) + }) + .then(response => response.json().then(data => ({ ok: response.ok, status: response.status, data }))) + .then(({ ok, status, data }) => { + renderPchomeBackfillStatus(data); + if (!ok && status !== 409) { + throw new Error(data.message || data.error || 'PChome 補抓啟動失敗'); + } + schedulePchomeBackfillPoll(); + }) + .catch(error => { + if (elements.status) { + elements.status.textContent = error.message || 'PChome 補抓啟動失敗'; + } + if (elements.trigger) { + elements.trigger.disabled = false; + } + }); + } + + window.backfillPchomeMatches = backfillPchomeMatches; + document.querySelectorAll('[data-pchome-backfill-trigger]').forEach(button => { + button.addEventListener('click', backfillPchomeMatches); + }); + loadPchomeBackfillStatus(); + function runPchomeReviewDecision(button) { const sku = button.dataset.reviewSku || ''; const action = button.dataset.reviewAction || '';