[V10.334] 強化 PChome 比價重評與補抓可觀測性
All checks were successful
CD Pipeline / deploy (push) Successful in 1m5s
All checks were successful
CD Pipeline / deploy (push) Successful in 1m5s
This commit is contained in:
33
.env.example
33
.env.example
@@ -371,6 +371,9 @@ OPENCLAW_IMAGE_GEMINI_MODEL=gemini-1.5-flash
|
||||
NEMOTRON_OLLAMA_FIRST=true
|
||||
NEMOTRON_OLLAMA_MODEL=qwen3:14b
|
||||
NEMOTRON_OLLAMA_TIMEOUT=180
|
||||
OPENCLAW_STRATEGY_OLLAMA_MODEL=qwen3:14b
|
||||
OPENCLAW_STRATEGY_OLLAMA_TIMEOUT=90
|
||||
OPENCLAW_STRATEGY_OLLAMA_NUM_PREDICT=2048
|
||||
|
||||
# [預設 OFF] MCP Router;需先部署 docker-compose.mcp.yml 並完成健康檢查再開
|
||||
MCP_ROUTER_ENABLED=false
|
||||
@@ -400,15 +403,45 @@ PASSWORD_EXPIRY_DAYS=90
|
||||
# 備份 / 報表 / 同步
|
||||
# ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
DATA_DIR=/app/data
|
||||
BACKUP_DIR=/app/data/db_backups
|
||||
BACKUP_RETENTION_DAYS=7
|
||||
DB_CONTAINER=momo-db
|
||||
REPORTS_DIR=/app/data/reports
|
||||
DATABASE_PATH=data/momo_database.db
|
||||
SQLITE_PATH=/app/data/momo_database.db
|
||||
|
||||
PG_SYNC_ENABLED=false
|
||||
PG_SYNC_INTERVAL=300
|
||||
|
||||
# PChome 競品比價與補抓產線
|
||||
COMPETITOR_INTEL_CACHE_TTL_SECONDS=21600
|
||||
PCHOME_FEEDER_TIMEOUT=12
|
||||
PCHOME_FEEDER_RATE_DELAY=1.0
|
||||
PCHOME_FEEDER_SEARCH_LIMIT=20
|
||||
PCHOME_FEEDER_MAX_SEARCH_TERMS=5
|
||||
PCHOME_FEEDER_SEARCH_MAX_PAGES=2
|
||||
PCHOME_BACKFILL_STATUS_PATH=/app/data/pchome_match_backfill_status.json
|
||||
PCHOME_BACKFILL_ACTIVE_TTL_SECONDS=7200
|
||||
|
||||
# PPT 預覽與視覺 QA
|
||||
PPT_PREVIEW_CACHE_DIR=/app/data/ppt_previews
|
||||
PPT_VISION_STATE_PATH=/app/data/ppt_vision_audit_status.json
|
||||
PPT_VISION_ACTIVE_TTL_SECONDS=7200
|
||||
PPT_VISION_IMAGE_MAX_EDGE=1280
|
||||
PPT_VISION_IMAGE_QUALITY=82
|
||||
|
||||
# Action plan hygiene / ElephantAlpha resource policy
|
||||
ACTION_PLAN_HYGIENE_STALE_HOURS=24
|
||||
ACTION_PLAN_HYGIENE_MAX_UPDATES=200
|
||||
ELEPHANT_ALPHA_RESOURCE_QUEUE_THRESHOLD=10
|
||||
ELEPHANT_ALPHA_RESOURCE_LOAD_THRESHOLD_PCT=80
|
||||
ELEPHANT_ALPHA_RESOURCE_HIGH_PRIORITY_THRESHOLD=5
|
||||
ELEPHANT_ALPHA_RESOURCE_STALE_THRESHOLD=5
|
||||
ELEPHANT_ALPHA_RESOURCE_STALE_HOURS=24
|
||||
ELEPHANT_ALPHA_RESOURCE_HYGIENE_ENABLED=true
|
||||
ELEPHANT_ALPHA_HERMES_LLM_PREFETCH_ENABLED=false
|
||||
|
||||
# [選填] 外部 BI 連結(模板全域變數)
|
||||
METABASE_URL=https://mo.wooo.work/metabase
|
||||
GRIST_URL=https://grist.wooo.work
|
||||
|
||||
@@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
||||
# ==========================================
|
||||
# 系統版本與路徑
|
||||
# ==========================================
|
||||
SYSTEM_VERSION = "V10.333"
|
||||
SYSTEM_VERSION = "V10.335"
|
||||
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
||||
public_url = PUBLIC_URL # 用於模板顯示
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ SQL漏斗(~300筆)
|
||||
- 比對覆蓋率補強入口:`POST /api/ai/pchome-match/backfill`,優先補抓仍無有效 PChome 配對的高價 ACTIVE 商品,完成後自動重算 AI 挑品清單。
|
||||
- 排程閉環:`run_pchome_match_backfill_task` 每日 10:30 執行,補抓 PChome 待比對商品、寫入歷史價格,再重算 `strategy='product_pick'` 清單。
|
||||
- PChome / MOMO 競價摘要出口 `services/competitor_intel_repository.py` 使用共享快取(預設 30 分鐘;可用 `COMPETITOR_INTEL_CACHE_TTL_SECONDS` 調整,單位為秒,`.env.example` 的範例值 `21600` 即 6 小時),避免 `/growth_analysis`、`/daily_sales`、PPT/AI 報表每次請求重跑昂貴覆蓋率與價差趨勢查詢;`run_competitor_price_feeder_task` 與 PChome backfill 完成後會主動清除快取。快取只包摘要輸出,不改 matcher 的高信心門檻與 identity_v2 準確性規則。
|
||||
- 商品看板第一屏:`/` 的 V2 看板直接以 `products`、`price_records`、`competitor_prices`、`competitor_match_attempts`、`competitor_match_reviews`、`ai_price_recommendations` 顯示比對覆蓋率、PChome 優勢、MOMO 威脅、AI 挑品、待比對優先清單與 PChome 覆核隊列;`filter=ai_picks` 可查看 50 品 AI 挑品列表,`filter=pchome_review` 可直接查看需人工處理的比價覆核 SKU,並以 DB 分頁支援 search/category/status 後的完整隊列,不得只截前 50 筆。覆核狀態篩選必須至少包含全部、需單位價、身份否決、低信心、價格過期與找不到同款,讓人工可依 matcher 診斷類型分批處理。列內顯示候選 PChome 商品、候選價、match score、單位價換算摘要、人工動作與 matcher 診斷原因標籤(品牌不符、商品線不符、容量差異、組合差異、需單位價、價差極端等),不得只顯示籠統「待比對」。`/api/export/excel/pchome-review` 必須匯出同一套覆核隊列、人工處置、候選 PChome、單位價比較與原始診斷,讓人工覆核、簡報與後續 AI 分析共用同一份證據。`/api/pchome-review/<sku>/decision` 是人工閉環入口:`accept_identity` 才可把候選寫入 `competitor_prices` 與 `competitor_price_history` 並打上 `manual_review/manual_accept/identity_v2`;`reject_identity` 與 `unit_price_required` 只寫 `competitor_match_reviews` 並追加 manual attempt,不得把不同販售組合或否決候選灌入正式價差。PChome feeder 後續搜尋同一候選時必須讀取 `competitor_match_reviews`:已否決候選寫 `manual_rejected` 並跳過正式寫入,且必須繼續評估下一個候選,不能讓已否決候選長期阻塞同 SKU;已標記單位價候選寫 `manual_unit_price_required`;已採用候選可保守補到最低門檻並保留 `manual_review/manual_accept` 標籤。搜尋候選池只有強同款分數達 `0.90` 才可提前停止,避免 0.76 灰區候選卡掉後續更精準搜尋詞。人工 `reject_identity`、`unit_price_required`、`needs_research` 若命中當前正式候選,必須將同候選 `competitor_prices` 過期,不得繼續顯示正式總價差。商品列表必須將 `manual_rejected`、`manual_unit_price_required`、`manual_needs_research` 顯示為明確人工閉環狀態,不可回落成籠統「待比對」。`fetch_competitor_coverage()` 必須輸出人工採用、人工否決、人工單位價與採用率,daily/growth/PPT 共用 payload 必須顯示人工閉環成效,避免只呈現待審數。商品看板深度快取同時寫入 `data/dashboard_full_cache.pkl`,供多個 Gunicorn worker 共用,避免部署後各 worker 重複重建 7,000+ 商品統計造成開頁變慢;所有資料異動與 AI 挑品重算都透過 `clear_dashboard_cache()` 同步清除記憶體與共享快取,手動重算 API 會立即預熱商品看板快取,避免第一位使用者承擔重建成本。
|
||||
- 商品看板第一屏:`/` 的 V2 看板直接以 `products`、`price_records`、`competitor_prices`、`competitor_match_attempts`、`competitor_match_reviews`、`ai_price_recommendations` 顯示比對覆蓋率、PChome 優勢、MOMO 威脅、AI 挑品、待比對優先清單與 PChome 覆核隊列;`filter=ai_picks` 可查看 50 品 AI 挑品列表,`filter=pchome_review` 可直接查看需人工處理的比價覆核 SKU,並以 DB 分頁支援 search/category/status 後的完整隊列,不得只截前 50 筆。覆核狀態篩選必須至少包含全部、需單位價、已排除、低信心、價格過期與找不到同款,讓人工可依 matcher 診斷類型分批處理。列內顯示候選 PChome 商品、候選價、match score、單位價換算摘要、人工動作與 matcher 診斷原因標籤(品牌不符、商品線不符、容量差異、組合差異、需單位價、價差極端等),不得只顯示籠統「待比對」。`/api/export/excel/pchome-review` 必須匯出同一套覆核隊列、人工處置、候選 PChome、單位價比較與原始診斷,讓人工覆核、簡報與後續 AI 分析共用同一份證據。`/api/pchome-review/<sku>/decision` 是人工閉環入口:`accept_identity` 才可把候選寫入 `competitor_prices` 與 `competitor_price_history` 並打上 `manual_review/manual_accept/identity_v2`;`reject_identity` 與 `unit_price_required` 只寫 `competitor_match_reviews` 並追加 manual attempt,不得把不同販售組合或否決候選灌入正式價差。PChome feeder 後續搜尋同一候選時必須讀取 `competitor_match_reviews`:已否決候選寫 `manual_rejected` 並跳過正式寫入,且必須繼續評估下一個候選,不能讓已否決候選長期阻塞同 SKU;已標記單位價候選寫 `manual_unit_price_required`;已採用候選可保守補到最低門檻並保留 `manual_review/manual_accept` 標籤。搜尋候選池只有強同款分數達 `0.90` 才可提前停止,避免 0.76 灰區候選卡掉後續更精準搜尋詞。人工 `reject_identity`、`unit_price_required`、`needs_research` 若命中當前正式候選,必須將同候選 `competitor_prices` 過期,不得繼續顯示正式總價差。商品列表必須將 `manual_rejected`、`manual_unit_price_required`、`manual_needs_research` 顯示為明確人工閉環狀態,不可回落成籠統「待比對」。`fetch_competitor_coverage()` 必須輸出人工採用、人工否決、人工單位價與採用率,daily/growth/PPT 共用 payload 必須顯示人工閉環成效,避免只呈現待審數。商品看板深度快取同時寫入 `data/dashboard_full_cache.pkl`,供多個 Gunicorn worker 共用,避免部署後各 worker 重複重建 7,000+ 商品統計造成開頁變慢;所有資料異動與 AI 挑品重算都透過 `clear_dashboard_cache()` 同步清除記憶體與共享快取,手動重算 API 會立即預熱商品看板快取,避免第一位使用者承擔重建成本。
|
||||
|
||||
| 角色 | 模型 | 主機 | 成本 | 每日限額 |
|
||||
|------|------|------|------|---------|
|
||||
@@ -349,8 +349,8 @@ LEFT JOIN competitor_prices cp
|
||||
- `marketplace_product_matcher.py` 的擴充只能走「正向證據 + 反向 veto」:品牌一致、商品線/型號訊號強、價格合理且無 hard veto 時才允許 `strong_product_line_match` 加分;補充瓶/補充包/refill 與一般正裝不互相配對,分享組/加量組/明星組等組合包不得誤配單品。
|
||||
- 套組/買送/件數不同但品牌、核心商品線與單一基礎規格一致時,matcher 必須回傳 `comparison_mode='unit_comparable'` 與 `unit_comparable` reason;Feeder 只能寫入 `competitor_match_attempts.attempt_status='unit_comparable'` 或 `refresh_unit_comparable`,不得寫入 `competitor_prices`。Dashboard 與 `competitor_intel_repository` 必須用 `build_unit_price_comparison()` 產生每 ml / 每 g / 每入單位價證據,讓 PPT / AI 報表可說明「需單位價比較」而不是把總價當同款價差。商品看板在正式配對尚未成立時,仍必須顯示最佳候選 PChome 商品名稱、候選價與「候選價,需單位換算」說明,讓人工覆核可直接看見下一步;daily/growth、PPT 與 OpenClaw 摘要不得自建查詢,需消費 `fetch_competitor_review_queue()` 與 coverage 的 `unit_comparable_count`。若任一側含多個不同容量/重量規格,視為多品項套組,不可進 `unit_comparable`。
|
||||
- PChome feeder 的外部 request timeout 由 `PCHOME_FEEDER_TIMEOUT` 控制,預設 12 秒;排程不得因單一 PChome 搜尋 API timeout 被拖到數分鐘。
|
||||
- 商品看板的 PChome 狀態必須把 matcher 診斷原因翻成可行動語意:品牌衝突、規格衝突、補充包差異、組合差異、商品線不符等,不可只顯示籠統「待比對」或「身份否決」。
|
||||
- Dashboard 必須把「待比對」拆成可診斷狀態:`價格過期待刷新`、`舊版配對待重驗`、`低分配對待審`、`身份否決`、`需單位價比較`、`找不到同款`、`抓取異常`、`尚未搜尋`。不可再用單一「待比對」掩蓋資料品質原因。
|
||||
- 商品看板的 PChome 狀態必須把 matcher 診斷原因翻成可行動語意:品牌不符已排除、規格不符已排除、補充包不相容、組合規格不相容、系列不符已排除、需單位價比較、低信心待補強等,不可只顯示籠統「待比對」或「身份否決」。
|
||||
- Dashboard 必須把「待比對」拆成可診斷狀態:`價格過期待刷新`、`舊版配對待重驗`、`低分配對待補強`、`已排除`、`需單位價比較`、`找不到同款`、`抓取異常`、`尚未搜尋`。硬性不相容候選應顯示為已排除/不相容,不得讓使用者誤以為每筆都需要人工待審。
|
||||
|
||||
### 執行方式
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
- 2026-05-20 追記:同步背景 PChome 比價人工覆核閉環後的 `services/competitor_intel_repository.py` 行數;此處只更新 inventory,不變更競品情報 repository 行為。
|
||||
- 2026-05-20 追記:同步背景 PChome identity / price direction 更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更商品比對行為。
|
||||
- 2026-05-20 追記:同步背景 PChome crawler 搜尋韌性擴充後的 `services/pchome_crawler.py` 行數;此處只更新 inventory,不變更 PChome crawler 行為。
|
||||
- 2026-05-20 追記:同步 PChome 近門檻候選重評與 matcher 系列/刀片數防錯配更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;此處只更新 inventory,不變更比價行為。
|
||||
|
||||
## 達到或超過 800 行檔案清單
|
||||
|
||||
@@ -51,14 +52,14 @@
|
||||
| 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders |
|
||||
| 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers |
|
||||
| 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting |
|
||||
| 972 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / persistence normalization |
|
||||
| 1128 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / persistence normalization |
|
||||
| 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service |
|
||||
| 844 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing |
|
||||
| 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy |
|
||||
| 1042 | `services/code_review_pipeline_service.py` | P2 Code review pipeline service | scan orchestration / finding normalization / persistence adapter |
|
||||
| 953 | `routes/export_routes.py` | P2 Export flow | export command/router glue / file path / download orchestration |
|
||||
| 816 | `services/ppt_vision_service.py` | P2 PPT vision QA service | runtime state / queue status / model probe / audit execution 分離 |
|
||||
| 1292 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / cache strategy |
|
||||
| 1592 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / cache strategy |
|
||||
| 1120 | `services/competitor_intel_repository.py` | P2 competitor intel repository | review queue query / cache shaping / formatting helpers |
|
||||
| 805 | `routes/bot_api_routes.py` | P2 Bot API Blueprint | route glue / bot action service |
|
||||
|
||||
|
||||
@@ -1712,7 +1712,68 @@ def api_pchome_match_backfill():
|
||||
import threading
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
limit = max(5, min(int(payload.get('limit', 60)), 160))
|
||||
try:
|
||||
limit = max(5, min(int(payload.get('limit', 60)), 160))
|
||||
except (TypeError, ValueError):
|
||||
limit = 60
|
||||
|
||||
from services.pchome_backfill_status import (
|
||||
PchomeBackfillAlreadyRunning,
|
||||
fail_pchome_backfill_run,
|
||||
finish_pchome_backfill_run,
|
||||
get_pchome_backfill_status,
|
||||
start_pchome_backfill_run,
|
||||
update_pchome_backfill_run,
|
||||
)
|
||||
|
||||
def _feeder_result_payload(result):
|
||||
return {
|
||||
'total_skus': int(getattr(result, 'total_skus', 0) or 0),
|
||||
'matched': int(getattr(result, 'matched', 0) or 0),
|
||||
'skipped_no_result': int(getattr(result, 'skipped_no_result', 0) or 0),
|
||||
'skipped_low_score': int(getattr(result, 'skipped_low_score', 0) or 0),
|
||||
'errors': int(getattr(result, 'errors', 0) or 0),
|
||||
'history_written': int(getattr(result, 'history_written', 0) or 0),
|
||||
'attempts_written': int(getattr(result, 'attempts_written', 0) or 0),
|
||||
'duration_sec': round(float(getattr(result, 'duration_sec', 0) or 0), 2),
|
||||
}
|
||||
|
||||
def _pick_result_payload(result):
|
||||
return {
|
||||
'candidates': int(getattr(result, 'candidates', 0) or 0),
|
||||
'written': int(getattr(result, 'written', 0) or 0),
|
||||
'generated_at': getattr(result, 'generated_at', None),
|
||||
}
|
||||
|
||||
def _combined_feeder_payload(revalidation_result, feeder_result):
    """Merge the revalidation and unmatched-backfill results into one payload.

    Top-level numbers are the per-key sums of both stages; the untouched
    per-stage payloads are attached so callers can still tell them apart.

    Args:
        revalidation_result: Result of the near-threshold re-scoring stage.
        feeder_result: Result of the unmatched-priority backfill stage.

    Returns:
        dict: Summed counters and ``duration_sec`` (rounded to two decimals),
        plus ``retryable_candidate_revalidation`` and
        ``unmatched_priority_backfill`` holding each stage's own payload.
    """
    revalidation_payload = _feeder_result_payload(revalidation_result)
    feeder_payload = _feeder_result_payload(feeder_result)

    # Sum whatever numeric fields the per-stage payload exposes instead of
    # keeping a second hand-maintained copy of the field list here.
    combined = {
        key: revalidation_payload[key] + feeder_payload[key]
        for key in feeder_payload
    }
    # Adding two already-rounded floats can reintroduce binary noise.
    combined['duration_sec'] = round(combined['duration_sec'], 2)
    combined['retryable_candidate_revalidation'] = revalidation_payload
    combined['unmatched_priority_backfill'] = feeder_payload
    return combined
|
||||
|
||||
try:
|
||||
run = start_pchome_backfill_run(
|
||||
limit=limit,
|
||||
operator=session.get('username') or 'web',
|
||||
)
|
||||
except PchomeBackfillAlreadyRunning as exc:
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'message': 'PChome 補抓已在執行中,請稍後查看進度',
|
||||
'data': exc.status,
|
||||
}), 409
|
||||
|
||||
run_id = run['run_id']
|
||||
|
||||
def _run_backfill():
|
||||
try:
|
||||
@@ -1723,24 +1784,67 @@ def api_pchome_match_backfill():
|
||||
from services.competitor_price_feeder import CompetitorPriceFeeder
|
||||
|
||||
engine = create_engine(DATABASE_PATH)
|
||||
result = CompetitorPriceFeeder(engine=engine).run_unmatched_priority(limit=limit)
|
||||
feeder = CompetitorPriceFeeder(engine=engine)
|
||||
revalidation_limit = min(limit, 80)
|
||||
update_pchome_backfill_run(
|
||||
run_id,
|
||||
stage='revalidating',
|
||||
message=f'正在重新評分 {revalidation_limit} 筆近門檻 PChome 候選',
|
||||
)
|
||||
revalidation_result = feeder.run_retryable_candidate_revalidation(
|
||||
limit=revalidation_limit,
|
||||
min_score=0.70,
|
||||
)
|
||||
unmatched_limit = max(5, min(limit, limit - int(getattr(revalidation_result, 'total_skus', 0) or 0)))
|
||||
update_pchome_backfill_run(
|
||||
run_id,
|
||||
stage='matching',
|
||||
message=f'正在補抓 {unmatched_limit} 筆高優先待比對商品',
|
||||
)
|
||||
result = feeder.run_unmatched_priority(limit=unmatched_limit)
|
||||
result_payload = _combined_feeder_payload(revalidation_result, result)
|
||||
update_pchome_backfill_run(
|
||||
run_id,
|
||||
stage='generating_picks',
|
||||
message='PChome 補抓完成,正在重算 AI 挑品清單',
|
||||
result=result_payload,
|
||||
)
|
||||
pick_result = generate_product_pick_list(engine, limit=50)
|
||||
pick_payload = _pick_result_payload(pick_result)
|
||||
update_pchome_backfill_run(
|
||||
run_id,
|
||||
stage='clearing_cache',
|
||||
message='AI 挑品已重算,正在清除看板快取',
|
||||
result=result_payload,
|
||||
pick_result=pick_payload,
|
||||
)
|
||||
from services.cache_manager import clear_dashboard_cache
|
||||
clear_dashboard_cache()
|
||||
clear_competitor_intel_cache()
|
||||
finish_pchome_backfill_run(
|
||||
run_id,
|
||||
result=result_payload,
|
||||
pick_result=pick_payload,
|
||||
message=(
|
||||
f"PChome 補抓完成:比對 {result_payload['total_skus']} 筆、"
|
||||
f"新增/更新 {result_payload['matched']} 筆、"
|
||||
f"AI 挑品寫入 {pick_payload['written']} 筆"
|
||||
),
|
||||
)
|
||||
logger.info(
|
||||
"[PChomeBackfill] done total=%s matched=%s no=%s low=%s errors=%s history=%s duration=%ss pick_written=%s",
|
||||
result.total_skus,
|
||||
result.matched,
|
||||
result.skipped_no_result,
|
||||
result.skipped_low_score,
|
||||
result.errors,
|
||||
result.history_written,
|
||||
result.duration_sec,
|
||||
result_payload['total_skus'],
|
||||
result_payload['matched'],
|
||||
result_payload['skipped_no_result'],
|
||||
result_payload['skipped_low_score'],
|
||||
result_payload['errors'],
|
||||
result_payload['history_written'],
|
||||
result_payload['duration_sec'],
|
||||
pick_result.written,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error(f"[PChomeBackfill] 背景補抓失敗: {exc}")
|
||||
fail_pchome_backfill_run(run_id, str(exc))
|
||||
logger.error(f"[PChomeBackfill] 背景補抓失敗: {exc}", exc_info=True)
|
||||
|
||||
thread = threading.Thread(target=_run_backfill, daemon=True)
|
||||
thread.start()
|
||||
@@ -1749,9 +1853,22 @@ def api_pchome_match_backfill():
|
||||
'success': True,
|
||||
'message': f'已啟動 PChome 待比對補抓,優先處理 {limit} 筆高價未配對商品;完成後會重算 AI 挑品清單',
|
||||
'limit': limit,
|
||||
'data': get_pchome_backfill_status(),
|
||||
}), 202
|
||||
|
||||
|
||||
@ai_bp.route('/api/ai/pchome-match/backfill/status', methods=['GET'])
@login_required
def api_pchome_match_backfill_status():
    """Return the background run status of the PChome unmatched-product backfill."""
    from services.pchome_backfill_status import get_pchome_backfill_status

    # Same envelope shape as the other endpoints here: success flag + data.
    response_body = {
        'success': True,
        'data': get_pchome_backfill_status(),
    }
    return jsonify(response_body)
|
||||
|
||||
|
||||
@ai_bp.route('/api/ai/icaim/trigger', methods=['POST'])
|
||||
@login_required
|
||||
def api_icaim_trigger():
|
||||
|
||||
@@ -60,7 +60,7 @@ REVIEW_STATUS_OPTIONS = [
|
||||
'label': '需單位價',
|
||||
'statuses': ('unit_comparable', 'refresh_unit_comparable'),
|
||||
},
|
||||
{'key': 'identity_veto', 'label': '身份否決', 'statuses': ('identity_veto',)},
|
||||
{'key': 'identity_veto', 'label': '已排除', 'statuses': ('identity_veto',)},
|
||||
{'key': 'low_score', 'label': '低信心', 'statuses': ('low_score',)},
|
||||
{'key': 'expired_match', 'label': '價格過期', 'statuses': ('expired_match',)},
|
||||
{'key': 'no_result', 'label': '找不到同款', 'statuses': ('no_result', 'refresh_no_result')},
|
||||
@@ -88,22 +88,22 @@ def _to_float(value):
|
||||
|
||||
def _diagnostic_match_rejection_label(diagnostic_text, score_text, *, blocked=True):
|
||||
diagnostic_text = diagnostic_text or ''
|
||||
suffix = '已停止自動採用' if blocked else '不自動採用以避免錯配'
|
||||
suffix = '已排除,不進入價格比較' if blocked else '暫不採用,等待補搜尋或人工補證據'
|
||||
if 'unit_comparable' in diagnostic_text:
|
||||
return '需單位價比較', f'{score_text},同核心商品但販售組合不同,需轉換每 ml / 每入後再判讀'
|
||||
if 'refill_pack_conflict' in diagnostic_text:
|
||||
return '補充包差異待審', f'{score_text},補充瓶/補充包與一般正裝不同,{suffix}'
|
||||
return '補充包不相容', f'{score_text},補充瓶/補充包與一般正裝不同,{suffix}'
|
||||
if any(token in diagnostic_text for token in ('bundle_offer_conflict', 'multi_component_conflict')):
|
||||
return '組合差異待審', f'{score_text},組合包/多件組與單品不同,{suffix}'
|
||||
return '組合規格不相容', f'{score_text},組合包/多件組與單品不同,{suffix}'
|
||||
if 'brand_conflict' in diagnostic_text:
|
||||
return '品牌衝突待審', f'{score_text},品牌不一致,{suffix}'
|
||||
return '品牌不符已排除', f'{score_text},品牌不一致,{suffix}'
|
||||
if any(token in diagnostic_text for token in ('volume_conflict', 'weight_conflict', 'count_conflict', 'component_count_conflict')):
|
||||
return '規格衝突待審', f'{score_text},容量/件數不一致,{suffix}'
|
||||
return '規格不符已排除', f'{score_text},容量/件數不一致,{suffix}'
|
||||
if 'type_conflict' in diagnostic_text:
|
||||
return '品類衝突待審', f'{score_text},品類不一致,{suffix}'
|
||||
if 'product_line_conflict' in diagnostic_text:
|
||||
return '商品線不符待審', f'{score_text},商品線訊號不足,{suffix}'
|
||||
return '身份否決' if blocked else '低信心待審', f'{score_text},{suffix}'
|
||||
return '品類不符已排除', f'{score_text},品類不一致,{suffix}'
|
||||
if any(token in diagnostic_text for token in ('product_line_conflict', 'model_line_conflict')):
|
||||
return '系列不符已排除', f'{score_text},商品線/型號不一致,{suffix}'
|
||||
return '身份不符已排除' if blocked else '低信心待補強', f'{score_text},{suffix}'
|
||||
|
||||
|
||||
def _build_pchome_match_status(attempt=None, ineligible=None):
|
||||
@@ -218,7 +218,7 @@ def _build_pchome_match_status(attempt=None, ineligible=None):
|
||||
}
|
||||
if reason == 'below_score_floor':
|
||||
return {
|
||||
'label': '低分配對待審',
|
||||
'label': '低分配對待補強',
|
||||
'tone': 'neutral',
|
||||
'summary': '已有候選但低於高信心門檻,避免錯配所以暫不採用',
|
||||
'detail': score_text,
|
||||
|
||||
10
scheduler.py
10
scheduler.py
@@ -2210,6 +2210,7 @@ def run_pchome_match_backfill_task():
|
||||
)
|
||||
feeder = CompetitorPriceFeeder(engine=engine)
|
||||
refresh_result = feeder.run_expired_identity_refresh(limit=240)
|
||||
retryable_result = feeder.run_retryable_candidate_revalidation(limit=160, min_score=0.70)
|
||||
feeder_result = feeder.run_unmatched_priority(limit=120)
|
||||
pick_result = generate_product_pick_list(engine, limit=50)
|
||||
clear_dashboard_cache()
|
||||
@@ -2232,6 +2233,10 @@ def run_pchome_match_backfill_task():
|
||||
"expired_identity_refresh_no_result": refresh_result.skipped_no_result,
|
||||
"expired_identity_refresh_low_score": refresh_result.skipped_low_score,
|
||||
"expired_identity_refresh_errors": refresh_result.errors,
|
||||
"retryable_candidate_revalidation_total": retryable_result.total_skus,
|
||||
"retryable_candidate_revalidation_matched": retryable_result.matched,
|
||||
"retryable_candidate_revalidation_low_score": retryable_result.skipped_low_score,
|
||||
"retryable_candidate_revalidation_errors": retryable_result.errors,
|
||||
"pick_candidates": pick_result.candidates,
|
||||
"pick_written": pick_result.written,
|
||||
"status": "Success",
|
||||
@@ -2240,11 +2245,12 @@ def run_pchome_match_backfill_task():
|
||||
f"[Scheduler] [PChomeBackfill] ✅ 完成 | "
|
||||
f"revalidated={revalidation_result.promoted_fresh}+{revalidation_result.promoted_expired} "
|
||||
f"refreshed={refresh_result.matched}/{refresh_result.total_skus} "
|
||||
f"retryable={retryable_result.matched}/{retryable_result.total_skus} "
|
||||
f"matched={feeder_result.matched}/{feeder_result.total_skus} "
|
||||
f"history_written={feeder_result.history_written} "
|
||||
f"pick_written={pick_result.written} "
|
||||
f"errors={feeder_result.errors + refresh_result.errors} "
|
||||
f"耗時={feeder_result.duration_sec + refresh_result.duration_sec}s"
|
||||
f"errors={feeder_result.errors + refresh_result.errors + retryable_result.errors} "
|
||||
f"耗時={feeder_result.duration_sec + refresh_result.duration_sec + retryable_result.duration_sec}s"
|
||||
)
|
||||
_save_stats('pchome_match_backfill', stats)
|
||||
|
||||
|
||||
@@ -673,6 +673,83 @@ class CompetitorPriceFeeder:
|
||||
).fetchall()
|
||||
return [dict(r._mapping) for r in rows]
|
||||
|
||||
def _fetch_retryable_candidate_skus(self, limit: int = 80, min_score: float = 0.70) -> list:
    """
    Fetch near-threshold, non-hard-veto candidates so they can be re-scored
    after a matcher upgrade.

    Per the original note, this path does not search again: it reuses the
    PChome product_id left by the previous attempt, and is meant for true
    matches the older scorer left stuck around 0.70~0.759.

    The query keeps a SKU only when all of the following hold:
      * the product is ACTIVE and has a price record (latest one is used);
      * its most recent pchome attempt with a non-empty candidate id has
        status low_score / refresh_low_score, a score >= ``min_score`` and
        no hard veto;
      * there is no unexpired pchome row in competitor_prices scoring at or
        above MIN_MATCH_SCORE and tagged identity_v2.

    Args:
        limit: Maximum rows to return; clamped to the range 1..300.
        min_score: Lowest previous best_match_score to reconsider.

    Returns:
        list: One dict per row, ordered by previous score (desc), MOMO price
        (desc), then SKU.

    Raises:
        RuntimeError: If no SQLAlchemy engine was injected.
    """
    if self.engine is None:
        raise RuntimeError("需要注入 SQLAlchemy engine")

    from sqlalchemy import text

    candidate_query = text("""
        WITH latest_momo AS (
            SELECT
                p.id AS product_id,
                p.i_code AS sku,
                p.name,
                p.category,
                pr.price AS momo_price,
                ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC) AS rn
            FROM products p
            JOIN price_records pr ON pr.product_id = p.id
            WHERE p.status = 'ACTIVE'
        ),
        latest_attempt AS (
            SELECT DISTINCT ON (cma.sku)
                cma.sku,
                cma.best_competitor_product_id,
                cma.best_competitor_product_name,
                cma.best_match_score,
                cma.attempt_status,
                cma.hard_veto,
                cma.attempted_at
            FROM competitor_match_attempts cma
            WHERE cma.source = 'pchome'
              AND cma.best_competitor_product_id IS NOT NULL
              AND cma.best_competitor_product_id <> ''
            ORDER BY cma.sku, cma.attempted_at DESC, cma.id DESC
        )
        SELECT
            lm.product_id,
            lm.sku,
            lm.name,
            lm.category,
            lm.momo_price,
            la.best_competitor_product_id AS competitor_product_id,
            la.best_competitor_product_name AS competitor_product_name,
            la.best_match_score,
            la.attempt_status
        FROM latest_momo lm
        JOIN latest_attempt la
          ON la.sku = lm.sku
        LEFT JOIN competitor_prices cp
          ON cp.sku = lm.sku
         AND cp.source = 'pchome'
         AND (cp.expires_at IS NULL OR cp.expires_at > CURRENT_TIMESTAMP)
         AND COALESCE(cp.match_score, 0) >= :match_score_floor
         AND COALESCE(cp.tags, '[]'::jsonb) ? 'identity_v2'
        WHERE lm.rn = 1
          AND cp.sku IS NULL
          AND la.attempt_status IN ('low_score', 'refresh_low_score')
          AND COALESCE(la.best_match_score, 0) >= :min_score
          AND COALESCE(la.hard_veto, false) = false
        ORDER BY la.best_match_score DESC NULLS LAST, lm.momo_price DESC NULLS LAST, lm.sku
        LIMIT :limit
    """)

    with self.engine.connect() as conn:
        # Bind values are coerced here; the row cap is clamped to 1..300.
        bind_params = {
            "limit": max(1, min(int(limit), 300)),
            "min_score": float(min_score),
            "match_score_floor": MIN_MATCH_SCORE,
        }
        result_rows = conn.execute(candidate_query, bind_params).fetchall()

    return [dict(row._mapping) for row in result_rows]
|
||||
|
||||
def _fetch_expired_identity_skus(self, limit: int = 120) -> list:
|
||||
"""
|
||||
取得 identity_v2 已確認、但 PChome 價格快取過期的商品。
|
||||
@@ -1444,6 +1521,25 @@ class CompetitorPriceFeeder:
|
||||
|
||||
return self._run_known_identity_refresh_items(skus, source=source, label="identity_v2 過期價格刷新")
|
||||
|
||||
def run_retryable_candidate_revalidation(
    self,
    limit: int = 80,
    min_score: float = 0.70,
    source: str = "pchome",
) -> FeederResult:
    """Re-score near-threshold candidates so they do not stay stuck on an
    old low_score verdict after the matcher is upgraded.

    Args:
        limit: Maximum number of candidates to load.
        min_score: Lowest previous match score to reconsider.
        source: Competitor source passed to the refresh routine.

    Returns:
        FeederResult: Outcome of the refresh run. If loading the candidates
        fails, a result built as ``FeederResult(0, 0, 0, 0, 1, 0.0)`` is
        returned instead of raising (presumably a single error with no work
        done - confirm against the FeederResult field order).
    """
    try:
        skus = self._fetch_retryable_candidate_skus(limit=limit, min_score=min_score)
    except Exception as e:
        # Best-effort by design: report and return an error result rather
        # than abort the caller. exc_info keeps the traceback, which the
        # bare message alone loses.
        logger.error(f"[Feeder] 讀取近門檻候選失敗: {e}", exc_info=True)
        return FeederResult(0, 0, 0, 0, 1, 0.0)

    return self._run_known_identity_refresh_items(
        skus,
        source=source,
        label="近門檻候選重新評分",
    )
|
||||
|
||||
def run_unmatched_priority(self, limit: int = 80, source: str = "pchome") -> FeederResult:
|
||||
"""優先補抓尚未有有效 PChome 配對的高價商品。"""
|
||||
try:
|
||||
|
||||
@@ -53,6 +53,13 @@ NOISE_PHRASES = (
|
||||
"優惠組",
|
||||
"分享包",
|
||||
"組合",
|
||||
"多款可選",
|
||||
"多款任選",
|
||||
"任選多款",
|
||||
"多色可選",
|
||||
"色號可選",
|
||||
"平行輸入",
|
||||
"大容量",
|
||||
)
|
||||
|
||||
GENERIC_TOKENS = {
|
||||
@@ -91,6 +98,17 @@ GENERIC_TOKENS = {
|
||||
"mg",
|
||||
"la",
|
||||
"paris",
|
||||
"多款",
|
||||
"可選",
|
||||
"任選",
|
||||
"平行輸入",
|
||||
"大容量",
|
||||
"日本",
|
||||
"韓國",
|
||||
"澳洲",
|
||||
"法國",
|
||||
"英國",
|
||||
"美國",
|
||||
}
|
||||
|
||||
BRAND_ALIAS_OVERRIDES = {
|
||||
@@ -102,6 +120,23 @@ BRAND_ALIAS_OVERRIDES = {
|
||||
"sisley": ("希思黎", "sisley"),
|
||||
"gennies": ("奇妮", "gennies"),
|
||||
"uruhimemomoko": ("潤姬桃子", "uruhimemomoko", "uruhime momoko"),
|
||||
"arau baby": ("arau baby", "arau", "愛樂寶", "saraya"),
|
||||
"sebamed": ("sebamed", "施巴"),
|
||||
"shu uemura": ("shu uemura", "shuuemura", "植村秀"),
|
||||
"johnsons": ("johnsons", "johnson's", "johnson", "嬌生"),
|
||||
"gillette": ("gillette", "吉列"),
|
||||
"schick": ("schick", "舒適牌"),
|
||||
"obge": ("obge",),
|
||||
"vaseline": ("vaseline", "凡士林"),
|
||||
"eaoron": ("eaoron",),
|
||||
"kameria": ("kameria", "凱蜜菈"),
|
||||
"cocodor": ("cocodor",),
|
||||
"peripera": ("peripera",),
|
||||
"solone": ("solone",),
|
||||
"im meme": ("im meme", "i'm meme", "i’m meme"),
|
||||
"febreze": ("febreze", "風倍清"),
|
||||
"jo malone": ("jo malone",),
|
||||
"prada": ("prada", "普拉達"),
|
||||
}
|
||||
|
||||
PRODUCT_TYPES = {
|
||||
@@ -120,9 +155,15 @@ PRODUCT_TYPES = {
|
||||
"保健": ("錠", "膠囊", "粉", "飲", "包", "健康食品"),
|
||||
}
|
||||
|
||||
COUNT_UNITS = {"入", "組", "瓶", "支", "條", "盒", "包", "袋", "片", "顆", "粒", "錠", "枚", "件", "罐", "杯", "本"}
|
||||
COUNT_UNITS = {"入", "組", "瓶", "支", "條", "盒", "包", "袋", "片", "顆", "粒", "錠", "枚", "件", "罐", "杯", "本", "刀把", "刀片", "刀頭", "蕊"}
|
||||
COUNT_UNIT_PATTERN = r"(?:刀把|刀片|刀頭|入|組|瓶|支|條|盒|包|袋|片|顆|粒|錠|枚|件|罐|杯|本|蕊)"
|
||||
PIECE_UNITS = {"包", "袋", "片", "顆", "粒", "錠", "枚"}
|
||||
CONTAINER_UNITS = {"入", "組", "盒", "罐", "杯", "本", "瓶", "支", "條", "件"}
|
||||
COUNT_UNIT_FAMILIES = {
|
||||
"刀片": "blade",
|
||||
"刀頭": "blade",
|
||||
"蕊": "refill",
|
||||
}
|
||||
ENGLISH_COUNT_UNIT_RE = r"(?:pcs?|pieces?|capsules?|caps?|tablets?|tabs?|packs?|sachets?|bottles?|boxes?)"
|
||||
BUNDLE_OFFER_PHRASES = (
|
||||
"囤貨組",
|
||||
@@ -366,6 +407,10 @@ def _convert_volume(value: str, unit: str) -> Optional[tuple[str, float]]:
|
||||
return None
|
||||
|
||||
|
||||
def _count_unit_family(unit: str) -> str:
    """Map a count unit to its family name, or return the unit itself.

    Units listed in COUNT_UNIT_FAMILIES collapse to a shared family so that
    equivalent units compare as equal; any other unit is its own family.
    """
    if unit in COUNT_UNIT_FAMILIES:
        return COUNT_UNIT_FAMILIES[unit]
    return unit
|
||||
|
||||
|
||||
def _extract_specs(
|
||||
text: str,
|
||||
) -> tuple[tuple[float, ...], tuple[float, ...], tuple[float, ...], tuple[tuple[int, str], ...], Optional[int]]:
|
||||
@@ -385,11 +430,11 @@ def _extract_specs(
|
||||
dosages_mg.append(number)
|
||||
|
||||
counts: list[tuple[int, str]] = []
|
||||
for match in re.finditer(r"(\d+)\s*([入組瓶支條盒包袋片顆粒錠枚件罐杯本])", text):
|
||||
for match in re.finditer(rf"(\d+)\s*({COUNT_UNIT_PATTERN})", text):
|
||||
counts.append((int(match.group(1)), match.group(2)))
|
||||
for match in re.finditer(r"([一二兩雙三四五六七八九十])\s*([入組瓶支條盒包袋片顆粒錠枚件罐杯本])", text):
|
||||
for match in re.finditer(rf"([一二兩雙三四五六七八九十])\s*({COUNT_UNIT_PATTERN})", text):
|
||||
counts.append((CHINESE_COUNT[match.group(1)], match.group(2)))
|
||||
for match in re.finditer(r"(?:x|乘)\s*(\d+)\s*([入組瓶支條盒包袋片顆粒錠枚件罐杯本])?", text, re.I):
|
||||
for match in re.finditer(rf"(?:x|乘)\s*(\d+)\s*({COUNT_UNIT_PATTERN})?", text, re.I):
|
||||
unit = match.group(2) or "入"
|
||||
counts.append((int(match.group(1)), unit))
|
||||
for match in re.finditer(rf"(\d+)\s*{ENGLISH_COUNT_UNIT_RE}", text, re.I):
|
||||
@@ -429,7 +474,8 @@ def parse_product_identity(name: str) -> ProductIdentity:
|
||||
searchable = _strip_noise(normalized)
|
||||
tokens = set(_tokenize(searchable))
|
||||
product_type = _extract_product_type(searchable)
|
||||
brand_tokens = _known_brand_tokens(searchable) | _leading_brand_tokens(name, normalized)
|
||||
known_brand_tokens = _known_brand_tokens(searchable)
|
||||
brand_tokens = known_brand_tokens or _leading_brand_tokens(name, normalized)
|
||||
|
||||
core_tokens = {
|
||||
token
|
||||
@@ -539,9 +585,9 @@ def _has_hard_count_unit_conflict(left: ProductIdentity, right: ProductIdentity)
|
||||
left_by_count: dict[int, set[str]] = {}
|
||||
right_by_count: dict[int, set[str]] = {}
|
||||
for count, unit in left.counts:
|
||||
left_by_count.setdefault(count, set()).add(unit)
|
||||
left_by_count.setdefault(count, set()).add(_count_unit_family(unit))
|
||||
for count, unit in right.counts:
|
||||
right_by_count.setdefault(count, set()).add(unit)
|
||||
right_by_count.setdefault(count, set()).add(_count_unit_family(unit))
|
||||
|
||||
for count in set(left_by_count) & set(right_by_count):
|
||||
left_units = left_by_count[count]
|
||||
@@ -559,11 +605,22 @@ def _has_hard_count_unit_conflict(left: ProductIdentity, right: ProductIdentity)
|
||||
def _count_score(left: ProductIdentity, right: ProductIdentity) -> tuple[float, bool]:
|
||||
left_counts = [count for count, _unit in left.counts]
|
||||
right_counts = [count for count, _unit in right.counts]
|
||||
left_by_unit: dict[str, set[int]] = {}
|
||||
right_by_unit: dict[str, set[int]] = {}
|
||||
for count, unit in left.counts:
|
||||
left_by_unit.setdefault(_count_unit_family(unit), set()).add(count)
|
||||
for count, unit in right.counts:
|
||||
right_by_unit.setdefault(_count_unit_family(unit), set()).add(count)
|
||||
|
||||
if left.total_piece_count and right.total_piece_count:
|
||||
if left.total_piece_count == right.total_piece_count:
|
||||
return 1.0, False
|
||||
ratio = max(left.total_piece_count, right.total_piece_count) / max(min(left.total_piece_count, right.total_piece_count), 1)
|
||||
return (0.0, True) if ratio >= 1.5 else (0.45, False)
|
||||
|
||||
for unit in set(left_by_unit) & set(right_by_unit):
|
||||
if left_by_unit[unit] != right_by_unit[unit]:
|
||||
return 0.0, True
|
||||
if left.counts and right.counts:
|
||||
if set(left.counts) & set(right.counts):
|
||||
return 0.85, False
|
||||
@@ -852,6 +909,28 @@ def _has_strong_product_line_signal(
|
||||
return token_score >= 0.56 and chinese_name_score >= 0.45
|
||||
|
||||
|
||||
def _model_line_tokens(identity: ProductIdentity) -> set[str]:
    """Collect the core tokens that name a model line or series.

    Tokens listed in ``GENERIC_TOKENS`` are ignored. From the remaining core
    tokens two kinds of values are kept:

    * the whole token when it is a lowercase latin letter followed by at
      least two lowercase letters, digits or hyphens;
    * the run of two or more CJK characters that directly precedes ``系列``
      inside the token, unless that run is itself a generic token.
    """
    latin_model = re.compile(r"[a-z][a-z0-9-]{2,}")
    series_name = re.compile(r"([\u4e00-\u9fff]{2,})(?:系列)")

    found: set[str] = set()
    for candidate in identity.core_tokens:
        if candidate in GENERIC_TOKENS:
            continue
        if latin_model.fullmatch(candidate):
            found.add(candidate)
        found.update(
            hit.group(1)
            for hit in series_name.finditer(candidate)
            if hit.group(1) not in GENERIC_TOKENS
        )
    return found
|
||||
|
||||
|
||||
def _has_model_line_conflict(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether both products name a model line and share none of them.

    Returns ``False`` when either side yields no model-line tokens, because
    there is then nothing to contradict.
    """
    ours = _model_line_tokens(left)
    theirs = _model_line_tokens(right)
    return bool(ours) and bool(theirs) and ours.isdisjoint(theirs)
|
||||
|
||||
|
||||
def score_marketplace_match(
|
||||
momo_name: str,
|
||||
competitor_name: str,
|
||||
@@ -877,6 +956,9 @@ def score_marketplace_match(
|
||||
reasons.extend(spec_reasons)
|
||||
if left.product_type and right.product_type and left.product_type != right.product_type:
|
||||
reasons.append("type_conflict")
|
||||
model_line_conflict = _has_model_line_conflict(left, right)
|
||||
if model_line_conflict:
|
||||
reasons.append("model_line_conflict")
|
||||
bundle_offer_conflict = (
|
||||
_has_bundle_offer(left) != _has_bundle_offer(right)
|
||||
and not (
|
||||
@@ -905,6 +987,8 @@ def score_marketplace_match(
|
||||
hard_veto = True
|
||||
if _has_refill_pack(left) != _has_refill_pack(right):
|
||||
hard_veto = True
|
||||
if model_line_conflict:
|
||||
hard_veto = True
|
||||
if left_spec_mentions and right_spec_mentions and left_spec_mentions != right_spec_mentions:
|
||||
hard_veto = True
|
||||
if chinese_name_score < 0.16 and token_score < 0.72:
|
||||
|
||||
344
services/pchome_backfill_status.py
Normal file
344
services/pchome_backfill_status.py
Normal file
@@ -0,0 +1,344 @@
|
||||
"""Persist lightweight PChome match backfill run status.
|
||||
|
||||
The PChome backfill endpoint runs in a background thread. A tiny JSON status
|
||||
file gives operators progress, last result, and failure context without adding
|
||||
new schema or blocking the dashboard request path.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
from config import BASE_DIR
|
||||
except Exception: # pragma: no cover - import fallback for isolated scripts
|
||||
BASE_DIR = os.getcwd()
|
||||
|
||||
|
||||
RECENT_RUN_LIMIT = 8
|
||||
ACTIVE_TTL_SECONDS = int(os.getenv("PCHOME_BACKFILL_ACTIVE_TTL_SECONDS", "7200"))
|
||||
|
||||
STAGE_ORDER = (
|
||||
"queued",
|
||||
"revalidating",
|
||||
"matching",
|
||||
"generating_picks",
|
||||
"clearing_cache",
|
||||
"completed",
|
||||
)
|
||||
|
||||
STAGE_LABELS = {
|
||||
"idle": "尚未執行",
|
||||
"queued": "已排入背景補抓",
|
||||
"revalidating": "重新評分近門檻候選",
|
||||
"matching": "比對高優先未配對商品",
|
||||
"generating_picks": "重算 AI 挑品清單",
|
||||
"clearing_cache": "清除看板與競價快取",
|
||||
"completed": "補抓完成",
|
||||
"failed": "補抓失敗",
|
||||
"stale": "執行狀態逾時",
|
||||
}
|
||||
|
||||
|
||||
class PchomeBackfillAlreadyRunning(RuntimeError):
    """Signals that a new backfill was refused because one is still active.

    Attributes:
        status: the status snapshot handed over by the code that raised.
    """

    def __init__(self, status: dict[str, Any]):
        self.status = status
        super().__init__("PChome backfill is already running")
|
||||
|
||||
|
||||
def _status_path() -> str:
|
||||
return os.getenv(
|
||||
"PCHOME_BACKFILL_STATUS_PATH",
|
||||
os.path.join(
|
||||
os.getenv("DATA_DIR", os.path.join(str(BASE_DIR), "data")),
|
||||
"pchome_match_backfill_status.json",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _now_label() -> str:
|
||||
return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
|
||||
def _run_id() -> str:
|
||||
return f"pchome-backfill-{datetime.now().strftime('%Y%m%d%H%M%S')}-{uuid.uuid4().hex[:8]}"
|
||||
|
||||
|
||||
def _read_payload() -> dict[str, Any]:
    """Load the persisted status, falling back to the idle default.

    The default status is returned when the file is missing, when its JSON
    root is not an object, or when reading or normalizing raises.
    """
    location = _status_path()
    try:
        if os.path.isfile(location):
            with open(location, "r", encoding="utf-8") as stream:
                loaded = json.load(stream)
            if isinstance(loaded, dict):
                return _normalize_status(loaded)
    except Exception:
        # Best effort: an unreadable status file is reported as idle.
        return _default_status()
    return _default_status()
|
||||
|
||||
|
||||
def _write_payload(payload: dict[str, Any]) -> None:
    """Normalize ``payload`` and write it to the status file via a temp file.

    The JSON is first written next to the target and then moved over it with
    ``os.replace``, so readers never observe a half-written file.

    Raises:
        OSError: when the directory, temp file or final rename cannot be
            created. The temp file is removed before the error propagates.
    """
    path = _status_path()
    directory = os.path.dirname(path)
    # The pid alone is not unique: per the module docstring the backfill runs
    # in a background thread of the same process that serves status requests.
    # A random suffix keeps concurrent writers on separate temp files.
    tmp_path = f"{path}.{os.getpid()}.{uuid.uuid4().hex[:8]}.tmp"
    if directory:
        # os.makedirs("") raises, so skip it for a bare filename in the cwd.
        os.makedirs(directory, exist_ok=True)
    try:
        with open(tmp_path, "w", encoding="utf-8") as handle:
            json.dump(_normalize_status(payload), handle, ensure_ascii=False, indent=2, default=str)
        os.replace(tmp_path, path)
    except BaseException:
        # Do not leave orphaned temp files behind on a failed write.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
|
||||
|
||||
|
||||
def _default_status() -> dict[str, Any]:
    """Return a fresh idle status with no current run and an empty history."""
    idle = "idle"
    return dict(
        status=idle,
        stage=idle,
        stage_label=STAGE_LABELS[idle],
        progress_pct=0,
        running=False,
        current_run=None,
        recent_runs=[],
        last_result=None,
        last_error=None,
        updated_at=None,
    )
|
||||
|
||||
|
||||
def _age_seconds(value: str | None) -> float | None:
|
||||
if not value:
|
||||
return None
|
||||
try:
|
||||
parsed = datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
|
||||
return max(0.0, time.time() - parsed.timestamp())
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _progress_for_stage(stage: str) -> int:
|
||||
if stage == "failed":
|
||||
return 100
|
||||
if stage not in STAGE_ORDER:
|
||||
return 0
|
||||
return int(round((STAGE_ORDER.index(stage) + 1) / len(STAGE_ORDER) * 100))
|
||||
|
||||
|
||||
def _normalize_run(run: Any) -> dict[str, Any] | None:
|
||||
if not isinstance(run, dict):
|
||||
return None
|
||||
normalized = dict(run)
|
||||
stage = str(normalized.get("stage") or normalized.get("status") or "idle")
|
||||
status = str(normalized.get("status") or "idle")
|
||||
normalized["stage"] = stage
|
||||
normalized["status"] = status
|
||||
normalized["stage_label"] = STAGE_LABELS.get(stage, stage)
|
||||
normalized["progress_pct"] = int(normalized.get("progress_pct") or _progress_for_stage(stage))
|
||||
normalized.setdefault("result", None)
|
||||
normalized.setdefault("pick_result", None)
|
||||
normalized.setdefault("last_error", None)
|
||||
normalized.setdefault("message", None)
|
||||
return normalized
|
||||
|
||||
|
||||
def _normalize_status(payload: dict[str, Any]) -> dict[str, Any]:
    """Project a raw payload onto the canonical status shape.

    Only keys present in the default status are copied. ``current_run`` and
    every entry of ``recent_runs`` go through ``_normalize_run``, and the
    history is capped at ``RECENT_RUN_LIMIT`` entries.

    A current run whose status is ``"running"`` but whose last update is
    older than ``ACTIVE_TTL_SECONDS`` is rewritten as ``"stale"``. A run
    whose age cannot be determined is treated as still fresh. When a current
    run exists, the top-level status fields mirror it.
    """
    normalized = dict(_default_status())
    normalized.update({key: value for key, value in payload.items() if key in normalized})
    current = _normalize_run(payload.get("current_run"))

    # ``"recent_runs": null`` or any non-sequence must not raise here. On read
    # the error would be swallowed and the whole status reset to idle.
    raw_recent = payload.get("recent_runs")
    if not isinstance(raw_recent, (list, tuple)):
        raw_recent = []
    recent_runs = [
        run for run in (_normalize_run(item) for item in raw_recent) if run
    ][:RECENT_RUN_LIMIT]

    if current:
        age = _age_seconds(current.get("updated_at") or current.get("started_at"))
        is_fresh_running = current.get("status") == "running" and (
            age is None or age <= ACTIVE_TTL_SECONDS
        )
        if current.get("status") == "running" and not is_fresh_running:
            # The run stopped reporting: surface it as stale instead of
            # blocking new runs indefinitely.
            current["status"] = "stale"
            current["stage"] = "stale"
            current["stage_label"] = STAGE_LABELS["stale"]
            current["running"] = False
            current["progress_pct"] = 100
            current["last_error"] = current.get("last_error") or "active run exceeded ttl"
        else:
            current["running"] = is_fresh_running
        normalized["current_run"] = current
        normalized["status"] = current["status"]
        normalized["stage"] = current["stage"]
        normalized["stage_label"] = current["stage_label"]
        normalized["progress_pct"] = current["progress_pct"]
        normalized["running"] = bool(current.get("running"))
        normalized["last_result"] = current.get("result") or payload.get("last_result")
        normalized["last_error"] = current.get("last_error") or payload.get("last_error")

    normalized["recent_runs"] = recent_runs
    normalized["updated_at"] = payload.get("updated_at") or (
        current.get("updated_at") if current else None
    )
    return normalized
|
||||
|
||||
|
||||
def get_pchome_backfill_status() -> dict[str, Any]:
    """Return the most recently persisted PChome backfill status."""
    snapshot = _read_payload()
    return snapshot
|
||||
|
||||
|
||||
def start_pchome_backfill_run(limit: int, operator: str | None = None) -> dict[str, Any]:
    """Register a new run in the ``queued`` stage and persist it.

    Args:
        limit: number of items the run is asked to process; stored as ``int``.
        operator: who triggered the run; stored as ``"web"`` when falsy.

    Returns:
        A deep copy of the new run record.

    Raises:
        PchomeBackfillAlreadyRunning: when the persisted status still reports
            a running run. The exception carries that status.
    """
    status = _read_payload()
    if status.get("running"):
        raise PchomeBackfillAlreadyRunning(status)

    now = _now_label()
    run = {
        "run_id": _run_id(),
        "status": "running",
        "stage": "queued",
        "stage_label": STAGE_LABELS["queued"],
        "progress_pct": _progress_for_stage("queued"),
        "running": True,
        "limit": int(limit),
        "operator": operator or "web",
        "started_at": now,
        "updated_at": now,
        "finished_at": None,
        "message": "等待背景執行緒啟動",
        "result": None,
        "pick_result": None,
        "last_error": None,
    }

    recent_runs = list(status.get("recent_runs") or [])
    # A previous run that never reached _finish_run (it went stale, for
    # example) is not in the history yet. Keep it there so operators can
    # still see that it was abandoned once the new run replaces it.
    previous = status.get("current_run")
    if isinstance(previous, dict) and previous.get("run_id"):
        known_ids = {item.get("run_id") for item in recent_runs if isinstance(item, dict)}
        if previous["run_id"] not in known_ids:
            recent_runs.insert(0, previous)

    status["current_run"] = run
    status["recent_runs"] = recent_runs[:RECENT_RUN_LIMIT]
    status["last_error"] = None
    status["updated_at"] = now
    _write_payload(status)
    return deepcopy(run)
|
||||
|
||||
|
||||
def update_pchome_backfill_run(
    run_id: str,
    *,
    stage: str,
    message: str | None = None,
    result: dict[str, Any] | None = None,
    pick_result: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Move a run to ``stage`` and persist the change.

    When the stored current run has a different ``run_id``, a minimal running
    record for ``run_id`` replaces it. ``message``, ``result`` and
    ``pick_result`` are written only when they are not ``None``.

    Returns:
        A deep copy of the updated run record.
    """
    snapshot = _read_payload()
    active = _normalize_run(snapshot.get("current_run")) or {}
    if active.get("run_id") != run_id:
        active = {"run_id": run_id, "status": "running", "started_at": _now_label()}

    stamp = _now_label()
    active["status"] = "running"
    active["stage"] = stage
    active["stage_label"] = STAGE_LABELS.get(stage, stage)
    active["progress_pct"] = _progress_for_stage(stage)
    active["running"] = True
    active["updated_at"] = stamp

    optional_fields = (
        ("message", message),
        ("result", result),
        ("pick_result", pick_result),
    )
    for field, supplied in optional_fields:
        if supplied is not None:
            active[field] = supplied

    snapshot["current_run"] = active
    snapshot["updated_at"] = stamp
    _write_payload(snapshot)
    return deepcopy(active)
|
||||
|
||||
|
||||
def finish_pchome_backfill_run(
    run_id: str,
    *,
    result: dict[str, Any] | None = None,
    pick_result: dict[str, Any] | None = None,
    message: str | None = None,
) -> dict[str, Any]:
    """Record a run as completed and push it onto the recent-run history.

    A falsy ``message`` is replaced by the default completion message.
    """
    final_message = message if message else "PChome 補抓完成"
    return _finish_run(
        run_id,
        status_value="completed",
        stage="completed",
        message=final_message,
        result=result,
        pick_result=pick_result,
        error=None,
    )
|
||||
|
||||
|
||||
def fail_pchome_backfill_run(run_id: str, error: str) -> dict[str, Any]:
    """Record a run as failed with ``error`` and push it onto the history."""
    outcome = "failed"
    return _finish_run(
        run_id,
        status_value=outcome,
        stage=outcome,
        message="PChome 補抓失敗",
        result=None,
        pick_result=None,
        error=error,
    )
|
||||
|
||||
|
||||
def _finish_run(
    run_id: str,
    *,
    status_value: str,
    stage: str,
    message: str,
    result: dict[str, Any] | None,
    pick_result: dict[str, Any] | None,
    error: str | None,
) -> dict[str, Any]:
    """Close a run with a terminal status and persist it.

    The run becomes non-running, gets ``finished_at``, and is placed at the
    front of ``recent_runs``. Any older history entry with the same
    ``run_id`` is dropped and the history is capped at ``RECENT_RUN_LIMIT``.
    When the stored current run has a different ``run_id``, a minimal record
    for ``run_id`` is closed instead.

    Returns:
        A deep copy of the closed run record.
    """
    status = _read_payload()
    current = _normalize_run(status.get("current_run")) or {}
    now = _now_label()
    if current.get("run_id") != run_id:
        current = {"run_id": run_id, "started_at": now}

    current.update(
        {
            "status": status_value,
            "stage": stage,
            "stage_label": STAGE_LABELS.get(stage, stage),
            "progress_pct": _progress_for_stage(stage),
            "running": False,
            "updated_at": now,
            "finished_at": now,
            "message": message,
            "last_error": error,
        }
    )
    # Results reported by earlier stage updates survive unless overridden.
    if result is not None:
        current["result"] = result
    if pick_result is not None:
        current["pick_result"] = pick_result

    recent_runs = [current]
    for run in status.get("recent_runs", []):
        normalized_run = _normalize_run(run)
        if normalized_run and normalized_run.get("run_id") != run_id:
            recent_runs.append(normalized_run)

    status["current_run"] = current
    status["recent_runs"] = recent_runs[:RECENT_RUN_LIMIT]
    # A run that ends without any result (typically a failure) must not wipe
    # the last known result. _normalize_status uses the same fallback.
    status["last_result"] = current.get("result") or status.get("last_result")
    status["last_error"] = error
    status["updated_at"] = now
    _write_payload(status)
    return deepcopy(current)
|
||||
@@ -52,6 +52,32 @@
|
||||
<div class="dashboard-kpi-sub momo-mono">{{ overview.last_pchome_crawled or '尚無 PChome 抓取紀錄' }}</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="dashboard-backfill-card"
|
||||
data-pchome-backfill-card
|
||||
data-backfill-endpoint="/api/ai/pchome-match/backfill"
|
||||
data-status-endpoint="/api/ai/pchome-match/backfill/status"
|
||||
data-pchome-backfill-action="backfillPchomeMatches">
|
||||
<div class="dashboard-backfill-main">
|
||||
<div class="dashboard-backfill-label momo-mono">PCHOME MATCH BACKFILL</div>
|
||||
<div class="dashboard-backfill-title">待比對補抓產線</div>
|
||||
<div class="dashboard-backfill-meta momo-mono">
|
||||
待補抓 {{ overview.pending_match_count | default(0) | number_format }} · 覆核 {{ overview.review_queue_count | default(0) | number_format }} · 單位價 {{ overview.unit_comparable_count | default(0) | number_format }}
|
||||
</div>
|
||||
</div>
|
||||
<div class="dashboard-backfill-progress" aria-hidden="true">
|
||||
<span data-pchome-backfill-progress></span>
|
||||
</div>
|
||||
<div class="dashboard-backfill-status momo-mono">
|
||||
<span data-pchome-backfill-status>讀取狀態中</span>
|
||||
<span data-pchome-backfill-result>--</span>
|
||||
</div>
|
||||
<button class="dashboard-action-button is-primary"
|
||||
type="button"
|
||||
data-pchome-backfill-trigger
|
||||
data-limit="60">
|
||||
<i class="fas fa-magnifying-glass-chart"></i> 補抓 60 筆
|
||||
</button>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
|
||||
@@ -19,4 +19,11 @@
|
||||
|
||||
{% block content %}
|
||||
{% include 'components/_legacy_bridge_panel.html' %}
|
||||
{% set legacy_current = (promo_pages | selectattr('id', 'equalto', current_promo_page) | list | first) if promo_pages is defined else none %}
|
||||
{% set legacy_current_url = legacy_current.url if legacy_current else legacy_bridge_target %}
|
||||
<nav class="visually-hidden" aria-label="Legacy EDM sort compatibility">
|
||||
<a href="{{ legacy_current_url }}?sort_by=name&order=desc">name desc</a>
|
||||
<a href="{{ legacy_current_url }}?sort_by=price&order=desc">price desc</a>
|
||||
<a href="{{ legacy_current_url }}?sort_by=remain_qty&order=desc">remain qty desc</a>
|
||||
</nav>
|
||||
{% endblock %}
|
||||
|
||||
@@ -79,9 +79,9 @@ def test_dashboard_match_status_explains_identity_veto_reason():
|
||||
"error_message": "score=0.32; reasons=refill_pack_conflict",
|
||||
})
|
||||
|
||||
assert bundle["label"] == "組合差異待審"
|
||||
assert bundle["label"] == "組合規格不相容"
|
||||
assert "組合包/多件組" in bundle["summary"]
|
||||
assert refill["label"] == "補充包差異待審"
|
||||
assert refill["label"] == "補充包不相容"
|
||||
assert "補充瓶/補充包" in refill["summary"]
|
||||
|
||||
|
||||
|
||||
@@ -29,6 +29,8 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
|
||||
assert "identity_v2" in source
|
||||
assert "_fetch_expired_identity_skus" in source
|
||||
assert "run_expired_identity_refresh" in source
|
||||
assert "_fetch_retryable_candidate_skus" in source
|
||||
assert "run_retryable_candidate_revalidation" in source
|
||||
assert "refresh_known_identity" in source
|
||||
assert 'attempt_status="unit_comparable"' in source
|
||||
assert 'attempt_status="refresh_unit_comparable"' in source
|
||||
|
||||
@@ -383,8 +383,8 @@ def test_dashboard_v2_shows_pchome_competitor_pricing_and_links():
|
||||
assert "候選:{{ item.pchome_match_attempt.best_competitor_product_name }}" in dashboard
|
||||
assert "候選價,需單位換算" in dashboard
|
||||
assert "_load_pchome_match_attempt_map" in route_source
|
||||
assert "低信心待審" in route_source
|
||||
assert "規格衝突待審" in route_source
|
||||
assert "低信心待補強" in route_source
|
||||
assert "規格不符已排除" in route_source
|
||||
assert "dashboard-review-reasons" in dashboard
|
||||
assert "series.pchome" in page_js
|
||||
assert "label: 'PChome'" in page_js
|
||||
@@ -454,8 +454,12 @@ def test_ai_product_pick_agent_uses_real_competitor_data_and_dashboard_action():
|
||||
assert "@ai_bp.route('/api/ai/product-picks/generate', methods=['POST'])" in route_source
|
||||
assert "generate_product_pick_list(engine" in route_source
|
||||
assert "@ai_bp.route('/api/ai/pchome-match/backfill', methods=['POST'])" in route_source
|
||||
assert "run_unmatched_priority(limit=limit)" in route_source
|
||||
assert "@ai_bp.route('/api/ai/pchome-match/backfill/status', methods=['GET'])" in route_source
|
||||
assert "run_unmatched_priority(limit=unmatched_limit)" in route_source
|
||||
assert "run_retryable_candidate_revalidation" in route_source
|
||||
assert "generate_product_pick_list(engine, limit=50)" in route_source
|
||||
assert "start_pchome_backfill_run" in route_source
|
||||
assert "finish_pchome_backfill_run" in route_source
|
||||
assert "payload.get('limit', 50)" in route_source
|
||||
assert "JSON.stringify({ limit: 50 })" in template
|
||||
assert "完成後會重算 AI 挑品清單" in route_source
|
||||
@@ -476,6 +480,7 @@ def test_ai_product_pick_agent_uses_real_competitor_data_and_dashboard_action():
|
||||
agent_actions_source = (ROOT / "services/agent_actions.py").read_text(encoding="utf-8")
|
||||
assert "def run_pchome_match_backfill_task" in scheduler_source
|
||||
assert "_save_stats('pchome_match_backfill'" in scheduler_source
|
||||
assert "retryable_candidate_revalidation_total" in scheduler_source
|
||||
assert "run_pchome_match_backfill_task" in run_scheduler_source
|
||||
assert "每日 10:30:pchome_match_backfill" in run_scheduler_source
|
||||
assert '"run_pchome_match_backfill_task"' in agent_actions_source
|
||||
@@ -486,6 +491,12 @@ def test_ai_product_pick_agent_uses_real_competitor_data_and_dashboard_action():
|
||||
assert "backfillPchomeMatches" in template
|
||||
assert "/api/ai/product-picks/generate" in template
|
||||
assert "/api/ai/pchome-match/backfill" in template
|
||||
assert "/api/ai/pchome-match/backfill/status" in dashboard_template
|
||||
assert "PCHOME MATCH BACKFILL" in dashboard_template
|
||||
assert "data-pchome-backfill-trigger" in dashboard_template
|
||||
dashboard_js = (ROOT / "web/static/js/page-dashboard-v2.js").read_text(encoding="utf-8")
|
||||
assert "loadPchomeBackfillStatus" in dashboard_js
|
||||
assert "window.backfillPchomeMatches" in dashboard_js
|
||||
assert "'product_pick':['bg-success'" in template
|
||||
assert "kpiMatchRate" in template
|
||||
|
||||
|
||||
@@ -337,6 +337,53 @@ def test_marketplace_matcher_accepts_strong_multi_component_line_without_full_sp
|
||||
assert "strong_component_line_match" in diagnostics.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_accepts_known_brand_alias_and_option_copy():
    """A bracketed brand alias plus "multiple options" copy still matches exactly."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【OBgE】韓國 OBgE 男士自然遮瑕粉底棒13g"
    pchome_title = "OBgE/自然遮瑕粉底棒13g - 多款可選"

    verdict = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=765,
        competitor_price=1099,
    )

    assert verdict.score >= 0.76
    assert verdict.hard_veto is False
    assert verdict.comparison_mode == "exact_identity"
|
||||
|
||||
|
||||
def test_marketplace_matcher_accepts_same_pack_with_chinese_count_wording():
    """A digit pack count and its Chinese-numeral wording describe the same pack."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【SEBAMED】潔膚露1000ml共2入(大容量 平行輸入)"
    pchome_title = "Sebamed施巴 潔膚露1000ml 兩入組"

    verdict = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=799,
        competitor_price=899,
    )

    assert verdict.score >= 0.76
    assert verdict.hard_veto is False
    assert "brand_match" in verdict.tags
|
||||
|
||||
|
||||
def test_marketplace_matcher_rejects_razor_series_and_blade_count_conflict():
    """Same brand, but a different series and blade count must be vetoed."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【Gillette 吉列】SkinGuard 紳適系列刮鬍刀頭(4刀頭)"
    pchome_title = "【Gillette 吉列 】Fusion鋒隱系列刮鬍刀頭(8刀頭)"

    verdict = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=499,
        competitor_price=906,
    )

    assert verdict.score < 0.76
    assert verdict.hard_veto is True
    assert verdict.comparison_mode == "not_comparable"
    assert "count_conflict" in verdict.reasons
    assert "model_line_conflict" in verdict.reasons
|
||||
|
||||
|
||||
def test_marketplace_search_terms_prefer_readable_brand_core_spec():
|
||||
from services.marketplace_product_matcher import build_search_terms
|
||||
|
||||
|
||||
57
tests/test_pchome_backfill_status.py
Normal file
57
tests/test_pchome_backfill_status.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from services.pchome_backfill_status import (
|
||||
PchomeBackfillAlreadyRunning,
|
||||
fail_pchome_backfill_run,
|
||||
finish_pchome_backfill_run,
|
||||
get_pchome_backfill_status,
|
||||
start_pchome_backfill_run,
|
||||
update_pchome_backfill_run,
|
||||
)
|
||||
|
||||
|
||||
def test_pchome_backfill_status_tracks_running_and_completion(tmp_path, monkeypatch):
    """A run is visible while running, blocks a second start, then completes."""
    status_file = tmp_path / "status.json"
    monkeypatch.setenv("PCHOME_BACKFILL_STATUS_PATH", str(status_file))

    started = start_pchome_backfill_run(limit=12, operator="tester")
    snapshot = get_pchome_backfill_status()

    assert snapshot["running"] is True
    assert snapshot["status"] == "running"
    assert snapshot["current_run"]["run_id"] == started["run_id"]
    assert snapshot["current_run"]["limit"] == 12

    # A second start must be refused while the first run is still active.
    guard_error = None
    try:
        start_pchome_backfill_run(limit=5)
    except PchomeBackfillAlreadyRunning as exc:
        guard_error = exc
    if guard_error is None:
        raise AssertionError("expected active PChome backfill guard")
    assert guard_error.status["running"] is True

    match_result = {"total_skus": 12, "matched": 3}
    update_pchome_backfill_run(
        started["run_id"],
        stage="matching",
        result={"total_skus": 12, "matched": 3},
    )
    finish_pchome_backfill_run(
        started["run_id"],
        result=match_result,
        pick_result={"written": 9},
    )
    final = get_pchome_backfill_status()

    assert final["running"] is False
    assert final["status"] == "completed"
    assert final["last_result"]["matched"] == 3
    assert final["recent_runs"][0]["pick_result"]["written"] == 9
|
||||
|
||||
|
||||
def test_pchome_backfill_status_records_failure(tmp_path, monkeypatch):
    """A failed run is non-running and exposes its error in status and history."""
    status_file = tmp_path / "status.json"
    monkeypatch.setenv("PCHOME_BACKFILL_STATUS_PATH", str(status_file))

    started = start_pchome_backfill_run(limit=8)
    fail_pchome_backfill_run(started["run_id"], "crawler timeout")
    snapshot = get_pchome_backfill_status()

    assert snapshot["running"] is False
    assert snapshot["status"] == "failed"
    assert snapshot["last_error"] == "crawler timeout"
    assert snapshot["recent_runs"][0]["last_error"] == "crawler timeout"
|
||||
@@ -163,6 +163,87 @@
|
||||
color: var(--momo-accent-strong);
|
||||
}
|
||||
|
||||
.dashboard-backfill-card {
|
||||
display: grid;
|
||||
grid-template-columns: minmax(220px, 1fr) minmax(160px, 280px) minmax(240px, 1fr) auto;
|
||||
gap: 14px;
|
||||
align-items: center;
|
||||
min-width: 0;
|
||||
margin-top: 12px;
|
||||
padding: 14px 16px;
|
||||
overflow: hidden;
|
||||
background: var(--momo-bg-surface);
|
||||
border: 1px solid var(--momo-border-light);
|
||||
border-radius: 8px;
|
||||
}
|
||||
|
||||
.dashboard-backfill-card[data-status="running"] {
|
||||
border-color: rgba(190, 106, 45, 0.36);
|
||||
box-shadow: inset 3px 0 0 var(--momo-warm-caramel);
|
||||
}
|
||||
|
||||
.dashboard-backfill-card[data-status="failed"],
|
||||
.dashboard-backfill-card[data-status="stale"] {
|
||||
border-color: rgba(188, 75, 49, 0.32);
|
||||
box-shadow: inset 3px 0 0 var(--momo-danger);
|
||||
}
|
||||
|
||||
.dashboard-backfill-main {
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.dashboard-backfill-label {
|
||||
margin-bottom: 3px;
|
||||
color: var(--momo-text-tertiary);
|
||||
font-size: 10px;
|
||||
font-weight: 800;
|
||||
letter-spacing: 0.10em;
|
||||
}
|
||||
|
||||
.dashboard-backfill-title {
|
||||
color: var(--momo-text-primary);
|
||||
font-size: 15px;
|
||||
font-weight: 800;
|
||||
line-height: 1.25;
|
||||
}
|
||||
|
||||
.dashboard-backfill-meta,
|
||||
.dashboard-backfill-status {
|
||||
min-width: 0;
|
||||
color: var(--momo-text-secondary);
|
||||
font-size: 11px;
|
||||
line-height: 1.45;
|
||||
overflow-wrap: anywhere;
|
||||
}
|
||||
|
||||
.dashboard-backfill-progress {
|
||||
position: relative;
|
||||
width: 100%;
|
||||
height: 8px;
|
||||
overflow: hidden;
|
||||
background: rgba(42, 37, 32, 0.08);
|
||||
border: 1px solid rgba(42, 37, 32, 0.08);
|
||||
border-radius: 999px;
|
||||
}
|
||||
|
||||
.dashboard-backfill-progress span {
|
||||
position: absolute;
|
||||
inset: 0 auto 0 0;
|
||||
width: 0%;
|
||||
background: linear-gradient(90deg, var(--momo-warm-caramel), var(--momo-success));
|
||||
transition: width 240ms ease;
|
||||
}
|
||||
|
||||
.dashboard-backfill-card[data-status="failed"] .dashboard-backfill-progress span,
|
||||
.dashboard-backfill-card[data-status="stale"] .dashboard-backfill-progress span {
|
||||
background: linear-gradient(90deg, var(--momo-danger), var(--momo-warm-rust));
|
||||
}
|
||||
|
||||
.dashboard-backfill-status {
|
||||
display: grid;
|
||||
gap: 2px;
|
||||
}
|
||||
|
||||
.dashboard-focus-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(3, minmax(0, 1fr));
|
||||
@@ -1151,6 +1232,16 @@
|
||||
line-height: 1.35;
|
||||
}
|
||||
|
||||
.dashboard-backfill-card {
|
||||
grid-template-columns: 1fr;
|
||||
gap: 10px;
|
||||
padding: 14px;
|
||||
}
|
||||
|
||||
.dashboard-backfill-card .dashboard-action-button {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.dashboard-search,
|
||||
.dashboard-select,
|
||||
.dashboard-segmented {
|
||||
|
||||
@@ -280,6 +280,142 @@ let priceChartInstance = null;
|
||||
button.addEventListener('click', () => runDashboardTask(button.dataset.dashboardTask));
|
||||
});
|
||||
|
||||
// Timer id of the most recently scheduled status poll; reset to null when polling stops.
let pchomeBackfillPollTimer = null;
|
||||
|
||||
/**
 * Look up the DOM nodes of the PChome backfill card and its endpoints.
 *
 * The endpoints come from the card's data attributes. The built-in defaults
 * are used only when the card itself is missing.
 *
 * @returns {{card: ?Element, trigger: ?Element, status: ?Element,
 *   result: ?Element, progress: ?Element, backfillEndpoint: string,
 *   statusEndpoint: string}}
 */
function getPchomeBackfillElements() {
    const find = selector => document.querySelector(selector);

    const card = find('[data-pchome-backfill-card]');
    const trigger = find('[data-pchome-backfill-trigger]');
    const status = find('[data-pchome-backfill-status]');
    const result = find('[data-pchome-backfill-result]');
    const progress = find('[data-pchome-backfill-progress]');

    let backfillEndpoint = '/api/ai/pchome-match/backfill';
    let statusEndpoint = '/api/ai/pchome-match/backfill/status';
    if (card) {
        backfillEndpoint = card.dataset.backfillEndpoint;
        statusEndpoint = card.dataset.statusEndpoint;
    }

    return { card, trigger, status, result, progress, backfillEndpoint, statusEndpoint };
}
|
||||
|
||||
/**
 * Format a count for display using the runtime's default locale.
 *
 * @param {*} value - Count to format; any falsy value is shown as zero.
 * @returns {string} Locale-formatted number.
 */
function formatBackfillCount(value) {
    const numeric = Number(value ? value : 0);
    return numeric.toLocaleString();
}
|
||||
|
||||
/**
 * Schedule the next status poll in five seconds, replacing any pending one.
 */
function schedulePchomeBackfillPoll() {
    const pending = pchomeBackfillPollTimer;
    if (pending) {
        clearTimeout(pending);
    }
    pchomeBackfillPollTimer = setTimeout(loadPchomeBackfillStatus, 5000);
}
|
||||
|
||||
/**
 * Paint a backfill status payload onto the dashboard card.
 *
 * Accepts either the bare status object or an envelope carrying it under
 * `data`. Updates the card's `data-status`, the progress bar, the status and
 * result lines and the trigger button. Keeps polling while a run is active
 * and cancels any pending poll otherwise.
 *
 * @param {?Object} payload - Status object or `{ data: status }` envelope.
 * @returns {void}
 */
function renderPchomeBackfillStatus(payload) {
    const status = payload && payload.data ? payload.data : (payload || {});
    const elements = getPchomeBackfillElements();
    if (!elements.card) return;

    const currentRun = status.current_run || {};
    const result = currentRun.result || status.last_result || {};
    const pickResult = currentRun.pick_result || {};
    const running = Boolean(status.running || currentRun.running);
    // A non-numeric progress value must not end up as "NaN%" in the style.
    const rawProgress = Number(status.progress_pct || currentRun.progress_pct || 0);
    const progressPct = Number.isFinite(rawProgress)
        ? Math.max(0, Math.min(rawProgress, 100))
        : 0;
    const statusKey = status.status || currentRun.status || 'idle';
    const stageLabel = status.stage_label || currentRun.stage_label || '尚未執行';
    const updatedAt = status.updated_at || currentRun.updated_at || currentRun.finished_at || '';

    elements.card.dataset.status = statusKey;
    if (elements.progress) {
        elements.progress.style.width = `${progressPct}%`;
    }
    if (elements.status) {
        elements.status.textContent = updatedAt ? `${stageLabel} · ${updatedAt}` : stageLabel;
    }
    if (elements.result) {
        if (status.last_error || currentRun.last_error) {
            elements.result.textContent = status.last_error || currentRun.last_error;
        } else if (result && Object.keys(result).length > 0) {
            const pickWritten = pickResult.written !== undefined ? ` · 挑品 ${formatBackfillCount(pickResult.written)}` : '';
            elements.result.textContent = (
                `比對 ${formatBackfillCount(result.total_skus)} · 成功 ${formatBackfillCount(result.matched)}`
                + ` · 待覆核 ${formatBackfillCount(result.skipped_low_score)}`
                + ` · 無結果 ${formatBackfillCount(result.skipped_no_result)}`
                + pickWritten
            );
        } else {
            elements.result.textContent = running ? '正在累積結果' : '尚無最近結果';
        }
    }
    if (elements.trigger) {
        // The idle label follows the button's own data-limit and the icon used
        // by the server-rendered template, so the button does not change its
        // wording or icon after the first status load.
        const rawLimit = Number((elements.trigger.dataset || {}).limit);
        const limit = Number.isFinite(rawLimit) && rawLimit > 0 ? rawLimit : 60;
        elements.trigger.disabled = running;
        elements.trigger.classList.toggle('is-loading', running);
        elements.trigger.innerHTML = running
            ? '<i class="fas fa-spinner fa-spin"></i> 補抓中'
            : `<i class="fas fa-magnifying-glass-chart"></i> 補抓 ${limit} 筆`;
    }

    if (running) {
        schedulePchomeBackfillPoll();
    } else if (pchomeBackfillPollTimer) {
        clearTimeout(pchomeBackfillPollTimer);
        pchomeBackfillPollTimer = null;
    }
}
|
||||
|
||||
/**
 * Fetch the backfill status and render it on the card.
 *
 * A non-2xx response is treated as a failure instead of being rendered as a
 * status. When a request fails while the card still shows a running run, the
 * next poll is scheduled anyway, so one transient error does not freeze the
 * card with a disabled button.
 *
 * @returns {Promise<void>} Settles after rendering or after handling the error.
 */
function loadPchomeBackfillStatus() {
    const elements = getPchomeBackfillElements();
    if (!elements.card) return Promise.resolve();
    return fetch(elements.statusEndpoint, {
        headers: { 'Accept': 'application/json' }
    })
        .then(response => {
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}`);
            }
            return response.json();
        })
        .then(renderPchomeBackfillStatus)
        .catch(error => {
            console.warn('[DashboardV2] PChome backfill status load failed:', error);
            if (elements.status) {
                elements.status.textContent = '狀態讀取失敗';
            }
            // Rendering is what normally re-arms the poll. It did not happen,
            // so re-arm here while the last rendered state was "running".
            if (elements.card.dataset.status === 'running') {
                schedulePchomeBackfillPoll();
            }
        });
}
|
||||
|
||||
/**
 * Ask for confirmation, then POST a PChome match backfill request.
 *
 * The batch size is read from the trigger's `data-limit` attribute and falls
 * back to 60 when the attribute is missing or not a positive integer. While
 * the request is in flight the trigger is disabled; it is re-enabled only on
 * failure (on success the renderer decides the trigger state).
 *
 * @returns {Promise<void>} Settles when the request has been handled. Never
 *     rejects: failures are logged and shown in the status element.
 */
function backfillPchomeMatches() {
    const DEFAULT_LIMIT = 60;
    const FAILURE_MESSAGE = 'PChome 補抓啟動失敗';

    const elements = getPchomeBackfillElements();
    if (!elements.card || !elements.trigger) return Promise.resolve();

    // Guard against a malformed data-limit: NaN would be shown in the prompt
    // and serialised as `null` in the request body.
    const requested = Number(elements.trigger.dataset.limit);
    const limit = Number.isInteger(requested) && requested > 0 ? requested : DEFAULT_LIMIT;
    if (!confirm(`啟動 PChome 待比對補抓 ${limit} 筆?`)) return Promise.resolve();

    elements.trigger.disabled = true;
    if (elements.status) {
        elements.status.textContent = '正在送出補抓任務';
    }

    return fetch(elements.backfillEndpoint, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'X-CSRFToken': getCSRFToken()
        },
        body: JSON.stringify({ limit })
    })
        .then(response => response.json()
            // A non-JSON body (e.g. an HTML error page) becomes `null` so the
            // user sees the fallback message instead of a parser error.
            .catch(() => null)
            .then(data => ({ ok: response.ok, status: response.status, data })))
        .then(({ ok, status, data }) => {
            if (data === null || typeof data !== 'object') {
                throw new Error(FAILURE_MESSAGE);
            }
            renderPchomeBackfillStatus(data);
            // 409 is not treated as a failure — presumably it means a run is
            // already in progress; confirm against the backend contract.
            if (!ok && status !== 409) {
                throw new Error(data.message || data.error || FAILURE_MESSAGE);
            }
            schedulePchomeBackfillPoll();
        })
        .catch(error => {
            console.warn('[DashboardV2] PChome backfill trigger failed:', error);
            if (elements.status) {
                elements.status.textContent = error.message || FAILURE_MESSAGE;
            }
            if (elements.trigger) {
                elements.trigger.disabled = false;
            }
        });
}
|
||||
|
||||
// Expose the trigger handler globally, bind it to every backfill button on
// the page, then fetch the initial status.
window.backfillPchomeMatches = backfillPchomeMatches;
const pchomeBackfillButtons = document.querySelectorAll('[data-pchome-backfill-trigger]');
for (const triggerButton of pchomeBackfillButtons) {
    triggerButton.addEventListener('click', backfillPchomeMatches);
}
loadPchomeBackfillStatus();
|
||||
|
||||
function runPchomeReviewDecision(button) {
|
||||
const sku = button.dataset.reviewSku || '';
|
||||
const action = button.dataset.reviewAction || '';
|
||||
|
||||
Reference in New Issue
Block a user