diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index 438b253..3e8d6c9 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.572 新增 PChome 決策支援覆蓋率:不放寬 `matched` / `decision_ready` 的 exact identity 門檻,另外把高分、無 hard veto、具同品線與規格證據,但因「任選 / 色號 / 型錄 / 即期」仍需覆核的候選,納入 `catalog_comparable_count` 與 `decision_support_rate`。Dashboard、當日業績、成長分析與 backfill 狀態摘要同步顯示「決策支援覆蓋率 / 精準可告警覆蓋 / 型錄可比 / 單位價」,讓覆蓋率提升建立在可解釋情報分層上,而不是把非 exact 商品硬寫成正式同款。 - V10.571 提升 PChome pending 覆蓋率搜尋召回:`PCHOME_FEEDER_MAX_SEARCH_TERMS` 預設由 5 提升到 6,新增 `PCHOME_FEEDER_SEARCH_COVERAGE_RESCUE_ENABLED`,在主要搜尋詞與原始名稱 fallback 之間插入狹義 coverage rescue terms。搜尋詞會保留 `5.5g`、`2.4g` 等小數規格,不再變成 `5 5g` / `2 4g`;同時排除外出清潔、卸除髒汙、卸防曬等非身份核心噪音。正式 pilot 顯示 CeraVe / TUNEMAKERS / Embryolisse / Neogence / NIVEA 這類雙語品牌商品常卡在 PChome 搜尋召回,因此補上「英文品牌 + 中文品牌 + 核心身份 + 規格」窄搜尋詞;「品牌 + 品類 + 規格」仍只開給安全品類,避免為了拉 pending 覆蓋率引入假陽性。 - V10.570 補 PChome 身份 / 報價證據契約:matcher 的 `match_diagnostic_json` 新增 `identity_evidence`、`offer_evidence`,把品牌、品類、identity anchor、型號、規格、入數與 variant guardrail 拆成結構化證據;覆核隊列與 decision envelope 新增 `difference_highlights`,可直接指出容量、入數、色號、香味、款式、補充包、檔期組合等差異。價格明確標記為 offer evidence,不再被誤當身份證據,Dashboard / PPT / OpenClaw / Webcrumbs 能共用同一份比對證據。 - 外部專業 benchmark 固定節奏:已建立每週一 09:30 自動檢視,並新增 `docs/guides/external_professional_benchmark.md`,把 Google Merchant Center、Google Product structured data、Schema.org Product/Offer/AggregateOffer 與 Baymard 電商 UX 做法轉成可落地準則:identity evidence、fresh offer、review 差異高亮、PPT/AI evidence 分層。 diff --git a/config.py b/config.py index e513abf..a341dc8 100644 --- a/config.py +++ b/config.py @@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.571" +SYSTEM_VERSION = "V10.572" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index 21da05c..e4266e0 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-06-01:PChome 比價新鮮度操作閉環 +- **V10.572 PChome 決策支援覆蓋率分層**: 覆蓋率不再只有 exact `decision_ready_rate`。`fetch_competitor_coverage()` cache 升到 v11,新增 `catalog_comparable_count`、`decision_support_count`、`decision_support_rate` 與非 exact 支援數;只納入高分、無 hard veto、同時具型錄/任選/商業條件訊號與強身份證據,且排除品類、品線、入數、香味、型號、價格極端等硬衝突的候選。Dashboard、daily、growth 與 backfill JS 同步顯示「決策支援覆蓋率 / 精準可告警覆蓋 / 型錄可比 / 單位價」,提升可用情報覆蓋但不污染正式 `matched`。 - **V10.571 PChome pending 覆蓋率搜尋召回**: `competitor_price_feeder` 預設每個商品最多搜尋詞由 5 組提升為 6 組,並新增 `PCHOME_FEEDER_SEARCH_COVERAGE_RESCUE_ENABLED`。補抓流程會在主要 matcher 搜尋詞與原始名稱 fallback 之間加入狹義 coverage rescue terms,保留 `5.5g` / `2.4g` 等小數規格,並過濾外出清潔、卸除髒汙、卸防曬等非身份核心噪音。正式 pilot 顯示 CeraVe / TUNEMAKERS / Embryolisse / Neogence / NIVEA 這類雙語品牌商品常卡在 PChome 搜尋召回,因此補上「英文品牌 + 中文品牌 + 核心身份 + 規格」窄搜尋詞;`品牌 + 品類 + 規格` 仍只對安全品類開放,目標是提升 pending/no_result 候選取得率,同時維持 matcher hard veto 與 `MIN_MATCH_SCORE` 不變。 - **V10.570 PChome 身份 / 報價證據契約**: `score_marketplace_match()` 現在會在 `match_diagnostic_json` 內輸出 `identity_evidence` 與 `offer_evidence`,把品牌、品類、identity anchor、型號、規格、入數、variant guardrail 與價格 offer 拆層保存。`competitor_intel_repository` 會把這些證據轉成 `difference_highlights` 與 decision envelope 的 identity / offer evidence,讓覆核頁、PPT、OpenClaw、Webcrumbs 與 Telegram 摘要都能理解「為何同款 / 為何不同 / 價格只是報價證據不是身份證據」。 - **V10.569 Webcrumbs 比價信封摘要串接**: `build_webcrumbs_marketplace_host_data()` 讀取 `fetch_competitor_review_queue()` 後統一走 `summarize_review_decision_envelopes()`,在 host data payload 輸出 `reviewDecisionBrief`,並於 metadata 增加 `review_queue_count`、`hitl_count`、`auto_execute_blocked_count` 與 `decision_envelope_source`。Webcrumbs / Shared UI 現在和 Telegram、OpenClaw、PPT 共用同一份 PChome 覆核信封摘要,仍維持只讀、不呼叫 LLM、不抓外站、不寫 DB;同版收錄 `docs/guides/external_professional_benchmark.md` 作為外部專業做法週巡檢落地準則入口。 diff --git a/routes/ai_routes.py b/routes/ai_routes.py index 882cbe0..68f7b05 100644 --- a/routes/ai_routes.py +++ b/routes/ai_routes.py @@ -2205,6 +2205,11 @@ def _build_pchome_backfill_coverage_payload(): coverage.get('decision_ready_matches') or coverage.get('fresh_matches') or 0 ), 'decision_ready_rate': float(coverage.get('decision_ready_rate') or 0), + 'decision_support_count': int(coverage.get('decision_support_count') or 0), + 'decision_support_rate': float(coverage.get('decision_support_rate') or 0), + 'decision_support_non_exact_count': int(coverage.get('decision_support_non_exact_count') or 0), + 'catalog_comparable_count': int(coverage.get('catalog_comparable_count') or 0), + 'catalog_comparable_rate': float(coverage.get('catalog_comparable_rate') or 0), 'stale_matches': int(coverage.get('stale_matches') or 0), 'pending': int(coverage.get('pending') or 0), 'actionable_review_count': int(coverage.get('actionable_review_count') or 0), diff --git a/routes/dashboard_routes.py b/routes/dashboard_routes.py index 57e7968..5833da8 100644 --- a/routes/dashboard_routes.py +++ b/routes/dashboard_routes.py @@ -713,6 +713,11 @@ def _merge_competitor_review_context(overview, review_context): 'fresh_match_rate': coverage.get('fresh_match_rate', 0), 'decision_ready_count': int(coverage.get('decision_ready_matches') or coverage.get('fresh_matches') or 0), 'decision_ready_rate': coverage.get('decision_ready_rate', 0), + 'decision_support_count': int(coverage.get('decision_support_count') or 0), + 'decision_support_rate': coverage.get('decision_support_rate', 0), + 'decision_support_non_exact_count': int(coverage.get('decision_support_non_exact_count') or 0), + 'catalog_comparable_count': int(coverage.get('catalog_comparable_count') or 0), + 'catalog_comparable_rate': coverage.get('catalog_comparable_rate', 0), 'stale_match_count': int(coverage.get('stale_matches') or 0), 'unknown_freshness_count': int(coverage.get('unknown_freshness_matches') or 0), 'pending_match_count': int(coverage.get('pending') or overview.get('pending_match_count') or 0), @@ -863,6 +868,11 @@ def _load_competitor_decision_overview(session, latest_items=None): 'fresh_match_rate': 0, 'decision_ready_count': 0, 'decision_ready_rate': 0, + 'decision_support_count': 0, + 'decision_support_rate': 0, + 'decision_support_non_exact_count': 0, + 'catalog_comparable_count': 0, + 'catalog_comparable_rate': 0, 'stale_match_count': 0, 'unknown_freshness_count': 0, 'pchome_advantage_count': 0, @@ -1723,6 +1733,11 @@ def _load_cached_competitor_overview_for_review(now_taipei, review_queue, review 'match_rate': 0, 'decision_ready_count': 0, 'decision_ready_rate': 0, + 'decision_support_count': 0, + 'decision_support_rate': 0, + 'decision_support_non_exact_count': 0, + 'catalog_comparable_count': 0, + 'catalog_comparable_rate': 0, 'unknown_freshness_count': 0, 'pchome_advantage_count': 0, 'momo_threat_count': 0, @@ -1752,6 +1767,11 @@ def _load_cached_competitor_overview_for_review(now_taipei, review_queue, review overview['review_queue'] = list(review_queue[:3]) overview.setdefault('unit_comparable_count', 0) overview.setdefault('rescore_accepted_count', 0) + overview.setdefault('decision_support_count', overview.get('decision_ready_count') or 0) + overview.setdefault('decision_support_rate', overview.get('decision_ready_rate') or 0) + overview.setdefault('decision_support_non_exact_count', 0) + overview.setdefault('catalog_comparable_count', 0) + overview.setdefault('catalog_comparable_rate', 0) return overview diff --git a/services/competitor_intel_repository.py b/services/competitor_intel_repository.py index dcf131a..d5d5cad 100644 --- a/services/competitor_intel_repository.py +++ b/services/competitor_intel_repository.py @@ -24,8 +24,46 @@ from sqlalchemy import inspect, text PCHOME_MATCH_SCORE_FLOOR = 0.76 +CATALOG_COMPARABLE_SCORE_FLOOR = 0.88 UNIT_COMPARABLE_STATUSES = {"unit_comparable", "refresh_unit_comparable"} UNIT_PRICE_DECISION_STATUSES = UNIT_COMPARABLE_STATUSES | {"manual_unit_price_required"} +CATALOG_COMPARABLE_SIGNAL_REASONS = { + "variant_selection_review", + "makeup_catalog_selection_gap", + "commercial_condition_gap", + "catalog_count_omission", +} +CATALOG_COMPARABLE_IDENTITY_REASONS = { + "strong_product_line_match", + "strong_exact_spec_match", + "shared_identity_anchor_exact_line", + "shared_identity_anchor_core_line", + "shared_identity_anchor_variant_safe", + "spec_name_alignment", + "shared_model_token", +} +CATALOG_COMPARABLE_BLOCK_REASONS = { + "brand_conflict", + "type_conflict", + "product_line_conflict", + "core_ingredient_line_conflict", + "variant_option_conflict", + "variant_descriptor_conflict", + "aroma_scent_variant_conflict", + "bath_additive_variant_gap", + "makeup_finish_conflict", + "makeup_usage_conflict", + "romand_lip_line_conflict", + "count_conflict", + "component_count_conflict", + "multi_component_conflict", + "multi_component_count_conflict", + "bundle_offer_conflict", + "refill_pack_conflict", + "accessory_case_conflict", + "named_component_quantity_conflict", + "price_ratio_extreme", +} MANUAL_CLOSED_ATTEMPT_STATUSES = { "manual_rejected", "manual_unit_price_required", @@ -980,7 +1018,7 @@ def _cached_payload(cache_key: str, producer, ttl_seconds: int = COMPETITOR_INTE def fetch_competitor_coverage(engine) -> dict: return _cached_payload( - f"coverage:v10:floor={PCHOME_MATCH_SCORE_FLOOR}:manual_reviews=1:rescore=1:review_no_fresh=1:decision_ready=1:open_queue=1:unknown_freshness=1", + f"coverage:v11:floor={PCHOME_MATCH_SCORE_FLOOR}:catalog_floor={CATALOG_COMPARABLE_SCORE_FLOOR}:manual_reviews=1:rescore=1:review_no_fresh=1:decision_ready=1:open_queue=1:unknown_freshness=1:decision_support=1", lambda: _fetch_competitor_coverage_uncached(engine), ) @@ -1000,6 +1038,11 @@ def _fetch_competitor_coverage_uncached(engine) -> dict: "unknown_freshness_matches": 0, "pending": 0, "decision_ready_matches": 0, + "decision_support_count": 0, + "decision_support_rate": 0, + "decision_support_non_exact_count": 0, + "catalog_comparable_count": 0, + "catalog_comparable_rate": 0, "identity_coverage_matches": 0, "identity_coverage_rate": 0, "pending_identity_count": 0, @@ -1029,7 +1072,10 @@ def _fetch_competitor_coverage_uncached(engine) -> dict: latest_attempt AS ( SELECT NULL AS sku, - NULL AS attempt_status + NULL AS attempt_status, + NULL::numeric AS best_match_score, + NULL::boolean AS hard_veto, + NULL::jsonb AS diagnostic_codes WHERE FALSE ) """ @@ -1038,7 +1084,10 @@ def _fetch_competitor_coverage_uncached(engine) -> dict: latest_attempt AS ( SELECT DISTINCT ON (sku) sku, - attempt_status + attempt_status, + best_match_score, + hard_veto, + diagnostic_codes FROM competitor_match_attempts WHERE source = 'pchome' ORDER BY sku, attempted_at DESC NULLS LAST @@ -1107,10 +1156,22 @@ def _fetch_competitor_coverage_uncached(engine) -> dict: (SELECT COUNT(*) FROM latest_momo lm LEFT JOIN identity_competitor ic ON ic.sku = lm.sku - WHERE ic.sku IS NULL) AS pending, + WHERE ic.sku IS NULL) AS pending, (SELECT MAX(fc.crawled_at) FROM latest_momo lm JOIN fresh_competitor fc ON fc.sku = lm.sku) AS last_decision_ready_crawled_at, + (SELECT COUNT(*) + FROM latest_momo lm + LEFT JOIN fresh_competitor fc ON fc.sku = lm.sku + JOIN latest_attempt la ON la.sku = lm.sku + WHERE fc.sku IS NULL + AND la.attempt_status = 'true_low_confidence' + AND COALESCE(la.hard_veto, false) = false + AND COALESCE(la.best_match_score, 0) >= {CATALOG_COMPARABLE_SCORE_FLOOR} + AND (COALESCE(la.diagnostic_codes, '[]'::jsonb) ?| ARRAY[{", ".join(repr(reason) for reason in sorted(CATALOG_COMPARABLE_SIGNAL_REASONS))}]) + AND (COALESCE(la.diagnostic_codes, '[]'::jsonb) ?| ARRAY[{", ".join(repr(reason) for reason in sorted(CATALOG_COMPARABLE_IDENTITY_REASONS))}]) + AND NOT (COALESCE(la.diagnostic_codes, '[]'::jsonb) ?| ARRAY[{", ".join(repr(reason) for reason in sorted(CATALOG_COMPARABLE_BLOCK_REASONS))}]) + ) AS catalog_comparable_count, COALESCE(la.attempt_status, 'never_attempted') AS attempt_status, COUNT(*) AS status_count FROM latest_momo lm @@ -1133,6 +1194,9 @@ def _fetch_competitor_coverage_uncached(engine) -> dict: for row in rows } unit_count = sum(statuses.get(status, 0) for status in UNIT_COMPARABLE_STATUSES) + catalog_comparable_count = int(rows[0].get("catalog_comparable_count") or 0) if rows else 0 + decision_support_non_exact_count = unit_count + catalog_comparable_count + decision_support_count = fresh + decision_support_non_exact_count rescore_accepted_count = int(statuses.get("rescore_accepted_current") or 0) actionable_count = sum(statuses.get(status, 0) for status in ACTIONABLE_ATTEMPT_STATUSES) manual_closed_count = sum(statuses.get(status, 0) for status in MANUAL_CLOSED_ATTEMPT_STATUSES) @@ -1145,6 +1209,11 @@ def _fetch_competitor_coverage_uncached(engine) -> dict: "unknown_freshness_matches": unknown_freshness, "pending": pending, "decision_ready_matches": fresh, + "decision_support_count": decision_support_count, + "decision_support_rate": round(decision_support_count / max(active, 1) * 100, 1), + "decision_support_non_exact_count": decision_support_non_exact_count, + "catalog_comparable_count": catalog_comparable_count, + "catalog_comparable_rate": round(catalog_comparable_count / max(active, 1) * 100, 1), "identity_coverage_matches": valid, "identity_coverage_rate": round(valid / max(active, 1) * 100, 1), "pending_identity_count": pending, @@ -1168,6 +1237,7 @@ def _fetch_competitor_coverage_uncached(engine) -> dict: "manual_unit_price_count": manual_review_summary["unit_price_required"], "manual_accept_rate": manual_review_summary["accept_rate"], "match_score_floor": PCHOME_MATCH_SCORE_FLOOR, + "catalog_comparable_score_floor": CATALOG_COMPARABLE_SCORE_FLOOR, } diff --git a/templates/daily_sales.html b/templates/daily_sales.html index 4bd8ff7..80a6d39 100644 --- a/templates/daily_sales.html +++ b/templates/daily_sales.html @@ -344,7 +344,11 @@