This commit is contained in:
@@ -338,6 +338,8 @@ LEFT JOIN competitor_prices cp
|
|||||||
- Dashboard、AI pick、Hermes、Excel export、daily/growth 圖表與 competitor PPT 必須以 `competitor_prices + competitor_price_history + competitor_match_attempts` 為短期唯一生產真相源,且只消費 `identity_v2` matcher 驗證過的配對;舊版僅靠 `match_score` 的快取不可直接進入決策或簡報。
|
- Dashboard、AI pick、Hermes、Excel export、daily/growth 圖表與 competitor PPT 必須以 `competitor_prices + competitor_price_history + competitor_match_attempts` 為短期唯一生產真相源,且只消費 `identity_v2` matcher 驗證過的配對;舊版僅靠 `match_score` 的快取不可直接進入決策或簡報。
|
||||||
- `pchome_matches` 與 live `pchome_batch()` 僅保留 legacy compatibility,不得作為新簡報或 AI 決策主來源。
|
- `pchome_matches` 與 live `pchome_batch()` 僅保留 legacy compatibility,不得作為新簡報或 AI 決策主來源。
|
||||||
- `services/competitor_intel_repository.py` 是下游頁面、圖表、簡報的共用查詢出口;新增消費端不得各自硬寫不同 match threshold。
|
- `services/competitor_intel_repository.py` 是下游頁面、圖表、簡報的共用查詢出口;新增消費端不得各自硬寫不同 match threshold。
|
||||||
|
- `services/competitor_identity_revalidator.py` 可對既有 `competitor_prices` legacy row 離線重跑 `identity_v2`:只有新版 matcher 分數 `>= 0.76` 且無 hard veto 才補 `identity_v2` / `legacy_revalidated` tags;預設不刷新 `expires_at`,避免過期價格進入決策。
|
||||||
|
- Dashboard 必須把「待比對」拆成可診斷狀態:`價格過期待刷新`、`舊版配對待重驗`、`低分配對待審`、`身份否決`、`找不到同款`、`抓取異常`、`尚未搜尋`。不可再用單一「待比對」掩蓋資料品質原因。
|
||||||
|
|
||||||
### 執行方式
|
### 執行方式
|
||||||
|
|
||||||
@@ -345,6 +347,12 @@ LEFT JOIN competitor_prices cp
|
|||||||
# 手動觸發一輪抓取
|
# 手動觸發一輪抓取
|
||||||
python3 services/competitor_price_feeder.py
|
python3 services/competitor_price_feeder.py
|
||||||
|
|
||||||
|
# 預覽 legacy PChome 快取 identity_v2 重驗證(不寫入)
|
||||||
|
python3 -m services.competitor_identity_revalidator --limit 500
|
||||||
|
|
||||||
|
# 寫入安全通過的 identity_v2 tag;不刷新過期價格
|
||||||
|
python3 -m services.competitor_identity_revalidator --limit 500 --apply
|
||||||
|
|
||||||
# 未來整合為 K3s CronJob(每 4 小時)
|
# 未來整合為 K3s CronJob(每 4 小時)
|
||||||
# k8s/jobs/competitor-price-feeder-cronjob.yaml
|
# k8s/jobs/competitor-price-feeder-cronjob.yaml
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -62,7 +62,70 @@ def _to_float(value):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _build_pchome_match_status(attempt=None):
|
def _build_pchome_match_status(attempt=None, ineligible=None):
|
||||||
|
if attempt:
|
||||||
|
status = attempt.get('attempt_status') or 'unknown'
|
||||||
|
if status == 'matched':
|
||||||
|
score = _to_float(attempt.get('best_match_score'))
|
||||||
|
score_text = f"最佳候選 {round(score * 100)}%" if score is not None else "已完成身份比對"
|
||||||
|
return {
|
||||||
|
'label': '已配對待刷新',
|
||||||
|
'tone': 'watch',
|
||||||
|
'summary': '曾通過 identity_v2,但目前沒有有效價格快取,等待下一輪刷新',
|
||||||
|
'detail': score_text,
|
||||||
|
}
|
||||||
|
if status == 'expired_match':
|
||||||
|
score = _to_float(attempt.get('best_match_score'))
|
||||||
|
score_text = f"身份分數 {round(score * 100)}%" if score is not None else "已完成身份比對"
|
||||||
|
return {
|
||||||
|
'label': '價格過期待刷新',
|
||||||
|
'tone': 'watch',
|
||||||
|
'summary': '同款身份已確認,但 PChome 價格快取過期,不顯示舊價避免誤判',
|
||||||
|
'detail': score_text,
|
||||||
|
}
|
||||||
|
if status == 'identity_veto':
|
||||||
|
score = _to_float(attempt.get('best_match_score'))
|
||||||
|
score_text = f"最佳候選 {round(score * 100)}%" if score is not None else "已拒絕候選"
|
||||||
|
return {
|
||||||
|
'label': '身份否決',
|
||||||
|
'tone': 'neutral',
|
||||||
|
'summary': '新版 identity_v2 判定不是同款,已阻擋自動比價',
|
||||||
|
'detail': score_text,
|
||||||
|
}
|
||||||
|
|
||||||
|
if ineligible:
|
||||||
|
reason = ineligible.get('reason') or 'not_eligible'
|
||||||
|
score = _to_float(ineligible.get('match_score'))
|
||||||
|
score_text = f"match {round(score * 100)}%" if score is not None else None
|
||||||
|
if reason == 'expired_match':
|
||||||
|
return {
|
||||||
|
'label': '價格過期待刷新',
|
||||||
|
'tone': 'watch',
|
||||||
|
'summary': '已有高信心同款配對,但 PChome 價格快取過期,等待補抓刷新',
|
||||||
|
'detail': score_text,
|
||||||
|
}
|
||||||
|
if reason == 'legacy_without_identity_v2':
|
||||||
|
return {
|
||||||
|
'label': '舊版配對待重驗',
|
||||||
|
'tone': 'neutral',
|
||||||
|
'summary': '舊版 PChome 配對尚未通過 identity_v2,不進入正式決策',
|
||||||
|
'detail': score_text,
|
||||||
|
}
|
||||||
|
if reason == 'below_score_floor':
|
||||||
|
return {
|
||||||
|
'label': '低分配對待審',
|
||||||
|
'tone': 'neutral',
|
||||||
|
'summary': '已有候選但低於高信心門檻,避免錯配所以暫不採用',
|
||||||
|
'detail': score_text,
|
||||||
|
}
|
||||||
|
if reason == 'invalid_price':
|
||||||
|
return {
|
||||||
|
'label': '價格無效待刷新',
|
||||||
|
'tone': 'watch',
|
||||||
|
'summary': 'PChome 配對缺少有效價格,等待下一輪補抓',
|
||||||
|
'detail': None,
|
||||||
|
}
|
||||||
|
|
||||||
if not attempt:
|
if not attempt:
|
||||||
return {
|
return {
|
||||||
'label': '尚未搜尋',
|
'label': '尚未搜尋',
|
||||||
@@ -218,6 +281,89 @@ def _load_pchome_competitor_map(session, skus):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _load_pchome_ineligible_competitor_map(session, skus):
|
||||||
|
"""Read non-decision PChome rows so the UI can explain why a SKU is pending."""
|
||||||
|
sku_list = [str(sku) for sku in skus if sku]
|
||||||
|
if not sku_list:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
try:
|
||||||
|
stmt = text("""
|
||||||
|
WITH ineligible AS (
|
||||||
|
SELECT
|
||||||
|
sku,
|
||||||
|
price,
|
||||||
|
competitor_product_id,
|
||||||
|
competitor_product_name,
|
||||||
|
match_score,
|
||||||
|
tags,
|
||||||
|
crawled_at,
|
||||||
|
expires_at,
|
||||||
|
CASE
|
||||||
|
WHEN price IS NULL OR price <= 0 THEN 'invalid_price'
|
||||||
|
WHEN (expires_at IS NOT NULL AND expires_at <= CURRENT_TIMESTAMP)
|
||||||
|
AND COALESCE(match_score, 0) >= :match_score_floor
|
||||||
|
AND COALESCE(tags, '[]'::jsonb) ? 'identity_v2'
|
||||||
|
THEN 'expired_match'
|
||||||
|
WHEN NOT (COALESCE(tags, '[]'::jsonb) ? 'identity_v2')
|
||||||
|
THEN 'legacy_without_identity_v2'
|
||||||
|
WHEN COALESCE(match_score, 0) < :match_score_floor
|
||||||
|
THEN 'below_score_floor'
|
||||||
|
ELSE 'not_eligible'
|
||||||
|
END AS reason,
|
||||||
|
ROW_NUMBER() OVER (
|
||||||
|
PARTITION BY sku
|
||||||
|
ORDER BY
|
||||||
|
CASE
|
||||||
|
WHEN (expires_at IS NOT NULL AND expires_at <= CURRENT_TIMESTAMP)
|
||||||
|
AND COALESCE(match_score, 0) >= :match_score_floor
|
||||||
|
AND COALESCE(tags, '[]'::jsonb) ? 'identity_v2'
|
||||||
|
THEN 0
|
||||||
|
WHEN NOT (COALESCE(tags, '[]'::jsonb) ? 'identity_v2') THEN 1
|
||||||
|
WHEN COALESCE(match_score, 0) < :match_score_floor THEN 2
|
||||||
|
ELSE 3
|
||||||
|
END,
|
||||||
|
crawled_at DESC NULLS LAST,
|
||||||
|
match_score DESC NULLS LAST
|
||||||
|
) AS rn
|
||||||
|
FROM competitor_prices
|
||||||
|
WHERE source = 'pchome'
|
||||||
|
AND sku IN :skus
|
||||||
|
AND NOT (
|
||||||
|
(expires_at IS NULL OR expires_at > CURRENT_TIMESTAMP)
|
||||||
|
AND price IS NOT NULL
|
||||||
|
AND price > 0
|
||||||
|
AND COALESCE(match_score, 0) >= :match_score_floor
|
||||||
|
AND COALESCE(tags, '[]'::jsonb) ? 'identity_v2'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
SELECT *
|
||||||
|
FROM ineligible
|
||||||
|
WHERE rn = 1
|
||||||
|
""").bindparams(bindparam("skus", expanding=True))
|
||||||
|
rows = session.execute(
|
||||||
|
stmt,
|
||||||
|
{"skus": sku_list, "match_score_floor": PCHOME_MATCH_SCORE_FLOOR},
|
||||||
|
).mappings().all()
|
||||||
|
except Exception as exc:
|
||||||
|
sys_log.warning(f"[Dashboard] PChome 非有效配對原因讀取略過: {exc}")
|
||||||
|
return {}
|
||||||
|
|
||||||
|
result = {}
|
||||||
|
for row in rows:
|
||||||
|
result[str(row.get('sku'))] = {
|
||||||
|
'reason': row.get('reason'),
|
||||||
|
'price': _to_float(row.get('price')),
|
||||||
|
'product_id': row.get('competitor_product_id'),
|
||||||
|
'product_name': row.get('competitor_product_name'),
|
||||||
|
'match_score': _to_float(row.get('match_score')),
|
||||||
|
'tags': row.get('tags'),
|
||||||
|
'crawled_at': row.get('crawled_at'),
|
||||||
|
'expires_at': row.get('expires_at'),
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def _load_pchome_match_attempt_map(session, skus):
|
def _load_pchome_match_attempt_map(session, skus):
|
||||||
sku_list = [str(sku) for sku in skus if sku]
|
sku_list = [str(sku) for sku in skus if sku]
|
||||||
if not sku_list:
|
if not sku_list:
|
||||||
@@ -1558,14 +1704,20 @@ def index():
|
|||||||
session,
|
session,
|
||||||
[item['record'].product.i_code for item in paged_items]
|
[item['record'].product.i_code for item in paged_items]
|
||||||
)
|
)
|
||||||
|
pchome_ineligible_map = _load_pchome_ineligible_competitor_map(
|
||||||
|
session,
|
||||||
|
[item['record'].product.i_code for item in paged_items]
|
||||||
|
)
|
||||||
for item in paged_items:
|
for item in paged_items:
|
||||||
product = item['record'].product
|
product = item['record'].product
|
||||||
sku = str(product.i_code)
|
sku = str(product.i_code)
|
||||||
competitor = pchome_map.get(sku)
|
competitor = pchome_map.get(sku)
|
||||||
attempt = pchome_attempt_map.get(sku)
|
attempt = pchome_attempt_map.get(sku)
|
||||||
match_status = _build_pchome_match_status(attempt)
|
ineligible = pchome_ineligible_map.get(sku)
|
||||||
|
match_status = _build_pchome_match_status(attempt, ineligible=ineligible)
|
||||||
item['pchome_competitor'] = competitor
|
item['pchome_competitor'] = competitor
|
||||||
item['pchome_match_attempt'] = attempt
|
item['pchome_match_attempt'] = attempt
|
||||||
|
item['pchome_ineligible_competitor'] = ineligible
|
||||||
item['pchome_match_status'] = match_status
|
item['pchome_match_status'] = match_status
|
||||||
item['competitor_decision'] = _build_competitor_decision(
|
item['competitor_decision'] = _build_competitor_decision(
|
||||||
item['record'].price,
|
item['record'].price,
|
||||||
|
|||||||
13
scheduler.py
13
scheduler.py
@@ -2193,6 +2193,7 @@ def run_pchome_match_backfill_task():
|
|||||||
from sqlalchemy import create_engine
|
from sqlalchemy import create_engine
|
||||||
from services.ai_product_pick_agent import generate_product_pick_list
|
from services.ai_product_pick_agent import generate_product_pick_list
|
||||||
from services.cache_manager import clear_dashboard_cache
|
from services.cache_manager import clear_dashboard_cache
|
||||||
|
from services.competitor_identity_revalidator import revalidate_existing_competitor_identities
|
||||||
from services.competitor_intel_repository import clear_competitor_intel_cache
|
from services.competitor_intel_repository import clear_competitor_intel_cache
|
||||||
from services.competitor_price_feeder import CompetitorPriceFeeder
|
from services.competitor_price_feeder import CompetitorPriceFeeder
|
||||||
|
|
||||||
@@ -2200,6 +2201,13 @@ def run_pchome_match_backfill_task():
|
|||||||
logging.info(f"[Scheduler] [PChomeBackfill] 🚀 啟動待比對補抓任務 | {now_str}")
|
logging.info(f"[Scheduler] [PChomeBackfill] 🚀 啟動待比對補抓任務 | {now_str}")
|
||||||
|
|
||||||
engine = create_engine(DATABASE_PATH)
|
engine = create_engine(DATABASE_PATH)
|
||||||
|
revalidation_result = revalidate_existing_competitor_identities(
|
||||||
|
engine,
|
||||||
|
limit=500,
|
||||||
|
dry_run=False,
|
||||||
|
include_expired=True,
|
||||||
|
write_attempts=True,
|
||||||
|
)
|
||||||
feeder_result = CompetitorPriceFeeder(engine=engine).run_unmatched_priority(limit=120)
|
feeder_result = CompetitorPriceFeeder(engine=engine).run_unmatched_priority(limit=120)
|
||||||
pick_result = generate_product_pick_list(engine, limit=50)
|
pick_result = generate_product_pick_list(engine, limit=50)
|
||||||
clear_dashboard_cache()
|
clear_dashboard_cache()
|
||||||
@@ -2213,12 +2221,17 @@ def run_pchome_match_backfill_task():
|
|||||||
"errors": feeder_result.errors,
|
"errors": feeder_result.errors,
|
||||||
"duration_sec": feeder_result.duration_sec,
|
"duration_sec": feeder_result.duration_sec,
|
||||||
"history_written": feeder_result.history_written,
|
"history_written": feeder_result.history_written,
|
||||||
|
"identity_revalidated_fresh": revalidation_result.promoted_fresh,
|
||||||
|
"identity_revalidated_expired": revalidation_result.promoted_expired,
|
||||||
|
"identity_revalidation_rejected_low": revalidation_result.rejected_low_score,
|
||||||
|
"identity_revalidation_rejected_veto": revalidation_result.rejected_veto,
|
||||||
"pick_candidates": pick_result.candidates,
|
"pick_candidates": pick_result.candidates,
|
||||||
"pick_written": pick_result.written,
|
"pick_written": pick_result.written,
|
||||||
"status": "Success",
|
"status": "Success",
|
||||||
}
|
}
|
||||||
logging.info(
|
logging.info(
|
||||||
f"[Scheduler] [PChomeBackfill] ✅ 完成 | "
|
f"[Scheduler] [PChomeBackfill] ✅ 完成 | "
|
||||||
|
f"revalidated={revalidation_result.promoted_fresh}+{revalidation_result.promoted_expired} "
|
||||||
f"matched={feeder_result.matched}/{feeder_result.total_skus} "
|
f"matched={feeder_result.matched}/{feeder_result.total_skus} "
|
||||||
f"history_written={feeder_result.history_written} "
|
f"history_written={feeder_result.history_written} "
|
||||||
f"pick_written={pick_result.written} "
|
f"pick_written={pick_result.written} "
|
||||||
|
|||||||
357
services/competitor_identity_revalidator.py
Normal file
357
services/competitor_identity_revalidator.py
Normal file
@@ -0,0 +1,357 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Revalidate legacy PChome competitor rows with the identity_v2 matcher.
|
||||||
|
|
||||||
|
This module upgrades only rows that the current matcher can prove are the same
|
||||||
|
product. It does not relax the production match gate and it does not refresh
|
||||||
|
expired prices by default.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from dataclasses import asdict, dataclass
|
||||||
|
from typing import Any, Iterable
|
||||||
|
|
||||||
|
from sqlalchemy import text
|
||||||
|
|
||||||
|
from services.competitor_price_feeder import MIN_MATCH_SCORE
|
||||||
|
from services.marketplace_product_matcher import MatchDiagnostics, score_marketplace_match
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
MATCH_ALGORITHM_TAG = "identity_v2"
|
||||||
|
REVALIDATION_TAG = "legacy_revalidated"
|
||||||
|
REVALIDATION_SOURCE_TAG = "revalidated_from_competitor_prices"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class RevalidationDecision:
|
||||||
|
sku: str
|
||||||
|
competitor_product_id: str | None
|
||||||
|
status: str
|
||||||
|
accepted: bool
|
||||||
|
score: float | None
|
||||||
|
hard_veto: bool
|
||||||
|
is_expired: bool
|
||||||
|
tags: list[str]
|
||||||
|
diagnostic: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class RevalidationStats:
|
||||||
|
scanned: int = 0
|
||||||
|
promoted_fresh: int = 0
|
||||||
|
promoted_expired: int = 0
|
||||||
|
rejected_low_score: int = 0
|
||||||
|
rejected_veto: int = 0
|
||||||
|
skipped: int = 0
|
||||||
|
attempts_written: int = 0
|
||||||
|
errors: int = 0
|
||||||
|
samples: list[dict[str, Any]] | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _json_tags(value: Any) -> list[str]:
|
||||||
|
if not value:
|
||||||
|
return []
|
||||||
|
if isinstance(value, list):
|
||||||
|
return [str(item) for item in value if item]
|
||||||
|
if isinstance(value, tuple):
|
||||||
|
return [str(item) for item in value if item]
|
||||||
|
if isinstance(value, str):
|
||||||
|
try:
|
||||||
|
parsed = json.loads(value)
|
||||||
|
except Exception:
|
||||||
|
return []
|
||||||
|
if isinstance(parsed, list):
|
||||||
|
return [str(item) for item in parsed if item]
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def _dedupe(values: Iterable[str]) -> list[str]:
|
||||||
|
result: list[str] = []
|
||||||
|
seen: set[str] = set()
|
||||||
|
for value in values:
|
||||||
|
if not value or value in seen:
|
||||||
|
continue
|
||||||
|
result.append(value)
|
||||||
|
seen.add(value)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _diagnostic_text(diagnostics: MatchDiagnostics | None) -> str:
|
||||||
|
if diagnostics is None:
|
||||||
|
return ""
|
||||||
|
reasons = ",".join(diagnostics.reasons or ())
|
||||||
|
return (
|
||||||
|
f"score={diagnostics.score}; brand={diagnostics.brand_score}; "
|
||||||
|
f"token={diagnostics.token_score}; spec={diagnostics.spec_score}; "
|
||||||
|
f"seq={diagnostics.sequence_score}; type={diagnostics.type_score}; "
|
||||||
|
f"penalty={diagnostics.price_penalty}; veto={diagnostics.hard_veto}; "
|
||||||
|
f"reasons={reasons}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_revalidated_tags(existing_tags: Any, diagnostics: MatchDiagnostics) -> list[str]:
|
||||||
|
tags = list(_json_tags(existing_tags))
|
||||||
|
tags.extend(diagnostics.tags)
|
||||||
|
tags.append(REVALIDATION_TAG)
|
||||||
|
tags.append(REVALIDATION_SOURCE_TAG)
|
||||||
|
for reason in diagnostics.reasons or ():
|
||||||
|
tags.append(f"match_{reason}")
|
||||||
|
return _dedupe(tags)
|
||||||
|
|
||||||
|
|
||||||
|
def classify_legacy_competitor_row(row: dict[str, Any]) -> RevalidationDecision:
|
||||||
|
"""Classify a legacy competitor_prices row without mutating storage."""
|
||||||
|
sku = str(row.get("sku") or "")
|
||||||
|
momo_name = row.get("momo_name") or ""
|
||||||
|
competitor_name = row.get("competitor_product_name") or ""
|
||||||
|
competitor_product_id = row.get("competitor_product_id")
|
||||||
|
is_expired = bool(row.get("is_expired"))
|
||||||
|
|
||||||
|
if not sku or not momo_name or not competitor_name:
|
||||||
|
return RevalidationDecision(
|
||||||
|
sku=sku,
|
||||||
|
competitor_product_id=competitor_product_id,
|
||||||
|
status="skipped_missing_identity_text",
|
||||||
|
accepted=False,
|
||||||
|
score=None,
|
||||||
|
hard_veto=False,
|
||||||
|
is_expired=is_expired,
|
||||||
|
tags=_json_tags(row.get("tags")),
|
||||||
|
diagnostic="missing momo_name or competitor_product_name",
|
||||||
|
)
|
||||||
|
|
||||||
|
diagnostics = score_marketplace_match(
|
||||||
|
momo_name,
|
||||||
|
competitor_name,
|
||||||
|
momo_price=row.get("momo_price"),
|
||||||
|
competitor_price=row.get("pchome_price"),
|
||||||
|
)
|
||||||
|
tags = _build_revalidated_tags(row.get("tags"), diagnostics)
|
||||||
|
diagnostic = _diagnostic_text(diagnostics)
|
||||||
|
|
||||||
|
if diagnostics.score >= MIN_MATCH_SCORE and not diagnostics.hard_veto:
|
||||||
|
return RevalidationDecision(
|
||||||
|
sku=sku,
|
||||||
|
competitor_product_id=competitor_product_id,
|
||||||
|
status="expired_match" if is_expired else "matched",
|
||||||
|
accepted=True,
|
||||||
|
score=diagnostics.score,
|
||||||
|
hard_veto=False,
|
||||||
|
is_expired=is_expired,
|
||||||
|
tags=tags,
|
||||||
|
diagnostic=diagnostic,
|
||||||
|
)
|
||||||
|
|
||||||
|
return RevalidationDecision(
|
||||||
|
sku=sku,
|
||||||
|
competitor_product_id=competitor_product_id,
|
||||||
|
status="identity_veto" if diagnostics.hard_veto else "low_score",
|
||||||
|
accepted=False,
|
||||||
|
score=diagnostics.score,
|
||||||
|
hard_veto=diagnostics.hard_veto,
|
||||||
|
is_expired=is_expired,
|
||||||
|
tags=tags,
|
||||||
|
diagnostic=diagnostic,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_legacy_rows(conn, source: str, limit: int, include_expired: bool) -> list[dict[str, Any]]:
|
||||||
|
expiry_filter = "" if include_expired else "AND (cp.expires_at IS NULL OR cp.expires_at > CURRENT_TIMESTAMP)"
|
||||||
|
sql = text(f"""
|
||||||
|
WITH latest_momo AS (
|
||||||
|
SELECT
|
||||||
|
p.id AS product_id,
|
||||||
|
p.i_code AS sku,
|
||||||
|
p.name AS momo_name,
|
||||||
|
pr.price AS momo_price,
|
||||||
|
ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC, pr.id DESC) AS rn
|
||||||
|
FROM products p
|
||||||
|
JOIN price_records pr ON pr.product_id = p.id
|
||||||
|
WHERE p.status = 'ACTIVE'
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
cp.id AS competitor_price_id,
|
||||||
|
lm.product_id AS momo_product_id,
|
||||||
|
lm.sku,
|
||||||
|
lm.momo_name,
|
||||||
|
lm.momo_price,
|
||||||
|
cp.price AS pchome_price,
|
||||||
|
cp.competitor_product_id,
|
||||||
|
cp.competitor_product_name,
|
||||||
|
cp.match_score AS previous_match_score,
|
||||||
|
cp.tags,
|
||||||
|
cp.crawled_at,
|
||||||
|
cp.expires_at,
|
||||||
|
CASE
|
||||||
|
WHEN cp.expires_at IS NOT NULL AND cp.expires_at <= CURRENT_TIMESTAMP
|
||||||
|
THEN TRUE ELSE FALSE
|
||||||
|
END AS is_expired
|
||||||
|
FROM latest_momo lm
|
||||||
|
JOIN competitor_prices cp
|
||||||
|
ON cp.sku = lm.sku
|
||||||
|
AND cp.source = :source
|
||||||
|
WHERE lm.rn = 1
|
||||||
|
AND cp.price IS NOT NULL
|
||||||
|
AND cp.price > 0
|
||||||
|
AND cp.competitor_product_name IS NOT NULL
|
||||||
|
AND NOT (COALESCE(cp.tags, '[]'::jsonb) ? :identity_tag)
|
||||||
|
{expiry_filter}
|
||||||
|
ORDER BY
|
||||||
|
CASE WHEN cp.expires_at IS NULL OR cp.expires_at > CURRENT_TIMESTAMP THEN 0 ELSE 1 END,
|
||||||
|
cp.crawled_at DESC NULLS LAST,
|
||||||
|
cp.match_score DESC NULLS LAST,
|
||||||
|
lm.sku
|
||||||
|
LIMIT :limit
|
||||||
|
""")
|
||||||
|
rows = conn.execute(
|
||||||
|
sql,
|
||||||
|
{
|
||||||
|
"source": source,
|
||||||
|
"identity_tag": MATCH_ALGORITHM_TAG,
|
||||||
|
"limit": max(1, int(limit)),
|
||||||
|
},
|
||||||
|
).mappings().all()
|
||||||
|
return [dict(row) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_attempt_table(conn) -> None:
|
||||||
|
from services.competitor_price_feeder import CompetitorPriceFeeder
|
||||||
|
|
||||||
|
feeder = CompetitorPriceFeeder(engine=None)
|
||||||
|
feeder._ensure_competitor_match_attempts_table(conn)
|
||||||
|
|
||||||
|
|
||||||
|
def _insert_attempt(conn, row: dict[str, Any], decision: RevalidationDecision, source: str) -> None:
|
||||||
|
search_terms_expr = "CAST(:search_terms AS jsonb)" if conn.dialect.name == "postgresql" else ":search_terms"
|
||||||
|
conn.execute(text(f"""
|
||||||
|
INSERT INTO competitor_match_attempts
|
||||||
|
(sku, source, momo_product_id, momo_product_name, momo_price,
|
||||||
|
search_terms, candidate_count, attempt_status,
|
||||||
|
best_competitor_product_id, best_competitor_product_name,
|
||||||
|
best_competitor_price, best_match_score, error_message,
|
||||||
|
attempted_at)
|
||||||
|
VALUES
|
||||||
|
(:sku, :source, :momo_product_id, :momo_product_name, :momo_price,
|
||||||
|
{search_terms_expr}, :candidate_count, :attempt_status,
|
||||||
|
:best_id, :best_name,
|
||||||
|
:best_price, :best_score, :error_message,
|
||||||
|
CURRENT_TIMESTAMP)
|
||||||
|
"""), {
|
||||||
|
"sku": decision.sku,
|
||||||
|
"source": source,
|
||||||
|
"momo_product_id": row.get("momo_product_id"),
|
||||||
|
"momo_product_name": row.get("momo_name"),
|
||||||
|
"momo_price": row.get("momo_price"),
|
||||||
|
"search_terms": json.dumps(["legacy_competitor_prices_revalidation"], ensure_ascii=False),
|
||||||
|
"candidate_count": 1,
|
||||||
|
"attempt_status": decision.status,
|
||||||
|
"best_id": decision.competitor_product_id,
|
||||||
|
"best_name": (row.get("competitor_product_name") or "")[:300] or None,
|
||||||
|
"best_price": row.get("pchome_price"),
|
||||||
|
"best_score": decision.score,
|
||||||
|
"error_message": (decision.diagnostic or "")[:1000] or None,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
def revalidate_existing_competitor_identities(
|
||||||
|
engine,
|
||||||
|
*,
|
||||||
|
source: str = "pchome",
|
||||||
|
limit: int = 500,
|
||||||
|
dry_run: bool = True,
|
||||||
|
include_expired: bool = True,
|
||||||
|
write_attempts: bool = True,
|
||||||
|
sample_limit: int = 10,
|
||||||
|
) -> RevalidationStats:
|
||||||
|
"""Re-score legacy competitor_prices rows and optionally persist safe upgrades."""
|
||||||
|
stats = RevalidationStats(samples=[])
|
||||||
|
with engine.begin() as conn:
|
||||||
|
rows = _fetch_legacy_rows(conn, source=source, limit=limit, include_expired=include_expired)
|
||||||
|
stats.scanned = len(rows)
|
||||||
|
if not dry_run and write_attempts:
|
||||||
|
_ensure_attempt_table(conn)
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
try:
|
||||||
|
decision = classify_legacy_competitor_row(row)
|
||||||
|
if len(stats.samples or []) < sample_limit:
|
||||||
|
stats.samples.append({
|
||||||
|
"sku": decision.sku,
|
||||||
|
"status": decision.status,
|
||||||
|
"score": decision.score,
|
||||||
|
"is_expired": decision.is_expired,
|
||||||
|
"momo_name": (row.get("momo_name") or "")[:60],
|
||||||
|
"competitor_name": (row.get("competitor_product_name") or "")[:60],
|
||||||
|
})
|
||||||
|
|
||||||
|
if decision.accepted:
|
||||||
|
if decision.is_expired:
|
||||||
|
stats.promoted_expired += 1
|
||||||
|
else:
|
||||||
|
stats.promoted_fresh += 1
|
||||||
|
if not dry_run:
|
||||||
|
conn.execute(text("""
|
||||||
|
UPDATE competitor_prices
|
||||||
|
SET
|
||||||
|
match_score = :match_score,
|
||||||
|
tags = CAST(:tags AS jsonb)
|
||||||
|
WHERE id = :competitor_price_id
|
||||||
|
"""), {
|
||||||
|
"match_score": decision.score,
|
||||||
|
"tags": json.dumps(decision.tags, ensure_ascii=False),
|
||||||
|
"competitor_price_id": row.get("competitor_price_id"),
|
||||||
|
})
|
||||||
|
elif decision.status == "identity_veto":
|
||||||
|
stats.rejected_veto += 1
|
||||||
|
elif decision.status == "low_score":
|
||||||
|
stats.rejected_low_score += 1
|
||||||
|
else:
|
||||||
|
stats.skipped += 1
|
||||||
|
|
||||||
|
if not dry_run and write_attempts:
|
||||||
|
_insert_attempt(conn, row, decision, source=source)
|
||||||
|
stats.attempts_written += 1
|
||||||
|
except Exception as exc:
|
||||||
|
stats.errors += 1
|
||||||
|
logger.warning(
|
||||||
|
"[CompetitorIdentityRevalidator] row failed sku=%s: %s",
|
||||||
|
row.get("sku"),
|
||||||
|
exc,
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
return stats
|
||||||
|
|
||||||
|
|
||||||
|
def _main() -> int:
|
||||||
|
parser = argparse.ArgumentParser(description="Revalidate legacy PChome competitor identities.")
|
||||||
|
parser.add_argument("--limit", type=int, default=500)
|
||||||
|
parser.add_argument("--source", default="pchome")
|
||||||
|
parser.add_argument("--apply", action="store_true", help="Persist accepted tags and attempt audit rows.")
|
||||||
|
parser.add_argument("--fresh-only", action="store_true", help="Skip expired competitor prices.")
|
||||||
|
parser.add_argument("--no-attempts", action="store_true", help="Do not append competitor_match_attempts rows.")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
from database.manager import DatabaseManager
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
|
||||||
|
engine = DatabaseManager().engine
|
||||||
|
stats = revalidate_existing_competitor_identities(
|
||||||
|
engine,
|
||||||
|
source=args.source,
|
||||||
|
limit=args.limit,
|
||||||
|
dry_run=not args.apply,
|
||||||
|
include_expired=not args.fresh_only,
|
||||||
|
write_attempts=not args.no_attempts,
|
||||||
|
)
|
||||||
|
print(json.dumps(asdict(stats), ensure_ascii=False, indent=2, default=str))
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(_main())
|
||||||
65
tests/test_competitor_identity_revalidator.py
Normal file
65
tests/test_competitor_identity_revalidator.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
|
||||||
|
def test_revalidator_promotes_legacy_same_product_without_refreshing_expired_price():
|
||||||
|
from services.competitor_identity_revalidator import classify_legacy_competitor_row
|
||||||
|
|
||||||
|
row = {
|
||||||
|
"sku": "10950080",
|
||||||
|
"momo_name": "【台酒生技】黑酵母酒粕逆齡活膚青春露5入-(120ml/入)",
|
||||||
|
"momo_price": 999,
|
||||||
|
"pchome_price": 899,
|
||||||
|
"competitor_product_id": "PC-1",
|
||||||
|
"competitor_product_name": "【台酒生技】金粹黑酵母酒粕逆齡活膚青春露120ml_5入",
|
||||||
|
"tags": ["discount_10pct"],
|
||||||
|
"is_expired": True,
|
||||||
|
"expires_at": datetime.utcnow() - timedelta(hours=1),
|
||||||
|
}
|
||||||
|
|
||||||
|
decision = classify_legacy_competitor_row(row)
|
||||||
|
|
||||||
|
assert decision.accepted is True
|
||||||
|
assert decision.status == "expired_match"
|
||||||
|
assert decision.score >= 0.76
|
||||||
|
assert "identity_v2" in decision.tags
|
||||||
|
assert "legacy_revalidated" in decision.tags
|
||||||
|
|
||||||
|
|
||||||
|
def test_revalidator_rejects_legacy_brand_conflict():
|
||||||
|
from services.competitor_identity_revalidator import classify_legacy_competitor_row
|
||||||
|
|
||||||
|
row = {
|
||||||
|
"sku": "BAD-1",
|
||||||
|
"momo_name": "【蘭蔻】官方直營 玫瑰霜60ml+玫瑰精露150ml",
|
||||||
|
"momo_price": 18765,
|
||||||
|
"pchome_price": 1249,
|
||||||
|
"competitor_product_id": "PC-BAD",
|
||||||
|
"competitor_product_name": "LOREAL Paris 巴黎萊雅 金致臻顏花蜜奢養膠原輕盈乳霜_60ml",
|
||||||
|
"tags": [],
|
||||||
|
"is_expired": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
decision = classify_legacy_competitor_row(row)
|
||||||
|
|
||||||
|
assert decision.accepted is False
|
||||||
|
assert decision.status == "identity_veto"
|
||||||
|
assert decision.hard_veto is True
|
||||||
|
assert "brand_conflict" in decision.diagnostic
|
||||||
|
|
||||||
|
|
||||||
|
def test_dashboard_match_status_distinguishes_expired_and_legacy_rows():
|
||||||
|
from routes.dashboard_routes import _build_pchome_match_status
|
||||||
|
|
||||||
|
expired = _build_pchome_match_status(
|
||||||
|
None,
|
||||||
|
ineligible={"reason": "expired_match", "match_score": 0.91},
|
||||||
|
)
|
||||||
|
legacy = _build_pchome_match_status(
|
||||||
|
None,
|
||||||
|
ineligible={"reason": "legacy_without_identity_v2", "match_score": 0.82},
|
||||||
|
)
|
||||||
|
|
||||||
|
assert expired["label"] == "價格過期待刷新"
|
||||||
|
assert expired["tone"] == "watch"
|
||||||
|
assert legacy["label"] == "舊版配對待重驗"
|
||||||
|
assert "identity_v2" in legacy["summary"]
|
||||||
Reference in New Issue
Block a user