V10.590 修正 PChome 副標去重與比價覆核入口
All checks were successful
CD Pipeline / deploy (push) Successful in 1m7s

This commit is contained in:
OoO
2026-06-04 22:00:32 +08:00
parent 58302c9fa7
commit bb8c29e56d
9 changed files with 109 additions and 9 deletions

View File

@@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# 系統版本與路徑
# ==========================================
SYSTEM_VERSION = "V10.588"
SYSTEM_VERSION = "V10.590"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # 用於模板顯示

View File

@@ -265,6 +265,9 @@ DIFFERENCE_DIMENSION_LABELS = {
"unknown_scent_variant_conflict": "香味未明確對齊",
"nail_polish_color_name_conflict": "指彩色號不同",
"nail_polish_model_code_conflict": "指彩型號不同",
"cetaphil_moisturizer_type_alignment": "舒特膚乳霜/潤膚霜同款型別對齊",
"focused_exact_identity_cetaphil_long_moisturizing_cream_250g": "舒特膚長效潤膚霜 250g 同款",
"focused_exact_identity_cetaphil_ad_repair_cream_227g": "舒特膚 AD 修護舒敏乳霜 227g 同款",
"saugella_variant_conflict": "私密清潔款式不同",
"lactacyd_variant_conflict": "私密清潔款式不同",
"refill_pack_conflict": "補充包/正裝差異",

View File

@@ -514,6 +514,8 @@ FOCUSED_IDENTITY_VARIANT_REVIEW_BYPASS_REASONS = {
"yes_nail_tool_exact_model_size",
"cetaphil_long_lotion_237ml",
"cetaphil_long_lotion_473ml",
"cetaphil_long_moisturizing_cream_250g",
"cetaphil_ad_repair_cream_227g",
"clarins_double_serum_eye_20ml",
"lab52_paw_patrol_baby_toothbrush_2pack",
"derma_baby_wash_150ml",
@@ -590,6 +592,8 @@ FOCUSED_IDENTITY_TOTAL_PRICE_REASONS = {
"so_natural_fixx_setting_spray_120ml_plain",
"cetaphil_long_lotion_237ml",
"cetaphil_long_lotion_473ml",
"cetaphil_long_moisturizing_cream_250g",
"cetaphil_ad_repair_cream_227g",
"clarins_double_serum_eye_20ml",
"lab52_paw_patrol_baby_toothbrush_2pack",
"derma_baby_wash_150ml",
@@ -1678,6 +1682,22 @@ def _has_nivea_creme_100ml_alignment(left: ProductIdentity, right: ProductIdenti
return all("妮維雅霜" in item.searchable_name and "隨身版" in item.searchable_name for item in (left, right))
def _has_cetaphil_moisturizer_type_alignment(left: ProductIdentity, right: ProductIdentity) -> bool:
    """Report whether two Cetaphil listings are the same named cream despite a type mismatch.

    All of the following must hold:
      * both identities share a ``cetaphil`` / ``舒特膚`` brand token;
      * their product types are exactly the pair ``乳液`` and ``面霜``
        (one of each, in either order);
      * both searchable names contain the same known line name, and
        ``_has_shared_weight`` confirms that line's expected weight.

    Only the first line name found in both names is consulted; its weight
    check decides the result even when it fails.
    """
    common_brand_tokens = left.brand_tokens & right.brand_tokens
    if not ({"cetaphil", "舒特膚"} & common_brand_tokens):
        return False
    if {left.product_type, right.product_type} != {"乳液", "面霜"}:
        return False

    names = (left.searchable_name, right.searchable_name)
    # (line name that must appear in both titles, weight passed to _has_shared_weight)
    # NOTE(review): weights are presumably grams (250g / 227g) — confirm against _has_shared_weight.
    known_lines = (
        ("長效潤膚霜", 250),
        ("益膚康修護舒敏乳霜", 227),
    )
    for line_name, expected_weight in known_lines:
        if all(line_name in name for name in names):
            return _has_shared_weight(left, right, expected_weight)
    return False
def _has_refill_pack(identity: ProductIdentity) -> bool:
text = identity.normalized_name
return bool(
@@ -2336,8 +2356,15 @@ def score_marketplace_match(
spec_score, spec_conflict, spec_reasons = _spec_score(left, right)
sequence_score = SequenceMatcher(None, left.searchable_name, right.searchable_name).ratio()
chinese_name_score = _chinese_bigram_score(left, right)
nivea_creme_100ml_alignment = _has_nivea_creme_100ml_alignment(left, right)
cetaphil_moisturizer_type_alignment = _has_cetaphil_moisturizer_type_alignment(left, right)
type_aligned = (
left.product_type == right.product_type
or nivea_creme_100ml_alignment
or cetaphil_moisturizer_type_alignment
)
if left.product_type and right.product_type:
type_score = 1.0 if left.product_type == right.product_type else 0.0
type_score = 1.0 if type_aligned else 0.0
else:
type_score = 0.55
@@ -2345,11 +2372,12 @@ def score_marketplace_match(
if brand_reason:
reasons.append(brand_reason)
reasons.extend(spec_reasons)
nivea_creme_100ml_alignment = _has_nivea_creme_100ml_alignment(left, right)
if left.product_type and right.product_type and left.product_type != right.product_type and not nivea_creme_100ml_alignment:
if left.product_type and right.product_type and left.product_type != right.product_type and not type_aligned:
reasons.append("type_conflict")
if nivea_creme_100ml_alignment:
reasons.append("nivea_creme_100ml_type_alignment")
if cetaphil_moisturizer_type_alignment:
reasons.append("cetaphil_moisturizer_type_alignment")
model_line_conflict = _has_model_line_conflict(left, right)
if model_line_conflict:
reasons.append("model_line_conflict")
@@ -2564,7 +2592,7 @@ def score_marketplace_match(
hard_veto = True
if chinese_name_score < 0.16 and token_score < 0.72:
hard_veto = True
if left.product_type and right.product_type and left.product_type != right.product_type and not nivea_creme_100ml_alignment:
if left.product_type and right.product_type and left.product_type != right.product_type and not type_aligned:
hard_veto = True
if sun_protection_line_conflict:
hard_veto = True
@@ -4683,6 +4711,20 @@ def _has_focused_low_score_exact_identity_line(left: ProductIdentity, right: Pro
and _has_shared_volume(left, right, 473)
):
return "cetaphil_long_lotion_473ml"
if (
{"cetaphil", "舒特膚"} & (left.brand_tokens & right.brand_tokens)
and "長效潤膚霜" in left_text
and "長效潤膚霜" in right_text
and _has_shared_weight(left, right, 250)
):
return "cetaphil_long_moisturizing_cream_250g"
if (
{"cetaphil", "舒特膚"} & (left.brand_tokens & right.brand_tokens)
and "益膚康修護舒敏乳霜" in left_text
and "益膚康修護舒敏乳霜" in right_text
and _has_shared_weight(left, right, 227)
):
return "cetaphil_ad_repair_cream_227g"
if (
{"nivea", "妮維雅"} & (left.brand_tokens & right.brand_tokens)
and "妮維雅霜" in left_text

View File

@@ -63,6 +63,17 @@ def _compact_identity_text(value: str) -> str:
return re.sub(r"[^0-9a-zA-Z\u4e00-\u9fff]+", "", str(value or "").lower())
def _remove_display_name_from_subtitle(display_name: str, subtitle: str) -> str:
"""Remove one repeated display title from Nick while keeping useful promo/spec text."""
cleaned = str(subtitle or "").strip()
title = str(display_name or "").strip()
if not cleaned or not title:
return cleaned
if title in cleaned:
cleaned = cleaned.replace(title, " ", 1)
return re.sub(r"\s+", " ", cleaned).strip()
def _build_match_name(name: str, subtitle: str) -> str:
"""Build an identity-rich title without duplicating the PChome display name."""
display_name = str(name or '').strip()
@@ -78,6 +89,11 @@ def _build_match_name(name: str, subtitle: str) -> str:
return nick
if display_compact and display_compact == nick_compact:
return display_name
if display_name and display_compact and display_compact in nick_compact:
reduced_nick = _remove_display_name_from_subtitle(display_name, nick)
if reduced_nick:
return f"{display_name} {reduced_nick}".strip()
return display_name
return f"{display_name} {nick}".strip()

View File

@@ -49,9 +49,9 @@
<div class="dashboard-kpi-value momo-mono is-warning">{{ overview.review_queue_count | default(0) | number_format }}</div>
<div class="dashboard-kpi-metrics momo-mono" aria-label="比價覆核組成">
<a href="{{ url_for('dashboard.index', filter='pchome_review', category=current_category, q=search_query, review_status='rescore_accepted', sort_by='pchome_review', order='desc') }}" title="重算待覆核 {{ overview.rescore_accepted_count | default(0) | number_format }}"><em>重算待覆核</em><strong>{{ overview.rescore_accepted_count | default(0) | number_format }}</strong></a>
<a href="{{ url_for('dashboard.index', filter='pchome_review', category=current_category, q=search_query, review_status='catalog_identity_review', sort_by='pchome_review', order='desc') }}" title="身份採用待核 {{ overview.catalog_identity_review_count | default(0) | number_format }}"><em>身份採用</em><strong>{{ overview.catalog_identity_review_count | default(0) | number_format }}</strong></a>
<a href="{{ url_for('dashboard.index', filter='pchome_review', category=current_category, q=search_query, review_status='unit_comparable', sort_by='pchome_review', order='desc') }}" title="需單位價 {{ overview.unit_comparable_count | default(0) | number_format }}"><em>需單位價</em><strong>{{ overview.unit_comparable_count | default(0) | number_format }}</strong></a>
<span><em>待補抓</em><strong>{{ overview.pending_match_count | default(0) | number_format }}</strong></span>
<span><em>新鮮</em><strong>{{ overview.fresh_match_rate | default(0) }}%</strong></span>
</div>
</div>
</div>
@@ -123,8 +123,13 @@
<strong>單位價覆核</strong>
<em>{{ overview.unit_comparable_count | default(0) | number_format }} 筆需判斷容量、入數、組合價</em>
</a>
<a class="dashboard-decision-lane" href="{{ url_for('dashboard.index', filter='pchome_review', category=current_category, q=search_query, review_status='rescore_accepted', sort_by='pchome_review', order='desc') }}">
<a class="dashboard-decision-lane" href="{{ url_for('dashboard.index', filter='pchome_review', category=current_category, q=search_query, review_status='catalog_identity_review', sort_by='pchome_review', order='desc') }}">
<span class="momo-mono">04</span>
<strong>身份採用待核</strong>
<em>{{ overview.catalog_identity_review_count | default(0) | number_format }} 筆高信心同款,確認後可寫入正式比價</em>
</a>
<a class="dashboard-decision-lane" href="{{ url_for('dashboard.index', filter='pchome_review', category=current_category, q=search_query, review_status='rescore_accepted', sort_by='pchome_review', order='desc') }}">
<span class="momo-mono">05</span>
<strong>採用/否決候選</strong>
<em>{{ overview.rescore_accepted_count | default(0) | number_format }} 筆近門檻候選等待確認</em>
</a>

View File

@@ -127,6 +127,7 @@ def test_campaign_v2_product_table_keeps_real_operations_columns():
def test_dashboard_v2_is_production_default_and_uses_real_dashboard_data():
route_source = (ROOT / "routes/dashboard_routes.py").read_text(encoding="utf-8")
dashboard = (ROOT / "templates/dashboard_v2.html").read_text(encoding="utf-8")
dashboard_css = (ROOT / "web/static/css/page-dashboard-v2.css").read_text(encoding="utf-8")
assert "template_name = 'dashboard_v2.html'" in route_source
assert "template_name = 'dashboard.html' if request.args.get('ui') == 'legacy' else 'dashboard_v2.html'" not in route_source
@@ -192,6 +193,7 @@ def test_dashboard_v2_is_production_default_and_uses_real_dashboard_data():
assert "決策支援覆蓋率" in dashboard
assert "overview.decision_support_rate" in dashboard
assert "overview.catalog_comparable_count" in dashboard
assert "overview.catalog_identity_review_count" in dashboard
assert "比價決策焦點" in dashboard
assert "overview.match_rate" in dashboard
assert "overview.stale_match_count" in dashboard
@@ -202,6 +204,9 @@ def test_dashboard_v2_is_production_default_and_uses_real_dashboard_data():
assert "overview.review_queue" in dashboard
assert "需單位價 {{ overview.unit_comparable_count" in dashboard
assert "重算待覆核 {{ overview.rescore_accepted_count" in dashboard
assert "review_status='catalog_identity_review'" in dashboard
assert "身份採用待核" in dashboard
assert "grid-template-columns: repeat(5, minmax(0, 1fr))" in dashboard_css
assert "{% if review_total_is_estimated %}約 {% endif %}" in dashboard
assert "filter='ai_picks'" in dashboard
assert "filter='pchome_review'" in dashboard

View File

@@ -1597,6 +1597,16 @@ def test_marketplace_matcher_promotes_focused_manual_gate_exact_lines_to_total_p
"舒特膚 長效潤膚乳 473ml",
"focused_exact_identity_cetaphil_long_lotion_473ml",
),
(
"【Cetaphil 舒特膚】官方直營 長效潤膚霜 250g(臉部身體乳霜/敏感肌/保濕/B3/B5/乾燥粗糙)",
"【Cetaphil 舒特膚】長效潤膚霜250g",
"focused_exact_identity_cetaphil_long_moisturizing_cream_250g",
),
(
"【Cetaphil 舒特膚】官方直營 AD益膚康修護舒敏乳霜 227g(臉部身體乳霜/益膚保濕)",
"Cetaphil舒特膚 AD益膚康修護舒敏乳霜227g",
"focused_exact_identity_cetaphil_ad_repair_cream_227g",
),
]
for momo_name, competitor_name, expected_reason in cases:
@@ -1606,6 +1616,7 @@ def test_marketplace_matcher_promotes_focused_manual_gate_exact_lines_to_total_p
assert diagnostics.price_basis == "total_price"
assert diagnostics.alert_tier == "price_alert_exact"
assert "variant_selection_review" not in diagnostics.reasons
assert "type_conflict" not in diagnostics.reasons
if expected_reason:
assert expected_reason in diagnostics.reasons

View File

@@ -149,6 +149,24 @@ def test_pchome_match_name_deduplicates_normalized_nick_prefix():
assert match_name.count("29g") == 1
def test_pchome_match_name_deduplicates_marketing_prefix_before_title():
    """A subtitle that is 'marketing prefix + display title' must not repeat the title."""
    from services.pchome_crawler import _build_match_name

    # (display name, subtitle, expected match name, spec token that must appear once)
    cases = (
        (
            "Cetaphil舒特膚 AD益膚康修護舒敏乳霜227g",
            "《即期特賣》Cetaphil舒特膚 AD益膚康修護舒敏乳霜227g",
            "Cetaphil舒特膚 AD益膚康修護舒敏乳霜227g 《即期特賣》",
            "227g",
        ),
        (
            "【Cetaphil 舒特膚】長效潤膚霜250g",
            "48小時長效保濕升級版 【Cetaphil 舒特膚】長效潤膚霜250g",
            "【Cetaphil 舒特膚】長效潤膚霜250g 48小時長效保濕升級版",
            "250g",
        ),
    )
    for display_name, subtitle, expected, spec_token in cases:
        match_name = _build_match_name(display_name, subtitle)
        assert match_name == expected
        assert match_name.count(spec_token) == 1
def test_pchome_match_name_strips_html_marketing_noise():
from services.pchome_crawler import _build_match_name

View File

@@ -366,7 +366,7 @@
.dashboard-decision-lanes {
display: grid;
grid-template-columns: repeat(4, minmax(0, 1fr));
grid-template-columns: repeat(5, minmax(0, 1fr));
gap: 8px;
min-width: 0;
}
@@ -1364,7 +1364,7 @@
}
.dashboard-decision-lanes {
grid-template-columns: repeat(2, minmax(0, 1fr));
grid-template-columns: repeat(3, minmax(0, 1fr));
}
.dashboard-ai-summary-grid {