feat: deepen pchome momo backfill guardrails
All checks were successful
CD Pipeline / deploy (push) Successful in 1m8s

This commit is contained in:
OoO
2026-06-19 00:41:20 +08:00
parent 4c59b74ced
commit 9d84cbfd43
9 changed files with 208 additions and 6 deletions

View File

@@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# 系統版本與路徑
# ==========================================
SYSTEM_VERSION = "V10.635"
SYSTEM_VERSION = "V10.636"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # 用於模板顯示

View File

@@ -684,6 +684,34 @@ def _targeted_candidate_auto_type(candidate: dict[str, Any]) -> str:
return "manual_review"
def _targeted_candidate_needs_review(candidate: dict[str, Any]) -> bool:
    """Return True when a candidate must still be confirmed by a person.

    Last line of defense before automatic total-price sync: it keeps
    candidates that score high but whose variant is still unconfirmed out of
    the action list.

    A candidate needs review when any of the following holds:

    * ``target_hard_veto`` is exactly ``True``;
    * ``target_price_basis`` is non-blank and not ``"total_price"``;
    * ``target_alert_tier`` is non-blank and not ``"price_alert_exact"``;
    * ``target_match_reasons`` contains at least one review marker.
    """
    if candidate.get("target_hard_veto") is True:
        return True

    # A missing or blank field is tolerated; only an explicit mismatch
    # sends the candidate back to manual review.
    required_values = (
        ("target_price_basis", "total_price"),
        ("target_alert_tier", "price_alert_exact"),
    )
    for field_name, required in required_values:
        actual = str(candidate.get(field_name) or "").strip()
        if actual and actual != required:
            return True

    blocking_reasons = frozenset(
        (
            "manual_review",
            "identity_review",
            "unit_price_review",
            "variant_selection_review",
            "variant_option_conflict",
            "variant_descriptor_conflict",
            "makeup_catalog_selection_gap",
            "commercial_condition_gap",
            "count_conflict",
            "bundle_offer_conflict",
            "multi_component_conflict",
            "component_count_conflict",
        )
    )
    raw_reasons = candidate.get("target_match_reasons") or []
    return any(str(item or "") in blocking_reasons for item in raw_reasons)
def _targeted_candidate_to_external_offer(
candidate: dict[str, Any],
*,
@@ -692,6 +720,8 @@ def _targeted_candidate_to_external_offer(
auto_type = _targeted_candidate_auto_type(candidate)
if auto_type not in {"total_price", "unit_price"}:
return None, "不是可自動使用的候選"
if auto_type == "total_price" and _targeted_candidate_needs_review(candidate):
return None, "候選仍需人工確認"
momo_sku = str(candidate.get("product_id") or candidate.get("goodsCode") or candidate.get("id") or "").strip()
pchome_product_id = str(candidate.get("target_pchome_product_id") or "").strip()

View File

@@ -712,6 +712,7 @@ VARIANT_OPTION_COLOR_WORDS = {
"棕色",
"咖啡色",
"灰色",
"rose",
"白色",
"紅色",
"粉色",

View File

@@ -659,16 +659,24 @@ def search_momo_products_for_pchome_products(
continue
hard_veto = bool(getattr(diagnostics, "hard_veto", False))
comparison_mode = getattr(diagnostics, "comparison_mode", "exact_identity")
diagnostic_price_basis = str(getattr(diagnostics, "price_basis", "") or "")
diagnostic_alert_tier = str(getattr(diagnostics, "alert_tier", "") or "")
diagnostic_match_type = str(getattr(diagnostics, "match_type", "") or "")
unit_price_comparison = {}
auto_compare_type = "manual_review"
price_basis = "none"
review_status = "需人工確認"
if not hard_veto and comparison_mode == "exact_identity":
if (
not hard_veto
and comparison_mode == "exact_identity"
and diagnostic_price_basis == "total_price"
and diagnostic_alert_tier == "price_alert_exact"
):
can_auto_compare = True
auto_compare_type = "total_price"
price_basis = "total_price"
review_status = "可直接比價"
elif comparison_mode == "unit_comparable":
elif comparison_mode == "unit_comparable" or diagnostic_price_basis == "unit_price":
unit_price_comparison = build_unit_price_comparison(
momo_name,
pchome_name,
@@ -713,6 +721,8 @@ def search_momo_products_for_pchome_products(
"target_search_term": term,
"target_match_reasons": list(getattr(diagnostics, "reasons", ()) or ()),
"target_comparison_mode": comparison_mode,
"target_match_type": diagnostic_match_type,
"target_alert_tier": diagnostic_alert_tier,
"target_hard_veto": hard_veto,
"can_auto_compare": can_auto_compare,
"auto_compare_type": auto_compare_type,

View File

@@ -4,9 +4,26 @@
from __future__ import annotations
import os
from typing import Any, Callable
def _int_env(name: str, default: int, *, minimum: int, maximum: int) -> int:
    """Read an integer setting from the environment, clamped to a range.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset or is not a valid
            integer literal.
        minimum: Lower bound applied to the result.
        maximum: Upper bound applied to the result.

    Returns:
        The parsed (or default) value limited to ``[minimum, maximum]``.
    """
    try:
        parsed = int(os.getenv(name, str(default)))
    except (TypeError, ValueError):
        parsed = default
    # Cap from above first, then from below, so the lower bound wins if the
    # two bounds ever cross.
    capped = maximum if maximum < parsed else parsed
    return capped if capped > minimum else minimum
def _float_env(name: str, default: float, *, minimum: float, maximum: float) -> float:
    """Read a float setting from the environment, clamped to a range.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset, is not a valid float
            literal, or parses to NaN.
        minimum: Lower bound applied to the result.
        maximum: Upper bound applied to the result.

    Returns:
        The parsed (or default) value limited to ``[minimum, maximum]``.
    """
    # Function-scope import keeps this block self-contained; the module's
    # top-level import list is not fully visible from here.
    import math

    try:
        value = float(os.getenv(name, str(default)))
    except (TypeError, ValueError):
        value = default
    # float() accepts "nan". NaN compares False against everything, so the
    # clamp below would silently return `minimum` (the most permissive
    # threshold). Treat NaN as an invalid setting instead.
    if math.isnan(value):
        value = default
    return max(minimum, min(value, maximum))
def candidate_auto_compare_type(candidate: dict[str, Any]) -> str:
auto_type = str(candidate.get("auto_compare_type") or "").strip()
if auto_type in {"total_price", "unit_price"}:
@@ -53,9 +70,24 @@ def _default_search_candidates(targets: list[dict[str, Any]], limit: int):
return search_momo_products_for_pchome_products(
targets,
max_products=limit,
limit_per_product=6,
max_terms_per_product=4,
min_score=0.45,
limit_per_product=_int_env(
"PCHOME_GROWTH_MOMO_BACKFILL_LIMIT_PER_TERM",
8,
minimum=3,
maximum=12,
),
max_terms_per_product=_int_env(
"PCHOME_GROWTH_MOMO_BACKFILL_MAX_TERMS",
8,
minimum=3,
maximum=10,
),
min_score=_float_env(
"PCHOME_GROWTH_MOMO_BACKFILL_MIN_SCORE",
0.45,
minimum=0.35,
maximum=0.8,
),
)

View File

@@ -325,6 +325,42 @@ def test_sync_targeted_momo_candidates_writes_unit_price_offer(monkeypatch):
assert stale_marks == [True]
def test_sync_targeted_momo_candidates_skips_total_price_identity_review(monkeypatch):
    """A total-price candidate still flagged for identity review is skipped.

    The sync must report the candidate under the manual-confirmation skip
    reason, write no external offer row, and leave the growth cache
    untouched.
    """
    from services import external_market_offer_service as service

    cache_stale_calls = []
    monkeypatch.setattr(
        service,
        "mark_pchome_growth_cache_stale",
        lambda: cache_stale_calls.append(True),
    )
    engine = create_engine("sqlite:///:memory:")
    _seed_external_offer_sync_tables(engine)

    # Claims total-price auto compare, but the matcher diagnostics say the
    # variant is still unconfirmed.
    review_candidate = {
        "product_id": "12876190",
        "name": "【LAURA MERCIER 蘿拉蜜思】煥顏透明蜜粉 29g(#Rose-國際航空版)",
        "price": 809,
        "target_pchome_product_id": "PCH-LAURA",
        "target_pchome_name": "【Laura Mercier 蘿拉蜜思】煥顏透明蜜粉 29g",
        "target_pchome_price": 899,
        "target_match_score": 0.98,
        "auto_compare_type": "total_price",
        "target_price_basis": "manual_review",
        "target_alert_tier": "identity_review",
        "target_match_reasons": ["variant_selection_review", "strong_exact_spec_match"],
        "target_comparison_mode": "exact_identity",
    }

    payload = service.sync_targeted_momo_candidates_to_external_offers(
        engine,
        [review_candidate],
    )

    assert payload["success"] is True
    assert payload["candidate_count"] == 1
    assert payload["written_count"] == 0
    assert payload["skipped_reasons"] == {"候選仍需人工確認": 1}
    assert cache_stale_calls == []
    with engine.connect() as conn:
        offer_rows = conn.execute(text("SELECT COUNT(*) FROM external_offers")).scalar()
        assert offer_rows == 0
def test_external_source_readiness_uses_legacy_momo_reference_cache():
from services.external_market_offer_service import build_external_source_readiness

View File

@@ -1467,6 +1467,20 @@ def test_marketplace_matcher_promotes_recent_stale_recovery_single_spec_lines():
assert diagnostics.alert_tier == "price_alert_exact"
def test_marketplace_matcher_keeps_single_sided_rose_powder_in_identity_review():
    """A "Rose" shade named on only one title keeps the pair in identity review."""
    from services.marketplace_product_matcher import score_marketplace_match

    title_with_shade = "【LAURA MERCIER 蘿拉蜜思】煥顏透明蜜粉 Rose 29g (國際航空版)"
    title_without_shade = "【Laura Mercier 蘿拉蜜思】煥顏透明蜜粉 29g"

    result = score_marketplace_match(title_with_shade, title_without_shade)

    # Not vetoed outright, but not promoted to an exact price alert either.
    assert result.hard_veto is False
    assert (result.price_basis, result.alert_tier) == ("manual_review", "identity_review")
    assert "variant_selection_review" in result.reasons
def test_marketplace_matcher_keeps_baby_shampoo_bath_gap_in_identity_review():
from services.marketplace_product_matcher import score_marketplace_match

View File

@@ -128,3 +128,58 @@ def test_targeted_momo_search_auto_routes_unit_comparable_candidates():
assert products[0]["target_unit_price_comparison"]["comparable"] is True
assert products[0]["target_unit_price_comparison"]["unit_label"] == "ml"
assert products[0]["target_gap_pct"] < 0
def test_targeted_momo_search_keeps_identity_review_candidates_manual(monkeypatch):
    """Exact-identity matches whose diagnostics ask for review stay manual.

    The matcher is stubbed to report a high score and ``exact_identity``
    mode together with a ``manual_review`` price basis and an
    ``identity_review`` alert tier; the search must not mark the candidate
    as auto-comparable.
    """
    from services.momo_crawler import MomoProduct, search_momo_products_for_pchome_products

    class StubDiagnostics:
        score = 0.98
        hard_veto = False
        comparison_mode = "exact_identity"
        price_basis = "manual_review"
        alert_tier = "identity_review"
        match_type = "exact"
        reasons = ("variant_selection_review", "strong_exact_spec_match")

    class StubCrawler:
        def search_products(self, keyword, limit=10, sort_by="sSaleQty/dc"):
            rose_powder = MomoProduct(
                product_id="12876190",
                name="【LAURA MERCIER 蘿拉蜜思】煥顏透明蜜粉 29g(#Rose-國際航空版)",
                price=809,
                original_price=999,
                discount=19,
                image_url="",
                product_url="https://www.momoshop.com.tw/goods/GoodsDetail.jsp?i_code=12876190",
                brand="蘿拉蜜思",
                crawled_at=datetime.now(),
            )
            return True, "ok", [rose_powder]

    monkeypatch.setattr(
        "services.marketplace_product_matcher.score_marketplace_match",
        lambda *args, **kwargs: StubDiagnostics(),
    )

    pchome_target = {
        "product_id": "PCH-LAURA",
        "name": "【Laura Mercier 蘿拉蜜思】煥顏透明蜜粉 29g",
        "price": 899,
    }
    success, message, products = search_momo_products_for_pchome_products(
        [pchome_target],
        crawler=StubCrawler(),
        max_terms_per_product=1,
        limit_per_product=3,
        min_score=0.45,
    )

    assert success is True
    assert "需人工確認 1 筆" in message
    first = products[0]
    assert first["auto_compare_type"] == "manual_review"
    assert first["can_auto_compare"] is False
    assert first["target_price_basis"] == "none"
    assert first["target_alert_tier"] == "identity_review"

View File

@@ -328,6 +328,30 @@ def test_pchome_growth_momo_backfill_service_targets_unmapped_high_sales_items()
assert captured["search_limit"] == 2
def test_pchome_growth_momo_backfill_default_search_uses_deeper_terms(monkeypatch):
    """With no env overrides, the default search passes the deeper defaults.

    The crawler entry point is replaced by a recorder so the keyword
    arguments chosen by ``_default_search_candidates`` can be inspected.
    """
    from services import pchome_growth_momo_backfill_service as service

    seen = {}

    def recording_search(targets, **kwargs):
        seen["targets"] = targets
        seen["kwargs"] = kwargs
        return True, "ok", []

    # Clear any overrides so the built-in defaults are what gets exercised.
    for env_name in (
        "PCHOME_GROWTH_MOMO_BACKFILL_LIMIT_PER_TERM",
        "PCHOME_GROWTH_MOMO_BACKFILL_MAX_TERMS",
        "PCHOME_GROWTH_MOMO_BACKFILL_MIN_SCORE",
    ):
        monkeypatch.delenv(env_name, raising=False)
    monkeypatch.setattr(
        "services.momo_crawler.search_momo_products_for_pchome_products",
        recording_search,
    )

    result = service._default_search_candidates(
        [{"product_id": "PCH-1", "name": "商品"}],
        limit=3,
    )

    assert result == (True, "ok", [])
    passed = seen["kwargs"]
    assert passed["max_products"] == 3
    assert passed["limit_per_product"] == 8
    assert passed["max_terms_per_product"] == 8
    assert passed["min_score"] == 0.45
def test_pchome_growth_momo_backfill_route_calls_shared_service(monkeypatch):
from flask import Flask
from routes import ai_routes as routes