V10.588 提升比價救援穩定性與決策覆蓋
All checks were successful
CD Pipeline / deploy (push) Successful in 1m9s

This commit is contained in:
OoO
2026-06-04 21:21:40 +08:00
parent 5347c95b97
commit 58302c9fa7
5 changed files with 81 additions and 15 deletions

View File

@@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# System version and paths
# ==========================================
# Single source of truth for the release version; the stale pre-bump
# assignment ("V10.586") that was immediately overwritten has been dropped.
SYSTEM_VERSION = "V10.588"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL  # used for template display

View File

@@ -24,7 +24,8 @@ from sqlalchemy import inspect, text
# Score floors. Each value is reported in the coverage payload and embedded in
# the coverage cache key, so changing one also changes that cache key.
PCHOME_MATCH_SCORE_FLOOR = 0.76
# Minimum best_match_score for the catalog-comparable lane that additionally
# requires a signal reason (see _catalog_comparable_sql). The superseded 0.88
# assignment was dead code (overwritten on the next line) and is removed.
CATALOG_COMPARABLE_SCORE_FLOOR = 0.85
# Minimum best_match_score that qualifies for catalog review without a signal
# reason (see _catalog_comparable_sql).
CATALOG_IDENTITY_REVIEW_SCORE_FLOOR = 0.95
# Status names grouped as unit-comparable; presumably attempt_status values —
# confirm against the writers of that column.
UNIT_COMPARABLE_STATUSES = {"unit_comparable", "refresh_unit_comparable"}
# Unit-comparable statuses plus the status that asks for a manual unit price.
UNIT_PRICE_DECISION_STATUSES = UNIT_COMPARABLE_STATUSES | {"manual_unit_price_required"}
CATALOG_COMPARABLE_SIGNAL_REASONS = {
@@ -338,13 +339,21 @@ def _jsonb_any_array_predicate(jsonb_expr: str, values: set[str]) -> str:
def _catalog_comparable_sql(alias: str = "la") -> str:
    """Build the SQL predicate that selects catalog-comparable match attempts.

    A row qualifies when all of the following hold:

    * ``attempt_status`` is ``'true_low_confidence'``;
    * ``hard_veto`` is false (NULL counts as false);
    * ``diagnostic_codes`` matches the identity reasons and does not match
      the block reasons;
    * and either
      - ``best_match_score`` >= ``CATALOG_COMPARABLE_SCORE_FLOOR`` together
        with a signal reason, or
      - ``best_match_score`` >= ``CATALOG_IDENTITY_REVIEW_SCORE_FLOOR`` on
        its own (the high-confidence identity review lane).

    Args:
        alias: Table alias that prefixes every column reference. It is
            interpolated into the SQL text verbatim, so it must be a trusted
            identifier and never user input.

    Returns:
        A parenthesised SQL boolean expression, safe to embed in a larger
        ``WHERE`` clause.
    """
    diagnostic_codes = f"{alias}.diagnostic_codes"
    # Each predicate is presumably true when the JSONB array holds any of the
    # given reason codes — confirm against _jsonb_any_array_predicate.
    signal_sql = _jsonb_any_array_predicate(diagnostic_codes, CATALOG_COMPARABLE_SIGNAL_REASONS)
    identity_sql = _jsonb_any_array_predicate(diagnostic_codes, CATALOG_COMPARABLE_IDENTITY_REASONS)
    block_sql = _jsonb_any_array_predicate(diagnostic_codes, CATALOG_COMPARABLE_BLOCK_REASONS)
    # The score floor and signal check must live only inside the OR group:
    # repeating them as top-level AND conditions would make the identity
    # review lane unreachable.
    return f"""(
        {alias}.attempt_status = 'true_low_confidence'
        AND COALESCE({alias}.hard_veto, false) = false
        AND {identity_sql}
        AND NOT {block_sql}
        AND (
            (
                COALESCE({alias}.best_match_score, 0) >= {CATALOG_COMPARABLE_SCORE_FLOOR}
                AND {signal_sql}
            )
            OR COALESCE({alias}.best_match_score, 0) >= {CATALOG_IDENTITY_REVIEW_SCORE_FLOOR}
        )
    )"""
@@ -1178,7 +1187,7 @@ def _cached_payload(cache_key: str, producer, ttl_seconds: int = COMPETITOR_INTE
def fetch_competitor_coverage(engine) -> dict:
    """Return the competitor coverage payload through the payload cache.

    The cache key carries a schema version (``v14``) and the three score
    floors, so changing a floor or bumping the version yields a different
    key instead of reusing an entry computed under other rules. Only the
    current key is passed; the superseded ``coverage:v13`` key is dropped so
    the producer callable stays in the second positional slot.

    Args:
        engine: Database engine handed to the uncached coverage query.

    Returns:
        The coverage dictionary produced by
        ``_fetch_competitor_coverage_uncached`` (possibly a cached copy).
    """
    return _cached_payload(
        f"coverage:v14:floor={PCHOME_MATCH_SCORE_FLOOR}:catalog_floor={CATALOG_COMPARABLE_SCORE_FLOOR}:identity_floor={CATALOG_IDENTITY_REVIEW_SCORE_FLOOR}:manual_reviews=1:rescore=1:review_no_fresh=1:decision_ready=1:open_queue=1:unknown_freshness=1:decision_support=1:catalog_plan=1",
        # Deferred so the query only runs when the cache needs a fresh value.
        lambda: _fetch_competitor_coverage_uncached(engine),
    )
@@ -1435,6 +1444,7 @@ def _fetch_competitor_coverage_uncached(engine) -> dict:
"manual_accept_rate": manual_review_summary["accept_rate"],
"match_score_floor": PCHOME_MATCH_SCORE_FLOOR,
"catalog_comparable_score_floor": CATALOG_COMPARABLE_SCORE_FLOOR,
"catalog_identity_review_score_floor": CATALOG_IDENTITY_REVIEW_SCORE_FLOOR,
}

View File

@@ -939,6 +939,9 @@ def _recover_low_score_with_fresh_search(
momo_name: str,
momo_price: float = None,
existing_product_id: str = "",
max_terms: int | None = None,
max_pages: int | None = None,
max_seconds: float | None = None,
) -> tuple[Optional[tuple], list[str], int]:
"""
當 legacy / known-id 候選重評仍低分時,再跑一次 fresh keyword search
@@ -950,6 +953,9 @@ def _recover_low_score_with_fresh_search(
momo_name,
keywords=keywords,
momo_price=momo_price,
max_terms=max_terms,
max_pages=max_pages,
max_seconds=max_seconds,
)
if existing_product_id:
existing_key = _product_id_key(existing_product_id)
@@ -3053,6 +3059,7 @@ class CompetitorPriceFeeder:
source: str = "pchome",
label: str = "已確認身份價格刷新",
allow_missing_recovery: bool = True,
bounded_recovery: bool = False,
) -> FeederResult:
start = time.time()
@@ -3064,7 +3071,17 @@ class CompetitorPriceFeeder:
return FeederResult(0, 0, 0, 0, 0, 0.0)
from services.pchome_crawler import PChomeCrawler
crawler = PChomeCrawler(timeout=REQUEST_TIMEOUT, delay=RATE_DELAY)
crawler_timeout = BACKFILL_REQUEST_TIMEOUT if bounded_recovery else REQUEST_TIMEOUT
crawler = PChomeCrawler(timeout=crawler_timeout, delay=RATE_DELAY)
recovery_kwargs = (
{
"max_terms": BACKFILL_MAX_SEARCH_TERMS,
"max_pages": BACKFILL_SEARCH_MAX_PAGES,
"max_seconds": BACKFILL_MAX_SECONDS_PER_SKU,
}
if bounded_recovery
else {}
)
requested_ids = [
str(item.get("competitor_product_id") or "").strip()
@@ -3120,6 +3137,7 @@ class CompetitorPriceFeeder:
momo_name,
momo_price=momo_price,
existing_product_id=competitor_product_id,
**recovery_kwargs,
)
if recovered:
best_product, score, diagnostics = recovered
@@ -3313,6 +3331,7 @@ class CompetitorPriceFeeder:
momo_name,
momo_price=momo_price,
existing_product_id=competitor_product_id,
**recovery_kwargs,
)
if recovered:
recovered_product, recovered_score, recovered_diagnostics = recovered
@@ -3595,6 +3614,7 @@ class CompetitorPriceFeeder:
source=source,
label="identity_v2 過期價格搜尋救援",
allow_missing_recovery=True,
bounded_recovery=True,
)
def run_retryable_candidate_revalidation(

View File

@@ -83,8 +83,9 @@ def test_competitor_coverage_counts_only_active_product_intersection():
"def _fetch_manual_review_summary", 1
)[0]
assert "coverage:v13" in source
assert "coverage:v14" in source
assert "CATALOG_COMPARABLE_SCORE_FLOOR" in source
assert "CATALOG_IDENTITY_REVIEW_SCORE_FLOOR" in source
assert "rescore_accepted_count" in coverage_source
assert "(SELECT COUNT(*) FROM valid_competitor) AS valid_matches" not in coverage_source
assert "identity_competitor AS" in coverage_source
@@ -113,6 +114,27 @@ def test_competitor_coverage_counts_only_active_product_intersection():
assert "CATALOG_VARIANT_REVIEW_REASONS" in source
assert "CATALOG_UNIT_REVIEW_REASONS" in source
assert "CATALOG_COMPARABLE_IDENTITY_REASONS" in source
assert "\"catalog_identity_review_score_floor\": CATALOG_IDENTITY_REVIEW_SCORE_FLOOR" in coverage_source
def test_catalog_comparable_sql_includes_high_confidence_identity_review_lane():
from services.competitor_intel_repository import (
CATALOG_COMPARABLE_SCORE_FLOOR,
CATALOG_IDENTITY_REVIEW_SCORE_FLOOR,
_catalog_comparable_sql,
)
source = (ROOT / "services" / "competitor_intel_repository.py").read_text(encoding="utf-8")
coverage_source = source.split("def _fetch_competitor_coverage_uncached", 1)[1].split(
"def _fetch_manual_review_summary", 1
)[0]
sql = _catalog_comparable_sql("la")
assert f">= {CATALOG_COMPARABLE_SCORE_FLOOR}" in sql
assert f">= {CATALOG_IDENTITY_REVIEW_SCORE_FLOOR}" in sql
assert "CATALOG_IDENTITY_REVIEW_SCORE_FLOOR" not in sql
assert "OR COALESCE(la.best_match_score, 0)" in sql
assert "CATALOG_COMPARABLE_SIGNAL_REASONS" not in sql
assert "CATALOG_COMPARABLE_BLOCK_REASONS" in source
assert "\"identity_coverage_matches\": valid" in coverage_source
assert "\"manual_closed_count\": manual_closed_count" in coverage_source

View File

@@ -310,7 +310,8 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
assert 'attempt_status="refresh_unit_comparable"' in source
assert "mode={getattr(diagnostics, 'comparison_mode'" in source
assert 'PCHOME_FEEDER_TIMEOUT", "12"' in source
assert "PChomeCrawler(timeout=REQUEST_TIMEOUT" in source
assert "crawler_timeout = BACKFILL_REQUEST_TIMEOUT if bounded_recovery else REQUEST_TIMEOUT" in source
assert "PChomeCrawler(timeout=crawler_timeout" in source
assert "CREATE TABLE IF NOT EXISTS competitor_match_attempts" in migration
assert "attempt_status" in migration
@@ -2147,9 +2148,16 @@ def test_competitor_feeder_refresh_recovers_with_fresh_search_when_known_id_is_l
def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch):
from services.competitor_price_feeder import CompetitorPriceFeeder
from services.competitor_price_feeder import (
BACKFILL_MAX_SEARCH_TERMS,
BACKFILL_REQUEST_TIMEOUT,
BACKFILL_SEARCH_MAX_PAGES,
CompetitorPriceFeeder,
SEARCH_LIMIT,
)
from services.pchome_crawler import PChomeProduct
crawler_calls = {"init": [], "search": []}
recovered = PChomeProduct(
product_id="DDAB01-RECOVERED",
name="eve舒摩兒 賦活美學浴潔露-全肌防護 237ml",
@@ -2167,14 +2175,15 @@ def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch):
)
class FakeCrawler:
def __init__(self, *_args, **_kwargs):
pass
def __init__(self, *_args, **kwargs):
crawler_calls["init"].append(kwargs)
def fetch_product_details(self, product_ids, batch_size=20):
assert product_ids == ["DDAB01-MISSING"]
return True, "ok", []
def search_products(self, *_args, **_kwargs):
def search_products(self, *_args, **kwargs):
crawler_calls["search"].append(kwargs)
return True, "ok", [recovered]
def fake_score(_momo_name, competitor_name, **_kwargs):
@@ -2228,9 +2237,13 @@ def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch):
"product_id": 4864,
"momo_price": 441,
"competitor_product_id": "DDAB01-MISSING",
}])
}], bounded_recovery=True)
assert result.matched == 1
assert crawler_calls["init"][0]["timeout"] == BACKFILL_REQUEST_TIMEOUT
assert len(crawler_calls["search"]) <= BACKFILL_MAX_SEARCH_TERMS
assert crawler_calls["search"][0]["max_pages"] == BACKFILL_SEARCH_MAX_PAGES
assert crawler_calls["search"][0]["limit"] == SEARCH_LIMIT * BACKFILL_SEARCH_MAX_PAGES
assert writes[0]["product_id"] == "DDAB01-RECOVERED"
assert "missing_known_product_id" in writes[0]["tags"]
assert "fresh_search_recovery" in writes[0]["tags"]
@@ -2301,6 +2314,7 @@ def test_competitor_feeder_expired_recovery_allows_fresh_search(monkeypatch):
assert result.matched == 1
assert captured["skus"][0]["sku"] == "STALE-1"
assert captured["allow_missing_recovery"] is True
assert captured["bounded_recovery"] is True
assert captured["label"] == "identity_v2 過期價格搜尋救援"