diff --git a/config.py b/config.py index fcd6ebe..ff0150e 100644 --- a/config.py +++ b/config.py @@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.586" +SYSTEM_VERSION = "V10.588" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/services/competitor_intel_repository.py b/services/competitor_intel_repository.py index b49ed88..93e55f0 100644 --- a/services/competitor_intel_repository.py +++ b/services/competitor_intel_repository.py @@ -24,7 +24,8 @@ from sqlalchemy import inspect, text PCHOME_MATCH_SCORE_FLOOR = 0.76 -CATALOG_COMPARABLE_SCORE_FLOOR = 0.88 +CATALOG_COMPARABLE_SCORE_FLOOR = 0.85 +CATALOG_IDENTITY_REVIEW_SCORE_FLOOR = 0.95 UNIT_COMPARABLE_STATUSES = {"unit_comparable", "refresh_unit_comparable"} UNIT_PRICE_DECISION_STATUSES = UNIT_COMPARABLE_STATUSES | {"manual_unit_price_required"} CATALOG_COMPARABLE_SIGNAL_REASONS = { @@ -338,13 +339,21 @@ def _jsonb_any_array_predicate(jsonb_expr: str, values: set[str]) -> str: def _catalog_comparable_sql(alias: str = "la") -> str: diagnostic_codes = f"{alias}.diagnostic_codes" + signal_sql = _jsonb_any_array_predicate(diagnostic_codes, CATALOG_COMPARABLE_SIGNAL_REASONS) + identity_sql = _jsonb_any_array_predicate(diagnostic_codes, CATALOG_COMPARABLE_IDENTITY_REASONS) + block_sql = _jsonb_any_array_predicate(diagnostic_codes, CATALOG_COMPARABLE_BLOCK_REASONS) return f"""( {alias}.attempt_status = 'true_low_confidence' AND COALESCE({alias}.hard_veto, false) = false - AND COALESCE({alias}.best_match_score, 0) >= {CATALOG_COMPARABLE_SCORE_FLOOR} - AND {_jsonb_any_array_predicate(diagnostic_codes, CATALOG_COMPARABLE_SIGNAL_REASONS)} - AND {_jsonb_any_array_predicate(diagnostic_codes, CATALOG_COMPARABLE_IDENTITY_REASONS)} - AND NOT {_jsonb_any_array_predicate(diagnostic_codes, CATALOG_COMPARABLE_BLOCK_REASONS)} + AND {identity_sql} + AND NOT {block_sql} + AND ( + ( + COALESCE({alias}.best_match_score, 0) >= {CATALOG_COMPARABLE_SCORE_FLOOR} + AND {signal_sql} + ) + OR COALESCE({alias}.best_match_score, 0) >= {CATALOG_IDENTITY_REVIEW_SCORE_FLOOR} + ) )""" @@ -1178,7 +1187,7 @@ def _cached_payload(cache_key: str, producer, ttl_seconds: int = COMPETITOR_INTE def fetch_competitor_coverage(engine) -> dict: return _cached_payload( - f"coverage:v13:floor={PCHOME_MATCH_SCORE_FLOOR}:catalog_floor={CATALOG_COMPARABLE_SCORE_FLOOR}:manual_reviews=1:rescore=1:review_no_fresh=1:decision_ready=1:open_queue=1:unknown_freshness=1:decision_support=1:catalog_plan=1", + f"coverage:v14:floor={PCHOME_MATCH_SCORE_FLOOR}:catalog_floor={CATALOG_COMPARABLE_SCORE_FLOOR}:identity_floor={CATALOG_IDENTITY_REVIEW_SCORE_FLOOR}:manual_reviews=1:rescore=1:review_no_fresh=1:decision_ready=1:open_queue=1:unknown_freshness=1:decision_support=1:catalog_plan=1", lambda: _fetch_competitor_coverage_uncached(engine), ) @@ -1435,6 +1444,7 @@ def _fetch_competitor_coverage_uncached(engine) -> dict: "manual_accept_rate": manual_review_summary["accept_rate"], "match_score_floor": PCHOME_MATCH_SCORE_FLOOR, "catalog_comparable_score_floor": CATALOG_COMPARABLE_SCORE_FLOOR, + "catalog_identity_review_score_floor": CATALOG_IDENTITY_REVIEW_SCORE_FLOOR, } diff --git a/services/competitor_price_feeder.py b/services/competitor_price_feeder.py index ab7b8e7..82d7dd9 100644 --- a/services/competitor_price_feeder.py +++ b/services/competitor_price_feeder.py @@ -939,6 +939,9 @@ def _recover_low_score_with_fresh_search( momo_name: str, momo_price: float = None, existing_product_id: str = "", + max_terms: int | None = None, + max_pages: int | None = None, + max_seconds: float | None = None, ) -> tuple[Optional[tuple], list[str], int]: """ 當 legacy / known-id 候選重評仍低分時,再跑一次 fresh keyword search, @@ -950,6 +953,9 @@ def _recover_low_score_with_fresh_search( momo_name, keywords=keywords, momo_price=momo_price, + max_terms=max_terms, + max_pages=max_pages, + max_seconds=max_seconds, ) if existing_product_id: existing_key = _product_id_key(existing_product_id) @@ -3053,6 +3059,7 @@ class CompetitorPriceFeeder: source: str = "pchome", label: str = "已確認身份價格刷新", allow_missing_recovery: bool = True, + bounded_recovery: bool = False, ) -> FeederResult: start = time.time() @@ -3064,7 +3071,17 @@ class CompetitorPriceFeeder: return FeederResult(0, 0, 0, 0, 0, 0.0) from services.pchome_crawler import PChomeCrawler - crawler = PChomeCrawler(timeout=REQUEST_TIMEOUT, delay=RATE_DELAY) + crawler_timeout = BACKFILL_REQUEST_TIMEOUT if bounded_recovery else REQUEST_TIMEOUT + crawler = PChomeCrawler(timeout=crawler_timeout, delay=RATE_DELAY) + recovery_kwargs = ( + { + "max_terms": BACKFILL_MAX_SEARCH_TERMS, + "max_pages": BACKFILL_SEARCH_MAX_PAGES, + "max_seconds": BACKFILL_MAX_SECONDS_PER_SKU, + } + if bounded_recovery + else {} + ) requested_ids = [ str(item.get("competitor_product_id") or "").strip() @@ -3120,6 +3137,7 @@ class CompetitorPriceFeeder: momo_name, momo_price=momo_price, existing_product_id=competitor_product_id, + **recovery_kwargs, ) if recovered: best_product, score, diagnostics = recovered @@ -3313,6 +3331,7 @@ class CompetitorPriceFeeder: momo_name, momo_price=momo_price, existing_product_id=competitor_product_id, + **recovery_kwargs, ) if recovered: recovered_product, recovered_score, recovered_diagnostics = recovered @@ -3595,6 +3614,7 @@ class CompetitorPriceFeeder: source=source, label="identity_v2 過期價格搜尋救援", allow_missing_recovery=True, + bounded_recovery=True, ) def run_retryable_candidate_revalidation( diff --git a/tests/test_competitor_intel_cache.py b/tests/test_competitor_intel_cache.py index 18f6296..99dbf05 100644 --- a/tests/test_competitor_intel_cache.py +++ b/tests/test_competitor_intel_cache.py @@ -83,8 +83,9 @@ def test_competitor_coverage_counts_only_active_product_intersection(): "def _fetch_manual_review_summary", 1 )[0] - assert "coverage:v13" in source + assert "coverage:v14" in source assert "CATALOG_COMPARABLE_SCORE_FLOOR" in source + assert "CATALOG_IDENTITY_REVIEW_SCORE_FLOOR" in source assert "rescore_accepted_count" in coverage_source assert "(SELECT COUNT(*) FROM valid_competitor) AS valid_matches" not in coverage_source assert "identity_competitor AS" in coverage_source @@ -113,6 +114,27 @@ def test_competitor_coverage_counts_only_active_product_intersection(): assert "CATALOG_VARIANT_REVIEW_REASONS" in source assert "CATALOG_UNIT_REVIEW_REASONS" in source assert "CATALOG_COMPARABLE_IDENTITY_REASONS" in source + assert "\"catalog_identity_review_score_floor\": CATALOG_IDENTITY_REVIEW_SCORE_FLOOR" in coverage_source + + +def test_catalog_comparable_sql_includes_high_confidence_identity_review_lane(): + from services.competitor_intel_repository import ( + CATALOG_COMPARABLE_SCORE_FLOOR, + CATALOG_IDENTITY_REVIEW_SCORE_FLOOR, + _catalog_comparable_sql, + ) + + source = (ROOT / "services" / "competitor_intel_repository.py").read_text(encoding="utf-8") + coverage_source = source.split("def _fetch_competitor_coverage_uncached", 1)[1].split( + "def _fetch_manual_review_summary", 1 + )[0] + sql = _catalog_comparable_sql("la") + + assert f">= {CATALOG_COMPARABLE_SCORE_FLOOR}" in sql + assert f">= {CATALOG_IDENTITY_REVIEW_SCORE_FLOOR}" in sql + assert "CATALOG_IDENTITY_REVIEW_SCORE_FLOOR" not in sql + assert "OR COALESCE(la.best_match_score, 0)" in sql + assert "CATALOG_COMPARABLE_SIGNAL_REASONS" not in sql assert "CATALOG_COMPARABLE_BLOCK_REASONS" in source assert "\"identity_coverage_matches\": valid" in coverage_source assert "\"manual_closed_count\": manual_closed_count" in coverage_source diff --git a/tests/test_competitor_match_attempts_persistence.py b/tests/test_competitor_match_attempts_persistence.py index f0a8603..6db41e3 100644 --- a/tests/test_competitor_match_attempts_persistence.py +++ b/tests/test_competitor_match_attempts_persistence.py @@ -310,7 +310,8 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes(): assert 'attempt_status="refresh_unit_comparable"' in source assert "mode={getattr(diagnostics, 'comparison_mode'" in source assert 'PCHOME_FEEDER_TIMEOUT", "12"' in source - assert "PChomeCrawler(timeout=REQUEST_TIMEOUT" in source + assert "crawler_timeout = BACKFILL_REQUEST_TIMEOUT if bounded_recovery else REQUEST_TIMEOUT" in source + assert "PChomeCrawler(timeout=crawler_timeout" in source assert "CREATE TABLE IF NOT EXISTS competitor_match_attempts" in migration assert "attempt_status" in migration @@ -2147,9 +2148,16 @@ def test_competitor_feeder_refresh_recovers_with_fresh_search_when_known_id_is_l def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch): - from services.competitor_price_feeder import CompetitorPriceFeeder + from services.competitor_price_feeder import ( + BACKFILL_MAX_SEARCH_TERMS, + BACKFILL_REQUEST_TIMEOUT, + BACKFILL_SEARCH_MAX_PAGES, + CompetitorPriceFeeder, + SEARCH_LIMIT, + ) from services.pchome_crawler import PChomeProduct + crawler_calls = {"init": [], "search": []} recovered = PChomeProduct( product_id="DDAB01-RECOVERED", name="eve舒摩兒 賦活美學浴潔露-全肌防護 237ml", @@ -2167,14 +2175,15 @@ def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch): ) class FakeCrawler: - def __init__(self, *_args, **_kwargs): - pass + def __init__(self, *_args, **kwargs): + crawler_calls["init"].append(kwargs) def fetch_product_details(self, product_ids, batch_size=20): assert product_ids == ["DDAB01-MISSING"] return True, "ok", [] - def search_products(self, *_args, **_kwargs): + def search_products(self, *_args, **kwargs): + crawler_calls["search"].append(kwargs) return True, "ok", [recovered] def fake_score(_momo_name, competitor_name, **_kwargs): @@ -2228,9 +2237,13 @@ def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch): "product_id": 4864, "momo_price": 441, "competitor_product_id": "DDAB01-MISSING", - }]) + }], bounded_recovery=True) assert result.matched == 1 + assert crawler_calls["init"][0]["timeout"] == BACKFILL_REQUEST_TIMEOUT + assert len(crawler_calls["search"]) <= BACKFILL_MAX_SEARCH_TERMS + assert crawler_calls["search"][0]["max_pages"] == BACKFILL_SEARCH_MAX_PAGES + assert crawler_calls["search"][0]["limit"] == SEARCH_LIMIT * BACKFILL_SEARCH_MAX_PAGES assert writes[0]["product_id"] == "DDAB01-RECOVERED" assert "missing_known_product_id" in writes[0]["tags"] assert "fresh_search_recovery" in writes[0]["tags"] @@ -2301,6 +2314,7 @@ def test_competitor_feeder_expired_recovery_allows_fresh_search(monkeypatch): assert result.matched == 1 assert captured["skus"][0]["sku"] == "STALE-1" assert captured["allow_missing_recovery"] is True + assert captured["bounded_recovery"] is True assert captured["label"] == "identity_v2 過期價格搜尋救援"