This commit is contained in:
@@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
||||
# ==========================================
# System version and paths
# ==========================================
# Single authoritative version string; the superseded "V10.586" assignment was
# a dead store that was overwritten on the very next statement.
SYSTEM_VERSION = "V10.588"
# Log file location, resolved relative to BASE_DIR.
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL  # used for template display
|
||||
|
||||
|
||||
@@ -24,7 +24,8 @@ from sqlalchemy import inspect, text
|
||||
|
||||
|
||||
# Score floor reported as "match_score_floor" in the coverage payload.
PCHOME_MATCH_SCORE_FLOOR = 0.76
# Minimum best_match_score for the catalog-comparable predicate when the row
# also carries a comparable-signal diagnostic code. The earlier 0.88 assignment
# was immediately overwritten, so only the effective value is kept.
CATALOG_COMPARABLE_SCORE_FLOOR = 0.85
# Higher best_match_score floor at which the catalog-comparable predicate
# accepts a row without requiring a comparable-signal diagnostic code.
CATALOG_IDENTITY_REVIEW_SCORE_FLOOR = 0.95
# Attempt statuses treated as unit-comparable.
UNIT_COMPARABLE_STATUSES = {"unit_comparable", "refresh_unit_comparable"}
# Unit-comparable statuses plus the one that still needs a manual unit price.
UNIT_PRICE_DECISION_STATUSES = UNIT_COMPARABLE_STATUSES | {"manual_unit_price_required"}
|
||||
CATALOG_COMPARABLE_SIGNAL_REASONS = {
|
||||
@@ -338,13 +339,21 @@ def _jsonb_any_array_predicate(jsonb_expr: str, values: set[str]) -> str:
|
||||
|
||||
def _catalog_comparable_sql(alias: str = "la") -> str:
    """Build the SQL predicate selecting catalog-comparable match attempts.

    The result is a single parenthesised boolean expression over the row
    aliased by ``alias``. A row qualifies when all of the following hold:

    * ``attempt_status`` is ``'true_low_confidence'``;
    * ``hard_veto`` is false (NULL counts as false);
    * the identity-reason predicate over ``diagnostic_codes`` holds and the
      block-reason predicate does not;
    * either ``best_match_score`` reaches ``CATALOG_COMPARABLE_SCORE_FLOOR``
      and the signal-reason predicate holds, or ``best_match_score`` alone
      reaches ``CATALOG_IDENTITY_REVIEW_SCORE_FLOOR``.

    Args:
        alias: Table alias of the match-attempt row. It is interpolated into
            the SQL text verbatim, so it must be a trusted identifier and
            never user-supplied input.

    Returns:
        The SQL fragment as a string, with the score floors rendered as
        numeric literals.
    """
    diagnostic_codes = f"{alias}.diagnostic_codes"
    # Build each JSONB predicate once and reuse it below.
    signal_sql = _jsonb_any_array_predicate(diagnostic_codes, CATALOG_COMPARABLE_SIGNAL_REASONS)
    identity_sql = _jsonb_any_array_predicate(diagnostic_codes, CATALOG_COMPARABLE_IDENTITY_REASONS)
    block_sql = _jsonb_any_array_predicate(diagnostic_codes, CATALOG_COMPARABLE_BLOCK_REASONS)
    # The score and signal checks live only inside the OR group: repeating them
    # as top-level conjuncts would make the identity-review lane unreachable.
    return f"""(
        {alias}.attempt_status = 'true_low_confidence'
        AND COALESCE({alias}.hard_veto, false) = false
        AND {identity_sql}
        AND NOT {block_sql}
        AND (
            (
                COALESCE({alias}.best_match_score, 0) >= {CATALOG_COMPARABLE_SCORE_FLOOR}
                AND {signal_sql}
            )
            OR COALESCE({alias}.best_match_score, 0) >= {CATALOG_IDENTITY_REVIEW_SCORE_FLOOR}
        )
    )"""
|
||||
|
||||
|
||||
@@ -1178,7 +1187,7 @@ def _cached_payload(cache_key: str, producer, ttl_seconds: int = COMPETITOR_INTE
|
||||
|
||||
def fetch_competitor_coverage(engine) -> dict:
    """Return the competitor coverage payload through ``_cached_payload``.

    The cache key carries a version tag plus the three score floors, so a
    change to any of them produces a different key instead of reusing an
    entry computed under the old thresholds.

    Args:
        engine: Handle forwarded unchanged to
            ``_fetch_competitor_coverage_uncached`` when the payload has to
            be produced.

    Returns:
        The coverage payload dict.
    """
    # Exactly one key string is passed: a second positional string would land
    # in the ``producer`` slot and push the lambda into ``ttl_seconds``.
    return _cached_payload(
        f"coverage:v14:floor={PCHOME_MATCH_SCORE_FLOOR}:catalog_floor={CATALOG_COMPARABLE_SCORE_FLOOR}:identity_floor={CATALOG_IDENTITY_REVIEW_SCORE_FLOOR}:manual_reviews=1:rescore=1:review_no_fresh=1:decision_ready=1:open_queue=1:unknown_freshness=1:decision_support=1:catalog_plan=1",
        lambda: _fetch_competitor_coverage_uncached(engine),
    )
|
||||
|
||||
@@ -1435,6 +1444,7 @@ def _fetch_competitor_coverage_uncached(engine) -> dict:
|
||||
"manual_accept_rate": manual_review_summary["accept_rate"],
|
||||
"match_score_floor": PCHOME_MATCH_SCORE_FLOOR,
|
||||
"catalog_comparable_score_floor": CATALOG_COMPARABLE_SCORE_FLOOR,
|
||||
"catalog_identity_review_score_floor": CATALOG_IDENTITY_REVIEW_SCORE_FLOOR,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -939,6 +939,9 @@ def _recover_low_score_with_fresh_search(
|
||||
momo_name: str,
|
||||
momo_price: float = None,
|
||||
existing_product_id: str = "",
|
||||
max_terms: int | None = None,
|
||||
max_pages: int | None = None,
|
||||
max_seconds: float | None = None,
|
||||
) -> tuple[Optional[tuple], list[str], int]:
|
||||
"""
|
||||
當 legacy / known-id 候選重評仍低分時,再跑一次 fresh keyword search,
|
||||
@@ -950,6 +953,9 @@ def _recover_low_score_with_fresh_search(
|
||||
momo_name,
|
||||
keywords=keywords,
|
||||
momo_price=momo_price,
|
||||
max_terms=max_terms,
|
||||
max_pages=max_pages,
|
||||
max_seconds=max_seconds,
|
||||
)
|
||||
if existing_product_id:
|
||||
existing_key = _product_id_key(existing_product_id)
|
||||
@@ -3053,6 +3059,7 @@ class CompetitorPriceFeeder:
|
||||
source: str = "pchome",
|
||||
label: str = "已確認身份價格刷新",
|
||||
allow_missing_recovery: bool = True,
|
||||
bounded_recovery: bool = False,
|
||||
) -> FeederResult:
|
||||
start = time.time()
|
||||
|
||||
@@ -3064,7 +3071,17 @@ class CompetitorPriceFeeder:
|
||||
return FeederResult(0, 0, 0, 0, 0, 0.0)
|
||||
|
||||
from services.pchome_crawler import PChomeCrawler
|
||||
crawler = PChomeCrawler(timeout=REQUEST_TIMEOUT, delay=RATE_DELAY)
|
||||
crawler_timeout = BACKFILL_REQUEST_TIMEOUT if bounded_recovery else REQUEST_TIMEOUT
|
||||
crawler = PChomeCrawler(timeout=crawler_timeout, delay=RATE_DELAY)
|
||||
recovery_kwargs = (
|
||||
{
|
||||
"max_terms": BACKFILL_MAX_SEARCH_TERMS,
|
||||
"max_pages": BACKFILL_SEARCH_MAX_PAGES,
|
||||
"max_seconds": BACKFILL_MAX_SECONDS_PER_SKU,
|
||||
}
|
||||
if bounded_recovery
|
||||
else {}
|
||||
)
|
||||
|
||||
requested_ids = [
|
||||
str(item.get("competitor_product_id") or "").strip()
|
||||
@@ -3120,6 +3137,7 @@ class CompetitorPriceFeeder:
|
||||
momo_name,
|
||||
momo_price=momo_price,
|
||||
existing_product_id=competitor_product_id,
|
||||
**recovery_kwargs,
|
||||
)
|
||||
if recovered:
|
||||
best_product, score, diagnostics = recovered
|
||||
@@ -3313,6 +3331,7 @@ class CompetitorPriceFeeder:
|
||||
momo_name,
|
||||
momo_price=momo_price,
|
||||
existing_product_id=competitor_product_id,
|
||||
**recovery_kwargs,
|
||||
)
|
||||
if recovered:
|
||||
recovered_product, recovered_score, recovered_diagnostics = recovered
|
||||
@@ -3595,6 +3614,7 @@ class CompetitorPriceFeeder:
|
||||
source=source,
|
||||
label="identity_v2 過期價格搜尋救援",
|
||||
allow_missing_recovery=True,
|
||||
bounded_recovery=True,
|
||||
)
|
||||
|
||||
def run_retryable_candidate_revalidation(
|
||||
|
||||
@@ -83,8 +83,9 @@ def test_competitor_coverage_counts_only_active_product_intersection():
|
||||
"def _fetch_manual_review_summary", 1
|
||||
)[0]
|
||||
|
||||
assert "coverage:v13" in source
|
||||
assert "coverage:v14" in source
|
||||
assert "CATALOG_COMPARABLE_SCORE_FLOOR" in source
|
||||
assert "CATALOG_IDENTITY_REVIEW_SCORE_FLOOR" in source
|
||||
assert "rescore_accepted_count" in coverage_source
|
||||
assert "(SELECT COUNT(*) FROM valid_competitor) AS valid_matches" not in coverage_source
|
||||
assert "identity_competitor AS" in coverage_source
|
||||
@@ -113,6 +114,27 @@ def test_competitor_coverage_counts_only_active_product_intersection():
|
||||
assert "CATALOG_VARIANT_REVIEW_REASONS" in source
|
||||
assert "CATALOG_UNIT_REVIEW_REASONS" in source
|
||||
assert "CATALOG_COMPARABLE_IDENTITY_REASONS" in source
|
||||
assert "\"catalog_identity_review_score_floor\": CATALOG_IDENTITY_REVIEW_SCORE_FLOOR" in coverage_source
|
||||
|
||||
|
||||
def test_catalog_comparable_sql_includes_high_confidence_identity_review_lane():
|
||||
from services.competitor_intel_repository import (
|
||||
CATALOG_COMPARABLE_SCORE_FLOOR,
|
||||
CATALOG_IDENTITY_REVIEW_SCORE_FLOOR,
|
||||
_catalog_comparable_sql,
|
||||
)
|
||||
|
||||
source = (ROOT / "services" / "competitor_intel_repository.py").read_text(encoding="utf-8")
|
||||
coverage_source = source.split("def _fetch_competitor_coverage_uncached", 1)[1].split(
|
||||
"def _fetch_manual_review_summary", 1
|
||||
)[0]
|
||||
sql = _catalog_comparable_sql("la")
|
||||
|
||||
assert f">= {CATALOG_COMPARABLE_SCORE_FLOOR}" in sql
|
||||
assert f">= {CATALOG_IDENTITY_REVIEW_SCORE_FLOOR}" in sql
|
||||
assert "CATALOG_IDENTITY_REVIEW_SCORE_FLOOR" not in sql
|
||||
assert "OR COALESCE(la.best_match_score, 0)" in sql
|
||||
assert "CATALOG_COMPARABLE_SIGNAL_REASONS" not in sql
|
||||
assert "CATALOG_COMPARABLE_BLOCK_REASONS" in source
|
||||
assert "\"identity_coverage_matches\": valid" in coverage_source
|
||||
assert "\"manual_closed_count\": manual_closed_count" in coverage_source
|
||||
|
||||
@@ -310,7 +310,8 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
|
||||
assert 'attempt_status="refresh_unit_comparable"' in source
|
||||
assert "mode={getattr(diagnostics, 'comparison_mode'" in source
|
||||
assert 'PCHOME_FEEDER_TIMEOUT", "12"' in source
|
||||
assert "PChomeCrawler(timeout=REQUEST_TIMEOUT" in source
|
||||
assert "crawler_timeout = BACKFILL_REQUEST_TIMEOUT if bounded_recovery else REQUEST_TIMEOUT" in source
|
||||
assert "PChomeCrawler(timeout=crawler_timeout" in source
|
||||
|
||||
assert "CREATE TABLE IF NOT EXISTS competitor_match_attempts" in migration
|
||||
assert "attempt_status" in migration
|
||||
@@ -2147,9 +2148,16 @@ def test_competitor_feeder_refresh_recovers_with_fresh_search_when_known_id_is_l
|
||||
|
||||
|
||||
def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch):
|
||||
from services.competitor_price_feeder import CompetitorPriceFeeder
|
||||
from services.competitor_price_feeder import (
|
||||
BACKFILL_MAX_SEARCH_TERMS,
|
||||
BACKFILL_REQUEST_TIMEOUT,
|
||||
BACKFILL_SEARCH_MAX_PAGES,
|
||||
CompetitorPriceFeeder,
|
||||
SEARCH_LIMIT,
|
||||
)
|
||||
from services.pchome_crawler import PChomeProduct
|
||||
|
||||
crawler_calls = {"init": [], "search": []}
|
||||
recovered = PChomeProduct(
|
||||
product_id="DDAB01-RECOVERED",
|
||||
name="eve舒摩兒 賦活美學浴潔露-全肌防護 237ml",
|
||||
@@ -2167,14 +2175,15 @@ def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch):
|
||||
)
|
||||
|
||||
class FakeCrawler:
|
||||
def __init__(self, *_args, **_kwargs):
|
||||
pass
|
||||
def __init__(self, *_args, **kwargs):
|
||||
crawler_calls["init"].append(kwargs)
|
||||
|
||||
def fetch_product_details(self, product_ids, batch_size=20):
|
||||
assert product_ids == ["DDAB01-MISSING"]
|
||||
return True, "ok", []
|
||||
|
||||
def search_products(self, *_args, **_kwargs):
|
||||
def search_products(self, *_args, **kwargs):
|
||||
crawler_calls["search"].append(kwargs)
|
||||
return True, "ok", [recovered]
|
||||
|
||||
def fake_score(_momo_name, competitor_name, **_kwargs):
|
||||
@@ -2228,9 +2237,13 @@ def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch):
|
||||
"product_id": 4864,
|
||||
"momo_price": 441,
|
||||
"competitor_product_id": "DDAB01-MISSING",
|
||||
}])
|
||||
}], bounded_recovery=True)
|
||||
|
||||
assert result.matched == 1
|
||||
assert crawler_calls["init"][0]["timeout"] == BACKFILL_REQUEST_TIMEOUT
|
||||
assert len(crawler_calls["search"]) <= BACKFILL_MAX_SEARCH_TERMS
|
||||
assert crawler_calls["search"][0]["max_pages"] == BACKFILL_SEARCH_MAX_PAGES
|
||||
assert crawler_calls["search"][0]["limit"] == SEARCH_LIMIT * BACKFILL_SEARCH_MAX_PAGES
|
||||
assert writes[0]["product_id"] == "DDAB01-RECOVERED"
|
||||
assert "missing_known_product_id" in writes[0]["tags"]
|
||||
assert "fresh_search_recovery" in writes[0]["tags"]
|
||||
@@ -2301,6 +2314,7 @@ def test_competitor_feeder_expired_recovery_allows_fresh_search(monkeypatch):
|
||||
assert result.matched == 1
|
||||
assert captured["skus"][0]["sku"] == "STALE-1"
|
||||
assert captured["allow_missing_recovery"] is True
|
||||
assert captured["bounded_recovery"] is True
|
||||
assert captured["label"] == "identity_v2 過期價格搜尋救援"
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user