from pathlib import Path
import json
import logging
from datetime import datetime
from types import SimpleNamespace

ROOT = Path(__file__).resolve().parents[1]


def _read_repo_text(relative_path: str) -> str:
    """Return the UTF-8 text of a file addressed relative to ``ROOT``."""
    return (ROOT / relative_path).read_text(encoding="utf-8")


def _function_body(source: str, function_name: str, next_function_name: str) -> str:
    """Return the text of ``source`` between two ``def`` markers.

    The slice starts right after the first ``def {function_name}`` and stops
    before the first following ``def {next_function_name}``. When the closing
    marker is absent, everything after the opening marker is returned.

    Raises:
        AssertionError: if ``def {function_name}`` does not occur in
            ``source`` (previously this surfaced as a bare ``IndexError``).
    """
    # NOTE: plain prefix match, so "def foo" also matches "def foo_bar".
    start_marker = f"def {function_name}"
    _, found, tail = source.partition(start_marker)
    if not found:
        raise AssertionError(f"{start_marker!r} not found in inspected source")
    return tail.partition(f"def {next_function_name}")[0]


def test_competitor_dashboard_hot_paths_use_latest_price_lateral_lookup():
    """Three repository functions must use a LATERAL latest-price lookup."""
    source = _read_repo_text("services/competitor_intel_repository.py")
    coverage_body = _function_body(
        source,
        "_fetch_competitor_coverage_uncached",
        "fetch_competitor_gap_trend",
    )
    review_cte_body = _function_body(
        source,
        "_review_queue_cte_and_filter",
        "_fetch_competitor_review_queue_page_uncached",
    )
    review_sample_body = _function_body(
        source,
        "_fetch_competitor_review_queue_uncached",
        "fetch_competitor_comparison_results",
    )

    for body in (coverage_body, review_cte_body, review_sample_body):
        assert "JOIN LATERAL" in body
        assert "ORDER BY pr.timestamp DESC, pr.id DESC" in body
        assert "LIMIT 1" in body
        assert "ROW_NUMBER() OVER (PARTITION BY p.id" not in body
        assert "lm.rn = 1" not in body


def test_competitor_review_queue_page_uses_single_paged_total_query():
    """The paged review-queue function must carry total and page in one query."""
    source = _read_repo_text("services/competitor_intel_repository.py")
    page_body = _function_body(
        source,
        "_fetch_competitor_review_queue_page_uncached",
        "_fetch_competitor_review_queue_uncached",
    )

    assert "total_rows AS" in page_body
    assert "paged_rows AS" in page_body
    assert "LEFT JOIN paged_rows ON TRUE" in page_body
    assert "if count_total:" in page_body
    assert "total = int(rows[0].get(\"total_count\") or 0) if count_total and rows else -1" in page_body
    assert "SELECT COUNT(*) AS total FROM review_rows" not in page_body
    assert "rows[0].get(\"total_count\")" in page_body
    assert "if row.get(\"sku\")" in page_body


def test_competitor_review_queue_starts_from_latest_attempts_not_all_products():
    """The review-queue CTE builder must start from ``latest_attempt``."""
    source = _read_repo_text("services/competitor_intel_repository.py")
    review_cte_body = _function_body(
        source,
        "_review_queue_cte_and_filter",
        "_fetch_competitor_review_queue_page_uncached",
    )

    assert "FROM latest_attempt la" in review_cte_body
    assert "JOIN products p" in review_cte_body
    assert "JOIN LATERAL" in review_cte_body
    assert "NOT EXISTS (" in review_cte_body
    assert "cma.hard_veto" in review_cte_body
    assert "cma.diagnostic_codes" in review_cte_body
    assert "catalog_comparable" in review_cte_body
    assert "catalog_review_lane" in review_cte_body
    assert "_catalog_comparable_sql(\"la\")" in review_cte_body
    assert "_catalog_review_lane_case_sql(\"la\")" in review_cte_body
    assert "status_filter == \"catalog_comparable\"" in review_cte_body
    assert "\"catalog_variant_review\"" in review_cte_body
    assert "\"catalog_unit_review\"" in review_cte_body
    assert "\"catalog_identity_review\"" in review_cte_body
    assert "status_filter == \"true_low_confidence\"" in review_cte_body
    assert "FROM latest_momo lm" not in review_cte_body
    assert "valid_competitor AS" not in review_cte_body


def test_competitor_feeder_scores_with_pchome_match_name(monkeypatch):
    """``_rank_match_details`` must hand the scorer a name containing the pack spec."""
    from services.competitor_price_feeder import _rank_match_details
    from services.pchome_crawler import PChomeProduct

    product = PChomeProduct(
        product_id="DDDE15-A900JZ4GR",
        name="【寶拉珍選】水楊酸身體乳雙入組",
        price=1777,
        original_price=2640,
        discount=33,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDDE15-A900JZ4GR",
        stock=20,
        store="DDDE15",
        rating=None,
        review_count=0,
        is_on_sale=True,
        crawled_at=datetime.now(),
        subtitle="【寶拉珍選】水楊酸身體乳雙入組 (2%水楊酸身體乳 210ml x2)",
        match_name="【寶拉珍選】水楊酸身體乳雙入組 (2%水楊酸身體乳 210ml x2)",
    )
    captured = {}

    def fake_score(momo_name, competitor_name, **kwargs):
        # Record the competitor name the feeder chose to score against.
        captured["competitor_name"] = competitor_name
        return SimpleNamespace(score=0.91)

    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)

    ranked = _rank_match_details("【Paulas Choice 寶拉珍選】2%水楊酸身體乳210ml二入", [product])

    assert ranked[0][0] is product
    assert ranked[0][1] == 0.91
    assert "210ml x2" in captured["competitor_name"]


def test_competitor_feeder_persists_all_match_attempt_outcomes():
    """Source-level guard over the feeder module and its three migrations."""
    source = _read_repo_text("services/competitor_price_feeder.py")
    migration = _read_repo_text("migrations/023_competitor_match_attempts.sql")
    diagnostics_migration = _read_repo_text("migrations/041_competitor_match_diagnostics.sql")
    browse_migration = _read_repo_text("migrations/042_add_browse_diagnostics_to_match_attempts.sql")

    assert "attempts_written" in source
    assert "_ensure_competitor_match_attempts_table" in source
    assert "_record_match_attempt" in source
    assert "INSERT INTO competitor_match_attempts" in source
    assert "CAST(:search_terms AS jsonb)" in source
    assert 'attempt_status="matched"' in source
    assert '"recoverable_low_score"' in source
    assert '"true_low_confidence"' in source
    assert '"identity_veto"' in source
    assert 'attempt_status="no_result"' in source
    assert 'attempt_status="no_match"' in source
    assert 'attempt_status="error"' in source
    assert "_search_pchome_candidates(crawler, momo_name, search_terms, momo_price=momo_price)" in source
    assert "_prepare_browse_diagnostic" in source
    assert "browse_diagnostic_json" in source
    assert "PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED" in source
    assert 'attempt_status="protected_existing_match"' in source
    assert "_should_upsert_competitor_price" in source
    assert "_classify_low_score_attempt" in source
    assert "replace_legacy_unverified" in source
    assert "identity_v2" in source
    assert "_fetch_expired_identity_skus" in source
    assert "run_expired_identity_refresh" in source
    assert "allow_missing_recovery=False" in source
    assert "price_basis_total_price" in source
    assert "alert_tier_price_alert_exact" in source
    assert "_fetch_retryable_candidate_skus" in source
    assert "preview_retryable_candidate_revalidation" in source
    assert "preview_expired_identity_recovery" in source
    assert "_fetch_expired_identity_recovery_skus" in source
    assert "STALE_IDENTITY_RECOVERY_BLOCK_REASONS" in source
    assert "FOCUSED_IDENTITY_TOTAL_PRICE_REASONS" in source
    assert "FOCUSED_TOTAL_PRICE_IDENTITY_SQL_REASON_LIST" in source
    assert "FOCUSED_TOTAL_PRICE_SAFE_SQL_REASON_LIST" in source
    assert r"\+|[xX]\s*\d|[**]\s*\d" in source
    assert "湛藍|麋香|海洋|玫瑰|薰衣草" in source
    assert "read_only_no_crawl_no_llm_no_db_write" in source
    assert "run_retryable_candidate_revalidation" in source
    assert "run_expired_identity_search_recovery" in source

    # Expired-identity recovery candidate query.
    recovery_source = _function_body(
        source,
        "_fetch_expired_identity_recovery_skus",
        "_fetch_expired_identity_skus",
    )
    assert "expired_competitor AS" in recovery_source
    assert "JOIN LATERAL" in recovery_source
    assert "ORDER BY pr.timestamp DESC, pr.id DESC" in recovery_source
    assert "cp.expires_at IS NULL OR cp.expires_at <= CURRENT_TIMESTAMP" in recovery_source
    assert "cp.expires_at ASC NULLS FIRST" in recovery_source
    assert "ROW_NUMBER() OVER (PARTITION BY p.id" not in recovery_source
    assert "candidate_limit" in recovery_source

    # Unmatched-priority candidate query.
    unmatched_source = _function_body(
        source,
        "_fetch_unmatched_priority_skus",
        "_fetch_retryable_candidate_skus",
    )
    assert "latest_attempt AS" in unmatched_source
    assert "JOIN LATERAL" in unmatched_source
    assert "ORDER BY pr.timestamp DESC, pr.id DESC" in unmatched_source
    assert "cp.expires_at > CURRENT_TIMESTAMP" in unmatched_source
    assert "COALESCE(la.attempt_status, 'never_attempted')" in unmatched_source
    assert "generic_recall_block_pattern" in unmatched_source
    assert "p.name !~* :generic_recall_block_pattern" in unmatched_source
    assert "ROW_NUMBER() OVER (PARTITION BY p.id" not in unmatched_source

    # Expired-identity refresh query.
    expired_source = _function_body(
        source,
        "_fetch_expired_identity_skus",
        "_upsert_competitor_price",
    )
    assert "JOIN LATERAL" in expired_source
    assert "ORDER BY pr.timestamp DESC, pr.id DESC" in expired_source
    assert "cp.expires_at IS NULL OR cp.expires_at <= CURRENT_TIMESTAMP" in expired_source
    assert "cp.expires_at ASC NULLS FIRST" in expired_source
    assert "ROW_NUMBER() OVER (PARTITION BY p.id" not in expired_source

    # Retryable-candidate query.
    retryable_source = _function_body(
        source,
        "_fetch_retryable_candidate_skus",
        "preview_retryable_candidate_revalidation",
    )
    for status in (
        "'low_score'",
        "'refresh_low_score'",
        "'recoverable_low_score'",
    ):
        assert status in retryable_source

    # Only the first "la.attempt_status IN (...)" list is inspected here.
    retryable_status_list = retryable_source.split("la.attempt_status IN (", 1)[1].split(")", 1)[0]
    for status in (
        "'true_low_confidence'",
        "'unit_comparable'",
        "'refresh_unit_comparable'",
        "'identity_veto'",
    ):
        assert status not in retryable_status_list

    assert "la.attempt_status = 'true_low_confidence'" in retryable_source
    assert "la.attempt_status = 'rescore_accepted_current'" in retryable_source
    assert "COALESCE(la.best_match_score, 0) >= 0.76" in retryable_source
    assert "COALESCE(la.best_match_score, 0) >= 0.95" in retryable_source
    assert "strong_exact_spec_match" in retryable_source
    assert "focused_exact_total_price_safe" in retryable_source
    assert "FOCUSED_TOTAL_PRICE_IDENTITY_SQL_REASON_LIST" in retryable_source
    assert "COALESCE(la.best_match_score, 0) >= :match_score_floor" in retryable_source
    assert "focused_exact_total_price_safe" in retryable_source.split(
        "FOCUSED_TOTAL_PRICE_IDENTITY_SQL_REASON_LIST", 1
    )[0]
    assert "COALESCE(la.match_diagnostic_json->>'match_type', '') = 'exact'" in retryable_source
    assert "COALESCE(la.match_diagnostic_json->>'price_basis', '') = 'manual_review'" in retryable_source
    assert "微煥膚藻晶去角質凝膠" in retryable_source
    assert "舒新雪潤安撫水" in retryable_source
    assert "舒護活泉水" in retryable_source
    assert "超捍輕便刀" in retryable_source
    assert "雪肌精限量豪華禮盒" in retryable_source
    assert "focused_exact_identity_romand_juicy_lip_tint_2_catalog" not in retryable_source
    assert "focused_exact_identity_solone_longlasting_eyeliner" not in retryable_source
    assert "focused_exact_identity_summer_eve_full_skin_wash_2pack" not in retryable_source
    assert "REVALIDATABLE_REVIEW_BLOCK_SQL_REASON_LIST" in retryable_source
    assert "commercial_condition_gap" in source
    assert "variant_selection_review" in source
    assert "beauty foot" in retryable_source
    assert "蜜愛潤滑液" in retryable_source
    assert "青春露" in retryable_source
    assert "美白修護霜" in retryable_source
    assert "腳指甲剪刀" in retryable_source
    assert "極細指甲緣硬皮剪刀" in retryable_source
    assert "嬰兒高純修護凝膠" in retryable_source
    assert "FOCUSED_REVALIDATABLE_REVIEW_BLOCK_SQL_REASON_LIST" in retryable_source
    assert "花美水" in retryable_source
    assert "私密呼呼溫和潔淨慕斯" in retryable_source
    assert "果酸煥膚水凝乳" in retryable_source
    assert "嬰兒沐浴洗髮" in retryable_source
    assert "八小時潤澤護唇膏" in retryable_source
    assert "全面修復潤唇膏" in retryable_source
    assert "膠原蛋白粉底液" in retryable_source
    assert "moisture" in retryable_source
    assert "保濕修護" in retryable_source
    assert "inclear" in retryable_source
    assert "櫻克麗兒" in retryable_source
    assert "私密淨化凝膠" in retryable_source
    assert "寶寶益菌屁屁膏" in retryable_source
    assert "齒妍堂" in retryable_source
    assert "汪汪隊" in retryable_source
    assert "水果沙拉系列彩色防水眼線液筆" in retryable_source
    assert "護手霜" in retryable_source
    assert "持采亮化UV防曬水凝乳" in retryable_source
    assert "類光繚" in retryable_source
    assert "isl[a-z0-9]*[0-9]{2,3}" in retryable_source
    assert "兒茶眼部配方" in retryable_source
    assert "綿羊油" in retryable_source
    assert "德國奔月" in retryable_source
    assert "私密植萃抗菌潔淨露" in retryable_source
    assert "護潔露" in retryable_source
    assert "鋒利窄弧型剪刀" in retryable_source
    assert "晨霧純精油擴香儀" in retryable_source
    assert "pavaruni" in retryable_source
    assert "天然植物" in retryable_source
    assert "香氛蠟燭" in retryable_source
    assert "450\\\\s*g" in retryable_source
    assert "大地" in retryable_source
    assert "有機植萃" in retryable_source
    assert "護膚油" in retryable_source
    assert "2\\\\s*入" in retryable_source
    assert "COALESCE(la.hard_veto, false) = false" in retryable_source
    assert "match_diagnostic_json->>'comparison_mode'" in retryable_source
    assert "?| array[" in retryable_source
    assert "candidate_attempt AS" in retryable_source
    assert "legacy_unmasked_attempt AS" in retryable_source
    assert "JOIN latest_attempt current_la" in retryable_source
    assert "current_la.attempt_status IN (" in retryable_source
    assert "'refresh_no_result'" in retryable_source
    assert "'expired_match'" in retryable_source
    assert "cma.attempt_status IN (" in retryable_source
    assert "cma.attempted_at < current_la.attempted_at" in retryable_source
    assert "UNION ALL" in retryable_source
    assert "SELECT * FROM legacy_unmasked_attempt" in retryable_source
    assert "SELECT DISTINCT ON (la.sku, la.best_competitor_product_id)" in retryable_source
    assert "FROM candidate_attempt la" in retryable_source
    assert "JOIN LATERAL" in retryable_source
    assert "ORDER BY pr.timestamp DESC, pr.id DESC" in retryable_source
    assert "AND cp.expires_at > CURRENT_TIMESTAMP" in retryable_source
    assert "(cp.expires_at IS NULL OR cp.expires_at > CURRENT_TIMESTAMP)" not in retryable_source
    assert "latest_price.price AS momo_price" in retryable_source
    assert "ROW_NUMBER() OVER (PARTITION BY p.id" not in retryable_source
    assert "lm.rn = 1" not in retryable_source

    latest_attempt_source = retryable_source.split("latest_attempt AS", 1)[1].split(
        "candidate_attempt AS", 1
    )[0]
    assert "cma.best_competitor_product_id IS NOT NULL" not in latest_attempt_source
    assert "la.best_competitor_product_id IS NOT NULL" in retryable_source

    assert "refresh_known_identity" in source
    assert 'attempt_status="unit_comparable"' in source
    assert 'attempt_status="refresh_unit_comparable"' in source
    assert "mode={getattr(diagnostics, 'comparison_mode'" in source
    assert 'PCHOME_FEEDER_TIMEOUT", "12"' in source
    assert "crawler_timeout = BACKFILL_REQUEST_TIMEOUT if bounded_recovery else REQUEST_TIMEOUT" in source
    assert "PChomeCrawler(timeout=crawler_timeout" in source

    assert "CREATE TABLE IF NOT EXISTS competitor_match_attempts" in migration
    assert "attempt_status" in migration
    assert "search_terms" in migration
    assert "best_match_score" in migration
    assert "match_diagnostic_json" in diagnostics_migration
    assert "comparison_mode" in diagnostics_migration
    assert "diagnostic_codes" in diagnostics_migration
    assert "browse_diagnostic_json" in browse_migration
    assert "idx_comp_match_attempts_browse_diag_time" in browse_migration
    assert "competitor_product_url" in source
    assert "competitor_image_url" in source
    assert "competitor_stock" in source
    assert "error_message" in migration
    assert "idx_comp_match_attempts_sku_source_time" in migration


def test_competitor_feeder_focused_total_price_reason_gate_requires_named_identity():
    """Check membership of specific reasons in the two focused reason sets."""
    from services.competitor_price_feeder import (
        FOCUSED_TOTAL_PRICE_IDENTITY_DIAGNOSTIC_REASONS,
        FOCUSED_TOTAL_PRICE_SAFE_DIAGNOSTIC_REASONS,
    )

    assert "focused_exact_total_price_safe" in FOCUSED_TOTAL_PRICE_SAFE_DIAGNOSTIC_REASONS
    assert "focused_exact_total_price_safe" not in FOCUSED_TOTAL_PRICE_IDENTITY_DIAGNOSTIC_REASONS
    assert (
        "focused_exact_identity_pavaruni_20_scent_candle"
        in FOCUSED_TOTAL_PRICE_IDENTITY_DIAGNOSTIC_REASONS
    )
    assert (
        "focused_exact_identity_romand_juicy_lip_tint_2_catalog"
        not in FOCUSED_TOTAL_PRICE_IDENTITY_DIAGNOSTIC_REASONS
    )
    assert (
        "focused_exact_identity_solone_longlasting_eyeliner"
        not in FOCUSED_TOTAL_PRICE_IDENTITY_DIAGNOSTIC_REASONS
    )


def test_competitor_feeder_legacy_focused_identity_reason_uses_global_score_floor():
    """The focused branch of the retryable query must use ``:match_score_floor``."""
    source = _read_repo_text("services/competitor_price_feeder.py")
    retryable_source = _function_body(
        source,
        "_fetch_retryable_candidate_skus",
        "preview_retryable_candidate_revalidation",
    )
    focused_branch = retryable_source.split("focused_exact_total_price_safe", 1)[1].split(
        "REVALIDATABLE_REVIEW_BLOCK_SQL_REASON_LIST", 1
    )[0]

    assert "FOCUSED_TOTAL_PRICE_IDENTITY_SQL_REASON_LIST" in focused_branch
    assert "COALESCE(la.best_match_score, 0) >= :match_score_floor" in focused_branch
    assert "FOCUSED_TOTAL_PRICE_SAFE_SQL_REASON_LIST" not in focused_branch


def test_competitor_feeder_blocks_identity_review_from_auto_price_write():
    """Manual-review diagnostics are classified into catalog review lanes."""
    from services.competitor_price_feeder import (
        _classify_auto_write_block_attempt,
        _is_auto_price_write_safe,
    )

    identity_review = SimpleNamespace(
        hard_veto=False,
        comparison_mode="exact_identity",
        match_type="comparable",
        price_basis="manual_review",
        alert_tier="identity_review",
        reasons=("variant_selection_review", "strong_product_line_match"),
    )
    unit_review = SimpleNamespace(
        hard_veto=False,
        comparison_mode="exact_identity",
        match_type="exact",
        price_basis="manual_review",
        alert_tier="identity_review",
        reasons=("commercial_condition_gap", "strong_exact_spec_match"),
    )
    catalog_identity_review = SimpleNamespace(
        hard_veto=False,
        comparison_mode="exact_identity",
        match_type="exact",
        price_basis="manual_review",
        alert_tier="identity_review",
        reasons=("strong_exact_spec_match", "shared_identity_anchor_variant_safe"),
    )
    exact_price = SimpleNamespace(
        hard_veto=False,
        comparison_mode="exact_identity",
        match_type="exact",
        price_basis="total_price",
        alert_tier="price_alert_exact",
        reasons=(),
    )

    assert _is_auto_price_write_safe(identity_review) is False
    assert _classify_auto_write_block_attempt(0.783, identity_review) == "catalog_variant_review"
    assert _classify_auto_write_block_attempt(1.0, unit_review) == "catalog_unit_review"
    assert _classify_auto_write_block_attempt(1.0, catalog_identity_review) == "catalog_identity_review"
    assert _is_auto_price_write_safe(exact_price) is True


def test_competitor_feeder_keeps_variant_selection_review_out_of_recoverable():
    """A ``variant_selection_review`` reason classifies as ``true_low_confidence``."""
    from services.competitor_price_feeder import _classify_low_score_attempt

    diagnostics = SimpleNamespace(
        score=0.86,
        brand_score=1.0,
        token_score=0.9,
        sequence_score=0.8,
        hard_veto=False,
        comparison_mode="exact_identity",
        reasons=("variant_selection_review", "strong_product_line_match"),
    )

    assert _classify_low_score_attempt(0.86, diagnostics) == "true_low_confidence"


def test_competitor_feeder_retryable_preview_is_read_only_summary(monkeypatch):
    """The retryable preview summarises rows returned by the stubbed fetch."""
    from services.competitor_price_feeder import CompetitorPriceFeeder

    feeder = CompetitorPriceFeeder(engine=object())

    def fake_fetch(limit, min_score):
        assert limit == 2
        assert min_score == 0.70
        return [
            {
                "sku": "SKU-1",
                "name": "MOMO A",
                "competitor_product_id": "PCHOME-1",
                "competitor_product_name": "PChome A",
                "best_match_score": 0.96,
                "attempt_status": "true_low_confidence",
            },
            {
                "sku": "SKU-2",
                "name": "MOMO B",
                "competitor_product_id": "PCHOME-2",
                "competitor_product_name": "PChome B",
                "best_match_score": 0.83,
                "attempt_status": "recoverable_low_score",
            },
        ]

    monkeypatch.setattr(feeder, "_fetch_retryable_candidate_skus", fake_fetch)

    payload = feeder.preview_retryable_candidate_revalidation(limit=2, min_score=0.70)

    assert payload["candidate_count"] == 2
    assert payload["has_more"] is True
    assert payload["review_gated_count"] == 1
    assert payload["status_counts"] == {
        "true_low_confidence": 1,
        "recoverable_low_score": 1,
    }
    assert payload["examples"][0]["best_match_score"] == 0.96
    assert payload["boundary"] == "read_only_no_crawl_no_llm_no_db_write"


def test_competitor_feeder_expired_identity_recovery_preview_is_read_only(monkeypatch):
    """The expired-identity preview summarises rows returned by the stubbed fetch."""
    from services.competitor_price_feeder import CompetitorPriceFeeder

    feeder = CompetitorPriceFeeder(engine=object())

    def fake_fetch(limit):
        assert limit == 2
        return [
            {
                "sku": "SKU-1",
                "name": "MOMO A",
                "competitor_product_id": "PCHOME-1",
                "competitor_product_name": "PChome A",
                "match_score": 0.92,
                "expires_at": "2026-05-31 10:00:00",
            },
            {
                "sku": "SKU-2",
                "name": "MOMO B",
                "competitor_product_id": "PCHOME-2",
                "competitor_product_name": "PChome B",
                "match_score": 0.88,
                "expires_at": "2026-05-31 11:00:00",
            },
        ]

    monkeypatch.setattr(feeder, "_fetch_expired_identity_recovery_skus", fake_fetch)

    payload = feeder.preview_expired_identity_recovery(limit=2)

    assert payload["candidate_count"] == 2
    assert payload["has_more"] is True
    assert payload["examples"][0]["match_score"] == 0.92
    assert payload["boundary"] == "read_only_no_crawl_no_llm_no_db_write"


def test_competitor_feeder_expired_search_recovery_allows_fresh_recovery(monkeypatch):
    """Search recovery passes ``allow_missing_recovery=True`` and fetches with limit 120."""
    from services.competitor_price_feeder import CompetitorPriceFeeder, FeederResult

    feeder = CompetitorPriceFeeder(engine=object())
    calls = []

    def fake_fetch(limit):
        # The caller asks for limit=999 below; the fetch is expected to see 120.
        assert limit == 120
        return [{"sku": "SKU-1", "competitor_product_id": "PCHOME-1"}]

    def fake_run(items, **kwargs):
        calls.append((items, kwargs))
        return FeederResult(1, 1, 0, 0, 0, 0.1)

    monkeypatch.setattr(feeder, "_fetch_expired_identity_recovery_skus", fake_fetch)
    monkeypatch.setattr(feeder, "_run_known_identity_refresh_items", fake_run)

    result = feeder.run_expired_identity_search_recovery(limit=999)

    assert result.matched == 1
    assert calls[0][1]["allow_missing_recovery"] is True
    assert calls[0][1]["label"] == "identity_v2 過期價格搜尋救援"


def test_competitor_feeder_stale_recovery_blocks_variant_names():
    """Differing product names block stale recovery; identical names do not."""
    from services.competitor_price_feeder import _has_stale_identity_recovery_block

    assert _has_stale_identity_recovery_block({
        "name": "舒摩兒 生理呵護 日用型 237ml",
        "competitor_product_name": "舒摩兒 私密潔浴露 237ml",
        "diagnostic_reasons": [],
    }) is True
    assert _has_stale_identity_recovery_block({
        "name": "品牌 沐浴露 500ml x2",
        "competitor_product_name": "品牌 沐浴露 500ml",
        "diagnostic_reasons": [],
    }) is True
    assert _has_stale_identity_recovery_block({
        "name": "品牌 沐浴露 500ml",
        "competitor_product_name": "品牌 沐浴露 500ml",
        "diagnostic_reasons": [],
    }) is False


def test_competitor_feeder_records_browse_sh_plan_for_no_result(monkeypatch):
    """An empty search result records a plan-only browse.sh diagnostic."""
    from services.competitor_price_feeder import CompetitorPriceFeeder

    class FakeCrawler:
        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", []

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    feeder = CompetitorPriceFeeder(engine=object())
    attempts = []
    monkeypatch.setattr(
        feeder,
        "_record_match_attempt",
        lambda *args, **kwargs: attempts.append(kwargs),
    )

    result = feeder._run_sku_items([{
        "sku": "BROWSE001",
        "name": "MOMO 稀有專櫃組合 50ml",
        "product_id": 901,
        "momo_price": 1280,
    }])

    assert result.matched == 0
    assert result.skipped_no_result == 1
    browse_plan = attempts[0]["browse_diagnostic"]
    assert browse_plan["tool"] == "browse.sh"
    assert browse_plan["mode"] == "plan_only"
    assert browse_plan["execute_enabled"] is False
    assert browse_plan["reason"] == "no_result"
    assert browse_plan["execution"]["status"] == "disabled"
    assert browse_plan["suggested_commands"][0]["args"][0] == "get"
    assert "ecshweb.pchome.com.tw/search" in browse_plan["urls"][0]


def test_competitor_match_attempt_persists_browse_diagnostic_json():
    """``_record_match_attempt`` stores the browse diagnostic as JSON text."""
    from sqlalchemy import create_engine, text
    from services.competitor_price_feeder import CompetitorPriceFeeder

    engine = create_engine("sqlite:///:memory:")
    feeder = CompetitorPriceFeeder(engine=engine)

    feeder._record_match_attempt(
        sku="BROWSE002",
        momo_name="MOMO 取證測試商品",
        search_terms=["取證 測試"],
        attempt_status="no_result",
        browse_diagnostic={
            "tool": "browse.sh",
            "mode": "plan_only",
            "urls": ["https://ecshweb.pchome.com.tw/search/v3.3/?q=test"],
        },
    )

    with engine.connect() as conn:
        row = conn.execute(text("""
            SELECT browse_diagnostic_json
            FROM competitor_match_attempts
            WHERE sku = 'BROWSE002'
        """)).scalar_one()

    payload = json.loads(row)
    assert payload["tool"] == "browse.sh"
    assert payload["mode"] == "plan_only"


def test_match_diagnostics_payload_carries_professional_match_lanes():
    """A 2-pack vs single-pack pair yields unit-price lanes in payload and tags."""
    from services.competitor_price_feeder import _match_diagnostics_payload, _extend_match_tags
    from services.marketplace_product_matcher import score_marketplace_match

    diagnostics = score_marketplace_match(
        "理膚寶水 B5 全面修復霜 40ml x2 超值組",
        "理膚寶水 全面修復霜 B5 40ml",
        momo_price=1199,
        competitor_price=679,
    )

    payload = _match_diagnostics_payload(diagnostics)
    tags = _extend_match_tags([], diagnostics)

    assert payload["match_type"] == "same_product_different_pack"
    assert payload["price_basis"] == "unit_price"
    assert payload["alert_tier"] == "unit_price_review"
    assert "unit_comparable" in payload["evidence_flags"]
    assert payload["identity_evidence"]["version"] == "identity_evidence_v1"
    assert payload["identity_evidence"]["lane"]["price_basis"] == "unit_price"
    assert payload["identity_evidence"]["specs"]["mismatches"][0]["field"] == "count"
    assert payload["offer_evidence"]["version"] == "offer_evidence_v1"
    assert payload["offer_evidence"]["price_is_identity_evidence"] is False
    assert payload["offer_evidence"]["gap_pct"] == 76.58
    assert "match_type_same_product_different_pack" in tags
    assert "price_basis_unit_price" in tags
    assert "alert_tier_unit_price_review" in tags


def test_competitor_match_review_service_closes_human_review_loop():
    """Source-level guard over the review service, migration, feeder and dashboard JS."""
    service_source = _read_repo_text("services/competitor_match_review_service.py")
    migration = _read_repo_text("migrations/039_create_competitor_match_reviews.sql")
    dashboard_js = _read_repo_text("web/static/js/page-dashboard-v2.js")
    # Read once; three assertions below inspect the same file.
    feeder_source = _read_repo_text("services/competitor_price_feeder.py")

    assert "VALID_REVIEW_ACTIONS" in service_source
    assert "accept_identity" in service_source
    assert "reject_identity" in service_source
    assert "unit_price_required" in service_source
    assert "manual_accepted" in service_source
    assert "manual_rejected" in service_source
    assert "manual_unit_price_required" in service_source
    assert "INSERT INTO competitor_match_reviews" in service_source
    assert "INSERT INTO competitor_prices" in service_source
    assert "INSERT INTO competitor_price_history" in service_source
    assert "manual_review" in service_source
    assert "manual_accept" in service_source
    assert "_fetch_latest_manual_review_for_candidate" in feeder_source
    assert "manual_review_rejected" in feeder_source
    assert "manual_accept_override" in feeder_source
    assert "CREATE TABLE IF NOT EXISTS competitor_match_reviews" in migration
    assert "review_action" in migration
    assert "reviewer_identity" in migration
    assert "candidate_diagnostic" in migration
    assert "idx_comp_match_reviews_sku_source_time" in migration
    assert "runPchomeReviewDecision" in dashboard_js
    assert "/api/pchome-review/" in dashboard_js


def test_manual_review_attempt_preserves_match_diagnostics_when_supported():
    """A ``unit_price_required`` review keeps the prior attempt's diagnostics."""
    from sqlalchemy import create_engine, text
    from services.competitor_match_review_service import record_competitor_match_review

    engine = create_engine("sqlite:///:memory:")
    diagnostic = {
        "match_type": "same_product_different_pack",
        "price_basis": "unit_price",
        "alert_tier": "unit_price_review",
        "reasons": ["unit_comparable"],
    }

    with engine.begin() as conn:
        conn.execute(text("CREATE TABLE products (id INTEGER PRIMARY KEY, i_code TEXT, name TEXT)"))
        conn.execute(text("CREATE TABLE price_records (id INTEGER PRIMARY KEY, product_id INTEGER, price NUMERIC, timestamp TEXT)"))
        conn.execute(text("""
            CREATE TABLE competitor_match_attempts (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                sku TEXT,
                source TEXT,
                momo_product_id INTEGER,
                momo_product_name TEXT,
                momo_price NUMERIC,
                search_terms TEXT,
                candidate_count INTEGER,
                attempt_status TEXT,
                best_competitor_product_id TEXT,
                best_competitor_product_name TEXT,
                best_competitor_price NUMERIC,
                best_match_score NUMERIC,
                error_message TEXT,
                match_diagnostic_json TEXT,
                comparison_mode TEXT,
                hard_veto INTEGER,
                diagnostic_codes TEXT,
                attempted_at TEXT
            )
        """))
        conn.execute(text("""
            CREATE TABLE competitor_prices (
                sku TEXT,
                source TEXT,
                price NUMERIC,
                competitor_product_id TEXT,
                competitor_product_name TEXT,
                match_score NUMERIC,
                tags TEXT,
                crawled_at TEXT,
                expires_at TEXT
            )
        """))
        conn.execute(text("INSERT INTO products VALUES (1, 'A006', '理膚寶水 B5 全面修復霜 40ml x2')"))
        conn.execute(text("INSERT INTO price_records VALUES (1, 1, 1199, '2026-05-20 09:00:00')"))
        conn.execute(text("""
            INSERT INTO competitor_match_attempts
                (sku, source, momo_product_id, momo_product_name, momo_price,
                 search_terms, candidate_count, attempt_status,
                 best_competitor_product_id, best_competitor_product_name,
                 best_competitor_price, best_match_score, error_message,
                 match_diagnostic_json, comparison_mode, hard_veto,
                 diagnostic_codes, attempted_at)
            VALUES
                (:sku, 'pchome', 1, :momo_name, 1199, '[]', 1, 'unit_comparable',
                 'DABC01-B5', :pc_name, 679, 0.742,
                 'score=0.742; reasons=unit_comparable', :diagnostic,
                 'unit_comparable', 0, '["unit_comparable"]', '2026-05-20 09:10:00')
        """), {
            "sku": "A006",
            "momo_name": "理膚寶水 B5 全面修復霜 40ml x2",
            "pc_name": "理膚寶水 全面修復霜 B5 40ml",
            "diagnostic": json.dumps(diagnostic, ensure_ascii=False),
        })

    result = record_competitor_match_review(
        engine,
        sku="A006",
        review_action="unit_price_required",
        reviewer_identity="pytest",
    )

    assert result["success"] is True
    with engine.connect() as conn:
        latest = conn.execute(text("""
            SELECT attempt_status, match_diagnostic_json, comparison_mode, diagnostic_codes
            FROM competitor_match_attempts
            WHERE sku = 'A006'
            ORDER BY id DESC
            LIMIT 1
        """)).mappings().first()
        review_diagnostic = conn.execute(text("""
            SELECT candidate_diagnostic
            FROM competitor_match_reviews
            WHERE sku = 'A006'
            ORDER BY id DESC
            LIMIT 1
        """)).scalar_one()

    payload = json.loads(latest["match_diagnostic_json"])
    assert latest["attempt_status"] == "manual_unit_price_required"
    assert latest["comparison_mode"] == "unit_comparable"
    assert json.loads(latest["diagnostic_codes"]) == ["unit_comparable"]
    assert payload["price_basis"] == "unit_price"
    assert "match_diagnostic_json=" in review_diagnostic
    assert "unit_comparable" in review_diagnostic


def test_reject_review_expires_current_formal_price():
    """A ``reject_identity`` review changes ``expires_at`` and logs ``manual_rejected``."""
    from sqlalchemy import create_engine, text
    from services.competitor_match_review_service import record_competitor_match_review

    engine = create_engine("sqlite:///:memory:")

    with engine.begin() as conn:
        conn.execute(text("CREATE TABLE products (id INTEGER PRIMARY KEY, i_code TEXT, name TEXT)"))
        conn.execute(text("CREATE TABLE price_records (id INTEGER PRIMARY KEY, product_id INTEGER, price NUMERIC, timestamp TEXT)"))
        conn.execute(text("""
            CREATE TABLE competitor_match_attempts (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                sku TEXT,
                source TEXT,
                momo_product_id INTEGER,
                momo_product_name TEXT,
                momo_price NUMERIC,
                search_terms TEXT,
                candidate_count INTEGER,
                attempt_status TEXT,
                best_competitor_product_id TEXT,
                best_competitor_product_name TEXT,
                best_competitor_price NUMERIC,
                best_match_score NUMERIC,
                error_message TEXT,
                attempted_at TEXT
            )
        """))
        conn.execute(text("""
            CREATE TABLE competitor_prices (
                sku TEXT,
                source TEXT,
                price NUMERIC,
                original_price NUMERIC,
                discount_pct INTEGER,
                competitor_product_id TEXT,
                competitor_product_name TEXT,
                match_score NUMERIC,
                tags TEXT,
                crawled_at TEXT,
                expires_at TEXT
            )
        """))
        conn.execute(text("INSERT INTO products VALUES (1, 'A005', '舒特膚 AD 乳液 200ml')"))
        conn.execute(text("INSERT INTO price_records VALUES (1, 1, 980, '2026-05-20 09:00:00')"))
        conn.execute(text("""
            INSERT INTO competitor_match_attempts
                (sku, source, momo_product_id, momo_product_name, momo_price,
                 search_terms, candidate_count, attempt_status,
                 best_competitor_product_id, best_competitor_product_name,
                 best_competitor_price, best_match_score, error_message, attempted_at)
            VALUES
                ('A005', 'pchome', 1, '舒特膚 AD 乳液 200ml', 980, '[]', 1,
                 'protected_existing_match', 'DDAB01-REJECT', '舒特膚 AD 乳液 200ml',
                 899, 0.84, 'score=0.84', '2026-05-20 09:10:00')
        """))
        conn.execute(text("""
            INSERT INTO competitor_prices
                (sku, source, price, competitor_product_id, competitor_product_name,
                 match_score, tags, crawled_at, expires_at)
            VALUES
                ('A005', 'pchome', 899, 'DDAB01-REJECT', '舒特膚 AD 乳液 200ml',
                 0.84, '["identity_v2"]', '2026-05-20 09:10:00', '2099-01-01 00:00:00')
        """))

    result = record_competitor_match_review(
        engine,
        sku="A005",
        review_action="reject_identity",
        reviewer_identity="pytest",
    )

    assert result["success"] is True
    with engine.connect() as conn:
        expires_at = conn.execute(text("""
            SELECT expires_at
            FROM competitor_prices
            WHERE sku = 'A005' AND source = 'pchome'
        """)).scalar()
        manual_status = conn.execute(text("""
            SELECT attempt_status
            FROM competitor_match_attempts
            WHERE sku = 'A005'
            ORDER BY id DESC
            LIMIT 1
        """)).scalar()

    assert expires_at != "2099-01-01 00:00:00"
    assert manual_status == "manual_rejected"


def test_competitor_feeder_respects_manual_rejected_candidate(monkeypatch):
    """A candidate with a stored ``reject_identity`` review is not written."""
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    product = PChomeProduct(
        product_id="DDAB01-REJECTED",
        name="舒特膚 AD 乳液 200ml",
        price=899,
        original_price=999,
        discount=10,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-REJECTED",
        stock=20,
        store="24h",
        rating=4.7,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [product]

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    feeder = CompetitorPriceFeeder(engine=object())
    attempts = []
    writes = []
    monkeypatch.setattr(
        feeder,
        "_fetch_latest_manual_review_for_candidate",
        lambda *_args, **_kwargs: {"review_action": "reject_identity"},
    )
    monkeypatch.setattr(
        feeder,
        "_record_match_attempt",
        lambda *args, **kwargs: attempts.append(kwargs),
    )
    monkeypatch.setattr(
        feeder,
        "_upsert_competitor_price",
        lambda *args, **kwargs: writes.append((args, kwargs)),
    )

    result = feeder._run_sku_items([{
        "sku": "A003",
        "name": "舒特膚 AD 乳液 200ml",
        "product_id": 3,
        "momo_price": 980,
    }])

    assert result.matched == 0
    assert result.skipped_low_score == 1
    assert writes == []
    assert attempts[0]["attempt_status"] == "manual_rejected"
    assert "manual_review_rejected" in attempts[0]["error_message"]


def test_competitor_feeder_skips_rejected_candidate_and_uses_next_best(monkeypatch):
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    rejected = PChomeProduct(
        product_id="DDAB01-REJECTED",
        name="舒特膚 AD 乳液 200ml 舊候選",
        price=899,
        original_price=999,
        discount=10,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-REJECTED",
        stock=20,
        store="24h",
        rating=4.7,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )
    accepted = PChomeProduct(
        product_id="DDAB01-ACCEPTABLE",
        name="舒特膚 AD 乳液 200ml 新候選",
        price=909,
        original_price=999,
        discount=9,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-ACCEPTABLE",
        stock=20,
        store="24h",
        rating=4.6,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [rejected, accepted]

    def fake_score(_momo_name, competitor_name, **_kwargs):
        score = 0.95 if "舊候選" in competitor_name else 0.84
        return SimpleNamespace(
            score=score,
            brand_score=1.0,
            token_score=0.9,
            spec_score=1.0,
            sequence_score=0.8,
            type_score=1.0,
            price_penalty=0.0,
            hard_veto=False,
            reasons=(),
            comparison_mode="exact_identity",
            match_type="exact",
            price_basis="total_price",
            alert_tier="price_alert_exact",
            tags=["identity_v2", "comparison_exact_identity"],
        )

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    attempts = []
    writes = []
    monkeypatch.setattr(
        feeder,
        "_fetch_latest_manual_review_for_candidate",
        lambda _sku, candidate_id, **_kwargs: (
            {"review_action": "reject_identity"}
            if candidate_id == "DDAB01-REJECTED"
            else None
        ),
    )
    monkeypatch.setattr(
        feeder,
        "_should_upsert_competitor_price",
        lambda *_args, **_kwargs: (True, "new_match"),
    )
    monkeypatch.setattr(
        feeder,
        "_record_match_attempt",
        lambda *args, **kwargs: attempts.append(kwargs),
    )
    monkeypatch.setattr(
        feeder,
        "_upsert_competitor_price",
        lambda *args, **kwargs: writes.append((args, kwargs)),
    )

    result = feeder._run_sku_items([{
        "sku": "A004",
        "name": "舒特膚 AD 乳液 200ml",
        "product_id": 4,
        "momo_price": 980,
    }])

    assert result.matched
== 1 assert writes[0][0][1].product_id == "DDAB01-ACCEPTABLE" assert attempts[0]["attempt_status"] == "matched" assert attempts[0]["best_product"].product_id == "DDAB01-ACCEPTABLE" def test_competitor_feeder_splits_hard_veto_from_low_score(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder from services.pchome_crawler import PChomeProduct product = PChomeProduct( product_id="DDAB01-WRONG", name="iPhone 16 Pro 保護膜", price=399, original_price=499, discount=20, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-WRONG", stock=20, store="24h", rating=4.7, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self, *_args, **_kwargs): pass def search_products(self, *_args, **_kwargs): return True, "ok", [product] def fake_score(*_args, **_kwargs): return SimpleNamespace( score=0.31, brand_score=0.0, token_score=0.1, spec_score=0.55, sequence_score=0.1, type_score=0.55, price_penalty=0.0, hard_veto=True, reasons=("brand_conflict", "product_line_conflict"), comparison_mode="not_comparable", tags=["identity_v2", "identity_veto"], ) monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) feeder = CompetitorPriceFeeder(engine=object()) attempts = [] monkeypatch.setattr( feeder, "_record_match_attempt", lambda *args, **kwargs: attempts.append(kwargs), ) result = feeder._run_sku_items([{ "sku": "A006", "name": "【TAICEND 泰陞】寶貝液體保護膜 屁屁噴 100ml", "product_id": 6, "momo_price": 399, }]) assert result.matched == 0 assert result.skipped_low_score == 1 assert attempts[0]["attempt_status"] == "identity_veto" assert attempts[0]["diagnostics"].hard_veto is True def test_competitor_feeder_marks_near_threshold_same_line_as_recoverable(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder from services.pchome_crawler import PChomeProduct product = PChomeProduct( 
product_id="DDAB01-RECOVERABLE", name="Recipe Box 韓兔 兒童防曬氣墊粉餅", price=699, original_price=799, discount=12, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-RECOVERABLE", stock=20, store="24h", rating=4.7, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self, *_args, **_kwargs): pass def search_products(self, *_args, **_kwargs): return True, "ok", [product] def fake_score(*_args, **_kwargs): return SimpleNamespace( score=0.754, brand_score=1.0, token_score=0.59, spec_score=0.55, sequence_score=0.53, type_score=1.0, price_penalty=0.0, hard_veto=False, reasons=("strong_product_line_match",), comparison_mode="exact_identity", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) feeder = CompetitorPriceFeeder(engine=object()) attempts = [] monkeypatch.setattr( feeder, "_record_match_attempt", lambda *args, **kwargs: attempts.append(kwargs), ) result = feeder._run_sku_items([{ "sku": "RB001", "name": "【Recipebox】Recipe Box兒童防曬氣墊粉餅(兒童化妝品/無毒防曬粉餅/天然彩妝)", "product_id": 8, "momo_price": 699, }]) assert result.matched == 0 assert result.skipped_low_score == 1 assert attempts[0]["attempt_status"] == "recoverable_low_score" def test_competitor_feeder_marks_weak_identity_as_true_low_confidence(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder from services.pchome_crawler import PChomeProduct product = PChomeProduct( product_id="DDAB01-WEAK", name="韓系彩妝 十色眼影盤", price=499, original_price=699, discount=28, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-WEAK", stock=20, store="24h", rating=4.2, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self, *_args, **_kwargs): pass def search_products(self, *_args, **_kwargs): return True, "ok", [product] 
def fake_score(*_args, **_kwargs): return SimpleNamespace( score=0.733, brand_score=0.95, token_score=0.51, spec_score=0.45, sequence_score=0.44, type_score=0.55, price_penalty=0.0, hard_veto=False, reasons=(), comparison_mode="exact_identity", tags=["identity_v2", "comparison_exact_identity"], ) monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) feeder = CompetitorPriceFeeder(engine=object()) attempts = [] monkeypatch.setattr( feeder, "_record_match_attempt", lambda *args, **kwargs: attempts.append(kwargs), ) result = feeder._run_sku_items([{ "sku": "RM001", "name": "【rom&nd】勝過眼皮十色眼影盤", "product_id": 9, "momo_price": 499, }]) assert result.matched == 0 assert result.skipped_low_score == 1 assert attempts[0]["attempt_status"] == "true_low_confidence" def test_competitor_feeder_keeps_variant_selection_review_out_of_recoverable_queue(): from types import SimpleNamespace from services.competitor_price_feeder import _classify_low_score_attempt diagnostics = SimpleNamespace( hard_veto=False, reasons=("variant_selection_review", "shared_identity_anchor_packaging_variant"), brand_score=1.0, token_score=0.72, sequence_score=0.66, comparison_mode="exact_identity", ) assert _classify_low_score_attempt(0.734, diagnostics) == "true_low_confidence" def test_competitor_feeder_does_not_treat_spec_only_match_as_recoverable(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder from services.pchome_crawler import PChomeProduct product = PChomeProduct( product_id="DDAB01-SPEC", name="LANCOME 蘭蔻 超極限肌因精華露150ml 專櫃公司貨", price=3200, original_price=3600, discount=11, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-SPEC", stock=20, store="24h", rating=4.6, review_count=12, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self, *_args, **_kwargs): pass def search_products(self, *_args, **_kwargs): 
return True, "ok", [product] def fake_score(*_args, **_kwargs): return SimpleNamespace( score=0.748, brand_score=1.0, token_score=0.42, spec_score=1.0, sequence_score=0.49, type_score=0.55, price_penalty=0.0, hard_veto=False, reasons=("strong_exact_spec_match",), comparison_mode="exact_identity", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) feeder = CompetitorPriceFeeder(engine=object()) attempts = [] monkeypatch.setattr( feeder, "_record_match_attempt", lambda *args, **kwargs: attempts.append(kwargs), ) result = feeder._run_sku_items([{ "sku": "LAN001", "name": "【LANCOME 蘭蔻】官方直營 超極光活粹晶露150ml", "product_id": 10, "momo_price": 3200, }]) assert result.matched == 0 assert result.skipped_low_score == 1 assert attempts[0]["attempt_status"] == "true_low_confidence" def test_competitor_feeder_downgrades_variant_selection_gap_from_recoverable(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder from services.pchome_crawler import PChomeProduct products = [ PChomeProduct( product_id="DDAB01-08", name="PERIPERA 雙頭旋轉極細眉筆 08深杏色 0.05g", price=180, original_price=220, discount=18, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-08", stock=20, store="24h", rating=4.7, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ), PChomeProduct( product_id="DDAB01-09", name="PERIPERA 雙頭旋轉極細眉筆 09灰褐棕 0.05g", price=180, original_price=220, discount=18, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-09", stock=20, store="24h", rating=4.7, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ), PChomeProduct( product_id="DDAB01-11", name="PERIPERA 雙頭旋轉極細眉筆 11摩卡灰褐 0.05g", price=180, original_price=220, discount=18, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-11", stock=20, store="24h", rating=4.7, 
review_count=8, is_on_sale=True, crawled_at=datetime.now(), ), ] class FakeCrawler: def __init__(self, *_args, **_kwargs): pass def search_products(self, *_args, **_kwargs): return True, "ok", products def fake_score(_momo_name, competitor_name, **_kwargs): return SimpleNamespace( score=0.734 if "09灰褐棕" in competitor_name else 0.733, brand_score=1.0, token_score=0.74, spec_score=0.55, sequence_score=0.66, type_score=0.55, price_penalty=0.0, hard_veto=False, reasons=("shared_identity_anchor_packaging_variant",), comparison_mode="exact_identity", match_type="exact", price_basis="total_price", alert_tier="price_alert_exact", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) feeder = CompetitorPriceFeeder(engine=object()) attempts = [] monkeypatch.setattr( feeder, "_record_match_attempt", lambda *args, **kwargs: attempts.append(kwargs), ) result = feeder._run_sku_items([{ "sku": "P001", "name": "【peripera官方直營】雙頭旋轉極細眉筆_多色任選(1.5mm極細筆頭)", "product_id": 11, "momo_price": 180, }]) assert result.matched == 0 assert result.skipped_low_score == 1 assert attempts[0]["attempt_status"] == "true_low_confidence" def test_competitor_feeder_treats_choose_one_offer_as_missing_variant_signal(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder from services.pchome_crawler import PChomeProduct product = PChomeProduct( product_id="DDAB01-YSL", name="【YSL聖羅蘭】恆久完美透膚煙染腮紅 6g ( #12/ #57/ #93)", price=1650, original_price=1780, discount=7, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-YSL", stock=20, store="24h", rating=4.7, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self, *_args, **_kwargs): pass def search_products(self, *_args, **_kwargs): return True, "ok", [product] def fake_score(*_args, **_kwargs): return 
SimpleNamespace( score=0.735, brand_score=1.0, token_score=0.74, spec_score=0.55, sequence_score=0.66, type_score=1.0, price_penalty=0.0, hard_veto=False, reasons=("shared_identity_anchor_packaging_variant",), comparison_mode="exact_identity", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) feeder = CompetitorPriceFeeder(engine=object()) attempts = [] monkeypatch.setattr( feeder, "_record_match_attempt", lambda *args, **kwargs: attempts.append(kwargs), ) result = feeder._run_sku_items([{ "sku": "Y001", "name": "【YSL】官方直營 恆久完美透膚煙染腮紅(腮紅/任選1款/新品上市)", "product_id": 12, "momo_price": 1650, }]) assert result.matched == 0 assert result.skipped_low_score == 1 assert attempts[0]["attempt_status"] == "true_low_confidence" def test_should_upsert_allows_same_identity_candidate_to_replace_lower_score(): from sqlalchemy import create_engine, text from services.competitor_price_feeder import CompetitorPriceFeeder engine = create_engine("sqlite:///:memory:") with engine.begin() as conn: conn.execute(text(""" CREATE TABLE competitor_prices ( sku TEXT, source TEXT, competitor_product_id TEXT, competitor_product_name TEXT, match_score REAL, tags TEXT ) """)) conn.execute(text(""" INSERT INTO competitor_prices ( sku, source, competitor_product_id, competitor_product_name, match_score, tags ) VALUES ( '13701353', 'pchome', 'DDBH69-A900JVFUJ', 'LANCOME 蘭蔻 零粉感超持久柔焦蜜粉餅(10g)#00 UNIVERSAL-公司貨', 0.788, '[\"identity_v2\",\"comparison_exact_identity\",\"brand_match\"]' ) """)) feeder = CompetitorPriceFeeder(engine=engine) product = SimpleNamespace( product_id="DDBH4E-A900JS80T", name="《LANCOME 蘭蔻》零粉感超持久柔焦蜜粉餅 10g", ) should_write, reason = feeder._should_upsert_competitor_price( "13701353", product, 0.811, source="pchome", ) assert should_write is True assert 
reason.startswith("replace_same_identity_better_score=0.788->0.811") def test_competitor_feeder_replaces_stale_existing_identity_when_current_matcher_rejects_it(): from sqlalchemy import create_engine, text from services.competitor_price_feeder import CompetitorPriceFeeder engine = create_engine("sqlite:///:memory:") with engine.begin() as conn: conn.execute(text(""" CREATE TABLE competitor_prices ( sku TEXT, source TEXT, competitor_product_id TEXT, competitor_product_name TEXT, match_score REAL, tags TEXT ) """)) conn.execute(text(""" INSERT INTO competitor_prices ( sku, source, competitor_product_id, competitor_product_name, match_score, tags ) VALUES ( '6911139', 'pchome', 'OLD-WRONG', '自白肌 極潤玻尿酸精華霜50g', 0.802, '[\"identity_v2\",\"comparison_exact_identity\",\"brand_match\"]' ) """)) feeder = CompetitorPriceFeeder(engine=engine) product = SimpleNamespace( product_id="NEW-RIGHT", name="自白肌極潤玻尿酸精華乳200ml", ) should_write, reason = feeder._should_upsert_competitor_price( "6911139", product, 0.778, momo_name="【自白肌】官方直營 極潤玻尿酸精華乳200ml", source="pchome", ) assert should_write is True assert reason.startswith("replace_stale_existing_identity=0.802->") def test_competitor_feeder_marks_existing_stronger_match_as_protected(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder from services.pchome_crawler import PChomeProduct product = PChomeProduct( product_id="DDAB01-NEW", name="PONY EFFECT 絕對持久定妝噴霧", price=599, original_price=699, discount=14, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-NEW", stock=20, store="24h", rating=4.7, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self, *_args, **_kwargs): pass def search_products(self, *_args, **_kwargs): return True, "ok", [product] def fake_score(*_args, **_kwargs): return SimpleNamespace( score=0.781, brand_score=1.0, token_score=0.79, spec_score=0.55, sequence_score=0.68, type_score=0.55, price_penalty=0.0, hard_veto=False, 
reasons=("shared_identity_anchor_packaging_variant",), comparison_mode="exact_identity", match_type="exact", price_basis="total_price", alert_tier="price_alert_exact", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) feeder = CompetitorPriceFeeder(engine=object()) attempts = [] writes = [] monkeypatch.setattr( feeder, "_record_match_attempt", lambda *args, **kwargs: attempts.append(kwargs), ) monkeypatch.setattr( feeder, "_should_upsert_competitor_price", lambda *_args, **_kwargs: (False, "existing_match_conflict;existing_score=0.948;incoming_score=0.781"), ) monkeypatch.setattr( feeder, "_upsert_competitor_price", lambda *args, **kwargs: writes.append((args, kwargs)), ) result = feeder._run_sku_items([{ "sku": "14133077", "name": "【PONY EFFECT】絕對持久定妝噴霧", "product_id": 10, "momo_price": 599, }]) assert result.matched == 0 assert result.skipped_low_score == 1 assert writes == [] assert attempts[0]["attempt_status"] == "protected_existing_match" assert "existing_match_conflict" in attempts[0]["error_message"] def test_search_candidates_does_not_stop_on_merely_acceptable_match(monkeypatch): from services.competitor_price_feeder import _search_pchome_candidates from services.pchome_crawler import PChomeProduct first = PChomeProduct( product_id="DDAB01-FIRST", name="理膚寶水 B5 修復霜 40ml 普通候選", price=679, original_price=799, discount=15, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-FIRST", stock=20, store="24h", rating=4.7, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) second = PChomeProduct( product_id="DDAB01-SECOND", name="理膚寶水 B5 修復霜 40ml 強同款", price=689, original_price=799, discount=14, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-SECOND", stock=20, store="24h", rating=4.8, review_count=8, is_on_sale=True, 
crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self): self.calls = [] def search_products(self, keyword, **_kwargs): self.calls.append(keyword) if keyword == "broad": return True, "ok", [first] return True, "ok", [second] def fake_score(_momo_name, competitor_name, **_kwargs): score = 0.80 if "普通候選" in competitor_name else 0.95 return SimpleNamespace(score=score) crawler = FakeCrawler() monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) candidates = _search_pchome_candidates( crawler, "理膚寶水 B5 修復霜 40ml", keywords=["broad", "precise", "unused"], momo_price=699, ) assert crawler.calls == ["broad", "precise"] assert [candidate.product_id for candidate in candidates] == ["DDAB01-FIRST", "DDAB01-SECOND"] def test_search_candidates_adds_variant_recall_sorts_for_dashing_diva(monkeypatch): from services.competitor_price_feeder import _search_pchome_candidates from services.pchome_crawler import PChomeProduct candidate = PChomeProduct( product_id="DDBH8E-A900JMCJZ", name="Dashing Diva/F 時尚潮流美甲片-月光銀影 MDF5F010AG", price=420, original_price=520, discount=19, image_url="", product_url="https://24h.pchome.com.tw/prod/DDBH8E-A900JMCJZ", stock=20, store="24h", rating=4.7, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self): self.calls = [] def search_products(self, keyword, **kwargs): self.calls.append((keyword, kwargs.get("sort"))) return True, "ok", [candidate] monkeypatch.setattr( "services.marketplace_product_matcher.score_marketplace_match", lambda *_args, **_kwargs: SimpleNamespace(score=0.72), ) crawler = FakeCrawler() _search_pchome_candidates( crawler, "【DASHING DIVA】MAGICPRESS時尚潮流美甲片_極光之藍", keywords=["dashing diva 時尚潮流美甲片 極光之藍"], momo_price=331, ) assert crawler.calls == [ ("dashing diva 時尚潮流美甲片 極光之藍", None), ("dashing diva 時尚潮流美甲片", None), ("dashing diva magicpress", None), ("dashing diva 美甲片", None), ("dashing diva 時尚潮流美甲片", "sale/dc"), ("dashing diva 時尚潮流美甲片", 
"new/dc"), ] def test_search_candidates_uses_line_specific_recall_for_dashing_diva_p_line(monkeypatch): from services.competitor_price_feeder import _search_pchome_candidates from services.pchome_crawler import PChomeProduct candidate = PChomeProduct( product_id="MDU5F009AG", name="Dashing Diva/P 頂級璀燦美甲片-心動陰影 MDU5F009AG", price=420, original_price=520, discount=19, image_url="", product_url="https://24h.pchome.com.tw/prod/MDU5F009AG", stock=20, store="24h", rating=4.7, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self): self.calls = [] def search_products(self, keyword, **kwargs): self.calls.append((keyword, kwargs.get("sort"))) return True, "ok", [candidate] monkeypatch.setattr( "services.marketplace_product_matcher.score_marketplace_match", lambda *_args, **_kwargs: SimpleNamespace(score=0.72), ) crawler = FakeCrawler() _search_pchome_candidates( crawler, "【DASHING DIVA】MAGICPRESS 頂級璀燦美甲片_心情史努比(史奴比)", keywords=["dashing diva 頂級璀燦美甲片 心情史努比"], momo_price=331, ) assert crawler.calls == [ ("dashing diva 頂級璀燦美甲片 心情史努比", None), ("dashing diva 頂級璀燦美甲片", None), ("dashing diva magicpress", None), ("dashing diva 美甲片", None), ("dashing diva 頂級璀燦美甲片", "sale/dc"), ("dashing diva 頂級璀燦美甲片", "new/dc"), ] def test_search_candidates_adds_safe_generic_brand_type_recall(monkeypatch): from services.competitor_price_feeder import _search_pchome_candidates from services.pchome_crawler import PChomeProduct candidate = PChomeProduct( product_id="DDAB01-B5CREAM", name="理膚寶水 B5 全面修復霜 40ml", price=699, original_price=799, discount=13, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-B5CREAM", stock=20, store="24h", rating=4.7, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self): self.calls = [] def search_products(self, keyword, **kwargs): self.calls.append((keyword, kwargs.get("sort"))) return True, "ok", [candidate] monkeypatch.setattr( 
"services.marketplace_product_matcher.score_marketplace_match", lambda *_args, **_kwargs: SimpleNamespace(score=0.72), ) crawler = FakeCrawler() _search_pchome_candidates( crawler, "【理膚寶水】B5 全面修復霜 40ml", keywords=["理膚寶水 全面修復霜 b5 40ml"], momo_price=699, ) assert crawler.calls == [ ("理膚寶水 全面修復霜 b5 40ml", None), ("理膚寶水 面霜", None), ] def test_search_candidates_does_not_add_generic_recall_for_variant_sensitive_lip(monkeypatch): from services.competitor_price_feeder import _search_pchome_candidates from services.pchome_crawler import PChomeProduct candidate = PChomeProduct( product_id="DDBH8E-ROMAND12", name="rom&nd 果汁唇釉 2.0 #12", price=299, original_price=399, discount=25, image_url="", product_url="https://24h.pchome.com.tw/prod/DDBH8E-ROMAND12", stock=20, store="24h", rating=4.7, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self): self.calls = [] def search_products(self, keyword, **kwargs): self.calls.append((keyword, kwargs.get("sort"))) return True, "ok", [candidate] monkeypatch.setattr( "services.marketplace_product_matcher.score_marketplace_match", lambda *_args, **_kwargs: SimpleNamespace(score=0.72), ) crawler = FakeCrawler() _search_pchome_candidates( crawler, "【rom&nd】果汁唇釉 2.0 #12", keywords=["romand 果汁唇釉 12"], momo_price=299, ) assert crawler.calls == [("romand 果汁唇釉 12", None)] def test_competitor_feeder_logs_keyword_parser_fallback(monkeypatch, caplog): from services import competitor_price_feeder from services import marketplace_product_matcher def broken_build_search_terms(*_args, **_kwargs): raise RuntimeError("matcher unavailable") monkeypatch.setattr(marketplace_product_matcher, "build_search_terms", broken_build_search_terms) caplog.set_level(logging.DEBUG, logger="services.competitor_price_feeder") terms = competitor_price_feeder._build_search_keywords("理膚寶水 B5 修復霜 40ml") assert terms assert "fallback to cleaned product name" in caplog.text def 
test_competitor_feeder_keeps_original_name_as_search_fallback(): from services import competitor_price_feeder terms = competitor_price_feeder._build_search_keywords( "【Mustela 慕之恬廊】慕之幼 免用水潔淨液 300ml(外出清潔 卸除髒汙 卸除防曬 卸防曬)" ) assert len(terms) <= competitor_price_feeder.MAX_SEARCH_TERMS assert terms[-1].startswith("Mustela 慕之恬廊 慕之幼 免用水潔淨液") assert any("免用水潔淨液 300ml" in term for term in terms[:4]) assert not any("防曬 300ml" in term for term in terms[:-1]) assert not any("卸除髒汙" in term for term in terms[:-1]) def test_competitor_feeder_coverage_rescue_terms_preserve_decimal_specs(): from services import competitor_price_feeder romand_terms = competitor_price_feeder._build_search_keywords("【rom&nd】果汁唇釉 12 5.5g") lip_terms = competitor_price_feeder._build_search_keywords("曼秀雷敦 頂級濃潤柔霜潤唇膏 2.4g") nivea_terms = competitor_price_feeder._build_search_keywords("【NIVEA 妮維雅】止汗爽身噴霧 150ml") assert any("5.5g" in term for term in romand_terms) assert not any("5 5g" in term for term in romand_terms) assert any("2.4g" in term for term in lip_terms) assert not any("2 4g" in term for term in lip_terms) assert any(term == "nivea 妮維雅 止汗噴霧 150ml" for term in nivea_terms) assert len(nivea_terms) <= competitor_price_feeder.MAX_SEARCH_TERMS def test_competitor_feeder_coverage_rescue_adds_bilingual_brand_terms(): from services import competitor_price_feeder cerave_terms = competitor_price_feeder._build_search_keywords( "【CeraVe 適樂膚】安敏補水★全效極潤修護精華水 200ml_A" ) tunemakers_terms = competitor_price_feeder._build_search_keywords( "【TUNEMAKERS 渡美】神經醯胺修護凍膜70g(7天細彈滑/面膜/臉部保養)" ) assert any(term == "cerave 適樂膚 全效極潤修護精華水 200ml" for term in cerave_terms) assert any( term == "tunemakers 渡美 神經醯胺修護凍膜 70g" for term in tunemakers_terms ) assert not any(term == "tunemakers 渡美 面膜 70g" for term in tunemakers_terms) def test_competitor_feeder_refreshes_expired_identity_by_known_product_id(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder from services.pchome_crawler import PChomeProduct 
requested = [] product = PChomeProduct( product_id="DDAB01-1900ABCD", name="舒特膚 AD 乳液 200ml", price=899, original_price=999, discount=10, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-1900ABCD", stock=50, store="24h", rating=4.8, review_count=12, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self, *_args, **_kwargs): pass def fetch_product_details(self, product_ids, batch_size=20): requested.extend(product_ids) return True, "ok", [product] monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) feeder = CompetitorPriceFeeder(engine=object()) writes = [] attempts = [] monkeypatch.setattr( feeder, "_should_upsert_competitor_price", lambda *_args, **_kwargs: (True, "same_or_empty_existing"), ) monkeypatch.setattr( feeder, "_upsert_competitor_price", lambda sku, product, score, tags, **kwargs: writes.append({ "sku": sku, "product_id": product.product_id, "score": score, "tags": tags, **kwargs, }), ) monkeypatch.setattr( feeder, "_record_match_attempt", lambda *args, **kwargs: attempts.append(kwargs), ) result = feeder._run_known_identity_refresh_items([{ "sku": "A001", "name": "舒特膚 AD 乳液 200ml", "product_id": 1, "momo_price": 980, "competitor_product_id": "DDAB01-1900ABCD", }]) assert requested == ["DDAB01-1900ABCD"] assert result.matched == 1 assert writes[0]["product_id"] == "DDAB01-1900ABCD" assert "identity_v2" in writes[0]["tags"] assert "refresh_known_identity" in writes[0]["tags"] assert attempts[0]["attempt_status"] == "matched" assert attempts[0]["search_terms"] == ["known_product_id:DDAB01-1900ABCD"] def test_competitor_feeder_refresh_recovers_with_fresh_search_when_known_id_is_low_score(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder from services.pchome_crawler import PChomeProduct stale = PChomeProduct( product_id="DDAB01-STALE", name="Panasonic 國際牌 男仕防水美體除毛器 國際版 (ER-GK83)", price=2290, original_price=2490, discount=8, image_url="", 
product_url="https://24h.pchome.com.tw/prod/DDAB01-STALE", stock=20, store="24h", rating=4.7, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) recovered = PChomeProduct( product_id="DDAB01-RECOVERED", name="Panasonic 國際牌 男士身體除毛器 ER-GK83", price=2390, original_price=2490, discount=4, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-RECOVERED", stock=20, store="24h", rating=4.8, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self, *_args, **_kwargs): pass def fetch_product_details(self, product_ids, batch_size=20): assert product_ids == ["DDAB01-STALE"] return True, "ok", [stale] def search_products(self, *_args, **_kwargs): return True, "ok", [stale, recovered] def fake_score(_momo_name, competitor_name, **_kwargs): if "RECOVERED" in competitor_name or "男士身體除毛器" in competitor_name: return SimpleNamespace( score=0.81, brand_score=1.0, token_score=0.8, spec_score=0.8, sequence_score=0.72, type_score=0.55, price_penalty=0.0, hard_veto=False, reasons=("shared_model_token",), comparison_mode="exact_identity", match_type="exact", price_basis="total_price", alert_tier="price_alert_exact", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) return SimpleNamespace( score=0.68, brand_score=1.0, token_score=0.55, spec_score=0.55, sequence_score=0.6, type_score=0.55, price_penalty=0.0, hard_veto=False, reasons=(), comparison_mode="exact_identity", match_type="exact", price_basis="total_price", alert_tier="price_alert_exact", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) feeder = CompetitorPriceFeeder(engine=object()) attempts = [] writes = [] monkeypatch.setattr( feeder, "_should_upsert_competitor_price", lambda *_args, **_kwargs: (True, "same_or_empty_existing"), ) monkeypatch.setattr( feeder, 
"_upsert_competitor_price", lambda sku, product, score, tags, **kwargs: writes.append({ "sku": sku, "product_id": product.product_id, "score": score, "tags": tags, **kwargs, }), ) monkeypatch.setattr( feeder, "_record_match_attempt", lambda *args, **kwargs: attempts.append(kwargs), ) result = feeder._run_known_identity_refresh_items([{ "sku": "TP00090100000153", "name": "【Panasonic 國際牌】男士身體除毛器 2025新款 ER-GK83 日版 日本直送", "product_id": 1, "momo_price": 2490, "competitor_product_id": "DDAB01-STALE", }]) assert result.matched == 1 assert writes[0]["product_id"] == "DDAB01-RECOVERED" assert "fresh_search_recovery" in writes[0]["tags"] assert attempts[0]["attempt_status"] == "matched" assert "known_product_id:DDAB01-STALE" in attempts[0]["search_terms"] assert any("Panasonic" in term or "國際牌" in term for term in attempts[0]["search_terms"]) def test_competitor_feeder_refresh_recovers_when_known_id_missing(monkeypatch): from services.competitor_price_feeder import ( BACKFILL_MAX_SEARCH_TERMS, BACKFILL_REQUEST_TIMEOUT, BACKFILL_SEARCH_MAX_PAGES, CompetitorPriceFeeder, SEARCH_LIMIT, ) from services.pchome_crawler import PChomeProduct crawler_calls = {"init": [], "search": []} recovered = PChomeProduct( product_id="DDAB01-RECOVERED", name="eve舒摩兒 賦活美學浴潔露-全肌防護 237ml", price=441, original_price=499, discount=11, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-RECOVERED", stock=20, store="24h", rating=4.8, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self, *_args, **kwargs): crawler_calls["init"].append(kwargs) def fetch_product_details(self, product_ids, batch_size=20): assert product_ids == ["DDAB01-MISSING"] return True, "ok", [] def search_products(self, *_args, **kwargs): crawler_calls["search"].append(kwargs) return True, "ok", [recovered] def fake_score(_momo_name, competitor_name, **_kwargs): return SimpleNamespace( score=0.885, brand_score=1.0, token_score=0.7, spec_score=1.0, sequence_score=0.62, 
type_score=1.0, price_penalty=0.0, hard_veto=False, reasons=("spec_name_alignment",), comparison_mode="exact_identity", match_type="exact", price_basis="total_price", alert_tier="price_alert_exact", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) feeder = CompetitorPriceFeeder(engine=object()) attempts = [] writes = [] monkeypatch.setattr( feeder, "_should_upsert_competitor_price", lambda *_args, **_kwargs: (True, "same_or_empty_existing"), ) monkeypatch.setattr( feeder, "_upsert_competitor_price", lambda sku, product, score, tags, **kwargs: writes.append({ "sku": sku, "product_id": product.product_id, "score": score, "tags": tags, **kwargs, }), ) monkeypatch.setattr( feeder, "_record_match_attempt", lambda *args, **kwargs: attempts.append(kwargs), ) result = feeder._run_known_identity_refresh_items([{ "sku": "9823407", "name": "【Summer’s Eve 舒摩兒】浴潔露237ml 單入任選(私密清潔 經典防護王)", "product_id": 4864, "momo_price": 441, "competitor_product_id": "DDAB01-MISSING", }], bounded_recovery=True) assert result.matched == 1 assert crawler_calls["init"][0]["timeout"] == BACKFILL_REQUEST_TIMEOUT assert len(crawler_calls["search"]) <= BACKFILL_MAX_SEARCH_TERMS assert crawler_calls["search"][0]["max_pages"] == BACKFILL_SEARCH_MAX_PAGES assert crawler_calls["search"][0]["limit"] == SEARCH_LIMIT * BACKFILL_SEARCH_MAX_PAGES assert writes[0]["product_id"] == "DDAB01-RECOVERED" assert "missing_known_product_id" in writes[0]["tags"] assert "fresh_search_recovery" in writes[0]["tags"] assert attempts[0]["attempt_status"] == "matched" def test_competitor_feeder_expired_refresh_defers_missing_known_id_recovery(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder class FakeCrawler: def __init__(self, *_args, **_kwargs): pass def fetch_product_details(self, product_ids, 
batch_size=20): assert product_ids == ["DDAB01-MISSING"] return True, "ok", [] def search_products(self, *_args, **_kwargs): raise AssertionError("expired identity refresh should not run fresh search recovery") monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) feeder = CompetitorPriceFeeder(engine=object()) attempts = [] monkeypatch.setattr( feeder, "_record_match_attempt", lambda *args, **kwargs: attempts.append(kwargs), ) result = feeder._run_known_identity_refresh_items([{ "sku": "9823407", "name": "【Summer’s Eve 舒摩兒】浴潔露237ml 單入任選(私密清潔 經典防護王)", "product_id": 4864, "momo_price": 441, "competitor_product_id": "DDAB01-MISSING", }], allow_missing_recovery=False) assert result.matched == 0 assert result.skipped_no_result == 1 assert attempts[0]["attempt_status"] == "refresh_no_result" assert "fresh_search_recovery_deferred" in attempts[0]["error_message"] def test_competitor_feeder_expired_recovery_allows_fresh_search(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder, FeederResult feeder = CompetitorPriceFeeder(engine=object()) captured = {} monkeypatch.setattr( feeder, "_fetch_expired_identity_recovery_skus", lambda limit: [{"sku": "STALE-1", "competitor_product_id": "OLD-PID"}], ) def fake_run_known_identity_refresh_items(skus, **kwargs): captured["skus"] = skus captured.update(kwargs) return FeederResult(1, 1, 0, 0, 0, 0.1, history_written=1, attempts_written=1) monkeypatch.setattr( feeder, "_run_known_identity_refresh_items", fake_run_known_identity_refresh_items, ) result = feeder.run_expired_identity_search_recovery(limit=40) assert result.matched == 1 assert captured["skus"][0]["sku"] == "STALE-1" assert captured["allow_missing_recovery"] is True assert captured["bounded_recovery"] is True assert captured["label"] == "identity_v2 過期價格搜尋救援" def test_competitor_feeder_records_missing_known_id_low_score_candidate(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder from 
services.pchome_crawler import PChomeProduct candidate = PChomeProduct( product_id="DDAB01-LOW", name="Recipe Box 韓國 recipebox 可撕式水性兒童指甲油", price=299, original_price=350, discount=15, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-LOW", stock=20, store="24h", rating=4.8, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self, *_args, **_kwargs): pass def fetch_product_details(self, product_ids, batch_size=20): assert product_ids == ["DDAB01-MISSING"] return True, "ok", [] def search_products(self, *_args, **_kwargs): return True, "ok", [candidate] def fake_score(*_args, **_kwargs): return SimpleNamespace( score=0.742, brand_score=1.0, token_score=0.66, spec_score=0.55, sequence_score=0.60, type_score=0.55, price_penalty=0.0, hard_veto=False, reasons=("shared_identity_anchor_variant_safe",), comparison_mode="exact_identity", match_type="no_match", price_basis="none", alert_tier="suppress", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) feeder = CompetitorPriceFeeder(engine=object()) attempts = [] monkeypatch.setattr( feeder, "_record_match_attempt", lambda *args, **kwargs: attempts.append(kwargs), ) result = feeder._run_known_identity_refresh_items([{ "sku": "TP00018610000157", "name": "韓國 recipebox 可撕式水性兒童指甲油(兒童水性指甲油 可撕式指甲油 韓兔指甲油)", "product_id": 1, "momo_price": 299, "competitor_product_id": "DDAB01-MISSING", }]) assert result.matched == 0 assert result.skipped_low_score == 1 assert attempts[0]["attempt_status"] == "recoverable_low_score" assert attempts[0]["best_product"].product_id == "DDAB01-LOW" assert "missing_known_product_id_fresh_search_low_confidence" in attempts[0]["error_message"] def test_competitor_feeder_refresh_recovers_when_known_id_is_hard_veto(monkeypatch): from services.competitor_price_feeder 
import CompetitorPriceFeeder from services.pchome_crawler import PChomeProduct stale = PChomeProduct( product_id="DDAB01-STALE", name="Pavaruni 香氛蠟燭500g", price=980, original_price=1200, discount=18, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-STALE", stock=20, store="24h", rating=4.7, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) recovered = PChomeProduct( product_id="DDAB01-RECOVERED", name="Pavaruni 天然植物香氛精油40種香味10ml", price=399, original_price=499, discount=20, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-RECOVERED", stock=20, store="24h", rating=4.8, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self, *_args, **_kwargs): pass def fetch_product_details(self, product_ids, batch_size=20): assert product_ids == ["DDAB01-STALE"] return True, "ok", [stale] def search_products(self, *_args, **_kwargs): return True, "ok", [stale, recovered] def fake_score(_momo_name, competitor_name, **_kwargs): if "RECOVERED" in competitor_name or "天然植物香氛精油" in competitor_name: return SimpleNamespace( score=0.84, brand_score=1.0, token_score=0.78, spec_score=1.0, sequence_score=0.70, type_score=1.0, price_penalty=0.0, hard_veto=False, reasons=("shared_identity_anchor",), comparison_mode="exact_identity", match_type="exact", price_basis="total_price", alert_tier="price_alert_exact", tags=["identity_v2", "comparison_exact_identity", "brand_match"], ) return SimpleNamespace( score=0.32, brand_score=1.0, token_score=0.20, spec_score=0.0, sequence_score=0.20, type_score=0.0, price_penalty=0.0, hard_veto=True, reasons=("type_conflict",), comparison_mode="not_comparable", match_type="no_match", price_basis="none", alert_tier="suppress", tags=["identity_v2", "identity_veto"], ) monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score) feeder = 
CompetitorPriceFeeder(engine=object()) attempts = [] writes = [] monkeypatch.setattr( feeder, "_should_upsert_competitor_price", lambda *_args, **_kwargs: (True, "same_or_empty_existing"), ) monkeypatch.setattr( feeder, "_upsert_competitor_price", lambda sku, product, score, tags, **kwargs: writes.append({ "sku": sku, "product_id": product.product_id, "score": score, "tags": tags, **kwargs, }), ) monkeypatch.setattr( feeder, "_record_match_attempt", lambda *args, **kwargs: attempts.append(kwargs), ) result = feeder._run_known_identity_refresh_items([{ "sku": "PAVARUNI-OIL", "name": "【Pavaruni】天然植物香氛精油40種香味10ml", "product_id": 1, "momo_price": 399, "competitor_product_id": "DDAB01-STALE", }]) assert result.matched == 1 assert writes[0]["product_id"] == "DDAB01-RECOVERED" assert "fresh_search_recovery" in writes[0]["tags"] assert attempts[0]["attempt_status"] == "matched" def test_should_upsert_protects_stronger_existing_identity_candidate(): from sqlalchemy import create_engine, text from services.competitor_price_feeder import CompetitorPriceFeeder engine = create_engine("sqlite:///:memory:") with engine.begin() as conn: conn.execute(text(""" CREATE TABLE competitor_prices ( sku TEXT, source TEXT, competitor_product_id TEXT, competitor_product_name TEXT, match_score REAL, tags TEXT ) """)) conn.execute(text(""" INSERT INTO competitor_prices ( sku, source, competitor_product_id, competitor_product_name, match_score, tags ) VALUES ( '14133077', 'pchome', 'DDAB01-STRONG', 'PONY EFFECT 絕對持久定妝噴霧 100ml', 0.950, '["identity_v2","comparison_exact_identity","brand_match"]' ) """)) feeder = CompetitorPriceFeeder(engine=engine) product = SimpleNamespace( product_id="DDAB01-WEAKER", name="PONY EFFECT 絕對持久定妝噴霧", ) should_write, reason = feeder._should_upsert_competitor_price( "14133077", product, 0.850, source="pchome", ) assert should_write is False assert "stronger_existing" in reason assert "existing_score=0.950" in reason assert "incoming_score=0.850" in reason def 
test_competitor_feeder_records_unit_comparable_without_price_upsert(monkeypatch): from services.competitor_price_feeder import CompetitorPriceFeeder from services.pchome_crawler import PChomeProduct product = PChomeProduct( product_id="DDAB01-UNIT", name="理膚寶水 全面修復霜 B5 40ml", price=679, original_price=799, discount=15, image_url="", product_url="https://24h.pchome.com.tw/prod/DDAB01-UNIT", stock=20, store="24h", rating=4.7, review_count=8, is_on_sale=True, crawled_at=datetime.now(), ) class FakeCrawler: def __init__(self, *_args, **_kwargs): pass def search_products(self, *_args, **_kwargs): return True, "ok", [product] monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) feeder = CompetitorPriceFeeder(engine=object()) attempts = [] writes = [] monkeypatch.setattr( feeder, "_record_match_attempt", lambda *args, **kwargs: attempts.append(kwargs), ) monkeypatch.setattr( feeder, "_upsert_competitor_price", lambda *args, **kwargs: writes.append((args, kwargs)), ) result = feeder._run_sku_items([{ "sku": "A002", "name": "理膚寶水 B5 全面修復霜 40ml x2 超值組", "product_id": 2, "momo_price": 1199, }]) assert result.matched == 0 assert result.skipped_low_score == 1 assert writes == [] assert attempts[0]["attempt_status"] == "unit_comparable" assert "unit_comparable" in attempts[0]["error_message"]