導入 browse.sh 比價診斷計畫
All checks were successful
CD Pipeline / deploy (push) Successful in 1m21s

This commit is contained in:
OoO
2026-05-21 18:40:14 +08:00
committed by AiderHeal Bot
parent 106c1935f4
commit 0cea70890a
9 changed files with 576 additions and 2 deletions

View File

@@ -432,6 +432,12 @@ PCHOME_FEEDER_RATE_DELAY=1.0
PCHOME_FEEDER_SEARCH_LIMIT=20
PCHOME_FEEDER_MAX_SEARCH_TERMS=5
PCHOME_FEEDER_SEARCH_MAX_PAGES=2
# browse.sh 只作低信心/無結果的診斷計畫;正式排程預設不自動開瀏覽器。
PCHOME_FEEDER_BROWSE_SH_DIAGNOSTIC_ENABLED=true
PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED=false
PCHOME_FEEDER_BROWSE_SH_TIMEOUT=20
PCHOME_FEEDER_BROWSE_SH_MAX_PER_RUN=3
PCHOME_FEEDER_BROWSE_SH_OUTPUT_PREVIEW_CHARS=1200
PCHOME_BACKFILL_STATUS_PATH=/app/data/pchome_match_backfill_status.json
PCHOME_BACKFILL_ACTIVE_TTL_SECONDS=7200

View File

@@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# 系統版本與路徑
# ==========================================
SYSTEM_VERSION = "V10.380"
SYSTEM_VERSION = "V10.381"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # 用於模板顯示

View File

@@ -42,6 +42,7 @@
- 2026-05-21 追記:同步 EDM 失效頁 alert guard 與 REJURAN 唇膏寬價差 exact-identity matcher 更新後的 `scheduler.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
- 2026-05-21 追記:同步過期 EDM / seasonal promo crawler 排程改為 opt-in、NIVEA/OPI 搜尋 noise 與 identity anchor 補強後的 `run_scheduler.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
- 2026-05-21 追記:同步 Recipe Box 多效提亮防曬霜同款漂移比對補強後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
- 2026-05-21 追記:同步 browse.sh 診斷計畫寫入 `competitor_match_attempts` 後的 `services/competitor_price_feeder.py` 行數;此處只更新 inventory,不變更模組化決策。
## 達到或超過 800 行檔案清單
@@ -75,7 +76,7 @@
| 1042 | `services/code_review_pipeline_service.py` | P2 Code review pipeline service | scan orchestration / finding normalization / persistence adapter |
| 953 | `routes/export_routes.py` | P2 Export flow | export command/router glue / file path / download orchestration |
| 816 | `services/ppt_vision_service.py` | P2 PPT vision QA service | runtime state / queue status / model probe / audit execution 分離 |
| 1733 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / retryable candidate recovery / cache strategy |
| 2149 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / retryable candidate recovery / cache strategy |
| 1327 | `services/competitor_intel_repository.py` | P2 competitor intel repository | review queue query / cache shaping / formatting helpers |
| 805 | `routes/bot_api_routes.py` | P2 Bot API Blueprint | route glue / bot action service |
| 1319 | `routes/market_intel_review_report_routes.py` | P2 market intel review report Blueprint | review report route glue / export payload / phase handoff orchestration |

View File

@@ -13,6 +13,7 @@
## 📅 詳細更新日誌 (考古存檔)
### 2026-05-21瀏覽器測試守門與 PChome 熱路徑優化
- **V10.381 browse.sh 比價診斷計畫**: PChome feeder 在 `no_result`、`no_match`、低信心、單位價覆核、既有配對保護與爬蟲錯誤時,會把 read-only `browse_diagnostic_json` 寫入 `competitor_match_attempts`,內含 PChome search URL 與建議 `browse get/open` 命令;正式排程仍 API-first,`PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED=false` 預設不自動開瀏覽器,避免瀏覽器彈窗、登入或密碼提示干擾。
- **V10.380 111 Ollama final fallback 收斂**: 111 Mac fallback 從救急路徑改成更短的保護路徑,`OLLAMA_111_MAX_TIMEOUT` 預設由 45s 收緊到 20s,並新增 `OLLAMA_111_NUM_PREDICT=512` 輸出上限;落到 111 時仍會降級重模型到 `llama3.2:latest`、縮 `num_ctx=4096`、`keep_alive=5m`,避免 GCP-A/GCP-B 短暫 timeout 後把長篇 Hermes/OpenClaw 工作轉嫁到 111 造成 swap 與 load 飆高。
- **V10.379 MCP runtime promotion gate**: 新增 `mcp_runtime_promotion` read-only builder、GET/POST endpoint、UI promotion package 審核面板與 deployment readiness smoke target,將 MCP activation evidence 與 runtime smoke receipt 合併審核,讓 completion audit 的 runtime 缺口可由人工收據明確補齊。
- **V10.379 只讀安全邊界**: 本階段不保存 payload、不打 health、不開 DB、不抓外站、不掛 scheduler,也不會因 promotion 通過自動打開人工 fetch gate;正式 fetch / DB write / scheduler attach 仍需各自獨立 gate。

View File

@@ -0,0 +1,23 @@
-- =============================================================================
-- Migration 042: PChome browse.sh 診斷計畫欄位
-- MOMO PRO — Price comparison fallback observability
-- 2026-05-21 台北
-- =============================================================================
-- 說明:
-- competitor_match_attempts 補存 browse.sh 診斷計畫 JSON。
-- 正式價格爬蟲仍維持 API-first;此欄位只保存低信心、無結果、
-- 單位價覆核或外部爬蟲錯誤時的 read-only probe plan供人工或
-- opt-in 執行 `browse get/open` 做 selector / XHR / 商品頁取證。
-- =============================================================================
-- Add the JSONB column that stores the browse.sh probe plan. No NOT NULL
-- constraint is declared, so existing rows keep NULL. The IF EXISTS /
-- IF NOT EXISTS guards let this statement be re-run without error.
ALTER TABLE IF EXISTS competitor_match_attempts
ADD COLUMN IF NOT EXISTS browse_diagnostic_json JSONB;
-- Partial index on attempted_at (newest first) restricted to rows that
-- actually carry a browse diagnostic.
-- NOTE(review): unlike the ALTER TABLE above, this statement has no guard for
-- a missing competitor_match_attempts table; it presumably relies on an
-- earlier migration having created the table. Confirm migration ordering.
CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_browse_diag_time
ON competitor_match_attempts (attempted_at DESC)
WHERE browse_diagnostic_json IS NOT NULL;
-- Emit a completion notice in the migration output.
DO $$
BEGIN
RAISE NOTICE '✅ Migration 042 完成 — PChome browse.sh 診斷計畫欄位已補齊';
END $$;

View File

@@ -31,6 +31,7 @@ import time
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import Optional
from urllib.parse import quote_plus
logger = logging.getLogger(__name__)
@@ -47,6 +48,11 @@ TTL_HOURS = 6 # competitor_prices 快取有效期
REQUEST_TIMEOUT = float(os.getenv("PCHOME_FEEDER_TIMEOUT", "12")) # 避免外部搜尋 API 長時間卡住排程
VARIANT_RECALL_SORTS = ("sale/dc", "new/dc")
RECOVERABLE_LOW_SCORE_FLOOR = max(MIN_MATCH_SCORE - 0.03, 0.72)
# browse.sh diagnostic settings, read from the environment once at import time.
# The two flags accept "1", "true", "yes" or "on" (case-insensitive) as true;
# any other value is treated as false.
# Master switch (default "true"): when false, no browse.sh probe plan payload is built.
BROWSE_SH_DIAGNOSTIC_ENABLED = os.getenv("PCHOME_FEEDER_BROWSE_SH_DIAGNOSTIC_ENABLED", "true").lower() in {"1", "true", "yes", "on"}
# Opt-in execution switch (default "false"): when false, payloads stay plan-only
# and the browse.sh tool is never invoked.
BROWSE_SH_EXECUTE_ENABLED = os.getenv("PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED", "false").lower() in {"1", "true", "yes", "on"}
# Timeout in seconds passed to the browse.sh tool and echoed in the payload (default 20).
# int() raises ValueError at import time if the variable holds a non-integer string.
BROWSE_SH_TIMEOUT_SECONDS = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_TIMEOUT", "20"))
# Upper bound on browse.sh executions per feeder instance (default 3).
BROWSE_SH_MAX_EXECUTIONS_PER_RUN = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_MAX_PER_RUN", "3"))
# Maximum number of stdout/stderr characters kept in the stored execution preview (default 1200).
BROWSE_SH_OUTPUT_PREVIEW_CHARS = int(os.getenv("PCHOME_FEEDER_BROWSE_SH_OUTPUT_PREVIEW_CHARS", "1200"))
RECOVERABLE_DIAGNOSTIC_REASONS = {
"strong_product_line_match",
"strong_exact_spec_match",
@@ -95,6 +101,43 @@ def _classify_low_score_attempt(score: float, diagnostics) -> str:
return "true_low_confidence"
def _has_variant_selection_gap(
momo_name: str,
ranked_matches: list[tuple],
best_score: float,
) -> bool:
"""True when source lacks explicit variant selection but top candidates require one."""
try:
from services.marketplace_product_matcher import (
_explicit_variant_option_tokens,
parse_product_identity,
)
except Exception:
return False
source_identity = parse_product_identity(momo_name)
source_options = set(_explicit_variant_option_tokens(source_identity))
if re.search(r"任選\s*[一二兩三四五六七八九十0-9]+\s*款", momo_name):
source_options -= {str(value) for value in range(1, 11)}
source_options -= {f"{value:02d}" for value in range(1, 11)}
if source_options:
return False
threshold = max(best_score - 0.02, RECOVERABLE_LOW_SCORE_FLOOR)
option_buckets: set[str] = set()
for product, score, diagnostics in ranked_matches[:5]:
if getattr(diagnostics, "hard_veto", False) or score < threshold:
continue
candidate_identity = parse_product_identity(getattr(product, "name", "") or "")
options = _explicit_variant_option_tokens(candidate_identity)
if len(options) >= 2:
return True
option_buckets.update(options)
if len(option_buckets) >= 2:
return True
return False
def _extract_tags(pchome_product) -> list:
"""
從 PChomeProduct 物件提取語意標籤
@@ -286,6 +329,66 @@ def _match_diagnostics_payload(diagnostics) -> dict:
}
def _pchome_search_url(keyword: str) -> str:
return f"https://ecshweb.pchome.com.tw/search/v3.3/?q={quote_plus(keyword or '')}"
def _build_browse_sh_diagnostic_payload(
    momo_name: str,
    search_terms: list[str] = None,
    reason: str = "unknown",
    best_product=None,
    best_score: float = None,
    diagnostics=None,
    candidate_count: int = 0,
) -> dict:
    """Assemble the read-only browse.sh probe plan stored with a match attempt.

    Args:
        momo_name: Source product title; truncated to 300 characters in the payload.
        search_terms: Terms already used for the search. When falsy, keywords
            are derived from ``momo_name``. At most three de-duplicated terms are kept.
        reason: Label describing why the diagnostic is being recorded.
        best_product: Best candidate, if any; its ``product_url`` is appended
            to the probe URLs when present.
        best_score: Score of ``best_product``.
        diagnostics: Match diagnostics object, summarised through
            ``_match_diagnostics_payload``.
        candidate_count: Number of candidates the search returned.

    Returns:
        An empty dict when diagnostics are disabled; otherwise the plan dict.
        Its ``execution`` entry always starts as ``{"status": "disabled"}``.
    """
    if not BROWSE_SH_DIAGNOSTIC_ENABLED:
        return {}

    terms = _dedupe_terms(search_terms or _build_search_keywords(momo_name))[:3]
    product_url = getattr(best_product, "product_url", None)

    # Search pages first, then the best candidate's own page; falsy entries
    # and duplicates are dropped while keeping first-seen order.
    raw_urls = [_pchome_search_url(term) for term in terms]
    if product_url:
        raw_urls.append(product_url)
    urls = []
    for url in raw_urls:
        if url and url not in urls:
            urls.append(url)

    if urls:
        primary_url = urls[0]
    else:
        primary_url = _pchome_search_url(momo_name)

    diagnostic_payload = _match_diagnostics_payload(diagnostics)

    if best_product:
        best_candidate = {
            "product_id": getattr(best_product, "product_id", None),
            "name": (getattr(best_product, "name", None) or "")[:300] or None,
            "price": getattr(best_product, "price", None),
            "url": product_url,
            "score": best_score,
        }
    else:
        best_candidate = None

    suggested_commands = [
        {"purpose": "static_fetch_first_page", "args": ["get", primary_url]},
        {"purpose": "manual_browser_probe", "args": ["open", primary_url]},
    ]

    return {
        "tool": "browse.sh",
        "mode": "execute_on_demand" if BROWSE_SH_EXECUTE_ENABLED else "plan_only",
        "reason": reason,
        "execute_enabled": BROWSE_SH_EXECUTE_ENABLED,
        "timeout_seconds": BROWSE_SH_TIMEOUT_SECONDS,
        "candidate_count": int(candidate_count or 0),
        "momo_name": (momo_name or "")[:300],
        "search_terms": terms,
        "urls": urls,
        "suggested_commands": suggested_commands,
        "best_candidate": best_candidate,
        "diagnostic_codes": diagnostic_payload.get("reasons") or [],
        "comparison_mode": diagnostic_payload.get("comparison_mode"),
        "hard_veto": diagnostic_payload.get("hard_veto"),
        "execution": {"status": "disabled"},
    }
def _product_snapshot_payload(product) -> dict:
payload = {
"competitor_product_url": None,
@@ -471,6 +574,7 @@ class CompetitorPriceFeeder:
self._history_table_ready = False
self._attempt_table_ready = False
self._price_table_columns_ready = False
self._browse_sh_executions = 0
def _ensure_table_columns(self, conn, table: str, column_specs: list[tuple[str, str]]) -> None:
"""補齊既有表欄位;避免正式端舊表在新 INSERT 時炸掉。"""
@@ -613,6 +717,7 @@ class CompetitorPriceFeeder:
comparison_mode VARCHAR(40),
hard_veto BOOLEAN,
diagnostic_codes JSONB,
browse_diagnostic_json JSONB,
error_message TEXT,
attempted_at TIMESTAMP NOT NULL DEFAULT NOW()
)
@@ -648,6 +753,7 @@ class CompetitorPriceFeeder:
comparison_mode VARCHAR(40),
hard_veto BOOLEAN,
diagnostic_codes TEXT,
browse_diagnostic_json TEXT,
error_message TEXT,
attempted_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
)
@@ -669,9 +775,64 @@ class CompetitorPriceFeeder:
("comparison_mode", "VARCHAR(40)"),
("hard_veto", "BOOLEAN"),
("diagnostic_codes", "JSONB" if conn.dialect.name == "postgresql" else "TEXT"),
("browse_diagnostic_json", "JSONB" if conn.dialect.name == "postgresql" else "TEXT"),
])
self._attempt_table_ready = True
def _prepare_browse_diagnostic(
    self,
    momo_name: str,
    search_terms: list = None,
    reason: str = "unknown",
    best_product=None,
    best_score: float = None,
    diagnostics=None,
    candidate_count: int = 0,
) -> dict:
    """Build the browse.sh plan and, only when opted in, run its first command.

    All arguments are forwarded to ``_build_browse_sh_diagnostic_payload``.
    The plan is returned untouched when it is empty or when execution is not
    enabled. Otherwise its ``execution`` entry is replaced with one of:
    ``rate_limited`` (per-instance budget used up), ``missing_command``
    (no args on the first suggested command), ``ok`` / ``failed`` (tool ran),
    or ``error`` (importing or running the tool raised).
    """
    plan = _build_browse_sh_diagnostic_payload(
        momo_name,
        search_terms=search_terms,
        reason=reason,
        best_product=best_product,
        best_score=best_score,
        diagnostics=diagnostics,
        candidate_count=candidate_count,
    )
    if not (plan and BROWSE_SH_EXECUTE_ENABLED):
        return plan

    if self._browse_sh_executions >= BROWSE_SH_MAX_EXECUTIONS_PER_RUN:
        plan["execution"] = {"status": "rate_limited"}
        return plan

    first_command = (plan.get("suggested_commands") or [{}])[0]
    command_args = tuple(first_command.get("args") or ())
    if not command_args:
        plan["execution"] = {"status": "missing_command"}
        return plan

    try:
        from services.browse_sh_tool import BrowseShTool

        # The budget is consumed once the tool is importable, before the run
        # itself, so a failing run still counts against the limit.
        self._browse_sh_executions += 1
        tool = BrowseShTool(timeout_seconds=BROWSE_SH_TIMEOUT_SECONDS)
        result = tool.run(
            command_args,
            timeout_seconds=BROWSE_SH_TIMEOUT_SECONDS,
        )
        status = "ok" if result.ok else "failed"
        plan["execution"] = {
            "status": status,
            "returncode": result.returncode,
            "timed_out": result.timed_out,
            "unavailable_reason": result.unavailable_reason,
            "stdout_preview": (result.stdout or "")[:BROWSE_SH_OUTPUT_PREVIEW_CHARS],
            "stderr_preview": (result.stderr or "")[:BROWSE_SH_OUTPUT_PREVIEW_CHARS],
        }
    except Exception as exc:
        # Diagnostics are best effort: the failure is recorded in the plan
        # rather than propagated to the caller.
        plan["execution"] = {
            "status": "error",
            "error": str(exc)[:500],
        }
    return plan
def _record_match_attempt(
self,
sku: str,
@@ -684,6 +845,7 @@ class CompetitorPriceFeeder:
best_product=None,
best_score: float = None,
diagnostics=None,
browse_diagnostic: dict = None,
error_message: str = None,
source: str = "pchome",
) -> None:
@@ -695,9 +857,15 @@ class CompetitorPriceFeeder:
search_terms_expr = "CAST(:search_terms AS jsonb)" if conn.dialect.name == "postgresql" else ":search_terms"
json_cast = "CAST(:match_diagnostic_json AS jsonb)" if conn.dialect.name == "postgresql" else ":match_diagnostic_json"
codes_cast = "CAST(:diagnostic_codes AS jsonb)" if conn.dialect.name == "postgresql" else ":diagnostic_codes"
browse_cast = "CAST(:browse_diagnostic_json AS jsonb)" if conn.dialect.name == "postgresql" else ":browse_diagnostic_json"
diagnostic_payload = _match_diagnostics_payload(diagnostics)
diagnostic_codes = diagnostic_payload.get("reasons") or []
product_payload = _product_snapshot_payload(best_product)
browse_diagnostic_json = (
json.dumps(browse_diagnostic, ensure_ascii=False)
if browse_diagnostic
else None
)
conn.execute(text(f"""
INSERT INTO competitor_match_attempts
(sku, source, momo_product_id, momo_product_name, momo_price,
@@ -706,6 +874,7 @@ class CompetitorPriceFeeder:
competitor_product_url, competitor_image_url, competitor_stock,
best_competitor_price, best_match_score,
match_diagnostic_json, comparison_mode, hard_veto, diagnostic_codes,
browse_diagnostic_json,
error_message,
attempted_at)
VALUES
@@ -715,6 +884,7 @@ class CompetitorPriceFeeder:
:competitor_product_url, :competitor_image_url, :competitor_stock,
:best_price, :best_score,
{json_cast}, :comparison_mode, :hard_veto, {codes_cast},
{browse_cast},
:error_message,
CURRENT_TIMESTAMP)
"""), {
@@ -735,6 +905,7 @@ class CompetitorPriceFeeder:
"comparison_mode": diagnostic_payload.get("comparison_mode"),
"hard_veto": diagnostic_payload.get("hard_veto"),
"diagnostic_codes": json.dumps(diagnostic_codes, ensure_ascii=False) if diagnostic_codes else None,
"browse_diagnostic_json": browse_diagnostic_json,
"error_message": (error_message or "")[:1000] or None,
})
@@ -1197,6 +1368,12 @@ class CompetitorPriceFeeder:
products = _search_pchome_candidates(crawler, momo_name, search_terms, momo_price=momo_price)
if not products:
logger.debug(f"[Feeder] {sku} 無搜尋結果,跳過")
browse_diagnostic = self._prepare_browse_diagnostic(
momo_name,
search_terms=search_terms,
reason="no_result",
candidate_count=0,
)
self._record_match_attempt(
sku,
momo_name,
@@ -1205,6 +1382,7 @@ class CompetitorPriceFeeder:
search_terms=search_terms,
candidate_count=0,
attempt_status="no_result",
browse_diagnostic=browse_diagnostic,
source=source,
)
attempts_written += 1
@@ -1213,6 +1391,12 @@ class CompetitorPriceFeeder:
ranked_matches = _rank_match_details(momo_name, products, momo_price=momo_price)
if not ranked_matches:
browse_diagnostic = self._prepare_browse_diagnostic(
momo_name,
search_terms=search_terms,
reason="no_match",
candidate_count=len(products),
)
self._record_match_attempt(
sku,
momo_name,
@@ -1221,6 +1405,7 @@ class CompetitorPriceFeeder:
search_terms=search_terms,
candidate_count=len(products),
attempt_status="no_match",
browse_diagnostic=browse_diagnostic,
source=source,
)
attempts_written += 1
@@ -1305,6 +1490,15 @@ class CompetitorPriceFeeder:
f"[Feeder] {sku} 候選屬單位價可比但非同販售組合,"
f"不寫入正式價差 | {_format_match_diagnostics(diagnostics)}"
)
browse_diagnostic = self._prepare_browse_diagnostic(
momo_name,
search_terms=search_terms,
reason="unit_comparable",
best_product=best_product,
best_score=score,
diagnostics=diagnostics,
candidate_count=len(products),
)
self._record_match_attempt(
sku,
momo_name,
@@ -1316,6 +1510,7 @@ class CompetitorPriceFeeder:
best_product=best_product,
best_score=score,
diagnostics=diagnostics,
browse_diagnostic=browse_diagnostic,
error_message=_format_match_diagnostics(diagnostics),
source=source,
)
@@ -1325,10 +1520,24 @@ class CompetitorPriceFeeder:
if score < MIN_MATCH_SCORE and not manual_accept_override:
attempt_status = _classify_low_score_attempt(score, diagnostics)
if (
attempt_status == "recoverable_low_score"
and _has_variant_selection_gap(momo_name, ranked_matches, score)
):
attempt_status = "true_low_confidence"
logger.debug(
f"[Feeder] {sku} 比對分數過低 ({score:.3f} < {MIN_MATCH_SCORE})"
f"{_format_match_diagnostics(diagnostics)}"
)
browse_diagnostic = self._prepare_browse_diagnostic(
momo_name,
search_terms=search_terms,
reason=attempt_status,
best_product=best_product,
best_score=score,
diagnostics=diagnostics,
candidate_count=len(products),
)
self._record_match_attempt(
sku,
momo_name,
@@ -1340,6 +1549,7 @@ class CompetitorPriceFeeder:
best_product=best_product,
best_score=score,
diagnostics=diagnostics,
browse_diagnostic=browse_diagnostic,
error_message=_format_match_diagnostics(diagnostics),
source=source,
)
@@ -1365,6 +1575,15 @@ class CompetitorPriceFeeder:
write_reason = "manual_accept_override"
if not should_write:
logger.info(f"[Feeder] {sku} 進入人工覆核,不覆蓋既有配對 | {write_reason}")
browse_diagnostic = self._prepare_browse_diagnostic(
momo_name,
search_terms=search_terms,
reason="protected_existing_match",
best_product=best_product,
best_score=score,
diagnostics=diagnostics,
candidate_count=len(products),
)
self._record_match_attempt(
sku,
momo_name,
@@ -1376,6 +1595,7 @@ class CompetitorPriceFeeder:
best_product=best_product,
best_score=score,
diagnostics=diagnostics,
browse_diagnostic=browse_diagnostic,
error_message=f"{write_reason}; {_format_match_diagnostics(diagnostics)}",
source=source,
)
@@ -1418,6 +1638,12 @@ class CompetitorPriceFeeder:
except Exception as e:
logger.error(f"[Feeder] {sku} 處理失敗: {e}")
try:
browse_diagnostic = self._prepare_browse_diagnostic(
momo_name,
search_terms=search_terms,
reason="crawler_error",
candidate_count=0,
)
self._record_match_attempt(
sku,
momo_name,
@@ -1425,6 +1651,7 @@ class CompetitorPriceFeeder:
momo_price=momo_price,
search_terms=search_terms,
attempt_status="error",
browse_diagnostic=browse_diagnostic,
error_message=str(e),
source=source,
)

View File

@@ -520,6 +520,7 @@ BRAND_ALIAS_OVERRIDES = {
"xiaomi": ("小米有品", "小米", "xiaomi"),
"mac": ("m.a.c", "mac", "m a c"),
"opi": ("o.p.i", "opi", "o p i"),
"st雞仔牌": ("日本雞仔牌st", "日本st雞仔牌", "st雞仔牌", "雞仔牌st", "雞仔牌"),
}
PRODUCT_TYPES = {
@@ -1157,12 +1158,25 @@ def _has_refill_pack(identity: ProductIdentity) -> bool:
return bool(
"補充瓶" in text
or "補充包" in text
or "補充芯" in text
or "補充蕊" in text
or "替換蕊" in text
or "替換芯" in text
or "refill" in text
)
def _has_accessory_case(identity: ProductIdentity) -> bool:
    """Return True when the normalized name contains a case-only listing marker."""
    normalized = identity.normalized_name
    case_markers = ("眉彩餅盒", "盒一入款", "盒三入款", "盒單入", "空盒")
    return any(marker in normalized for marker in case_markers)
def _spec_mention_count(identity: ProductIdentity) -> int:
return len(
re.findall(
@@ -1461,6 +1475,7 @@ def _build_evidence_flags(
"count_conflict",
"bundle_offer_conflict",
"multi_component_conflict",
"accessory_case_conflict",
"refill_pack_conflict",
"price_ratio_extreme",
"price_ratio_wide",
@@ -1557,6 +1572,9 @@ def score_marketplace_match(
reasons.append("multi_component_conflict")
if _has_refill_pack(left) != _has_refill_pack(right):
reasons.append("refill_pack_conflict")
accessory_case_conflict = _has_accessory_case(left) != _has_accessory_case(right)
if accessory_case_conflict:
reasons.append("accessory_case_conflict")
left_spec_mentions = _spec_mention_count(left)
right_spec_mentions = _spec_mention_count(right)
if left_spec_mentions and right_spec_mentions and left_spec_mentions != right_spec_mentions:
@@ -1579,6 +1597,8 @@ def score_marketplace_match(
hard_veto = True
if _has_refill_pack(left) != _has_refill_pack(right):
hard_veto = True
if accessory_case_conflict:
hard_veto = True
if model_line_conflict:
hard_veto = True
if left_spec_mentions and right_spec_mentions and left_spec_mentions != right_spec_mentions:
@@ -1752,6 +1772,20 @@ def score_marketplace_match(
):
score += 0.07
reasons.append("shared_identity_anchor_exact_line")
if (
"無印乾爽止汗爽身乳液" in shared_anchor
and {"nivea", "妮維雅"} & (left.brand_tokens | right.brand_tokens)
and brand_score >= 0.95
and not hard_veto
and price_penalty == 0
and type_score >= 0.95
and spec_score >= 0.45
and token_score >= 0.55
and sequence_score >= 0.62
and not variant_descriptor_conflict
):
score += 0.08
reasons.append("shared_identity_anchor_nivea_dry_lotion")
if (
"多效提亮防曬霜" in shared_anchor
and {"recipe", "box"} <= (left.brand_tokens | right.brand_tokens)
@@ -1967,6 +2001,10 @@ def _extract_anchor_phrases(token: str) -> list[str]:
phrases: list[str] = []
if "經典旋轉眉筆" in cleaned:
phrases.append("經典旋轉眉筆")
if "無印乾爽" in cleaned and "止汗爽身乳液" in cleaned:
phrases.append("無印乾爽止汗爽身乳液")
if "智能光感應" in cleaned and "無線自動除臭芳香噴霧機" in cleaned:
phrases.append("智能光感應無線自動除臭芳香噴霧機")
if "悠斯晶" in normalized and "經典乳霜" in normalized:
phrases.append("悠斯晶經典乳霜")
if "經典乳霜" in normalized:

View File

@@ -1,4 +1,5 @@
from pathlib import Path
import json
import logging
from datetime import datetime
from types import SimpleNamespace
@@ -42,6 +43,7 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
source = (ROOT / "services/competitor_price_feeder.py").read_text(encoding="utf-8")
migration = (ROOT / "migrations/023_competitor_match_attempts.sql").read_text(encoding="utf-8")
diagnostics_migration = (ROOT / "migrations/041_competitor_match_diagnostics.sql").read_text(encoding="utf-8")
browse_migration = (ROOT / "migrations/042_add_browse_diagnostics_to_match_attempts.sql").read_text(encoding="utf-8")
assert "attempts_written" in source
assert "_ensure_competitor_match_attempts_table" in source
@@ -56,6 +58,9 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
assert 'attempt_status="no_match"' in source
assert 'attempt_status="error"' in source
assert "_search_pchome_candidates(crawler, momo_name, search_terms, momo_price=momo_price)" in source
assert "_prepare_browse_diagnostic" in source
assert "browse_diagnostic_json" in source
assert "PCHOME_FEEDER_BROWSE_SH_EXECUTE_ENABLED" in source
assert 'attempt_status="protected_existing_match"' in source
assert "_should_upsert_competitor_price" in source
assert "_classify_low_score_attempt" in source
@@ -88,6 +93,8 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
assert "match_diagnostic_json" in diagnostics_migration
assert "comparison_mode" in diagnostics_migration
assert "diagnostic_codes" in diagnostics_migration
assert "browse_diagnostic_json" in browse_migration
assert "idx_comp_match_attempts_browse_diag_time" in browse_migration
assert "competitor_product_url" in source
assert "competitor_image_url" in source
assert "competitor_stock" in source
@@ -95,6 +102,74 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes():
assert "idx_comp_match_attempts_sku_source_time" in migration
def test_competitor_feeder_records_browse_sh_plan_for_no_result(monkeypatch):
    """An empty search result is recorded together with a plan-only browse.sh payload."""
    from services.competitor_price_feeder import CompetitorPriceFeeder

    class FakeCrawler:
        """Crawler stub whose search succeeds but returns no products."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", []

    recorded_attempts = []

    def capture_attempt(*_positional, **keyword_args):
        recorded_attempts.append(keyword_args)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "BROWSE001",
        "name": "MOMO 稀有專櫃組合 50ml",
        "product_id": 901,
        "momo_price": 1280,
    }
    result = feeder._run_sku_items([sku_item])

    assert result.matched == 0
    assert result.skipped_no_result == 1

    browse_plan = recorded_attempts[0]["browse_diagnostic"]
    assert browse_plan["tool"] == "browse.sh"
    assert browse_plan["mode"] == "plan_only"
    assert browse_plan["execute_enabled"] is False
    assert browse_plan["reason"] == "no_result"
    assert browse_plan["execution"]["status"] == "disabled"
    first_command_args = browse_plan["suggested_commands"][0]["args"]
    assert first_command_args[0] == "get"
    assert "ecshweb.pchome.com.tw/search" in browse_plan["urls"][0]
def test_competitor_match_attempt_persists_browse_diagnostic_json():
    """The browse diagnostic dict round-trips through the attempts table as JSON text."""
    from sqlalchemy import create_engine, text
    from services.competitor_price_feeder import CompetitorPriceFeeder

    memory_engine = create_engine("sqlite:///:memory:")
    feeder = CompetitorPriceFeeder(engine=memory_engine)
    browse_plan = {
        "tool": "browse.sh",
        "mode": "plan_only",
        "urls": ["https://ecshweb.pchome.com.tw/search/v3.3/?q=test"],
    }

    feeder._record_match_attempt(
        sku="BROWSE002",
        momo_name="MOMO 取證測試商品",
        search_terms=["取證 測試"],
        attempt_status="no_result",
        browse_diagnostic=browse_plan,
    )

    lookup = text("""
        SELECT browse_diagnostic_json
        FROM competitor_match_attempts
        WHERE sku = 'BROWSE002'
    """)
    with memory_engine.connect() as conn:
        stored_json = conn.execute(lookup).scalar_one()

    payload = json.loads(stored_json)
    assert payload["tool"] == "browse.sh"
    assert payload["mode"] == "plan_only"
def test_match_diagnostics_payload_carries_professional_match_lanes():
from services.competitor_price_feeder import _match_diagnostics_payload, _extend_match_tags
from services.marketplace_product_matcher import score_marketplace_match
@@ -591,6 +666,166 @@ def test_competitor_feeder_marks_weak_identity_as_true_low_confidence(monkeypatc
assert attempts[0]["attempt_status"] == "true_low_confidence"
def test_competitor_feeder_downgrades_variant_selection_gap_from_recoverable(monkeypatch):
    """A multi-shade source matched against single-shade candidates ends up as true_low_confidence."""
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    # Three listings of the same eyebrow pencil that differ only by shade.
    shade_listings = (
        ("DDAB01-08", "PERIPERA 雙頭旋轉極細眉筆 08深杏色 0.05g"),
        ("DDAB01-09", "PERIPERA 雙頭旋轉極細眉筆 09灰褐棕 0.05g"),
        ("DDAB01-11", "PERIPERA 雙頭旋轉極細眉筆 11摩卡灰褐 0.05g"),
    )
    products = [
        PChomeProduct(
            product_id=listing_id,
            name=listing_name,
            price=180,
            original_price=220,
            discount=18,
            image_url="",
            product_url=f"https://24h.pchome.com.tw/prod/{listing_id}",
            stock=20,
            store="24h",
            rating=4.7,
            review_count=8,
            is_on_sale=True,
            crawled_at=datetime.now(),
        )
        for listing_id, listing_name in shade_listings
    ]

    class FakeCrawler:
        """Crawler stub that always returns the three shade listings."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", products

    def fake_score(_momo_name, competitor_name, **_kwargs):
        # One shade scores marginally higher so the ranking is deterministic.
        if "09灰褐棕" in competitor_name:
            candidate_score = 0.734
        else:
            candidate_score = 0.733
        return SimpleNamespace(
            score=candidate_score,
            brand_score=1.0,
            token_score=0.74,
            spec_score=0.55,
            sequence_score=0.66,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=False,
            reasons=("shared_identity_anchor_packaging_variant",),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    recorded_attempts = []

    def capture_attempt(*_positional, **keyword_args):
        recorded_attempts.append(keyword_args)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "P001",
        "name": "【peripera官方直營】雙頭旋轉極細眉筆_多色任選(1.5mm極細筆頭)",
        "product_id": 11,
        "momo_price": 180,
    }
    result = feeder._run_sku_items([sku_item])

    assert result.matched == 0
    assert result.skipped_low_score == 1
    assert recorded_attempts[0]["attempt_status"] == "true_low_confidence"
def test_competitor_feeder_treats_choose_one_offer_as_missing_variant_signal(monkeypatch):
    """A "choose 1" source title matched to a multi-shade listing is recorded as true_low_confidence."""
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    multi_shade_listing = PChomeProduct(
        product_id="DDAB01-YSL",
        name="【YSL聖羅蘭】恆久完美透膚煙染腮紅 6g ( #12/ #57/ #93)",
        price=1650,
        original_price=1780,
        discount=7,
        image_url="",
        product_url="https://24h.pchome.com.tw/prod/DDAB01-YSL",
        stock=20,
        store="24h",
        rating=4.7,
        review_count=8,
        is_on_sale=True,
        crawled_at=datetime.now(),
    )

    class FakeCrawler:
        """Crawler stub that always returns the single multi-shade listing."""

        def __init__(self, *_args, **_kwargs):
            pass

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [multi_shade_listing]

    def fake_score(*_args, **_kwargs):
        return SimpleNamespace(
            score=0.735,
            brand_score=1.0,
            token_score=0.74,
            spec_score=0.55,
            sequence_score=0.66,
            type_score=1.0,
            price_penalty=0.0,
            hard_veto=False,
            reasons=("shared_identity_anchor_packaging_variant",),
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    recorded_attempts = []

    def capture_attempt(*_positional, **keyword_args):
        recorded_attempts.append(keyword_args)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    sku_item = {
        "sku": "Y001",
        "name": "【YSL】官方直營 恆久完美透膚煙染腮紅(腮紅/任選1款/新品上市)",
        "product_id": 12,
        "momo_price": 1650,
    }
    result = feeder._run_sku_items([sku_item])

    assert result.matched == 0
    assert result.skipped_low_score == 1
    assert recorded_attempts[0]["attempt_status"] == "true_low_confidence"
def test_should_upsert_allows_same_identity_candidate_to_replace_lower_score():
from sqlalchemy import create_engine, text

View File

@@ -503,6 +503,49 @@ def test_marketplace_matcher_promotes_recipe_box_marketing_line_drift():
assert "shared_identity_anchor_recipe_box_line" in diagnostics.reasons
def test_marketplace_matcher_promotes_st_deodorizer_with_brand_alias_and_line_anchor():
    """Two spellings of the ST brand on the same deodorizer line reach the match threshold."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【日本雞仔牌ST】室內消臭力智能光感應3段定時無線自動除臭芳香噴霧機(內贈芳香劑39ml 衛浴精油擴香瓶棒組)"
    pchome_title = "日本ST雞仔牌-室內消臭力智能光感應3段定時無線自動除臭芳香噴霧機1入(含芳香劑39ml)"
    diagnostics = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=699,
        competitor_price=699,
    )

    accepted_anchor_reasons = (
        "shared_identity_anchor_exact_line",
        "shared_identity_anchor_packaging_variant",
    )
    assert diagnostics.score >= 0.76
    assert any(reason in diagnostics.reasons for reason in accepted_anchor_reasons)
def test_marketplace_matcher_promotes_nivea_dry_lotion_with_long_shared_anchor():
    """The NIVEA dry-lotion anchor bonus lifts reordered titles to the match threshold."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【NIVEA 妮維雅】男士無印乾爽止汗爽身乳液(無印止汗滾珠/德國妮維雅)"
    pchome_title = "【NIVEA 妮維雅】止汗爽身乳液 無印乾爽50ml"
    diagnostics = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=129,
        competitor_price=129,
    )

    assert diagnostics.score >= 0.76
    assert "shared_identity_anchor_nivea_dry_lotion" in diagnostics.reasons
def test_marketplace_matcher_rejects_refill_core_vs_case_only_pack():
    """A refill core and a case-only listing of the same line must be hard-vetoed."""
    from services.marketplace_product_matcher import score_marketplace_match

    refill_title = "【KATE 凱婷】3D造型眉彩餅補充芯(眉彩刷、眉餅盒分開販售)"
    case_title = "【KATE 凱婷】眉彩餅盒一入款(搭配3D造型眉彩餅補充芯)"
    diagnostics = score_marketplace_match(
        refill_title,
        case_title,
        momo_price=280,
        competitor_price=280,
    )

    conflict_reasons = ("accessory_case_conflict", "refill_pack_conflict")
    assert diagnostics.score < 0.76
    assert diagnostics.hard_veto is True
    assert any(reason in diagnostics.reasons for reason in conflict_reasons)
def test_marketplace_matcher_suppresses_wide_price_penalty_for_exact_lip_product():
from services.marketplace_product_matcher import score_marketplace_match