[V10.350] add retryable identity recovery loop
This commit is contained in:
@@ -29,6 +29,9 @@
|
||||
- 2026-05-20 追記:同步 PChome 搜尋詞品質層、候選召回與 hard-veto 狀態分流更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;並補列背景市場情報 deployment readiness 大檔,僅更新 inventory。
|
||||
- 2026-05-20 追記:同步 PChome 搜尋詞特定品線優先級更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
- 2026-05-20 追記:同步 PChome 共享 identity anchor scorer 與市場情報 review report route 進入大檔門檻後的行數;此處只更新 inventory,不變更功能。
|
||||
- 2026-05-20 追記:同步 PChome contained identity anchor scorer 更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
- 2026-05-20 追記:同步 PChome spec/name alignment near-threshold scorer 更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
- 2026-05-20 追記:同步市場情報 review report route 與 review receipt 巨檔現況,並校正 PChome fresh-search recovery 更新後的 `services/competitor_price_feeder.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
|
||||
|
||||
## 達到或超過 800 行檔案清單
|
||||
|
||||
@@ -55,18 +58,19 @@
|
||||
| 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders |
|
||||
| 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers |
|
||||
| 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting |
|
||||
| 1447 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
|
||||
| 1559 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
|
||||
| 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service |
|
||||
| 844 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing |
|
||||
| 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy |
|
||||
| 1042 | `services/code_review_pipeline_service.py` | P2 Code review pipeline service | scan orchestration / finding normalization / persistence adapter |
|
||||
| 953 | `routes/export_routes.py` | P2 Export flow | export command/router glue / file path / download orchestration |
|
||||
| 816 | `services/ppt_vision_service.py` | P2 PPT vision QA service | runtime state / queue status / model probe / audit execution 分離 |
|
||||
| 1602 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / cache strategy |
|
||||
| 1733 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / retryable candidate recovery / cache strategy |
|
||||
| 1120 | `services/competitor_intel_repository.py` | P2 competitor intel repository | review queue query / cache shaping / formatting helpers |
|
||||
| 805 | `routes/bot_api_routes.py` | P2 Bot API Blueprint | route glue / bot action service |
|
||||
| 804 | `routes/market_intel_review_report_routes.py` | P2 market intel review report Blueprint | review report route glue / export payload / phase handoff orchestration |
|
||||
| 990 | `routes/market_intel_review_report_routes.py` | P2 market intel review report Blueprint | review report route glue / export payload / phase handoff orchestration |
|
||||
| 811 | `services/market_intel/deployment_readiness.py` | P2 market intel deployment readiness | preflight gates / readiness payload / route contract helpers |
|
||||
| 846 | `services/market_intel/candidate_queue_review_ai_summary_persistence_telegram_dispatch_report_catalog_record_run_receipt.py` | P2 market intel review receipt pipeline | AI summary / persistence / Telegram dispatch / report catalog run receipt orchestration |
|
||||
|
||||
## 市場情報開發前置禁區
|
||||
|
||||
|
||||
@@ -274,6 +274,36 @@ def _search_pchome_candidates(crawler, momo_name: str, keywords: list = None, mo
|
||||
return candidates
|
||||
|
||||
|
||||
def _recover_low_score_with_fresh_search(
    crawler,
    momo_name: str,
    momo_price: float = None,
    existing_product_id: str = "",
) -> tuple[Optional[tuple], list[str], int]:
    """
    Run one fresh keyword search after a legacy / known-id candidate was
    re-scored and still came out low, in an attempt to replace an old
    mismatch with a genuinely identical product.

    Candidates whose product-id key equals the key of ``existing_product_id``
    are dropped from the search result. If dropping them would leave nothing,
    the unfiltered search result is kept instead.

    Returns a 3-tuple of: whatever ``_find_best_match_detail`` yields for the
    retained candidates, the keywords used for the search, and the number of
    retained candidates.
    """
    search_keywords = _build_search_keywords(momo_name)
    retained = _search_pchome_candidates(
        crawler,
        momo_name,
        keywords=search_keywords,
        momo_price=momo_price,
    )
    if existing_product_id:
        stale_key = _product_id_key(existing_product_id)
        without_stale = [
            item
            for item in retained
            if _product_id_key(getattr(item, "product_id", "")) != stale_key
        ]
        # Only narrow the pool when something other than the stale id remains.
        if without_stale:
            retained = without_stale
    best_match = _find_best_match_detail(momo_name, retained, momo_price=momo_price)
    return best_match, search_keywords, len(retained)
|
||||
|
||||
|
||||
def _structural_similarity(momo_p, pchome_p) -> float:
|
||||
"""
|
||||
結構化相似度計算(品牌 + 規格 + 關鍵字)
|
||||
@@ -1411,14 +1441,94 @@ class CompetitorPriceFeeder:
|
||||
continue
|
||||
|
||||
if score < MIN_MATCH_SCORE:
|
||||
recovery_terms: list[str] = []
|
||||
recovery_candidate_count = 0
|
||||
if not getattr(diagnostics, "hard_veto", False):
|
||||
recovered, recovery_terms, recovery_candidate_count = _recover_low_score_with_fresh_search(
|
||||
crawler,
|
||||
momo_name,
|
||||
momo_price=momo_price,
|
||||
existing_product_id=competitor_product_id,
|
||||
)
|
||||
if recovered:
|
||||
recovered_product, recovered_score, recovered_diagnostics = recovered
|
||||
if recovered_score > score:
|
||||
best_product, score, diagnostics = recovered_product, recovered_score, recovered_diagnostics
|
||||
|
||||
if score >= MIN_MATCH_SCORE:
|
||||
tags = _extract_tags(best_product)
|
||||
tags.extend(getattr(diagnostics, "tags", []))
|
||||
for reason in getattr(diagnostics, "reasons", ()) or ():
|
||||
tags.append(f"match_{reason}")
|
||||
tags.append("refresh_known_identity")
|
||||
if recovery_terms:
|
||||
tags.append("fresh_search_recovery")
|
||||
tags = list(dict.fromkeys(tags))
|
||||
|
||||
should_write, write_reason = self._should_upsert_competitor_price(
|
||||
sku,
|
||||
best_product,
|
||||
score,
|
||||
source=source,
|
||||
)
|
||||
candidate_count = max(1, recovery_candidate_count or 1)
|
||||
attempt_terms = search_terms + [term for term in recovery_terms if term not in search_terms]
|
||||
if not should_write:
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
momo_product_id=momo_product_id,
|
||||
momo_price=momo_price,
|
||||
search_terms=attempt_terms,
|
||||
candidate_count=candidate_count,
|
||||
attempt_status="refresh_needs_review",
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
error_message=f"{write_reason}; {_format_match_diagnostics(diagnostics)}",
|
||||
source=source,
|
||||
)
|
||||
skipped_low += 1
|
||||
attempts_written += 1
|
||||
continue
|
||||
|
||||
tags.append(write_reason)
|
||||
self._upsert_competitor_price(
|
||||
sku,
|
||||
best_product,
|
||||
score,
|
||||
tags,
|
||||
momo_product_id=momo_product_id,
|
||||
momo_price=momo_price,
|
||||
diagnostics=diagnostics,
|
||||
source=source,
|
||||
)
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
momo_product_id=momo_product_id,
|
||||
momo_price=momo_price,
|
||||
search_terms=attempt_terms,
|
||||
candidate_count=candidate_count,
|
||||
attempt_status="matched",
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
diagnostics=diagnostics,
|
||||
source=source,
|
||||
)
|
||||
matched += 1
|
||||
history_written += 1
|
||||
attempts_written += 1
|
||||
continue
|
||||
|
||||
attempt_status = "identity_veto" if getattr(diagnostics, "hard_veto", False) else "refresh_low_score"
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
momo_product_id=momo_product_id,
|
||||
momo_price=momo_price,
|
||||
search_terms=search_terms,
|
||||
candidate_count=1,
|
||||
search_terms=search_terms + [term for term in recovery_terms if term not in search_terms],
|
||||
candidate_count=max(1, recovery_candidate_count or 1),
|
||||
attempt_status=attempt_status,
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
|
||||
@@ -35,6 +35,8 @@ NOISE_PHRASES = (
|
||||
"送",
|
||||
"買一送一",
|
||||
"買1送1",
|
||||
"限定版",
|
||||
"璀璨奢金限定版",
|
||||
"任選",
|
||||
"即期品",
|
||||
"福利品",
|
||||
@@ -136,6 +138,8 @@ SEARCH_NOISE_PHRASES = (
|
||||
"精美紙袋",
|
||||
"交換禮物",
|
||||
"聖誕禮物",
|
||||
"限定版",
|
||||
"璀璨奢金限定版",
|
||||
"母親節",
|
||||
"父親節",
|
||||
"情人節",
|
||||
@@ -191,7 +195,11 @@ SEARCH_NOISE_TOKENS = {
|
||||
}
|
||||
|
||||
SEARCH_IDENTITY_ANCHORS = (
|
||||
"止汗爽身噴霧",
|
||||
"止汗爽身乳膏pro",
|
||||
"零粉感超持久粉底棒",
|
||||
"超持久水光鎖吻唇釉",
|
||||
"裸光蜜粉餅",
|
||||
"絕對完美永生玫瑰逆齡乳霜",
|
||||
"永生玫瑰逆齡乳霜",
|
||||
"永生玫瑰霜",
|
||||
@@ -223,6 +231,7 @@ SEARCH_IDENTITY_ANCHORS = (
|
||||
"身體乳",
|
||||
"緊實乳",
|
||||
"潔膚露",
|
||||
"浴潔露",
|
||||
"潔淨液",
|
||||
"護甲油",
|
||||
"指甲油",
|
||||
@@ -288,6 +297,10 @@ BRAND_ALIAS_OVERRIDES = {
|
||||
}
|
||||
|
||||
PRODUCT_TYPES = {
|
||||
"止汗噴霧": ("止汗爽身噴霧", "爽身噴霧", "止汗噴霧"),
|
||||
"潔膚露": ("潔膚露", "浴潔露", "護潔露", "沐浴露", "wash"),
|
||||
"唇釉": ("唇釉", "唇彩", "lip tint", "lip glaze"),
|
||||
"粉底棒": ("粉底棒", "foundation stick"),
|
||||
"精華": ("精華", "精華液", "essence", "serum", "安瓶"),
|
||||
"化妝水": ("化妝水", "機能水", "toner", "lotion"),
|
||||
"乳液": ("乳液", "emulsion", "milk"),
|
||||
@@ -1259,6 +1272,19 @@ def score_marketplace_match(
|
||||
):
|
||||
score += 0.03
|
||||
reasons.append("shared_identity_anchor_no_spec")
|
||||
if (
|
||||
shared_anchor
|
||||
and brand_score >= 0.95
|
||||
and not hard_veto
|
||||
and price_penalty == 0
|
||||
and type_score >= 0.95
|
||||
and spec_score >= 0.45
|
||||
and token_score >= 0.56
|
||||
and sequence_score >= 0.62
|
||||
and not variant_descriptor_conflict
|
||||
):
|
||||
score += 0.02
|
||||
reasons.append("shared_identity_anchor_packaging_variant")
|
||||
if (
|
||||
brand_score >= 0.95
|
||||
and not hard_veto
|
||||
|
||||
@@ -618,6 +618,124 @@ def test_competitor_feeder_refreshes_expired_identity_by_known_product_id(monkey
|
||||
assert attempts[0]["search_terms"] == ["known_product_id:DDAB01-1900ABCD"]
|
||||
|
||||
|
||||
def test_competitor_feeder_refresh_recovers_with_fresh_search_when_known_id_is_low_score(monkeypatch):
    """A known-id refresh that scores low is replaced by the fresh-search candidate."""
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    # Fields that are identical for both PChome fixtures.
    shared_fields = {
        "original_price": 2490,
        "image_url": "",
        "stock": 20,
        "store": "24h",
        "review_count": 8,
        "is_on_sale": True,
    }
    stale = PChomeProduct(
        product_id="DDAB01-STALE",
        name="Panasonic 國際牌 男仕防水美體除毛器 國際版 (ER-GK83)",
        price=2290,
        discount=8,
        product_url="https://24h.pchome.com.tw/prod/DDAB01-STALE",
        rating=4.7,
        crawled_at=datetime.now(),
        **shared_fields,
    )
    recovered = PChomeProduct(
        product_id="DDAB01-RECOVERED",
        name="Panasonic 國際牌 男士身體除毛器 ER-GK83",
        price=2390,
        discount=4,
        product_url="https://24h.pchome.com.tw/prod/DDAB01-RECOVERED",
        rating=4.8,
        crawled_at=datetime.now(),
        **shared_fields,
    )

    class FakeCrawler:
        """Crawler stub: the detail lookup returns the stale item, search returns both."""

        def __init__(self, *_args, **_kwargs):
            pass

        def fetch_product_details(self, product_ids, batch_size=20):
            assert product_ids == ["DDAB01-STALE"]
            return True, "ok", [stale]

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [stale, recovered]

    def make_diagnostics(score, token_score, spec_score, sequence_score, reasons):
        # Build the diagnostics namespace; only the passed-in fields vary between cases.
        return SimpleNamespace(
            score=score,
            brand_score=1.0,
            token_score=token_score,
            spec_score=spec_score,
            sequence_score=sequence_score,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=False,
            reasons=reasons,
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )

    def fake_score(_momo_name, competitor_name, **_kwargs):
        looks_recovered = (
            "RECOVERED" in competitor_name
            or "男士身體除毛器" in competitor_name
        )
        if looks_recovered:
            return make_diagnostics(0.81, 0.8, 0.8, 0.72, ("shared_model_token",))
        return make_diagnostics(0.68, 0.55, 0.55, 0.6, ())

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)
    feeder = CompetitorPriceFeeder(engine=object())
    attempts = []
    writes = []

    def allow_upsert(*_args, **_kwargs):
        return True, "same_or_empty_existing"

    def capture_write(sku, product, score, tags, **kwargs):
        writes.append({
            "sku": sku,
            "product_id": product.product_id,
            "score": score,
            "tags": tags,
            **kwargs,
        })

    def capture_attempt(*args, **kwargs):
        attempts.append(kwargs)

    monkeypatch.setattr(feeder, "_should_upsert_competitor_price", allow_upsert)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_write)
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    refresh_item = {
        "sku": "TP00090100000153",
        "name": "【Panasonic 國際牌】男士身體除毛器 2025新款 ER-GK83 日版 日本直送",
        "product_id": 1,
        "momo_price": 2490,
        "competitor_product_id": "DDAB01-STALE",
    }
    result = feeder._run_known_identity_refresh_items([refresh_item])

    assert result.matched == 1
    assert writes[0]["product_id"] == "DDAB01-RECOVERED"
    assert "fresh_search_recovery" in writes[0]["tags"]
    assert attempts[0]["attempt_status"] == "matched"
    assert "known_product_id:DDAB01-STALE" in attempts[0]["search_terms"]
    assert any("Panasonic" in term or "國際牌" in term for term in attempts[0]["search_terms"])
|
||||
|
||||
|
||||
def test_competitor_feeder_records_unit_comparable_without_price_upsert(monkeypatch):
|
||||
from services.competitor_price_feeder import CompetitorPriceFeeder
|
||||
from services.pchome_crawler import PChomeProduct
|
||||
|
||||
@@ -214,6 +214,50 @@ def test_marketplace_matcher_rejects_product_type_conflict_even_when_line_matche
|
||||
assert diagnostics.comparison_mode == "not_comparable"
|
||||
|
||||
|
||||
def test_marketplace_matcher_rejects_foundation_stick_vs_foundation_liquid():
    """A foundation-stick title must be vetoed against the 30ml foundation title."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【蘭蔻】零粉感超持久粉底棒9.5g"
    pchome_title = "【LANCOME 蘭蔻】零粉感超持久粉底 30ml"
    verdict = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=1620,
        competitor_price=1580,
    )

    assert verdict.score < 0.76
    assert verdict.hard_veto is True
    assert "type_conflict" in verdict.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_nivea_deodorant_spray_identity():
    """The two NIVEA deodorant-spray titles must score at or above 0.76 with no hard veto."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【NIVEA 妮維雅】男士 止汗爽身噴霧 無印乾爽-清新海洋"
    pchome_title = "NIVEA 妮維雅 男士止汗爽身噴霧 無印乾爽-清新海洋150ml"
    verdict = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=159,
        competitor_price=169,
    )

    assert verdict.score >= 0.76
    assert verdict.hard_veto is False
|
||||
|
||||
|
||||
def test_marketplace_matcher_promotes_packaging_variant_for_same_nars_powder():
    """The limited-edition and regular NARS powder titles must match via the packaging-variant reason."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【NARS】官方直營 裸光蜜粉餅(璀璨奢金限定版/星沙金小白餅)"
    pchome_title = "【NARS】裸光蜜粉餅(小白餅) 10g"
    verdict = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=1050,
        competitor_price=1050,
    )

    assert verdict.score >= 0.76
    assert verdict.hard_veto is False
    assert "shared_identity_anchor_packaging_variant" in verdict.reasons
|
||||
|
||||
|
||||
def test_marketplace_matcher_rejects_same_count_different_unit_family():
|
||||
from services.marketplace_product_matcher import score_marketplace_match
|
||||
|
||||
|
||||
Reference in New Issue
Block a user