[V10.350] add retryable identity recovery loop

This commit is contained in:
OoO
2026-05-20 20:24:40 +08:00
parent cf4d8aedea
commit d030e4cf22
5 changed files with 307 additions and 5 deletions

View File

@@ -29,6 +29,9 @@
- 2026-05-20 追記:同步 PChome 搜尋詞品質層、候選召回與 hard-veto 狀態分流更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數;並補列背景市場情報 deployment readiness 大檔,僅更新 inventory。
- 2026-05-20 追記:同步 PChome 搜尋詞特定品線優先級更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
- 2026-05-20 追記:同步 PChome 共享 identity anchor scorer 與市場情報 review report route 進入大檔門檻後的行數;此處只更新 inventory,不變更功能。
- 2026-05-20 追記:同步 PChome contained identity anchor scorer 更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
- 2026-05-20 追記:同步 PChome spec/name alignment near-threshold scorer 更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
- 2026-05-20 追記:同步市場情報 review report route 與 review receipt 巨檔現況,並校正 PChome fresh-search recovery 更新後的 `services/competitor_price_feeder.py`、`services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。
## 達到或超過 800 行檔案清單
@@ -55,18 +58,19 @@
| 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders |
| 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers |
| 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting |
| 1447 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
| 1559 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
| 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service |
| 844 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing |
| 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy |
| 1042 | `services/code_review_pipeline_service.py` | P2 Code review pipeline service | scan orchestration / finding normalization / persistence adapter |
| 953 | `routes/export_routes.py` | P2 Export flow | export command/router glue / file path / download orchestration |
| 816 | `services/ppt_vision_service.py` | P2 PPT vision QA service | runtime state / queue status / model probe / audit execution 分離 |
| 1602 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / cache strategy |
| 1733 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / retryable candidate recovery / cache strategy |
| 1120 | `services/competitor_intel_repository.py` | P2 competitor intel repository | review queue query / cache shaping / formatting helpers |
| 805 | `routes/bot_api_routes.py` | P2 Bot API Blueprint | route glue / bot action service |
| 804 | `routes/market_intel_review_report_routes.py` | P2 market intel review report Blueprint | review report route glue / export payload / phase handoff orchestration |
| 990 | `routes/market_intel_review_report_routes.py` | P2 market intel review report Blueprint | review report route glue / export payload / phase handoff orchestration |
| 811 | `services/market_intel/deployment_readiness.py` | P2 market intel deployment readiness | preflight gates / readiness payload / route contract helpers |
| 846 | `services/market_intel/candidate_queue_review_ai_summary_persistence_telegram_dispatch_report_catalog_record_run_receipt.py` | P2 market intel review receipt pipeline | AI summary / persistence / Telegram dispatch / report catalog run receipt orchestration |
## 市場情報開發前置禁區

View File

@@ -274,6 +274,36 @@ def _search_pchome_candidates(crawler, momo_name: str, keywords: list = None, mo
return candidates
def _recover_low_score_with_fresh_search(
    crawler,
    momo_name: str,
    momo_price: float = None,
    existing_product_id: str = "",
) -> tuple[Optional[tuple], list[str], int]:
    """
    Retry matching with a fresh keyword search after a legacy / known-id
    candidate re-scored too low, in an attempt to replace a stale mismatch
    with a genuinely identical product.

    Args:
        crawler: Crawler object handed through to ``_search_pchome_candidates``.
        momo_name: MOMO product name used to build keywords and to score.
        momo_price: Optional MOMO price forwarded to both search and scoring.
        existing_product_id: Product id of the candidate already on record;
            when non-empty, candidates sharing its id key are set aside.

    Returns:
        ``(best, keywords, candidate_count)``: the result of
        ``_find_best_match_detail`` over the surviving candidates, the
        keywords that were searched, and the number of candidates scored.
    """
    keywords = _build_search_keywords(momo_name)
    pool = _search_pchome_candidates(
        crawler,
        momo_name,
        keywords=keywords,
        momo_price=momo_price,
    )

    if existing_product_id:
        known_key = _product_id_key(existing_product_id)

        def _differs_from_known(item) -> bool:
            return _product_id_key(getattr(item, "product_id", "")) != known_key

        others = list(filter(_differs_from_known, pool))
        # Narrow the pool only when the search surfaced something besides the
        # already-known id; otherwise keep the unfiltered results.
        if others:
            pool = others

    best = _find_best_match_detail(momo_name, pool, momo_price=momo_price)
    return best, keywords, len(pool)
def _structural_similarity(momo_p, pchome_p) -> float:
"""
結構化相似度計算(品牌 + 規格 + 關鍵字)
@@ -1411,14 +1441,94 @@ class CompetitorPriceFeeder:
continue
if score < MIN_MATCH_SCORE:
recovery_terms: list[str] = []
recovery_candidate_count = 0
if not getattr(diagnostics, "hard_veto", False):
recovered, recovery_terms, recovery_candidate_count = _recover_low_score_with_fresh_search(
crawler,
momo_name,
momo_price=momo_price,
existing_product_id=competitor_product_id,
)
if recovered:
recovered_product, recovered_score, recovered_diagnostics = recovered
if recovered_score > score:
best_product, score, diagnostics = recovered_product, recovered_score, recovered_diagnostics
if score >= MIN_MATCH_SCORE:
tags = _extract_tags(best_product)
tags.extend(getattr(diagnostics, "tags", []))
for reason in getattr(diagnostics, "reasons", ()) or ():
tags.append(f"match_{reason}")
tags.append("refresh_known_identity")
if recovery_terms:
tags.append("fresh_search_recovery")
tags = list(dict.fromkeys(tags))
should_write, write_reason = self._should_upsert_competitor_price(
sku,
best_product,
score,
source=source,
)
candidate_count = max(1, recovery_candidate_count or 1)
attempt_terms = search_terms + [term for term in recovery_terms if term not in search_terms]
if not should_write:
self._record_match_attempt(
sku,
momo_name,
momo_product_id=momo_product_id,
momo_price=momo_price,
search_terms=attempt_terms,
candidate_count=candidate_count,
attempt_status="refresh_needs_review",
best_product=best_product,
best_score=score,
diagnostics=diagnostics,
error_message=f"{write_reason}; {_format_match_diagnostics(diagnostics)}",
source=source,
)
skipped_low += 1
attempts_written += 1
continue
tags.append(write_reason)
self._upsert_competitor_price(
sku,
best_product,
score,
tags,
momo_product_id=momo_product_id,
momo_price=momo_price,
diagnostics=diagnostics,
source=source,
)
self._record_match_attempt(
sku,
momo_name,
momo_product_id=momo_product_id,
momo_price=momo_price,
search_terms=attempt_terms,
candidate_count=candidate_count,
attempt_status="matched",
best_product=best_product,
best_score=score,
diagnostics=diagnostics,
source=source,
)
matched += 1
history_written += 1
attempts_written += 1
continue
attempt_status = "identity_veto" if getattr(diagnostics, "hard_veto", False) else "refresh_low_score"
self._record_match_attempt(
sku,
momo_name,
momo_product_id=momo_product_id,
momo_price=momo_price,
search_terms=search_terms,
candidate_count=1,
search_terms=search_terms + [term for term in recovery_terms if term not in search_terms],
candidate_count=max(1, recovery_candidate_count or 1),
attempt_status=attempt_status,
best_product=best_product,
best_score=score,

View File

@@ -35,6 +35,8 @@ NOISE_PHRASES = (
"",
"買一送一",
"買1送1",
"限定版",
"璀璨奢金限定版",
"任選",
"即期品",
"福利品",
@@ -136,6 +138,8 @@ SEARCH_NOISE_PHRASES = (
"精美紙袋",
"交換禮物",
"聖誕禮物",
"限定版",
"璀璨奢金限定版",
"母親節",
"父親節",
"情人節",
@@ -191,7 +195,11 @@ SEARCH_NOISE_TOKENS = {
}
SEARCH_IDENTITY_ANCHORS = (
"止汗爽身噴霧",
"止汗爽身乳膏pro",
"零粉感超持久粉底棒",
"超持久水光鎖吻唇釉",
"裸光蜜粉餅",
"絕對完美永生玫瑰逆齡乳霜",
"永生玫瑰逆齡乳霜",
"永生玫瑰霜",
@@ -223,6 +231,7 @@ SEARCH_IDENTITY_ANCHORS = (
"身體乳",
"緊實乳",
"潔膚露",
"浴潔露",
"潔淨液",
"護甲油",
"指甲油",
@@ -288,6 +297,10 @@ BRAND_ALIAS_OVERRIDES = {
}
PRODUCT_TYPES = {
"止汗噴霧": ("止汗爽身噴霧", "爽身噴霧", "止汗噴霧"),
"潔膚露": ("潔膚露", "浴潔露", "護潔露", "沐浴露", "wash"),
"唇釉": ("唇釉", "唇彩", "lip tint", "lip glaze"),
"粉底棒": ("粉底棒", "foundation stick"),
"精華": ("精華", "精華液", "essence", "serum", "安瓶"),
"化妝水": ("化妝水", "機能水", "toner", "lotion"),
"乳液": ("乳液", "emulsion", "milk"),
@@ -1259,6 +1272,19 @@ def score_marketplace_match(
):
score += 0.03
reasons.append("shared_identity_anchor_no_spec")
if (
shared_anchor
and brand_score >= 0.95
and not hard_veto
and price_penalty == 0
and type_score >= 0.95
and spec_score >= 0.45
and token_score >= 0.56
and sequence_score >= 0.62
and not variant_descriptor_conflict
):
score += 0.02
reasons.append("shared_identity_anchor_packaging_variant")
if (
brand_score >= 0.95
and not hard_veto

View File

@@ -618,6 +618,124 @@ def test_competitor_feeder_refreshes_expired_identity_by_known_product_id(monkey
assert attempts[0]["search_terms"] == ["known_product_id:DDAB01-1900ABCD"]
def test_competitor_feeder_refresh_recovers_with_fresh_search_when_known_id_is_low_score(monkeypatch):
    """Known-identity refresh should fall back to a fresh search on a low score.

    The fake crawler returns a stale listing for the known product id and both
    the stale and a better listing for keyword search. The stubbed scorer rates
    them 0.68 and 0.81 respectively (the test relies on these straddling the
    feeder's match threshold), so the refresh is expected to write the
    recovered listing and tag it as a fresh-search recovery.
    """
    from services.competitor_price_feeder import CompetitorPriceFeeder
    from services.pchome_crawler import PChomeProduct

    # Fields both fixtures have in common.
    shared_fields = dict(
        original_price=2490,
        image_url="",
        stock=20,
        store="24h",
        review_count=8,
        is_on_sale=True,
    )
    stale = PChomeProduct(
        product_id="DDAB01-STALE",
        name="Panasonic 國際牌 男仕防水美體除毛器 國際版 (ER-GK83)",
        price=2290,
        discount=8,
        product_url="https://24h.pchome.com.tw/prod/DDAB01-STALE",
        rating=4.7,
        crawled_at=datetime.now(),
        **shared_fields,
    )
    recovered = PChomeProduct(
        product_id="DDAB01-RECOVERED",
        name="Panasonic 國際牌 男士身體除毛器 ER-GK83",
        price=2390,
        discount=4,
        product_url="https://24h.pchome.com.tw/prod/DDAB01-RECOVERED",
        rating=4.8,
        crawled_at=datetime.now(),
        **shared_fields,
    )

    class FakeCrawler:
        def __init__(self, *_args, **_kwargs):
            pass

        def fetch_product_details(self, product_ids, batch_size=20):
            # The refresh must look up exactly the known (stale) id first.
            assert product_ids == ["DDAB01-STALE"]
            return True, "ok", [stale]

        def search_products(self, *_args, **_kwargs):
            return True, "ok", [stale, recovered]

    def fake_score(_momo_name, competitor_name, **_kwargs):
        verdict = dict(
            brand_score=1.0,
            type_score=0.55,
            price_penalty=0.0,
            hard_veto=False,
            comparison_mode="exact_identity",
            tags=["identity_v2", "comparison_exact_identity", "brand_match"],
        )
        if "RECOVERED" in competitor_name or "男士身體除毛器" in competitor_name:
            verdict.update(
                score=0.81,
                token_score=0.8,
                spec_score=0.8,
                sequence_score=0.72,
                reasons=("shared_model_token",),
            )
        else:
            verdict.update(
                score=0.68,
                token_score=0.55,
                spec_score=0.55,
                sequence_score=0.6,
                reasons=(),
            )
        return SimpleNamespace(**verdict)

    monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler)
    monkeypatch.setattr("services.marketplace_product_matcher.score_marketplace_match", fake_score)

    feeder = CompetitorPriceFeeder(engine=object())
    attempts = []
    writes = []

    def always_allow_upsert(*_args, **_kwargs):
        return True, "same_or_empty_existing"

    def capture_upsert(sku, product, score, tags, **kwargs):
        writes.append({
            "sku": sku,
            "product_id": product.product_id,
            "score": score,
            "tags": tags,
            **kwargs,
        })

    def capture_attempt(*args, **kwargs):
        attempts.append(kwargs)

    monkeypatch.setattr(feeder, "_should_upsert_competitor_price", always_allow_upsert)
    monkeypatch.setattr(feeder, "_upsert_competitor_price", capture_upsert)
    monkeypatch.setattr(feeder, "_record_match_attempt", capture_attempt)

    refresh_item = {
        "sku": "TP00090100000153",
        "name": "【Panasonic 國際牌】男士身體除毛器 2025新款 ER-GK83 日版 日本直送",
        "product_id": 1,
        "momo_price": 2490,
        "competitor_product_id": "DDAB01-STALE",
    }
    result = feeder._run_known_identity_refresh_items([refresh_item])

    assert result.matched == 1
    assert writes[0]["product_id"] == "DDAB01-RECOVERED"
    assert "fresh_search_recovery" in writes[0]["tags"]
    assert attempts[0]["attempt_status"] == "matched"
    # The recorded terms keep the known-id marker and add the fresh keywords.
    assert "known_product_id:DDAB01-STALE" in attempts[0]["search_terms"]
    assert any("Panasonic" in term or "國際牌" in term for term in attempts[0]["search_terms"])
def test_competitor_feeder_records_unit_comparable_without_price_upsert(monkeypatch):
from services.competitor_price_feeder import CompetitorPriceFeeder
from services.pchome_crawler import PChomeProduct

View File

@@ -214,6 +214,50 @@ def test_marketplace_matcher_rejects_product_type_conflict_even_when_line_matche
assert diagnostics.comparison_mode == "not_comparable"
def test_marketplace_matcher_rejects_foundation_stick_vs_foundation_liquid():
    """A 9.5g foundation stick must not match the 30ml foundation of the same line."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【蘭蔻】零粉感超持久粉底棒9.5g"
    pchome_title = "【LANCOME 蘭蔻】零粉感超持久粉底 30ml"

    outcome = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=1620,
        competitor_price=1580,
    )

    # Expect a hard veto attributed to the product-type mismatch.
    assert outcome.score < 0.76
    assert outcome.hard_veto is True
    assert "type_conflict" in outcome.reasons
def test_marketplace_matcher_promotes_nivea_deodorant_spray_identity():
    """The same NIVEA deodorant spray, listed with and without a volume, should match."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【NIVEA 妮維雅】男士 止汗爽身噴霧 無印乾爽-清新海洋"
    pchome_title = "NIVEA 妮維雅 男士止汗爽身噴霧 無印乾爽-清新海洋150ml"

    outcome = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=159,
        competitor_price=169,
    )

    assert outcome.score >= 0.76
    assert outcome.hard_veto is False
def test_marketplace_matcher_promotes_packaging_variant_for_same_nars_powder():
    """A limited-edition packaging title should still match the regular NARS powder."""
    from services.marketplace_product_matcher import score_marketplace_match

    momo_title = "【NARS】官方直營 裸光蜜粉餅(璀璨奢金限定版/星沙金小白餅)"
    pchome_title = "【NARS】裸光蜜粉餅(小白餅) 10g"

    outcome = score_marketplace_match(
        momo_title,
        pchome_title,
        momo_price=1050,
        competitor_price=1050,
    )

    assert outcome.score >= 0.76
    assert outcome.hard_veto is False
    # The match must be credited to the packaging-variant rule specifically.
    assert "shared_identity_anchor_packaging_variant" in outcome.reasons
def test_marketplace_matcher_rejects_same_count_different_unit_family():
from services.marketplace_product_matcher import score_marketplace_match