diff --git a/config.py b/config.py index d1bf77b..40e6f15 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.372" +SYSTEM_VERSION = "V10.373" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/memory/code_modularization_inventory_20260430.md b/docs/memory/code_modularization_inventory_20260430.md index 06f3fe8..efc89f5 100644 --- a/docs/memory/code_modularization_inventory_20260430.md +++ b/docs/memory/code_modularization_inventory_20260430.md @@ -37,6 +37,7 @@ - 2026-05-21 追記:同步 111 fallback context/resource guard 合併後的 `services/ollama_service.py` 行數;此處只更新 inventory,不變更 Ollama 路由行為。 - 2026-05-21 追記:同步專業比價分級連動合併後的 `services/competitor_intel_repository.py` 與 `services/nemoton_dispatcher_service.py` 行數;此處只更新 inventory,不變更比價或告警行為。 - 2026-05-21 追記:同步 PChome/LUDEYA 商品線名稱漂移比對更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 +- 2026-05-21 追記:同步 MAC/Yuskin/AHC 名稱漂移與 bundle equivalent matcher 更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更模組化決策。 ## 達到或超過 800 行檔案清單 @@ -63,7 +64,7 @@ | 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders | | 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers | | 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting | -| 2225 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization | +| 2279 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization | | 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service | | 961 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing | | 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy | diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index b4c323a..7e470bb 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化 +- **V10.373 PChome 同款名稱漂移整合**: 整合並修正 concurrent matcher work,新增 MAC/M.A.C 品牌 alias、Yuskin 經典乳霜 4入/4盒組同數量 bundle equivalent、AHC 瞬效 B5 玻尿酸關鍵字重排 anchor;修復 `_count_score()` 縮排破壞與 unreachable code,讓新增測試可穩定通過。 - **V10.372 Smoke 與 EventRouter queue 修復**: 修正 AI automation smoke 對 NemoTron fallback 的 class 判斷,改接受實際存在的 `NemotronDispatcher._hermes_rule_fallback`,避免 Hermes fallback 正常卻被誤報 critical;EventRouter 失敗佇列回放改為重建短版 HTML-safe 訊息,escape 標題/摘要/trace/error 並限制長度,避免舊 Selenium stacktrace 的 `` 造成 Telegram HTTP 400 反覆卡住;同版整合 LUDEYA 蜂王玫瑰商品線在 MOMO/PChome 名稱漂移時的 identity anchor alias。 - **V10.371 品牌缺失同款放行**: marketplace matcher 新增 `brandless_exact_identity` 加分,只限「一側有品牌、一側缺品牌」但 shared identity anchor 夠長、規格/序列/中文名相似度都高且無 hard veto 的案例;覆蓋小米有品小浪智能感應自動噴香機,讓 PChome 標題省略品牌時仍可進入同款告警候選。 - **V10.370 Gemini runtime sentinel**: AI automation smoke 新增 `Gemini 出站費用 sentinel`,每天檢查近 24h `ai_calls.provider='gemini'` 的 calls/tokens/cost/top callers;若 `GEMINI_API_HARD_DISABLED=true` 仍有 Gemini 記錄,smoke 直接升為 critical。scheduler 09:10 摘要推播前會先執行一次只讀 smoke,讓 Gemini 費用異常不再依賴人工打開 `/ai_automation_smoke` 才被發現。 diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index 2fb9b40..07655ad 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -290,6 +290,9 @@ SEARCH_NOISE_TOKENS = { } SEARCH_IDENTITY_ANCHORS = ( + "持久植物香氛精油", + "口袋雙色修容打亮盤", + "經典乳霜", "蜂王玫瑰外泌微臻霜", "微分子肌底原生露", "小浪智能感應自動噴香機", @@ -455,6 +458,8 @@ VARIANT_DESCRIPTOR_NOISE_KEYWORDS = { "極細筆頭", "筆頭", "官方直營", + "入組", + "盒組", } SEARCH_AMBIGUOUS_PRODUCT_TERMS = { @@ -490,6 +495,7 @@ BRAND_ALIAS_OVERRIDES = { "prada": ("prada", "普拉達"), "za": ("za",), "xiaomi": ("小米有品", "小米", "xiaomi"), + "mac": ("m.a.c", "mac", "m a c"), } PRODUCT_TYPES = { @@ -1064,6 +1070,14 @@ def _count_score(left: ProductIdentity, right: ProductIdentity) -> tuple[float, return 0.5, False +def _has_exact_count_alignment(left: ProductIdentity, right: ProductIdentity) -> bool: + if not left.counts or not right.counts: + return False + left_counts = sorted(count for count, _ in left.counts) + right_counts = sorted(count for count, _ in right.counts) + return left_counts == right_counts + + def _spec_score(left: ProductIdentity, right: ProductIdentity) -> tuple[float, bool, tuple[str, ...]]: volume_score, volume_conflict = _spec_component(left.volumes_ml, right.volumes_ml) weight_score, weight_conflict = _spec_component(left.weights_g, right.weights_g) @@ -1732,6 +1746,35 @@ def score_marketplace_match( ): score += 0.09 reasons.append("brandless_exact_identity") + if ( + shared_anchor + and len(shared_anchor.replace(" ", "")) >= 6 + and brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and type_score >= 0.95 + and spec_score >= 0.85 + and token_score >= 0.30 + and sequence_score >= 0.50 + and not variant_descriptor_conflict + ): + score += 0.06 + reasons.append("shared_identity_anchor_reordered_line") + if ( + shared_anchor + and len(shared_anchor.replace(" ", "")) >= 4 + and brand_score >= 0.95 + and not hard_veto + and price_penalty == 0 + and type_score >= 0.95 + and spec_score >= 0.65 + and token_score >= 0.50 + and sequence_score >= 0.50 + and _has_exact_count_alignment(left, right) + and not variant_descriptor_conflict + ): + score += 0.05 + reasons.append("shared_identity_anchor_bundle_equivalent") if ( shared_anchor and len(shared_anchor.replace(" ", "")) >= 6 @@ -1863,15 +1906,24 @@ def _search_spec_terms(identity: ProductIdentity) -> list[str]: def _extract_anchor_phrases(token: str) -> list[str]: + normalized = normalize_product_text(token) cleaned = _clean_search_phrase(token) if not cleaned: + if "經典乳霜" in normalized: + return ["經典乳霜"] return [] phrases: list[str] = [] + if "悠斯晶" in normalized and "經典乳霜" in normalized: + phrases.append("悠斯晶經典乳霜") + if "經典乳霜" in normalized: + phrases.append("經典乳霜") if "蜂王玫瑰" in cleaned and any( keyword in cleaned for keyword in ("外泌微臻霜", "微泌新生霜", "瑰泌霜") ): phrases.append("蜂王玫瑰瑰泌霜") + if "瞬效" in cleaned and "b5" in cleaned and "玻尿酸" in cleaned and "精華" in cleaned: + phrases.append("瞬效b5玻尿酸精華") for anchor in SEARCH_IDENTITY_ANCHORS: if anchor not in cleaned: continue @@ -1898,6 +1950,8 @@ def _shared_identity_anchor(left: ProductIdentity, right: ProductIdentity) -> st left_anchors.update(_extract_anchor_phrases(token)) for token in right.core_tokens: right_anchors.update(_extract_anchor_phrases(token)) + left_anchors.update(_extract_anchor_phrases(left.normalized_name)) + right_anchors.update(_extract_anchor_phrases(right.normalized_name)) left_anchors.update(_extract_anchor_phrases(left.searchable_name)) right_anchors.update(_extract_anchor_phrases(right.searchable_name)) diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index a8842a4..543fac1 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -470,6 +470,61 @@ def test_marketplace_matcher_promotes_ludeya_line_with_platform_name_drift(): assert "shared_identity_anchor" in diagnostics.reasons or "shared_identity_anchor_no_spec" in diagnostics.reasons +def test_marketplace_matcher_promotes_mac_brand_alias_and_exact_compact_name(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【M.A.C】口袋雙色修容打亮盤 7g(國際航空版)", + "【MAC】口袋雙色修容打亮盤 7g", + momo_price=1200, + competitor_price=1200, + ) + + assert diagnostics.score >= 0.76 + + +def test_marketplace_matcher_promotes_yuskin_bundle_when_only_bundle_wording_differs(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【悠斯晶】經典乳霜120g(4入組)", + "【Yuskin悠斯晶】經典乳霜 4盒組(120g/盒)", + momo_price=1200, + competitor_price=1200, + ) + + assert diagnostics.score >= 0.76 + assert "variant_descriptor_conflict" not in diagnostics.reasons + + +def test_marketplace_matcher_promotes_ahc_line_with_keyword_reordering(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【AHC】瞬效B5微導玻尿酸保濕精華液30ml_3入(右旋B5/玻尿酸/保濕霸主/高效修護/敏感肌/醫美後適用)", + "【AHC】瞬效保濕B5微導 玻尿酸精華 30ML 3入組", + momo_price=1200, + competitor_price=1200, + ) + + assert diagnostics.score >= 0.76 + assert "shared_identity_anchor_reordered_line" in diagnostics.reasons + + +def test_marketplace_matcher_promotes_bundle_equivalent_when_count_matches(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "【悠斯晶】經典乳霜120g(4入組)", + "【Yuskin悠斯晶】經典乳霜 4盒組(120g/盒)", + momo_price=1200, + competitor_price=1200, + ) + + assert diagnostics.score >= 0.76 + assert "shared_identity_anchor_bundle_equivalent" in diagnostics.reasons + + def test_marketplace_matcher_rejects_same_count_different_unit_family(): from services.marketplace_product_matcher import score_marketplace_match