From 8edd8a86046d393642d132b8d5f55abbc4a215ef Mon Sep 17 00:00:00 2001
From: OoO <ooo@MacBook-Pro.local>
Date: Wed, 20 May 2026 19:34:21 +0800
Subject: [PATCH] =?UTF-8?q?[V10.346]=20=E8=A3=9C=E5=BC=B7=20PChome=20ident?=
 =?UTF-8?q?ity=20anchor=20scorer?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config.py                                     |  2 +-
 .../code_modularization_inventory_20260430.md |  4 +-
 services/marketplace_product_matcher.py       | 60 +++++++++++++++++++
 tests/test_marketplace_product_matcher.py     | 22 +++++++
 4 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/config.py b/config.py
index 51cd96c..2e1ceb5 100644
--- a/config.py
+++ b/config.py
@@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
 # ==========================================
 # 系統版本與路徑
 # ==========================================
-SYSTEM_VERSION = "V10.345"
+SYSTEM_VERSION = "V10.346"
 LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
 public_url = PUBLIC_URL  # 用於模板顯示
 
diff --git a/docs/memory/code_modularization_inventory_20260430.md b/docs/memory/code_modularization_inventory_20260430.md
index 73b2df3..1e4811f 100644
--- a/docs/memory/code_modularization_inventory_20260430.md
+++ b/docs/memory/code_modularization_inventory_20260430.md
@@ -28,6 +28,7 @@
 - 2026-05-20 追記：同步 PChome 近門檻候選重評與 matcher 系列/刀片數防錯配更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數；此處只更新 inventory，不變更比價行為。
 - 2026-05-20 追記：同步 PChome 搜尋詞品質層、候選召回與 hard-veto 狀態分流更新後的 `services/marketplace_product_matcher.py`、`services/competitor_price_feeder.py` 行數；並補列背景市場情報 deployment readiness 大檔，僅更新 inventory。
 - 2026-05-20 追記：同步 PChome 搜尋詞特定品線優先級更新後的 `services/marketplace_product_matcher.py` 行數；此處只更新 inventory，不變更模組化決策。
+- 2026-05-20 追記：同步 PChome 共享 identity anchor scorer 與市場情報 review report route 進入大檔門檻後的行數；此處只更新 inventory，不變更功能。
 
 ## 達到或超過 800 行檔案清單
 
@@ -54,7 +55,7 @@
 | 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders |
 | 933 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers |
 | 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting |
-| 1387 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
+| 1447 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization |
 | 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service |
 | 844 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing |
 | 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy |
@@ -64,6 +65,7 @@
 | 1602 | `services/competitor_price_feeder.py` | P2 competitor price feeder | crawler scheduling / price normalization / cache strategy |
 | 1120 | `services/competitor_intel_repository.py` | P2 competitor intel repository | review queue query / cache shaping / formatting helpers |
 | 805 | `routes/bot_api_routes.py` | P2 Bot API Blueprint | route glue / bot action service |
+| 804 | `routes/market_intel_review_report_routes.py` | P2 market intel review report Blueprint | review report route glue / export payload / phase handoff orchestration |
 | 811 | `services/market_intel/deployment_readiness.py` | P2 market intel deployment readiness | preflight gates / readiness payload / route contract helpers |
 
 ## 市場情報開發前置禁區
diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py
index 930c32c..bcf374f 100644
--- a/services/marketplace_product_matcher.py
+++ b/services/marketplace_product_matcher.py
@@ -114,6 +114,8 @@ GENERIC_TOKENS = {
 SEARCH_NOISE_PHRASES = (
     "新品上市",
     "全新上市",
+    "國際航空版",
+    "超取免運",
     "任選一款",
     "任選1款",
     "任選一色",
@@ -134,6 +136,10 @@ SEARCH_NOISE_PHRASES = (
     "卸除髒汙",
     "卸除防曬",
     "卸防曬",
+    "防水眼線",
+    "寶寶牙刷",
+    "紗布牙刷",
+    "調節亮度",
     "韓國彩妝",
     "水光感",
     "官方直營",
@@ -161,6 +167,14 @@ SEARCH_NOISE_TOKENS = {
     "防曬",
     "彩妝",
     "水光感",
+    "超取",
+    "免運",
+    "航空版",
+    "國際版",
+    "附燈泡",
+    "定時",
+    "眼妝",
+    "滅菌",
     "保濕",
     "抗老",
     "超品日",
@@ -178,6 +192,17 @@ SEARCH_IDENTITY_ANCHORS = (
     "青春敷面膜",
     "長效潤膚霜",
     "小黑瓶",
+    "私密處護潔露",
+    "私密護潔露",
+    "口腔清潔棒",
+    "含氟防蛀修護牙膏",
+    "自然遮瑕素顏霜",
+    "超持久細滑眼線筆",
+    "香氛融蠟燈",
+    "水晶香氛能量寶盒禮盒組",
+    "零粉感超持久柔焦蜜粉餅",
+    "私密肌潔淨露",
+    "身體除毛器",
     "免用水潔淨液",
     "身體按摩精油",
     "按摩精油",
@@ -487,6 +512,11 @@ def _leading_brand_tokens(original: str, normalized: str) -> set[str]:
                     tokens.add(token)
 
     leading = normalized[:48]
+    leading_tokens = _tokenize(leading)
+    if leading_tokens:
+        first_token = leading_tokens[0]
+        if re.fullmatch(r"[\u4e00-\u9fff]{2,6}", first_token) and first_token not in GENERIC_TOKENS:
+            tokens.add(first_token)
     for token in _tokenize(leading):
         if re.fullmatch(r"[a-z][a-z0-9\-']{2,}", token):
             tokens.add(token)
@@ -1183,6 +1213,17 @@ def score_marketplace_match(
     ):
         score += 0.025
         reasons.append("strong_exact_spec_match")
+    shared_anchor = _shared_identity_anchor(left, right)
+    if (
+        shared_anchor
+        and brand_score >= 0.95
+        and not hard_veto
+        and price_penalty == 0
+        and spec_score >= 0.85
+        and (token_score >= 0.43 or sequence_score >= 0.58)
+    ):
+        score += 0.03
+        reasons.append("shared_identity_anchor")
     if (
         brand_score >= 0.95
         and not hard_veto
@@ -1267,6 +1308,25 @@ def _extract_anchor_phrases(token: str) -> list[str]:
     return phrases
 
 
+def _shared_identity_anchor(left: ProductIdentity, right: ProductIdentity) -> str:
+    """Pick the longest anchor phrase present in both identities' core tokens.
+
+    Phrases under 5 characters (spaces ignored) or in SEARCH_BROAD_ANCHORS are skipped;
+    ties break lexicographically. Returns "" when nothing qualifies.
+    """
+
+    def anchors_of(identity: ProductIdentity) -> set[str]:
+        return {p for t in identity.core_tokens for p in _extract_anchor_phrases(t)}
+
+    # Negated length ranks longer phrases first, so min() yields the longest one.
+    ranked = [
+        (-len(phrase.replace(" ", "")), phrase)
+        for phrase in anchors_of(left) & anchors_of(right)
+        if len(phrase.replace(" ", "")) >= 5 and phrase not in SEARCH_BROAD_ANCHORS
+    ]
+    return min(ranked)[1] if ranked else ""
+
+
 def _search_core_score(token: str, all_tokens: set[str]) -> tuple[int, int, str]:
     cleaned = _clean_search_phrase(token)
     if not cleaned:
diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py
index db66b63..3477a16 100644
--- a/tests/test_marketplace_product_matcher.py
+++ b/tests/test_marketplace_product_matcher.py
@@ -426,6 +426,28 @@ def test_marketplace_matcher_does_not_promote_different_option_without_spec():
     assert "strong_exact_spec_match" not in diagnostics.reasons
 
 
+def test_marketplace_matcher_promotes_shared_identity_anchor_near_threshold():
+    """Pairs that share an identity anchor must score at least 0.76 without a veto."""
+    from services.marketplace_product_matcher import score_marketplace_match
+
+    # Each case: two listing titles plus the price passed as both momo_price and competitor_price.
+    cases = (
+        ("【OBgE】自然遮瑕素顏霜 50g", "OBgE/自然遮瑕素顏霜50g", 699),
+        ("【UNICAT 變臉貓】超持久細滑眼線筆1.5ml", "【UNICAT】超持久細滑眼線筆 1.5ml 新品搶先優惠", 399),
+    )
+
+    # Score every pair up front, then check each diagnostics object.
+    results = [
+        score_marketplace_match(first, second, momo_price=price, competitor_price=price)
+        for first, second, price in cases
+    ]
+
+    for diagnostics in results:
+        assert diagnostics.score >= 0.76
+        assert diagnostics.hard_veto is False
+        assert "shared_identity_anchor" in diagnostics.reasons
+
+
 def test_marketplace_search_terms_prefer_readable_brand_core_spec():
     from services.marketplace_product_matcher import build_search_terms