diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index 90abd07..9ab7f37 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.291 補核心 MOMO/PChome 比價第三層語意:同核心商品但買送、套組、件數不同時標記 `unit_comparable`,只寫入 `competitor_match_attempts`,商品看板顯示「需單位價比較」,不再把不同販售組合直接寫進正式總價差。 - V10.289 重排 Elephant Alpha L3 HITL `ea_escalation` Telegram 告警:改成專業 incident brief 格式,分成決策狀態、背景摘要、風險摘要、TOP 待審 SKU 與建議處置;價格行動會拆出 MOMO/PChome 價格、價差、人工處置與 PChome ID,避免長 bullet 難讀。 - V10.284 關閉 Code Review Hermes LLM scan 預設路徑:Step 2 改 deterministic fast static scan,不再讓部署後先卡三段 Ollama timeout;若需要 LLM scan 可用 `CODE_REVIEW_HERMES_LLM_SCAN_ENABLED=true` 顯式開啟,仍只走本地矩陣、不走 Gemini。 - V10.283 將 Code Review Hermes scan 收斂為 fast compact prompt:預設 2 檔 × 900 字、輸出 384 tokens,仍走 GCP-A → GCP-B → 111 本地矩陣,避免部署後 code_review_hermes 先卡三段 timeout。 diff --git a/config.py b/config.py index b997d14..b0db930 100644 --- a/config.py +++ b/config.py @@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.290" +SYSTEM_VERSION = "V10.291" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index 43a3ba4..c605e02 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -2,7 +2,7 @@ > **最後更新**: 2026-05-19 (台北時間) > **狀態**: 🟢 四 AI Agent 自動化閉環已落地;LLM 路由紅線升級為 Ollama-first 三主機級聯,Gemini 僅備援 / 鎖定場景 -> **適用版本**: V10.289 +> **適用版本**: V10.291 --- @@ -308,6 +308,7 @@ LIMIT 300 | 比對算法 | 品牌 + 核心 token + 容量/重量/包數 + 品類 + 價格 sanity check | 由 `marketplace_product_matcher.py` 統一供 feeder、legacy crawler、AI/PPT 鏈路使用 | | 最低比對門檻 | 0.76 | 核心比價寧可待審,不允許低信心錯配影響 AI 決策 | | 已有不同 PChome 商品覆蓋門檻 | 0.84 | 新候選與既有正式配對不同時,除非超高信心,否則寫入 `needs_review` attempt 不覆蓋 | +| 單位價可比模式 | `unit_comparable` | 同核心商品但買送/套組/件數不同時,不寫正式總價差;只寫入 attempt,供單位價或人工覆核 | | 語意標籤 | JSONB 陣列 | 傳給 Hermes 提升情境感知品質 | ### 競品比對邏輯(`competitor_price_feeder.py`) @@ -318,6 +319,7 @@ MOMO 商品名稱 → PChomeCrawler.search_products(keyword, limit=12) → marketplace_product_matcher.score_marketplace_match() → 品牌衝突 / 容量衝突 / 包數衝突 hard veto + → 同核心但買送/套組/件數不同標記 unit_comparable,不進正式總價差 → 同款高信心 score ≥ 0.76 才進 competitor_prices → 低信心、規格衝突、既有配對衝突寫入 competitor_match_attempts ``` @@ -343,9 +345,10 @@ LEFT JOIN competitor_prices cp - `services/competitor_identity_revalidator.py` 可對既有 `competitor_prices` legacy row 離線重跑 `identity_v2`:只有新版 matcher 分數 `>= 0.76` 且無 hard veto 才補 `identity_v2` / `legacy_revalidated` tags;預設不刷新 `expires_at`,避免過期價格進入決策。 - `CompetitorPriceFeeder.run_expired_identity_refresh()` 會優先刷新已通過 `identity_v2` 但 TTL 過期的 PChome row:直接用既有 `competitor_product_id` 批次呼叫 PChome 商品 API,再用新版 matcher 重新驗證名稱/規格/價格 sanity,通過後寫回 `competitor_prices` 與 `competitor_price_history`。這條路徑提升新鮮價格覆蓋率,但不降低 match threshold,也不讓過期價格直接進入決策。 - `marketplace_product_matcher.py` 的擴充只能走「正向證據 + 反向 veto」:品牌一致、商品線/型號訊號強、價格合理且無 hard veto 時才允許 `strong_product_line_match` 加分;補充瓶/補充包/refill 與一般正裝不互相配對,分享組/加量組/明星組等組合包不得誤配單品。 +- 套組/買送/件數不同但品牌、核心商品線與基礎規格一致時,matcher 必須回傳 `comparison_mode='unit_comparable'` 與 `unit_comparable` reason;Feeder 只能寫入 `competitor_match_attempts.attempt_status='unit_comparable'` 或 `refresh_unit_comparable`,不得寫入 `competitor_prices`,直到下游支援單位價換算或人工覆核。 - PChome feeder 的外部 request timeout 由 `PCHOME_FEEDER_TIMEOUT` 控制,預設 12 秒;排程不得因單一 PChome 搜尋 API timeout 被拖到數分鐘。 - 商品看板的 PChome 狀態必須把 matcher 診斷原因翻成可行動語意:品牌衝突、規格衝突、補充包差異、組合差異、商品線不符等,不可只顯示籠統「待比對」或「身份否決」。 -- Dashboard 必須把「待比對」拆成可診斷狀態:`價格過期待刷新`、`舊版配對待重驗`、`低分配對待審`、`身份否決`、`找不到同款`、`抓取異常`、`尚未搜尋`。不可再用單一「待比對」掩蓋資料品質原因。 +- Dashboard 必須把「待比對」拆成可診斷狀態:`價格過期待刷新`、`舊版配對待重驗`、`低分配對待審`、`身份否決`、`需單位價比較`、`找不到同款`、`抓取異常`、`尚未搜尋`。不可再用單一「待比對」掩蓋資料品質原因。 ### 執行方式 diff --git a/routes/dashboard_routes.py b/routes/dashboard_routes.py index 06ee401..20825a0 100644 --- a/routes/dashboard_routes.py +++ b/routes/dashboard_routes.py @@ -65,6 +65,8 @@ def _to_float(value): def _diagnostic_match_rejection_label(diagnostic_text, score_text, *, blocked=True): diagnostic_text = diagnostic_text or '' suffix = '已停止自動採用' if blocked else '不自動採用以避免錯配' + if 'unit_comparable' in diagnostic_text: + return '需單位價比較', f'{score_text},同核心商品但販售組合不同,需轉換每 ml / 每入後再判讀' if 'refill_pack_conflict' in diagnostic_text: return '補充包差異待審', f'{score_text},補充瓶/補充包與一般正裝不同,{suffix}' if any(token in diagnostic_text for token in ('bundle_offer_conflict', 'multi_component_conflict')): @@ -115,6 +117,15 @@ def _build_pchome_match_status(attempt=None, ineligible=None): 'summary': summary, 'detail': score_text, } + if status in {'unit_comparable', 'refresh_unit_comparable'}: + score = _to_float(attempt.get('best_match_score')) + score_text = f"最佳候選 {round(score * 100)}%" if score is not None else "已找到同核心候選" + return { + 'label': '需單位價比較', + 'tone': 'watch', + 'summary': '候選同核心商品,但販售組合/買送不同;不可直接比總價,需用單位價或人工覆核', + 'detail': score_text, + } if ineligible: reason = ineligible.get('reason') or 'not_eligible' diff --git a/services/competitor_price_feeder.py b/services/competitor_price_feeder.py index ab0d23d..af01f8b 100644 --- a/services/competitor_price_feeder.py +++ b/services/competitor_price_feeder.py @@ -776,6 +776,27 @@ class CompetitorPriceFeeder: continue best_product, score, diagnostics = result + if getattr(diagnostics, "comparison_mode", "") == "unit_comparable": + logger.info( + f"[Feeder] {sku} 候選屬單位價可比但非同販售組合," + f"不寫入正式價差 | {_format_match_diagnostics(diagnostics)}" + ) + self._record_match_attempt( + sku, + momo_name, + momo_product_id=momo_product_id, + momo_price=momo_price, + search_terms=search_terms, + candidate_count=len(products), + attempt_status="unit_comparable", + best_product=best_product, + best_score=score, + error_message=_format_match_diagnostics(diagnostics), + source=source, + ) + attempts_written += 1 + skipped_low += 1 + continue if score < MIN_MATCH_SCORE: logger.debug( @@ -974,6 +995,24 @@ class CompetitorPriceFeeder: continue best_product, score, diagnostics = result + if getattr(diagnostics, "comparison_mode", "") == "unit_comparable": + self._record_match_attempt( + sku, + momo_name, + momo_product_id=momo_product_id, + momo_price=momo_price, + search_terms=search_terms, + candidate_count=1, + attempt_status="refresh_unit_comparable", + best_product=best_product, + best_score=score, + error_message=_format_match_diagnostics(diagnostics), + source=source, + ) + skipped_low += 1 + attempts_written += 1 + continue + if score < MIN_MATCH_SCORE: self._record_match_attempt( sku, diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index 4366570..3b1efed 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -152,10 +152,13 @@ class MatchDiagnostics: price_penalty: float hard_veto: bool reasons: tuple[str, ...] + comparison_mode: str = "exact_identity" @property def tags(self) -> list[str]: tags: list[str] = ["identity_v2"] + if self.comparison_mode: + tags.append(f"comparison_{self.comparison_mode}") if self.brand_score >= 0.95: tags.append("brand_match") if self.spec_score >= 0.85: @@ -479,6 +482,55 @@ def _spec_mention_count(identity: ProductIdentity) -> int: return len(re.findall(r"\d+(?:\.\d+)?\s*(?:ml|毫升|l|g|公克|kg)", identity.normalized_name, re.I)) +def _has_overlapping_base_spec(left: ProductIdentity, right: ProductIdentity) -> bool: + for left_value in left.volumes_ml: + if any(_close_number(left_value, right_value) for right_value in right.volumes_ml): + return True + for left_value in left.weights_g: + if any(_close_number(left_value, right_value) for right_value in right.weights_g): + return True + return False + + +def _is_unit_comparable_candidate( + left: ProductIdentity, + right: ProductIdentity, + token_score: float, + chinese_name_score: float, + brand_conflict: bool, + type_score: float, + reasons: Iterable[str], +) -> bool: + """Identify same core product sold in different packs. + + These are not safe exact matches. They can only enter a normalized unit-price + review lane, otherwise a bundle price may be incorrectly compared with a + single-item price. + """ + reason_set = set(reasons) + pack_difference = bool(reason_set & { + "bundle_offer_conflict", + "multi_component_conflict", + "count_conflict", + "component_count_conflict", + }) + if not pack_difference: + return False + if brand_conflict or "brand_conflict" in reason_set: + return False + if "refill_pack_conflict" in reason_set: + return False + if type_score == 0.0 or "type_conflict" in reason_set: + return False + if not _has_overlapping_base_spec(left, right): + return False + if token_score < 0.45 and chinese_name_score < 0.28: + return False + if "product_line_conflict" in reason_set and token_score < 0.72: + return False + return True + + def _chinese_bigram_score(left: ProductIdentity, right: ProductIdentity) -> float: def signature(identity: ProductIdentity) -> set[str]: text = identity.searchable_name @@ -579,6 +631,21 @@ def score_marketplace_match( if left.product_type and right.product_type and left.product_type != right.product_type and token_score < 0.55: hard_veto = True + comparison_mode = "exact_identity" + if _is_unit_comparable_candidate( + left, + right, + token_score, + chinese_name_score, + brand_conflict, + type_score, + reasons, + ): + comparison_mode = "unit_comparable" + reasons.append("unit_comparable") + elif hard_veto: + comparison_mode = "not_comparable" + price_penalty = 0.0 try: if momo_price and competitor_price: @@ -614,7 +681,7 @@ def score_marketplace_match( score += 0.07 reasons.append("strong_product_line_match") if hard_veto: - score = min(score, 0.32) + score = min(score, 0.74 if comparison_mode == "unit_comparable" else 0.32) score = max(0.0, min(1.0, score)) return MatchDiagnostics( @@ -627,6 +694,7 @@ def score_marketplace_match( price_penalty=round(price_penalty, 3), hard_veto=hard_veto, reasons=tuple(reasons), + comparison_mode=comparison_mode, ) diff --git a/tests/test_competitor_identity_revalidator.py b/tests/test_competitor_identity_revalidator.py index c747955..d848bcc 100644 --- a/tests/test_competitor_identity_revalidator.py +++ b/tests/test_competitor_identity_revalidator.py @@ -83,3 +83,17 @@ def test_dashboard_match_status_explains_identity_veto_reason(): assert "組合包/多件組" in bundle["summary"] assert refill["label"] == "補充包差異待審" assert "補充瓶/補充包" in refill["summary"] + + +def test_dashboard_match_status_explains_unit_comparable_bundle(): + from routes.dashboard_routes import _build_pchome_match_status + + status = _build_pchome_match_status({ + "attempt_status": "unit_comparable", + "best_match_score": 0.74, + "error_message": "score=0.74; reasons=bundle_offer_conflict,unit_comparable", + }) + + assert status["label"] == "需單位價比較" + assert status["tone"] == "watch" + assert "不可直接比總價" in status["summary"] diff --git a/tests/test_competitor_match_attempts_persistence.py b/tests/test_competitor_match_attempts_persistence.py index c102cf0..0bc9814 100644 --- a/tests/test_competitor_match_attempts_persistence.py +++ b/tests/test_competitor_match_attempts_persistence.py @@ -28,6 +28,8 @@ def test_competitor_feeder_persists_all_match_attempt_outcomes(): assert "_fetch_expired_identity_skus" in source assert "run_expired_identity_refresh" in source assert "refresh_known_identity" in source + assert 'attempt_status="unit_comparable"' in source + assert 'attempt_status="refresh_unit_comparable"' in source assert 'PCHOME_FEEDER_TIMEOUT", "12"' in source assert "PChomeCrawler(timeout=REQUEST_TIMEOUT" in source @@ -125,3 +127,59 @@ def test_competitor_feeder_refreshes_expired_identity_by_known_product_id(monkey assert "refresh_known_identity" in writes[0]["tags"] assert attempts[0]["attempt_status"] == "matched" assert attempts[0]["search_terms"] == ["known_product_id:DDAB01-1900ABCD"] + + +def test_competitor_feeder_records_unit_comparable_without_price_upsert(monkeypatch): + from services.competitor_price_feeder import CompetitorPriceFeeder + from services.pchome_crawler import PChomeProduct + + product = PChomeProduct( + product_id="DDAB01-UNIT", + name="理膚寶水 全面修復霜 B5 40ml", + price=679, + original_price=799, + discount=15, + image_url="", + product_url="https://24h.pchome.com.tw/prod/DDAB01-UNIT", + stock=20, + store="24h", + rating=4.7, + review_count=8, + is_on_sale=True, + crawled_at=datetime.now(), + ) + + class FakeCrawler: + def __init__(self, *_args, **_kwargs): + pass + + def search_products(self, *_args, **_kwargs): + return True, "ok", [product] + + monkeypatch.setattr("services.pchome_crawler.PChomeCrawler", FakeCrawler) + feeder = CompetitorPriceFeeder(engine=object()) + attempts = [] + writes = [] + monkeypatch.setattr( + feeder, + "_record_match_attempt", + lambda *args, **kwargs: attempts.append(kwargs), + ) + monkeypatch.setattr( + feeder, + "_upsert_competitor_price", + lambda *args, **kwargs: writes.append((args, kwargs)), + ) + + result = feeder._run_sku_items([{ + "sku": "A002", + "name": "理膚寶水 B5 全面修復霜 40ml x2 超值組", + "product_id": 2, + "momo_price": 1199, + }]) + + assert result.matched == 0 + assert result.skipped_low_score == 1 + assert writes == [] + assert attempts[0]["attempt_status"] == "unit_comparable" + assert "unit_comparable" in attempts[0]["error_message"] diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index 43cbe74..0c36c22 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -89,6 +89,24 @@ def test_marketplace_matcher_rejects_bundle_to_single_even_when_brand_matches(): assert diagnostics.score < 0.76 assert diagnostics.hard_veto is True assert "bundle_offer_conflict" in diagnostics.reasons + assert diagnostics.comparison_mode == "not_comparable" + + +def test_marketplace_matcher_marks_bundle_single_as_unit_comparable_not_exact(): + from services.marketplace_product_matcher import score_marketplace_match + + diagnostics = score_marketplace_match( + "理膚寶水 B5 全面修復霜 40ml x2 超值組", + "理膚寶水 全面修復霜 B5 40ml", + momo_price=1199, + competitor_price=679, + ) + + assert diagnostics.score < 0.76 + assert diagnostics.hard_veto is True + assert diagnostics.comparison_mode == "unit_comparable" + assert "unit_comparable" in diagnostics.reasons + assert "comparison_unit_comparable" in diagnostics.tags def test_marketplace_matcher_does_not_promote_wide_price_refill_candidate():