# Reviewer notes (text before the first "diff --git" header is not part of any hunk).
#
# Purpose: release V10.72 -- persist every PChome competitor match attempt and re-plan the V2 page rollout.
#   - app.py, config.py: bump SYSTEM_VERSION from "V10.71" to "V10.72".
#   - docs/guides/frontend_upgrade_roadmap.md: replace the 5-item suggested order with a 6-item rollout order.
#   - migrations/023_competitor_match_attempts.sql: new migration creating competitor_match_attempts,
#     two indexes, and grants to role "momo".
#   - services/competitor_price_feeder.py: add FeederResult.attempts_written, an optional `keywords`
#     argument on _search_pchome_candidates, a lazily created competitor_match_attempts table, and
#     _record_match_attempt, called for the no_result / no_match / low_score / matched / error outcomes.
#   - tests/test_competitor_match_attempts_persistence.py: new test that looks for expected substrings in
#     the feeder source and the migration text; it does not execute the feeder.
#
# Only comments/docstrings on added ("+") lines were translated to English. Context and "-" lines keep
# their original text because they must match the target files.
# NOTE(review): indentation and blank context lines were reconstructed from a whitespace-collapsed copy;
#   verify against the real tree before applying.
# NOTE(review): the "index" blob ids are unchanged from the original patch, so they may no longer match
#   the post-image after the comment translation -- regenerate before a 3-way apply.
# NOTE(review): the non-error _record_match_attempt calls appear to sit inside the per-SKU `try`, so an
#   exception raised while recording would be handled as a processing error; in the matched branch that
#   happens before `matched` / `history_written` are incremented -- confirm this is intended.
# NOTE(review): `search_terms = _build_search_keywords(momo_name)` is evaluated before `try:`, so an
#   exception there would not be caught by the per-SKU handler -- confirm.
diff --git a/app.py b/app.py
index 4b91bd3..0c15bec 100644
--- a/app.py
+++ b/app.py
@@ -95,8 +95,8 @@ except Exception as e:
     sys_log.error(f"無法檢測磁碟空間: {e}")
 
 # 🚩 系統版本定義 (備份與顯示用)
-# 🚩 2026-05-01 V10.71: Persist full campaign crawl snapshots
-SYSTEM_VERSION = "V10.71"
+# 🚩 2026-05-01 V10.72: Persist competitor match attempts and schedule remaining V2 pages
+SYSTEM_VERSION = "V10.72"
 
 # ==========================================
 # 🔒 SQL Injection 防護函數
diff --git a/config.py b/config.py
index 8a79ba2..b7617d8 100644
--- a/config.py
+++ b/config.py
@@ -254,7 +254,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
 
 # ==========================================
 # 系統版本與路徑
 # ==========================================
-SYSTEM_VERSION = "V10.71"
+SYSTEM_VERSION = "V10.72"
 LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
 public_url = PUBLIC_URL # 用於模板顯示
diff --git a/docs/guides/frontend_upgrade_roadmap.md b/docs/guides/frontend_upgrade_roadmap.md
index b378aab..7b9dfe8 100644
--- a/docs/guides/frontend_upgrade_roadmap.md
+++ b/docs/guides/frontend_upgrade_roadmap.md
@@ -142,13 +142,23 @@
 
 目標:讓常用營運頁進入新版 shell,但先不重寫全部細節。
 
-建議順序:
+推進順序:
 
-1. `web/templates/vendor_stockout/index.html`(已以 `templates/vendor_stockout_index_v2.html` feature flag 方式落地)
-2. `web/templates/vendor_stockout/list.html`(已以 `templates/vendor_stockout_list_v2.html` feature flag 方式落地)
-3. `templates/auto_import_index.html`
-4. `templates/ai_recommend.html`
-5. `templates/settings.html` / `templates/crawler_management.html`
+1. `templates/ai_intelligence.html` / `templates/ai_recommend.html` / `templates/ai_history.html`
+   - 原因:AI 挑品、PChome 比對、建議信心度與待補證據是目前商品看板下一步核心工作流。
+   - 目標:新版 shell、AI 挑品清單、比對嘗試狀態、信心度證據缺口、操作回饋。
+2. `templates/auto_import_index.html`
+   - 原因:所有真實資料要完整入庫,匯入頁是營運資料進入系統的主要入口。
+   - 目標:新版 shell、匯入批次狀態、錯誤列、最近匯入紀錄與資料表落點。
+3. `templates/sales_analysis.html` / `templates/daily_sales.html` / `templates/monthly_summary_analysis.html`
+   - 原因:AI 挑品信心度需要銷售額、銷量、成本、毛利等證據支撐。
+   - 目標:保留原報表功能,改版為可掃描的營運分析工作台。
+4. `web/templates/vendor_stockout/index.html`、`list.html`、`import.html`
+   - 狀態:首頁、列表、匯入已先以 `templates/vendor_stockout_*_v2.html` 方式落地;後續補齊 history、vendor management、send email。
+5. 系統管理相關頁:`templates/settings.html`、`templates/system_settings.html`、`templates/user_management.html`、`templates/logs.html`
+   - 目標:新版 shell、設定群組化、狀態/權限/日誌統一呈現。
+6. PChome / 趨勢 / 其他工具頁:`templates/pchome_crawler.html`、`templates/price_comparison.html`、`templates/trends.html`
+   - 目標:接入新版 shell 與資料完整性狀態,不改核心爬蟲規則。
 
 工作內容:
 
diff --git a/migrations/023_competitor_match_attempts.sql b/migrations/023_competitor_match_attempts.sql
new file mode 100644
index 0000000..67c9e1d
--- /dev/null
+++ b/migrations/023_competitor_match_attempts.sql
@@ -0,0 +1,52 @@
+-- =============================================================================
+-- Migration 023: competitor match-attempt history table
+-- MOMO PRO — PChome match observability
+-- 2026-05-01 Taipei
+-- =============================================================================
+-- Notes:
+--   competitor_prices / competitor_price_history only store successful matches.
+--   competitor_match_attempts is append-only and stores every PChome match attempt,
+--   including matched, low_score, no_result, no_match and error.
+--   This gives "pending match" products traceable data that can be used to improve search terms, thresholds and AI product-picking confidence.
+-- =============================================================================
+
+CREATE TABLE IF NOT EXISTS competitor_match_attempts (
+    id BIGSERIAL PRIMARY KEY,
+
+    -- MOMO-side product identity and point-in-time snapshot
+    sku VARCHAR(50) NOT NULL,
+    source VARCHAR(30) NOT NULL DEFAULT 'pchome',
+    momo_product_id INTEGER,
+    momo_product_name TEXT,
+    momo_price NUMERIC(10,2),
+
+    -- This match attempt
+    search_terms JSONB DEFAULT '[]'::jsonb,
+    candidate_count INTEGER DEFAULT 0,
+    attempt_status VARCHAR(30) NOT NULL,
+
+    -- Best candidate; stored even when its score is low, for later analysis
+    best_competitor_product_id VARCHAR(100),
+    best_competitor_product_name TEXT,
+    best_competitor_price NUMERIC(10,2),
+    best_match_score NUMERIC(4,3),
+
+    -- Error or external API failure
+    error_message TEXT,
+
+    attempted_at TIMESTAMP NOT NULL DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_sku_source_time
+    ON competitor_match_attempts (sku, source, attempted_at DESC);
+
+CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_status_time
+    ON competitor_match_attempts (attempt_status, attempted_at DESC);
+
+GRANT ALL PRIVILEGES ON competitor_match_attempts TO momo;
+GRANT USAGE, SELECT ON SEQUENCE competitor_match_attempts_id_seq TO momo;
+
+DO $$
+BEGIN
+    RAISE NOTICE '✅ Migration 023 完成 — competitor_match_attempts 比對嘗試歷史表已建立';
+END $$;
diff --git a/services/competitor_price_feeder.py b/services/competitor_price_feeder.py
index 79b17bb..2b0acdd 100644
--- a/services/competitor_price_feeder.py
+++ b/services/competitor_price_feeder.py
@@ -51,6 +51,7 @@ class FeederResult:
     errors: int
     duration_sec: float
     history_written: int = 0
+    attempts_written: int = 0
 
 
 def _extract_tags(pchome_product) -> list:
@@ -186,11 +187,11 @@ def _find_best_match(momo_name: str, pchome_products: list) -> Optional[tuple]:
     return (best, best_score) if best else None
 
 
-def _search_pchome_candidates(crawler, momo_name: str) -> list:
+def _search_pchome_candidates(crawler, momo_name: str, keywords: list = None) -> list:
     """以多組搜尋詞擴大 PChome 候選池,找到可信候選後提早停止。"""
     candidates = []
     seen_ids = set()
-    for keyword in _build_search_keywords(momo_name):
+    for keyword in keywords or _build_search_keywords(momo_name):
         ok, _, products = crawler.search_products(keyword, limit=SEARCH_LIMIT)
         if not ok or not products:
             continue
@@ -259,6 +260,7 @@ class CompetitorPriceFeeder:
     def __init__(self, engine=None):
         self.engine = engine
         self._history_table_ready = False
+        self._attempt_table_ready = False
 
     def _ensure_competitor_price_history_table(self, conn):
         """確保競品價格歷史表存在;排程可自癒補表,不依賴手動 migration。"""
@@ -321,6 +323,120 @@ class CompetitorPriceFeeder:
 
         self._history_table_ready = True
 
+    def _ensure_competitor_match_attempts_table(self, conn):
+        """Ensure the PChome match-attempt table exists; matched, low-score, no-result and error outcomes must all be recorded."""
+        if self._attempt_table_ready:
+            return
+
+        from sqlalchemy import text
+        if conn.dialect.name == "postgresql":
+            conn.execute(text("""
+                CREATE TABLE IF NOT EXISTS competitor_match_attempts (
+                    id BIGSERIAL PRIMARY KEY,
+                    sku VARCHAR(50) NOT NULL,
+                    source VARCHAR(30) NOT NULL DEFAULT 'pchome',
+                    momo_product_id INTEGER,
+                    momo_product_name TEXT,
+                    momo_price NUMERIC(10,2),
+                    search_terms JSONB DEFAULT '[]'::jsonb,
+                    candidate_count INTEGER DEFAULT 0,
+                    attempt_status VARCHAR(30) NOT NULL,
+                    best_competitor_product_id VARCHAR(100),
+                    best_competitor_product_name TEXT,
+                    best_competitor_price NUMERIC(10,2),
+                    best_match_score NUMERIC(4,3),
+                    error_message TEXT,
+                    attempted_at TIMESTAMP NOT NULL DEFAULT NOW()
+                )
+            """))
+            conn.execute(text("""
+                CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_sku_source_time
+                ON competitor_match_attempts (sku, source, attempted_at DESC)
+            """))
+            conn.execute(text("""
+                CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_status_time
+                ON competitor_match_attempts (attempt_status, attempted_at DESC)
+            """))
+        else:
+            conn.execute(text("""
+                CREATE TABLE IF NOT EXISTS competitor_match_attempts (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    sku VARCHAR(50) NOT NULL,
+                    source VARCHAR(30) NOT NULL DEFAULT 'pchome',
+                    momo_product_id INTEGER,
+                    momo_product_name TEXT,
+                    momo_price NUMERIC(10,2),
+                    search_terms TEXT DEFAULT '[]',
+                    candidate_count INTEGER DEFAULT 0,
+                    attempt_status VARCHAR(30) NOT NULL,
+                    best_competitor_product_id VARCHAR(100),
+                    best_competitor_product_name TEXT,
+                    best_competitor_price NUMERIC(10,2),
+                    best_match_score NUMERIC(4,3),
+                    error_message TEXT,
+                    attempted_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+                )
+            """))
+            conn.execute(text("""
+                CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_sku_source_time
+                ON competitor_match_attempts (sku, source, attempted_at DESC)
+            """))
+            conn.execute(text("""
+                CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_status_time
+                ON competitor_match_attempts (attempt_status, attempted_at DESC)
+            """))
+
+        self._attempt_table_ready = True
+
+    def _record_match_attempt(
+        self,
+        sku: str,
+        momo_name: str,
+        momo_product_id: int = None,
+        momo_price: float = None,
+        search_terms: list = None,
+        candidate_count: int = 0,
+        attempt_status: str = "unknown",
+        best_product=None,
+        best_score: float = None,
+        error_message: str = None,
+        source: str = "pchome",
+    ) -> None:
+        """Append one PChome match-attempt record so pending/low-confidence items can also be traced back."""
+        from sqlalchemy import text
+
+        with self.engine.begin() as conn:
+            self._ensure_competitor_match_attempts_table(conn)
+            search_terms_expr = "CAST(:search_terms AS jsonb)" if conn.dialect.name == "postgresql" else ":search_terms"
+            conn.execute(text(f"""
+                INSERT INTO competitor_match_attempts
+                    (sku, source, momo_product_id, momo_product_name, momo_price,
+                     search_terms, candidate_count, attempt_status,
+                     best_competitor_product_id, best_competitor_product_name,
+                     best_competitor_price, best_match_score, error_message,
+                     attempted_at)
+                VALUES
+                    (:sku, :source, :momo_product_id, :momo_product_name, :momo_price,
+                     {search_terms_expr}, :candidate_count, :attempt_status,
+                     :best_id, :best_name,
+                     :best_price, :best_score, :error_message,
+                     CURRENT_TIMESTAMP)
+            """), {
+                "sku": sku,
+                "source": source,
+                "momo_product_id": momo_product_id,
+                "momo_product_name": momo_name,
+                "momo_price": momo_price,
+                "search_terms": json.dumps(search_terms or [], ensure_ascii=False),
+                "candidate_count": candidate_count,
+                "attempt_status": attempt_status,
+                "best_id": getattr(best_product, "product_id", None),
+                "best_name": (getattr(best_product, "name", None) or "")[:300] or None,
+                "best_price": getattr(best_product, "price", None),
+                "best_score": best_score,
+                "error_message": (error_message or "")[:1000] or None,
+            })
+
     def _fetch_active_skus(self) -> list:
         """
         從 products 表取得待監控的 ACTIVE 商品清單
@@ -489,22 +605,46 @@ class CompetitorPriceFeeder:
         skipped_low = 0
         errors = 0
         history_written = 0
+        attempts_written = 0
 
         for item in skus:
             sku = item["sku"]
             momo_name = item["name"]
             momo_product_id = item.get("product_id")
             momo_price = item.get("momo_price")
+            search_terms = _build_search_keywords(momo_name)
 
             try:
-                products = _search_pchome_candidates(crawler, momo_name)
+                products = _search_pchome_candidates(crawler, momo_name, search_terms)
                 if not products:
                     logger.debug(f"[Feeder] {sku} 無搜尋結果,跳過")
+                    self._record_match_attempt(
+                        sku,
+                        momo_name,
+                        momo_product_id=momo_product_id,
+                        momo_price=momo_price,
+                        search_terms=search_terms,
+                        candidate_count=0,
+                        attempt_status="no_result",
+                        source=source,
+                    )
+                    attempts_written += 1
                     skipped_no += 1
                     continue
 
                 result = _find_best_match(momo_name, products)
                 if not result:
+                    self._record_match_attempt(
+                        sku,
+                        momo_name,
+                        momo_product_id=momo_product_id,
+                        momo_price=momo_price,
+                        search_terms=search_terms,
+                        candidate_count=len(products),
+                        attempt_status="no_match",
+                        source=source,
+                    )
+                    attempts_written += 1
                     skipped_no += 1
                     continue
 
@@ -514,6 +654,19 @@ class CompetitorPriceFeeder:
                     logger.debug(
                         f"[Feeder] {sku} 比對分數過低 ({score:.3f} < {MIN_MATCH_SCORE}),跳過"
                     )
+                    self._record_match_attempt(
+                        sku,
+                        momo_name,
+                        momo_product_id=momo_product_id,
+                        momo_price=momo_price,
+                        search_terms=search_terms,
+                        candidate_count=len(products),
+                        attempt_status="low_score",
+                        best_product=best_product,
+                        best_score=score,
+                        source=source,
+                    )
+                    attempts_written += 1
                     skipped_low += 1
                     continue
 
@@ -527,8 +680,21 @@ class CompetitorPriceFeeder:
                     momo_price=momo_price,
                     source=source,
                 )
+                self._record_match_attempt(
+                    sku,
+                    momo_name,
+                    momo_product_id=momo_product_id,
+                    momo_price=momo_price,
+                    search_terms=search_terms,
+                    candidate_count=len(products),
+                    attempt_status="matched",
+                    best_product=best_product,
+                    best_score=score,
+                    source=source,
+                )
                 matched += 1
                 history_written += 1
+                attempts_written += 1
                 logger.debug(
                     f"[Feeder] {sku} → PChome ${best_product.price} "
                     f"score={score:.3f} tags={tags}"
@@ -536,13 +702,27 @@ class CompetitorPriceFeeder:
 
             except Exception as e:
                 logger.error(f"[Feeder] {sku} 處理失敗: {e}")
+                try:
+                    self._record_match_attempt(
+                        sku,
+                        momo_name,
+                        momo_product_id=momo_product_id,
+                        momo_price=momo_price,
+                        search_terms=search_terms,
+                        attempt_status="error",
+                        error_message=str(e),
+                        source=source,
+                    )
+                    attempts_written += 1
+                except Exception as attempt_error:
+                    logger.warning(f"[Feeder] {sku} 比對嘗試紀錄寫入失敗: {attempt_error}")
                 errors += 1
 
         duration = round(time.time() - start, 2)
         logger.info(
             f"[Feeder] 完成 matched={matched} skipped_no={skipped_no} "
             f"skipped_low={skipped_low} errors={errors} "
-            f"history_written={history_written} 耗時={duration}s"
+            f"history_written={history_written} attempts_written={attempts_written} 耗時={duration}s"
         )
         return FeederResult(
             total_skus=len(skus),
@@ -552,6 +732,7 @@ class CompetitorPriceFeeder:
             errors=errors,
             duration_sec=duration,
             history_written=history_written,
+            attempts_written=attempts_written,
         )
 
     def run(self, source: str = "pchome") -> FeederResult:
diff --git a/tests/test_competitor_match_attempts_persistence.py b/tests/test_competitor_match_attempts_persistence.py
new file mode 100644
index 0000000..038524f
--- /dev/null
+++ b/tests/test_competitor_match_attempts_persistence.py
@@ -0,0 +1,28 @@
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]
+
+
+def test_competitor_feeder_persists_all_match_attempt_outcomes():
+    source = (ROOT / "services/competitor_price_feeder.py").read_text(encoding="utf-8")
+    migration = (ROOT / "migrations/023_competitor_match_attempts.sql").read_text(encoding="utf-8")
+
+    assert "attempts_written" in source
+    assert "_ensure_competitor_match_attempts_table" in source
+    assert "_record_match_attempt" in source
+    assert "INSERT INTO competitor_match_attempts" in source
+    assert "CAST(:search_terms AS jsonb)" in source
+    assert 'attempt_status="matched"' in source
+    assert 'attempt_status="low_score"' in source
+    assert 'attempt_status="no_result"' in source
+    assert 'attempt_status="no_match"' in source
+    assert 'attempt_status="error"' in source
+    assert "_search_pchome_candidates(crawler, momo_name, search_terms)" in source
+
+    assert "CREATE TABLE IF NOT EXISTS competitor_match_attempts" in migration
+    assert "attempt_status" in migration
+    assert "search_terms" in migration
+    assert "best_match_score" in migration
+    assert "error_message" in migration
+    assert "idx_comp_match_attempts_sku_source_time" in migration