feat(competitor): persist match attempts
All checks were successful
CD Pipeline / deploy (push) Successful in 2m9s

This commit is contained in:
OoO
2026-05-01 20:56:17 +08:00
parent c1f43b0ae4
commit 7d46ff9ba5
6 changed files with 284 additions and 13 deletions

4
app.py
View File

@@ -95,8 +95,8 @@ except Exception as e:
sys_log.error(f"無法檢測磁碟空間: {e}")
# 🚩 系統版本定義 (備份與顯示用)
# 🚩 2026-05-01 V10.71: Persist full campaign crawl snapshots
SYSTEM_VERSION = "V10.71"
# 🚩 2026-05-01 V10.72: Persist competitor match attempts and schedule remaining V2 pages
SYSTEM_VERSION = "V10.72"
# ==========================================
# 🔒 SQL Injection 防護函數

View File

@@ -254,7 +254,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# 系統版本與路徑
# ==========================================
SYSTEM_VERSION = "V10.71"
SYSTEM_VERSION = "V10.72"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # 用於模板顯示

View File

@@ -142,13 +142,23 @@
目標:讓常用營運頁進入新版 shell,但先不重寫全部細節。
建議順序:
推進順序:
1. `web/templates/vendor_stockout/index.html`(已以 `templates/vendor_stockout_index_v2.html` feature flag 方式落地)
2. `web/templates/vendor_stockout/list.html`(已以 `templates/vendor_stockout_list_v2.html` feature flag 方式落地)
3. `templates/auto_import_index.html`
4. `templates/ai_recommend.html`
5. `templates/settings.html` / `templates/crawler_management.html`
1. `templates/ai_intelligence.html` / `templates/ai_recommend.html` / `templates/ai_history.html`
- 原因:AI 挑品、PChome 比對、建議信心度與待補證據是目前商品看板下一步核心工作流。
- 目標:新版 shell、AI 挑品清單、比對嘗試狀態、信心度證據缺口、操作回饋。
2. `templates/auto_import_index.html`
- 原因:所有真實資料要完整入庫,匯入頁是營運資料進入系統的主要入口。
- 目標:新版 shell、匯入批次狀態、錯誤列、最近匯入紀錄與資料表落點。
3. `templates/sales_analysis.html` / `templates/daily_sales.html` / `templates/monthly_summary_analysis.html`
- 原因:AI 挑品信心度需要銷售額、銷量、成本、毛利等證據支撐。
- 目標:保留原報表功能,改版為可掃描的營運分析工作台。
4. `web/templates/vendor_stockout/index.html`、`list.html`、`import.html`
- 狀態:首頁、列表、匯入已先以 `templates/vendor_stockout_*_v2.html` 方式落地;後續補齊 history、vendor management、send email。
5. 系統管理相關頁:`templates/settings.html`、`templates/system_settings.html`、`templates/user_management.html`、`templates/logs.html`
- 目標:新版 shell、設定群組化、狀態/權限/日誌統一呈現。
6. PChome / 趨勢 / 其他工具頁:`templates/pchome_crawler.html`、`templates/price_comparison.html`、`templates/trends.html`
- 目標:接入新版 shell 與資料完整性狀態,不改核心爬蟲規則。
工作內容:

View File

@@ -0,0 +1,52 @@
-- =============================================================================
-- Migration 023: competitor match-attempt history table
-- MOMO PRO — PChome match observability
-- 2026-05-01 Taipei
-- =============================================================================
-- Notes:
-- competitor_prices / competitor_price_history only store successful matches.
-- competitor_match_attempts is append-only and stores every PChome match
-- attempt, including matched, low_score, no_result, no_match and error.
-- This gives "pending match" products a traceable history that can be used to
-- improve search terms, thresholds and AI product-picking confidence.
-- =============================================================================
-- Idempotent: IF NOT EXISTS lets the migration be re-run safely.
CREATE TABLE IF NOT EXISTS competitor_match_attempts (
id BIGSERIAL PRIMARY KEY,
-- MOMO-side product identity and snapshot at attempt time
sku VARCHAR(50) NOT NULL,
source VARCHAR(30) NOT NULL DEFAULT 'pchome',
momo_product_id INTEGER,
momo_product_name TEXT,
momo_price NUMERIC(10,2),
-- This match attempt: search terms used, candidates found, and the outcome
search_terms JSONB DEFAULT '[]'::jsonb,
candidate_count INTEGER DEFAULT 0,
attempt_status VARCHAR(30) NOT NULL,
-- Best candidate; kept even when the score is low, for later analysis
best_competitor_product_id VARCHAR(100),
best_competitor_product_name TEXT,
best_competitor_price NUMERIC(10,2),
best_match_score NUMERIC(4,3),
-- Error or external API failure
error_message TEXT,
attempted_at TIMESTAMP NOT NULL DEFAULT NOW()
);
-- Per-SKU / per-source attempt history, newest first.
CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_sku_source_time
ON competitor_match_attempts (sku, source, attempted_at DESC);
-- Attempts filtered by outcome status, newest first.
CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_status_time
ON competitor_match_attempts (attempt_status, attempted_at DESC);
-- Give the momo role access to the table and to its id sequence.
GRANT ALL PRIVILEGES ON competitor_match_attempts TO momo;
GRANT USAGE, SELECT ON SEQUENCE competitor_match_attempts_id_seq TO momo;
-- Emit a completion notice in the migration output.
DO $$
BEGIN
RAISE NOTICE '✅ Migration 023 完成 — competitor_match_attempts 比對嘗試歷史表已建立';
END $$;

View File

@@ -51,6 +51,7 @@ class FeederResult:
errors: int
duration_sec: float
history_written: int = 0
attempts_written: int = 0
def _extract_tags(pchome_product) -> list:
@@ -186,11 +187,11 @@ def _find_best_match(momo_name: str, pchome_products: list) -> Optional[tuple]:
return (best, best_score) if best else None
def _search_pchome_candidates(crawler, momo_name: str) -> list:
def _search_pchome_candidates(crawler, momo_name: str, keywords: list = None) -> list:
"""以多組搜尋詞擴大 PChome 候選池,找到可信候選後提早停止。"""
candidates = []
seen_ids = set()
for keyword in _build_search_keywords(momo_name):
for keyword in keywords or _build_search_keywords(momo_name):
ok, _, products = crawler.search_products(keyword, limit=SEARCH_LIMIT)
if not ok or not products:
continue
@@ -259,6 +260,7 @@ class CompetitorPriceFeeder:
def __init__(self, engine=None):
self.engine = engine
self._history_table_ready = False
self._attempt_table_ready = False
def _ensure_competitor_price_history_table(self, conn):
"""確保競品價格歷史表存在;排程可自癒補表,不依賴手動 migration。"""
@@ -321,6 +323,120 @@ class CompetitorPriceFeeder:
self._history_table_ready = True
def _ensure_competitor_match_attempts_table(self, conn):
    """Make sure the PChome match-attempt table and its indexes exist.

    Every outcome (matched, low score, no result, error) must leave a
    trace, so the table is created on demand. The work is done at most
    once per feeder instance, guarded by ``_attempt_table_ready``.

    Args:
        conn: Open connection; ``conn.dialect.name`` selects the DDL variant.
    """
    if self._attempt_table_ready:
        return

    from sqlalchemy import text

    # PostgreSQL variant: BIGSERIAL key, JSONB search terms, NOW() default.
    postgres_table_ddl = """
        CREATE TABLE IF NOT EXISTS competitor_match_attempts (
        id BIGSERIAL PRIMARY KEY,
        sku VARCHAR(50) NOT NULL,
        source VARCHAR(30) NOT NULL DEFAULT 'pchome',
        momo_product_id INTEGER,
        momo_product_name TEXT,
        momo_price NUMERIC(10,2),
        search_terms JSONB DEFAULT '[]'::jsonb,
        candidate_count INTEGER DEFAULT 0,
        attempt_status VARCHAR(30) NOT NULL,
        best_competitor_product_id VARCHAR(100),
        best_competitor_product_name TEXT,
        best_competitor_price NUMERIC(10,2),
        best_match_score NUMERIC(4,3),
        error_message TEXT,
        attempted_at TIMESTAMP NOT NULL DEFAULT NOW()
        )
        """
    # Variant for every other dialect: AUTOINCREMENT key, TEXT search terms.
    fallback_table_ddl = """
        CREATE TABLE IF NOT EXISTS competitor_match_attempts (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        sku VARCHAR(50) NOT NULL,
        source VARCHAR(30) NOT NULL DEFAULT 'pchome',
        momo_product_id INTEGER,
        momo_product_name TEXT,
        momo_price NUMERIC(10,2),
        search_terms TEXT DEFAULT '[]',
        candidate_count INTEGER DEFAULT 0,
        attempt_status VARCHAR(30) NOT NULL,
        best_competitor_product_id VARCHAR(100),
        best_competitor_product_name TEXT,
        best_competitor_price NUMERIC(10,2),
        best_match_score NUMERIC(4,3),
        error_message TEXT,
        attempted_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
        )
        """
    # The two indexes are identical for both dialects.
    index_ddl = (
        """
        CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_sku_source_time
        ON competitor_match_attempts (sku, source, attempted_at DESC)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_status_time
        ON competitor_match_attempts (attempt_status, attempted_at DESC)
        """,
    )

    on_postgres = conn.dialect.name == "postgresql"
    table_ddl = postgres_table_ddl if on_postgres else fallback_table_ddl

    # Table first, then its indexes, in the same order for either dialect.
    for statement in (table_ddl, *index_ddl):
        conn.execute(text(statement))

    self._attempt_table_ready = True
def _record_match_attempt(
    self,
    sku: str,
    momo_name: str,
    momo_product_id: int = None,
    momo_price: float = None,
    search_terms: list = None,
    candidate_count: int = 0,
    attempt_status: str = "unknown",
    best_product=None,
    best_score: float = None,
    error_message: str = None,
    source: str = "pchome",
) -> None:
    """Append one PChome match-attempt row so pending/low-confidence SKUs stay traceable.

    Args:
        sku: MOMO SKU the attempt belongs to.
        momo_name: MOMO product name at attempt time.
        momo_product_id: MOMO product id, if known.
        momo_price: MOMO price at attempt time, if known.
        search_terms: Search keywords used; stored as a JSON array
            (``None`` is stored as ``[]``).
        candidate_count: Number of candidates the search returned.
        attempt_status: Outcome label written to ``attempt_status``.
        best_product: Best candidate, or None; its ``product_id``, ``name``
            and ``price`` attributes are read when present.
        best_score: Match score of ``best_product``, if any.
        error_message: Error text; truncated to 1000 characters.
        source: Competitor source label.

    Raises:
        Exception: Whatever the engine raises when the schema bootstrap or
            the INSERT fails; callers decide whether that is fatal.
    """
    from sqlalchemy import text

    if not self._attempt_table_ready:
        # Bootstrap the schema in its OWN transaction. If it shared the
        # INSERT's transaction, a failing INSERT would roll the CREATE TABLE
        # back (DDL is transactional on PostgreSQL) while the readiness flag
        # stayed True, and every later attempt would hit a missing table.
        try:
            with self.engine.begin() as ddl_conn:
                self._ensure_competitor_match_attempts_table(ddl_conn)
        except Exception:
            # The DDL did not commit; make the next call retry it.
            self._attempt_table_ready = False
            raise

    # Empty strings collapse to NULL; long values are truncated, not rejected.
    best_name = (getattr(best_product, "name", None) or "")[:300] or None
    params = {
        "sku": sku,
        "source": source,
        "momo_product_id": momo_product_id,
        "momo_product_name": momo_name,
        "momo_price": momo_price,
        "search_terms": json.dumps(search_terms or [], ensure_ascii=False),
        "candidate_count": candidate_count,
        "attempt_status": attempt_status,
        "best_id": getattr(best_product, "product_id", None),
        "best_name": best_name,
        "best_price": getattr(best_product, "price", None),
        "best_score": best_score,
        "error_message": (error_message or "")[:1000] or None,
    }

    with self.engine.begin() as conn:
        # PostgreSQL stores search_terms as JSONB and needs an explicit cast;
        # other dialects keep the JSON text as-is.
        if conn.dialect.name == "postgresql":
            search_terms_expr = "CAST(:search_terms AS jsonb)"
        else:
            search_terms_expr = ":search_terms"
        conn.execute(text(f"""
            INSERT INTO competitor_match_attempts
            (sku, source, momo_product_id, momo_product_name, momo_price,
            search_terms, candidate_count, attempt_status,
            best_competitor_product_id, best_competitor_product_name,
            best_competitor_price, best_match_score, error_message,
            attempted_at)
            VALUES
            (:sku, :source, :momo_product_id, :momo_product_name, :momo_price,
            {search_terms_expr}, :candidate_count, :attempt_status,
            :best_id, :best_name,
            :best_price, :best_score, :error_message,
            CURRENT_TIMESTAMP)
        """), params)
def _fetch_active_skus(self) -> list:
"""
從 products 表取得待監控的 ACTIVE 商品清單
@@ -489,22 +605,46 @@ class CompetitorPriceFeeder:
skipped_low = 0
errors = 0
history_written = 0
attempts_written = 0
for item in skus:
sku = item["sku"]
momo_name = item["name"]
momo_product_id = item.get("product_id")
momo_price = item.get("momo_price")
search_terms = _build_search_keywords(momo_name)
try:
products = _search_pchome_candidates(crawler, momo_name)
products = _search_pchome_candidates(crawler, momo_name, search_terms)
if not products:
logger.debug(f"[Feeder] {sku} 無搜尋結果,跳過")
self._record_match_attempt(
sku,
momo_name,
momo_product_id=momo_product_id,
momo_price=momo_price,
search_terms=search_terms,
candidate_count=0,
attempt_status="no_result",
source=source,
)
attempts_written += 1
skipped_no += 1
continue
result = _find_best_match(momo_name, products)
if not result:
self._record_match_attempt(
sku,
momo_name,
momo_product_id=momo_product_id,
momo_price=momo_price,
search_terms=search_terms,
candidate_count=len(products),
attempt_status="no_match",
source=source,
)
attempts_written += 1
skipped_no += 1
continue
@@ -514,6 +654,19 @@ class CompetitorPriceFeeder:
logger.debug(
f"[Feeder] {sku} 比對分數過低 ({score:.3f} < {MIN_MATCH_SCORE}),跳過"
)
self._record_match_attempt(
sku,
momo_name,
momo_product_id=momo_product_id,
momo_price=momo_price,
search_terms=search_terms,
candidate_count=len(products),
attempt_status="low_score",
best_product=best_product,
best_score=score,
source=source,
)
attempts_written += 1
skipped_low += 1
continue
@@ -527,8 +680,21 @@ class CompetitorPriceFeeder:
momo_price=momo_price,
source=source,
)
self._record_match_attempt(
sku,
momo_name,
momo_product_id=momo_product_id,
momo_price=momo_price,
search_terms=search_terms,
candidate_count=len(products),
attempt_status="matched",
best_product=best_product,
best_score=score,
source=source,
)
matched += 1
history_written += 1
attempts_written += 1
logger.debug(
f"[Feeder] {sku} → PChome ${best_product.price} "
f"score={score:.3f} tags={tags}"
@@ -536,13 +702,27 @@ class CompetitorPriceFeeder:
except Exception as e:
logger.error(f"[Feeder] {sku} 處理失敗: {e}")
try:
self._record_match_attempt(
sku,
momo_name,
momo_product_id=momo_product_id,
momo_price=momo_price,
search_terms=search_terms,
attempt_status="error",
error_message=str(e),
source=source,
)
attempts_written += 1
except Exception as attempt_error:
logger.warning(f"[Feeder] {sku} 比對嘗試紀錄寫入失敗: {attempt_error}")
errors += 1
duration = round(time.time() - start, 2)
logger.info(
f"[Feeder] 完成 matched={matched} skipped_no={skipped_no} "
f"skipped_low={skipped_low} errors={errors} "
f"history_written={history_written} 耗時={duration}s"
f"history_written={history_written} attempts_written={attempts_written} 耗時={duration}s"
)
return FeederResult(
total_skus=len(skus),
@@ -552,6 +732,7 @@ class CompetitorPriceFeeder:
errors=errors,
duration_sec=duration,
history_written=history_written,
attempts_written=attempts_written,
)
def run(self, source: str = "pchome") -> FeederResult:

View File

@@ -0,0 +1,28 @@
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
def test_competitor_feeder_persists_all_match_attempt_outcomes():
    """Check that the feeder source and migration 023 mention every attempt outcome.

    This is a text-level check: both files are read from disk and searched
    for the identifiers, SQL fragments and status literals the feature
    depends on.
    """
    feeder_path = ROOT / "services/competitor_price_feeder.py"
    migration_path = ROOT / "migrations/023_competitor_match_attempts.sql"
    source = feeder_path.read_text(encoding="utf-8")
    migration = migration_path.read_text(encoding="utf-8")

    # Fragments the feeder module must contain.
    expected_in_source = (
        "attempts_written",
        "_ensure_competitor_match_attempts_table",
        "_record_match_attempt",
        "INSERT INTO competitor_match_attempts",
        "CAST(:search_terms AS jsonb)",
        'attempt_status="matched"',
        'attempt_status="low_score"',
        'attempt_status="no_result"',
        'attempt_status="no_match"',
        'attempt_status="error"',
        "_search_pchome_candidates(crawler, momo_name, search_terms)",
    )
    for fragment in expected_in_source:
        assert fragment in source

    # Fragments the SQL migration must contain.
    expected_in_migration = (
        "CREATE TABLE IF NOT EXISTS competitor_match_attempts",
        "attempt_status",
        "search_terms",
        "best_match_score",
        "error_message",
        "idx_comp_match_attempts_sku_source_time",
    )
    for fragment in expected_in_migration:
        assert fragment in migration