457 lines
18 KiB
Python
457 lines
18 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""PChome / MOMO 比價人工覆核決策服務。"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from datetime import datetime, timedelta, timezone
|
|
from typing import Any
|
|
|
|
from sqlalchemy import inspect, text
|
|
|
|
|
|
# Manual review actions accepted by record_competitor_match_review, keyed by
# the action name supplied by the caller. Each entry carries a display label,
# the attempt_status written to the follow-up match attempt, and the
# confirmation message returned on success.
VALID_REVIEW_ACTIONS = {
    "accept_identity": dict(
        label="採用同款",
        attempt_status="manual_accepted",
        message="已採用候選商品為正式 PChome 同款配對",
    ),
    "reject_identity": dict(
        label="否決候選",
        attempt_status="manual_rejected",
        message="已否決候選商品,本輪覆核已關閉",
    ),
    "unit_price_required": dict(
        label="標記單位價",
        attempt_status="manual_unit_price_required",
        message="已標記為需單位價比較,不寫入正式總價差",
    ),
    "needs_research": dict(
        label="需補搜尋",
        attempt_status="manual_needs_research",
        message="已標記為需補搜尋詞或重新抓取",
    ),
}
|
|
|
|
|
|
def _num(value: Any) -> float | None:
|
|
try:
|
|
if value is None:
|
|
return None
|
|
return float(value)
|
|
except (TypeError, ValueError):
|
|
return None
|
|
|
|
|
|
def _json_array_expr(conn, bind_name: str) -> str:
|
|
return f"CAST(:{bind_name} AS jsonb)" if conn.dialect.name == "postgresql" else f":{bind_name}"
|
|
|
|
|
|
def _json_object_expr(conn, bind_name: str) -> str:
|
|
return f"CAST(:{bind_name} AS jsonb)" if conn.dialect.name == "postgresql" else f":{bind_name}"
|
|
|
|
|
|
def _json_text_payload(value: Any) -> str | None:
|
|
if value is None:
|
|
return None
|
|
if isinstance(value, (dict, list)):
|
|
return json.dumps(value, ensure_ascii=False)
|
|
if isinstance(value, str):
|
|
stripped = value.strip()
|
|
if not stripped:
|
|
return None
|
|
try:
|
|
json.loads(stripped)
|
|
return stripped
|
|
except (TypeError, ValueError):
|
|
return json.dumps({"raw": stripped}, ensure_ascii=False)
|
|
return json.dumps(value, ensure_ascii=False)
|
|
|
|
|
|
def _has_table_column(conn, table_name: str, column_name: str) -> bool:
|
|
try:
|
|
columns = inspect(conn).get_columns(table_name)
|
|
return any(str(column.get("name")) == column_name for column in columns)
|
|
except Exception:
|
|
return False
|
|
|
|
|
|
def _candidate_diagnostic_text(attempt: dict[str, Any]) -> str:
    """Build the diagnostic text stored alongside a review record.

    Joins the attempt's stripped ``error_message`` (when non-empty) and its
    ``match_diagnostic_json`` payload (when present, prefixed with
    ``match_diagnostic_json=``) using ``"; "``, truncated to 4000 characters.
    """
    message = str(attempt.get("error_message") or "").strip()
    payload = _json_text_payload(attempt.get("match_diagnostic_json"))

    fragments: list[str] = [message] if message else []
    if payload:
        fragments.append(f"match_diagnostic_json={payload}")

    combined = "; ".join(fragments)
    return combined[:4000]
|
|
|
|
|
|
def _ensure_competitor_match_reviews_table(conn) -> None:
    """Create the ``competitor_match_reviews`` table and its indexes if missing.

    The PostgreSQL and non-PostgreSQL DDL differ only in the primary-key type
    and the ``reviewed_at`` default. All statements use ``IF NOT EXISTS``.
    """
    if conn.dialect.name == "postgresql":
        create_table_sql = """
            CREATE TABLE IF NOT EXISTS competitor_match_reviews (
                id BIGSERIAL PRIMARY KEY,
                sku VARCHAR(50) NOT NULL,
                source VARCHAR(30) NOT NULL DEFAULT 'pchome',
                review_action VARCHAR(40) NOT NULL,
                review_reason TEXT,
                reviewer_identity VARCHAR(120),
                momo_product_id INTEGER,
                momo_product_name TEXT,
                momo_price NUMERIC(10,2),
                candidate_product_id VARCHAR(100),
                candidate_product_name TEXT,
                candidate_price NUMERIC(10,2),
                candidate_match_score NUMERIC(4,3),
                candidate_diagnostic TEXT,
                resulting_attempt_status VARCHAR(40),
                reviewed_at TIMESTAMP NOT NULL DEFAULT NOW()
            )
        """
    else:
        create_table_sql = """
            CREATE TABLE IF NOT EXISTS competitor_match_reviews (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                sku VARCHAR(50) NOT NULL,
                source VARCHAR(30) NOT NULL DEFAULT 'pchome',
                review_action VARCHAR(40) NOT NULL,
                review_reason TEXT,
                reviewer_identity VARCHAR(120),
                momo_product_id INTEGER,
                momo_product_name TEXT,
                momo_price NUMERIC(10,2),
                candidate_product_id VARCHAR(100),
                candidate_product_name TEXT,
                candidate_price NUMERIC(10,2),
                candidate_match_score NUMERIC(4,3),
                candidate_diagnostic TEXT,
                resulting_attempt_status VARCHAR(40),
                reviewed_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
            )
        """

    # Index DDL is identical for every dialect.
    index_statements = (
        """
            CREATE INDEX IF NOT EXISTS idx_comp_match_reviews_sku_source_time
            ON competitor_match_reviews (sku, source, reviewed_at DESC)
        """,
        """
            CREATE INDEX IF NOT EXISTS idx_comp_match_reviews_action_time
            ON competitor_match_reviews (review_action, reviewed_at DESC)
        """,
        """
            CREATE INDEX IF NOT EXISTS idx_comp_match_reviews_candidate
            ON competitor_match_reviews (candidate_product_id)
        """,
    )

    conn.execute(text(create_table_sql))
    for index_sql in index_statements:
        conn.execute(text(index_sql))
|
|
|
|
|
|
def _ensure_competitor_price_history_table(conn) -> None:
    """Create the ``competitor_price_history`` table and its lookup index if missing.

    On PostgreSQL the primary key is ``BIGSERIAL`` and ``tags`` is ``JSONB``;
    on other dialects they are ``INTEGER ... AUTOINCREMENT`` and ``TEXT``.
    """
    if conn.dialect.name == "postgresql":
        tags_type, pk_type = "JSONB", "BIGSERIAL PRIMARY KEY"
    else:
        tags_type, pk_type = "TEXT", "INTEGER PRIMARY KEY AUTOINCREMENT"

    create_table_sql = f"""
        CREATE TABLE IF NOT EXISTS competitor_price_history (
            id {pk_type},
            sku VARCHAR(50) NOT NULL,
            source VARCHAR(30) NOT NULL DEFAULT 'pchome',
            momo_product_id INTEGER,
            momo_price NUMERIC(10,2),
            price NUMERIC(10,2) NOT NULL,
            original_price NUMERIC(10,2),
            discount_pct INTEGER,
            competitor_product_id VARCHAR(100),
            competitor_product_name TEXT,
            match_score NUMERIC(4,3),
            tags {tags_type} DEFAULT '[]',
            crawled_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
        )
    """
    create_index_sql = """
        CREATE INDEX IF NOT EXISTS idx_comp_price_history_sku_source_time
        ON competitor_price_history (sku, source, crawled_at DESC)
    """

    conn.execute(text(create_table_sql))
    conn.execute(text(create_index_sql))
|
|
|
|
|
|
def _fetch_latest_attempt(conn, sku: str, source: str) -> dict[str, Any] | None:
    """Return the most recent ``competitor_match_attempts`` row for *sku* / *source*.

    The row is enriched with ``current_momo_product_id``,
    ``current_momo_product_name`` (from ``products`` joined on ``i_code``) and
    ``current_momo_price`` (the newest ``price_records`` row for that
    product). Returns ``None`` when no attempt exists.
    """
    if conn.dialect.name == "postgresql":
        # PostgreSQL: fetch the newest price via a LATERAL join.
        price_and_from_sql = """
                pr.price AS current_momo_price
            FROM competitor_match_attempts cma
            LEFT JOIN products p ON p.i_code = cma.sku
            LEFT JOIN LATERAL (
                SELECT price
                FROM price_records
                WHERE product_id = p.id
                ORDER BY timestamp DESC, id DESC
                LIMIT 1
            ) pr ON TRUE
        """
    else:
        # Other dialects: use a correlated scalar subquery instead.
        price_and_from_sql = """
                (
                    SELECT price
                    FROM price_records
                    WHERE product_id = p.id
                    ORDER BY timestamp DESC, id DESC
                    LIMIT 1
                ) AS current_momo_price
            FROM competitor_match_attempts cma
            LEFT JOIN products p ON p.i_code = cma.sku
        """

    query = text(f"""
        SELECT
            cma.*,
            p.id AS current_momo_product_id,
            p.name AS current_momo_product_name,
            {price_and_from_sql}
        WHERE cma.sku = :sku
          AND cma.source = :source
        ORDER BY cma.attempted_at DESC, cma.id DESC
        LIMIT 1
    """)
    latest = conn.execute(query, {"sku": sku, "source": source}).mappings().first()
    if not latest:
        return None
    return dict(latest)
|
|
|
|
|
|
def _insert_manual_attempt(conn, attempt: dict[str, Any], action_meta: dict[str, str], source: str) -> None:
    """Insert a ``competitor_match_attempts`` row that records a manual decision.

    The new row copies the MOMO and best-candidate fields from *attempt*,
    falling back to the ``current_momo_*`` values when the attempt's own are
    falsy. ``attempt_status`` and ``error_message`` come from *action_meta*,
    and ``search_terms`` is a one-element JSON array
    ``["manual_review:<attempt_status>"]``. The optional columns
    ``match_diagnostic_json``, ``comparison_mode``, ``hard_veto`` and
    ``diagnostic_codes`` are carried over only when the table has them.
    """
    table_name = "competitor_match_attempts"

    # Column name -> SQL value expression, in INSERT order.
    column_exprs: dict[str, str] = {
        "sku": ":sku",
        "source": ":source",
        "momo_product_id": ":momo_product_id",
        "momo_product_name": ":momo_product_name",
        "momo_price": ":momo_price",
        "search_terms": _json_array_expr(conn, "search_terms"),
        "candidate_count": ":candidate_count",
        "attempt_status": ":attempt_status",
        "best_competitor_product_id": ":best_id",
        "best_competitor_product_name": ":best_name",
        "best_competitor_price": ":best_price",
        "best_match_score": ":best_score",
        "error_message": ":error_message",
        "attempted_at": "CURRENT_TIMESTAMP",
    }

    bind_values: dict[str, Any] = {
        "sku": attempt.get("sku"),
        "source": source,
        "momo_product_id": attempt.get("momo_product_id") or attempt.get("current_momo_product_id"),
        "momo_product_name": attempt.get("momo_product_name") or attempt.get("current_momo_product_name"),
        "momo_price": attempt.get("momo_price") or attempt.get("current_momo_price"),
        "search_terms": json.dumps([f"manual_review:{action_meta['attempt_status']}"], ensure_ascii=False),
        "candidate_count": attempt.get("candidate_count") or 0,
        "attempt_status": action_meta["attempt_status"],
        "best_id": attempt.get("best_competitor_product_id"),
        "best_name": attempt.get("best_competitor_product_name"),
        "best_price": attempt.get("best_competitor_price"),
        "best_score": attempt.get("best_match_score"),
        "error_message": action_meta["message"],
    }

    # Optional columns: (name, JSON expression builder or None for a plain bind).
    optional_columns = (
        ("match_diagnostic_json", _json_object_expr),
        ("comparison_mode", None),
        ("hard_veto", None),
        ("diagnostic_codes", _json_array_expr),
    )
    for column_name, json_expr_builder in optional_columns:
        if not _has_table_column(conn, table_name, column_name):
            continue
        carried_value = attempt.get(column_name)
        if json_expr_builder is None:
            column_exprs[column_name] = f":{column_name}"
            bind_values[column_name] = carried_value
        else:
            column_exprs[column_name] = json_expr_builder(conn, column_name)
            bind_values[column_name] = _json_text_payload(carried_value)

    column_list = ", ".join(column_exprs)
    value_list = ", ".join(column_exprs.values())
    conn.execute(text(f"""
        INSERT INTO competitor_match_attempts
        ({column_list})
        VALUES
        ({value_list})
    """), bind_values)
|
|
|
|
|
|
def _promote_manual_match(conn, attempt: dict[str, Any], source: str) -> None:
    """Promote the attempt's best candidate to the official competitor price.

    Upserts the candidate into ``competitor_prices`` (keyed on
    ``sku, source``) and appends a matching row to
    ``competitor_price_history``, creating the history table first if needed.

    Args:
        conn: Open SQLAlchemy connection inside the caller's transaction.
        attempt: Latest match-attempt row; supplies the ``best_competitor_*``
            candidate fields and the MOMO product id / price.
        source: Competitor source key written to both tables.

    Raises:
        ValueError: If the candidate id or name is blank, or the candidate
            price is missing, non-positive, NaN or infinite.
    """
    candidate_id = str(attempt.get("best_competitor_product_id") or "").strip()
    candidate_name = str(attempt.get("best_competitor_product_name") or "").strip()
    candidate_price = _num(attempt.get("best_competitor_price"))

    # The chained comparison is false for NaN and +inf as well as for values
    # <= 0, so a non-finite price can never be promoted.
    price_is_valid = candidate_price is not None and 0 < candidate_price < float("inf")
    if not candidate_id or not candidate_name or not price_is_valid:
        raise ValueError("採用同款需要候選 PChome 商品 ID、名稱與有效價格")

    _ensure_competitor_price_history_table(conn)

    # A malformed or empty TTL setting must not surface as a ValueError that
    # looks like a candidate-validation failure; fall back to the default.
    default_ttl_hours = 48
    try:
        ttl_hours = int(os.getenv("PCHOME_FEEDER_TTL_HOURS", str(default_ttl_hours)))
    except ValueError:
        ttl_hours = default_ttl_hours

    # expires_at is computed on a UTC+8 clock and bound as a naive timestamp string.
    _taipei = timezone(timedelta(hours=8))
    expires_at = (datetime.now(_taipei) + timedelta(hours=ttl_hours)).strftime("%Y-%m-%d %H:%M:%S")

    # Manually accepted matches are stored with a score of at least 0.76.
    # The explicit comparison (instead of max()) also floors NaN / inf scores.
    min_manual_score = 0.76
    raw_score = _num(attempt.get("best_match_score"))
    if raw_score is not None and min_manual_score < raw_score < float("inf"):
        match_score = raw_score
    else:
        match_score = min_manual_score

    tags = [
        "identity_v2",
        "manual_review",
        "manual_accept",
        "comparison_exact_identity",
    ]
    tags_json = json.dumps(tags, ensure_ascii=False)
    tags_expr = _json_array_expr(conn, "tags")
    momo_product_id = attempt.get("momo_product_id") or attempt.get("current_momo_product_id")
    momo_price = attempt.get("momo_price") or attempt.get("current_momo_price")

    # Bind values shared by both INSERT statements.
    shared_params = {
        "sku": attempt.get("sku"),
        "source": source,
        "price": candidate_price,
        "candidate_id": candidate_id,
        "candidate_name": candidate_name[:200],
        "match_score": match_score,
        "tags": tags_json,
    }

    conn.execute(text(f"""
        INSERT INTO competitor_prices
            (sku, source, price, original_price, discount_pct,
             competitor_product_id, competitor_product_name,
             match_score, tags, crawled_at, expires_at)
        VALUES
            (:sku, :source, :price, NULL, NULL,
             :candidate_id, :candidate_name,
             :match_score, {tags_expr}, CURRENT_TIMESTAMP, :expires_at)
        ON CONFLICT (sku, source) DO UPDATE
        SET price = EXCLUDED.price,
            original_price = EXCLUDED.original_price,
            discount_pct = EXCLUDED.discount_pct,
            competitor_product_id = EXCLUDED.competitor_product_id,
            competitor_product_name = EXCLUDED.competitor_product_name,
            match_score = EXCLUDED.match_score,
            tags = EXCLUDED.tags,
            crawled_at = CURRENT_TIMESTAMP,
            expires_at = EXCLUDED.expires_at
    """), {**shared_params, "expires_at": expires_at})

    conn.execute(text(f"""
        INSERT INTO competitor_price_history
            (sku, source, momo_product_id, momo_price,
             price, original_price, discount_pct,
             competitor_product_id, competitor_product_name,
             match_score, tags, crawled_at)
        VALUES
            (:sku, :source, :momo_product_id, :momo_price,
             :price, NULL, NULL,
             :candidate_id, :candidate_name,
             :match_score, {tags_expr}, CURRENT_TIMESTAMP)
    """), {
        **shared_params,
        "momo_product_id": momo_product_id,
        "momo_price": momo_price,
    })
|
|
|
|
|
|
def _expire_current_manual_candidate(conn, attempt: dict[str, Any], source: str) -> None:
|
|
"""Expire a current official match when the operator rejects its candidate."""
|
|
candidate_id = str(attempt.get("best_competitor_product_id") or "").strip()
|
|
sku = str(attempt.get("sku") or "").strip()
|
|
if not sku or not candidate_id:
|
|
return
|
|
|
|
conn.execute(text("""
|
|
UPDATE competitor_prices
|
|
SET expires_at = CURRENT_TIMESTAMP,
|
|
crawled_at = CURRENT_TIMESTAMP
|
|
WHERE sku = :sku
|
|
AND source = :source
|
|
AND competitor_product_id = :candidate_id
|
|
"""), {
|
|
"sku": sku,
|
|
"source": source,
|
|
"candidate_id": candidate_id,
|
|
})
|
|
|
|
|
|
def record_competitor_match_review(
    engine,
    sku: str,
    review_action: str,
    reviewer_identity: str = "dashboard",
    review_reason: str = "",
    source: str = "pchome",
) -> dict[str, Any]:
    """Record a human decision for the latest match attempt of *sku*.

    Within one transaction this ensures the reviews table exists, loads the
    latest attempt, then either promotes the candidate (``accept_identity``)
    or expires the current official match for that candidate (every other
    action). It then inserts a manual follow-up attempt and writes the row
    to ``competitor_match_reviews``.

    Args:
        engine: SQLAlchemy engine; ``engine.begin()`` provides the transaction.
        sku: SKU of the attempt to review; blank values are rejected.
        review_action: One of the keys of ``VALID_REVIEW_ACTIONS``.
        reviewer_identity: Reviewer label, truncated to 120 characters;
            defaults to ``"dashboard"`` when blank.
        review_reason: Free-text reason, truncated to 1000 characters.
        source: Competitor source key; defaults to ``"pchome"`` when blank.

    Returns:
        A dict with ``success`` and ``message``; on success it also carries
        ``sku``, ``review_action`` and ``attempt_status``.

    Raises:
        ValueError: Propagated from the accept path when the candidate lacks
            an id, a name or a valid price.
    """
    # Normalize every input up front; None and blank values fall back to defaults.
    sku = str(sku or "").strip()
    source = str(source or "pchome").strip() or "pchome"
    review_action = str(review_action or "").strip()
    review_reason = str(review_reason or "").strip()[:1000]
    reviewer_identity = str(reviewer_identity or "dashboard").strip()[:120] or "dashboard"

    if not sku:
        return {"success": False, "message": "缺少 MOMO 商品 ID"}

    action_meta = VALID_REVIEW_ACTIONS.get(review_action)
    if not action_meta:
        return {"success": False, "message": "不支援的覆核動作"}

    resulting_status = action_meta["attempt_status"]

    with engine.begin() as conn:
        _ensure_competitor_match_reviews_table(conn)

        attempt = _fetch_latest_attempt(conn, sku, source)
        if not attempt:
            return {"success": False, "message": "找不到可覆核的 PChome 比對嘗試"}

        if review_action == "accept_identity":
            _promote_manual_match(conn, attempt, source)
        else:
            _expire_current_manual_candidate(conn, attempt, source)

        _insert_manual_attempt(conn, attempt, action_meta, source)

        review_row = {
            "sku": sku,
            "source": source,
            "review_action": review_action,
            "review_reason": review_reason,
            "reviewer_identity": reviewer_identity,
            "momo_product_id": attempt.get("momo_product_id") or attempt.get("current_momo_product_id"),
            "momo_product_name": attempt.get("momo_product_name") or attempt.get("current_momo_product_name"),
            "momo_price": attempt.get("momo_price") or attempt.get("current_momo_price"),
            "candidate_id": attempt.get("best_competitor_product_id"),
            "candidate_name": attempt.get("best_competitor_product_name"),
            "candidate_price": attempt.get("best_competitor_price"),
            "candidate_score": attempt.get("best_match_score"),
            "candidate_diagnostic": _candidate_diagnostic_text(attempt),
            "resulting_attempt_status": resulting_status,
        }
        conn.execute(text("""
            INSERT INTO competitor_match_reviews
                (sku, source, review_action, review_reason, reviewer_identity,
                 momo_product_id, momo_product_name, momo_price,
                 candidate_product_id, candidate_product_name, candidate_price,
                 candidate_match_score, candidate_diagnostic,
                 resulting_attempt_status, reviewed_at)
            VALUES
                (:sku, :source, :review_action, :review_reason, :reviewer_identity,
                 :momo_product_id, :momo_product_name, :momo_price,
                 :candidate_id, :candidate_name, :candidate_price,
                 :candidate_score, :candidate_diagnostic,
                 :resulting_attempt_status, CURRENT_TIMESTAMP)
        """), review_row)

    return {
        "success": True,
        "message": action_meta["message"],
        "sku": sku,
        "review_action": review_action,
        "attempt_status": resulting_status,
    }
|