fix: sanitize excel export values
All checks were successful
CD Pipeline / deploy (push) Successful in 1m7s

This commit is contained in:
OoO
2026-06-18 14:58:32 +08:00
parent c83fb4cfa9
commit 8145c227c7
3 changed files with 47 additions and 3 deletions

View File

@@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# System version and paths
# ==========================================
SYSTEM_VERSION = "V10.630"
SYSTEM_VERSION = "V10.631"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # used for template display

View File

@@ -8,6 +8,7 @@
import os
import io
import json
import re
from datetime import datetime, timezone, timedelta
from flask import Blueprint, request, send_file, redirect, url_for, flash
from auth import login_required
@@ -31,6 +32,8 @@ sys_log = SystemLogger("ExportRoutes").get_logger()
# Blueprint definition
export_bp = Blueprint('export', __name__)
# Matches every control character below 0x20 except tab (0x09), line feed
# (0x0A) and carriage return (0x0D); used to scrub strings before they are
# written to worksheet cells.
_EXCEL_ILLEGAL_CHAR_RE = re.compile(r'[\x00-\x08\x0B-\x0C\x0E-\x1F]')
# ==========================================
# Helper functions (use standalone modules to avoid circular dependencies)
@@ -48,6 +51,24 @@ def _get_sales_cache():
return _SALES_PROCESSED_CACHE
def _sanitize_excel_cell(value):
    """Strip control characters that openpyxl rejects from a single cell value.

    Only ``str`` values are altered; every other value (numbers, dates,
    ``None``, ...) is handed back untouched.
    """
    # Guard clause: non-text values cannot contain the offending characters.
    if not isinstance(value, str):
        return value
    return _EXCEL_ILLEGAL_CHAR_RE.sub('', value)
def _sanitize_excel_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Return an Excel-safe copy of ``df`` without touching numeric/date columns.

    Text cells and column labels are passed through ``_sanitize_excel_cell``,
    which strips the control characters openpyxl refuses to write. Columns
    whose dtype is neither ``object`` nor a string dtype are left as they are.

    Args:
        df: Frame that is about to be written to a worksheet. It is never
            modified in place.

    Returns:
        A new DataFrame (also for empty input) with cleaned text cells and
        cleaned column labels.
    """
    cleaned = df.copy()
    is_object_dtype = pd.api.types.is_object_dtype
    is_string_dtype = pd.api.types.is_string_dtype
    for column in cleaned.columns:
        dtype = cleaned[column].dtype
        # pandas' dedicated string dtypes do not compare equal to ``object``,
        # so a plain ``dtype == object`` test would skip those text columns.
        if is_object_dtype(dtype) or is_string_dtype(dtype):
            cleaned[column] = cleaned[column].map(_sanitize_excel_cell)
    # Header labels are written to the sheet as cells too, so they need the
    # same treatment; this also covers frames that have columns but no rows.
    return cleaned.rename(columns=_sanitize_excel_cell)
def _flatten_review_decision_envelope(item):
"""Flatten the shared review decision envelope into operator-friendly columns."""
envelope = item.get('decision_envelope') or {}
@@ -251,7 +272,7 @@ def export_excel_ai_picks():
output = io.BytesIO()
with pd.ExcelWriter(output, engine='openpyxl') as writer:
df = pd.DataFrame(export_rows)
df = _sanitize_excel_dataframe(pd.DataFrame(export_rows))
df.to_excel(writer, index=False, sheet_name='AI挑品清單')
worksheet = writer.sheets['AI挑品清單']
for column_cells in worksheet.columns:
@@ -360,7 +381,7 @@ def export_excel_pchome_review():
output = io.BytesIO()
with pd.ExcelWriter(output, engine='openpyxl') as writer:
df = pd.DataFrame(export_rows)
df = _sanitize_excel_dataframe(pd.DataFrame(export_rows))
df.to_excel(writer, index=False, sheet_name='PChome覆核隊列')
worksheet = writer.sheets['PChome覆核隊列']
for column_cells in worksheet.columns:

View File

@@ -0,0 +1,23 @@
import io
import pandas as pd
from routes.export_routes import _sanitize_excel_dataframe
def test_excel_export_sanitizer_removes_openpyxl_illegal_control_chars():
    """A vertical-tab in a text cell is stripped and the frame becomes writable."""
    raw_frame = pd.DataFrame({
        "商品名稱": ["【Cetaphil 舒特膚官方】Baby舒緩潤膚乳400ml\x0b"],
        "價格": [399],
    })

    sanitized = _sanitize_excel_dataframe(raw_frame)

    # The control character is gone; the numeric column is untouched.
    assert sanitized.loc[0, "商品名稱"] == "【Cetaphil 舒特膚官方】Baby舒緩潤膚乳400ml"
    assert sanitized.loc[0, "價格"] == 399

    # Writing through openpyxl must now succeed and produce a non-empty file.
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine="openpyxl") as excel_writer:
        sanitized.to_excel(excel_writer, index=False, sheet_name="PChome覆核隊列")
    assert buffer.getbuffer().nbytes > 0