diff --git a/config.py b/config.py index 2ec03dd..2891aed 100644 --- a/config.py +++ b/config.py @@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.630" +SYSTEM_VERSION = "V10.631" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/routes/export_routes.py b/routes/export_routes.py index e68e1f3..457cd08 100644 --- a/routes/export_routes.py +++ b/routes/export_routes.py @@ -8,6 +8,7 @@ import os import io import json +import re from datetime import datetime, timezone, timedelta from flask import Blueprint, request, send_file, redirect, url_for, flash from auth import login_required @@ -31,6 +32,8 @@ sys_log = SystemLogger("ExportRoutes").get_logger() # Blueprint 定義 export_bp = Blueprint('export', __name__) +_EXCEL_ILLEGAL_CHAR_RE = re.compile(r'[\x00-\x08\x0B-\x0C\x0E-\x1F]') + # ========================================== # 輔助函數 (使用獨立模組,避免循環依賴) @@ -48,6 +51,24 @@ def _get_sales_cache(): return _SALES_PROCESSED_CACHE +def _sanitize_excel_cell(value): + """Remove control characters rejected by openpyxl worksheet cells.""" + if isinstance(value, str): + return _EXCEL_ILLEGAL_CHAR_RE.sub('', value) + return value + + +def _sanitize_excel_dataframe(df: pd.DataFrame) -> pd.DataFrame: + """Return an Excel-safe copy without changing numeric/date columns.""" + if df.empty: + return df + cleaned = df.copy() + for column in cleaned.columns: + if cleaned[column].dtype == object: + cleaned[column] = cleaned[column].map(_sanitize_excel_cell) + return cleaned + + def _flatten_review_decision_envelope(item): """Flatten the shared review decision envelope into operator-friendly columns.""" envelope = item.get('decision_envelope') or {} @@ -251,7 +272,7 @@ def export_excel_ai_picks(): output = io.BytesIO() with pd.ExcelWriter(output, engine='openpyxl') as writer: - df = 
import io

import pandas as pd

from routes.export_routes import _sanitize_excel_dataframe


def test_excel_export_sanitizer_removes_openpyxl_illegal_control_chars():
    """Sanitized frames lose illegal control characters and export cleanly.

    Covers every character in the illegal range (not just ``\\x0b``), checks
    that tab / line feed / carriage return survive, that numeric columns are
    untouched, and that the input frame is not mutated.
    """
    # Every C0 control character except tab, line feed and carriage return.
    illegal_chars = "".join(
        chr(code) for code in range(0x20) if code not in (0x09, 0x0A, 0x0D)
    )
    df = pd.DataFrame({
        "商品名稱": [
            "【Cetaphil 舒特膚官方】Baby舒緩潤膚乳400ml\x0b",
            "A" + illegal_chars + "B",
            "line1\nline2\tcol\r",
        ],
        "價格": [399, 1, 2],
    })

    cleaned = _sanitize_excel_dataframe(df)

    assert cleaned.loc[0, "商品名稱"] == "【Cetaphil 舒特膚官方】Baby舒緩潤膚乳400ml"
    assert cleaned.loc[0, "價格"] == 399
    # All illegal characters are stripped, not only vertical tab.
    assert cleaned.loc[1, "商品名稱"] == "AB"
    # Whitespace control characters that Excel accepts are kept.
    assert cleaned.loc[2, "商品名稱"] == "line1\nline2\tcol\r"
    assert cleaned["價格"].tolist() == [399, 1, 2]
    # The helper works on a copy; the caller's frame keeps its raw text.
    assert df.loc[0, "商品名稱"].endswith("\x0b")

    output = io.BytesIO()
    with pd.ExcelWriter(output, engine="openpyxl") as writer:
        cleaned.to_excel(writer, index=False, sheet_name="PChome覆核隊列")

    assert output.getbuffer().nbytes > 0