From 3b14368d4e3110dcc340929afc565ea49232dacc Mon Sep 17 00:00:00 2001 From: ogt Date: Fri, 26 Jun 2026 17:52:06 +0800 Subject: [PATCH] fix: harden alerts and backup deployment guard --- backup_system.py | 7 + config.py | 2 +- docs/AI_INTELLIGENCE_MODULE_SOT.md | 4 +- scripts/tools/backup_system.py | 185 ++++++++++++++++---- services/telegram_templates.py | 25 ++- tests/test_backup_system_tool.py | 71 ++++++++ tests/test_telegram_triaged_alert_format.py | 21 ++- 7 files changed, 274 insertions(+), 41 deletions(-) create mode 100644 backup_system.py create mode 100644 tests/test_backup_system_tool.py diff --git a/backup_system.py b/backup_system.py new file mode 100644 index 0000000..9461c1a --- /dev/null +++ b/backup_system.py @@ -0,0 +1,7 @@ +"""Repository-root entrypoint for the MOMO Pro source backup tool.""" + +from scripts.tools.backup_system import create_backup + + +if __name__ == "__main__": + create_backup() diff --git a/config.py b/config.py index 29f7604..7d204ba 100644 --- a/config.py +++ b/config.py @@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.708" +SYSTEM_VERSION = "V10.709" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index 2a1a44a..bad4ce4 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -2,7 +2,7 @@ > **最後更新**: 2026-06-26 (台北時間) > **狀態**: 🟢 四 AI Agent 自動化閉環已落地;LLM 路由紅線升級為 Ollama-first 三主機級聯;PChome 後台業績匯入韌性已補強;產品定位正名為「PChome 業績成長自動化作戰系統」;外部市場來源正規化層、自動同步、作戰清單與價格參考表優先讀取、CSV 備援預檢、前台操作入口、高可見頁面繁中化守門、比價/作戰 UI 工作台化、跨平台來源治理與商品身份 UI 契約已建立,GCP embedding 熔斷延後處理、110 proxy rescue 與 direct host health skip 已建立 -> **適用版本**: V10.708 +> **適用版本**: V10.709 --- @@ -792,3 +792,5 @@ POSTGRES_HOST=momo-db | 2026-06-26 | 外部來源視野不可停在少數平台 | V10.707 起外部來源契約再補 
TikTok Shop、LINE 購物、露天、品牌官網 / Shopify;所有待接來源必須在 UI 顯示為待接入且不進告警,等官方 API、商品 feed、供應商 API 或人工 CSV 通過品質門檻後才可進作戰清單。 | | 2026-06-26 | 同版 CSS 修正必須跳版本破快取 | V10.707 起 UI 修正若影響 `web/static` 資產,必須同步提升 `SYSTEM_VERSION`,讓正式 HTML 的 `?v=` 參數改變;不得在同一版本號下修改 CSS 後宣稱使用者一定看得到。 | | 2026-06-26 | AI 挑品賣場操作必須固定可見 | V10.708 起 AI 挑品清單在桌面寬度固定「AI 建議 / 賣場操作」欄,橫向查看價格與更新欄時仍能直接開 MOMO / PChome 賣場;手機版維持卡片式堆疊。 | +| 2026-06-26 | Telegram 告警不得因非支援 HTML 送出失敗 | V10.709 起 Telegram HTML 發送前只保留 Bot API 支援的 ``、``、``、`
`、`` 等白名單標籤;``、原始錯誤物件或其他未知標籤會轉成可讀文字,避免營運告警因 parse error 400 消失。 |
+| 2026-06-26 | 部署前備份入口必須備到專案根目錄 | V10.709 起根目錄 `backup_system.py` 與 `scripts/tools/backup_system.py` 共用同一套備份流程,預設打包專案根目錄並排除 `.env`、Google token、`.git`、runtime volume 與既有 backups,避免只備到 `scripts/tools` 或把敏感 runtime 檔案包入備份。 |
diff --git a/scripts/tools/backup_system.py b/scripts/tools/backup_system.py
index 8706619..c7ae379 100644
--- a/scripts/tools/backup_system.py
+++ b/scripts/tools/backup_system.py
@@ -1,60 +1,179 @@
-import os
 import datetime
+import os
 import re
 import zipfile
+from pathlib import Path
+
+
+EXCLUDED_DIRS = {  # directory names skipped wherever they appear in the walked tree
+    ".claude",
+    ".codex",
+    ".coverage",  # NOTE(review): coverage.py normally writes a *file* with this name; as a directory name this may never match - confirm
+    ".cache",
+    ".git",
+    ".idea",
+    ".mypy_cache",
+    ".next",
+    ".pytest_cache",
+    ".ruff_cache",
+    ".venv",
+    ".vscode",
+    "__pycache__",
+    "backups",
+    "bin",
+    "bin 2",
+    "node_modules",
+    "runtime",
+    "volumes",
+    "build",
+    "dist",
+    "htmlcov",
+    "logs",
+    "playwright-report",
+    "screenshots",
+    "temp",
+    "tmp",
+    "uploads",
+}
+
+EXCLUDED_PATH_PREFIXES = {  # tuples of leading path components, compared against paths relative to the project root
+    ("docs", "design"),
+    ("export_assets",),
+    ("frontend", ".next"),
+    ("MOMO Pro",),
+}
+
+EXCLUDED_FILES = {  # exact file base names that are never archived, in any directory
+    ".DS_Store",
+    ".env",  # ".env.<suffix>" variants are caught by a startswith check in _should_skip_file
+    "google_credentials.json",
+    "google_token.json",
+    "google_token.pickle",
+}
+
+EXCLUDED_SUFFIXES = (".pyc", ".pyo", ".sqlite", ".sqlite3", ".tsbuildinfo")  # tested against Path.suffix (final extension, case-sensitive)
+
+
+def _resolve_project_root() -> Path:  # returns the directory tree that create_backup() walks and archives
+    configured = os.getenv("MOMO_BACKUP_ROOT")  # optional override; unset or empty falls through to the default
+    if configured:
+        return Path(configured).expanduser().resolve()  # expand "~", then make absolute
+    return Path(__file__).resolve().parents[2]  # for scripts/tools/backup_system.py this is the repository root
+
+
+def _resolve_backup_folder(project_root: Path) -> Path:  # returns the directory the zip archive is written to
+    configured = os.getenv("MOMO_BACKUP_DIR")  # optional override; unset or empty falls through to the default
+    if configured:
+        return Path(configured).expanduser().resolve()  # expand "~", then make absolute
+    return project_root / "backups"  # default sits inside the walked tree, so the walk has to exclude it
+
+
+def _read_system_version(project_root: Path) -> str:  # returns the SYSTEM_VERSION string found in config.py, or "Unknown"
+    version = "Unknown"  # fallback when config.py is missing, unreadable, or has no matching assignment
+    config_path = project_root / "config.py"
+    try:
+        if config_path.exists():
+            content = config_path.read_text(encoding="utf-8")
+            match = re.search(r'SYSTEM_VERSION\s*=\s*["\']([^"\']+)["\']', content)  # plain text scan (first hit wins); config.py is not imported
+            if match:
+                version = match.group(1)
+    except Exception as e:  # best effort: any failure is printed and the fallback value is kept
+        print(f"⚠️ 無法讀取版本號: {e}")
+    return version
+
+
+def _should_skip_file(file_path: Path, project_root: Path, backup_folder: Path) -> bool:  # True means: leave this file out of the archive
+    if file_path.is_symlink():  # symlinked files are never archived
+        return True
+    if file_path.name in EXCLUDED_FILES:  # exact base-name match, regardless of directory
+        return True
+    if file_path.name.startswith(".env."):  # dotenv variants; the bare ".env" name is listed in EXCLUDED_FILES
+        return True
+    if file_path.suffix in EXCLUDED_SUFFIXES:  # only the final extension is compared
+        return True
+
+    try:
+        file_path.resolve().relative_to(backup_folder)  # raises ValueError when the file is not under backup_folder
+        return True  # file lives in the backup folder (this also covers the archive being written)
+    except ValueError:
+        pass
+
+    rel_parts = file_path.relative_to(project_root).parts  # NOTE(review): raises ValueError if file_path is not under project_root
+    if _is_excluded_path(rel_parts):
+        return True
+    return any(_is_excluded_dir_name(part) for part in rel_parts[:-1])  # [:-1] drops the file name, so only parent directories are tested
+
+
+def _is_excluded_dir_name(dirname: str) -> bool:  # name-only test; the directory's location is not considered
+    return dirname in EXCLUDED_DIRS or dirname.startswith("production_v")  # the prefix also matches names such as "production_v3 3"
+
+
+def _is_excluded_path(rel_parts: tuple[str, ...]) -> bool:  # callers pass path components relative to the project root
+    for prefix in EXCLUDED_PATH_PREFIXES:
+        if rel_parts[:len(prefix)] == prefix:  # the leading components must equal the whole prefix tuple
+            return True
+    return False
+
+
+def _prune_dirs(dirs: list[str], root: Path, project_root: Path, backup_folder: Path) -> None:  # filters `dirs` in place; returns nothing
+    kept = []
+    for dirname in dirs:
+        candidate = root / dirname
+        try:
+            rel_parts = candidate.relative_to(project_root).parts
+        except ValueError:
+            rel_parts = ()  # candidate is not under project_root, so the prefix test below is skipped
+        if rel_parts and _is_excluded_path(rel_parts):
+            continue
+        if _is_excluded_dir_name(dirname):
+            continue
+        if candidate.is_symlink():  # symlinked directories are dropped from the walk
+            continue
+        try:
+            candidate.resolve().relative_to(backup_folder)  # raises ValueError unless candidate is backup_folder or inside it
+            continue
+        except ValueError:
+            pass
+        kept.append(dirname)
+    dirs[:] = kept  # slice assignment mutates the caller's list; a top-down os.walk then skips the removed entries
 
 def create_backup():
     """
     建立系統完整備份 (Zip 壓縮檔)
     檔名格式: momo_pro_system_backup_YYYYMMDD_HHMMSS_V{version}.zip
     """
-    # 1. 基礎路徑設定
-    base_dir = os.path.dirname(os.path.abspath(__file__))
-    backup_folder = os.path.join(base_dir, 'backups')
+    project_root = _resolve_project_root()  # MOMO_BACKUP_ROOT override, else derived from this file's location
+    backup_folder = _resolve_backup_folder(project_root)  # MOMO_BACKUP_DIR override, else <project_root>/backups
     
-    if not os.path.exists(backup_folder):
-        os.makedirs(backup_folder)
+    if not backup_folder.exists():
+        backup_folder.mkdir(parents=True)  # NOTE(review): outside the try below and without exist_ok, so a concurrent create raises FileExistsError to the caller
         print(f"📂 已建立備份目錄: {backup_folder}")
 
-    # 2. 嘗試從 app.py 讀取版本號
-    version = "Unknown"
-    app_py_path = os.path.join(base_dir, 'app.py')
-    try:
-        if os.path.exists(app_py_path):
-            with open(app_py_path, 'r', encoding='utf-8') as f:
-                content = f.read()
-                # 尋找 SYSTEM_VERSION = "V9.0"
-                match = re.search(r'SYSTEM_VERSION\s*=\s*["\']([^"\']+)["\']', content)
-                if match:
-                    version = match.group(1)
-    except Exception as e:
-        print(f"⚠️ 無法讀取版本號: {e}")
+    version = _read_system_version(project_root)  # read from <project_root>/config.py; "Unknown" when not found
 
-    # 3. 產生備份檔名
     timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
     base_name = f"momo_pro_system_backup_{timestamp}_{version}.zip"
-    output_path = os.path.join(backup_folder, base_name)
+    output_path = backup_folder / base_name  # a Path here; converted to str only for the return value
 
-    print(f"📦 正在打包專案目錄: {base_dir}")
+    print(f"📦 正在打包專案目錄: {project_root}")
     print(f"🎯 目標檔案: {output_path}")
 
-    # 4. 執行壓縮
     try:
         with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
-            for root, dirs, files in os.walk(base_dir):
-                # 排除不需要備份的目錄
-                for ignore in ['backups', '__pycache__', '.git', '.idea', '.vscode', 'bin', 'bin 2']:
-                    if ignore in dirs:
-                        dirs.remove(ignore)
-                
+            for root, dirs, files in os.walk(project_root):
+                root_path = Path(root)
+                _prune_dirs(dirs, root_path, project_root, backup_folder)  # edits `dirs` in place so os.walk does not descend into excluded directories
                 for file in files:
-                    if file == '.DS_Store' or file.endswith('.pyc'): continue
-                    file_path = os.path.join(root, file)
-                    arcname = os.path.relpath(file_path, base_dir)
+                    file_path = root_path / file
+                    if _should_skip_file(file_path, project_root, backup_folder):  # secrets, symlinks, caches, and anything inside backup_folder
+                        continue
+                    arcname = file_path.relative_to(project_root)  # archive member names are relative to the project root
                     zipf.write(file_path, arcname)
         print(f"✅ 備份完成!")
+        return str(output_path)  # success: path of the archive that was written
     except Exception as e:
         print(f"❌ 備份失敗: {e}")
+        return None  # failure is signalled by the printed message and a None result, not by raising
 
 if __name__ == "__main__":
-    create_backup()
\ No newline at end of file
+    create_backup()
diff --git a/services/telegram_templates.py b/services/telegram_templates.py
index bba6317..ed03394 100644
--- a/services/telegram_templates.py
+++ b/services/telegram_templates.py
@@ -31,6 +31,11 @@ sys_log = logging.getLogger("TelegramTpl")
 TELEGRAM_BOT_TOKEN_ENV = "TELEGRAM_BOT_TOKEN"
 TELEGRAM_CHAT_IDS_ENV  = "TELEGRAM_CHAT_IDS"
 _TELEGRAM_HTML_BR_RE = re.compile(r"<\s*br\s*/?\s*>", re.IGNORECASE)
+_TELEGRAM_HTML_TAG_RE = re.compile(r"<[^<>\n]{1,500}>")
+_TELEGRAM_ALLOWED_HTML_TAG_RE = re.compile(
+    r"(?:||)",
+    re.IGNORECASE,
+)
 
 
 # ══════════════════════════════════════════════════════════════════════════════
@@ -57,13 +62,25 @@ def _get_chat_ids() -> list:
 
 
 def _sanitize_telegram_html(text: str, parse_mode: Optional[str] = "HTML") -> str:
-    """Telegram HTML 不支援 
,統一轉為換行避免 sendMessage 400。""" + """Telegram HTML 只保留白名單標籤,其餘轉成文字避免 sendMessage 400。""" value = str(text or "") if parse_mode and str(parse_mode).upper() == "HTML": - return _TELEGRAM_HTML_BR_RE.sub("\n", value) + value = _normalize_telegram_html_linebreaks(value) + return _TELEGRAM_HTML_TAG_RE.sub(_escape_unsupported_telegram_html_tag, value) return value +def _normalize_telegram_html_linebreaks(text: str) -> str: + return _TELEGRAM_HTML_BR_RE.sub("\n", str(text or "")) + + +def _escape_unsupported_telegram_html_tag(match: re.Match) -> str: + tag = match.group(0) + if _TELEGRAM_ALLOWED_HTML_TAG_RE.fullmatch(tag): + return tag + return escape(tag) + + def _callback_payload_utf8(value: Any, max_bytes: int = 52) -> str: """Clamp callback payload by UTF-8 bytes without splitting multibyte chars.""" text = str(value or "unknown").strip() or "unknown" @@ -352,7 +369,7 @@ def price_decision(product_name: str, product_sku: str, direction = "📉" if diff > 0 else "📈" if diff < 0 else "➡️" safe_name = escape(str(product_name or "")) safe_sku = escape(str(product_sku or "")) - safe_reason = escape(_sanitize_telegram_html(str(reason or ""), "HTML")) + safe_reason = escape(_normalize_telegram_html_linebreaks(str(reason or ""))) message = ( f"💰 AI 定價決策建議\n" @@ -854,7 +871,7 @@ def _format_price_decision_envelope(envelope: Dict[str, Any]) -> List[str]: lines = [ "🧭 決策信封", - f"• 狀態:{decision_type} 等級:{severity}{confidence_text}", + f"• 類型:{decision_type} 嚴重度:{severity}{confidence_text}", f"• 資料品質:{data_quality} 自動執行:{'允許' if can_auto_execute else '不允許'}", ] if blocked_reason: diff --git a/tests/test_backup_system_tool.py b/tests/test_backup_system_tool.py new file mode 100644 index 0000000..6db8fe9 --- /dev/null +++ b/tests/test_backup_system_tool.py @@ -0,0 +1,71 @@ +import zipfile +from pathlib import Path + + +def test_source_backup_uses_project_root_and_excludes_runtime_secrets(tmp_path, monkeypatch): + from scripts.tools import backup_system + + project_root = tmp_path / 
"momo-pro-system" + project_root.mkdir() + (project_root / "config.py").write_text('SYSTEM_VERSION = "V10.TEST"\n', encoding="utf-8") + (project_root / "app.py").write_text("print('app')\n", encoding="utf-8") + (project_root / ".env").write_text("SECRET=1\n", encoding="utf-8") + (project_root / "config").mkdir() + (project_root / "config" / "google_token.json").write_text("{}", encoding="utf-8") + (project_root / "config" / "source_contract.json").write_text("{}", encoding="utf-8") + (project_root / "services").mkdir() + (project_root / "services" / "growth.py").write_text("# ok\n", encoding="utf-8") + (project_root / "docs" / "design").mkdir(parents=True) + (project_root / "docs" / "design" / "handoff.jsx").write_text("// generated\n", encoding="utf-8") + (project_root / "docs" / "guide.md").write_text("guide\n", encoding="utf-8") + (project_root / "export_assets").mkdir() + (project_root / "export_assets" / "logo.ai").write_text("asset\n", encoding="utf-8") + (project_root / "frontend" / ".next").mkdir(parents=True) + (project_root / "frontend" / ".next" / "trace").write_text("build\n", encoding="utf-8") + (project_root / "frontend" / "app").mkdir() + (project_root / "frontend" / "app" / "page.tsx").write_text("// source\n", encoding="utf-8") + (project_root / "logs").mkdir() + (project_root / "logs" / "system.log").write_text("runtime log\n", encoding="utf-8") + (project_root / "MOMO Pro" / "uploads").mkdir(parents=True) + (project_root / "MOMO Pro" / "uploads" / "pasted.png").write_text("asset\n", encoding="utf-8") + (project_root / "production_v3 3" / "static").mkdir(parents=True) + (project_root / "production_v3 3" / "static" / "old.css").write_text("old\n", encoding="utf-8") + (project_root / "components").symlink_to("web/templates/components") + (project_root / ".claude" / "worktrees").mkdir(parents=True) + (project_root / ".claude" / "worktrees" / "old.md").write_text("old", encoding="utf-8") + (project_root / "backups").mkdir() + (project_root / "backups" 
/ "old.zip").write_text("old", encoding="utf-8") + + monkeypatch.setenv("MOMO_BACKUP_ROOT", str(project_root)) + + backup_path = Path(backup_system.create_backup()) + + assert backup_path.parent == project_root / "backups" + assert "V10.TEST" in backup_path.name + assert backup_path.exists() + + with zipfile.ZipFile(backup_path) as archive: + names = set(archive.namelist()) + + assert "app.py" in names + assert "config.py" in names + assert "services/growth.py" in names + assert "docs/guide.md" in names + assert "frontend/app/page.tsx" in names + assert "config/source_contract.json" in names + assert "docs/design/handoff.jsx" not in names + assert "export_assets/logo.ai" not in names + assert "frontend/.next/trace" not in names + assert "logs/system.log" not in names + assert "MOMO Pro/uploads/pasted.png" not in names + assert "production_v3 3/static/old.css" not in names + assert ".env" not in names + assert ".claude/worktrees/old.md" not in names + assert "components" not in names + assert "config/google_token.json" not in names + assert "backups/old.zip" not in names + assert backup_path.name not in names + + +def test_root_backup_entrypoint_exists(): + assert (Path(__file__).resolve().parents[1] / "backup_system.py").exists() diff --git a/tests/test_telegram_triaged_alert_format.py b/tests/test_telegram_triaged_alert_format.py index 55ca288..73a50cc 100644 --- a/tests/test_telegram_triaged_alert_format.py +++ b/tests/test_telegram_triaged_alert_format.py @@ -10,6 +10,20 @@ def test_telegram_html_sanitizer_converts_br_tags_to_newlines(): assert _sanitize_telegram_html("第一行
第二行", parse_mode=None) == "第一行
第二行" +def test_telegram_html_sanitizer_escapes_unsupported_tags_but_keeps_allowed_tags(): + msg = _sanitize_telegram_html( + 'Ollama timeout ' + 'health ' + "" + ) + + assert "Ollama" in msg + assert "timeout" in msg + assert 'health' in msg + assert "標的
" in msg assert "PChome" in msg - assert "match_score / 91%" in msg + assert "📊 價格證據" in msg + assert "價差:NT$ 120(正值代表 MOMO 較貴)" in msg + assert "🧩 比對證據" in msg + assert "Match:0.91" in msg assert "identity_v2 + price_alert_exact" in msg + assert "✅ 人工下一步" in msg assert "動作:human_review" in msg - assert "revenue_loss_7d=42000" in msg assert "ai_call_id=123" in msg assert keyboard["inline_keyboard"][0][0]["callback_data"] == "momo:eig:decision_env_001"