diff --git a/CONSTITUTION.md b/CONSTITUTION.md index b9a6913..4c1b09a 100644 --- a/CONSTITUTION.md +++ b/CONSTITUTION.md @@ -2,7 +2,7 @@ > 本文件定義專案開發的核心準則與不可違反的規範 > **建立日期**: 2026-01-12 -> **當前版本**: V10.17 (Ollama embedding /api/embed 強化版) +> **當前版本**: V10.18 (Scheduler 例外記錄強化版) > **最後更新**: 2026-04-30 --- diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index 5797c4d..721cd45 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -26,6 +26,7 @@ - ElephantAlpha NIM fallback 強化:預設改用 production 可呼叫的 `nvidia/llama-3.3-nemotron-super-49b-v1.5`,Ultra 253B 權限 404 時自動 fallback。 - DatabaseManager 連線池收斂:PostgreSQL 每 worker pool 調整為 `pool_size=2/max_overflow=3`,避免多 route 重複 new manager 時吃滿連線。 - Ollama embedding 強化:改為優先 `/api/embed`,舊節點才 fallback `/api/embeddings`,並新增 `EMBEDDING_TIMEOUT`。 + - Scheduler 例外記錄強化:清除 `scheduler.py` 靜默 `except/pass`,資源清理、EDM 可選欄位、備份 insight/通知失敗全改為可診斷 log。 【下次待辦】 - 觀察 Prometheus scrape 後 `momo_ai_*` 是否在事件發生後產生時間序列。 diff --git a/app.py b/app.py index aa80958..1c170a0 100644 --- a/app.py +++ b/app.py @@ -95,8 +95,8 @@ except Exception as e: sys_log.error(f"無法檢測磁碟空間: {e}") # 🚩 系統版本定義 (備份與顯示用) -# 🚩 2026-04-30 V10.17: Ollama embedding /api/embed hardening -SYSTEM_VERSION = "V10.17" +# 🚩 2026-04-30 V10.18: Scheduler exception logging hardening +SYSTEM_VERSION = "V10.18" # ========================================== # 🔒 SQL Injection 防護函數 diff --git a/config.py b/config.py index 41fe631..f856d5b 100644 --- a/config.py +++ b/config.py @@ -254,7 +254,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.17" +SYSTEM_VERSION = "V10.18" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index bb32258..8cb5ff0 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -2,7 +2,7 @@ > **最後更新**: 2026-04-30 (台北時間) > **狀態**: 🟢 四 AI Agent 自動化閉環已落地 — EventRouter / AutoHeal / OpenClaw Memory / ElephantAlpha bridge / Prometheus metrics / Smoke Dashboard / Smoke Trend Management / Telegram Summary / Grafana provisioning / Prometheus scrape / CD Gunicorn 掛載具測試覆蓋 -> **適用版本**: V10.17 Ollama embedding /api/embed 強化版 +> **適用版本**: V10.18 Scheduler 例外記錄強化版 --- diff --git a/docs/memory/ai_automation_closure_20260429.md b/docs/memory/ai_automation_closure_20260429.md index 4d5e2a9..3fb3283 100644 --- a/docs/memory/ai_automation_closure_20260429.md +++ b/docs/memory/ai_automation_closure_20260429.md @@ -26,6 +26,7 @@ - 2026-04-30 production `NVIDIA_API_KEY` 可列出 Ultra 253B 但呼叫 `nvidia/llama-3.1-nemotron-ultra-253b-v1` 會 404;ElephantAlpha 預設改用 `nvidia/llama-3.3-nemotron-super-49b-v1.5` 並加入 fallback models。 - 2026-04-30 `DatabaseManager()` 多 route 重複建立曾有吃滿 PostgreSQL clients 風險;已重用 engine/session 並將每 worker pool 收斂為 `pool_size=2/max_overflow=3`。 - 2026-04-30 OpenClaw embedding worker 曾在舊 `/api/embeddings` 路徑遇到 Hermes timeout;Ollama client 已改為優先 `/api/embed`,舊節點才 fallback `/api/embeddings`。 +- 2026-04-30 `scheduler.py` 殘留靜默 `except/pass`;已改為 warning/debug log,備份 insight 與 Telegram 通知失敗不再靜默。 ## 已落地範圍 @@ -64,6 +65,7 @@ - 2026-04-30 ElephantAlpha NIM fallback hardening:新增 `tests/test_elephant_service.py`。 - 2026-04-30 DatabaseManager pool convergence:`tests/test_database_manager_cache.py` 覆蓋 pool size/overflow 與 engine reuse。 - 2026-04-30 Ollama embedding API migration:新增 `tests/test_ollama_embedding.py`。 +- 2026-04-30 Phase 3f cleanup contracts:`tests/test_phase3f_cleanup_contracts.py` 覆蓋 orphan services、env 範例、scheduler 靜默例外。 - 2026-04-29 L2 安全記憶批次:`24 passed`。 - collect-only:`48 tests collected`。 - `git diff --check` 已通過。 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index e57dc05..1ccd9f4 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -39,6 +39,7 @@ - **ElephantAlpha NIM fallback 強化**: production 帳號呼叫 Ultra 253B 會 404,預設改用可呼叫的 Nemotron Super 49B v1.5,並加入 70B / 8B fallback。 - **DatabaseManager 連線池收斂**: PostgreSQL 每 worker pool 收斂為 `pool_size=2/max_overflow=3`,並以 cache 重用 engine/session。 - **Ollama embedding API 遷移**: embedding client 優先使用官方 `/api/embed`,舊節點才 fallback `/api/embeddings`,降低 deprecated endpoint 與 timeout 風險。 +- **Scheduler 例外記錄強化**: 清除 `scheduler.py` 靜默 `except/pass`,Chrome 清理、EDM optional 欄位、備份 insight/Telegram 失敗均保留 log。 ### 2026-04-28~29:Phase 3e 重構大戰 + daily_sales cache 隱形 bug 根除 - **app.py 縮減 -10.8%**: 7,386 → 6,590 行,11 commits 全綠零 502。 diff --git a/scheduler.py b/scheduler.py index 61571d2..645ef97 100644 --- a/scheduler.py +++ b/scheduler.py @@ -151,8 +151,11 @@ def managed_scraper_resources(window_size='1920,5000', debug=False, timeout=45, if driver: try: driver.quit() - except: - pass + except Exception as cleanup_error: + logging.warning( + f"[Scraper] [Resource] ⚠️ Chrome 初始化失敗後關閉 driver 也失敗 | Error: {cleanup_error}", + exc_info=True, + ) driver = None if retry_count <= max_retries: @@ -178,8 +181,11 @@ def managed_scraper_resources(window_size='1920,5000', debug=False, timeout=45, # 先關閉所有視窗 try: driver.close() - except: - pass + except Exception as close_error: + logging.debug( + f"[Scraper] [Resource] Chrome 視窗關閉失敗但繼續 quit | Error: {close_error}", + exc_info=True, + ) # 再退出 driver driver.quit() except Exception as quit_error: @@ -188,14 +194,20 @@ def managed_scraper_resources(window_size='1920,5000', debug=False, timeout=45, try: import subprocess subprocess.run(['pkill', '-f', 'chrome.*--headless'], timeout=5, capture_output=True) - except: - pass + except Exception as pkill_error: + logging.warning( + f"[Scraper] [Resource] ⚠️ Chrome 強制清理失敗 | Error: {pkill_error}", + exc_info=True, + ) if session: try: session.close() - except: - pass + except Exception as session_error: + logging.warning( + f"[Scraper] [Resource] ⚠️ DB session 關閉失敗 | Error: {session_error}", + exc_info=True, + ) def run_momo_task(): @@ -206,8 +218,11 @@ def run_momo_task(): if is_task_paused("run_momo_task"): logging.info("[Crawler] [MOMO] ⏸️ 任務被 HITL 暫停中,本次跳過") return - except Exception: - pass # agent_actions 未就緒時不阻塞排程 + except Exception as pause_check_error: + logging.debug( + f"[Crawler] [MOMO] HITL 暫停檢查失敗但繼續排程 | Error: {pause_check_error}", + exc_info=True, + ) try: # V-New: 每次執行任務時,動態從 JSON 檔案重新讀取分類 @@ -517,8 +532,11 @@ def run_edm_task(lpn_code="O1K5FBOqsvN"): if "/" in txt and ":" in txt: # 增強判斷:需包含日期斜線與時間冒號 activity_time_text = txt break - except: - pass + except Exception as activity_time_error: + logging.debug( + f"[Crawler] [EDM] 活動時間文字解析失敗但繼續 | Error: {activity_time_error}", + exc_info=True, + ) if not activity_time_text: activity_time_text = activity_name logging.info(f"[Crawler] [EDM] ⏰ 抓取到的全站活動時間: {activity_time_text}") @@ -553,8 +571,11 @@ def run_edm_task(lpn_code="O1K5FBOqsvN"): # 嘗試抓取 .dateTime,若無則嘗試找包含 "開搶" 的元素 dt_el = parent.find_element(By.CSS_SELECTOR, ".dateTime") session_time_text = dt_el.text.strip() - except: - pass + except Exception as session_time_error: + logging.debug( + f"[Crawler] [EDM] 區塊時間說明解析失敗但繼續 | Block: {i+1} | Error: {session_time_error}", + exc_info=True, + ) time_el = parent.find_element(By.CSS_SELECTOR, ".dateTime .time span") if time_el: @@ -642,9 +663,10 @@ def run_edm_task(lpn_code="O1K5FBOqsvN"): price_text = price_el.text.replace(",", "").strip() if price_text.isdigit(): price = int(price_text) - except Exception: - logging.info(f"[Crawler] [EDM] ℹ️ 找不到價格元素 | i_code: {i_code} | Info: 可能已售完") - pass # price 保持為 None + except Exception as price_error: + logging.info( + f"[Crawler] [EDM] ℹ️ 找不到價格元素 | i_code: {i_code} | Info: 可能已售完 | Error: {price_error}" + ) # V9.91: 解析折扣數 discount_text = "" @@ -666,8 +688,11 @@ def run_edm_task(lpn_code="O1K5FBOqsvN"): qty_text = qty_span.text.strip().replace(",", "") if qty_text.isdigit(): remain_qty = int(qty_text) - except: - pass + except Exception as remain_qty_error: + logging.debug( + f"[Crawler] [EDM] 倒數組數解析失敗但繼續 | i_code: {i_code} | Error: {remain_qty_error}", + exc_info=True, + ) current_scan_icodes.add((i_code, time_slot)) @@ -1810,8 +1835,11 @@ def run_auto_import_task(): if is_task_paused("run_auto_import_task"): logging.info("[Scheduler] [AutoImport] ⏸️ 任務被 HITL 暫停中,本次跳過") return - except Exception: - pass + except Exception as pause_check_error: + logging.debug( + f"[Scheduler] [AutoImport] HITL 暫停檢查失敗但繼續排程 | Error: {pause_check_error}", + exc_info=True, + ) try: from services.import_service import import_service @@ -2226,8 +2254,11 @@ def run_db_backup_task(): metadata={"status": "success", "size_kb": size_kb, "deleted_old": deleted_count}, ai_model="scheduler", ) - except Exception: - pass + except Exception as insight_error: + logging.warning( + f"[Scheduler] [Backup] ⚠️ 備份成功 insight 寫入失敗但繼續通知 | Error: {insight_error}", + exc_info=True, + ) else: msg = ( f"🚨 資料庫備份失敗 ({now_str})\n" @@ -2262,8 +2293,11 @@ def run_db_backup_task(): metadata={"status": "failed", "error": result.get("error")}, ai_model="scheduler", ) - except Exception: - pass + except Exception as insight_error: + logging.warning( + f"[Scheduler] [Backup] ⚠️ 備份失敗 insight 寫入失敗但繼續通知 | Error: {insight_error}", + exc_info=True, + ) notifier._send_telegram_messages([msg]) @@ -2288,8 +2322,11 @@ def run_db_backup_task(): NotificationManager()._send_telegram_messages([ f"🚨 DB 備份排程異常\n錯誤:{e}" ]) - except Exception: - pass + except Exception as notify_error: + logging.warning( + f"[Scheduler] [Backup] ⚠️ 備份異常 Telegram 通知失敗 | Error: {notify_error}", + exc_info=True, + ) def run_backup_monitor_task(): @@ -2362,8 +2399,11 @@ def run_backup_monitor_task(): metadata={"alert": True, "reason": alert_reason, "latest_file": info.get("filename")}, ai_model="scheduler", ) - except Exception: - pass + except Exception as insight_error: + logging.warning( + f"[Scheduler] [BackupMonitor] ⚠️ 備份監控 insight 寫入失敗但繼續 | Error: {insight_error}", + exc_info=True, + ) else: created_at = info.get("created_at") logging.info(f"[Scheduler] [BackupMonitor] ✅ 備份狀態正常 | 最新: {info.get('filename')} @ {created_at}") diff --git a/tests/test_phase3f_cleanup_contracts.py b/tests/test_phase3f_cleanup_contracts.py index 781cdc4..9f25cfa 100644 --- a/tests/test_phase3f_cleanup_contracts.py +++ b/tests/test_phase3f_cleanup_contracts.py @@ -1,3 +1,4 @@ +import re from pathlib import Path @@ -69,3 +70,10 @@ def test_env_example_documents_runtime_and_ai_automation_variables(): } assert expected_keys <= _env_example_keys() + + +def test_scheduler_does_not_silently_swallow_exceptions(): + scheduler_source = (ROOT / "scheduler.py").read_text(encoding="utf-8") + + assert "except:" not in scheduler_source + assert not re.search(r"except(?: Exception)?[^\n]*:\n\s+pass(?:\s|#|$)", scheduler_source)