From cde8b0cd3ed3ce74854979c45d3aa029e13fe4cc Mon Sep 17 00:00:00 2001 From: OoO Date: Wed, 29 Apr 2026 23:46:48 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=20AI=20=E8=87=AA=E5=8B=95?= =?UTF-8?q?=E5=8C=96=20Smoke=20Dashboard?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CONSTITUTION.md | 2 +- TODO_NEXT_STEPS.txt | 5 +- app.py | 6 +- config.py | 2 +- docs/AI_INTELLIGENCE_MODULE_SOT.md | 6 +- docs/adr/ADR-012-agent-action-ladder.md | 3 +- docs/guides/ai_automation_session_sop.md | 3 +- docs/memory/ai_automation_closure_20260429.md | 3 + docs/memory/history_logs.md | 1 + routes/README.md | 2 +- routes/system_public_routes.py | 19 ++ services/ai_automation_smoke_service.py | 210 ++++++++++++++++++ templates/ai_automation_smoke.html | 179 +++++++++++++++ templates/components/_navbar.html | 7 +- tests/test_ai_automation_smoke_service.py | 38 ++++ 15 files changed, 473 insertions(+), 13 deletions(-) create mode 100644 services/ai_automation_smoke_service.py create mode 100644 templates/ai_automation_smoke.html create mode 100644 tests/test_ai_automation_smoke_service.py diff --git a/CONSTITUTION.md b/CONSTITUTION.md index e8d0861..5cab4f1 100644 --- a/CONSTITUTION.md +++ b/CONSTITUTION.md @@ -2,7 +2,7 @@ > 本文件定義專案開發的核心準則與不可違反的規範 > **建立日期**: 2026-01-12 -> **當前版本**: V10.5 (四 AI Agent 自動化可觀測性版) +> **當前版本**: V10.6 (四 AI Agent 自動化 Smoke Dashboard 版) > **最後更新**: 2026-04-29 --- diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index 70e139c..9065aed 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -7,13 +7,14 @@ - ADR-018:四 AI Agent 自動化控制面立案。 - Memory:新增 `docs/memory/ai_automation_closure_20260429.md`。 - Guide/Skills 替代:新增 `docs/guides/ai_automation_session_sop.md`。 - - SOT:更新 `docs/AI_INTELLIGENCE_MODULE_SOT.md` 至 V10.5 AI Automation Metrics 架構。 + - SOT:更新 `docs/AI_INTELLIGENCE_MODULE_SOT.md` 至 V10.6 AI Automation Smoke Dashboard 架構。 - Codex 規則:更新 `AGENTS.md`、`CONSTITUTION.md`、ADR/memory 索引。 - Prometheus 指標化:新增 EventRouter / AutoHeal / safe action / replay in-process metrics,並接入 `/metrics`。 + - 線上 smoke dashboard:新增 `/ai_automation_smoke` 與 `/api/ai-automation/smoke`,覆蓋 EventRouter、AutoHeal、NemoTron fallback、OpenClaw embedding queue、ElephantAlpha HITL。 【下次待辦】 - - 補線上 smoke dashboard:EventRouter、AutoHeal、NemoTron fallback、OpenClaw embedding queue、ElephantAlpha HITL。 - Superset / Grafana 視覺化:`momo_ai_event_router_dispatch_total`、`momo_ai_event_router_latency_ms_*`、`momo_ai_autoheal_action_total`。 + - Smoke dashboard 增加最近一次實際 smoke test 結果保存與趨勢圖。 ================================================================================ 品牌資產最終處理與維護 (Phase 7) [DONE] diff --git a/app.py b/app.py index feb5271..f39e13e 100644 --- a/app.py +++ b/app.py @@ -95,9 +95,9 @@ except Exception as e: sys_log.error(f"無法檢測磁碟空間: {e}") # 🚩 系統版本定義 (備份與顯示用) -# 🚩 2026-04-29 V10.5: AI 自動化可觀測性 — EventRouter / AutoHeal / -# safe action / Telegram replay metrics 接入 /metrics -SYSTEM_VERSION = "V10.5" +# 🚩 2026-04-29 V10.6: AI 自動化 Smoke Dashboard — EventRouter / AutoHeal / +# NemoTron / OpenClaw / ElephantAlpha 線上閉環快檢 +SYSTEM_VERSION = "V10.6" # ========================================== # 🔒 SQL Injection 防護函數 diff --git a/config.py b/config.py index 31ca607..ffe9d11 100644 --- a/config.py +++ b/config.py @@ -253,7 +253,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.5" +SYSTEM_VERSION = "V10.6" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index 75f44bf..f6211d4 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -1,8 +1,8 @@ # MOMO PRO — AI 競價情報模組 Single Source of Truth > **最後更新**: 2026-04-29 (台北時間) -> **狀態**: 🟢 四 AI Agent 自動化閉環已落地 — EventRouter / AutoHeal / OpenClaw Memory / ElephantAlpha bridge / Prometheus metrics 具測試覆蓋 -> **適用版本**: V10.5 AI Automation Metrics 架構 +> **狀態**: 🟢 四 AI Agent 自動化閉環已落地 — EventRouter / AutoHeal / OpenClaw Memory / ElephantAlpha bridge / Prometheus metrics / Smoke Dashboard 具測試覆蓋 +> **適用版本**: V10.6 AI Automation Smoke Dashboard 架構 --- @@ -62,6 +62,8 @@ SQL漏斗(~300筆) - `/metrics` 匯出 `momo_ai_event_router_safe_action_total`。 - `/metrics` 匯出 `momo_ai_event_router_replay_total`。 - `/metrics` 匯出 `momo_ai_autoheal_action_total` 與 `momo_ai_autoheal_duration_ms_count/sum/max`。 +- `/ai_automation_smoke` 提供登入後 smoke dashboard。 +- `/api/ai-automation/smoke` 提供 read-only JSON 狀態,不做外部網路呼叫。 --- diff --git a/docs/adr/ADR-012-agent-action-ladder.md b/docs/adr/ADR-012-agent-action-ladder.md index 6542b88..fe04797 100644 --- a/docs/adr/ADR-012-agent-action-ladder.md +++ b/docs/adr/ADR-012-agent-action-ladder.md @@ -144,7 +144,8 @@ L1 Hermes 掛 → L0 模板直出 + 🟡 「AI 分析暫不可用」 - Phase 1~3 核心已落地:EventRouter 分流、L2 safe action、NemoTron fallback、OpenClaw memory 寫入與通知 replay 已具測試覆蓋。 - L3 已擴展為 OpenClaw + ElephantAlpha:OpenClaw 負責策略/記憶,ElephantAlpha 負責 orchestration/HITL/AutoHeal bridge。 - 2026-04-29 已補 `/metrics` 匯出:EventRouter dispatch、L2 safe action、Telegram replay、AutoHeal action 與 latency/duration。 -- 尚未完成:完整線上 smoke dashboard 與 Grafana/Superset 視覺化面板。 +- 2026-04-29 已補 `/ai_automation_smoke` 與 `/api/ai-automation/smoke`:EventRouter、AutoHeal、NemoTron fallback、OpenClaw embedding queue、ElephantAlpha HITL 線上快檢。 +- 尚未完成:Grafana/Superset 視覺化面板與 smoke 結果趨勢保存。 ## References - `services/event_router.py` — 分流入口(Phase 1) diff --git a/docs/guides/ai_automation_session_sop.md b/docs/guides/ai_automation_session_sop.md index c189aa3..4de6ce8 100644 --- a/docs/guides/ai_automation_session_sop.md +++ b/docs/guides/ai_automation_session_sop.md @@ -25,6 +25,7 @@ - EventRouter 失敗時必須降級到 Hermes rule / L0 template,不可中斷通知。 - Telegram 失敗必須可暫存與 replay。 - EventRouter / AutoHeal 變更必須更新 `services/ai_automation_metrics.py` 指標或確認既有指標已覆蓋。 +- AI 自動化閉環變更必須確認 `/api/ai-automation/smoke` 與 `/ai_automation_smoke` 仍能反映新狀態。 - L2 action 必須在 `SAFE_ACTIONS` 且可審計、可回放、低副作用。 - AutoHeal 不得 restart / stop / recreate `momo-db` 或 `momo-postgres`。 - raw `ai_insights` 寫入後必須 enqueue embedding;若 enqueue 失敗,必須可 backfill。 @@ -33,7 +34,7 @@ ## 收尾 checklist -- 相關測試至少覆蓋 EventRouter、AutoHeal、NemoTron fallback、OpenClaw embedding bridge、ElephantAlpha engine、agent_actions、AI automation metrics。 +- 相關測試至少覆蓋 EventRouter、AutoHeal、NemoTron fallback、OpenClaw embedding bridge、ElephantAlpha engine、agent_actions、AI automation metrics、AI automation smoke。 - 若有架構決策,新增 ADR 並更新 `docs/adr/README.md`。 - 若有長期實況,更新 `docs/memory/*.md` 與 `docs/memory/README.md`。 - 若 AI 架構事實改變,更新 `docs/AI_INTELLIGENCE_MODULE_SOT.md`。 diff --git a/docs/memory/ai_automation_closure_20260429.md b/docs/memory/ai_automation_closure_20260429.md index 80a3315..0af3763 100644 --- a/docs/memory/ai_automation_closure_20260429.md +++ b/docs/memory/ai_automation_closure_20260429.md @@ -10,6 +10,7 @@ - OpenClaw learning 是 AI 記憶與 embedding queue 的橋接層;raw `ai_insights` 寫入者必須 enqueue embedding 或可回補。 - ElephantAlpha 只負責 orchestration / HITL / AutoHeal bridge,不可繞過 ADR-011、ADR-012、ADR-013。 - AI 自動化最小 Prometheus 指標已接入 `/metrics`,來源為 `services/ai_automation_metrics.py`。 +- 線上 smoke dashboard 已接入 `/ai_automation_smoke`,JSON API 為 `/api/ai-automation/smoke`。 ## 已落地範圍 @@ -22,10 +23,12 @@ - ElephantAlpha 執行引擎補 sync timeout、HITL reply_markup、未知 step fail fast、code/resource action 走 AutoHeal bridge。 - L2 `agent_actions.py` 的 `flag_for_human_review`、`route_to_km`、`mark_for_relearn` 已從 stub 改為可審計 OpenClaw memory 寫入。 - `/metrics` 已匯出 EventRouter dispatch、latency、safe action、Telegram replay、AutoHeal action 與 duration 指標。 +- Smoke dashboard read-only 檢查 EventRouter queue、AutoHeal protected resources、NemoTron fallback、OpenClaw embedding queue、ElephantAlpha HITL,不做外部網路呼叫。 ## 驗證紀錄 - 2026-04-29 AI metrics 批次:`26 passed`。 +- 2026-04-29 AI smoke dashboard 批次:`2 passed`(單檔 smoke service),後續核心組需持續納入。 - 2026-04-29 L2 安全記憶批次:`24 passed`。 - collect-only:`48 tests collected`。 - `git diff --check` 已通過。 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index f8e8f04..0b7d5d9 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -25,6 +25,7 @@ - **記憶閉環**: raw `ai_insights` insert 路徑補 embedding enqueue;OpenClaw learning 支援 stale reset 與 missing embedding backfill。 - **L2 action 落地**: `flag_for_human_review`、`route_to_km`、`mark_for_relearn` 改為可審計 OpenClaw memory 寫入。 - **可觀測性落地**: `/metrics` 匯出 EventRouter dispatch/latency、safe action、Telegram replay、AutoHeal action/duration 指標。 +- **Smoke Dashboard**: 新增 `/ai_automation_smoke` 與 `/api/ai-automation/smoke`,提供四 Agent 閉環 read-only 快檢。 ### 2026-04-28~29:Phase 3e 重構大戰 + daily_sales cache 隱形 bug 根除 - **app.py 縮減 -10.8%**: 7,386 → 6,590 行,11 commits 全綠零 502。 diff --git a/routes/README.md b/routes/README.md index 6d478b5..a943017 100644 --- a/routes/README.md +++ b/routes/README.md @@ -14,7 +14,7 @@ |------|------|----------| | `dashboard_routes.py` | 商品看板首頁 | `/` | | `sales_routes.py` | 業績分析與 ABC 明細 | `/sales_analysis`, `/growth_analysis`, `/abc_analysis/detail`, `/api/sales_analysis/*` | -| `system_public_routes.py` | 無 prefix 公開系統頁與監控 | `/health`, `/metrics`, `/settings`, `/system_settings`, `/logs`, `/api/logs`, `/api/backup` | +| `system_public_routes.py` | 無 prefix 公開系統頁與監控 | `/health`, `/metrics`, `/ai_automation_smoke`, `/api/ai-automation/smoke`, `/settings`, `/system_settings`, `/logs`, `/api/logs`, `/api/backup` | | `system_routes.py` | 內部系統維護 API | `/api/system/*` | | `edm_routes.py` | EDM 與節慶儀表板 | `/edm`, `/festival` | | `monthly_routes.py` | 月結分析 | `/monthly_summary_analysis`, `/api/monthly_summary_data` | diff --git a/routes/system_public_routes.py b/routes/system_public_routes.py index 987e2ac..f59f157 100644 --- a/routes/system_public_routes.py +++ b/routes/system_public_routes.py @@ -192,6 +192,25 @@ def system_settings_page(): return render_template('system_settings.html', system_version=SYSTEM_VERSION) +@system_public_bp.route('/ai_automation_smoke') +@login_required +def ai_automation_smoke_page(): + """AI 自動化閉環 smoke dashboard.""" + return render_template( + 'ai_automation_smoke.html', + system_version=SYSTEM_VERSION, + active_page='ai_automation_smoke', + ) + + +@system_public_bp.route('/api/ai-automation/smoke') +@login_required +def ai_automation_smoke_api(): + """Read-only smoke status for the four-agent AI automation control plane.""" + from services.ai_automation_smoke_service import collect_ai_automation_smoke + return jsonify(collect_ai_automation_smoke()) + + @system_public_bp.route('/logs') def show_logs(): return render_template('logs.html') diff --git a/services/ai_automation_smoke_service.py b/services/ai_automation_smoke_service.py new file mode 100644 index 0000000..6f5948e --- /dev/null +++ b/services/ai_automation_smoke_service.py @@ -0,0 +1,210 @@ +"""Smoke checks for the four-agent AI automation control plane. + +The checks are read-only and intentionally avoid outbound network calls. They +are meant for a fast dashboard/API sanity check, not for deep production probes. +""" + +from __future__ import annotations + +import os +from datetime import datetime +from typing import Any, Dict, List + +from sqlalchemy import text + +from config import SYSTEM_VERSION +from database.manager import get_session + + +STATUS_RANK = {"ok": 0, "warning": 1, "critical": 2} + + +def _check(name: str, status: str, summary: str, details: Dict[str, Any] | None = None) -> Dict[str, Any]: + return { + "name": name, + "status": status, + "summary": summary, + "details": details or {}, + } + + +def _count_jsonl_lines(path: str) -> int: + try: + with open(path, "r", encoding="utf-8") as fh: + return sum(1 for line in fh if line.strip()) + except FileNotFoundError: + return 0 + + +def _event_router_check() -> Dict[str, Any]: + try: + from services import event_router + from services.ai_automation_metrics import snapshot + + queue_count = _count_jsonl_lines(event_router._QUEUE_PATH) + metrics = snapshot() + dispatch_total = sum( + value for (metric, _labels), value in metrics.get("counters", {}).items() + if metric == "event_router_dispatch_total" + ) + status = "warning" if queue_count else "ok" + summary = "EventRouter 可用,通知 queue 乾淨" if status == "ok" else "EventRouter 可用,但有待回放通知" + return _check( + "EventRouter 通知鏈", + status, + summary, + { + "dispatch_sync": callable(getattr(event_router, "dispatch_sync", None)), + "notify_failure": callable(getattr(event_router, "notify_failure", None)), + "queued_deliveries": queue_count, + "dispatch_metric_total": dispatch_total, + }, + ) + except Exception as exc: + return _check("EventRouter 通知鏈", "critical", f"EventRouter smoke 失敗:{exc}") + + +def _autoheal_check() -> Dict[str, Any]: + try: + import services.auto_heal_service as autoheal + + protected = set(getattr(autoheal, "_PROTECTED_CONTAINERS", set())) + required = {"momo-db", "momo-postgres"} + missing = sorted(required - protected) + allowed_actions = sorted(getattr(autoheal, "_ALLOWED_ACTION_TYPES", set())) + status = "critical" if missing else "ok" + summary = "AutoHeal 保護資料庫容器,安全邊界存在" if status == "ok" else "AutoHeal protected resource 缺漏" + return _check( + "AutoHeal 安全邊界", + status, + summary, + { + "protected_containers": sorted(protected), + "missing_required_protection": missing, + "allowed_actions": allowed_actions, + }, + ) + except Exception as exc: + return _check("AutoHeal 安全邊界", "critical", f"AutoHeal smoke 失敗:{exc}") + + +def _nemotron_check() -> Dict[str, Any]: + try: + import services.nemoton_dispatcher_service as nemotron + + dispatcher_cls = getattr(nemotron, "NemotronDispatcherService", None) + fallback_ready = bool(dispatcher_cls and hasattr(dispatcher_cls, "_hermes_rule_fallback")) + api_key_configured = bool(getattr(nemotron, "NIM_API_KEY", "")) + call_count = getattr(nemotron, "_nim_call_count", {}).get("count", 0) + daily_limit = getattr(nemotron, "NIM_DAILY_LIMIT", 80) + if not fallback_ready: + status = "critical" + summary = "NemoTron Hermes fallback 缺失" + elif not api_key_configured: + status = "warning" + summary = "NemoTron API key 未設定,目前會走 Hermes fallback" + elif call_count >= daily_limit: + status = "warning" + summary = "NemoTron 配額已達上限,會走 Hermes fallback" + else: + status = "ok" + summary = "NemoTron 與 Hermes fallback 機制可用" + return _check( + "NemoTron fallback", + status, + summary, + { + "fallback_ready": fallback_ready, + "api_key_configured": api_key_configured, + "call_count": call_count, + "daily_limit": daily_limit, + }, + ) + except Exception as exc: + return _check("NemoTron fallback", "critical", f"NemoTron smoke 失敗:{exc}") + + +def _embedding_queue_check() -> Dict[str, Any]: + session = None + try: + session = get_session() + rows = session.execute( + text("SELECT status, COUNT(*) AS count FROM embedding_retry_queue GROUP BY status") + ).fetchall() + counts = {str(row._mapping["status"]): int(row._mapping["count"]) for row in rows} + pending = counts.get("pending", 0) + processing = counts.get("processing", 0) + if pending > 1000 or processing > 200: + status = "warning" + summary = "OpenClaw embedding queue backlog 偏高" + else: + status = "ok" + summary = "OpenClaw embedding queue 可讀取且 backlog 正常" + return _check( + "OpenClaw embedding queue", + status, + summary, + {"counts": counts, "pending": pending, "processing": processing}, + ) + except Exception as exc: + return _check( + "OpenClaw embedding queue", + "warning", + f"Embedding queue 無法讀取,可能是 DB 離線或 migration 未套用:{exc}", + ) + finally: + if session is not None: + session.close() + + +def _elephant_hitl_check() -> Dict[str, Any]: + try: + from services.elephant_alpha_autonomous_engine import ElephantAlphaAutonomousEngine + + has_hitl = hasattr(ElephantAlphaAutonomousEngine, "_escalate_to_human") + has_timeout_guard = hasattr(ElephantAlphaAutonomousEngine, "_run_with_timeout") + api_key_configured = bool(os.getenv("OPENROUTER_API_KEY") or os.getenv("NVIDIA_API_KEY")) + if not has_hitl or not has_timeout_guard: + status = "critical" + summary = "ElephantAlpha HITL 或 timeout guard 缺失" + elif not api_key_configured: + status = "warning" + summary = "ElephantAlpha HITL 程式可用,但 API key 未設定" + else: + status = "ok" + summary = "ElephantAlpha HITL 與 timeout guard 可用" + return _check( + "ElephantAlpha HITL", + status, + summary, + { + "hitl_method": has_hitl, + "timeout_guard": has_timeout_guard, + "api_key_configured": api_key_configured, + }, + ) + except Exception as exc: + return _check("ElephantAlpha HITL", "critical", f"ElephantAlpha smoke 失敗:{exc}") + + +def collect_ai_automation_smoke() -> Dict[str, Any]: + checks: List[Dict[str, Any]] = [ + _event_router_check(), + _autoheal_check(), + _nemotron_check(), + _embedding_queue_check(), + _elephant_hitl_check(), + ] + worst = max(checks, key=lambda item: STATUS_RANK.get(item["status"], 2))["status"] + return { + "status": worst, + "version": SYSTEM_VERSION, + "generated_at": datetime.now().isoformat(timespec="seconds"), + "checks": checks, + "summary": { + "ok": sum(1 for item in checks if item["status"] == "ok"), + "warning": sum(1 for item in checks if item["status"] == "warning"), + "critical": sum(1 for item in checks if item["status"] == "critical"), + "total": len(checks), + }, + } diff --git a/templates/ai_automation_smoke.html b/templates/ai_automation_smoke.html new file mode 100644 index 0000000..b782dcd --- /dev/null +++ b/templates/ai_automation_smoke.html @@ -0,0 +1,179 @@ +{% extends 'base.html' %} + +{% block title %}AI 自動化 Smoke Dashboard - WOOO TECH{% endblock %} + +{% block extra_css %} + +{% endblock %} + +{% block content %} +
+
+
+ FOUR-AGENT CONTROL PLANE +

AI 自動化 Smoke Dashboard

+

快速確認 EventRouter、AutoHeal、NemoTron、OpenClaw 與 ElephantAlpha 的閉環狀態。

+
+
+
讀取中...
+
版本 {{ system_version }}
+ +
+
+
+ +
+
OK
-
+
Warning
-
+
Critical
-
+
Generated
-
+
+ +
+{% endblock %} + +{% block extra_js %} + +{% endblock %} diff --git a/templates/components/_navbar.html b/templates/components/_navbar.html index ffad1c0..233c956 100755 --- a/templates/components/_navbar.html +++ b/templates/components/_navbar.html @@ -119,7 +119,7 @@ +
  • + + AI 自動化 Smoke + +
  • diff --git a/tests/test_ai_automation_smoke_service.py b/tests/test_ai_automation_smoke_service.py new file mode 100644 index 0000000..43518db --- /dev/null +++ b/tests/test_ai_automation_smoke_service.py @@ -0,0 +1,38 @@ +def test_event_router_smoke_reports_queued_deliveries(tmp_path, monkeypatch): + from services import ai_automation_metrics as metrics + from services import ai_automation_smoke_service as smoke + from services import event_router + + queue_path = tmp_path / "failed_deliveries.jsonl" + queue_path.write_text('{"event_key":"a"}\n{"event_key":"b"}\n', encoding="utf-8") + monkeypatch.setattr(event_router, "_QUEUE_PATH", str(queue_path)) + metrics.reset_for_tests() + metrics.record_event_router_dispatch( + tier="L1", + event_type="crawler_timeout", + delivered=False, + queued=True, + latency_ms=12, + ) + + result = smoke._event_router_check() + + assert result["status"] == "warning" + assert result["details"]["queued_deliveries"] == 2 + assert result["details"]["dispatch_metric_total"] == 1 + assert result["details"]["dispatch_sync"] is True + + +def test_collect_ai_automation_smoke_uses_worst_status(monkeypatch): + from services import ai_automation_smoke_service as smoke + + monkeypatch.setattr(smoke, "_event_router_check", lambda: smoke._check("event", "ok", "ok")) + monkeypatch.setattr(smoke, "_autoheal_check", lambda: smoke._check("autoheal", "warning", "warn")) + monkeypatch.setattr(smoke, "_nemotron_check", lambda: smoke._check("nemotron", "ok", "ok")) + monkeypatch.setattr(smoke, "_embedding_queue_check", lambda: smoke._check("embedding", "critical", "boom")) + monkeypatch.setattr(smoke, "_elephant_hitl_check", lambda: smoke._check("elephant", "ok", "ok")) + + result = smoke.collect_ai_automation_smoke() + + assert result["status"] == "critical" + assert result["summary"] == {"ok": 3, "warning": 1, "critical": 1, "total": 5}