新增 AI 自動化 Grafana 儀表板
Some checks failed
CD Pipeline / deploy (push) Has been cancelled

This commit is contained in:
OoO
2026-04-30 00:06:24 +08:00
parent d5f4fd7198
commit 5a61c020e3
13 changed files with 866 additions and 15 deletions

View File

@@ -2,7 +2,7 @@
> 本文件定義專案開發的核心準則與不可違反的規範
> **建立日期**: 2026-01-12
> **當前版本**: V10.9 (四 AI Agent 自動化 Smoke 每日摘要版)
> **當前版本**: V10.10 (四 AI Agent 自動化 Grafana 觀測版)
> **最後更新**: 2026-04-29
---
@@ -347,6 +347,7 @@
- ✅ **正確**:EventRouter 是告警、降級、去重、通知 replay 與 L2 safe action 的入口
- ✅ **正確**:AutoHeal 是自癒副作用入口,失敗時必須安全降級為 alert / log / file queue
- ✅ **正確**:L2 safe action 必須可審計、可回放、低副作用
- ✅ **正確**:AI 自動化觀測變更需同步 `/metrics`、Smoke dashboard 與 Grafana provisioning,避免告警閉環變成黑盒
- ❌ **禁止**:自動 restart / stop / recreate `momo-db` 或 `momo-postgres`
- ❌ **禁止**:AI 分析失敗導致 Telegram 告警完全不送出
- **依據**:ADR-012、ADR-013、ADR-018

View File

@@ -7,17 +7,18 @@
- ADR-018:四 AI Agent 自動化控制面立案。
- Memory:新增 `docs/memory/ai_automation_closure_20260429.md`。
- Guide/Skills 替代:新增 `docs/guides/ai_automation_session_sop.md`。
- SOT:更新 `docs/AI_INTELLIGENCE_MODULE_SOT.md` 至 V10.9 AI Automation Smoke Summary 架構。
- SOT:更新 `docs/AI_INTELLIGENCE_MODULE_SOT.md` 至 V10.10 AI Automation Grafana Observability 架構。
- Codex 規則:更新 `AGENTS.md`、`CONSTITUTION.md`、ADR/memory 索引。
- Prometheus 指標化:新增 EventRouter / AutoHeal / safe action / replay in-process metrics,並接入 `/metrics`。
- 線上 smoke dashboard:新增 `/ai_automation_smoke` 與 `/api/ai-automation/smoke`,覆蓋 EventRouter、AutoHeal、NemoTron fallback、OpenClaw embedding queue、ElephantAlpha HITL。
- Smoke 趨勢保存:`/api/ai-automation/smoke` 每次快檢追加 JSONL 精簡紀錄,dashboard 顯示最近趨勢。
- Smoke 趨勢管理:新增 JSONL 匯出、清理與每日摘要。
- Smoke 每日摘要:新增 Telegram 手動推播 API 與 momo-scheduler 每日 09:10 排程入口。
- Grafana 視覺化:新增 `MOMO AI Automation Overview` provisioning dashboard,覆蓋 EventRouter、safe action、replay、AutoHeal 指標。
【下次待辦】
- Superset / Grafana 視覺化:`momo_ai_event_router_dispatch_total`、`momo_ai_event_router_latency_ms_*`、`momo_ai_autoheal_action_total`
- Grafana/Superset panel 設定與 Smoke 摘要成效觀察。
- 將 Grafana provisioning dashboard 部署到 110/188 監控環境後,觀察 panel 是否都有資料
- Superset panel 設定與 Smoke 摘要成效觀察。
================================================================================
品牌資產最終處理與維護 (Phase 7) [DONE]

5
app.py
View File

@@ -95,9 +95,8 @@ except Exception as e:
sys_log.error(f"無法檢測磁碟空間: {e}")
# 🚩 系統版本定義 (備份與顯示用)
# 🚩 2026-04-29 V10.9: AI Smoke 每日摘要 — Telegram 手動推播 /
# scheduler 09:10 排程入口
SYSTEM_VERSION = "V10.9"
# 🚩 2026-04-29 V10.10: AI 自動化 Grafana dashboard provisioning
SYSTEM_VERSION = "V10.10"
# ==========================================
# 🔒 SQL Injection 防護函數

View File

@@ -253,7 +253,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# 系統版本與路徑
# ==========================================
SYSTEM_VERSION = "V10.9"
SYSTEM_VERSION = "V10.10"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # 用於模板顯示

View File

@@ -0,0 +1,779 @@
{
"annotations": {
"list": []
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [
{
"icon": "external link",
"tags": [],
"targetBlank": true,
"title": "AI Automation Smoke Dashboard",
"tooltip": "App read-only smoke dashboard",
"type": "link",
"url": "/ai_automation_smoke"
}
],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 100,
"panels": [],
"title": "AI 自動化總覽",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "最近 1 小時 EventRouter 接收並處理的事件量。",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 6,
"x": 0,
"y": 1
},
"id": 1,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"expr": "sum(increase(momo_ai_event_router_dispatch_total[1h]))",
"legendFormat": "dispatch / 1h",
"refId": "A"
}
],
"title": "EventRouter 1h 事件量",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "最近 24 小時 L2 SAFE_ACTIONS 執行量,依 action/status 分組。",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 6,
"x": 6,
"y": 1
},
"id": 2,
"options": {
"displayMode": "gradient",
"maxVizHeight": 300,
"minVizHeight": 16,
"minVizWidth": 8,
"namePlacement": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showUnfilled": true,
"valueMode": "color"
},
"targets": [
{
"expr": "sum by (action, status) (increase(momo_ai_event_router_safe_action_total[24h]))",
"legendFormat": "{{action}} / {{status}}",
"refId": "A"
}
],
"title": "L2 Safe Action 24h",
"type": "bargauge"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "最近 24 小時 Telegram queue replay 狀態。",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 6,
"x": 12,
"y": 1
},
"id": 3,
"options": {
"displayMode": "gradient",
"maxVizHeight": 300,
"minVizHeight": 16,
"minVizWidth": 8,
"namePlacement": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showUnfilled": true,
"valueMode": "color"
},
"targets": [
{
"expr": "sum by (status) (increase(momo_ai_event_router_replay_total[24h]))",
"legendFormat": "{{status}}",
"refId": "A"
}
],
"title": "Telegram Replay 24h",
"type": "bargauge"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "最近 24 小時 AutoHeal 動作結果,依 action/error_type/result 分組。",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 6,
"x": 18,
"y": 1
},
"id": 4,
"options": {
"displayMode": "gradient",
"maxVizHeight": 300,
"minVizHeight": 16,
"minVizWidth": 8,
"namePlacement": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showUnfilled": true,
"valueMode": "color"
},
"targets": [
{
"expr": "sum by (action, error_type, result) (increase(momo_ai_autoheal_action_total[24h]))",
"legendFormat": "{{action}} / {{error_type}} / {{result}}",
"refId": "A"
}
],
"title": "AutoHeal Action 24h",
"type": "bargauge"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 6
},
"id": 101,
"panels": [],
"title": "EventRouter 路由與延遲",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "EventRouter 每 5 分鐘新增 dispatch 數,依 outcome 分組。",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 7
},
"id": 5,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"sum"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"expr": "sum by (outcome) (increase(momo_ai_event_router_dispatch_total[5m]))",
"legendFormat": "{{outcome}}",
"refId": "A"
}
],
"title": "EventRouter Dispatch by Outcome",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "EventRouter 平均延遲與最大延遲,依 tier/event_type 分組。",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 3000
},
{
"color": "red",
"value": 10000
}
]
},
"unit": "ms"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 7
},
"id": 6,
"options": {
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"expr": "sum by (tier, event_type) (increase(momo_ai_event_router_latency_ms_sum[5m])) / clamp_min(sum by (tier, event_type) (increase(momo_ai_event_router_latency_ms_count[5m])), 1)",
"legendFormat": "avg {{tier}} / {{event_type}}",
"refId": "A"
},
{
"expr": "max by (tier, event_type) (momo_ai_event_router_latency_ms_max)",
"legendFormat": "max {{tier}} / {{event_type}}",
"refId": "B"
}
],
"title": "EventRouter Latency",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "最近 1 小時 dispatch 明細表,方便追 L1/L2/L3 與事件類型。",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 15
},
"id": 7,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": true,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"targets": [
{
"expr": "sum by (tier, event_type, outcome) (increase(momo_ai_event_router_dispatch_total[1h]))",
"format": "table",
"instant": true,
"legendFormat": "{{tier}} / {{event_type}} / {{outcome}}",
"refId": "A"
}
],
"title": "EventRouter 1h 明細",
"type": "table"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 23
},
"id": 102,
"panels": [],
"title": "AutoHeal 自癒觀測",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "AutoHeal 每 5 分鐘平均耗時與最大耗時。",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 3000
},
{
"color": "red",
"value": 10000
}
]
},
"unit": "ms"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 24
},
"id": 8,
"options": {
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"expr": "sum by (action, error_type) (increase(momo_ai_autoheal_duration_ms_sum[5m])) / clamp_min(sum by (action, error_type) (increase(momo_ai_autoheal_duration_ms_count[5m])), 1)",
"legendFormat": "avg {{action}} / {{error_type}}",
"refId": "A"
},
{
"expr": "max by (action, error_type) (momo_ai_autoheal_duration_ms_max)",
"legendFormat": "max {{action}} / {{error_type}}",
"refId": "B"
}
],
"title": "AutoHeal Duration",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "AutoHeal 最近 24 小時 action 明細表,確認是否有高風險類型集中。",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 24
},
"id": 9,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": true,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"targets": [
{
"expr": "sum by (action, error_type, result) (increase(momo_ai_autoheal_action_total[24h]))",
"format": "table",
"instant": true,
"legendFormat": "{{action}} / {{error_type}} / {{result}}",
"refId": "A"
}
],
"title": "AutoHeal 24h 明細",
"type": "table"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 32
},
"id": 103,
"panels": [],
"title": "操作說明",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 24,
"x": 0,
"y": 33
},
"id": 10,
"options": {
"code": {
"language": "plaintext",
"showLineNumbers": false,
"showMiniMap": false
},
"content": "資料來源:momo-app /metrics。此 dashboard 只讀 Prometheus,不觸發 AutoHeal、不清理 queue、不碰 momo-db。Smoke 狀態仍以 /ai_automation_smoke 與 /api/ai-automation/smoke 為準;Grafana 用來觀察 dispatch、latency、safe action、replay、AutoHeal 趨勢。",
"mode": "markdown"
},
"pluginVersion": "10.0.0",
"title": "AI 自動化觀測邊界",
"type": "text"
}
],
"refresh": "30s",
"schemaVersion": 38,
"tags": [
"momo",
"ai-automation",
"event-router",
"autoheal"
],
"templating": {
"list": []
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "Asia/Taipei",
"title": "MOMO AI Automation Overview",
"uid": "momo-ai-automation-overview",
"version": 1,
"weekStart": ""
}

View File

@@ -71,9 +71,7 @@ scrape_configs:
# ===========================================================================
# ---------------------------------------------------------------------------
# Momo Flask 應用 - 資料庫應用指標
# ---------------------------------------------------------------------------
# Momo Flask 應用 - 健康檢查 (應用未提供 /metrics,改用 /health)
# Momo Flask 應用 - 資料庫應用與 AI 自動化指標
# ---------------------------------------------------------------------------
- job_name: 'momo-app'
static_configs:

View File

@@ -1,8 +1,8 @@
# MOMO PRO — AI 競價情報模組 Single Source of Truth
> **最後更新**: 2026-04-29 (台北時間)
> **狀態**: 🟢 四 AI Agent 自動化閉環已落地 — EventRouter / AutoHeal / OpenClaw Memory / ElephantAlpha bridge / Prometheus metrics / Smoke Dashboard / Smoke Trend Management / Telegram Summary 具測試覆蓋
> **適用版本**: V10.9 AI Automation Smoke Summary 架構
> **狀態**: 🟢 四 AI Agent 自動化閉環已落地 — EventRouter / AutoHeal / OpenClaw Memory / ElephantAlpha bridge / Prometheus metrics / Smoke Dashboard / Smoke Trend Management / Telegram Summary / Grafana provisioning 具測試覆蓋
> **適用版本**: V10.10 AI Automation Grafana Observability 架構
---
@@ -67,6 +67,7 @@ SQL漏斗(~300筆)
- Smoke API 會將最近快檢結果保存到 JSONL,dashboard 顯示最近狀態趨勢。
- Smoke history 支援 JSONL 匯出、清理與每日 OK / Warning / Critical 摘要。
- Smoke 每日摘要支援手動 Telegram 推播,並由 `momo-scheduler` 每日 09:10 呼叫 `run_ai_smoke_daily_summary_task()`
- Grafana provisioning 新增 `docker/grafana/provisioning/dashboards/json/ai-automation-overview.json`,觀測 EventRouter dispatch/latency、safe action、Telegram replay 與 AutoHeal action/duration。
---

View File

@@ -148,7 +148,8 @@ L1 Hermes 掛 → L0 模板直出 + 🟡 「AI 分析暫不可用」
- 2026-04-29 已補 smoke 結果 JSONL 保存與 dashboard 趨勢視覺化。
- 2026-04-29 已補 smoke history JSONL 匯出、清理與每日摘要。
- 2026-04-29 已補 smoke 每日摘要 Telegram 手動推播與 momo-scheduler 09:10 排程入口。
- 尚未完成:Grafana/Superset 視覺化面板與推播成效觀察
- 2026-04-29 已補 Grafana provisioning dashboard:`MOMO AI Automation Overview` 覆蓋 EventRouter、L2 safe action、Telegram replay 與 AutoHeal Prometheus 指標
- 尚未完成:Superset 視覺化面板、Grafana 線上部署後資料觀察與 Smoke 摘要推播成效觀察。
## References
- `services/event_router.py` — 分流入口Phase 1

View File

@@ -26,6 +26,7 @@
- Telegram 失敗必須可暫存與 replay。
- EventRouter / AutoHeal 變更必須更新 `services/ai_automation_metrics.py` 指標或確認既有指標已覆蓋。
- AI 自動化閉環變更必須確認 `/api/ai-automation/smoke` 與 `/ai_automation_smoke` 仍能反映新狀態。
- AI 自動化 Prometheus 指標變更必須同步檢查 `docker/grafana/provisioning/dashboards/json/ai-automation-overview.json` 是否需要新增 panel 或查詢。
- Smoke dashboard 會保存 JSONL 趨勢;若新增檢查項目,要確保 history compact record 仍保持小而可讀。
- Smoke history 管理只能操作 `MOMO_AI_AUTOMATION_SMOKE_HISTORY` 指向的 JSONL,不得清理 DB 或 EventRouter queue。
- Smoke 每日摘要推播只讀 history,不得重新執行 smoke,也不得把完整 details 寫進 Telegram。
@@ -41,6 +42,7 @@
- 若有架構決策,新增 ADR 並更新 `docs/adr/README.md`
- 若有長期實況,更新 `docs/memory/*.md` 與 `docs/memory/README.md`
- 若 AI 架構事實改變,更新 `docs/AI_INTELLIGENCE_MODULE_SOT.md`
- 若 AI 自動化可觀測性改變,更新 Grafana provisioning JSON 與對應測試。
- 若 Codex 工作規則改變,更新 `AGENTS.md`;若紅線改變,更新 `CONSTITUTION.md`
- 提交前跑 `git diff --check` 與相關 pytest。
- 使用者要求推版時,commit 後 push 到遠端。

View File

@@ -13,7 +13,7 @@
| 檔案 | 用途 | 何時閱讀 |
|---|---|---|
| `history_logs.md` | 重大里程碑與歷史脈絡 | 要理解演進背景、排查「為何變成這樣」時 |
| `ai_automation_closure_20260429.md` | 四 AI Agent 自動化閉環與 2026-04-29 修復實況 | 接續 AI 自動化、EventRouter、AutoHeal、OpenClaw memory、ElephantAlpha 編排時 |
| `ai_automation_closure_20260429.md` | 四 AI Agent 自動化閉環、Smoke、metrics 與 Grafana 觀測實況 | 接續 AI 自動化、EventRouter、AutoHeal、OpenClaw memory、ElephantAlpha 編排、可觀測性時 |
| `credentials_passbook.md` | 伺服器、帳密、埠位對照 | 需要維運、部署、憑證核對時 |
| `feedback_db_metadata_import.md` | SQLAlchemy metadata / `create_all()` 漏表鐵律 | 新增 model、修 schema、排查 fresh env 漏表時 |
| `project_phase3f_cleanup_roadmap.md` | ADR-017 執行矩陣與階段紅線 | 正在做 3f 模組化收尾時 |

View File

@@ -14,6 +14,7 @@
- Smoke API 會保存最近快檢 JSONL 趨勢,dashboard 顯示 OK / Warning / Critical 最近分布。
- Smoke history 已支援 JSONL 匯出、清理與每日摘要;清理只影響 smoke history,不碰 DB 或 EventRouter queue。
- Smoke 每日摘要已支援手動 Telegram 推播與 scheduler 09:10 排程入口;摘要只讀 JSONL history。
- Grafana provisioning 已新增 `MOMO AI Automation Overview`,由 Prometheus `/metrics` 觀測 EventRouter、safe action、replay 與 AutoHeal 趨勢。
## 已落地範圍
@@ -30,6 +31,7 @@
- Smoke history 只保存精簡紀錄,不保存完整 details,避免長期檔案膨脹與敏感資訊堆積。
- Export API 回傳 `application/x-ndjson`,clear API 只刪除 `MOMO_AI_AUTOMATION_SMOKE_HISTORY` 指向檔案。
- Daily summary API:`POST /api/ai-automation/smoke/daily-summary/send`
- Grafana dashboard 檔案:`docker/grafana/provisioning/dashboards/json/ai-automation-overview.json`,provider 會載入 JSON 目錄,不需要修改 dashboard provider。
## 驗證紀錄
@@ -38,6 +40,8 @@
- 2026-04-29 AI smoke trend 批次:`5 passed`,smoke + metrics
- 2026-04-29 AI smoke management 批次:`7 passed`,smoke + metrics
- 2026-04-29 AI smoke summary 批次:`9 passed`,smoke + metrics
- 2026-04-29 AI Grafana observability 批次:`3 passed`,Grafana dashboard JSON 結構與必要 metric
- 2026-04-29 AI Grafana observability + AI core 回歸:`36 passed`,collect-only:`36 tests collected`
- 2026-04-29 L2 安全記憶批次:`24 passed`
- collect-only:`48 tests collected`
- `git diff --check` 已通過。
@@ -54,6 +58,11 @@
- `5b25f55` 補齊 EventRouter 失敗通知回放
- `162a76b` 落地 L2 安全記憶動作
- `d58e4d0` 同步四 Agent AI 自動化治理紀錄
- `e6a1c9d` 補齊 AI 自動化可觀測性指標
- `cde8b0c` 新增 AI 自動化 Smoke Dashboard
- `81159b5` 保存 AI Smoke 趨勢紀錄
- `10bbd55` 補齊 AI Smoke 趨勢管理
- `d5f4fd7` 加入 AI Smoke 每日摘要推播
## 下次進場先看

View File

@@ -29,6 +29,7 @@
- **Smoke 趨勢保存**: Smoke API 追加 JSONL 精簡紀錄,dashboard 顯示最近 OK / Warning / Critical 趨勢。
- **Smoke 趨勢管理**: Dashboard 增加 JSONL 匯出、清理與每日摘要,清理範圍限定 smoke history 檔。
- **Smoke 每日摘要推播**: 新增 Telegram 手動推播 API 與 momo-scheduler 每日 09:10 摘要任務,只讀 smoke history。
- **Grafana AI 觀測**: 新增 `MOMO AI Automation Overview` provisioning dashboard,覆蓋 EventRouter、safe action、replay、AutoHeal Prometheus 指標。
### 2026-04-28~29:Phase 3e 重構大戰 + daily_sales cache 隱形 bug 根除
- **app.py 縮減 -10.8%**: 7,386 → 6,590 行,11 commits 全綠零 502。

View File

@@ -0,0 +1,59 @@
import json
from pathlib import Path
# Parent of the directory that contains this test file
# (presumably the repository root — confirm against the project layout).
ROOT = Path(__file__).resolve().parents[1]
# Location, relative to ROOT, of the AI automation Grafana dashboard JSON
# that the tests in this module read and validate.
DASHBOARD_PATH = ROOT / "docker/grafana/provisioning/dashboards/json/ai-automation-overview.json"
def _panel_targets(dashboard: dict):
for panel in dashboard.get("panels", []):
yield from panel.get("targets", [])
def test_ai_automation_grafana_dashboard_is_valid_json():
    """The dashboard file parses as JSON and keeps its identifying metadata."""
    with DASHBOARD_PATH.open(encoding="utf-8") as handle:
        board = json.load(handle)

    # Identity fields that other tooling and docs refer to by exact value.
    uid = board["uid"]
    assert uid == "momo-ai-automation-overview"

    title = board["title"]
    assert title == "MOMO AI Automation Overview"

    tags = board["tags"]
    assert "ai-automation" in tags

    timezone = board["timezone"]
    assert timezone == "Asia/Taipei"
def test_ai_automation_grafana_dashboard_tracks_required_metrics():
    """Every required AI automation metric name appears in some panel query."""
    with DASHBOARD_PATH.open(encoding="utf-8") as handle:
        board = json.load(handle)

    # Collect all non-empty query expressions into one searchable text blob.
    queries = []
    for target in _panel_targets(board):
        if target.get("expr"):
            queries.append(target["expr"])
    combined = "\n".join(queries)

    required_metrics = (
        "momo_ai_event_router_dispatch_total",
        "momo_ai_event_router_latency_ms_count",
        "momo_ai_event_router_latency_ms_sum",
        "momo_ai_event_router_latency_ms_max",
        "momo_ai_event_router_safe_action_total",
        "momo_ai_event_router_replay_total",
        "momo_ai_autoheal_action_total",
        "momo_ai_autoheal_duration_ms_count",
        "momo_ai_autoheal_duration_ms_sum",
        "momo_ai_autoheal_duration_ms_max",
    )
    for name in required_metrics:
        assert name in combined
def test_ai_automation_grafana_dashboard_uses_prometheus_datasource():
    """Each top-level panel that has query targets uses datasource uid ``prometheus``."""
    with DASHBOARD_PATH.open(encoding="utf-8") as handle:
        board = json.load(handle)

    # Only panels that actually carry targets are checked; rows are skipped.
    queried = []
    for candidate in board["panels"]:
        if candidate.get("targets"):
            queried.append(candidate)

    # An empty selection would make the per-panel check vacuous.
    assert queried
    for candidate in queried:
        source = candidate.get("datasource", {})
        assert source.get("uid") == "prometheus"