feat(monitoring): 監控工具區塊 — Grafana/Prometheus/SigNoz/Gitea 狀態
- 新增 GET /api/v1/monitoring/status,asyncio.gather 並行探測四工具 - 前端 MonitoringTools 元件,60s 輪詢顯示狀態/版本/統計 - 新增 monitoringTools i18n key Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
174
apps/api/src/api/v1/monitoring.py
Normal file
174
apps/api/src/api/v1/monitoring.py
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
"""
|
||||||
|
Monitoring Status API
|
||||||
|
=====================
|
||||||
|
探測各監控工具狀態:Grafana / Prometheus / SigNoz / Gitea
|
||||||
|
|
||||||
|
所有探測從後端發出,不暴露內網 IP 給前端。
|
||||||
|
|
||||||
|
建立時間: 2026-04-03 (台北時區)
|
||||||
|
建立者: Claude Code
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
import os
from datetime import UTC, datetime

import httpx
from fastapi import APIRouter

from src.core.logging import get_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/monitoring", tags=["Monitoring"])
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Internal service endpoints (backend-only)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Probe targets, keyed by tool. "base" is the service root URL; the remaining
# keys are URL paths appended to it by the probe functions.
#
# Each base URL can be overridden with an environment variable so the module
# is not tied to one network layout; when the variable is unset or empty the
# original address is used. A trailing slash is stripped so that
# f"{base}{path}" never produces a double slash.
SERVICES = {
    "grafana": {
        "base": (os.environ.get("MONITORING_GRAFANA_URL") or "http://192.168.0.188:3000").rstrip("/"),
        "health": "/api/health",
        "build": "/api/frontend/settings",
    },
    "prometheus": {
        "base": (os.environ.get("MONITORING_PROMETHEUS_URL") or "http://192.168.0.110:9090").rstrip("/"),
        "health": "/-/healthy",
        "build": "/api/v1/status/buildinfo",
        "rules": "/api/v1/rules",
    },
    "signoz": {
        "base": (os.environ.get("MONITORING_SIGNOZ_URL") or "http://192.168.0.188:3301").rstrip("/"),
        "health": "/api/v1/health",
    },
    "gitea": {
        "base": (os.environ.get("MONITORING_GITEA_URL") or "http://192.168.0.110:3001").rstrip("/"),
        "health": "/-/readiness",
    },
}

# Per-request timeout, in seconds, passed to every httpx call in this module.
TIMEOUT = 3.0
|
||||||
|
|
||||||
|
|
||||||
|
async def _probe_grafana(client: httpx.AsyncClient) -> dict:
    """Probe Grafana and return its status card.

    The service counts as "up" when the health endpoint answers HTTP 200.
    The version and dashboard count are best-effort extras: a failure while
    fetching or parsing them is logged and leaves the field empty, but does
    not turn an otherwise healthy service into "down".

    Args:
        client: Shared async HTTP client used for all requests.

    Returns:
        dict with keys name / status / version / stats / description.
    """
    svc = SERVICES["grafana"]
    base = svc["base"]
    result = {
        "name": "Grafana",
        "status": "down",
        "version": None,
        "stats": None,
        "description": "監控面板 · 指標視覺化",
    }

    # Only the health request decides up/down.
    try:
        health_r = await client.get(f"{base}{svc['health']}", timeout=TIMEOUT)
    except Exception as e:
        logger.warning("grafana_probe_failed", error=str(e))
        return result
    if health_r.status_code != 200:
        return result

    result["status"] = "up"
    result["version"] = "—"  # placeholder shown when the health body has no version

    try:
        result["version"] = health_r.json().get("version", "—")
    except Exception as e:
        logger.warning("grafana_version_failed", error=str(e))

    # Dashboard count is optional; any non-200 answer simply omits the stat.
    try:
        dash_r = await client.get(f"{base}/api/search?type=dash-db", timeout=TIMEOUT)
        if dash_r.status_code == 200:
            result["stats"] = f"面板 {len(dash_r.json())} 個"
    except Exception as e:
        logger.warning("grafana_dashboards_failed", error=str(e))

    return result
|
||||||
|
|
||||||
|
|
||||||
|
async def _probe_prometheus(client: httpx.AsyncClient) -> dict:
    """Probe Prometheus and return its status card.

    The service counts as "up" when the health endpoint answers HTTP 200.
    Build info and rule statistics are fetched afterwards on a best-effort
    basis: an error in either is logged and leaves the corresponding fields
    at their defaults instead of reporting the whole service as "down".

    Args:
        client: Shared async HTTP client used for all requests.

    Returns:
        dict with keys name / status / version / stats / description /
        firing_count, where firing_count is the number of rules whose
        "state" is "firing" (0 when unknown).
    """
    svc = SERVICES["prometheus"]
    base = svc["base"]
    result = {
        "name": "Prometheus",
        "status": "down",
        "version": None,
        "stats": None,
        "description": "時序資料庫 · 告警規則",
        "firing_count": 0,
    }

    # Only the health request decides up/down.
    try:
        health_r = await client.get(f"{base}{svc['health']}", timeout=TIMEOUT)
    except Exception as e:
        logger.warning("prometheus_probe_failed", error=str(e))
        return result
    if health_r.status_code != 200:
        return result
    result["status"] = "up"

    # Version from build info (optional).
    try:
        build_r = await client.get(f"{base}{svc['build']}", timeout=TIMEOUT)
        if build_r.status_code == 200:
            result["version"] = build_r.json().get("data", {}).get("version")
    except Exception as e:
        logger.warning("prometheus_buildinfo_failed", error=str(e))

    # Rule totals (optional). `or []` tolerates a JSON null for groups/rules.
    try:
        rules_r = await client.get(f"{base}{svc['rules']}", timeout=TIMEOUT)
        if rules_r.status_code == 200:
            groups = rules_r.json().get("data", {}).get("groups") or []
            rules = [rule for group in groups for rule in (group.get("rules") or [])]
            firing_count = sum(1 for rule in rules if rule.get("state") == "firing")

            stats_parts = [f"規則 {len(rules)} 條"]
            if firing_count > 0:
                stats_parts.append(f"{firing_count} 觸發")
            result["stats"] = " · ".join(stats_parts)
            result["firing_count"] = firing_count
    except Exception as e:
        logger.warning("prometheus_rules_failed", error=str(e))

    return result
|
||||||
|
|
||||||
|
|
||||||
|
async def _probe_signoz(client: httpx.AsyncClient) -> dict:
    """Probe SigNoz and return its status card.

    Two endpoints are tried in order: the configured health path (accepted
    on HTTP 200) and, as a fallback, the site root (accepted on 200/301/302).
    The first acceptable answer marks the service "up"; if neither succeeds
    the service is reported "down". Every request error is logged, including
    errors from the fallback request.

    Args:
        client: Shared async HTTP client used for all requests.

    Returns:
        dict with keys name / status / version / stats / description.
    """
    svc = SERVICES["signoz"]
    base = svc["base"]

    # (path, status codes that count as alive), tried in order.
    attempts = (
        (svc["health"], (200,)),
        ("/", (200, 301, 302)),
    )
    for path, ok_codes in attempts:
        try:
            r = await client.get(f"{base}{path}", timeout=TIMEOUT)
        except Exception as e:
            logger.warning("signoz_probe_failed", path=path, error=str(e))
            continue
        if r.status_code in ok_codes:
            return {
                "name": "SigNoz",
                "status": "up",
                "version": None,
                "stats": "APM · 追蹤 · 日誌",
                "description": "可觀測性平台",
            }

    return {"name": "SigNoz", "status": "down", "version": None, "stats": None, "description": "可觀測性平台"}
|
||||||
|
|
||||||
|
|
||||||
|
async def _probe_gitea(client: httpx.AsyncClient) -> dict:
    """Probe Gitea and return its status card.

    The service counts as "up" when the configured readiness path answers
    HTTP 200. The version lookup is a best-effort extra: if it fails, the
    error is logged and the version stays None, but the service is still
    reported "up".

    Args:
        client: Shared async HTTP client used for all requests.

    Returns:
        dict with keys name / status / version / stats / description.
    """
    svc = SERVICES["gitea"]
    base = svc["base"]
    result = {
        "name": "Gitea",
        "status": "down",
        "version": None,
        "stats": None,
        "description": "代碼倉庫 · Pipeline",
    }

    # Only the readiness request decides up/down.
    try:
        ready_r = await client.get(f"{base}{svc['health']}", timeout=TIMEOUT)
    except Exception as e:
        logger.warning("gitea_probe_failed", error=str(e))
        return result
    if ready_r.status_code != 200:
        return result

    result["status"] = "up"
    result["stats"] = "CI/CD · Git 倉庫"

    # Version from the API (optional).
    try:
        ver_r = await client.get(f"{base}/api/v1/version", timeout=TIMEOUT)
        if ver_r.status_code == 200:
            result["version"] = ver_r.json().get("version")
    except Exception as e:
        logger.warning("gitea_version_failed", error=str(e))

    return result
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/status")
async def get_monitoring_status() -> dict:
    """Probe all monitoring tools concurrently and return their status.

    The four probes run in parallel through asyncio.gather over one shared
    HTTP client. A probe that raises instead of returning a card is logged
    and left out of the response rather than failing the whole request.

    Returns:
        dict with "tools" (list of cards, each containing
        name/status/version/stats/description plus checked_at) and a
        top-level "checked_at" ISO-8601 UTC timestamp.
    """
    async with httpx.AsyncClient(follow_redirects=True) as client:
        results = await asyncio.gather(
            _probe_grafana(client),
            _probe_prometheus(client),
            _probe_signoz(client),
            _probe_gitea(client),
            return_exceptions=True,
        )

    # One timestamp for the whole batch so every card and the envelope agree.
    checked_at = datetime.now(UTC).isoformat()

    tools = []
    for result in results:
        # BaseException rather than Exception: with return_exceptions=True a
        # cancelled probe is returned as CancelledError, which is not an
        # Exception subclass and cannot be unpacked as a dict below.
        if isinstance(result, BaseException):
            logger.warning("monitoring_probe_crashed", error=str(result))
            continue
        tools.append({**result, "checked_at": checked_at})

    return {
        "tools": tools,
        "checked_at": checked_at,
    }
|
||||||
@@ -57,6 +57,7 @@ from src.api.v1 import (
|
|||||||
from src.api.v1 import (
|
from src.api.v1 import (
|
||||||
signoz_webhook as signoz_webhook_v1, # Phase 21: SignOz → Telegram (ADR-037)
|
signoz_webhook as signoz_webhook_v1, # Phase 21: SignOz → Telegram (ADR-037)
|
||||||
)
|
)
|
||||||
|
from src.api.v1 import monitoring as monitoring_v1 # 2026-04-03: 監控工具狀態
|
||||||
from src.api.v1 import stats as stats_v1 # Phase 6.5: Statistics Analytics
|
from src.api.v1 import stats as stats_v1 # Phase 6.5: Statistics Analytics
|
||||||
from src.api.v1 import telegram as telegram_v1 # Phase 5.4: Telegram Gateway
|
from src.api.v1 import telegram as telegram_v1 # Phase 5.4: Telegram Gateway
|
||||||
from src.api.v1 import terminal as terminal_v1 # Phase 19.1: Omni-Terminal SSE
|
from src.api.v1 import terminal as terminal_v1 # Phase 19.1: Omni-Terminal SSE
|
||||||
@@ -402,6 +403,9 @@ app.include_router(
|
|||||||
app.include_router(
|
app.include_router(
|
||||||
stats_v1.router, prefix="/api/v1", tags=["Statistics"]
|
stats_v1.router, prefix="/api/v1", tags=["Statistics"]
|
||||||
) # Phase 6.5: Statistics Analytics
|
) # Phase 6.5: Statistics Analytics
|
||||||
|
app.include_router(
|
||||||
|
monitoring_v1.router, prefix="/api/v1", tags=["Monitoring"]
|
||||||
|
) # 2026-04-03: 監控工具狀態
|
||||||
app.include_router(
|
app.include_router(
|
||||||
github_webhook_v1.router, prefix="/api/v1", tags=["GitHub Webhook"]
|
github_webhook_v1.router, prefix="/api/v1", tags=["GitHub Webhook"]
|
||||||
) # Phase 13.1: GitHub → OpenClaw
|
) # Phase 13.1: GitHub → OpenClaw
|
||||||
|
|||||||
@@ -143,7 +143,13 @@
|
|||||||
"infrastructure": "INFRASTRUCTURE",
|
"infrastructure": "INFRASTRUCTURE",
|
||||||
"podHealth": "POD Health",
|
"podHealth": "POD Health",
|
||||||
"allRunning": "All Running",
|
"allRunning": "All Running",
|
||||||
"servicesUp": "Services Up"
|
"servicesUp": "Services Up",
|
||||||
|
"monitoringTools": "Monitoring Tools",
|
||||||
|
"monitoringStatus": {
|
||||||
|
"up": "OK",
|
||||||
|
"down": "Down",
|
||||||
|
"unknown": "Unknown"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"openclaw": {
|
"openclaw": {
|
||||||
"name": "OpenClaw",
|
"name": "OpenClaw",
|
||||||
|
|||||||
@@ -144,7 +144,13 @@
|
|||||||
"infrastructure": "基礎架構",
|
"infrastructure": "基礎架構",
|
||||||
"podHealth": "POD 健康",
|
"podHealth": "POD 健康",
|
||||||
"allRunning": "全部運行中",
|
"allRunning": "全部運行中",
|
||||||
"servicesUp": "服務上線"
|
"servicesUp": "服務上線",
|
||||||
|
"monitoringTools": "監控工具",
|
||||||
|
"monitoringStatus": {
|
||||||
|
"up": "正常",
|
||||||
|
"down": "離線",
|
||||||
|
"unknown": "未知"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"openclaw": {
|
"openclaw": {
|
||||||
"name": "OpenClaw",
|
"name": "OpenClaw",
|
||||||
|
|||||||
@@ -8,10 +8,12 @@
|
|||||||
* 統帥鐵律: 使用真實數據 Hook,禁止假數據!
|
* 統帥鐵律: 使用真實數據 Hook,禁止假數據!
|
||||||
*
|
*
|
||||||
* @updated 2026-04-02 Claude Code — Metrics Strip 7指標視覺強化
|
* @updated 2026-04-02 Claude Code — Metrics Strip 7指標視覺強化
|
||||||
|
* @updated 2026-04-03 Claude Code — 監控工具區塊 (Grafana/Prometheus/SigNoz/Gitea)
|
||||||
* 串接: incidents(count/P0/MTTR/autoRemediation) + dashboard(serviceHealth/pendingApprovals/podHealth)
|
* 串接: incidents(count/P0/MTTR/autoRemediation) + dashboard(serviceHealth/pendingApprovals/podHealth)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { useTranslations } from 'next-intl'
|
import { useTranslations } from 'next-intl'
|
||||||
|
import { useState, useEffect } from 'react'
|
||||||
import { useGlobalPulseMetrics } from '@/hooks/useGlobalPulseMetrics'
|
import { useGlobalPulseMetrics } from '@/hooks/useGlobalPulseMetrics'
|
||||||
import { useIncidents } from '@/hooks/useIncidents'
|
import { useIncidents } from '@/hooks/useIncidents'
|
||||||
import { useHosts, useDashboardStore } from '@/stores/dashboard.store'
|
import { useHosts, useDashboardStore } from '@/stores/dashboard.store'
|
||||||
@@ -20,6 +22,8 @@ import { OpenClawPanel } from '@/components/ai/openclaw-panel'
|
|||||||
import { HostGrid, type HostInfo } from '@/components/infra/host-grid'
|
import { HostGrid, type HostInfo } from '@/components/infra/host-grid'
|
||||||
import { AppLayout } from '@/components/layout'
|
import { AppLayout } from '@/components/layout'
|
||||||
|
|
||||||
|
const API_BASE = process.env.NEXT_PUBLIC_API_URL ?? ''
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
// Types
|
// Types
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -55,6 +59,96 @@ function MiniSparkline({ values, color }: { values: number[]; color: string }) {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Monitoring Tools Component
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/** One tool card as returned in `tools[]` by GET /api/v1/monitoring/status. */
interface MonitoringTool {
  /** Display name; also used as the React key and the icon lookup key. */
  name: string
  /** The component treats 'up' as healthy and anything else as offline. */
  status: string
  /** Short description line shown under the name. */
  description: string
  /** Version string without the leading "v", or null when not reported. */
  version: string | null
  /** Optional pre-formatted statistics line, or null. */
  stats: string | null
  /** Number of firing rules; only present on some tools. */
  firing_count?: number
  /** Timestamp string of the probe; parsed with `new Date(...)` for display. */
  checked_at: string
}
|
||||||
|
|
||||||
|
// Icon per tool name; hoisted so the map is not rebuilt on every render.
const MONITORING_TOOL_ICONS: Record<string, string> = {
  Grafana: '📊',
  Prometheus: '🔥',
  SigNoz: '🔭',
  Gitea: '🐙',
}

// Badge palette for the three visual states of a tool.
const MONITORING_BADGE_TONES = {
  ok: { color: '#22C55E', background: 'rgba(34,197,94,0.08)', border: 'rgba(34,197,94,0.25)' },
  warn: { color: '#F59E0B', background: 'rgba(245,158,11,0.08)', border: 'rgba(245,158,11,0.25)' },
  down: { color: '#cc2200', background: 'rgba(204,34,0,0.08)', border: 'rgba(204,34,0,0.25)' },
} as const

/**
 * Lists the monitoring tools with a status badge, version, stats line and
 * last-check time. Data comes from GET /api/v1/monitoring/status, fetched on
 * mount and then every 60 seconds.
 *
 * A failed or non-OK request keeps the previously loaded list instead of
 * clearing it; in-flight requests are aborted when the component unmounts.
 *
 * NOTE(review): the badge and loading labels are hard-coded strings. The
 * `monitoringStatus` i18n keys added alongside this component are not used
 * here — consider wiring them through useTranslations once the namespace is
 * confirmed.
 */
function MonitoringTools() {
  const [tools, setTools] = useState<MonitoringTool[]>([])
  const [loading, setLoading] = useState(true)

  useEffect(() => {
    const controller = new AbortController()

    const load = async () => {
      try {
        const res = await fetch(`${API_BASE}/api/v1/monitoring/status`, { signal: controller.signal })
        // fetch only rejects on network errors; treat HTTP errors as failures too.
        if (!res.ok) throw new Error(`HTTP ${res.status}`)
        const data = await res.json()
        if (controller.signal.aborted) return
        // Guard against a missing or malformed payload so .map below cannot throw.
        setTools(Array.isArray(data?.tools) ? data.tools : [])
      } catch {
        // Best effort: keep whatever list was loaded last.
      } finally {
        // Skip the state update once the component has unmounted.
        if (!controller.signal.aborted) setLoading(false)
      }
    }

    load()
    const timer = setInterval(load, 60000)
    return () => {
      clearInterval(timer)
      controller.abort()
    }
  }, [])

  if (loading) return (
    <div style={{ padding: '12px 14px', fontSize: 12, color: '#87867f', fontFamily: 'var(--font-body), monospace' }}>
      載入中...
    </div>
  )

  return (
    <div>
      {tools.map((tool, i) => {
        const isUp = tool.status === 'up'
        const hasFiring = (tool.firing_count ?? 0) > 0
        // Offline wins over firing; firing only matters for a reachable tool.
        const tone = !isUp
          ? MONITORING_BADGE_TONES.down
          : hasFiring
            ? MONITORING_BADGE_TONES.warn
            : MONITORING_BADGE_TONES.ok
        const label = !isUp ? '離線' : hasFiring ? `${tool.firing_count} 觸發` : '正常'

        return (
          <div key={tool.name} style={{
            padding: '10px 14px',
            borderBottom: i < tools.length - 1 ? '0.5px solid #f0ede4' : 'none',
            display: 'flex',
            alignItems: 'center',
            gap: 10,
          }}>
            <div style={{ fontSize: 18, flexShrink: 0, width: 24, textAlign: 'center' }}>{MONITORING_TOOL_ICONS[tool.name] ?? '⚙️'}</div>
            <div style={{ flex: 1, minWidth: 0 }}>
              <div style={{ display: 'flex', alignItems: 'center', gap: 6, marginBottom: 2 }}>
                <span style={{ fontSize: 13, fontWeight: 700, color: '#141413', fontFamily: 'var(--font-body), monospace' }}>{tool.name}</span>
                <span style={{
                  display: 'inline-flex', alignItems: 'center', gap: 3,
                  fontSize: 10, fontWeight: 600,
                  color: tone.color,
                  background: tone.background,
                  border: `0.5px solid ${tone.border}`,
                  borderRadius: 4, padding: '1px 5px',
                }}>
                  <span style={{ width: 4, height: 4, borderRadius: '50%', background: 'currentColor', display: 'inline-block' }} />
                  {label}
                </span>
              </div>
              <div style={{ fontSize: 11, color: '#87867f', fontFamily: 'var(--font-body), monospace' }}>
                {tool.description}
                {tool.version && <span style={{ color: '#c0bdb4' }}> · v{tool.version}</span>}
              </div>
              {tool.stats && (
                <div style={{ fontSize: 11, color: '#a0a09a', fontFamily: 'var(--font-body), monospace', marginTop: 1 }}>{tool.stats}</div>
              )}
            </div>
            <div style={{ fontSize: 10, color: '#c0bdb4', fontFamily: 'var(--font-body), monospace', flexShrink: 0, textAlign: 'right' }}>
              {new Date(tool.checked_at).toLocaleTimeString('zh-TW', { timeZone: 'Asia/Taipei', hour: '2-digit', minute: '2-digit' })}
            </div>
          </div>
        )
      })}
    </div>
  )
}
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
// Main Page
|
// Main Page
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -414,6 +508,29 @@ export default function Home({ params }: { params: { locale: string } }) {
|
|||||||
}))} />
|
}))} />
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* 監控工具 */}
|
||||||
|
<div style={{
|
||||||
|
background: '#fff',
|
||||||
|
border: '0.5px solid #e0ddd4',
|
||||||
|
borderRadius: 12,
|
||||||
|
overflow: 'hidden',
|
||||||
|
boxShadow: '0 1px 4px rgba(0,0,0,0.05)',
|
||||||
|
flexShrink: 0,
|
||||||
|
}}>
|
||||||
|
<div style={{
|
||||||
|
padding: '10px 14px',
|
||||||
|
borderBottom: '0.5px solid #e0ddd4',
|
||||||
|
fontSize: 14, fontWeight: 700, color: '#141413',
|
||||||
|
letterSpacing: '0.5px',
|
||||||
|
fontFamily: 'var(--font-body), monospace', background: '#faf9f3',
|
||||||
|
display: 'flex', alignItems: 'center', gap: 8,
|
||||||
|
}}>
|
||||||
|
<div style={{ width: 6, height: 6, borderRadius: '50%', background: '#d97757', flexShrink: 0 }} />
|
||||||
|
{tDashboard('monitoringTools')}
|
||||||
|
</div>
|
||||||
|
<MonitoringTools />
|
||||||
|
</div>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Reference in New Issue
Block a user