feat(monitoring): 監控工具區塊 — Grafana/Prometheus/SigNoz/Gitea 狀態
- 新增 GET /api/v1/monitoring/status,asyncio.gather 並行探測四工具 - 前端 MonitoringTools 元件,60s 輪詢顯示狀態/版本/統計 - 新增 monitoringTools i18n key Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
174
apps/api/src/api/v1/monitoring.py
Normal file
174
apps/api/src/api/v1/monitoring.py
Normal file
@@ -0,0 +1,174 @@
|
||||
"""
|
||||
Monitoring Status API
|
||||
=====================
|
||||
探測各監控工具狀態:Grafana / Prometheus / SigNoz / Gitea
|
||||
|
||||
所有探測從後端發出,不暴露內網 IP 給前端。
|
||||
|
||||
建立時間: 2026-04-03 (台北時區)
|
||||
建立者: Claude Code
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from datetime import UTC, datetime
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter
|
||||
|
||||
from src.core.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/monitoring", tags=["Monitoring"])
|
||||
|
||||
# =============================================================================
|
||||
# Internal service endpoints (backend-only)
|
||||
# =============================================================================
|
||||
|
||||
# Per-service probe targets. "base" is the internal origin the backend calls;
# the remaining keys are URL paths appended to it.
SERVICES = {
    "grafana": {"base": "http://192.168.0.188:3000", "health": "/api/health", "build": "/api/frontend/settings"},
    "prometheus": {
        "base": "http://192.168.0.110:9090",
        "health": "/-/healthy",
        "build": "/api/v1/status/buildinfo",
        "rules": "/api/v1/rules",
    },
    "signoz": {"base": "http://192.168.0.188:3301", "health": "/api/v1/health"},
    "gitea": {"base": "http://192.168.0.110:3001", "health": "/-/readiness"},
}

# Per-request timeout, in seconds, passed to every probe call.
TIMEOUT = 3.0
|
||||
|
||||
|
||||
async def _probe_grafana(client: httpx.AsyncClient) -> dict:
    """Probe Grafana and return its status entry.

    The health endpoint alone decides "up" vs "down". The dashboard count is
    an optional extra: if that second request fails, the tool is still
    reported as up, just without stats.

    Args:
        client: Shared async HTTP client used for all requests.

    Returns:
        dict with name/status/version/stats/description.
    """
    cfg = SERVICES["grafana"]
    base = cfg["base"]
    description = "監控面板 · 指標視覺化"
    down = {"name": "Grafana", "status": "down", "version": None, "stats": None, "description": description}

    try:
        health_r = await client.get(f"{base}{cfg['health']}", timeout=TIMEOUT)
        if health_r.status_code != 200:
            return down
        # None (not a placeholder string) when the field is missing, matching
        # the other probes so the UI simply omits the version.
        version = health_r.json().get("version")
    except Exception as e:
        logger.warning("grafana_probe_failed", error=str(e))
        return down

    # Best-effort dashboard count; a failure here must not flip the status.
    dash_count = None
    try:
        dash_r = await client.get(f"{base}/api/search?type=dash-db", timeout=TIMEOUT)
        if dash_r.status_code == 200:
            dashboards = dash_r.json()
            # Only count when the payload really is a list of dashboards.
            if isinstance(dashboards, list):
                dash_count = len(dashboards)
    except Exception as e:
        logger.warning("grafana_dashboard_count_failed", error=str(e))

    return {
        "name": "Grafana",
        "status": "up",
        "version": version,
        "stats": f"面板 {dash_count} 個" if dash_count is not None else None,
        "description": description,
    }
|
||||
|
||||
|
||||
async def _probe_prometheus(client: httpx.AsyncClient) -> dict:
    """Probe Prometheus and return its status entry.

    The health endpoint alone decides "up" vs "down". Build info and rule
    statistics are fetched independently, so a failure in either only drops
    that detail instead of marking a healthy server as down.

    Args:
        client: Shared async HTTP client used for all requests.

    Returns:
        dict with name/status/version/stats/description/firing_count.
    """
    cfg = SERVICES["prometheus"]
    base = cfg["base"]
    description = "時序資料庫 · 告警規則"
    down = {
        "name": "Prometheus",
        "status": "down",
        "version": None,
        "stats": None,
        "description": description,
        "firing_count": 0,
    }

    try:
        health_r = await client.get(f"{base}{cfg['health']}", timeout=TIMEOUT)
    except Exception as e:
        logger.warning("prometheus_probe_failed", error=str(e))
        return down
    if health_r.status_code != 200:
        return down

    # Best-effort: version from the build-info endpoint.
    version = None
    try:
        build_r = await client.get(f"{base}{cfg['build']}", timeout=TIMEOUT)
        if build_r.status_code == 200:
            version = build_r.json().get("data", {}).get("version")
    except Exception as e:
        logger.warning("prometheus_buildinfo_failed", error=str(e))

    # Best-effort: total rule count and how many report state "firing".
    rules_count = None
    firing_count = 0
    try:
        rules_r = await client.get(f"{base}{cfg['rules']}", timeout=TIMEOUT)
        if rules_r.status_code == 200:
            groups = rules_r.json().get("data", {}).get("groups", [])
            rules = [rule for group in groups for rule in group.get("rules", [])]
            rules_count = len(rules)
            firing_count = sum(1 for rule in rules if rule.get("state") == "firing")
    except Exception as e:
        logger.warning("prometheus_rules_failed", error=str(e))

    stats_parts = []
    if rules_count is not None:
        stats_parts.append(f"規則 {rules_count} 條")
    if firing_count > 0:
        stats_parts.append(f"{firing_count} 觸發")

    return {
        "name": "Prometheus",
        "status": "up",
        "version": version,
        "stats": " · ".join(stats_parts) if stats_parts else None,
        "description": description,
        "firing_count": firing_count,
    }
|
||||
|
||||
|
||||
async def _probe_signoz(client: httpx.AsyncClient) -> dict:
    """Probe SigNoz and return its status entry.

    Tries the health endpoint first. If that does not answer 200 (or raises),
    falls back to requesting the root URL and treats 200/301/302 as up.

    Args:
        client: Shared async HTTP client used for all requests.

    Returns:
        dict with name/status/version/stats/description.
    """
    cfg = SERVICES["signoz"]
    base = cfg["base"]
    description = "可觀測性平台"
    up = {"name": "SigNoz", "status": "up", "version": None, "stats": "APM · 追蹤 · 日誌", "description": description}

    try:
        health_r = await client.get(f"{base}{cfg['health']}", timeout=TIMEOUT)
        if health_r.status_code == 200:
            return up
    except Exception as e:
        logger.warning("signoz_probe_failed", error=str(e))

    # Fallback: try root
    try:
        root_r = await client.get(f"{base}/", timeout=TIMEOUT)
        if root_r.status_code in (200, 301, 302):
            return up
    except Exception as e:
        # Still best-effort, but leave a trace instead of swallowing silently.
        logger.warning("signoz_fallback_probe_failed", error=str(e))

    return {"name": "SigNoz", "status": "down", "version": None, "stats": None, "description": description}
|
||||
|
||||
|
||||
async def _probe_gitea(client: httpx.AsyncClient) -> dict:
    """Probe Gitea and return its status entry.

    The readiness endpoint alone decides "up" vs "down". The version lookup
    is optional: if it fails, the tool is still reported as up with
    ``version`` set to None.

    Args:
        client: Shared async HTTP client used for all requests.

    Returns:
        dict with name/status/version/stats/description.
    """
    cfg = SERVICES["gitea"]
    base = cfg["base"]
    description = "代碼倉庫 · Pipeline"
    down = {"name": "Gitea", "status": "down", "version": None, "stats": None, "description": description}

    # NOTE(review): Gitea's documented health endpoint is /api/healthz;
    # confirm this deployment actually serves the configured readiness path.
    try:
        ready_r = await client.get(f"{base}{cfg['health']}", timeout=TIMEOUT)
    except Exception as e:
        logger.warning("gitea_probe_failed", error=str(e))
        return down
    if ready_r.status_code != 200:
        return down

    # Best-effort version lookup; a failure here must not flip the status.
    version = None
    try:
        ver_r = await client.get(f"{base}/api/v1/version", timeout=TIMEOUT)
        if ver_r.status_code == 200:
            version = ver_r.json().get("version")
    except Exception as e:
        logger.warning("gitea_version_lookup_failed", error=str(e))

    return {"name": "Gitea", "status": "up", "version": version, "stats": "CI/CD · Git 倉庫", "description": description}
|
||||
|
||||
|
||||
@router.get("/status")
async def get_monitoring_status() -> dict:
    """Probe all monitoring tools concurrently and report their status.

    Returns:
        dict with ``tools`` (one entry per probe that produced a result, each
        containing name/status/version/stats/description plus ``checked_at``)
        and a top-level ``checked_at`` ISO-8601 UTC timestamp.
    """
    probes = {
        "grafana": _probe_grafana,
        "prometheus": _probe_prometheus,
        "signoz": _probe_signoz,
        "gitea": _probe_gitea,
    }

    async with httpx.AsyncClient(follow_redirects=True) as client:
        results = await asyncio.gather(
            *(probe(client) for probe in probes.values()),
            return_exceptions=True,
        )

    # One timestamp for the whole batch so every entry agrees with the envelope.
    checked_at = datetime.now(UTC).isoformat()

    tools = []
    for tool_key, result in zip(probes, results):
        # BaseException, not Exception: gather can hand back CancelledError,
        # which would otherwise reach the dict unpacking below and raise.
        if isinstance(result, BaseException):
            logger.warning("monitoring_probe_crashed", tool=tool_key, error=repr(result))
            continue
        tools.append({**result, "checked_at": checked_at})

    return {
        "tools": tools,
        "checked_at": checked_at,
    }
|
||||
@@ -57,6 +57,7 @@ from src.api.v1 import (
|
||||
from src.api.v1 import (
|
||||
signoz_webhook as signoz_webhook_v1, # Phase 21: SignOz → Telegram (ADR-037)
|
||||
)
|
||||
from src.api.v1 import monitoring as monitoring_v1 # 2026-04-03: 監控工具狀態
|
||||
from src.api.v1 import stats as stats_v1 # Phase 6.5: Statistics Analytics
|
||||
from src.api.v1 import telegram as telegram_v1 # Phase 5.4: Telegram Gateway
|
||||
from src.api.v1 import terminal as terminal_v1 # Phase 19.1: Omni-Terminal SSE
|
||||
@@ -402,6 +403,9 @@ app.include_router(
|
||||
app.include_router(
|
||||
stats_v1.router, prefix="/api/v1", tags=["Statistics"]
|
||||
) # Phase 6.5: Statistics Analytics
|
||||
app.include_router(
|
||||
monitoring_v1.router, prefix="/api/v1", tags=["Monitoring"]
|
||||
) # 2026-04-03: 監控工具狀態
|
||||
app.include_router(
|
||||
github_webhook_v1.router, prefix="/api/v1", tags=["GitHub Webhook"]
|
||||
) # Phase 13.1: GitHub → OpenClaw
|
||||
|
||||
@@ -143,7 +143,13 @@
|
||||
"infrastructure": "INFRASTRUCTURE",
|
||||
"podHealth": "POD Health",
|
||||
"allRunning": "All Running",
|
||||
"servicesUp": "Services Up"
|
||||
"servicesUp": "Services Up",
|
||||
"monitoringTools": "Monitoring Tools",
|
||||
"monitoringStatus": {
|
||||
"up": "OK",
|
||||
"down": "Down",
|
||||
"unknown": "Unknown"
|
||||
}
|
||||
},
|
||||
"openclaw": {
|
||||
"name": "OpenClaw",
|
||||
|
||||
@@ -144,7 +144,13 @@
|
||||
"infrastructure": "基礎架構",
|
||||
"podHealth": "POD 健康",
|
||||
"allRunning": "全部運行中",
|
||||
"servicesUp": "服務上線"
|
||||
"servicesUp": "服務上線",
|
||||
"monitoringTools": "監控工具",
|
||||
"monitoringStatus": {
|
||||
"up": "正常",
|
||||
"down": "離線",
|
||||
"unknown": "未知"
|
||||
}
|
||||
},
|
||||
"openclaw": {
|
||||
"name": "OpenClaw",
|
||||
|
||||
@@ -8,10 +8,12 @@
|
||||
* 統帥鐵律: 使用真實數據 Hook,禁止假數據!
|
||||
*
|
||||
* @updated 2026-04-02 Claude Code — Metrics Strip 7指標視覺強化
|
||||
* @updated 2026-04-03 Claude Code — 監控工具區塊 (Grafana/Prometheus/SigNoz/Gitea)
|
||||
* 串接: incidents(count/P0/MTTR/autoRemediation) + dashboard(serviceHealth/pendingApprovals/podHealth)
|
||||
*/
|
||||
|
||||
import { useTranslations } from 'next-intl'
|
||||
import { useState, useEffect } from 'react'
|
||||
import { useGlobalPulseMetrics } from '@/hooks/useGlobalPulseMetrics'
|
||||
import { useIncidents } from '@/hooks/useIncidents'
|
||||
import { useHosts, useDashboardStore } from '@/stores/dashboard.store'
|
||||
@@ -20,6 +22,8 @@ import { OpenClawPanel } from '@/components/ai/openclaw-panel'
|
||||
import { HostGrid, type HostInfo } from '@/components/infra/host-grid'
|
||||
import { AppLayout } from '@/components/layout'
|
||||
|
||||
const API_BASE = process.env.NEXT_PUBLIC_API_URL ?? ''
|
||||
|
||||
// =============================================================================
|
||||
// Types
|
||||
// =============================================================================
|
||||
@@ -55,6 +59,96 @@ function MiniSparkline({ values, color }: { values: number[]; color: string }) {
|
||||
)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Monitoring Tools Component
|
||||
// =============================================================================
|
||||
|
||||
/** One entry of the `tools` array returned by GET /api/v1/monitoring/status. */
interface MonitoringTool {
  name: string
  status: string
  version: string | null
  stats: string | null
  description: string
  /** Only sent for Prometheus: number of rules currently firing. */
  firing_count?: number
  /** ISO-8601 timestamp of the probe. */
  checked_at: string
}

/** Emoji shown next to each tool; unknown names fall back to a gear. */
const MONITORING_TOOL_ICONS: Record<string, string> = {
  Grafana: '📊',
  Prometheus: '🔥',
  SigNoz: '🔭',
  Gitea: '🐙',
}

/** Polling interval for the status endpoint, in milliseconds. */
const MONITORING_POLL_MS = 60000

/**
 * Lists the monitoring tools with status badge, version and stats.
 * Fetches the status endpoint on mount and then every 60 seconds.
 *
 * NOTE(review): the badge and loading labels are hard-coded zh-TW although
 * `monitoringStatus.*` i18n keys exist — wire them up once the translation
 * namespace is confirmed.
 */
function MonitoringTools() {
  const [tools, setTools] = useState<MonitoringTool[]>([])
  const [loading, setLoading] = useState(true)

  useEffect(() => {
    // Aborted on unmount so an in-flight request cannot touch state afterwards.
    const controller = new AbortController()

    const load = async () => {
      try {
        const res = await fetch(`${API_BASE}/api/v1/monitoring/status`, { signal: controller.signal })
        // An error response must not be parsed as data: its body has no
        // `tools` field and would wipe the list that is already displayed.
        if (!res.ok) throw new Error(`HTTP ${res.status}`)
        const data = await res.json()
        setTools(Array.isArray(data?.tools) ? data.tools : [])
      } catch {
        // Keep whatever was shown before; the next poll will retry.
      } finally {
        if (!controller.signal.aborted) setLoading(false)
      }
    }

    void load()
    const timer = setInterval(load, MONITORING_POLL_MS)
    return () => {
      clearInterval(timer)
      controller.abort()
    }
  }, [])

  if (loading) return (
    <div style={{ padding: '12px 14px', fontSize: 12, color: '#87867f', fontFamily: 'var(--font-body), monospace' }}>
      載入中...
    </div>
  )

  return (
    <div>
      {tools.map((tool, i) => {
        const isUp = tool.status === 'up'
        const hasFiring = (tool.firing_count ?? 0) > 0
        return (
          <div key={tool.name} style={{
            padding: '10px 14px',
            borderBottom: i < tools.length - 1 ? '0.5px solid #f0ede4' : 'none',
            display: 'flex',
            alignItems: 'center',
            gap: 10,
          }}>
            <div style={{ fontSize: 18, flexShrink: 0, width: 24, textAlign: 'center' }}>{MONITORING_TOOL_ICONS[tool.name] ?? '⚙️'}</div>
            <div style={{ flex: 1, minWidth: 0 }}>
              <div style={{ display: 'flex', alignItems: 'center', gap: 6, marginBottom: 2 }}>
                <span style={{ fontSize: 13, fontWeight: 700, color: '#141413', fontFamily: 'var(--font-body), monospace' }}>{tool.name}</span>
                {/* Badge: green when up, amber when up with firing rules, red otherwise. */}
                <span style={{
                  display: 'inline-flex', alignItems: 'center', gap: 3,
                  fontSize: 10, fontWeight: 600,
                  color: isUp ? (hasFiring ? '#F59E0B' : '#22C55E') : '#cc2200',
                  background: isUp ? (hasFiring ? 'rgba(245,158,11,0.08)' : 'rgba(34,197,94,0.08)') : 'rgba(204,34,0,0.08)',
                  border: `0.5px solid ${isUp ? (hasFiring ? 'rgba(245,158,11,0.25)' : 'rgba(34,197,94,0.25)') : 'rgba(204,34,0,0.25)'}`,
                  borderRadius: 4, padding: '1px 5px',
                }}>
                  <span style={{ width: 4, height: 4, borderRadius: '50%', background: 'currentColor', display: 'inline-block' }} />
                  {isUp ? (hasFiring ? `${tool.firing_count} 觸發` : '正常') : '離線'}
                </span>
              </div>
              <div style={{ fontSize: 11, color: '#87867f', fontFamily: 'var(--font-body), monospace' }}>
                {tool.description}
                {tool.version && <span style={{ color: '#c0bdb4' }}> · v{tool.version}</span>}
              </div>
              {tool.stats && (
                <div style={{ fontSize: 11, color: '#a0a09a', fontFamily: 'var(--font-body), monospace', marginTop: 1 }}>{tool.stats}</div>
              )}
            </div>
            <div style={{ fontSize: 10, color: '#c0bdb4', fontFamily: 'var(--font-body), monospace', flexShrink: 0, textAlign: 'right' }}>
              {new Date(tool.checked_at).toLocaleTimeString('zh-TW', { timeZone: 'Asia/Taipei', hour: '2-digit', minute: '2-digit' })}
            </div>
          </div>
        )
      })}
    </div>
  )
}
|
||||
|
||||
// =============================================================================
|
||||
// Main Page
|
||||
// =============================================================================
|
||||
@@ -414,6 +508,29 @@ export default function Home({ params }: { params: { locale: string } }) {
|
||||
}))} />
|
||||
</div>
|
||||
|
||||
{/* 監控工具 */}
|
||||
<div style={{
|
||||
background: '#fff',
|
||||
border: '0.5px solid #e0ddd4',
|
||||
borderRadius: 12,
|
||||
overflow: 'hidden',
|
||||
boxShadow: '0 1px 4px rgba(0,0,0,0.05)',
|
||||
flexShrink: 0,
|
||||
}}>
|
||||
<div style={{
|
||||
padding: '10px 14px',
|
||||
borderBottom: '0.5px solid #e0ddd4',
|
||||
fontSize: 14, fontWeight: 700, color: '#141413',
|
||||
letterSpacing: '0.5px',
|
||||
fontFamily: 'var(--font-body), monospace', background: '#faf9f3',
|
||||
display: 'flex', alignItems: 'center', gap: 8,
|
||||
}}>
|
||||
<div style={{ width: 6, height: 6, borderRadius: '50%', background: '#d97757', flexShrink: 0 }} />
|
||||
{tDashboard('monitoringTools')}
|
||||
</div>
|
||||
<MonitoringTools />
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user