Files
awoooi/apps/api/src/services/health_check_service.py
OG T 8313a3787b
Some checks failed
E2E Health Check / e2e-health (push) Has been cancelled
refactor(api): Phase 22 P0 leWOOOgo 模組化修復
Router 層禁止直接 httpx.AsyncClient,抽取到 Service 層:

新增 Services:
- OpenClawHttpService: Error 分析/Code Review/CI 診斷
- GitHubApiService: PR Diff 取得
- HealthCheckService: HTTP/PostgreSQL/Redis 健康檢查

修改 Routers:
- sentry_webhook.py: 使用 OpenClawHttpService
- github_webhook.py: 使用 GitHubApiService + OpenClawHttpService
- health.py: 使用 HealthCheckService

遵循規範:
- Skill 09: Router 層禁止直接外部 API 呼叫
- feedback_lewooogo_modular_enforcement.md

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-31 16:06:35 +08:00

218 lines
6.4 KiB
Python

"""
Health Check Service - 統一健康檢查
===================================
Phase 22 P0 修復: Router 層禁止直接 httpx.AsyncClient
遵循規範:
- Skill 09: Router 層禁止直接外部 API 呼叫
- feedback_lewooogo_modular_enforcement.md: Service 層封裝
功能:
- HTTP 健康檢查 (通用)
- PostgreSQL TCP 檢查
- Redis PING 檢查
版本: v1.0
建立: 2026-03-31 (台北時區)
建立者: Claude Code (首席架構師 P0 修復)
"""
import asyncio
from dataclasses import dataclass
import httpx
import structlog
from src.core.config import settings
logger = structlog.get_logger(__name__)
# =============================================================================
# Response Models
# =============================================================================
@dataclass
class ComponentHealth:
"""組件健康狀態"""
status: str # "up" | "down"
latency_ms: float | None = None
error: str | None = None
# =============================================================================
# Health Check Service
# =============================================================================
class HealthCheckService:
    """Unified health checks for HTTP endpoints, PostgreSQL (TCP) and Redis (PING).

    Keeps outbound probing in the service layer so routers do not create
    HTTP clients themselves (leWOOOgo modular principle, Phase 22 P0 fix,
    2026-03-31).

    Every check returns a ComponentHealth and never raises for ordinary
    failures: errors are logged as warnings and reported as status "down".
    When ``settings.MOCK_MODE`` is truthy no I/O is performed and a random
    "up" latency is returned instead.
    """

    def __init__(
        self,
        default_timeout: float | None = None,
    ):
        """Create the service.

        Args:
            default_timeout: Timeout in seconds used when a check is called
                without its own timeout. A falsy value (None or 0) falls
                back to ``settings.HEALTH_CHECK_TIMEOUT``.
        """
        self._default_timeout = default_timeout or settings.HEALTH_CHECK_TIMEOUT

    @staticmethod
    def _mock_health(low_ms: float, high_ms: float) -> ComponentHealth:
        """Return an "up" result with a random latency in [low_ms, high_ms]."""
        # Imported lazily: only needed in mock mode.
        import random

        return ComponentHealth(
            status="up", latency_ms=round(random.uniform(low_ms, high_ms), 2)
        )

    @staticmethod
    def _elapsed_ms(start: float) -> float:
        """Return milliseconds elapsed since ``start`` (a loop.time() value)."""
        elapsed = (asyncio.get_running_loop().time() - start) * 1000
        return round(elapsed, 2)

    @staticmethod
    def _describe_error(exc: BaseException) -> str:
        """Return str(exc), or the exception class name when that is empty."""
        # Many exceptions carry no message; an empty string would hide the cause.
        return str(exc) or type(exc).__name__

    async def http_health_check(
        self,
        name: str,
        url: str,
        path: str = "/health",
        timeout: float | None = None,
    ) -> ComponentHealth:
        """Probe an HTTP endpoint with a GET request.

        Args:
            name: Component name, used as the prefix of the log event names.
            url: Base URL of the component.
            path: Health-check path appended to ``url`` as-is.
            timeout: Timeout in seconds; a falsy value uses the default.

        Returns:
            ComponentHealth with status "up" and the measured latency when
            the response has a success status, otherwise status "down" with
            an error of "timeout", "connection refused", or the exception
            text.
        """
        if settings.MOCK_MODE:
            return self._mock_health(1.0, 15.0)

        try:
            start = asyncio.get_running_loop().time()
            async with httpx.AsyncClient(
                timeout=timeout or self._default_timeout
            ) as client:
                response = await client.get(f"{url}{path}")
                # Error status codes raise and are reported as "down" below.
                response.raise_for_status()
                return ComponentHealth(
                    status="up", latency_ms=self._elapsed_ms(start)
                )
        except httpx.TimeoutException:
            logger.warning(f"{name}_health_check_timeout", url=url)
            return ComponentHealth(status="down", error="timeout")
        except httpx.ConnectError:
            logger.warning(f"{name}_health_check_connect_error", url=url)
            return ComponentHealth(status="down", error="connection refused")
        except Exception as e:
            reason = self._describe_error(e)
            logger.warning(f"{name}_health_check_failed", url=url, error=reason)
            return ComponentHealth(status="down", error=reason)

    async def postgresql_tcp_check(
        self,
        # NOTE(review): hard-coded LAN address kept for backward compatibility;
        # callers should pass the host explicitly.
        host: str = "192.168.0.188",
        port: int = 5432,
        timeout: float | None = None,
    ) -> ComponentHealth:
        """Check that a TCP connection to PostgreSQL can be opened.

        Only the TCP connection is exercised; no PostgreSQL protocol traffic
        is sent.

        Args:
            host: PostgreSQL host.
            port: PostgreSQL port.
            timeout: Connect timeout in seconds; a falsy value uses the
                default.

        Returns:
            ComponentHealth with status "up" and the measured latency, or
            status "down" with an error of "timeout", "connection refused",
            or the exception text.
        """
        if settings.MOCK_MODE:
            return self._mock_health(0.5, 3.0)

        try:
            start = asyncio.get_running_loop().time()
            _reader, writer = await asyncio.wait_for(
                asyncio.open_connection(host, port),
                timeout=timeout or self._default_timeout,
            )
            writer.close()
            await writer.wait_closed()
            return ComponentHealth(status="up", latency_ms=self._elapsed_ms(start))
        except (asyncio.TimeoutError, TimeoutError):
            # asyncio.TimeoutError is only an alias of the builtin TimeoutError
            # from Python 3.11 onwards; catch both so 3.10 reports "timeout".
            logger.warning("postgresql_health_check_timeout", host=host, port=port)
            return ComponentHealth(status="down", error="timeout")
        except ConnectionRefusedError:
            logger.warning(
                "postgresql_health_check_refused",
                host=host,
                port=port,
            )
            return ComponentHealth(status="down", error="connection refused")
        except Exception as e:
            reason = self._describe_error(e)
            logger.warning(
                "postgresql_health_check_failed",
                host=host,
                port=port,
                error=reason,
            )
            return ComponentHealth(status="down", error=reason)

    async def redis_ping_check(
        self,
        timeout: float | None = None,
    ) -> ComponentHealth:
        """Check Redis by awaiting ``ping()`` on the shared client.

        Args:
            timeout: Timeout in seconds for the PING; a falsy value uses the
                default.

        Returns:
            ComponentHealth with status "up" and the measured latency, or
            status "down" with an error of "timeout" or the exception text.
        """
        if settings.MOCK_MODE:
            return self._mock_health(0.1, 1.0)

        try:
            # Imported inside the try so a failing import is reported as "down".
            from src.core.redis_client import get_redis

            start = asyncio.get_running_loop().time()
            redis = get_redis()
            await asyncio.wait_for(
                redis.ping(),
                timeout=timeout or self._default_timeout,
            )
            return ComponentHealth(status="up", latency_ms=self._elapsed_ms(start))
        except (asyncio.TimeoutError, TimeoutError):
            # See postgresql_tcp_check: the two classes differ before Python 3.11.
            logger.warning("redis_health_check_timeout")
            return ComponentHealth(status="down", error="timeout")
        except Exception as e:
            reason = self._describe_error(e)
            logger.warning("redis_health_check_failed", error=reason)
            return ComponentHealth(status="down", error=reason)
# =============================================================================
# Singleton
# =============================================================================
_health_check_service: HealthCheckService | None = None


def get_health_check_service() -> HealthCheckService:
    """Return the module-level HealthCheckService, creating it on first call."""
    global _health_check_service
    # Fast path: an instance was already created by an earlier call.
    if _health_check_service is not None:
        return _health_check_service
    _health_check_service = HealthCheckService()
    return _health_check_service