Files
awoooi/apps/api/src/services/stats_service.py
OG T d89f0520f9 fix(api): 修復 34 個 Ruff lint 錯誤
- 自動修復 import 排序、unused imports
- 手動修復 raise from、isinstance union、unused variable
- scripts/ 暫時保留 (非 CI 阻擋)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-29 15:27:49 +08:00

533 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Stats Service - Phase 17 P0 Router 層違規修復
=============================================
封裝統計 API 的快取邏輯與資料庫查詢,消除 Router 層直接存取 Redis/DB。
功能:
- 快取包裝器 (Redis)
- 統計計算 (透過 SQLAlchemy)
符合 leWOOOgo 積木化規範:
- Router -> Service -> Redis/Repository
@author Claude Code (首席架構師)
@version 2.0.0
@date 2026-03-28 (台北時間)
@see feedback_lewooogo_modular_enforcement.md
"""
import json
from collections.abc import Callable, Coroutine
from datetime import datetime, timedelta
from typing import Any, Protocol, runtime_checkable
import structlog
from sqlalchemy import func, select
from src.core.redis_client import get_redis
from src.db.base import get_db_context
from src.db.models import IncidentRecord
from src.models.incident import IncidentStatus
logger = structlog.get_logger(__name__)
# 快取 TTL (秒)
STATS_CACHE_TTL = 300 # 5 分鐘
# =============================================================================
# Protocol (Interface)
# =============================================================================
@runtime_checkable
class IStatsService(Protocol):
"""
統計服務介面
Phase 17 P1: 定義 Protocol 供依賴注入
"""
async def get_incident_summary(
self, days: int = 30
) -> dict[str, Any]:
"""取得事件總覽統計"""
...
async def get_resolution_stats(
self, days: int = 30
) -> dict[str, Any]:
"""取得解決時間統計"""
...
async def get_ai_performance(
self, days: int = 30
) -> dict[str, Any]:
"""取得 AI 效能統計"""
...
async def get_affected_services(
self, days: int = 30, limit: int = 10
) -> list[dict[str, Any]]:
"""取得受影響服務排名"""
...
async def get_incident_trends(
self, days: int = 30, period: str = "daily"
) -> dict[str, Any]:
"""取得事件趨勢"""
...
async def get_feedback_summary(
self, days: int = 30
) -> dict[str, Any]:
"""取得人類回饋摘要"""
...
# =============================================================================
# Implementation
# =============================================================================
class StatsService:
"""
統計服務實作
封裝統計 API 的快取邏輯與資料庫查詢
"""
# -------------------------------------------------------------------------
# 快取相關
# -------------------------------------------------------------------------
async def get_cached_or_compute(
self,
cache_key: str,
compute_fn: Callable[[], Coroutine[Any, Any, dict[str, Any]]],
ttl: int = STATS_CACHE_TTL,
) -> dict[str, Any]:
"""
快取包裝器: 先查 Redis沒有則計算並快取
Phase 17: 從 Router 層遷移至 Service 層
"""
redis_client = get_redis()
# 嘗試從快取取得
try:
cached = await redis_client.get(cache_key)
if cached:
logger.debug("stats_cache_hit", key=cache_key)
return json.loads(cached)
except Exception as e:
logger.warning("stats_cache_read_error", key=cache_key, error=str(e))
# 計算結果
result = await compute_fn()
# 寫入快取
try:
await redis_client.set(cache_key, json.dumps(result), ex=ttl)
logger.debug("stats_cache_set", key=cache_key, ttl=ttl)
except Exception as e:
logger.warning("stats_cache_write_error", key=cache_key, error=str(e))
return result
async def invalidate_cache(self, cache_key: str) -> bool:
"""清除指定快取"""
redis_client = get_redis()
try:
await redis_client.delete(cache_key)
logger.info("stats_cache_invalidated", key=cache_key)
return True
except Exception as e:
logger.warning("stats_cache_invalidate_error", key=cache_key, error=str(e))
return False
# -------------------------------------------------------------------------
# 統計查詢 (Phase 17 P1: 從 Router 層遷移)
# -------------------------------------------------------------------------
async def get_incident_summary(self, days: int = 30) -> dict[str, Any]:
"""
取得事件總覽統計
包含: 總事件數、狀態分佈、嚴重度分佈、解決率
"""
cache_key = f"stats:incident_summary:{days}"
async def compute() -> dict[str, Any]:
async with get_db_context() as db:
since = datetime.utcnow() - timedelta(days=days)
# 總數
total_result = await db.execute(
select(func.count(IncidentRecord.incident_id)).where(
IncidentRecord.created_at >= since
)
)
total = total_result.scalar() or 0
# 狀態分佈
status_result = await db.execute(
select(IncidentRecord.status, func.count(IncidentRecord.incident_id))
.where(IncidentRecord.created_at >= since)
.group_by(IncidentRecord.status)
)
status_dist = [
{"status": str(row[0]), "count": row[1]}
for row in status_result.all()
]
# 嚴重度分佈
severity_result = await db.execute(
select(IncidentRecord.severity, func.count(IncidentRecord.incident_id))
.where(IncidentRecord.created_at >= since)
.group_by(IncidentRecord.severity)
)
severity_dist = [
{"severity": str(row[0]), "count": row[1]}
for row in severity_result.all()
]
# 解決率
resolved_result = await db.execute(
select(func.count(IncidentRecord.incident_id)).where(
IncidentRecord.created_at >= since,
IncidentRecord.status.in_(
[IncidentStatus.RESOLVED, IncidentStatus.CLOSED]
),
)
)
resolved_count = resolved_result.scalar() or 0
resolved_rate = (resolved_count / total * 100) if total > 0 else 0.0
# 平均告警聚合數
signals_result = await db.execute(
select(func.avg(func.json_array_length(IncidentRecord.signals))).where(
IncidentRecord.created_at >= since
)
)
avg_signals = signals_result.scalar() or 0.0
logger.info(
"stats_incident_summary",
total=total,
resolved_rate=resolved_rate,
days=days,
)
return {
"total_incidents": total,
"status_distribution": status_dist,
"severity_distribution": severity_dist,
"resolved_rate": round(resolved_rate, 2),
"avg_signals_per_incident": round(float(avg_signals), 2),
}
return await self.get_cached_or_compute(cache_key, compute)
async def get_resolution_stats(self, days: int = 30) -> dict[str, Any]:
"""
取得解決時間統計
計算: 平均、P50、P95、最快、最慢解決時間
"""
cache_key = f"stats:resolution:{days}"
async def compute() -> dict[str, Any]:
async with get_db_context() as db:
since = datetime.utcnow() - timedelta(days=days)
result = await db.execute(
select(
IncidentRecord.created_at,
IncidentRecord.resolved_at,
).where(
IncidentRecord.created_at >= since,
IncidentRecord.resolved_at.isnot(None),
)
)
rows = result.all()
if not rows:
return {
"avg_minutes": None,
"p50_minutes": None,
"p95_minutes": None,
"fastest_minutes": None,
"slowest_minutes": None,
"sample_size": 0,
}
durations = []
for row in rows:
if row.resolved_at and row.created_at:
delta = row.resolved_at - row.created_at
durations.append(delta.total_seconds() / 60)
if not durations:
return {
"avg_minutes": None,
"p50_minutes": None,
"p95_minutes": None,
"fastest_minutes": None,
"slowest_minutes": None,
"sample_size": 0,
}
durations.sort()
n = len(durations)
return {
"avg_minutes": round(sum(durations) / n, 2),
"p50_minutes": round(durations[n // 2], 2),
"p95_minutes": round(durations[min(int(n * 0.95), n - 1)], 2),
"fastest_minutes": round(min(durations), 2),
"slowest_minutes": round(max(durations), 2),
"sample_size": n,
}
return await self.get_cached_or_compute(cache_key, compute)
async def get_ai_performance(self, days: int = 30) -> dict[str, Any]:
"""
取得 AI 提案效能統計
評估: 提案執行率、成功率、有效性評分
"""
cache_key = f"stats:ai_performance:{days}"
async def compute() -> dict[str, Any]:
async with get_db_context() as db:
since = datetime.utcnow() - timedelta(days=days)
result = await db.execute(
select(IncidentRecord.outcome).where(
IncidentRecord.created_at >= since,
IncidentRecord.outcome.isnot(None),
)
)
outcomes = [row[0] for row in result.all() if row[0]]
total = len(outcomes)
executed = sum(1 for o in outcomes if o.get("proposal_executed"))
success = sum(
1 for o in outcomes if o.get("proposal_executed") and o.get("execution_success")
)
effectiveness_dist: dict[int, int] = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
scores = []
for o in outcomes:
score = o.get("effectiveness_score")
if score and 1 <= score <= 5:
effectiveness_dist[score] += 1
scores.append(score)
avg_effectiveness = sum(scores) / len(scores) if scores else None
return {
"total_proposals": total,
"executed_count": executed,
"execution_rate": round((executed / total * 100) if total > 0 else 0, 2),
"success_count": success,
"success_rate": round((success / executed * 100) if executed > 0 else 0, 2),
"avg_effectiveness": round(avg_effectiveness, 2) if avg_effectiveness else None,
"effectiveness_distribution": effectiveness_dist,
}
return await self.get_cached_or_compute(cache_key, compute)
async def get_affected_services(
self, days: int = 30, limit: int = 10
) -> list[dict[str, Any]]:
"""
取得最常受影響的服務排名
"""
cache_key = f"stats:affected_services:{days}:{limit}"
async def compute() -> dict[str, Any]:
async with get_db_context() as db:
since = datetime.utcnow() - timedelta(days=days)
result = await db.execute(
select(
IncidentRecord.affected_services,
IncidentRecord.severity,
).where(IncidentRecord.created_at >= since)
)
service_stats: dict[str, dict[str, Any]] = {}
for row in result.all():
services = row[0] or []
severity = str(row[1])
for svc in services:
if svc not in service_stats:
service_stats[svc] = {"count": 0, "severity": {}}
service_stats[svc]["count"] += 1
service_stats[svc]["severity"][severity] = (
service_stats[svc]["severity"].get(severity, 0) + 1
)
sorted_services = sorted(
service_stats.items(), key=lambda x: x[1]["count"], reverse=True
)[:limit]
return {
"services": [
{
"service": svc,
"incident_count": stats["count"],
"severity_breakdown": stats["severity"],
}
for svc, stats in sorted_services
]
}
result = await self.get_cached_or_compute(cache_key, compute)
return result.get("services", [])
async def get_incident_trends(
self, days: int = 30, period: str = "daily"
) -> dict[str, Any]:
"""
取得事件趨勢數據 (SQL GROUP BY 優化版)
"""
cache_key = f"stats:incident_trends:{days}:{period}"
async def compute() -> dict[str, Any]:
async with get_db_context() as db:
since = datetime.utcnow() - timedelta(days=days)
trunc_unit = {"daily": "day", "weekly": "week", "monthly": "month"}.get(
period, "day"
)
result = await db.execute(
select(
func.date_trunc(trunc_unit, IncidentRecord.created_at).label("period"),
func.count(IncidentRecord.incident_id).label("count"),
)
.where(IncidentRecord.created_at >= since)
.group_by(func.date_trunc(trunc_unit, IncidentRecord.created_at))
.order_by(func.date_trunc(trunc_unit, IncidentRecord.created_at))
)
rows = result.all()
trend_data = []
for row in rows:
if row.period:
if period == "daily":
date_str = row.period.strftime("%Y-%m-%d")
elif period == "weekly":
date_str = row.period.strftime("%Y-W%W")
else:
date_str = row.period.strftime("%Y-%m")
trend_data.append({"date": date_str, "count": row.count})
logger.info(
"stats_incident_trends",
period=period,
days=days,
data_points=len(trend_data),
)
return {"period": period, "data": trend_data}
return await self.get_cached_or_compute(cache_key, compute)
async def get_feedback_summary(self, days: int = 30) -> dict[str, Any]:
"""
取得人類回饋統計
"""
cache_key = f"stats:feedback_summary:{days}"
async def compute() -> dict[str, Any]:
async with get_db_context() as db:
since = datetime.utcnow() - timedelta(days=days)
result = await db.execute(
select(IncidentRecord.outcome).where(
IncidentRecord.created_at >= since,
IncidentRecord.outcome.isnot(None),
)
)
outcomes = [row[0] for row in result.all() if row[0]]
positive = 0
neutral = 0
negative = 0
themes: dict[str, int] = {}
for o in outcomes:
score = o.get("effectiveness_score") or o.get("feedback_score")
if score:
if score >= 4:
positive += 1
elif score == 3:
neutral += 1
else:
negative += 1
notes = o.get("learning_notes") or o.get("notes") or ""
if notes:
notes_lower = notes.lower()
theme_keywords = {
"timeout": ["timeout", "超時", "timed out", "deadline"],
"latency": ["latency", "延遲", "slow", "", "p99", "p95"],
"memory": ["memory", "記憶體", "oom", "heap", "內存"],
"cpu": ["cpu", "處理器", "high load", "負載"],
"network": ["network", "網路", "dns", "connection refused"],
"connection": ["connection", "連線", "socket", "tcp"],
"disk": ["disk", "磁碟", "storage", "io", "iops"],
"database": ["database", "資料庫", "db", "query", "deadlock"],
"pod": ["pod", "container", "restart", "crashloop"],
"scaling": ["scale", "擴容", "replica", "hpa"],
"error": ["error", "錯誤", "exception", "fail"],
"config": ["config", "配置", "env", "secret"],
}
for theme, keywords in theme_keywords.items():
if any(kw in notes_lower for kw in keywords):
themes[theme] = themes.get(theme, 0) + 1
sorted_themes = sorted(themes.items(), key=lambda x: x[1], reverse=True)[:5]
common_themes = [t[0] for t in sorted_themes]
total = positive + neutral + negative
logger.info(
"stats_feedback_summary",
total=total,
positive=positive,
negative=negative,
days=days,
)
return {
"total_feedback": total,
"positive_count": positive,
"neutral_count": neutral,
"negative_count": negative,
"common_themes": common_themes,
}
return await self.get_cached_or_compute(cache_key, compute)
# =============================================================================
# Dependency Injection
# =============================================================================
_stats_service: StatsService | None = None
def get_stats_service() -> StatsService:
"""取得 StatsService 實例 (Singleton)"""
global _stats_service
if _stats_service is None:
_stats_service = StatsService()
return _stats_service