- 自動修復 import 排序、unused imports - 手動修復 raise from、isinstance union、unused variable - scripts/ 暫時保留 (非 CI 阻擋) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
533 lines
19 KiB
Python
533 lines
19 KiB
Python
"""
|
||
Stats Service - Phase 17 P0 Router 層違規修復
|
||
=============================================
|
||
|
||
封裝統計 API 的快取邏輯與資料庫查詢,消除 Router 層直接存取 Redis/DB。
|
||
|
||
功能:
|
||
- 快取包裝器 (Redis)
|
||
- 統計計算 (透過 SQLAlchemy)
|
||
|
||
符合 leWOOOgo 積木化規範:
|
||
- Router -> Service -> Redis/Repository
|
||
|
||
@author Claude Code (首席架構師)
|
||
@version 2.0.0
|
||
@date 2026-03-28 (台北時間)
|
||
@see feedback_lewooogo_modular_enforcement.md
|
||
"""
|
||
|
||
import json
|
||
from collections.abc import Callable, Coroutine
|
||
from datetime import datetime, timedelta
|
||
from typing import Any, Protocol, runtime_checkable
|
||
|
||
import structlog
|
||
from sqlalchemy import func, select
|
||
|
||
from src.core.redis_client import get_redis
|
||
from src.db.base import get_db_context
|
||
from src.db.models import IncidentRecord
|
||
from src.models.incident import IncidentStatus
|
||
|
||
logger = structlog.get_logger(__name__)
|
||
|
||
# 快取 TTL (秒)
|
||
STATS_CACHE_TTL = 300 # 5 分鐘
|
||
|
||
|
||
# =============================================================================
|
||
# Protocol (Interface)
|
||
# =============================================================================
|
||
|
||
|
||
@runtime_checkable
|
||
class IStatsService(Protocol):
|
||
"""
|
||
統計服務介面
|
||
|
||
Phase 17 P1: 定義 Protocol 供依賴注入
|
||
"""
|
||
|
||
async def get_incident_summary(
|
||
self, days: int = 30
|
||
) -> dict[str, Any]:
|
||
"""取得事件總覽統計"""
|
||
...
|
||
|
||
async def get_resolution_stats(
|
||
self, days: int = 30
|
||
) -> dict[str, Any]:
|
||
"""取得解決時間統計"""
|
||
...
|
||
|
||
async def get_ai_performance(
|
||
self, days: int = 30
|
||
) -> dict[str, Any]:
|
||
"""取得 AI 效能統計"""
|
||
...
|
||
|
||
async def get_affected_services(
|
||
self, days: int = 30, limit: int = 10
|
||
) -> list[dict[str, Any]]:
|
||
"""取得受影響服務排名"""
|
||
...
|
||
|
||
async def get_incident_trends(
|
||
self, days: int = 30, period: str = "daily"
|
||
) -> dict[str, Any]:
|
||
"""取得事件趨勢"""
|
||
...
|
||
|
||
async def get_feedback_summary(
|
||
self, days: int = 30
|
||
) -> dict[str, Any]:
|
||
"""取得人類回饋摘要"""
|
||
...
|
||
|
||
|
||
# =============================================================================
|
||
# Implementation
|
||
# =============================================================================
|
||
|
||
|
||
class StatsService:
|
||
"""
|
||
統計服務實作
|
||
|
||
封裝統計 API 的快取邏輯與資料庫查詢
|
||
"""
|
||
|
||
# -------------------------------------------------------------------------
|
||
# 快取相關
|
||
# -------------------------------------------------------------------------
|
||
|
||
async def get_cached_or_compute(
|
||
self,
|
||
cache_key: str,
|
||
compute_fn: Callable[[], Coroutine[Any, Any, dict[str, Any]]],
|
||
ttl: int = STATS_CACHE_TTL,
|
||
) -> dict[str, Any]:
|
||
"""
|
||
快取包裝器: 先查 Redis,沒有則計算並快取
|
||
|
||
Phase 17: 從 Router 層遷移至 Service 層
|
||
"""
|
||
redis_client = get_redis()
|
||
|
||
# 嘗試從快取取得
|
||
try:
|
||
cached = await redis_client.get(cache_key)
|
||
if cached:
|
||
logger.debug("stats_cache_hit", key=cache_key)
|
||
return json.loads(cached)
|
||
except Exception as e:
|
||
logger.warning("stats_cache_read_error", key=cache_key, error=str(e))
|
||
|
||
# 計算結果
|
||
result = await compute_fn()
|
||
|
||
# 寫入快取
|
||
try:
|
||
await redis_client.set(cache_key, json.dumps(result), ex=ttl)
|
||
logger.debug("stats_cache_set", key=cache_key, ttl=ttl)
|
||
except Exception as e:
|
||
logger.warning("stats_cache_write_error", key=cache_key, error=str(e))
|
||
|
||
return result
|
||
|
||
async def invalidate_cache(self, cache_key: str) -> bool:
|
||
"""清除指定快取"""
|
||
redis_client = get_redis()
|
||
try:
|
||
await redis_client.delete(cache_key)
|
||
logger.info("stats_cache_invalidated", key=cache_key)
|
||
return True
|
||
except Exception as e:
|
||
logger.warning("stats_cache_invalidate_error", key=cache_key, error=str(e))
|
||
return False
|
||
|
||
# -------------------------------------------------------------------------
|
||
# 統計查詢 (Phase 17 P1: 從 Router 層遷移)
|
||
# -------------------------------------------------------------------------
|
||
|
||
async def get_incident_summary(self, days: int = 30) -> dict[str, Any]:
|
||
"""
|
||
取得事件總覽統計
|
||
|
||
包含: 總事件數、狀態分佈、嚴重度分佈、解決率
|
||
"""
|
||
cache_key = f"stats:incident_summary:{days}"
|
||
|
||
async def compute() -> dict[str, Any]:
|
||
async with get_db_context() as db:
|
||
since = datetime.utcnow() - timedelta(days=days)
|
||
|
||
# 總數
|
||
total_result = await db.execute(
|
||
select(func.count(IncidentRecord.incident_id)).where(
|
||
IncidentRecord.created_at >= since
|
||
)
|
||
)
|
||
total = total_result.scalar() or 0
|
||
|
||
# 狀態分佈
|
||
status_result = await db.execute(
|
||
select(IncidentRecord.status, func.count(IncidentRecord.incident_id))
|
||
.where(IncidentRecord.created_at >= since)
|
||
.group_by(IncidentRecord.status)
|
||
)
|
||
status_dist = [
|
||
{"status": str(row[0]), "count": row[1]}
|
||
for row in status_result.all()
|
||
]
|
||
|
||
# 嚴重度分佈
|
||
severity_result = await db.execute(
|
||
select(IncidentRecord.severity, func.count(IncidentRecord.incident_id))
|
||
.where(IncidentRecord.created_at >= since)
|
||
.group_by(IncidentRecord.severity)
|
||
)
|
||
severity_dist = [
|
||
{"severity": str(row[0]), "count": row[1]}
|
||
for row in severity_result.all()
|
||
]
|
||
|
||
# 解決率
|
||
resolved_result = await db.execute(
|
||
select(func.count(IncidentRecord.incident_id)).where(
|
||
IncidentRecord.created_at >= since,
|
||
IncidentRecord.status.in_(
|
||
[IncidentStatus.RESOLVED, IncidentStatus.CLOSED]
|
||
),
|
||
)
|
||
)
|
||
resolved_count = resolved_result.scalar() or 0
|
||
resolved_rate = (resolved_count / total * 100) if total > 0 else 0.0
|
||
|
||
# 平均告警聚合數
|
||
signals_result = await db.execute(
|
||
select(func.avg(func.json_array_length(IncidentRecord.signals))).where(
|
||
IncidentRecord.created_at >= since
|
||
)
|
||
)
|
||
avg_signals = signals_result.scalar() or 0.0
|
||
|
||
logger.info(
|
||
"stats_incident_summary",
|
||
total=total,
|
||
resolved_rate=resolved_rate,
|
||
days=days,
|
||
)
|
||
|
||
return {
|
||
"total_incidents": total,
|
||
"status_distribution": status_dist,
|
||
"severity_distribution": severity_dist,
|
||
"resolved_rate": round(resolved_rate, 2),
|
||
"avg_signals_per_incident": round(float(avg_signals), 2),
|
||
}
|
||
|
||
return await self.get_cached_or_compute(cache_key, compute)
|
||
|
||
async def get_resolution_stats(self, days: int = 30) -> dict[str, Any]:
|
||
"""
|
||
取得解決時間統計
|
||
|
||
計算: 平均、P50、P95、最快、最慢解決時間
|
||
"""
|
||
cache_key = f"stats:resolution:{days}"
|
||
|
||
async def compute() -> dict[str, Any]:
|
||
async with get_db_context() as db:
|
||
since = datetime.utcnow() - timedelta(days=days)
|
||
|
||
result = await db.execute(
|
||
select(
|
||
IncidentRecord.created_at,
|
||
IncidentRecord.resolved_at,
|
||
).where(
|
||
IncidentRecord.created_at >= since,
|
||
IncidentRecord.resolved_at.isnot(None),
|
||
)
|
||
)
|
||
rows = result.all()
|
||
|
||
if not rows:
|
||
return {
|
||
"avg_minutes": None,
|
||
"p50_minutes": None,
|
||
"p95_minutes": None,
|
||
"fastest_minutes": None,
|
||
"slowest_minutes": None,
|
||
"sample_size": 0,
|
||
}
|
||
|
||
durations = []
|
||
for row in rows:
|
||
if row.resolved_at and row.created_at:
|
||
delta = row.resolved_at - row.created_at
|
||
durations.append(delta.total_seconds() / 60)
|
||
|
||
if not durations:
|
||
return {
|
||
"avg_minutes": None,
|
||
"p50_minutes": None,
|
||
"p95_minutes": None,
|
||
"fastest_minutes": None,
|
||
"slowest_minutes": None,
|
||
"sample_size": 0,
|
||
}
|
||
|
||
durations.sort()
|
||
n = len(durations)
|
||
|
||
return {
|
||
"avg_minutes": round(sum(durations) / n, 2),
|
||
"p50_minutes": round(durations[n // 2], 2),
|
||
"p95_minutes": round(durations[min(int(n * 0.95), n - 1)], 2),
|
||
"fastest_minutes": round(min(durations), 2),
|
||
"slowest_minutes": round(max(durations), 2),
|
||
"sample_size": n,
|
||
}
|
||
|
||
return await self.get_cached_or_compute(cache_key, compute)
|
||
|
||
async def get_ai_performance(self, days: int = 30) -> dict[str, Any]:
|
||
"""
|
||
取得 AI 提案效能統計
|
||
|
||
評估: 提案執行率、成功率、有效性評分
|
||
"""
|
||
cache_key = f"stats:ai_performance:{days}"
|
||
|
||
async def compute() -> dict[str, Any]:
|
||
async with get_db_context() as db:
|
||
since = datetime.utcnow() - timedelta(days=days)
|
||
|
||
result = await db.execute(
|
||
select(IncidentRecord.outcome).where(
|
||
IncidentRecord.created_at >= since,
|
||
IncidentRecord.outcome.isnot(None),
|
||
)
|
||
)
|
||
outcomes = [row[0] for row in result.all() if row[0]]
|
||
|
||
total = len(outcomes)
|
||
executed = sum(1 for o in outcomes if o.get("proposal_executed"))
|
||
success = sum(
|
||
1 for o in outcomes if o.get("proposal_executed") and o.get("execution_success")
|
||
)
|
||
|
||
effectiveness_dist: dict[int, int] = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
|
||
scores = []
|
||
for o in outcomes:
|
||
score = o.get("effectiveness_score")
|
||
if score and 1 <= score <= 5:
|
||
effectiveness_dist[score] += 1
|
||
scores.append(score)
|
||
|
||
avg_effectiveness = sum(scores) / len(scores) if scores else None
|
||
|
||
return {
|
||
"total_proposals": total,
|
||
"executed_count": executed,
|
||
"execution_rate": round((executed / total * 100) if total > 0 else 0, 2),
|
||
"success_count": success,
|
||
"success_rate": round((success / executed * 100) if executed > 0 else 0, 2),
|
||
"avg_effectiveness": round(avg_effectiveness, 2) if avg_effectiveness else None,
|
||
"effectiveness_distribution": effectiveness_dist,
|
||
}
|
||
|
||
return await self.get_cached_or_compute(cache_key, compute)
|
||
|
||
async def get_affected_services(
|
||
self, days: int = 30, limit: int = 10
|
||
) -> list[dict[str, Any]]:
|
||
"""
|
||
取得最常受影響的服務排名
|
||
"""
|
||
cache_key = f"stats:affected_services:{days}:{limit}"
|
||
|
||
async def compute() -> dict[str, Any]:
|
||
async with get_db_context() as db:
|
||
since = datetime.utcnow() - timedelta(days=days)
|
||
|
||
result = await db.execute(
|
||
select(
|
||
IncidentRecord.affected_services,
|
||
IncidentRecord.severity,
|
||
).where(IncidentRecord.created_at >= since)
|
||
)
|
||
|
||
service_stats: dict[str, dict[str, Any]] = {}
|
||
for row in result.all():
|
||
services = row[0] or []
|
||
severity = str(row[1])
|
||
for svc in services:
|
||
if svc not in service_stats:
|
||
service_stats[svc] = {"count": 0, "severity": {}}
|
||
service_stats[svc]["count"] += 1
|
||
service_stats[svc]["severity"][severity] = (
|
||
service_stats[svc]["severity"].get(severity, 0) + 1
|
||
)
|
||
|
||
sorted_services = sorted(
|
||
service_stats.items(), key=lambda x: x[1]["count"], reverse=True
|
||
)[:limit]
|
||
|
||
return {
|
||
"services": [
|
||
{
|
||
"service": svc,
|
||
"incident_count": stats["count"],
|
||
"severity_breakdown": stats["severity"],
|
||
}
|
||
for svc, stats in sorted_services
|
||
]
|
||
}
|
||
|
||
result = await self.get_cached_or_compute(cache_key, compute)
|
||
return result.get("services", [])
|
||
|
||
async def get_incident_trends(
|
||
self, days: int = 30, period: str = "daily"
|
||
) -> dict[str, Any]:
|
||
"""
|
||
取得事件趨勢數據 (SQL GROUP BY 優化版)
|
||
"""
|
||
cache_key = f"stats:incident_trends:{days}:{period}"
|
||
|
||
async def compute() -> dict[str, Any]:
|
||
async with get_db_context() as db:
|
||
since = datetime.utcnow() - timedelta(days=days)
|
||
|
||
trunc_unit = {"daily": "day", "weekly": "week", "monthly": "month"}.get(
|
||
period, "day"
|
||
)
|
||
|
||
result = await db.execute(
|
||
select(
|
||
func.date_trunc(trunc_unit, IncidentRecord.created_at).label("period"),
|
||
func.count(IncidentRecord.incident_id).label("count"),
|
||
)
|
||
.where(IncidentRecord.created_at >= since)
|
||
.group_by(func.date_trunc(trunc_unit, IncidentRecord.created_at))
|
||
.order_by(func.date_trunc(trunc_unit, IncidentRecord.created_at))
|
||
)
|
||
rows = result.all()
|
||
|
||
trend_data = []
|
||
for row in rows:
|
||
if row.period:
|
||
if period == "daily":
|
||
date_str = row.period.strftime("%Y-%m-%d")
|
||
elif period == "weekly":
|
||
date_str = row.period.strftime("%Y-W%W")
|
||
else:
|
||
date_str = row.period.strftime("%Y-%m")
|
||
trend_data.append({"date": date_str, "count": row.count})
|
||
|
||
logger.info(
|
||
"stats_incident_trends",
|
||
period=period,
|
||
days=days,
|
||
data_points=len(trend_data),
|
||
)
|
||
|
||
return {"period": period, "data": trend_data}
|
||
|
||
return await self.get_cached_or_compute(cache_key, compute)
|
||
|
||
async def get_feedback_summary(self, days: int = 30) -> dict[str, Any]:
|
||
"""
|
||
取得人類回饋統計
|
||
"""
|
||
cache_key = f"stats:feedback_summary:{days}"
|
||
|
||
async def compute() -> dict[str, Any]:
|
||
async with get_db_context() as db:
|
||
since = datetime.utcnow() - timedelta(days=days)
|
||
|
||
result = await db.execute(
|
||
select(IncidentRecord.outcome).where(
|
||
IncidentRecord.created_at >= since,
|
||
IncidentRecord.outcome.isnot(None),
|
||
)
|
||
)
|
||
outcomes = [row[0] for row in result.all() if row[0]]
|
||
|
||
positive = 0
|
||
neutral = 0
|
||
negative = 0
|
||
themes: dict[str, int] = {}
|
||
|
||
for o in outcomes:
|
||
score = o.get("effectiveness_score") or o.get("feedback_score")
|
||
if score:
|
||
if score >= 4:
|
||
positive += 1
|
||
elif score == 3:
|
||
neutral += 1
|
||
else:
|
||
negative += 1
|
||
|
||
notes = o.get("learning_notes") or o.get("notes") or ""
|
||
if notes:
|
||
notes_lower = notes.lower()
|
||
theme_keywords = {
|
||
"timeout": ["timeout", "超時", "timed out", "deadline"],
|
||
"latency": ["latency", "延遲", "slow", "慢", "p99", "p95"],
|
||
"memory": ["memory", "記憶體", "oom", "heap", "內存"],
|
||
"cpu": ["cpu", "處理器", "high load", "負載"],
|
||
"network": ["network", "網路", "dns", "connection refused"],
|
||
"connection": ["connection", "連線", "socket", "tcp"],
|
||
"disk": ["disk", "磁碟", "storage", "io", "iops"],
|
||
"database": ["database", "資料庫", "db", "query", "deadlock"],
|
||
"pod": ["pod", "container", "restart", "crashloop"],
|
||
"scaling": ["scale", "擴容", "replica", "hpa"],
|
||
"error": ["error", "錯誤", "exception", "fail"],
|
||
"config": ["config", "配置", "env", "secret"],
|
||
}
|
||
for theme, keywords in theme_keywords.items():
|
||
if any(kw in notes_lower for kw in keywords):
|
||
themes[theme] = themes.get(theme, 0) + 1
|
||
|
||
sorted_themes = sorted(themes.items(), key=lambda x: x[1], reverse=True)[:5]
|
||
common_themes = [t[0] for t in sorted_themes]
|
||
|
||
total = positive + neutral + negative
|
||
|
||
logger.info(
|
||
"stats_feedback_summary",
|
||
total=total,
|
||
positive=positive,
|
||
negative=negative,
|
||
days=days,
|
||
)
|
||
|
||
return {
|
||
"total_feedback": total,
|
||
"positive_count": positive,
|
||
"neutral_count": neutral,
|
||
"negative_count": negative,
|
||
"common_themes": common_themes,
|
||
}
|
||
|
||
return await self.get_cached_or_compute(cache_key, compute)
|
||
|
||
|
||
# =============================================================================
|
||
# Dependency Injection
|
||
# =============================================================================
|
||
|
||
_stats_service: StatsService | None = None
|
||
|
||
|
||
def get_stats_service() -> StatsService:
|
||
"""取得 StatsService 實例 (Singleton)"""
|
||
global _stats_service
|
||
if _stats_service is None:
|
||
_stats_service = StatsService()
|
||
return _stats_service
|