All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 14m56s
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
185 lines
5.2 KiB
Python
185 lines
5.2 KiB
Python
"""
|
||
Learning API - 學習系統 API
|
||
===========================
|
||
Phase D-G P0 修正: 新增學習 API 端點
|
||
|
||
端點:
|
||
- GET /api/v1/learning/summary/{anomaly_key} - 學習摘要
|
||
- GET /api/v1/learning/recommendation/{anomaly_key} - 修復推薦
|
||
|
||
版本: v1.0
|
||
建立: 2026-03-29 (台北時區)
|
||
建立者: Claude Code (Phase D-G P0 修正)
|
||
|
||
遵循原則:
|
||
- Router 只做 HTTP 轉發
|
||
- 業務邏輯在 Service 層
|
||
- 符合 API 路徑命名規範
|
||
"""
|
||
|
||
import structlog
|
||
from fastapi import APIRouter, HTTPException
|
||
from pydantic import BaseModel
|
||
|
||
from src.services.learning_service import get_learning_service
|
||
|
||
# Module-level structured logger, named after this module.
logger = structlog.get_logger(__name__)

# Router that collects every endpoint in this file under the "/learning" prefix.
router = APIRouter(
    prefix="/learning",
    tags=["Learning"],
)
|
||
|
||
|
||
# =============================================================================
|
||
# Response Models
|
||
# =============================================================================
|
||
|
||
|
||
class BestAction(BaseModel):
    """最佳動作"""

    # Response sub-model: the best repair action for an anomaly and its
    # success rate. Nested inside LearningSummaryResponse.best_action.
    # NOTE: the class docstring is left untouched because pydantic exposes
    # it as the schema description.

    # Identifier of the repair action.
    action: str

    # Success rate of that action (scale not visible here — TODO confirm
    # whether it is a 0-1 fraction or a percentage).
    success_rate: float
|
||
|
||
|
||
class LearningSummaryResponse(BaseModel):
    """學習摘要回應"""

    # Response body of GET /summary/{anomaly_key}: the learning summary
    # for a single anomaly key.
    # NOTE: the class docstring is left untouched because pydantic exposes
    # it as the schema description.

    # Anomaly key the summary belongs to.
    anomaly_key: str

    # Number of repair attempts recorded for this key.
    total_repair_attempts: int

    # Success rate across all recorded attempts.
    overall_success_rate: float

    # Repair actions that have been tried for this key.
    actions_tried: list[str]

    # Best action so far; the field is required but may be null.
    best_action: BestAction | None

    # One of: insufficient, learning, sufficient, excellent.
    learning_status: str
|
||
|
||
|
||
class AlternativeAction(BaseModel):
    """替代動作"""

    # Response sub-model: one alternative repair action, listed in
    # RecommendationResponse.alternatives.
    # NOTE: the class docstring is left untouched because pydantic exposes
    # it as the schema description.

    # Identifier of the alternative repair action.
    action: str

    # Confidence attached to this alternative.
    confidence: float

    # Tier of the alternative (tier semantics are defined outside this
    # file — TODO confirm).
    tier: int
|
||
|
||
|
||
class RecommendationResponse(BaseModel):
    """修復推薦回應"""

    # Response body of GET /recommendation/{anomaly_key}: the recommended
    # repair action plus its alternatives.
    # NOTE: the class docstring is left untouched because pydantic exposes
    # it as the schema description.

    # Identifier of the recommended repair action.
    action: str

    # Confidence attached to the recommendation.
    confidence: float

    # Tier of the recommendation (tier semantics are defined outside this
    # file — TODO confirm).
    tier: int

    # What the recommendation is based on (free-form string supplied by
    # the learning service).
    based_on: str

    # Average execution time of the action (unit not visible here —
    # TODO confirm).
    avg_execution_time: float

    # Other candidate actions.
    alternatives: list[AlternativeAction]
|
||
|
||
|
||
# =============================================================================
|
||
# Endpoints
|
||
# =============================================================================
|
||
|
||
|
||
@router.get(
    "/summary/{anomaly_key}",
    summary="取得學習摘要",
    description="根據異常 key 取得歷史學習摘要,包含嘗試過的修復動作和成功率",
    response_model=LearningSummaryResponse,
)
async def get_learning_summary(anomaly_key: str) -> LearningSummaryResponse:
    """
    Return the learning summary for one anomaly key.

    Args:
        anomaly_key: Anomaly key taken from the URL path
            (for example "restart_pod:awoooi-api-*").

    Returns:
        LearningSummaryResponse: built by unpacking the mapping returned
        by the learning service.
    """
    # The router stays thin: the lookup is delegated to the service layer.
    payload = await get_learning_service().get_learning_summary(anomaly_key)

    logger.info(
        "learning_summary_fetched",
        anomaly_key=anomaly_key,
        # Falls back to 0 when the service mapping has no attempt count.
        total_attempts=payload.get("total_repair_attempts", 0),
    )

    # Keys of the service mapping become the response model's fields.
    return LearningSummaryResponse(**payload)
|
||
|
||
|
||
@router.get(
    "/recommendation/{anomaly_key}",
    summary="取得修復推薦",
    description="根據歷史學習數據,推薦最佳修復方案",
    response_model=RecommendationResponse,
)
async def get_recommendation(anomaly_key: str) -> RecommendationResponse:
    """
    Return the recommended repair for one anomaly key.

    Args:
        anomaly_key: Anomaly key taken from the URL path.

    Returns:
        RecommendationResponse: repair recommendation (action, confidence
        and alternatives), built by unpacking the mapping returned by the
        learning service.
    """
    # The router stays thin: the recommendation is computed by the service layer.
    suggestion = await get_learning_service().get_recommended_fix(anomaly_key)

    logger.info(
        "learning_recommendation_fetched",
        anomaly_key=anomaly_key,
        # .get() logs None rather than failing if either key is absent.
        recommended_action=suggestion.get("action"),
        confidence=suggestion.get("confidence"),
    )

    # Keys of the service mapping become the response model's fields.
    return RecommendationResponse(**suggestion)
|
||
|
||
|
||
# =============================================================================
|
||
# Evolver Admin Endpoints
|
||
# =============================================================================
|
||
|
||
|
||
class EvolverRunResponse(BaseModel):
    """Evolver 執行報告回應"""

    # Response body of POST /evolver/run: the report of one Evolver run.
    # NOTE: the class docstring is left untouched because pydantic exposes
    # it as the schema description.

    # Count of archived items.
    archived_count: int

    # Count of merged items.
    merged_count: int

    # Count of skipped items.
    skipped_count: int

    # Identifiers of the archived items.
    archived_ids: list[str]

    # Merge pairs, shaped as [[dropped_id, kept_id], ...].
    merged_pairs: list[list[str]]

    # Error messages reported by the run.
    errors: list[str]

    # Total number of affected items, as reported by the Evolver.
    total_affected: int
|
||
|
||
|
||
@router.post(
    "/evolver/run",
    summary="手動觸發 Evolver Agent",
    description=(
        "立即執行 Playbook Evolver:低信任封存 + 休眠封存 + 相似合併。"
        "需要 AIOPS_P3_EVOLVER_ENABLED=True,否則返回空報告(HTTP 200)。"
        "ADR-083 Phase 3 手動演練端點。"
    ),
    response_model=EvolverRunResponse,
)
async def run_evolver_now() -> EvolverRunResponse:
    """
    Manually trigger the Evolver Agent (Phase 3 exit condition #6 drill endpoint).

    Returns:
        EvolverRunResponse: merge/archive report copied from the Evolver's
        run report.

    Raises:
        HTTPException: status 500 with ``str(exc)`` as detail when
            importing or running the Evolver raises any exception.
    """
    # NOTE(review): the OpenAPI description above says the feature flag must
    # be enabled, yet the call passes force=True, which per the original
    # author's comment bypasses the feature flag for manual admin runs.
    # Confirm which one is intended.
    try:
        # Imported at call time, so an import failure is also reported as HTTP 500.
        from src.services.playbook_evolver import run_evolver

        outcome = await run_evolver(force=True)
    except Exception as err:
        # Log with traceback, then surface the failure to the caller.
        logger.exception("evolver_manual_run_failed")
        raise HTTPException(status_code=500, detail=str(err)) from err
    else:
        logger.info(
            "evolver_manual_run_done",
            archived=outcome.archived_count,
            merged=outcome.merged_count,
            skipped=outcome.skipped_count,
        )
        return EvolverRunResponse(
            archived_count=outcome.archived_count,
            merged_count=outcome.merged_count,
            skipped_count=outcome.skipped_count,
            archived_ids=outcome.archived_ids,
            # Each pair is converted to a list to match list[list[str]].
            merged_pairs=list(map(list, outcome.merged_pairs)),
            errors=outcome.errors,
            total_affected=outcome.total_affected,
        )
|