承接 Wave 6/7/8 多 engineer 在 agent 限額前完成的代碼,補 commit 解 production HEAD 隱性 import error(decision_fusion 已被 decision_manager 引用但檔案 untracked)。 新增(後端核心): - decision_fusion.py (562 行) — P2.1 方法 III(OpenClaw + Hermes + Elephant 三 LLM 融合) - aiops_timeline.py + aiops_timeline_service.py — critic B4 修復 /api/v1/aiops/timeline endpoint,DB 存取抽到 service 層遵守 leWOOOgo 積木化 - migrations/p2_decision_fusion_columns.sql + rollback — approval_records fusion 欄位 修改(後端整合): - decision_manager.py — fusion 三斷鏈修補(critic B1+B2+B3): · B1: 寫 _evidence_snapshot_ref 到 token.proposal_data · B2: fusion 前計算 complexity_score 並寫 token · B3: fusion composite 寫 token.proposal_data["decision_fusion"] - auto_approve.py — fusion + consensus 認識(critic B3+B5): · composite > 0.7 → auto_execute_eligible bypass min_confidence · source=consensus_engine + score>=0.6 → 規則可信路徑 - consensus_engine.py — db-fix _save_consensus 重用 agent_sessions - governance_agent.py — db-fix _alert PG 寫入 ai_governance_events - approval_db.py — fusion 3 欄位 + 2 partial index + CheckConstraint - db/models.py — schema 對齊 migration - core/config.py — vuln #1 修復:OLLAMA_URL/_FALLBACK_URL field_validator 拒絕公網 IP + 外部域名,僅允許私網/loopback/K8s SVC 白名單 - core/feature_flags.py — P2 fusion + consensus flags - main.py — governance_agent lifespan 啟動 - failover_alerter.py — Wave8-X2: in-memory dedup fallback(Redis 拒絕後不 fail-open) - ollama_*.py — metrics 整合 + recovery 改善 - auto_repair_service.py — verifier 接線 新增(測試 2438 行): - test_decision_fusion.py / test_governance_agent.py / test_consensus_integration.py - test_p2_db_fixes.py / test_wave8_fusion_fixes.py - test_config_url_validation.py(vuln #1 12 tests) - test_failover_alerter.py +Wave8-X2 in-memory dedup 補測 驗收: 116 tests pass (decision_fusion + wave8_fusion + config_url + consensus + governance + p2_db_fixes + failover_alerter) Conflict resolution: - 3 檔(config.py + auto_approve.py + decision_manager.py)git stash pop 衝突 保留 stashed (engineer 最終版),補回 ValueError 「公網 IP」字樣對齊 test Note: 此 commit 解 production 
HEAD 隱性 import error。 仍未修: vuln #4 prompt injection / debugger B14 quota fail-closed / B25-B26 drain_pending_tasks / B8 governance fail alert。 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> Co-Authored-By: Multiple Engineers (Wave 6/7/8) <noreply@anthropic.com>
228 lines
6.8 KiB
Python
228 lines
6.8 KiB
Python
"""AIOps 時序服務 — 為 P2.5 frontend 提供 incident → learn 6 階段時序資料
|
||
|
||
leWOOOgo 積木化合規:DB 存取在 service 層,Router 只 call service method。
|
||
|
||
# 2026-04-27 Wave8-X3 by Claude — critic B4 timeline endpoint(積木化抽出)
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from datetime import datetime, timedelta, timezone
|
||
from typing import Any
|
||
|
||
import structlog
|
||
from sqlalchemy import select
|
||
|
||
from src.db.base import get_db_context
|
||
from src.db.models import (
|
||
ApprovalRecord,
|
||
AutoRepairExecution,
|
||
IncidentEvidence,
|
||
IncidentRecord,
|
||
)
|
||
|
||
# Module-level structlog logger, named after this module.
logger = structlog.get_logger(__name__)
|
||
|
||
|
||
# Documented bounds of the ``hours`` look-back window (one hour to one week).
_MIN_LOOKBACK_HOURS = 1
_MAX_LOOKBACK_HOURS = 168


async def _latest_by_incident(
    db: Any,
    model: Any,
    order_column: Any,
    incident_ids: list[Any],
) -> dict[Any, Any]:
    """Load the newest ``model`` row for each incident ID with a single query.

    Args:
        db: Open database session obtained from ``get_db_context()``.
        model: ORM class exposing an ``incident_id`` column.
        order_column: Column of ``model`` used to decide which row is newest.
        incident_ids: Incident IDs to look up; may be empty.

    Returns:
        Mapping of incident ID to its newest row. Incidents without any row
        are simply absent from the mapping. When several rows share the same
        ``order_column`` value, which one is kept is unspecified.
    """
    if not incident_ids:
        # Nothing to look up; avoid issuing an empty ``IN ()`` query.
        return {}

    rows = (
        await db.execute(
            select(model)
            .where(model.incident_id.in_(incident_ids))
            .order_by(order_column.desc())
        )
    ).scalars().all()

    latest: dict[Any, Any] = {}
    for row in rows:
        # Rows arrive newest-first, so the first row seen per incident wins.
        latest.setdefault(row.incident_id, row)
    return latest


async def fetch_aiops_timeline(
    incident_id: str | None = None,
    hours: int = 24,
    severity: str | None = None,
    limit: int = 50,
) -> list[dict[str, Any]]:
    """Fetch the six-stage timeline of recent incidents.

    Incidents created within the look-back window are returned newest first.
    For every incident the most recent evidence, approval and auto-repair
    execution rows are loaded (one batched query per table rather than three
    queries per incident) and handed to ``_build_stages``.

    Args:
        incident_id: Restrict the result to this incident ID (optional).
        hours: Look-back window in hours. Values outside the documented
            1-168 range are clamped into it.
        severity: Severity filter (P0/P1/P2/P3), optional.
        limit: Maximum number of incidents to return (default 50) so a
            request cannot scan the whole table. Negative values are
            treated as 0.

    Returns:
        list[dict]: One entry per incident with ``incident_id``, ``title``,
        ``severity``, ``started_at`` and ``stages``
        (alert/diagnose/decide/execute/verify/learn).
    """
    # Enforce the documented bounds: a non-positive window would silently match
    # nothing, a huge one overflows ``timedelta``, and a negative LIMIT is an
    # error or "unbounded" depending on the database backend.
    hours = min(max(hours, _MIN_LOOKBACK_HOURS), _MAX_LOOKBACK_HOURS)
    limit = max(limit, 0)

    cutoff = datetime.now(tz=timezone.utc) - timedelta(hours=hours)

    async with get_db_context() as db:
        stmt = select(IncidentRecord).where(IncidentRecord.created_at >= cutoff)
        if incident_id:
            stmt = stmt.where(IncidentRecord.incident_id == incident_id)
        if severity:
            stmt = stmt.where(IncidentRecord.severity == severity)
        stmt = stmt.order_by(IncidentRecord.created_at.desc()).limit(limit)

        incidents = (await db.execute(stmt)).scalars().all()
        incident_ids = [inc.incident_id for inc in incidents]

        # One query per related table instead of three queries per incident.
        evidence_by_id = await _latest_by_incident(
            db, IncidentEvidence, IncidentEvidence.collected_at, incident_ids
        )
        approval_by_id = await _latest_by_incident(
            db, ApprovalRecord, ApprovalRecord.created_at, incident_ids
        )
        execution_by_id = await _latest_by_incident(
            db, AutoRepairExecution, AutoRepairExecution.created_at, incident_ids
        )

        # Build the payload while the session is still open, since the stage
        # builder reads attributes from the loaded ORM rows.
        results: list[dict[str, Any]] = [
            {
                "incident_id": inc.incident_id,
                "title": inc.alertname or "unknown",
                "severity": inc.severity or "P3",
                "started_at": (
                    inc.created_at.isoformat() if inc.created_at else None
                ),
                "stages": _build_stages(
                    inc,
                    evidence_by_id.get(inc.incident_id),
                    approval_by_id.get(inc.incident_id),
                    execution_by_id.get(inc.incident_id),
                ),
            }
            for inc in incidents
        ]

    logger.info(
        "aiops_timeline_fetched",
        count=len(results),
        hours=hours,
        severity=severity,
        incident_id=incident_id,
    )
    return results
|
||
|
||
|
||
def _build_stages(
    incident: Any,
    evidence: Any | None,
    approval: Any | None,
    execution: Any | None,
) -> list[dict[str, Any]]:
    """Assemble the six timeline stages for a single incident.

    Args:
        incident: Incident row; read for ``created_at``, ``alertname``,
            ``severity`` and ``signals``.
        evidence: Latest evidence row for the incident, or ``None``.
        approval: Latest approval row for the incident, or ``None``.
        execution: Latest auto-repair execution row, or ``None``.

    Returns:
        Six dicts in the order alert, diagnose, decide, execute, verify,
        learn. Each has the keys ``stage``, ``status``, ``timestamp`` and
        ``data``. A stage whose backing row is missing is reported as
        ``"skipped"`` with ``None`` values in its data.
    """

    def _iso(moment: Any) -> Any:
        # ISO-8601 text for a set timestamp, otherwise None.
        return moment.isoformat() if moment else None

    def _attr(record: Any, name: str) -> Any:
        # Attribute of an optional row; None when the row itself is missing.
        return getattr(record, name) if record else None

    def _status(done: Any) -> str:
        return "completed" if done else "skipped"

    def _stage(name: str, status: str, timestamp: Any, data: dict[str, Any]) -> dict[str, Any]:
        return {
            "stage": name,
            "status": status,
            "timestamp": timestamp,
            "data": data,
        }

    return [
        # The alert stage always exists: the incident row itself is the alert.
        _stage(
            "alert",
            "completed",
            _iso(incident.created_at),
            {
                "alert_name": incident.alertname,
                "severity": incident.severity,
                "signals": incident.signals or [],
            },
        ),
        _stage(
            "diagnose",
            _status(evidence),
            _iso(_attr(evidence, "collected_at")),
            {
                "summary": _attr(evidence, "evidence_summary"),
                "duration_ms": _attr(evidence, "collection_duration_ms"),
                "sensors_succeeded": _attr(evidence, "sensors_succeeded"),
            },
        ),
        _stage(
            "decide",
            _status(approval),
            _iso(_attr(approval, "created_at")),
            {
                "approval_id": _attr(approval, "id"),
                "composite_score": _attr(approval, "composite_score"),
                "complexity_tier": _attr(approval, "complexity_tier"),
                "fusion_details": _attr(approval, "decision_fusion_details"),
                "status": _attr(approval, "status"),
            },
        ),
        _stage(
            "execute",
            _status(execution),
            _iso(_attr(execution, "created_at")),
            {
                "success": _attr(execution, "success"),
                "execution_time_ms": _attr(execution, "execution_time_ms"),
            },
        ),
        # Verify and learn carry no timestamp of their own.
        _stage(
            "verify",
            _status(_attr(evidence, "verification_result")),
            None,
            {"outcome": _attr(evidence, "verification_result")},
        ),
        _stage(
            "learn",
            _status(_attr(approval, "matched_playbook_id")),
            None,
            {"playbook_id": _attr(approval, "matched_playbook_id")},
        ),
    ]
|