Files
awoooi/apps/api/src/services/aiops_timeline_service.py
Your Name cc547736ab feat(wave6-8): P2.1 fusion + P2.2 governance + P2.4 consensus + Wave 7/8 BLOCKER 修復
承接 Wave 6/7/8 多 engineer 在 agent 限額前完成的代碼,補 commit 解 production
HEAD 隱性 import error(decision_fusion 已被 decision_manager 引用但檔案 untracked)。

新增(後端核心):
- decision_fusion.py (562 行) — P2.1 方法 III(OpenClaw + Hermes + Elephant 三 LLM 融合)
- aiops_timeline.py + aiops_timeline_service.py — critic B4 修復
  /api/v1/aiops/timeline endpoint,DB 存取抽到 service 層遵守 leWOOOgo 積木化
- migrations/p2_decision_fusion_columns.sql + rollback — approval_records fusion 欄位

修改(後端整合):
- decision_manager.py — fusion 三斷鏈修補(critic B1+B2+B3):
  · B1: 寫 _evidence_snapshot_ref 到 token.proposal_data
  · B2: fusion 前計算 complexity_score 並寫 token
  · B3: fusion composite 寫 token.proposal_data["decision_fusion"]
- auto_approve.py — fusion + consensus 認識(critic B3+B5):
  · composite > 0.7 → auto_execute_eligible bypass min_confidence
  · source=consensus_engine + score>=0.6 → 規則可信路徑
- consensus_engine.py — db-fix _save_consensus 重用 agent_sessions
- governance_agent.py — db-fix _alert PG 寫入 ai_governance_events
- approval_db.py — fusion 3 欄位 + 2 partial index + CheckConstraint
- db/models.py — schema 對齊 migration
- core/config.py — vuln #1 修復:OLLAMA_URL/_FALLBACK_URL field_validator
  拒絕公網 IP + 外部域名,僅允許私網/loopback/K8s SVC 白名單
- core/feature_flags.py — P2 fusion + consensus flags
- main.py — governance_agent lifespan 啟動
- failover_alerter.py — Wave8-X2: in-memory dedup fallback(Redis 拒絕後不 fail-open)
- ollama_*.py — metrics 整合 + recovery 改善
- auto_repair_service.py — verifier 接線

新增(測試 2438 行):
- test_decision_fusion.py / test_governance_agent.py / test_consensus_integration.py
- test_p2_db_fixes.py / test_wave8_fusion_fixes.py
- test_config_url_validation.py(vuln #1 12 tests)
- test_failover_alerter.py +Wave8-X2 in-memory dedup 補測

驗收: 116 tests pass (decision_fusion + wave8_fusion + config_url + consensus +
                      governance + p2_db_fixes + failover_alerter)

Conflict resolution:
- 3 檔(config.py + auto_approve.py + decision_manager.py)git stash pop 衝突
  保留 stashed (engineer 最終版),補回 ValueError 「公網 IP」字樣對齊 test

Note: 此 commit 解 production HEAD 隱性 import error
仍未修: vuln #4 prompt injection / debugger B14 quota fail-closed
       / B25-B26 drain_pending_tasks / B8 governance fail alert

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-Authored-By: Multiple Engineers (Wave 6/7/8) <noreply@anthropic.com>
2026-04-27 08:11:40 +08:00

228 lines
6.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""AIOps 時序服務 — 為 P2.5 frontend 提供 incident → learn 6 階段時序資料
leWOOOgo 積木化合規:DB 存取在 service 層,Router 只 call service method。
# 2026-04-27 Wave8-X3 by Claude — critic B4 timeline endpoint(積木化抽出)
"""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from typing import Any
import structlog
from sqlalchemy import select
from src.db.base import get_db_context
from src.db.models import (
ApprovalRecord,
AutoRepairExecution,
IncidentEvidence,
IncidentRecord,
)
logger = structlog.get_logger(__name__)
async def fetch_aiops_timeline(
    incident_id: str | None = None,
    hours: int = 24,
    severity: str | None = None,
    limit: int = 50,
) -> list[dict[str, Any]]:
    """Fetch recent incidents together with their stage-by-stage timeline.

    Incidents created within the last ``hours`` hours are selected, newest
    first, optionally narrowed by ``incident_id`` and/or ``severity``. For
    each incident the most recent evidence, approval and auto-repair
    execution rows are looked up and handed to ``_build_stages``.

    The related rows are loaded with one batched query per table instead of
    three queries per incident, so the number of round trips stays constant
    (four) regardless of how many incidents are returned.

    Args:
        incident_id: Optional incident ID to restrict the result to.
        hours: Look-back window in hours (intended range 1-168; the value is
            not validated here).
        severity: Optional severity filter (e.g. P0/P1/P2/P3).
        limit: Maximum number of incidents returned (default 50); passed
            straight to the query's LIMIT, not validated here.

    Returns:
        One dict per incident with the keys ``incident_id``, ``title``,
        ``severity``, ``started_at`` and ``stages``.
    """
    cutoff = datetime.now(tz=timezone.utc) - timedelta(hours=hours)
    results: list[dict[str, Any]] = []

    async with get_db_context() as db:
        stmt = select(IncidentRecord).where(IncidentRecord.created_at >= cutoff)
        if incident_id:
            stmt = stmt.where(IncidentRecord.incident_id == incident_id)
        if severity:
            stmt = stmt.where(IncidentRecord.severity == severity)
        stmt = stmt.order_by(IncidentRecord.created_at.desc()).limit(limit)
        incidents = (await db.execute(stmt)).scalars().all()

        # Skip the follow-up queries entirely when nothing matched.
        if incidents:
            incident_ids = [inc.incident_id for inc in incidents]
            evidence_by_id = await _latest_rows_by_incident(
                db, IncidentEvidence, IncidentEvidence.collected_at, incident_ids
            )
            approval_by_id = await _latest_rows_by_incident(
                db, ApprovalRecord, ApprovalRecord.created_at, incident_ids
            )
            execution_by_id = await _latest_rows_by_incident(
                db,
                AutoRepairExecution,
                AutoRepairExecution.created_at,
                incident_ids,
            )

            # Build the payload while the session is still open so that no
            # ORM attribute is touched after the context manager exits.
            for inc in incidents:
                results.append(
                    {
                        "incident_id": inc.incident_id,
                        "title": inc.alertname or "unknown",
                        "severity": inc.severity or "P3",
                        "started_at": (
                            inc.created_at.isoformat() if inc.created_at else None
                        ),
                        "stages": _build_stages(
                            inc,
                            evidence_by_id.get(inc.incident_id),
                            approval_by_id.get(inc.incident_id),
                            execution_by_id.get(inc.incident_id),
                        ),
                    }
                )

    logger.info(
        "aiops_timeline_fetched",
        count=len(results),
        hours=hours,
        severity=severity,
        incident_id=incident_id,
    )
    return results


async def _latest_rows_by_incident(
    db: Any,
    model: Any,
    order_column: Any,
    incident_ids: list[Any],
) -> dict[Any, Any]:
    """Return the newest ``model`` row per incident, keyed by ``incident_id``.

    A single query fetches every row whose ``incident_id`` is in
    ``incident_ids``, sorted by ``order_column`` descending; the first row
    seen for each incident is kept. Incidents without rows are simply absent
    from the returned mapping.
    """
    rows = (
        await db.execute(
            select(model)
            .where(model.incident_id.in_(incident_ids))
            .order_by(order_column.desc())
        )
    ).scalars().all()

    newest: dict[Any, Any] = {}
    for row in rows:
        # Rows arrive newest-first, so the first hit per incident wins.
        newest.setdefault(row.incident_id, row)
    return newest
def _build_stages(
incident: Any,
evidence: Any | None,
approval: Any | None,
execution: Any | None,
) -> list[dict[str, Any]]:
"""組裝 6 階段 timeline 資訊。"""
stages: list[dict[str, Any]] = []
stages.append(
{
"stage": "alert",
"status": "completed",
"timestamp": (
incident.created_at.isoformat() if incident.created_at else None
),
"data": {
"alert_name": incident.alertname,
"severity": incident.severity,
"signals": incident.signals or [],
},
}
)
stages.append(
{
"stage": "diagnose",
"status": "completed" if evidence else "skipped",
"timestamp": (
evidence.collected_at.isoformat()
if evidence and evidence.collected_at
else None
),
"data": {
"summary": evidence.evidence_summary if evidence else None,
"duration_ms": (
evidence.collection_duration_ms if evidence else None
),
"sensors_succeeded": (
evidence.sensors_succeeded if evidence else None
),
},
}
)
stages.append(
{
"stage": "decide",
"status": "completed" if approval else "skipped",
"timestamp": (
approval.created_at.isoformat()
if approval and approval.created_at
else None
),
"data": {
"approval_id": approval.id if approval else None,
"composite_score": (
approval.composite_score if approval else None
),
"complexity_tier": (
approval.complexity_tier if approval else None
),
"fusion_details": (
approval.decision_fusion_details if approval else None
),
"status": approval.status if approval else None,
},
}
)
stages.append(
{
"stage": "execute",
"status": "completed" if execution else "skipped",
"timestamp": (
execution.created_at.isoformat()
if execution and execution.created_at
else None
),
"data": {
"success": execution.success if execution else None,
"execution_time_ms": (
execution.execution_time_ms if execution else None
),
},
}
)
stages.append(
{
"stage": "verify",
"status": (
"completed"
if evidence and evidence.verification_result
else "skipped"
),
"timestamp": None,
"data": {
"outcome": evidence.verification_result if evidence else None,
},
}
)
stages.append(
{
"stage": "learn",
"status": (
"completed"
if approval and approval.matched_playbook_id
else "skipped"
),
"timestamp": None,
"data": {
"playbook_id": (
approval.matched_playbook_id if approval else None
),
},
}
)
return stages