5699 lines
208 KiB
Python
5699 lines
208 KiB
Python
"""
|
||
AwoooP Operator Console — Platform Operator Service
|
||
====================================================
|
||
leWOOOgo 積木化:DB 存取集中在 Service 層,Router 不直接引用 get_db。
|
||
ADR-106(AwoooP Agent Platform)
|
||
2026-05-05 ogt + Claude Sonnet 4.6
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import json
|
||
import os
|
||
import re
|
||
import time
|
||
import uuid
|
||
from collections import defaultdict
|
||
from collections.abc import Mapping
|
||
from datetime import UTC, datetime, timedelta
|
||
from pathlib import Path
|
||
from typing import Any, get_args
|
||
from urllib.parse import urlencode
|
||
from uuid import UUID
|
||
|
||
import httpx
|
||
import structlog
|
||
from fastapi import HTTPException, status
|
||
from sqlalchemy import func, select, text, update
|
||
from sqlalchemy import or_ as sa_or
|
||
|
||
from src.core.config import get_settings
|
||
from src.db.awooop_models import (
|
||
AwoooPContractRevision,
|
||
AwoooPConversationEvent,
|
||
AwoooPMcpGatewayAudit,
|
||
AwoooPOutboundMessage,
|
||
AwoooPRunState,
|
||
AwoooPRunStepJournal,
|
||
)
|
||
from src.db.base import get_db_context
|
||
from src.db.models import IncidentRecord, MCPAuditLog
|
||
from src.services.audit_sink import write_audit
|
||
from src.services.awooop_ansible_audit_service import summarize_ansible_execution
|
||
from src.services.awooop_approval_token import issue_approval_token, record_approval
|
||
from src.services.awooop_truth_chain_service import (
|
||
_summarize_gateway_mcp,
|
||
_summarize_mcp,
|
||
fetch_truth_chain,
|
||
)
|
||
from src.services.governance_km_stale_review_service import (
|
||
query_km_stale_owner_review_completion_queue,
|
||
)
|
||
from src.services.ollama_endpoint_resolver import (
|
||
OllamaEndpointSelection,
|
||
OllamaWorkloadType,
|
||
resolve_ollama_order,
|
||
)
|
||
from src.services.ollama_failover_manager import (
|
||
OllamaEndpoint,
|
||
OllamaRoutingResult,
|
||
get_ollama_failover_manager,
|
||
)
|
||
from src.services.ollama_health_monitor import HealthReport, HealthStatus
|
||
from src.services.operator_outcome import build_operator_outcome
|
||
from src.services.operator_summary_cache import (
|
||
get_cached_operator_summary_async,
|
||
store_operator_summary_async,
|
||
)
|
||
from src.services.run_state_machine import transition
|
||
from src.services.snapshot_paths import resolve_repo_root
|
||
|
||
# Module-level structlog logger named after this module.
logger = structlog.get_logger(__name__)

# Hard cap on the number of contract revisions returned by list_contracts().
_MAX_CONTRACTS = 200
# Paging / sizing limits. Their consumers are outside this section of the
# file — NOTE(review): confirm usage before changing any of these values.
_DEFAULT_PER_PAGE = 50
_MAX_PER_PAGE = 200
_MAX_EVENTS = 100
_MAX_TIMELINE_ITEMS = 100
_MAX_LIST_CONTEXT_ROWS = 500
_RUN_CONTEXT_QUERY_CHUNK_SIZE = 500
_MAX_STEP_SUMMARY_CHARS = 128
# Timeouts in seconds (per the names); consumers are not visible in this section.
_AI_ROUTE_STATUS_SELECT_TIMEOUT_SECONDS = 12.0
_AI_ROUTE_STATUS_CONNECTIVITY_TIMEOUT_SECONDS = 2.5
_REMEDIATION_HISTORY_LIMIT = 20
_ADR100_GATE5_PROJECTION_TRIGGER = "adr100_runtime_replay_gate5"
# TTL passed to the operator-summary cache by list_callback_replies().
# Read once at import time from AWOOOP_CALLBACK_REPLY_CACHE_TTL_SECONDS
# (default "20"); int() raises ValueError at import if the value is not an
# integer string.
_CALLBACK_REPLY_CACHE_TTL_SECONDS = int(
    os.getenv("AWOOOP_CALLBACK_REPLY_CACHE_TTL_SECONDS", "20")
)
# Matches identifiers of the form INC-<8 digits>-<4+ uppercase letters/digits>.
_INCIDENT_ID_RE = re.compile(r"\bINC-\d{8}-[A-Z0-9]{4,}\b")
# Accepted values for the `remediation_status` filter — presumably enforced by
# _validate_remediation_status_filter(), which is defined elsewhere.
_REMEDIATION_STATUS_FILTERS = {
    "mcp_observed",
    "no_evidence",
    "read_only_dry_run",
    "write_observed",
    "blocked",
    "observed",
}
# Accepted values for the `callback_reply_status` filter — presumably enforced
# by _validate_callback_reply_status_filter(), which is defined elsewhere.
_CALLBACK_REPLY_STATUS_FILTERS = {
    "no_callback",
    "sent",
    "fallback_sent",
    "rescue_sent",
    "failed",
    "observed",
}
# Maps a callback-reply filter value to a raw status string; looked up by
# list_callback_replies(). "no_callback" and "observed" have no entry here.
_CALLBACK_REPLY_RAW_STATUS_BY_FILTER = {
    "sent": "callback_reply_sent",
    "fallback_sent": "callback_reply_fallback_sent",
    "rescue_sent": "callback_reply_rescue_sent",
    "failed": "callback_reply_failed",
}
# 1-64 characters of letters, digits, "_", ":" or "-" (case-insensitive).
_CALLBACK_REPLY_ACTION_RE = re.compile(r"^[a-z0-9_:-]{1,64}$", re.IGNORECASE)
_CICD_STATUS_FILTERS = {"running", "success", "failed", "pending"}
# Same character class and length bound as _CALLBACK_REPLY_ACTION_RE.
_CICD_STAGE_RE = re.compile(r"^[a-z0-9_:-]{1,64}$", re.IGNORECASE)
_AI_ROUTE_STATUS_SCHEMA_VERSION = "awooop_ai_route_status_v1"
# Set of the type arguments of OllamaWorkloadType (presumably a Literal of
# workload names — TODO confirm against ollama_endpoint_resolver).
_AI_ROUTE_WORKLOADS = set(get_args(OllamaWorkloadType))
_AI_ROUTE_REPAIR_EVIDENCE_PROVIDER = "ai_route_repair"
_AI_ROUTE_REPAIR_EVIDENCE_STAGE = "repair_diagnosis"
_SOURCE_CORRELATION_SCHEMA_VERSION = "source_provider_correlation_v1"
_SOURCE_CORRELATION_PROVIDERS = ("sentry", "signoz")
_SOURCE_CORRELATION_EVENT_LIMIT = 200
_SOURCE_CORRELATION_LOOKBACK_DAYS = 7
_SOURCE_CORRELATION_PRE_WINDOW_HOURS = 2
# Schema-version tags for response payloads built elsewhere in this module.
_KM_STALE_COMPLETION_CALLBACK_SCHEMA_VERSION = (
    "km_stale_owner_review_completion_callback_summary_v1"
)
_CALLBACK_EVIDENCE_CAPTURE_STATUS_SCHEMA_VERSION = "callback_evidence_capture_status_v1"
_CALLBACK_REPLY_AUDIT_SUMMARY_SCHEMA_VERSION = (
    "telegram_callback_reply_audit_summary_v1"
)
|
||
|
||
# =============================================================================
|
||
# Tenants
|
||
# =============================================================================
|
||
|
||
# Repo-root-relative paths of the committed JSON snapshots that
# build_tenant_asset_inventory() loads via _load_committed_snapshot().
# They are also echoed verbatim in the inventory's "evidence_refs" list.
_PUBLIC_GATEWAY_INVENTORY_SNAPSHOT = (
    "docs/security/public-gateway-preflight-inventory.snapshot.json"
)
_SOURCE_CONTROL_READINESS_SNAPSHOT = (
    "docs/security/source-control-primary-readiness-gate.snapshot.json"
)
|
||
|
||
# Public domain -> product attribution used by _domain_product().
# Each row is (product_id, product_name, category, domains); the comprehension
# expands it to one independent {"product_id", "product_name", "category"}
# dict per domain, in the order the domains are listed below.
_DOMAIN_PRODUCT_OVERRIDES: dict[str, dict[str, str]] = {
    domain: {
        "product_id": product_id,
        "product_name": product_name,
        "category": category,
    }
    for product_id, product_name, category, domains in (
        (
            "awoooi",
            "AWOOOI / AwoooP / IwoooS",
            "core_platform",
            (
                "aiops.wooo.work",
                "awoooi.wooo.work",
                "api.awoooi.wooo.work",
                "app.awoooi.wooo.work",
                "api.aiops.wooo.work",
                "clawbot.aiops.wooo.work",
                "command.aiops.wooo.work",
                "security.wooo.work",
            ),
        ),
        ("ewoooc", "EwoooC / Mo", "business_product", ("mo.wooo.work",)),
        (
            "2026-fifa-world-cup",
            "2026 FIFA World Cup",
            "business_product",
            ("2026fifa.wooo.work",),
        ),
        ("vibework", "VibeWork", "business_product", ("vibework.wooo.work",)),
        (
            "agent-bounty-protocol",
            "Agent Bounty Protocol",
            "business_product",
            ("agent.wooo.work",),
        ),
        ("stockplatform", "StockPlatform", "business_product", ("stock.wooo.work",)),
        ("bitan-pharmacy", "Bitan Pharmacy", "business_product", ("bitan.wooo.work",)),
        (
            "tsenyang-website",
            "TsenYang Website",
            "public_site",
            ("tsenyang.com", "www.tsenyang.com", "tsenyang.wooo.work"),
        ),
        ("vtuber", "VTuber", "public_site", ("vtuber.wooo.work",)),
        ("wooo-open-design", "WOOO Open Design", "platform_tool", ("design.wooo.work",)),
        ("data-workspace", "Data Workspace / Grist", "platform_tool", ("grist.wooo.work",)),
        (
            "workflow-automation",
            "Workflow Automation / n8n",
            "platform_tool",
            ("n8n.wooo.work",),
        ),
        (
            "security-secrets-platform",
            "Security / Vault",
            "platform_tool",
            ("vault.wooo.work",),
        ),
        (
            "ai-model-gateway",
            "AI Model Gateway / Ollama",
            "platform_tool",
            ("ollama.wooo.work",),
        ),
        (
            "source-control",
            "Source Control / DevOps",
            "platform_tool",
            (
                "gitea.wooo.work",
                "gitlab.wooo.work",
                "harbor.wooo.work",
                "registry.wooo.work",
            ),
        ),
        (
            "observability-tooling",
            "Observability / LLMOps",
            "platform_tool",
            (
                "sentry.wooo.work",
                "signoz.wooo.work",
                "langfuse.wooo.work",
                "monitor.wooo.work",
            ),
        ),
    )
    for domain in domains
}
|
||
|
||
# Canonical product surfaces reported by build_tenant_asset_inventory().
# Each entry is copied into the inventory's "products" list; "public_routes"
# is compared against the known route domains and "source_keys" against the
# source-repo keys (exact string membership in both cases).
_CANONICAL_PRODUCT_SURFACES: tuple[dict[str, Any], ...] = (
    {
        "product_id": "awoooi",
        "product_name": "AWOOOI / AwoooP / IwoooS",
        "project_id": "awoooi",
        "category": "core_platform",
        "surface_kind": "platform_product",
        "owner_lane": "S4.9-S4.13",
        "coverage_status": "read_only_visible",
        "public_routes": [
            "awoooi.wooo.work",
            "aiops.wooo.work",
            "api.awoooi.wooo.work",
            "app.awoooi.wooo.work",
            "api.aiops.wooo.work",
            "clawbot.aiops.wooo.work",
            "command.aiops.wooo.work",
            "security.wooo.work",
        ],
        "source_keys": ["wooo/awoooi"],
    },
    {
        "product_id": "ewoooc",
        "product_name": "EwoooC / Mo",
        "project_id": "ewoooc",
        "category": "business_product",
        "surface_kind": "product_console",
        "owner_lane": "S4.10",
        "coverage_status": "owner_response_required",
        "public_routes": ["mo.wooo.work"],
        # NOTE(review): this is ONE free-text string, not three keys. The
        # exact-membership checks in _build_source_repo_assets() and
        # build_tenant_asset_inventory() only match this literal — confirm
        # that is intended.
        "source_keys": ["wooo/ewoooc / root/momo-pro-system / momo working trees"],
    },
    {
        "product_id": "2026-fifa-world-cup",
        "product_name": "2026 FIFA World Cup",
        "project_id": "2026FIFAWorldCup",
        "category": "business_product",
        "surface_kind": "product_site",
        "owner_lane": "product_onboarding",
        "coverage_status": "owner_response_required",
        "public_routes": ["2026fifa.wooo.work"],
        "source_keys": ["2026FIFAWorldCup"],
    },
    {
        "product_id": "vibework",
        "product_name": "VibeWork",
        "project_id": "vibework",
        "category": "business_product",
        "surface_kind": "product_site",
        "owner_lane": "S4.10",
        "coverage_status": "owner_response_required",
        "public_routes": ["vibework.wooo.work"],
        "source_keys": ["vibework"],
    },
    {
        "product_id": "agent-bounty-protocol",
        "product_name": "Agent Bounty Protocol",
        "project_id": "agent-bounty-protocol",
        "category": "business_product",
        "surface_kind": "agent_protocol",
        "owner_lane": "S4.10",
        "coverage_status": "owner_response_required",
        "public_routes": ["agent.wooo.work"],
        "source_keys": ["agent-bounty-protocol"],
    },
    {
        "product_id": "stockplatform",
        "product_name": "StockPlatform",
        "project_id": "stockplatform",
        "category": "business_product",
        "surface_kind": "product_site",
        "owner_lane": "P0-CRON-001",
        "coverage_status": "read_only_visible",
        "public_routes": ["stock.wooo.work"],
        "source_keys": ["stockplatform"],
    },
    {
        "product_id": "bitan-pharmacy",
        "product_name": "Bitan Pharmacy",
        "project_id": "bitan-pharmacy",
        "category": "business_product",
        "surface_kind": "product_site",
        "owner_lane": "P0-CRON-002",
        "coverage_status": "read_only_visible",
        "public_routes": ["bitan.wooo.work"],
        "source_keys": ["bitan-pharmacy"],
    },
    {
        "product_id": "tsenyang-website",
        "product_name": "TsenYang Website",
        "project_id": "tsenyang-website",
        "category": "public_site",
        "surface_kind": "public_site",
        "owner_lane": "S4.10",
        "coverage_status": "owner_response_required",
        "public_routes": ["tsenyang.com", "www.tsenyang.com", "tsenyang.wooo.work"],
        "source_keys": ["tsenyang-website"],
    },
    {
        "product_id": "vtuber",
        "product_name": "VTuber",
        "project_id": "vtuber",
        "category": "public_site",
        "surface_kind": "public_site",
        "owner_lane": "public_gateway",
        "coverage_status": "read_only_visible",
        "public_routes": ["vtuber.wooo.work"],
        "source_keys": [],
    },
    {
        "product_id": "wooo-open-design",
        "product_name": "WOOO Open Design",
        "project_id": "open-design",
        "category": "platform_tool",
        "surface_kind": "design_system",
        "owner_lane": "S4.10",
        "coverage_status": "read_only_visible",
        "public_routes": ["design.wooo.work"],
        "source_keys": ["open-design"],
    },
    {
        "product_id": "workflow-automation",
        "product_name": "Workflow Automation / n8n",
        "project_id": "__platform__",
        "category": "platform_tool",
        "surface_kind": "automation_tool",
        "owner_lane": "observability_tooling",
        "coverage_status": "read_only_candidate",
        "public_routes": ["n8n.wooo.work"],
        "source_keys": [],
    },
    {
        "product_id": "data-workspace",
        "product_name": "Data Workspace / Grist",
        "project_id": "__platform__",
        "category": "platform_tool",
        "surface_kind": "data_tool",
        "owner_lane": "observability_tooling",
        "coverage_status": "read_only_candidate",
        "public_routes": ["grist.wooo.work"],
        "source_keys": [],
    },
    {
        "product_id": "security-secrets-platform",
        "product_name": "Security / Vault",
        "project_id": "__platform__",
        "category": "platform_tool",
        "surface_kind": "security_tool",
        "owner_lane": "S4.12",
        "coverage_status": "read_only_candidate",
        "public_routes": ["vault.wooo.work"],
        "source_keys": [],
    },
    {
        "product_id": "ai-model-gateway",
        "product_name": "AI Model Gateway / Ollama",
        "project_id": "__platform__",
        "category": "platform_tool",
        "surface_kind": "ai_provider_tool",
        "owner_lane": "P1-004",
        "coverage_status": "read_only_candidate",
        "public_routes": ["ollama.wooo.work"],
        "source_keys": [],
    },
    {
        "product_id": "source-control",
        "product_name": "Source Control / DevOps",
        "project_id": "__platform__",
        "category": "platform_tool",
        "surface_kind": "tooling",
        "owner_lane": "S4.9-S4.13",
        "coverage_status": "read_only_visible",
        "public_routes": [
            "gitea.wooo.work",
            "gitlab.wooo.work",
            "harbor.wooo.work",
            "registry.wooo.work",
        ],
        "source_keys": ["wooo/wooo-infra-config"],
    },
    {
        "product_id": "observability-tooling",
        "product_name": "Observability / LLMOps",
        "project_id": "__platform__",
        "category": "platform_tool",
        "surface_kind": "tooling",
        "owner_lane": "P1-003",
        "coverage_status": "read_only_visible",
        "public_routes": [
            "sentry.wooo.work",
            "signoz.wooo.work",
            "langfuse.wooo.work",
            "monitor.wooo.work",
        ],
        "source_keys": [],
    },
)
|
||
|
||
# Extra public routes appended by _build_route_assets() for any domain the
# gateway snapshot did not already provide.
# Each row is (domain, coverage_status, source).
_ADDITIONAL_PUBLIC_ROUTES: tuple[dict[str, Any], ...] = tuple(
    {"domain": domain, "coverage_status": coverage_status, "source": source}
    for domain, coverage_status, source in (
        ("awoooi.wooo.work", "read_only_visible", "awoooi_canonical_route"),
        ("api.awoooi.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("app.awoooi.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("api.aiops.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("clawbot.aiops.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("command.aiops.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("security.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("2026fifa.wooo.work", "owner_response_required", "cross_workspace_product_candidate"),
        ("vibework.wooo.work", "owner_response_required", "vibework_onboarding_handoff"),
        ("agent.wooo.work", "owner_response_required", "agent_bounty_onboarding_handoff"),
        ("tsenyang.wooo.work", "owner_response_required", "repo_domain_scan_candidate"),
        ("design.wooo.work", "read_only_visible", "open_design_route_candidate"),
        ("grist.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("n8n.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("vault.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("ollama.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("monitor.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
    )
)
|
||
|
||
|
||
def _load_committed_snapshot(relative_path: str) -> dict[str, Any]:
    """Load a committed JSON snapshot stored under the repository root.

    Args:
        relative_path: Snapshot location relative to the directory returned
            by ``resolve_repo_root(Path(__file__))``.

    Returns:
        The decoded JSON object. An empty dict is returned (and a warning is
        logged) when the file does not exist, does not contain valid JSON, or
        its top-level JSON value is not an object.
    """
    snapshot_path = resolve_repo_root(Path(__file__)) / relative_path
    try:
        payload = json.loads(snapshot_path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        logger.warning("tenant_asset_snapshot_missing", path=relative_path)
        return {}
    except json.JSONDecodeError as exc:
        logger.warning(
            "tenant_asset_snapshot_invalid_json",
            path=relative_path,
            error=str(exc),
        )
        return {}

    # Callers immediately call ``.get(...)`` on the result, so a snapshot whose
    # top-level value is a list/scalar must degrade to "no data" rather than
    # raise AttributeError further down the inventory build.
    if not isinstance(payload, dict):
        logger.warning(
            "tenant_asset_snapshot_not_object",
            path=relative_path,
            payload_type=type(payload).__name__,
        )
        return {}
    return payload
|
||
|
||
|
||
def _domain_product(domain: str) -> dict[str, str]:
    """Return the product attribution for a public domain.

    Domains listed in ``_DOMAIN_PRODUCT_OVERRIDES`` return that entry as-is.
    Any other domain gets a generic ``public_route`` record whose id is the
    domain with dots replaced by dashes and whose name is the domain itself.
    """
    generic_product = {
        "product_id": domain.replace(".", "-"),
        "product_name": domain,
        "category": "public_route",
    }
    return _DOMAIN_PRODUCT_OVERRIDES.get(domain, generic_product)
|
||
|
||
|
||
def _route_asset_from_gateway_row(row: Mapping[str, Any]) -> dict[str, Any]:
    """Convert one gateway-snapshot ``route_impacts`` row into a route asset.

    Missing or falsy fields fall back to ``""`` (domain), ``"unknown"``
    (control tier), ``0`` (counts) or ``False`` (flags). The asset is
    ``"verified"`` only when both the route smoke and the owner response are
    accepted; otherwise it is ``"read_only_pending_smoke"``.
    """
    domain = str(row.get("domain") or "")
    product = _domain_product(domain)
    smoke_accepted = bool(row.get("route_smoke_accepted"))
    owner_accepted = bool(row.get("owner_response_accepted"))

    if smoke_accepted and owner_accepted:
        coverage_status = "verified"
    else:
        coverage_status = "read_only_pending_smoke"

    asset: dict[str, Any] = {
        "domain": domain,
        "product_id": product["product_id"],
        "product_name": product["product_name"],
        "category": product["category"],
        "coverage_status": coverage_status,
        "control_tier": row.get("control_tier") or "unknown",
    }
    # Numeric counters: absent / None / 0 all collapse to 0.
    for counter in ("upstream_count", "admin_route_count", "websocket_route_count"):
        asset[counter] = int(row.get(counter) or 0)
    asset["public_route_smoke_required"] = bool(row.get("public_route_smoke_required"))
    asset["route_smoke_accepted"] = smoke_accepted
    asset["owner_response_accepted"] = owner_accepted
    # Read-only inventory: these two counters are always reported as zero.
    asset["runtime_gate_count"] = 0
    asset["action_button_count"] = 0
    asset["source"] = "public_gateway_preflight_inventory"
    return asset
|
||
|
||
|
||
def _build_route_assets(public_gateway_snapshot: Mapping[str, Any]) -> list[dict[str, Any]]:
    """Build the public-route asset list for the tenant asset inventory.

    Gateway snapshot rows (``route_impacts``) come first: non-mapping rows,
    rows without a domain and repeated domains are dropped. Entries from
    ``_ADDITIONAL_PUBLIC_ROUTES`` are then appended as ``candidate`` routes
    for every domain the gateway snapshot did not already supply.
    """
    gateway_domains: set[str] = set()
    assets: list[dict[str, Any]] = []

    for impact in public_gateway_snapshot.get("route_impacts") or []:
        if not isinstance(impact, Mapping):
            continue
        gateway_asset = _route_asset_from_gateway_row(impact)
        gateway_domain = gateway_asset["domain"]
        if gateway_domain and gateway_domain not in gateway_domains:
            gateway_domains.add(gateway_domain)
            assets.append(gateway_asset)

    for extra in _ADDITIONAL_PUBLIC_ROUTES:
        extra_domain = str(extra["domain"])
        if extra_domain in gateway_domains:
            # The gateway snapshot is authoritative for this domain.
            continue
        attribution = _domain_product(extra_domain)
        candidate: dict[str, Any] = {
            "domain": extra_domain,
            "product_id": attribution["product_id"],
            "product_name": attribution["product_name"],
            "category": attribution["category"],
            "coverage_status": extra["coverage_status"],
            "control_tier": "candidate",
            "upstream_count": 0,
            "admin_route_count": 0,
            "websocket_route_count": 0,
            "public_route_smoke_required": True,
            "route_smoke_accepted": False,
            "owner_response_accepted": False,
            "runtime_gate_count": 0,
            "action_button_count": 0,
            "source": extra["source"],
        }
        assets.append(candidate)

    return assets
|
||
|
||
|
||
def _build_source_repo_assets(
    source_control_snapshot: Mapping[str, Any],
) -> list[dict[str, Any]]:
    """Build source-repository assets from the source-control snapshot.

    Each mapping row under ``repo_readiness`` becomes one asset. The product
    attribution comes from the first ``_CANONICAL_PRODUCT_SURFACES`` entry
    whose ``source_keys`` contains the row's ``source_key`` or ``github_repo``;
    when none matches, a generic ``source_repo`` product is derived from those
    two values (``"unknown-source"`` if both are empty).
    """
    assets: list[dict[str, Any]] = []
    for repo in source_control_snapshot.get("repo_readiness") or []:
        if not isinstance(repo, Mapping):
            continue
        source_key = str(repo.get("source_key") or "")
        github_repo = str(repo.get("github_repo") or "")

        owning_surface = next(
            (
                candidate
                for candidate in _CANONICAL_PRODUCT_SURFACES
                if source_key in candidate.get("source_keys", [])
                or github_repo in candidate.get("source_keys", [])
            ),
            None,
        )
        if owning_surface is None:
            repo_label = source_key or github_repo
            product_id = repo_label.replace("/", "-") or "unknown-source"
            product_name = repo_label or "unknown-source"
            category = "source_repo"
        else:
            product_id = str(owning_surface["product_id"])
            product_name = str(owning_surface["product_name"])
            category = str(owning_surface["category"])

        assets.append(
            {
                "github_repo": github_repo,
                "source_key": source_key,
                "product_id": product_id,
                "product_name": product_name,
                "category": category,
                "scope_status": repo.get("scope_status") or "unknown",
                "readiness_state": repo.get("readiness_state") or "unknown",
                "risk": repo.get("risk") or "UNKNOWN",
                "primary_ready": bool(repo.get("primary_ready")),
                "blocker_count": len(repo.get("blockers") or []),
                "runtime_gate_count": 0,
                "action_button_count": 0,
            }
        )
    return assets
|
||
|
||
|
||
def build_tenant_asset_inventory(tenants: list[Mapping[str, Any]]) -> dict[str, Any]:
    """Assemble the read-only product / route / source-repo inventory payload.

    Both committed snapshots are loaded, converted into route and source-repo
    assets, and every canonical product surface is annotated with how many of
    its declared routes and source keys were actually found.

    Args:
        tenants: Tenant rows; only their count is used here.

    Returns:
        A dict with schema version, mode, evidence references, summary
        counters, ``products``, ``public_routes``, ``source_repos`` and the
        fixed list of read-only ``boundaries``.
    """
    gateway_snapshot = _load_committed_snapshot(_PUBLIC_GATEWAY_INVENTORY_SNAPSHOT)
    source_snapshot = _load_committed_snapshot(_SOURCE_CONTROL_READINESS_SNAPSHOT)
    route_assets = _build_route_assets(gateway_snapshot)
    repo_assets = _build_source_repo_assets(source_snapshot)

    known_domains = {route["domain"] for route in route_assets}
    known_source_keys = {repo["source_key"] for repo in repo_assets}

    product_rows: list[dict[str, Any]] = []
    for surface in _CANONICAL_PRODUCT_SURFACES:
        declared_routes = surface["public_routes"]
        declared_keys = surface["source_keys"]
        product_rows.append(
            {
                **surface,
                "public_route_count": sum(
                    1 for route in declared_routes if route in known_domains
                ),
                "source_repo_count": sum(
                    1 for key in declared_keys if key in known_source_keys
                ),
                "missing_public_routes": [
                    route for route in declared_routes if route not in known_domains
                ],
                "owner_response_received_count": 0,
                "owner_response_accepted_count": 0,
                "runtime_gate_count": 0,
                "action_button_count": 0,
            }
        )

    source_summary = source_snapshot.get("summary") or {}
    gateway_summary = gateway_snapshot.get("summary") or {}

    summary = {
        "tenant_table_count": len(tenants),
        "product_surface_count": len(product_rows),
        "public_route_count": len(route_assets),
        "public_gateway_snapshot_route_count": int(
            gateway_summary.get("route_impact_count") or 0
        ),
        # Falls back to the number of parsed repos when the snapshot summary
        # carries no (or a zero) candidate count.
        "source_candidate_repo_count": int(
            source_summary.get("candidate_repo_count") or len(repo_assets)
        ),
        "source_in_scope_repo_count": int(source_summary.get("in_scope_repo_count") or 0),
        "source_primary_ready_count": int(source_summary.get("primary_ready_count") or 0),
        "owner_response_received_count": 0,
        "owner_response_accepted_count": 0,
        "runtime_gate_count": 0,
        "action_button_count": 0,
    }
    boundaries = [
        "read_only_inventory_only=true",
        "owner_response_received_count=0",
        "owner_response_accepted_count=0",
        "runtime_execution_authorized=false",
        "active_runtime_gate_count=0",
        "action_buttons_allowed=false",
        "repo_creation_authorized=false",
        "refs_sync_authorized=false",
        "workflow_modification_authorized=false",
        "public_route_change_authorized=false",
    ]
    return {
        "schema_version": "awooop_tenant_asset_inventory_v1",
        "mode": "read_only_global_asset_inventory",
        "evidence_refs": [
            _PUBLIC_GATEWAY_INVENTORY_SNAPSHOT,
            _SOURCE_CONTROL_READINESS_SNAPSHOT,
            "docs/security/SECURITY-MIRROR-STATUS-ROLLUP.md",
        ],
        "summary": summary,
        "products": product_rows,
        "public_routes": route_assets,
        "source_repos": repo_assets,
        "boundaries": boundaries,
    }
|
||
|
||
|
||
async def list_tenants() -> dict[str, Any]:
    """List all AwoooP tenants (Operator Console view; not filtered by RLS).

    Returns:
        A dict with ``tenants`` (one dict per project row), ``total`` (the
        number of tenants) and ``asset_inventory`` (the read-only inventory
        built by ``build_tenant_asset_inventory``).
    """
    tenant_columns = (
        "project_id",
        "display_name",
        "migration_mode",
        "budget_limit_usd",
        "is_active",
        "created_at",
    )
    query = text("""
            SELECT
                project_id,
                display_name,
                migration_mode,
                budget_limit_usd,
                is_active,
                created_at
            FROM awooop_operator_list_projects()
        """)

    async with get_db_context("awoooi") as db:
        project_rows = list((await db.execute(query)).mappings().all())

    tenants = [
        {column: project_row[column] for column in tenant_columns}
        for project_row in project_rows
    ]
    return {
        "tenants": tenants,
        "total": len(tenants),
        "asset_inventory": build_tenant_asset_inventory(tenants),
    }
|
||
|
||
|
||
# =============================================================================
|
||
# Contracts
|
||
# =============================================================================
|
||
|
||
async def list_contracts(
    project_id: str | None,
    lifecycle_status: str | None,
) -> dict[str, Any]:
    """List contract revisions, newest first.

    Args:
        project_id: When not ``None``, only revisions of this project.
        lifecycle_status: When not ``None``, only revisions in this status.

    Returns:
        ``{"contracts": [...], "total": n}``. ``total`` counts every matching
        revision, while ``contracts`` holds at most ``_MAX_CONTRACTS`` of them.
    """
    revision_fields = (
        "revision_id",
        "contract_id",
        "contract_family",
        "lifecycle_status",
        "body_hash",
        "version_major",
        "version_minor",
        "created_at",
        "project_id",
    )

    query = select(AwoooPContractRevision)
    if project_id is not None:
        query = query.where(AwoooPContractRevision.project_id == project_id)
    if lifecycle_status is not None:
        query = query.where(
            AwoooPContractRevision.lifecycle_status == lifecycle_status
        )
    query = query.order_by(AwoooPContractRevision.created_at.desc())

    async with get_db_context("awoooi") as db:
        # Count before applying the row cap so `total` reflects all matches.
        total = (
            await db.execute(select(func.count()).select_from(query.subquery()))
        ).scalar_one()
        revisions = list(
            (await db.execute(query.limit(_MAX_CONTRACTS))).scalars().all()
        )
        # Copy the column values out while the session is still open.
        contracts = [
            {field: getattr(revision, field) for field in revision_fields}
            for revision in revisions
        ]

    return {"contracts": contracts, "total": total}
|
||
|
||
|
||
# =============================================================================
|
||
# Runs
|
||
# =============================================================================
|
||
|
||
async def list_runs(
    project_id: str | None,
    state: str | None,
    remediation_status: str | None,
    callback_reply_status: str | None,
    incident_id: str | None,
    page: int,
    per_page: int,
) -> dict[str, Any]:
    """List runs with project/state/evidence/callback/incident filters and paging.

    ``project_id`` and ``state`` are applied in SQL. ``remediation_status``,
    ``incident_id`` and ``callback_reply_status`` are evaluated against the
    per-run summaries, so when any of them is set every SQL-matching run is
    loaded, summarised and filtered in Python before the page is sliced.
    Without them, counting and paging are done in SQL and only the requested
    page is summarised.

    Returns:
        ``{"runs": [...], "total": n, "page": page, "per_page": per_page}``
        where each run carries its ``remediation_summary`` and
        ``callback_reply_summary``.
    """
    _validate_remediation_status_filter(remediation_status)
    _validate_callback_reply_status_filter(callback_reply_status)
    _validate_incident_id_filter(incident_id)

    filter_on_evidence = bool(remediation_status or incident_id or callback_reply_status)
    offset = (page - 1) * per_page

    async with get_db_context("awoooi") as db:
        query = select(AwoooPRunState).order_by(AwoooPRunState.created_at.desc())
        if project_id is not None:
            query = query.where(AwoooPRunState.project_id == project_id)
        if state is not None:
            query = query.where(AwoooPRunState.state == state)

        if filter_on_evidence:
            # Evidence filters cannot be expressed in SQL here: load every
            # candidate run and its message context.
            summarised_rows = list((await db.execute(query)).scalars().all())
            context_limit = _list_filter_context_limit(len(summarised_rows))
            inbound_by_run, outbound_by_run = await _load_run_message_context(
                db,
                summarised_rows,
                limit=context_limit,
            )
        else:
            total = (
                await db.execute(select(func.count()).select_from(query.subquery()))
            ).scalar_one()
            page_result = await db.execute(query.offset(offset).limit(per_page))
            summarised_rows = list(page_result.scalars().all())
            inbound_by_run, outbound_by_run = await _load_run_message_context(
                db, summarised_rows
            )

        remediation_summaries = await _build_run_remediation_summaries(
            runs=summarised_rows,
            inbound_by_run=inbound_by_run,
            outbound_by_run=outbound_by_run,
        )
        callback_reply_summaries = {
            run.run_id: _run_callback_reply_summary(outbound_by_run.get(run.run_id, []))
            for run in summarised_rows
        }

        if filter_on_evidence:

            def _matches_filters(run: Any) -> bool:
                """Return True when the run passes all three evidence filters."""
                remediation = remediation_summaries.get(run.run_id)
                return (
                    _remediation_summary_matches_status(remediation, remediation_status)
                    and _remediation_summary_matches_incident_id(remediation, incident_id)
                    and _callback_reply_summary_matches_status(
                        callback_reply_summaries.get(run.run_id),
                        callback_reply_status,
                    )
                )

            matching_rows = [run for run in summarised_rows if _matches_filters(run)]
            total = len(matching_rows)
            page_rows = matching_rows[offset : offset + per_page]
        else:
            page_rows = summarised_rows

        # Copy the column values out while the session is still open.
        runs = [
            {
                "run_id": run.run_id,
                "project_id": run.project_id,
                "agent_id": run.agent_id,
                "state": run.state,
                "is_shadow": run.is_shadow,
                "cost_usd": run.cost_usd,
                "step_count": run.step_count,
                "created_at": run.created_at,
                "timeout_at": run.timeout_at,
                "remediation_summary": remediation_summaries.get(run.run_id),
                "callback_reply_summary": callback_reply_summaries.get(run.run_id),
            }
            for run in page_rows
        ]

    return {"runs": runs, "total": total, "page": page, "per_page": per_page}
|
||
|
||
|
||
async def list_callback_replies(
    project_id: str | None,
    callback_reply_status: str | None,
    action: str | None,
    incident_id: str | None,
    page: int,
    per_page: int,
    refresh: bool = False,
) -> dict[str, Any]:
    """List Telegram detail/history callback reply evidence without changing runtime state.

    Args:
        project_id: Optional project filter. When omitted, rows are not filtered
            by project, but the DB context and the audit summary use "awoooi".
        callback_reply_status: Optional status filter (validated by
            ``_validate_callback_reply_status_filter``). ``"no_callback"``
            short-circuits to an empty page.
        action: Optional callback action filter; the validated value is compared
            against the lower-cased ``callback_reply.action`` field.
        incident_id: Optional filter on ``callback_reply.incident_id``.
        page: 1-based page number used to compute the SQL offset.
        per_page: Page size used as the SQL limit.
        refresh: When true, skip the cached-summary lookup and query again.

    Returns:
        A cached response when one is available; otherwise whatever
        ``store_operator_summary_async`` returns for the freshly built response
        (``items``, ``total``, ``page``, ``per_page`` and ``summary``).
    """
    _validate_callback_reply_status_filter(callback_reply_status)
    callback_action = _validate_callback_reply_action_filter(action)
    _validate_incident_id_filter(incident_id)
    normalized_project_id = project_id or "awoooi"

    # Rows selected below always carry a callback_reply envelope, so a
    # "no_callback" filter can never match anything.
    if callback_reply_status == "no_callback":
        return {
            "items": [],
            "total": 0,
            "page": page,
            "per_page": per_page,
        }

    callback_summary_cache_key = {
        "project_id": project_id or "__all__",
        "callback_reply_status": callback_reply_status or "",
        "action": callback_action or "",
        "incident_id": incident_id or "",
        "page": page,
        "per_page": per_page,
    }
    if not refresh:
        cached_response = await get_cached_operator_summary_async(
            "callback_replies",
            callback_summary_cache_key,
            ttl_seconds=_CALLBACK_REPLY_CACHE_TTL_SECONDS,
        )
        if cached_response is not None:
            logger.info(
                "operator_callback_replies_cache_hit",
                project_id=normalized_project_id,
                callback_reply_status=callback_reply_status,
                action=callback_action,
                incident_id=incident_id,
                page=page,
                per_page=per_page,
                ttl_seconds=_CALLBACK_REPLY_CACHE_TTL_SECONDS,
            )
            return cached_response

    where_clauses = [
        "m.source_envelope ? 'callback_reply'",
    ]
    params: dict[str, Any] = {
        "limit": per_page,
        "offset": (page - 1) * per_page,
    }

    if project_id:
        where_clauses.append("m.project_id = :project_id")
        params["project_id"] = project_id

    raw_status = _CALLBACK_REPLY_RAW_STATUS_BY_FILTER.get(
        str(callback_reply_status or "")
    )
    if raw_status:
        where_clauses.append(
            "m.source_envelope #>> '{callback_reply,status}' = :raw_status"
        )
        params["raw_status"] = raw_status

    if callback_action:
        where_clauses.append(
            "LOWER(m.source_envelope #>> '{callback_reply,action}') = :callback_action"
        )
        params["callback_action"] = callback_action
    if incident_id:
        where_clauses.append(
            "m.source_envelope #>> '{callback_reply,incident_id}' = :incident_id"
        )
        params["incident_id"] = incident_id

    # Only fixed clause fragments are interpolated; every user value is bound.
    where_sql = " AND ".join(where_clauses)
    count_sql = text(f"""
        SELECT COUNT(*) AS total
        FROM awooop_outbound_message m
        WHERE {where_sql}
    """)
    list_sql = text(f"""
        SELECT
            m.message_id,
            m.project_id,
            m.run_id,
            m.channel_type,
            m.message_type,
            m.content_preview,
            m.provider_message_id,
            m.send_status,
            m.send_error,
            m.queued_at,
            m.sent_at,
            m.triggered_by_state,
            m.source_envelope -> 'callback_reply' AS callback_reply,
            m.source_envelope -> 'awooop_status_chain'
                AS persisted_awooop_status_chain,
            m.source_envelope -> 'km_stale_completion_summary'
                AS persisted_km_stale_completion_summary,
            r.agent_id,
            r.state AS run_state,
            r.created_at AS run_created_at
        FROM awooop_outbound_message m
        LEFT JOIN awooop_run_state r
            ON r.project_id = m.project_id
            AND r.run_id = m.run_id
        WHERE {where_sql}
        ORDER BY COALESCE(m.sent_at, m.queued_at) DESC, m.message_id DESC
        LIMIT :limit OFFSET :offset
    """)

    async with get_db_context(normalized_project_id) as db:
        count_result = await db.execute(count_sql, params)
        total = count_result.scalar_one()
        rows_result = await db.execute(list_sql, params)
        rows = list(rows_result.mappings().all())
        audit_summary = await _fetch_callback_reply_audit_summary(
            db,
            project_id=normalized_project_id,
        )

    items = [_callback_reply_event_item(row) for row in rows]
    # Per-request memoization so repeated (project, incident) pairs on one page
    # trigger only a single status-chain / KM summary lookup.
    status_chain_cache: dict[tuple[str, str], dict[str, Any]] = {}
    km_completion_queue_cache: dict[str, Any] = {}
    km_completion_summary_cache: dict[tuple[str, str | None], dict[str, Any]] = {}
    for item in items:
        incident = item.get("incident_id")
        item_project_id = str(item.get("project_id") or project_id or "awoooi")
        if not incident:
            item["awooop_status_chain"] = _build_awooop_status_chain(
                incident_ids=[],
                source_id=None,
            )
            item["km_stale_completion_summary"] = (
                _empty_km_stale_completion_summary(
                    project_id=item_project_id,
                    incident_id=None,
                    status_value="no_incident",
                    reason="callback_reply_missing_incident_id",
                )
            )
            continue
        # Use a dedicated local name: rebinding ``incident_id`` here would
        # clobber the filter parameter that is logged after the loop.
        item_incident_id = str(incident)
        status_chain_cache_key = (item_project_id, item_incident_id)
        cached = status_chain_cache.get(status_chain_cache_key)
        if cached is not None:
            item["awooop_status_chain"] = cached
        else:
            remediation_history = await _fetch_run_remediation_history(
                [item_incident_id],
                limit=5,
            )
            chain = await _fetch_awooop_status_chain(
                incident_ids=[item_incident_id],
                project_id=item_project_id,
                remediation_history=remediation_history,
            )
            status_chain_cache[status_chain_cache_key] = chain
            item["awooop_status_chain"] = chain

        km_summary_cache_key = (item_project_id, item_incident_id)
        km_summary = km_completion_summary_cache.get(km_summary_cache_key)
        if km_summary is None:
            km_summary = await _fetch_km_stale_completion_summary_for_incident(
                project_id=item_project_id,
                incident_id=item_incident_id,
                queue_cache=km_completion_queue_cache,
            )
            km_completion_summary_cache[km_summary_cache_key] = km_summary
        item["km_stale_completion_summary"] = km_summary

    response = {
        "items": items,
        "total": total,
        "page": page,
        "per_page": per_page,
        "summary": audit_summary,
    }
    logger.info(
        "operator_callback_replies_fetched",
        project_id=normalized_project_id,
        callback_reply_status=callback_reply_status,
        action=callback_action,
        incident_id=incident_id,
        page=page,
        per_page=per_page,
        total=total,
        cache_status="miss",
        cache_ttl_seconds=_CALLBACK_REPLY_CACHE_TTL_SECONDS,
    )
    return await store_operator_summary_async(
        "callback_replies",
        callback_summary_cache_key,
        response,
        ttl_seconds=_CALLBACK_REPLY_CACHE_TTL_SECONDS,
    )
|
||
|
||
|
||
async def _fetch_callback_reply_audit_summary(
    db: Any,
    *,
    project_id: str,
) -> dict[str, Any]:
    """Aggregate Telegram outbound-mirror and callback-evidence coverage for a project.

    Runs one aggregate statement over the project's Telegram rows in
    ``awooop_outbound_message`` (cross-joined with a trace-gap cutoff and the
    inbound ``callback_query`` counters) and hands the single result row to
    ``_callback_reply_audit_summary_from_row``.
    """
    audit_sql = text("""
        WITH outbound AS (
            SELECT
                m.*,
                EXISTS (
                    SELECT 1
                    FROM jsonb_each(
                        CASE
                            WHEN jsonb_typeof(
                                COALESCE(
                                    m.source_envelope -> 'source_refs',
                                    '{}'::jsonb
                                )
                            ) = 'object'
                            THEN COALESCE(
                                m.source_envelope -> 'source_refs',
                                '{}'::jsonb
                            )
                            ELSE '{}'::jsonb
                        END
                    ) AS refs(key, value)
                    WHERE jsonb_typeof(refs.value) = 'array'
                      AND refs.value <> '[]'::jsonb
                ) AS has_trace_ref
            FROM awooop_outbound_message m
            WHERE m.project_id = :project_id
              AND m.channel_type = 'telegram'
        ),
        trace_gap_cutoff AS (
            SELECT
                MAX(COALESCE(sent_at, queued_at))
                    AS latest_missing_trace_ref_at
            FROM outbound
            WHERE source_envelope #>> '{reply_markup,present}' = 'true'
              AND NOT has_trace_ref
        ),
        inbound_callbacks AS (
            SELECT
                COUNT(*) AS inbound_callback_total,
                COUNT(*) FILTER (
                    WHERE received_at >= NOW() - INTERVAL '24 hours'
                ) AS inbound_callback_recent_24h_total,
                MAX(received_at) AS inbound_callback_latest_at
            FROM awooop_conversation_event
            WHERE project_id = :project_id
              AND channel_type = 'telegram'
              AND content_type = 'callback_query'
        )
        SELECT
            COUNT(*) AS outbound_total,
            COUNT(*) FILTER (
                WHERE source_envelope <> '{}'::jsonb
            ) AS outbound_source_envelope_total,
            COUNT(*) FILTER (
                WHERE source_envelope ? 'source_refs'
            ) AS outbound_source_refs_total,
            COUNT(*) FILTER (
                WHERE has_trace_ref
            ) AS outbound_trace_ref_total,
            COUNT(*) FILTER (
                WHERE COALESCE(
                    source_envelope #> '{source_refs,incident_ids}',
                    '[]'::jsonb
                ) <> '[]'::jsonb
            ) AS outbound_incident_ref_total,
            COUNT(*) FILTER (
                WHERE source_envelope #>> '{reply_markup,present}' = 'true'
            ) AS outbound_reply_markup_total,
            COUNT(*) FILTER (
                WHERE source_envelope #>> '{reply_markup,present}' = 'true'
                  AND COALESCE(
                      source_envelope #> '{source_refs,incident_ids}',
                      '[]'::jsonb
                  ) = '[]'::jsonb
            ) AS outbound_reply_markup_missing_incident_ref_total,
            COUNT(*) FILTER (
                WHERE source_envelope #>> '{reply_markup,present}' = 'true'
                  AND COALESCE(
                      source_envelope #> '{source_refs,incident_ids}',
                      '[]'::jsonb
                  ) = '[]'::jsonb
                  AND COALESCE(sent_at, queued_at) >= NOW() - INTERVAL '1 hour'
            ) AS outbound_reply_markup_missing_incident_ref_recent_1h_total,
            COUNT(*) FILTER (
                WHERE source_envelope #>> '{reply_markup,present}' = 'true'
                  AND COALESCE(
                      source_envelope #> '{source_refs,incident_ids}',
                      '[]'::jsonb
                  ) = '[]'::jsonb
                  AND COALESCE(sent_at, queued_at) >= NOW() - INTERVAL '24 hours'
            ) AS outbound_reply_markup_missing_incident_ref_recent_24h_total,
            MAX(COALESCE(sent_at, queued_at)) FILTER (
                WHERE source_envelope #>> '{reply_markup,present}' = 'true'
                  AND COALESCE(
                      source_envelope #> '{source_refs,incident_ids}',
                      '[]'::jsonb
                  ) = '[]'::jsonb
            ) AS outbound_reply_markup_missing_incident_ref_latest_sent_at,
            COUNT(*) FILTER (
                WHERE source_envelope #>> '{reply_markup,present}' = 'true'
                  AND NOT has_trace_ref
            ) AS outbound_reply_markup_missing_trace_ref_total,
            COUNT(*) FILTER (
                WHERE source_envelope #>> '{reply_markup,present}' = 'true'
                  AND NOT has_trace_ref
                  AND COALESCE(sent_at, queued_at) >= NOW() - INTERVAL '1 hour'
            ) AS outbound_reply_markup_missing_trace_ref_recent_1h_total,
            COUNT(*) FILTER (
                WHERE source_envelope #>> '{reply_markup,present}' = 'true'
                  AND NOT has_trace_ref
                  AND COALESCE(sent_at, queued_at) >= NOW() - INTERVAL '24 hours'
            ) AS outbound_reply_markup_missing_trace_ref_recent_24h_total,
            MAX(COALESCE(sent_at, queued_at)) FILTER (
                WHERE source_envelope #>> '{reply_markup,present}' = 'true'
                  AND NOT has_trace_ref
            ) AS outbound_reply_markup_missing_trace_ref_latest_sent_at,
            COUNT(*) FILTER (
                WHERE source_envelope #>> '{reply_markup,present}' = 'true'
                  AND has_trace_ref
                  AND trace_gap_cutoff.latest_missing_trace_ref_at
                      IS NOT NULL
                  AND COALESCE(sent_at, queued_at)
                      > trace_gap_cutoff.latest_missing_trace_ref_at
            ) AS outbound_reply_markup_trace_ref_after_gap_total,
            MIN(COALESCE(sent_at, queued_at)) FILTER (
                WHERE source_envelope #>> '{reply_markup,present}' = 'true'
                  AND has_trace_ref
                  AND trace_gap_cutoff.latest_missing_trace_ref_at
                      IS NOT NULL
                  AND COALESCE(sent_at, queued_at)
                      > trace_gap_cutoff.latest_missing_trace_ref_at
            ) AS outbound_reply_markup_trace_ref_after_gap_first_sent_at,
            MAX(COALESCE(sent_at, queued_at)) FILTER (
                WHERE source_envelope #>> '{reply_markup,present}' = 'true'
                  AND has_trace_ref
                  AND trace_gap_cutoff.latest_missing_trace_ref_at
                      IS NOT NULL
                  AND COALESCE(sent_at, queued_at)
                      > trace_gap_cutoff.latest_missing_trace_ref_at
            ) AS outbound_reply_markup_trace_ref_after_gap_latest_sent_at,
            COALESCE((
                SELECT jsonb_agg(
                    jsonb_build_object(
                        'prefix', prefix,
                        'total', total,
                        'recent_24h_total', recent_24h_total,
                        'first_sent_at', first_sent_at,
                        'last_sent_at', last_sent_at
                    )
                    ORDER BY total DESC, prefix ASC
                )
                FROM (
                    SELECT
                        COALESCE(
                            NULLIF(
                                source_envelope #>>
                                    '{reply_markup,buttons,0,callback_prefix}',
                                ''
                            ),
                            'unknown'
                        ) AS prefix,
                        COUNT(*) AS total,
                        COUNT(*) FILTER (
                            WHERE COALESCE(sent_at, queued_at)
                                >= NOW() - INTERVAL '24 hours'
                        ) AS recent_24h_total,
                        MIN(COALESCE(sent_at, queued_at)) AS first_sent_at,
                        MAX(COALESCE(sent_at, queued_at)) AS last_sent_at
                    FROM outbound
                    WHERE source_envelope #>> '{reply_markup,present}' = 'true'
                      AND COALESCE(
                          source_envelope #> '{source_refs,incident_ids}',
                          '[]'::jsonb
                      ) = '[]'::jsonb
                    GROUP BY 1
                    ORDER BY total DESC, prefix ASC
                    LIMIT 5
                ) missing_prefixes
            ), '[]'::jsonb)
                AS outbound_reply_markup_missing_incident_ref_top_prefixes,
            COALESCE((
                SELECT jsonb_agg(
                    jsonb_build_object(
                        'prefix', prefix,
                        'total', total,
                        'recent_24h_total', recent_24h_total,
                        'first_sent_at', first_sent_at,
                        'last_sent_at', last_sent_at
                    )
                    ORDER BY total DESC, prefix ASC
                )
                FROM (
                    SELECT
                        COALESCE(
                            NULLIF(
                                source_envelope #>>
                                    '{reply_markup,buttons,0,callback_prefix}',
                                ''
                            ),
                            'unknown'
                        ) AS prefix,
                        COUNT(*) AS total,
                        COUNT(*) FILTER (
                            WHERE COALESCE(sent_at, queued_at)
                                >= NOW() - INTERVAL '24 hours'
                        ) AS recent_24h_total,
                        MIN(COALESCE(sent_at, queued_at)) AS first_sent_at,
                        MAX(COALESCE(sent_at, queued_at)) AS last_sent_at
                    FROM outbound
                    WHERE source_envelope #>> '{reply_markup,present}' = 'true'
                      AND NOT has_trace_ref
                    GROUP BY 1
                    ORDER BY total DESC, prefix ASC
                    LIMIT 5
                ) missing_trace_prefixes
            ), '[]'::jsonb)
                AS outbound_reply_markup_missing_trace_ref_top_prefixes,
            COUNT(*) FILTER (
                WHERE send_status = 'failed'
            ) AS outbound_failed_total,
            COUNT(*) FILTER (
                WHERE source_envelope ? 'callback_reply'
            ) AS callback_total,
            COUNT(*) FILTER (
                WHERE source_envelope #>> '{callback_reply,status}'
                    = 'callback_reply_sent'
            ) AS callback_sent_total,
            COUNT(*) FILTER (
                WHERE source_envelope #>> '{callback_reply,status}'
                    = 'callback_reply_fallback_sent'
            ) AS callback_fallback_total,
            COUNT(*) FILTER (
                WHERE source_envelope #>> '{callback_reply,status}'
                    = 'callback_reply_rescue_sent'
            ) AS callback_rescue_total,
            COUNT(*) FILTER (
                WHERE source_envelope #>> '{callback_reply,status}'
                    = 'callback_reply_failed'
            ) AS callback_failed_total,
            COUNT(*) FILTER (
                WHERE LOWER(source_envelope #>> '{callback_reply,action}')
                    = 'detail'
            ) AS callback_detail_total,
            COUNT(*) FILTER (
                WHERE LOWER(source_envelope #>> '{callback_reply,action}')
                    = 'history'
            ) AS callback_history_total,
            COUNT(*) FILTER (
                WHERE source_envelope ? 'callback_reply'
                  AND source_envelope ? 'awooop_status_chain'
                  AND source_envelope ? 'km_stale_completion_summary'
            ) AS callback_snapshot_captured_total,
            COUNT(*) FILTER (
                WHERE source_envelope ? 'callback_reply'
                  AND (
                      source_envelope ? 'awooop_status_chain'
                      OR source_envelope ? 'km_stale_completion_summary'
                  )
                  AND NOT (
                      source_envelope ? 'awooop_status_chain'
                      AND source_envelope ? 'km_stale_completion_summary'
                  )
            ) AS callback_snapshot_partial_total,
            COUNT(*) FILTER (
                WHERE source_envelope ? 'callback_reply'
                  AND NOT (
                      source_envelope ? 'awooop_status_chain'
                      OR source_envelope ? 'km_stale_completion_summary'
                  )
            ) AS callback_snapshot_missing_total,
            COUNT(DISTINCT source_envelope #>> '{callback_reply,incident_id}')
                FILTER (
                    WHERE source_envelope ? 'callback_reply'
                      AND COALESCE(
                          source_envelope #>> '{callback_reply,incident_id}',
                          ''
                      ) <> ''
                ) AS callback_incident_total,
            MAX(inbound_callbacks.inbound_callback_total)
                AS inbound_callback_total,
            MAX(inbound_callbacks.inbound_callback_recent_24h_total)
                AS inbound_callback_recent_24h_total,
            MAX(inbound_callbacks.inbound_callback_latest_at)
                AS inbound_callback_latest_at,
            MAX(COALESCE(sent_at, queued_at)) AS latest_outbound_at,
            MAX(COALESCE(sent_at, queued_at)) FILTER (
                WHERE source_envelope ? 'callback_reply'
            ) AS latest_callback_at
        FROM outbound
        CROSS JOIN trace_gap_cutoff
        CROSS JOIN inbound_callbacks
    """)
    bind_params = {"project_id": project_id}
    result = await db.execute(audit_sql, bind_params)
    # The statement is a pure aggregate, so exactly one row comes back.
    aggregate_row = result.mappings().one()
    return _callback_reply_audit_summary_from_row(
        aggregate_row,
        project_id=project_id,
    )
|
||
|
||
|
||
def _callback_reply_audit_summary_from_row(
    row: Mapping[str, Any],
    *,
    project_id: str,
) -> dict[str, Any]:
    """Turn the aggregate SQL row into the public callback-evidence audit summary.

    Counter columns are normalized through ``_safe_int``; timestamp columns are
    passed through as read from ``row``. Derived fields (trace-ref gap status,
    recovery status, inbound mirror status, snapshot status and their next
    actions) are computed here from those counters.
    """

    def count(column: str) -> int:
        # Every counter column goes through the same normalization.
        return _safe_int(row.get(column))

    outbound_total = count("outbound_total")
    callback_total = count("callback_total")
    captured = count("callback_snapshot_captured_total")
    partial = count("callback_snapshot_partial_total")
    missing = count("callback_snapshot_missing_total")
    outbound_incident_refs = count("outbound_incident_ref_total")

    top_missing_prefixes = _reply_markup_gap_prefixes_from_value(
        row.get("outbound_reply_markup_missing_incident_ref_top_prefixes")
    )
    top_missing_trace_prefixes = _reply_markup_gap_prefixes_from_value(
        row.get("outbound_reply_markup_missing_trace_ref_top_prefixes")
    )

    missing_trace_total = count("outbound_reply_markup_missing_trace_ref_total")
    missing_trace_recent_1h = count(
        "outbound_reply_markup_missing_trace_ref_recent_1h_total"
    )
    missing_trace_recent_24h = count(
        "outbound_reply_markup_missing_trace_ref_recent_24h_total"
    )
    trace_gap_status, trace_gap_next_action = _trace_ref_gap_decision(
        total=missing_trace_total,
        recent_1h=missing_trace_recent_1h,
        recent_24h=missing_trace_recent_24h,
    )
    trace_ref_after_gap_total = count(
        "outbound_reply_markup_trace_ref_after_gap_total"
    )
    trace_gap_recovery_status = _trace_ref_gap_recovery_status(
        missing_total=missing_trace_total,
        after_gap_total=trace_ref_after_gap_total,
    )

    inbound_callback_total = count("inbound_callback_total")
    inbound_callback_recent_24h_total = count("inbound_callback_recent_24h_total")

    # Inbound mirror: is the inbound callback_query side being recorded at all?
    if inbound_callback_total > 0:
        mirror_status, mirror_next_action = "capturing", "none"
    elif callback_total > 0:
        mirror_status, mirror_next_action = (
            "reply_only_gap",
            "press_any_telegram_callback_after_rollout",
        )
    else:
        mirror_status, mirror_next_action = (
            "no_callback_observed",
            "press_any_telegram_callback_after_rollout",
        )

    # Snapshot coverage: branch order matters, the first matching rule wins.
    has_snapshot_gap = missing > 0 or partial > 0
    if callback_total <= 0:
        snapshot_status, next_action = (
            "no_callback",
            "press_telegram_detail_or_history",
        )
    elif captured > 0 and has_snapshot_gap:
        snapshot_status, next_action = (
            "partial",
            "review_legacy_callback_snapshot_gap",
        )
    elif partial > 0:
        snapshot_status, next_action = (
            "partial",
            "press_telegram_detail_or_history_after_rollout",
        )
    elif missing > 0:
        snapshot_status, next_action = (
            "not_captured",
            "press_telegram_detail_or_history_after_rollout",
        )
    elif outbound_total > 0 and outbound_incident_refs == 0:
        snapshot_status, next_action = "captured", "review_outbound_source_refs"
    else:
        snapshot_status, next_action = "captured", "none"

    return {
        "schema_version": _CALLBACK_REPLY_AUDIT_SUMMARY_SCHEMA_VERSION,
        "project_id": project_id,
        "outbound_total": outbound_total,
        "outbound_source_envelope_total": count("outbound_source_envelope_total"),
        "outbound_source_refs_total": count("outbound_source_refs_total"),
        "outbound_trace_ref_total": count("outbound_trace_ref_total"),
        "outbound_incident_ref_total": outbound_incident_refs,
        "outbound_reply_markup_total": count("outbound_reply_markup_total"),
        "outbound_reply_markup_missing_incident_ref_total": count(
            "outbound_reply_markup_missing_incident_ref_total"
        ),
        "outbound_reply_markup_missing_incident_ref_recent_1h_total": count(
            "outbound_reply_markup_missing_incident_ref_recent_1h_total"
        ),
        "outbound_reply_markup_missing_incident_ref_recent_24h_total": count(
            "outbound_reply_markup_missing_incident_ref_recent_24h_total"
        ),
        "outbound_reply_markup_missing_incident_ref_latest_sent_at": row.get(
            "outbound_reply_markup_missing_incident_ref_latest_sent_at"
        ),
        "outbound_reply_markup_missing_trace_ref_total": missing_trace_total,
        "outbound_reply_markup_missing_trace_ref_recent_1h_total": (
            missing_trace_recent_1h
        ),
        "outbound_reply_markup_missing_trace_ref_recent_24h_total": (
            missing_trace_recent_24h
        ),
        "outbound_reply_markup_missing_trace_ref_latest_sent_at": row.get(
            "outbound_reply_markup_missing_trace_ref_latest_sent_at"
        ),
        "outbound_reply_markup_trace_ref_gap_status": trace_gap_status,
        "outbound_reply_markup_trace_ref_gap_next_action": trace_gap_next_action,
        "outbound_reply_markup_trace_ref_after_gap_total": (
            trace_ref_after_gap_total
        ),
        "outbound_reply_markup_trace_ref_after_gap_first_sent_at": row.get(
            "outbound_reply_markup_trace_ref_after_gap_first_sent_at"
        ),
        "outbound_reply_markup_trace_ref_after_gap_latest_sent_at": row.get(
            "outbound_reply_markup_trace_ref_after_gap_latest_sent_at"
        ),
        "outbound_reply_markup_trace_ref_gap_recovery_status": (
            trace_gap_recovery_status
        ),
        "outbound_reply_markup_missing_incident_ref_top_prefixes": (
            top_missing_prefixes
        ),
        "outbound_reply_markup_missing_trace_ref_top_prefixes": (
            top_missing_trace_prefixes
        ),
        "outbound_failed_total": count("outbound_failed_total"),
        "callback_total": callback_total,
        "callback_sent_total": count("callback_sent_total"),
        "callback_fallback_total": count("callback_fallback_total"),
        "callback_rescue_total": count("callback_rescue_total"),
        "callback_failed_total": count("callback_failed_total"),
        "callback_detail_total": count("callback_detail_total"),
        "callback_history_total": count("callback_history_total"),
        "callback_snapshot_captured_total": captured,
        "callback_snapshot_partial_total": partial,
        "callback_snapshot_missing_total": missing,
        "callback_incident_total": count("callback_incident_total"),
        "inbound_callback_total": inbound_callback_total,
        "inbound_callback_recent_24h_total": inbound_callback_recent_24h_total,
        "inbound_callback_latest_at": row.get("inbound_callback_latest_at"),
        "inbound_callback_mirror_status": mirror_status,
        "inbound_callback_next_action": mirror_next_action,
        "snapshot_status": snapshot_status,
        "next_action": next_action,
        "latest_outbound_at": row.get("latest_outbound_at"),
        "latest_callback_at": row.get("latest_callback_at"),
    }
|
||
|
||
|
||
def _trace_ref_gap_decision(
|
||
*,
|
||
total: int,
|
||
recent_1h: int,
|
||
recent_24h: int,
|
||
) -> tuple[str, str]:
|
||
"""Classify reply_markup messages without any source_refs into operator actions."""
|
||
if total <= 0:
|
||
return "clean", "none"
|
||
if recent_1h > 0:
|
||
return "active_gap", "inspect_recent_outbound_source_refs"
|
||
if recent_24h > 0:
|
||
return "recent_backlog", "watch_24h_decay"
|
||
return "legacy_backlog", "backfill_or_archive_legacy_callbacks"
|
||
|
||
|
||
def _trace_ref_gap_recovery_status(
|
||
*,
|
||
missing_total: int,
|
||
after_gap_total: int,
|
||
) -> str:
|
||
"""Describe whether traced reply_markup messages resumed after the last gap."""
|
||
if missing_total <= 0:
|
||
return "not_needed"
|
||
if after_gap_total > 0:
|
||
return "recovered_after_gap"
|
||
return "no_recovery_signal"
|
||
|
||
|
||
def _reply_markup_gap_prefixes_from_value(value: Any) -> list[dict[str, Any]]:
|
||
if not isinstance(value, list):
|
||
return []
|
||
|
||
prefixes: list[dict[str, Any]] = []
|
||
for item in value:
|
||
if not isinstance(item, Mapping):
|
||
continue
|
||
prefix = str(item.get("prefix") or "unknown").strip() or "unknown"
|
||
prefixes.append({
|
||
"prefix": prefix[:80],
|
||
"total": _safe_int(item.get("total")),
|
||
"recent_24h_total": _safe_int(item.get("recent_24h_total")),
|
||
"first_sent_at": item.get("first_sent_at"),
|
||
"last_sent_at": item.get("last_sent_at"),
|
||
})
|
||
if len(prefixes) >= 5:
|
||
break
|
||
return prefixes
|
||
|
||
|
||
async def _fetch_km_stale_completion_summary_for_incident(
    *,
    project_id: str,
    incident_id: str | None,
    queue_cache: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the KM owner-review completion summary for one incident.

    Args:
        project_id: Project to query; an empty value falls back to "awoooi".
        incident_id: Incident to summarize. Blank or missing values produce a
            "no_incident" placeholder summary without querying the queue.
        queue_cache: Optional per-project cache of the completion queue, so
            callers looping over many incidents fetch the queue once.

    Returns:
        The summary from ``_build_km_stale_completion_summary``, or an empty
        placeholder summary when the incident is missing or the queue fetch
        raises.
    """
    effective_project_id = project_id or "awoooi"
    effective_incident_id = str(incident_id or "").strip() or None
    if effective_incident_id is None:
        return _empty_km_stale_completion_summary(
            project_id=effective_project_id,
            incident_id=None,
            status_value="no_incident",
            reason="callback_reply_missing_incident_id",
        )

    cache = {} if queue_cache is None else queue_cache
    queue = cache.get(effective_project_id)
    if queue is not None:
        # Cache hit: reuse the queue fetched for an earlier incident.
        return _build_km_stale_completion_summary(
            queue=queue,
            project_id=effective_project_id,
            incident_id=effective_incident_id,
        )

    try:
        queue = await query_km_stale_owner_review_completion_queue(
            project_id=effective_project_id,
            status_bucket="all",
            limit=100,
        )
    except Exception as exc:
        # Best effort: a failed queue lookup degrades to a placeholder summary
        # and is not cached, so a later call tries again.
        logger.warning(
            "operator_km_stale_completion_summary_fetch_failed",
            project_id=effective_project_id,
            incident_id=effective_incident_id,
            error=str(exc),
        )
        return _empty_km_stale_completion_summary(
            project_id=effective_project_id,
            incident_id=effective_incident_id,
            status_value="fetch_failed",
            reason="km_stale_completion_queue_fetch_failed",
        )

    cache[effective_project_id] = queue
    return _build_km_stale_completion_summary(
        queue=queue,
        project_id=effective_project_id,
        incident_id=effective_incident_id,
    )
|
||
|
||
|
||
async def list_cicd_events(
    *,
    project_id: str | None,
    stage: str | None,
    status_filter: str | None,
    limit: int,
) -> dict[str, Any]:
    """List CI/CD notification evidence sourced from ``alert_operation_log``.

    Args:
        project_id: Optional project filter; any value other than "awoooi"
            yields an empty result.
        stage: Optional stage filter, validated by ``_validate_cicd_stage_filter``.
        status_filter: Optional status filter, validated by
            ``_validate_cicd_status_filter``.
        limit: Requested row count, clamped to the range 1..50.

    Returns:
        ``{"items": [...], "total": <number of items returned>, "limit": <clamped limit>}``.
    """
    safe_limit = max(1, min(limit, 50))
    normalized_stage = _validate_cicd_stage_filter(stage)
    normalized_status = _validate_cicd_status_filter(status_filter)

    # alert_operation_log is currently a legacy/global evidence table and CI/CD
    # notifications belong to AWOOOI production only; a non-awoooi project filter
    # returns an empty set so the multi-tenant UI is not misled.
    if project_id and project_id != "awoooi":
        return {"items": [], "total": 0, "limit": safe_limit}

    where_clauses = [
        "event_type = 'ALERT_RECEIVED'",
        "actor = 'alertmanager'",
        """
        COALESCE(
            context #>> '{labels,alertname}',
            context ->> 'alertname',
            ''
        ) LIKE 'CI_%'
        """,
    ]
    params: dict[str, Any] = {"limit": safe_limit}

    # Both optional filters compare a lower-cased label against a bound value.
    optional_label_filters = (
        ("stage", normalized_stage),
        ("status", normalized_status),
    )
    for label, wanted in optional_label_filters:
        if not wanted:
            continue
        where_clauses.append(
            f"LOWER(COALESCE(context #>> '{{labels,{label}}}', '')) = :{label}"
        )
        params[label] = wanted

    where_sql = " AND ".join(where_clauses)
    sql = text(f"""
        SELECT
            id,
            action_detail,
            success,
            created_at,
            context,
            COALESCE(
                context #>> '{{labels,alertname}}',
                context ->> 'alertname',
                ''
            ) AS alertname,
            context #>> '{{labels,stage}}' AS stage,
            context #>> '{{labels,status}}' AS status,
            context #>> '{{labels,severity}}' AS severity,
            context #>> '{{labels,commit}}' AS commit_sha,
            context #>> '{{labels,triggered_by}}' AS triggered_by,
            context #>> '{{labels,duration_seconds}}' AS duration_seconds,
            context #>> '{{annotations,summary}}' AS summary,
            context #>> '{{annotations,description}}' AS description,
            context #>> '{{annotations,workflow_url}}' AS workflow_url,
            context ->> 'alert_id' AS alert_id,
            context ->> 'source' AS source
        FROM alert_operation_log
        WHERE {where_sql}
        ORDER BY created_at DESC, id DESC
        LIMIT :limit
    """)

    async with get_db_context("awoooi") as db:
        result = await db.execute(sql, params)
        rows = list(result.mappings().all())

    item_project_id = project_id or "awoooi"
    items = [
        _cicd_event_item_from_row(row, project_id=item_project_id) for row in rows
    ]
    return {"items": items, "total": len(items), "limit": safe_limit}
|
||
|
||
|
||
async def get_ai_route_status(
    workload_type: str | None = None,
) -> dict[str, Any]:
    """Return a read-only view of the current AI/Ollama provider routing.

    Intended for display in the Operator Console.

    Args:
        workload_type: Optional workload name; validated and defaulted by
            ``_validate_ai_route_workload``.

    Returns:
        The route status response after ``_ai_route_response_with_repair_evidence``
        has processed it. When provider selection times out or raises, the
        lightweight policy-based status is returned instead, carrying
        ``route_reason`` / ``route_error`` describing the failure.
    """
    workload = _validate_ai_route_workload(workload_type)
    policy_order = _ai_route_policy_order(workload)
    checked_at = _utc_now_naive()

    try:
        route = await asyncio.wait_for(
            get_ollama_failover_manager().select_provider(task_type=workload),
            timeout=_AI_ROUTE_STATUS_SELECT_TIMEOUT_SECONDS,
        )
    except TimeoutError:
        logger.warning(
            "ai_route_status_check_timeout",
            workload_type=workload,
            timeout_seconds=_AI_ROUTE_STATUS_SELECT_TIMEOUT_SECONDS,
        )
        return await _ai_route_lightweight_status_from_policy(
            workload=workload,
            policy_order=policy_order,
            checked_at=checked_at,
            route_reason="route_check_timeout",
            route_error=(
                f"route status timed out after "
                f"{_AI_ROUTE_STATUS_SELECT_TIMEOUT_SECONDS:g}s"
            ),
        )
    except Exception as exc:
        # Still degrade to the lightweight status, but log the failure the same
        # way the timeout branch does instead of swallowing it silently.
        logger.warning(
            "ai_route_status_check_failed",
            workload_type=workload,
            error=str(exc),
            error_type=type(exc).__name__,
        )
        return await _ai_route_lightweight_status_from_policy(
            workload=workload,
            policy_order=policy_order,
            checked_at=checked_at,
            route_reason="route_check_failed",
            route_error=str(exc),
        )

    health = _ai_route_health_map(route)
    response = {
        "schema_version": _AI_ROUTE_STATUS_SCHEMA_VERSION,
        "workload_type": workload,
        "policy_order": policy_order,
        "selected_provider": route.primary.provider_name,
        "selected_url": route.primary.url or None,
        "selected_model": route.primary.model,
        # The primary provider holds priority 1, so fallbacks start at 2.
        "fallback_chain": [
            _ai_route_runtime_endpoint_item(endpoint, priority=index + 2)
            for index, endpoint in enumerate(route.fallback_chain)
        ],
        "route_reason": route.routing_reason,
        "route_source": "ollama_failover_manager",
        "route_error": None,
        "health": health,
        "checked_at": checked_at,
    }
    response.update(_ai_route_lane_state(
        policy_order=policy_order,
        selected_provider=route.primary.provider_name,
        health=health,
    ))
    return await _ai_route_response_with_repair_evidence(response)
|
||
|
||
|
||
def _validate_ai_route_workload(workload_type: str | None) -> OllamaWorkloadType:
    """Normalize the workload filter of the route status endpoint and validate it.

    Missing or blank input defaults to ``"deep_rca"``.

    Raises:
        HTTPException: 422 when the workload is not in ``_AI_ROUTE_WORKLOADS``.
    """
    requested = str(workload_type or "deep_rca").strip()
    workload = requested or "deep_rca"
    if workload in _AI_ROUTE_WORKLOADS:
        return workload  # type: ignore[return-value]

    allowed = ", ".join(sorted(_AI_ROUTE_WORKLOADS))
    raise HTTPException(
        status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
        detail=f"Unsupported workload_type: {workload}. Allowed: {allowed}",
    )
|
||
|
||
|
||
def _ai_route_policy_order(workload: OllamaWorkloadType) -> list[dict[str, Any]]:
    """List the configured policy order for one workload.

    The Ollama endpoints come first, in the order ``resolve_ollama_order``
    returns them (numbered from 1), followed by a single Gemini entry marked
    as the final cloud fallback.  The original note describes the expected
    order as GCP-A -> GCP-B -> 111 -> Gemini.
    """
    policy: list[dict[str, Any]] = []
    for priority, endpoint in enumerate(resolve_ollama_order(workload), start=1):
        policy.append(_ai_route_policy_endpoint_item(endpoint, priority=priority))

    gemini_entry: dict[str, Any] = {
        "priority": len(policy) + 1,
        "provider_name": "gemini",
        "url": None,
        "workload_type": workload,
        "reason": "final_cloud_fallback_after_all_ollama_endpoints",
        "role": "final_fallback",
        "runtime": "cloud",
    }
    policy.append(gemini_entry)
    return policy
|
||
|
||
|
||
async def _ai_route_lightweight_status_from_policy(
    *,
    workload: OllamaWorkloadType,
    policy_order: list[dict[str, Any]],
    checked_at: datetime,
    route_reason: str,
    route_error: str,
) -> dict[str, Any]:
    """Build a fallback route-status response from connectivity probes.

    Every endpoint returned by ``resolve_ollama_order`` is probed concurrently
    with ``_ai_route_probe_connectivity``.  The first endpoint (in policy
    order) whose report is not OFFLINE is reported as selected; when none
    qualifies the response names Gemini as the selected provider.

    Args:
        workload: Workload type the status is computed for.
        policy_order: Policy entries echoed into the response and used to
            derive the lane state.
        checked_at: Timestamp echoed into the response.
        route_reason: Why the caller fell back to this lightweight view; it is
            used as the prefix of the reported ``route_reason``.
        route_error: Error text reported only when the probes themselves fail.

    Returns:
        The status dict after ``_ai_route_response_with_repair_evidence`` has
        processed it.
    """
    candidates = list(resolve_ollama_order(workload))
    try:
        probe_reports = await asyncio.gather(
            *(_ai_route_probe_connectivity(candidate) for candidate in candidates),
        )
    except Exception as exc:
        logger.warning(
            "ai_route_status_lightweight_probe_failed",
            workload_type=workload,
            route_reason=route_reason,
            error=str(exc),
        )
        unavailable = _ai_route_unavailable_status(
            workload=workload,
            policy_order=policy_order,
            checked_at=checked_at,
            route_reason=route_reason,
            route_error=route_error,
            route_source="ollama_failover_manager",
        )
        return await _ai_route_response_with_repair_evidence(unavailable)

    health_by_provider = {
        candidate.provider_name: _ai_route_health_item(probe)
        for candidate, probe in zip(candidates, probe_reports, strict=False)
    }

    # Position of the first endpoint that answered; None when all are offline.
    first_reachable: int | None = None
    for position, probe in enumerate(probe_reports):
        if probe.status != HealthStatus.OFFLINE:
            first_reachable = position
            break

    fallback_chain: list[dict[str, Any]]
    if first_reachable is None:
        selected_provider = "gemini"
        selected_url = None
        selected_model = None
        fallback_chain = []
        reason_text = (
            f"{route_reason}; lightweight connectivity found all Ollama "
            "endpoints offline; final fallback policy is Gemini"
        )
    else:
        chosen = candidates[first_reachable]
        health_model = get_settings().OLLAMA_HEALTH_CHECK_MODEL
        selected_provider = chosen.provider_name
        selected_url = chosen.url
        selected_model = health_model
        # Endpoints after the selected one keep their 1-based policy position.
        fallback_chain = [
            _ai_route_runtime_policy_endpoint_item(
                candidate,
                priority=position + 1,
                model=health_model,
            )
            for position, candidate in enumerate(candidates)
            if position > first_reachable
        ]
        fallback_chain.append({
            "priority": len(candidates) + 1,
            "provider_name": "gemini",
            "url": None,
            "model": None,
            "runtime": "cloud",
        })
        reason_text = (
            f"{route_reason}; lightweight connectivity selected "
            f"{chosen.provider_name}"
        )

    response: dict[str, Any] = {
        "schema_version": _AI_ROUTE_STATUS_SCHEMA_VERSION,
        "workload_type": workload,
        "policy_order": policy_order,
        "selected_provider": selected_provider,
        "selected_url": selected_url,
        "selected_model": selected_model,
        "fallback_chain": fallback_chain,
        "route_reason": reason_text,
        "route_source": "lightweight_connectivity_fallback",
        "route_error": None,
        "health": health_by_provider,
        "checked_at": checked_at,
    }
    response.update(_ai_route_lane_state(
        policy_order=policy_order,
        selected_provider=selected_provider,
        health=health_by_provider,
    ))
    return await _ai_route_response_with_repair_evidence(response)
|
||
|
||
|
||
async def _ai_route_probe_connectivity(
    endpoint: OllamaEndpointSelection,
) -> HealthReport:
    """Probe one endpoint with a GET on ``/api/tags`` and report the outcome.

    Returns:
        A HEALTHY report on HTTP 200.  An OFFLINE report when the endpoint has
        no URL, when any other status code comes back, or when the request
        raises; the ``reason`` field says which.  Latency is measured in
        milliseconds from just before the client is created.
    """
    if not endpoint.url:
        return HealthReport(
            status=HealthStatus.OFFLINE,
            host=endpoint.url,
            reason="no_ollama_endpoint_url",
        )

    started = time.perf_counter()

    def elapsed_ms() -> float:
        return (time.perf_counter() - started) * 1000

    try:
        probe_timeout = httpx.Timeout(_AI_ROUTE_STATUS_CONNECTIVITY_TIMEOUT_SECONDS)
        async with httpx.AsyncClient(timeout=probe_timeout) as client:
            tags_response = await client.get(f"{endpoint.url.rstrip('/')}/api/tags")
            latency = elapsed_ms()
            reachable = tags_response.status_code == 200
            return HealthReport(
                status=HealthStatus.HEALTHY if reachable else HealthStatus.OFFLINE,
                host=endpoint.url,
                latency_ms=latency,
                reason=(
                    "status_only_connectivity_ok"
                    if reachable
                    else f"status_only_connectivity_http_{tags_response.status_code}"
                ),
            )
    except Exception as exc:
        # Best-effort probe: any failure is reported as OFFLINE, never raised.
        return HealthReport(
            status=HealthStatus.OFFLINE,
            host=endpoint.url,
            latency_ms=elapsed_ms(),
            reason=f"status_only_connectivity_error:{type(exc).__name__}",
        )
|
||
|
||
|
||
def _ai_route_runtime_policy_endpoint_item(
    endpoint: OllamaEndpointSelection,
    *,
    priority: int,
    model: str,
) -> dict[str, Any]:
    """Describe one policy endpoint as an Ollama fallback-chain entry.

    An empty endpoint URL is reported as ``None``.
    """
    entry: dict[str, Any] = {"priority": priority}
    entry["provider_name"] = endpoint.provider_name
    entry["url"] = endpoint.url or None
    entry["model"] = model
    entry["runtime"] = "ollama"
    return entry
|
||
|
||
|
||
def _ai_route_unavailable_status(
    *,
    workload: OllamaWorkloadType,
    policy_order: list[dict[str, Any]],
    checked_at: datetime,
    route_reason: str,
    route_error: str,
    route_source: str,
) -> dict[str, Any]:
    """Build the route-status response used when no provider could be selected.

    All ``selected_*`` fields are ``None``, the fallback chain and health map
    are empty, and the lane state is derived with no selected provider.
    """
    base: dict[str, Any] = {
        "schema_version": _AI_ROUTE_STATUS_SCHEMA_VERSION,
        "workload_type": workload,
        "policy_order": policy_order,
        "selected_provider": None,
        "selected_url": None,
        "selected_model": None,
        "fallback_chain": [],
        "route_reason": route_reason,
        "route_source": route_source,
        "route_error": route_error,
        "health": {},
        "checked_at": checked_at,
    }
    lane_state = _ai_route_lane_state(
        policy_order=policy_order,
        selected_provider=None,
        health={},
    )
    return {**base, **lane_state}
|
||
|
||
|
||
async def _ai_route_response_with_repair_evidence(
    response: dict[str, Any],
) -> dict[str, Any]:
    """Set ``response["repair_evidence"]`` in place and return the response.

    The key is always present.  It stays ``None`` unless ``lane_mode`` is one
    of ``degraded_failover``, ``cloud_fallback`` or ``unavailable``, in which
    case the latest stored repair evidence for the derived target provider is
    looked up.
    """
    degraded_modes = {"degraded_failover", "cloud_fallback", "unavailable"}
    response["repair_evidence"] = None
    if response.get("lane_mode") in degraded_modes:
        response["repair_evidence"] = await _latest_ai_route_repair_evidence(
            target_provider=_ai_route_repair_evidence_target(response),
        )
    return response
|
||
|
||
|
||
def _ai_route_repair_evidence_target(response: Mapping[str, Any]) -> str | None:
    """Pick the provider whose repair evidence should be shown.

    Preference order:
      1. the first skipped lane with a provider name and
         ``action_required is True``;
      2. the first skipped lane with a provider name;
      3. the first ``policy_order`` entry whose runtime is ``"ollama"`` and
         that has a provider name.

    Non-dict entries are ignored.  Returns ``None`` when nothing matches.
    """

    def provider_of(entry: dict[str, Any]) -> str:
        return str(entry.get("provider_name") or "").strip()

    skipped = response.get("skipped_lanes")
    if isinstance(skipped, list):
        named_lanes = [
            (provider_of(lane), lane) for lane in skipped if isinstance(lane, dict)
        ]
        named_lanes = [(name, lane) for name, lane in named_lanes if name]
        for name, lane in named_lanes:
            if lane.get("action_required") is True:
                return name
        if named_lanes:
            return named_lanes[0][0]

    policy = response.get("policy_order")
    if isinstance(policy, list):
        for entry in policy:
            if isinstance(entry, dict) and entry.get("runtime") == "ollama":
                name = provider_of(entry)
                if name:
                    return name
    return None
|
||
|
||
|
||
async def _latest_ai_route_repair_evidence(
    *,
    project_id: str = "awoooi",
    target_provider: str | None = None,
) -> dict[str, Any] | None:
    """Fetch the newest stored AI route repair evidence item for a project.

    When ``target_provider`` is given, the lookup is first restricted to
    events whose ``target_resource`` equals it; if that finds nothing the
    lookup is repeated without the restriction.

    Returns:
        The projected evidence item, or ``None`` when nothing is stored or the
        lookup raised (the failure is logged as a warning, not propagated).
    """
    base_params: dict[str, Any] = {
        "project_id": project_id,
        "provider": _AI_ROUTE_REPAIR_EVIDENCE_PROVIDER,
        "stage": _AI_ROUTE_REPAIR_EVIDENCE_STAGE,
    }
    target_filter = """
        AND COALESCE(
            NULLIF(source_envelope #>> '{log_correlation,target_resource}', ''),
            NULLIF(source_envelope #>> '{extra,payload,target_resource}', '')
        ) = :target_provider
    """

    try:
        if target_provider:
            targeted = await _fetch_latest_ai_route_repair_evidence(
                params={**base_params, "target_provider": target_provider},
                target_clause=target_filter,
            )
            if targeted is not None:
                return targeted
        # No target requested, or nothing stored for it: take the newest item.
        return await _fetch_latest_ai_route_repair_evidence(
            params=base_params,
            target_clause="",
        )
    except Exception as exc:
        logger.warning(
            "ai_route_repair_evidence_fetch_failed",
            project_id=project_id,
            target_provider=target_provider,
            error=str(exc),
        )
        return None
|
||
|
||
|
||
async def _fetch_latest_ai_route_repair_evidence(
    *,
    params: dict[str, Any],
    target_clause: str,
) -> dict[str, Any] | None:
    """Run the newest-repair-evidence query and project the first row.

    Args:
        params: Bind parameters for the query (``project_id``, ``provider``,
            ``stage`` and, when ``target_clause`` uses it, ``target_provider``).
        target_clause: Extra SQL appended to the WHERE clause verbatim; it is
            interpolated into the statement, so it must be a trusted constant
            and never user input.

    Returns:
        The projected evidence item for the newest matching row (ordered by
        ``received_at`` then ``event_id``, both descending), or ``None``.
    """
    query = text(f"""
        SELECT
            event_id,
            run_id,
            provider_event_id,
            source_envelope,
            provider_ts,
            received_at
        FROM awooop_conversation_event
        WHERE project_id = :project_id
          AND LOWER(COALESCE(
                NULLIF(source_envelope->>'provider', ''),
                NULLIF(split_part(provider_event_id, ':', 1), ''),
                channel_type
          )) = :provider
          AND LOWER(COALESCE(NULLIF(source_envelope->>'stage', ''), 'received')) = :stage
          {target_clause}
        ORDER BY received_at DESC, event_id DESC
        LIMIT 1
    """)
    async with get_db_context("awoooi") as session:
        outcome = await session.execute(query, params)
        newest = outcome.mappings().first()
    if not newest:
        return None
    return _ai_route_repair_evidence_item(newest)
|
||
|
||
|
||
def _ai_route_repair_evidence_item(
    row: Mapping[str, Any],
) -> dict[str, Any]:
    """Project one repair-evidence row into a compact evidence dict.

    The fields are read from the row's ``source_envelope`` (its ``extra.payload``
    and ``log_correlation`` sections), with fallbacks to the row itself for the
    provider and to defaults for schema version and stage.  The derived
    ``work_item``, ``playbook_recommendation`` and ``owner_action`` sections
    are attached last, in that order, because each is computed from the
    evidence built so far.
    """
    source = _as_dict(row.get("source_envelope"))
    body = _as_dict(_as_dict(source.get("extra")).get("payload"))
    correlation = _as_dict(source.get("log_correlation"))
    probe = _as_dict(body.get("live_probe"))
    observed = _as_dict(body.get("observed_state"))
    effects = _ai_route_repair_side_effects(body.get("side_effects"))

    schema_version = (
        body.get("schema_version")
        or source.get("schema_version")
        or "ai_route_repair_evidence_projection_v1"
    )
    # Fall back to the prefix of provider_event_id (text before the first ":").
    provider = (
        source.get("provider")
        or str(row.get("provider_event_id") or "").split(":", 1)[0]
    )
    target_resource = (
        correlation.get("target_resource")
        or body.get("target_resource")
        or observed.get("target_resource")
    )

    evidence: dict[str, Any] = {
        "schema_version": schema_version,
        "provider": provider,
        "stage": source.get("stage") or _AI_ROUTE_REPAIR_EVIDENCE_STAGE,
        "provider_event_id": row.get("provider_event_id"),
        "conversation_event_id": _string_or_none(row.get("event_id")),
        "run_id": _string_or_none(row.get("run_id")),
        "alertname": correlation.get("alertname"),
        "severity": correlation.get("severity"),
        "fingerprint": correlation.get("fingerprint"),
        "target_resource": target_resource,
        "observed_state": observed,
        "live_probe": probe,
        "access_blockers": _as_string_list(body.get("access_blockers")),
        "side_effects": effects,
        "source_ref_count": _source_ref_count(source),
        "provider_ts": row.get("provider_ts"),
        "received_at": row.get("received_at"),
    }
    evidence["work_item"] = _ai_route_repair_work_item(evidence)
    evidence["playbook_recommendation"] = _ai_route_repair_playbook_recommendation(
        evidence
    )
    evidence["owner_action"] = _ai_route_repair_owner_action(evidence)
    return evidence
|
||
|
||
|
||
def _ai_route_repair_side_effects(value: Any) -> dict[str, bool | None]:
    """Return the four known side-effect flags, each a bool or ``None``.

    Each flag is passed through ``_bool_or_none``; keys other than the four
    listed here are dropped.
    """
    reported = _as_dict(value)
    flag_names = (
        "incident_created",
        "telegram_sent",
        "approval_created",
        "runtime_route_changed",
    )
    return {name: _bool_or_none(reported.get(name)) for name in flag_names}
|
||
|
||
|
||
def _as_string_list(value: Any) -> list[str]:
    """Coerce a value into a list of strings.

    A list keeps, as ``str(item)``, every item for which
    ``str(item or "").strip()`` is non-empty, so falsy items such as ``None``,
    ``""`` and ``0`` are dropped.  Any other value becomes a one-element list,
    except ``None`` and ``""`` which give an empty list.
    """
    if not isinstance(value, list):
        return [] if value in (None, "") else [str(value)]

    kept: list[str] = []
    for entry in value:
        if str(entry or "").strip():
            kept.append(str(entry))
    return kept
|
||
|
||
|
||
def _string_or_none(value: Any) -> str | None:
    """Return ``str(value)``, or ``None`` when the value is ``None`` or ``""``."""
    return None if value in (None, "") else str(value)
|
||
|
||
|
||
def _bool_or_none(value: Any) -> bool | None:
    """Return the value when it is a real ``bool``; otherwise ``None``."""
    if isinstance(value, bool):
        return value
    return None
|
||
|
||
|
||
def _source_ref_count(envelope: Any) -> int:
    """Count the references stored under ``envelope["source_refs"]``.

    A list value contributes one per item whose string form is non-blank; any
    other value contributes one unless it is ``None`` or ``""``.
    """
    refs_by_kind = _as_dict(_as_dict(envelope).get("source_refs"))

    def count_refs(refs: Any) -> int:
        if isinstance(refs, list):
            return sum(1 for ref in refs if str(ref or "").strip())
        return 0 if refs in (None, "") else 1

    return sum(count_refs(refs) for refs in refs_by_kind.values())
|
||
|
||
|
||
def _recent_event_source_summary(row: AwoooPConversationEvent) -> dict[str, Any]:
    """Summarize selected source-envelope fields of a recent channel event.

    The summary always carries provider, stage, provider event id, source
    reference count and redaction version.  A ``telegram_callback_query``
    section and a ``log_correlation`` section are added only when the envelope
    contains the corresponding non-empty data; only the fields listed here
    are copied out of them.
    """
    source = _as_dict(row.source_envelope)
    callback = _as_dict(
        _as_dict(source.get("extra")).get("telegram_callback_query")
        or source.get("telegram_callback_query")
    )
    correlation = _as_dict(source.get("log_correlation"))

    # Provider fallback chain: envelope value, provider_event_id prefix, channel.
    provider = (
        source.get("provider")
        or str(row.provider_event_id or "").split(":", 1)[0]
        or row.channel_type
    )

    summary: dict[str, Any] = {
        "schema_version": "awooop_recent_event_source_summary_v1",
        "provider": provider,
        "stage": source.get("stage"),
        "provider_event_id": row.provider_event_id,
        "source_ref_count": _source_ref_count(source),
        "redaction_version": source.get("redaction_version"),
    }

    if callback:
        # Output key -> key inside the stored callback payload.
        text_fields = (
            ("action", "callback_action"),
            ("callback_ref", "callback_ref"),
            ("incident_id", "incident_id"),
            ("approval_id", "approval_id"),
            ("message_id", "message_id"),
        )
        callback_view: dict[str, Any] = {
            out_key: _string_or_none(callback.get(src_key))
            for out_key, src_key in text_fields
        }
        callback_view["username_present"] = _bool_or_none(
            callback.get("username_present")
        )
        summary["telegram_callback_query"] = callback_view

    if correlation:
        correlation_fields = (
            "alertname",
            "severity",
            "namespace",
            "target_resource",
            "fingerprint",
        )
        summary["log_correlation"] = {
            name: _string_or_none(correlation.get(name))
            for name in correlation_fields
        }
    return summary
|
||
|
||
|
||
def _recent_channel_event_item(row: AwoooPConversationEvent) -> dict[str, Any]:
    """Project one conversation event row into a recent-channel-event dict.

    Copies the listed row attributes unchanged and adds ``source_summary``
    computed by ``_recent_event_source_summary``.
    """
    copied_fields = (
        "event_id",
        "project_id",
        "channel_type",
        "provider_event_id",
        "channel_chat_id",
        "run_id",
        "content_type",
        "content_preview",
        "is_duplicate",
        "received_at",
    )
    item: dict[str, Any] = {name: getattr(row, name) for name in copied_fields}
    item["source_summary"] = _recent_event_source_summary(row)
    return item
|
||
|
||
|
||
def _ai_route_repair_work_item(evidence: Mapping[str, Any]) -> dict[str, Any]:
    """Derive the work-item section from a repair evidence dict.

    The item is ``open`` (and needs a human) when the evidence lists at least
    one access blocker; otherwise it is ``watching``.  The three ``writes_*``
    flags are always ``False``.
    """
    has_blockers = bool(_as_string_list(evidence.get("access_blockers")))
    # NOTE(review): a missing target_resource becomes the literal "unknown"
    # here, so "target_resource" below is None only for whitespace-only input.
    resource = str(evidence.get("target_resource") or "unknown").strip()

    if has_blockers:
        state = "open"
        next_step = "restore_primary_ollama_lane_access"
        reason = "primary_lane_unavailable"
    else:
        state = "watching"
        next_step = "continue_route_monitoring"
        reason = "primary_lane_observed"

    return {
        "schema_version": "awooop_ai_route_repair_work_item_v1",
        "work_item_id": f"ai-route-repair:{resource or 'unknown'}",
        "status": state,
        "kind": "ai_route_primary_lane_repair",
        "next_step": next_step,
        "reason": reason,
        "needs_human": has_blockers,
        "owner": "cloud_sre_operator",
        "target_resource": resource or None,
        "target_href": "/awooop/runs",
        "decision_effect": "none",
        "safety_level": "read_only_work_item_projection",
        "writes_incident_state": False,
        "writes_auto_repair_result": False,
        "writes_runtime_route": False,
    }
|
||
|
||
|
||
def _ai_route_repair_playbook_recommendation(
    evidence: Mapping[str, Any],
) -> dict[str, Any]:
    """Derive the playbook recommendation section from repair evidence.

    Steps are added, in this order, when the evidence shows: a ``gcloud_*``
    access blocker; SSH refused on GCP-A; Ollama port 11434 refused on GCP-A;
    an HTTP 502 from the 110 proxy.  A final route-status verification step is
    always appended.  The recommendation is never marked safe to auto-execute.
    """
    blockers = set(_as_string_list(evidence.get("access_blockers")))
    probe = _as_dict(evidence.get("live_probe"))

    def refused(probe_key: str) -> bool:
        return probe.get(probe_key) == "connection_refused"

    # (step, scope, mode) triples in execution order.
    planned: list[tuple[str, str, str]] = []
    if any(blocker.startswith("gcloud_") for blocker in blockers):
        planned.append((
            "verify_cloud_control_plane_access",
            "gcp_compute_read",
            "manual_or_approved",
        ))
    if "gcp_a_ssh_refused" in blockers or refused("gcp_a_direct_22"):
        planned.append((
            "restore_gcp_a_os_access",
            "gcp_serial_console_or_os_login",
            "manual_or_approved",
        ))
    if "gcp_a_ollama_11434_refused" in blockers or refused("gcp_a_direct_11434"):
        planned.append((
            "restore_ollama_service_on_gcp_a",
            "systemd_ollama",
            "manual_or_approved",
        ))
    if probe.get("proxy_110_11435") == "http_502":
        planned.append((
            "verify_110_proxy_after_gcp_a_recovery",
            "nginx_proxy_readback",
            "read_only_verification",
        ))
    planned.append((
        "verify_ai_route_status_returns_primary",
        "awooop_ai_route_status",
        "read_only_verification",
    ))

    steps: list[dict[str, Any]] = [
        {"step": step, "scope": scope, "mode": mode}
        for step, scope, mode in planned
    ]
    return {
        "schema_version": "awooop_ai_route_playbook_recommendation_v1",
        "playbook_id": "ai_route_primary_lane_recovery",
        "status": "candidate_from_live_evidence",
        "safe_to_auto_execute": False,
        "requires_approval": True,
        "decision_effect": "none",
        "steps": steps,
    }
|
||
|
||
|
||
def _ai_route_repair_owner_action(evidence: Mapping[str, Any]) -> dict[str, Any]:
    """Derive the owner-action section from repair evidence.

    ``next_step`` and ``blocking_reason`` come from the evidence's
    ``work_item`` and ``playbook_id`` from its ``playbook_recommendation``;
    the agent names, human owner and automation state are fixed values.
    """
    work_item = _as_dict(evidence.get("work_item"))
    recommendation = _as_dict(evidence.get("playbook_recommendation"))

    next_step = work_item.get("next_step") or "continue_route_monitoring"
    blocking_reason = work_item.get("reason") or "unknown"
    return {
        "schema_version": "awooop_ai_route_owner_action_v1",
        "lead_agent": "Hermes",
        "supporting_agents": ["OpenClaw", "ElephantAlpha"],
        "human_owner": "Cloud/SRE owner",
        "automation_state": "blocked_by_external_cloud_or_os_access",
        "next_step": next_step,
        "playbook_id": recommendation.get("playbook_id"),
        "safe_to_auto_repair": False,
        "blocking_reason": blocking_reason,
    }
|
||
|
||
|
||
def _ai_route_lane_state(
    *,
    policy_order: list[dict[str, Any]],
    selected_provider: str | None,
    health: dict[str, dict[str, Any]],
) -> dict[str, Any]:
    """Derive lane mode, active lane, skipped lanes and operator action.

    The active lane is the first policy entry whose ``provider_name`` equals
    ``selected_provider``; the skipped lanes are the Ollama entries that come
    before it.  Lane modes, checked in this order:

    * ``unavailable``: no selected provider, or it is not in the policy.
    * ``cloud_fallback``: the active entry's runtime is ``"cloud"``.
    * ``degraded_failover``: at least one Ollama lane was skipped.
    * ``primary``: none of the above.
    """
    active_item: dict[str, Any] | None = None
    preceding: list[dict[str, Any]] = []
    for position, candidate in enumerate(policy_order):
        if candidate.get("provider_name") == selected_provider:
            active_item = candidate
            preceding = policy_order[:position]
            break

    skipped_lanes = [
        _ai_route_lane_item(entry, health.get(str(entry.get("provider_name"))))
        for entry in preceding
        if entry.get("runtime") == "ollama"
    ]

    if not selected_provider or active_item is None:
        lane_mode, human_required = "unavailable", True
        action, reason = "inspect_ai_router", "no_active_provider"
    elif active_item.get("runtime") == "cloud":
        lane_mode, human_required = "cloud_fallback", True
        action, reason = "restore_ollama_lanes", "all_ollama_lanes_unavailable"
    elif skipped_lanes:
        lane_mode, human_required = "degraded_failover", True
        action, reason = "repair_skipped_primary_lane", "fallback_lane_active"
    else:
        lane_mode, human_required = "primary", False
        action, reason = "monitor", "primary_lane_active"

    active_lane = None
    if active_item:
        active_lane = _ai_route_lane_item(
            active_item,
            health.get(str(active_item.get("provider_name"))),
        )

    return {
        "lane_mode": lane_mode,
        "active_lane": active_lane,
        "skipped_lanes": skipped_lanes,
        "operator_action": {
            "human_required": human_required,
            "action": action,
            "reason": reason,
        },
    }
|
||
|
||
|
||
def _ai_route_lane_item(
    item: dict[str, Any],
    health_item: dict[str, Any] | None,
) -> dict[str, Any]:
    """Combine one policy entry with its health entry into a lane description.

    Without a health entry the lane reports ``health_status="not_checked"``
    and falls back to the policy entry's ``reason``.  ``action_required`` is
    true only for a health status other than ``"healthy"``, ``"not_checked"``
    or ``None``.
    """
    observed = health_item or {}
    status_value = observed.get("status")
    needs_action = status_value not in {"healthy", "not_checked", None}

    lane = {key: item.get(key) for key in ("priority", "provider_name", "role", "runtime", "url")}
    lane["health_status"] = observed.get("status", "not_checked")
    lane["reason"] = observed.get("reason") or item.get("reason")
    lane["action_required"] = needs_action
    return lane
|
||
|
||
|
||
def _ai_route_policy_endpoint_item(
    endpoint: OllamaEndpointSelection,
    *,
    priority: int,
) -> dict[str, Any]:
    """Describe one resolved Ollama endpoint as a policy-order entry.

    The role is looked up from the provider name (``primary``, ``secondary``
    or ``local_fallback``) and defaults to ``"ollama"`` for any other name.
    """
    role_by_provider = {
        "ollama_gcp_a": "primary",
        "ollama_gcp_b": "secondary",
        "ollama_local": "local_fallback",
    }
    provider = endpoint.provider_name
    return {
        "priority": priority,
        "provider_name": provider,
        "url": endpoint.url,
        "workload_type": endpoint.workload_type,
        "reason": endpoint.reason,
        "role": role_by_provider.get(provider, "ollama"),
        "runtime": "ollama",
    }
|
||
|
||
|
||
def _ai_route_runtime_endpoint_item(
    endpoint: OllamaEndpoint,
    *,
    priority: int,
) -> dict[str, Any]:
    """Describe one runtime endpoint as a fallback-chain entry.

    An empty URL is reported as ``None``.  The runtime is ``"ollama"`` when
    the provider name starts with ``"ollama"`` and ``"cloud"`` otherwise.
    """
    provider = endpoint.provider_name
    runtime = "ollama" if provider.startswith("ollama") else "cloud"
    return {
        "priority": priority,
        "provider_name": provider,
        "url": endpoint.url or None,
        "model": endpoint.model,
        "runtime": runtime,
    }
|
||
|
||
|
||
def _ai_route_health_map(route: OllamaRoutingResult) -> dict[str, dict[str, Any]]:
    """Build the provider-keyed health map from a routing result.

    ``ollama_gcp_a`` always uses its report.  ``ollama_gcp_b`` and
    ``ollama_local`` use their report when one is present and the
    "not checked" placeholder otherwise.
    """
    health: dict[str, dict[str, Any]] = {
        "ollama_gcp_a": _ai_route_health_item(route.health_gcp_a),
    }
    standby_reports = (
        ("ollama_gcp_b", route.health_gcp_b),
        ("ollama_local", route.health_local),
    )
    for provider, report in standby_reports:
        health[provider] = (
            _ai_route_health_item(report)
            if report
            else _ai_route_not_checked_health_item()
        )
    return health
|
||
|
||
|
||
def _ai_route_health_item(report: HealthReport) -> dict[str, Any]:
    """Return ``report.to_dict()`` with ``checked`` set to ``True``.

    The flag is written into the dict that ``to_dict()`` returned.
    """
    item = report.to_dict()
    item.update(checked=True)
    return item
|
||
|
||
|
||
def _ai_route_not_checked_health_item() -> dict[str, Any]:
    """Return the placeholder health entry for a lane that was not probed."""
    placeholder: dict[str, Any] = {
        "status": "not_checked",
        "host": "",
        "latency_ms": None,
        "reason": "standby_not_checked_primary_healthy",
        "checked_at": None,
        "from_cache": False,
        "checked": False,
    }
    return placeholder
|
||
|
||
|
||
def _timeline_item(
    *,
    ts: Any,
    kind: str,
    title: str,
    status: str,
    summary: str | None = None,
    metadata: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build one timeline entry; a missing or empty ``metadata`` becomes ``{}``."""
    entry: dict[str, Any] = dict(
        ts=ts,
        kind=kind,
        title=title,
        status=status,
        summary=summary,
    )
    entry["metadata"] = metadata or {}
    return entry
|
||
|
||
|
||
def _utc_now_naive() -> datetime:
|
||
"""回傳與 AwoooP timestamp-without-timezone 欄位相容的 UTC 時間。"""
|
||
return datetime.now(UTC).replace(tzinfo=None)
|
||
|
||
|
||
def _truncate_step_summary(value: str | None) -> str | None:
    """Collapse whitespace in a step summary and cap its length.

    Runs of whitespace (including newlines) are replaced by single spaces.  A
    result longer than ``_MAX_STEP_SUMMARY_CHARS`` is cut so that, together
    with a trailing ellipsis character, it is exactly that long.

    Returns:
        The compacted summary, or ``None`` when the input is empty or consists
        only of whitespace.
    """
    if not value:
        return None
    compact = " ".join(str(value).split())
    if not compact:
        # Whitespace-only input collapses to ""; report it like empty input
        # instead of handing an empty string to the caller.
        return None
    if len(compact) <= _MAX_STEP_SUMMARY_CHARS:
        return compact
    return f"{compact[: _MAX_STEP_SUMMARY_CHARS - 1]}…"
|
||
|
||
|
||
def _approval_step_title(tool_name: str, step_seq: int) -> str:
    """Turn an ``operator_console.*`` step into a readable timeline title.

    Approve and reject steps get a dedicated manual-approval title; any other
    tool name falls back to ``"Step <seq>: <tool_name>"``.
    """
    verdict_by_tool = {
        "operator_console.approve": "核准",
        "operator_console.reject": "拒絕",
    }
    verdict = verdict_by_tool.get(tool_name)
    if verdict is None:
        return f"Step {step_seq}: {tool_name}"
    return f"人工審批 {step_seq}: {verdict}"
|
||
|
||
|
||
def _outbound_timeline_title(
    channel_type: str,
    message_type: str,
    content_preview: str | None,
    callback_reply: dict[str, Any] | None = None,
) -> str:
    """Classify an outbound message into a human-readable timeline title.

    Resolution order:
      1. a non-empty ``callback_reply`` yields a callback-reply title built
         from its ``action`` and ``status``;
      2. otherwise the first marker rule matching ``content_preview`` wins;
      3. otherwise the title is looked up from ``message_type``, falling back
         to the raw ``message_type``.

    Every title is prefixed with the upper-cased channel type.
    """
    channel = channel_type.upper()
    preview = content_preview or ""

    if callback_reply:
        action = str(callback_reply.get("action") or "").strip()
        raw_status = str(callback_reply.get("status") or "").strip()
        action_labels = {
            "detail": "詳情",
            "history": "歷史",
        }
        status_labels = {
            "callback_reply_sent": "已送出",
            "callback_reply_fallback_sent": " fallback 已送出",
            "callback_reply_rescue_sent": "救援已送出",
            "callback_reply_failed": "送出失敗",
        }
        action_label = action_labels.get(action, action or "Callback")
        status_label = status_labels.get(raw_status, raw_status or "已記錄")
        return f"{channel}:{action_label}回覆{status_label}"

    # (markers, label): the first rule with any marker in the preview wins.
    preview_rules: tuple[tuple[tuple[str, ...], str], ...] = (
        (("RUNBOOK REVIEW",), "Runbook 待人工審核"),
        (("[AWOOOI CI/CD]", "AWOOOI CI/CD"), "CI/CD 狀態通知"),
        (("AI 治理警報",), "AI 治理警報"),
        (("HANDOFF REQUIRED", "AI 自動修復失敗"), "AI 自動修復失敗,已轉人工"),
        (("AUTO RESOLVED", "AI 自動修復完成"), "AI 自動修復完成"),
        (("ESCALATION", "事故升級"), "事故升級通知"),
        (("ACTION REQUIRED",), "告警審批卡"),
    )
    for markers, label in preview_rules:
        if any(marker in preview for marker in markers):
            return f"{channel}:{label}"

    message_type_labels = {
        "approval_request": "人工審批請求",
        "error": "錯誤回覆",
        "final": "處置結果",
        "interim": "漸進式狀態回饋",
    }
    fallback = message_type_labels.get(message_type, message_type)
    return f"{channel}:{fallback}"
|
||
|
||
|
||
def _outbound_callback_reply(source_envelope: Any) -> dict[str, Any] | None:
    """Return ``source_envelope["callback_reply"]`` when both are dicts.

    Returns ``None`` if the envelope is not a dict or the stored value is
    missing or not a dict.
    """
    if isinstance(source_envelope, dict):
        candidate = source_envelope.get("callback_reply")
        if isinstance(candidate, dict):
            return candidate
    return None
|
||
|
||
|
||
def _callback_reply_public_status(callback_reply: dict[str, Any]) -> str:
    """Map the raw callback reply ``status`` to its public status value.

    Unknown or missing statuses map to ``"observed"``.
    """
    public_by_raw = {
        "callback_reply_sent": "sent",
        "callback_reply_fallback_sent": "fallback_sent",
        "callback_reply_rescue_sent": "rescue_sent",
        "callback_reply_failed": "failed",
    }
    raw_status = str(callback_reply.get("status") or "")
    if raw_status in public_by_raw:
        return public_by_raw[raw_status]
    return "observed"
|
||
|
||
|
||
def _callback_reply_evidence_capture_status(
    *,
    callback_reply: Mapping[str, Any],
    persisted_awooop_status_chain: dict[str, Any] | None,
    persisted_km_stale_completion_summary: dict[str, Any] | None,
    event_at: Any,
) -> dict[str, Any]:
    """Report which evidence snapshots were persisted with a callback reply.

    Status is ``captured`` when both snapshots are present, ``partial`` when
    exactly one is, and ``not_captured`` when neither is.  For
    ``not_captured`` the reason depends on whether the reply's raw status is
    ``callback_reply_failed``.
    """
    snapshots = (
        ("awooop_status_chain", persisted_awooop_status_chain),
        ("km_stale_completion_summary", persisted_km_stale_completion_summary),
    )
    captured = [name for name, snapshot in snapshots if snapshot]
    missing = [name for name, snapshot in snapshots if not snapshot]
    retry_action = "press_telegram_detail_or_history_after_rollout"

    if not missing:
        status_value, reason, next_action = "captured", "ok", "none"
    elif captured:
        status_value = "partial"
        reason = "partial_snapshot_rollout_transition"
        next_action = retry_action
    else:
        status_value = "not_captured"
        delivery_failed = (
            str(callback_reply.get("status") or "") == "callback_reply_failed"
        )
        reason = (
            "callback_reply_delivery_failed_snapshot_missing"
            if delivery_failed
            else "legacy_callback_before_snapshot_rollout"
        )
        next_action = retry_action

    return {
        "schema_version": _CALLBACK_EVIDENCE_CAPTURE_STATUS_SCHEMA_VERSION,
        "status": status_value,
        "reason": reason,
        "action": str(callback_reply.get("action") or "").strip() or None,
        "captured": captured,
        "missing": missing,
        "snapshot_rollout": "t167_t169",
        "next_action": next_action,
        "event_at": event_at,
    }
|
||
|
||
|
||
def _callback_reply_capture_status_from_outbound(
    row: AwoooPOutboundMessage,
    callback_reply: Mapping[str, Any],
) -> dict[str, Any]:
    """Compute the evidence capture status for one outbound message row.

    The two snapshots are read from the row's ``source_envelope``; an empty or
    missing snapshot is passed on as ``None``.  The event time is ``sent_at``,
    or ``queued_at`` when ``sent_at`` is not set.
    """
    envelope = _as_dict(row.source_envelope)
    status_chain = _as_dict(envelope.get("awooop_status_chain")) or None
    km_summary = _as_dict(envelope.get("km_stale_completion_summary")) or None
    return _callback_reply_evidence_capture_status(
        callback_reply=callback_reply,
        persisted_awooop_status_chain=status_chain,
        persisted_km_stale_completion_summary=km_summary,
        event_at=row.sent_at or row.queued_at,
    )
|
||
|
||
|
||
def _callback_reply_event_item(row: Mapping[str, Any]) -> dict[str, Any]:
    """Convert one callback reply outbound row into an evidence item.

    Besides copying row columns, the item carries the public reply status,
    ``needs_human`` (true only for a failed reply), the persisted snapshots
    (``None`` when empty), the evidence capture status, and a run-detail link
    that is only set when both ``run_id`` and ``project_id`` are present.
    """
    reply = _as_dict(row.get("callback_reply"))
    public_status = _callback_reply_public_status(reply)
    project_id = str(row.get("project_id") or "")
    run_id = row.get("run_id")
    event_at = row.get("sent_at") or row.get("queued_at")
    status_chain = _as_dict(row.get("persisted_awooop_status_chain")) or None
    km_summary = _as_dict(row.get("persisted_km_stale_completion_summary")) or None

    capture_status = _callback_reply_evidence_capture_status(
        callback_reply=reply,
        persisted_awooop_status_chain=status_chain,
        persisted_km_stale_completion_summary=km_summary,
        event_at=event_at,
    )
    run_detail_href = None
    if run_id and project_id:
        run_detail_href = f"/awooop/runs/{run_id}?project_id={project_id}"

    return {
        "message_id": row.get("message_id"),
        "run_id": run_id,
        "project_id": project_id,
        "status": public_status,
        "needs_human": public_status == "failed",
        "action": str(reply.get("action") or "").strip() or None,
        "incident_id": str(reply.get("incident_id") or "").strip() or None,
        "event_at": event_at,
        "channel_type": row.get("channel_type"),
        "message_type": row.get("message_type"),
        "send_status": row.get("send_status"),
        "send_error": row.get("send_error"),
        "provider_message_id": row.get("provider_message_id"),
        "triggered_by_state": row.get("triggered_by_state"),
        "content_preview": row.get("content_preview"),
        "run_state": row.get("run_state"),
        "agent_id": row.get("agent_id"),
        "run_created_at": row.get("run_created_at"),
        "callback_reply": reply,
        "persisted_awooop_status_chain": status_chain,
        "persisted_km_stale_completion_summary": km_summary,
        "evidence_capture_status": capture_status,
        "run_detail_href": run_detail_href,
    }
|
||
|
||
|
||
def _empty_km_stale_completion_summary(
    *,
    project_id: str,
    incident_id: str | None,
    status_value: str,
    reason: str | None = None,
) -> dict[str, Any]:
    """Build the nullable KM owner-review summary shape for callback evidence.

    All counters are zero and ``related_items`` is empty; only the identity
    fields, ``status`` and ``missing_reason`` vary with the arguments.

    Args:
        project_id: Project the callback belongs to.
        incident_id: Incident referenced by the callback, if any.
        status_value: Value stored under ``status`` and forwarded to the
            work-item builder.
        reason: Optional explanation stored under ``missing_reason``.

    Returns:
        A summary dict with the same keys as
        ``_build_km_stale_completion_summary`` produces.
    """
    return {
        "schema_version": _KM_STALE_COMPLETION_CALLBACK_SCHEMA_VERSION,
        "project_id": project_id,
        "incident_id": incident_id,
        "status": status_value,
        "missing_reason": reason,
        "total": 0,
        "returned": 0,
        "pending_count": 0,
        "ready_count": 0,
        "blocked_count": 0,
        "completed_count": 0,
        "failed_count": 0,
        "writes_on_read": False,
        "manual_review_required": True,
        "batch_writes_allowed": False,
        "items_truncated": False,
        "related_total": 0,
        "related_items": [],
        # The builder returns None unless the status is
        # "no_related_owner_review" and an incident id is present.
        "work_item": _km_stale_callback_owner_review_work_item(
            project_id=project_id,
            incident_id=incident_id,
            status_value=status_value,
            reason=reason,
        ),
    }
|
||
|
||
|
||
def _object_field(payload: Any, name: str, default: Any = None) -> Any:
    """Read ``name`` from a mapping key or, failing that, an object attribute.

    Args:
        payload: A ``Mapping`` or any object exposing attributes.
        name: Key / attribute name to look up.
        default: Value returned when the key or attribute is absent.

    Returns:
        The stored value, or ``default`` when it is missing.
    """
    if isinstance(payload, Mapping):
        return payload.get(name, default)
    return getattr(payload, name, default)
|
||
|
||
|
||
def _object_int_field(payload: Any, name: str) -> int:
    """Read ``name`` from ``payload`` and coerce it to ``int``.

    Missing, falsy or non-numeric values all collapse to ``0``.
    """
    try:
        return int(_object_field(payload, name, 0) or 0)
    except (TypeError, ValueError):
        return 0
|
||
|
||
|
||
def _build_km_stale_completion_summary(
    *,
    queue: Any,
    project_id: str,
    incident_id: str,
) -> dict[str, Any]:
    """Summarize KM owner-review completion queue state for one incident.

    Args:
        queue: Completion queue payload (mapping or object) exposing ``items``
            plus the aggregate counters read below.
        project_id: Project the summary is built for.
        incident_id: Incident id matched exactly against each item's
            ``related_incident_id`` (after stripping the item's value).

    Returns:
        A summary dict. ``related_items`` holds at most three matches while
        ``related_total`` reports the full match count.
    """
    related_items: list[dict[str, Any]] = []
    for item in _object_field(queue, "items", []) or []:
        related_id = str(_object_field(item, "related_incident_id") or "").strip()
        if related_id != incident_id:
            continue
        related_items.append({
            "entry_id": _object_field(item, "entry_id"),
            "title": _object_field(item, "title"),
            "dispatch_id": _object_field(item, "dispatch_id"),
            "governance_event_id": _object_field(item, "governance_event_id"),
            "readiness": _object_field(item, "readiness"),
            "workflow_stage": _object_field(item, "workflow_stage"),
            "next_action": _object_field(item, "next_action"),
            "priority_tier": _object_field(item, "priority_tier"),
            "recommended_completion_outcome": _object_field(
                item,
                "recommended_completion_outcome",
            ),
            "can_preview": bool(_object_field(item, "can_preview", False)),
        })

    # Derive status/reason once so the summary and its work item always agree.
    if related_items:
        status_value = "matched_owner_review"
        missing_reason = None
    else:
        status_value = "no_related_owner_review"
        missing_reason = "no_matching_completion_item"

    total = _object_int_field(queue, "total")
    returned = _object_int_field(queue, "returned")
    return {
        "schema_version": _KM_STALE_COMPLETION_CALLBACK_SCHEMA_VERSION,
        "project_id": project_id,
        "incident_id": incident_id,
        "status": status_value,
        "missing_reason": missing_reason,
        "total": total,
        "returned": returned,
        "pending_count": _object_int_field(queue, "pending_count"),
        "ready_count": _object_int_field(queue, "ready_count"),
        "blocked_count": _object_int_field(queue, "blocked_count"),
        "completed_count": _object_int_field(queue, "completed_count"),
        "failed_count": _object_int_field(queue, "failed_count"),
        "writes_on_read": bool(_object_field(queue, "writes_on_read", False)),
        "manual_review_required": bool(
            _object_field(queue, "manual_review_required", True)
        ),
        "batch_writes_allowed": bool(
            _object_field(queue, "batch_writes_allowed", False)
        ),
        # The queue reported more rows than it returned, so matches may be missing.
        "items_truncated": total > returned,
        "related_total": len(related_items),
        "related_items": related_items[:3],
        "work_item": _km_stale_callback_owner_review_work_item(
            project_id=project_id,
            incident_id=incident_id,
            status_value=status_value,
            reason=missing_reason,
        ),
    }
|
||
|
||
|
||
def _km_stale_callback_owner_review_work_item(
    *,
    project_id: str,
    incident_id: str | None,
    status_value: str,
    reason: str | None,
) -> dict[str, Any] | None:
    """Generate a read-only Work Items link for callback evidence gaps.

    Args:
        project_id: Project used in the work-item id and both hrefs.
        incident_id: Incident the callback referenced.
        status_value: Summary status; only ``"no_related_owner_review"``
            yields a work item.
        reason: Optional gap reason; defaults to
            ``"no_matching_completion_item"`` when falsy.

    Returns:
        The work-item dict, or ``None`` when there is no incident id or the
        status is anything other than ``"no_related_owner_review"``.
    """
    if not incident_id or status_value != "no_related_owner_review":
        return None

    effective_reason = reason or "no_matching_completion_item"
    work_item_id = f"km-callback-owner-review:{project_id}:{incident_id}"
    # urlencode escapes the values so ids containing ":" or spaces stay valid.
    target_query = urlencode(
        {
            "project_id": project_id,
            "incident_id": incident_id,
            "callback_reply_status": "sent",
        }
    )
    work_item_query = urlencode(
        {
            "project_id": project_id,
            "work_item_id": work_item_id,
            "incident_id": incident_id,
        }
    )
    return {
        "schema_version": "km_stale_callback_owner_review_work_item_v1",
        "work_item_id": work_item_id,
        "kind": "km_stale_callback_owner_review",
        "status": "open",
        "project_id": project_id,
        "incident_id": incident_id,
        "reason": effective_reason,
        "title": "Telegram callback incident has no matching KM owner-review item",
        "next_step": "review_or_queue_km_owner_review",
        "target_surface": "awooop_runs_callback_evidence",
        "target_href": f"/awooop/runs?{target_query}",
        "work_item_href": f"/awooop/work-items?{work_item_query}",
        "triage": {
            "schema_version": "km_stale_callback_owner_review_triage_v1",
            "flow_stage": "callback_observed_owner_review_link_missing",
            "ai_lead_agent": "Hermes",
            "supporting_agents": ["OpenClaw", "ElephantAlpha"],
            "automation_state": "manual_owner_review_required",
            "safe_to_auto_repair": False,
            "blocking_reason": effective_reason,
            "matching_strategy": "related_incident_id_exact_match",
            "already_done": [
                "callback_reply_persisted",
                "completion_queue_checked",
                "generated_read_only_work_item",
            ],
            "next_actions": [
                "review_runs_callback_evidence",
                "queue_matching_km_stale_candidate",
                "complete_owner_review_after_owner_approval",
            ],
        },
        "writes_on_read": False,
        "manual_review_required": True,
        "batch_writes_allowed": False,
    }
|
||
|
||
|
||
def _outbound_timeline_status(
    send_status: str,
    callback_reply: dict[str, Any] | None,
) -> str:
    """Prefer callback delivery status when the outbound row records one.

    Args:
        send_status: Delivery status stored on the outbound row.
        callback_reply: Callback reply payload, if the row carries one.

    Returns:
        The callback's ``status`` when it is a non-empty string, otherwise
        ``send_status``.
    """
    if callback_reply:
        # Named callback_status so the imported fastapi ``status`` module is
        # not shadowed inside this function.
        callback_status = callback_reply.get("status")
        if isinstance(callback_status, str) and callback_status:
            return callback_status
    return send_status
|
||
|
||
|
||
def _outbound_timeline_summary(
    *,
    content_preview: str | None,
    send_error: str | None,
    callback_reply: dict[str, Any] | None,
) -> str | None:
    """Summarize callback reply state without forcing operators to inspect raw JSON.

    Args:
        content_preview: Preview text of the outbound message.
        send_error: Delivery error recorded on the row.
        callback_reply: Callback reply payload, if any.

    Returns:
        Without a callback reply: ``content_preview`` or, failing that,
        ``send_error``. With one: a ``" · "``-joined line of
        ``callback=``/``incident=``/``status=`` fields, followed by the
        optional ``parse_mode=``, ``error=`` and the content preview.
    """
    if not callback_reply:
        return content_preview or send_error

    parts = [
        f"callback={callback_reply.get('action') or '--'}",
        f"incident={callback_reply.get('incident_id') or '--'}",
        f"status={callback_reply.get('status') or '--'}",
    ]
    parse_mode = callback_reply.get("parse_mode")
    if parse_mode:
        parts.append(f"parse_mode={parse_mode}")
    error = callback_reply.get("error")
    if error:
        parts.append(f"error={error}")
    if content_preview:
        parts.append(str(content_preview))
    return " · ".join(parts)
|
||
|
||
|
||
def _outbound_timeline_metadata(
    row: AwoooPOutboundMessage,
    callback_reply: dict[str, Any] | None,
) -> dict[str, Any]:
    """Build compact outbound metadata with callback fields first when present.

    Args:
        row: Outbound message row; ``message_type``, ``provider_message_id``
            and ``triggered_by_state`` are read from it.
        callback_reply: Callback reply payload, if any.

    Returns:
        A dict whose insertion order puts the ``callback_*`` keys (only when a
        callback reply exists) ahead of the row-level keys.
    """
    metadata: dict[str, Any] = {}
    if callback_reply:
        metadata.update({
            "callback_status": callback_reply.get("status"),
            "callback_action": callback_reply.get("action"),
            "callback_incident_id": callback_reply.get("incident_id"),
            "callback_parse_mode": callback_reply.get("parse_mode"),
        })
    metadata.update({
        "message_type": row.message_type,
        "provider_message_id": row.provider_message_id,
        "triggered_by_state": row.triggered_by_state,
    })
    return metadata
|
||
|
||
|
||
def _validate_cicd_stage_filter(value: str | None) -> str | None:
    """Normalize a CI/CD stage filter without allowing arbitrary SQL fragments.

    Args:
        value: Raw filter value supplied by the caller.

    Returns:
        The stripped, lower-cased stage, or ``None`` when no filter was given
        (``None`` or blank).

    Raises:
        HTTPException: 422 when the normalized stage does not fully match
            ``_CICD_STAGE_RE``.
    """
    if value is None:
        return None
    stage = value.strip().lower()
    if not stage:
        return None
    if not _CICD_STAGE_RE.fullmatch(stage):
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
            detail="stage 格式錯誤,僅允許 a-z、0-9、底線、冒號與短橫線",
        )
    return stage
|
||
|
||
|
||
def _validate_cicd_status_filter(value: str | None) -> str | None:
    """Normalize and validate CI/CD status filter.

    Args:
        value: Raw filter value supplied by the caller.

    Returns:
        The stripped, lower-cased status, or ``None`` when no filter was given
        (``None`` or blank).

    Raises:
        HTTPException: 422 when the normalized status is not a member of
            ``_CICD_STATUS_FILTERS``; the detail lists the allowed values.
    """
    if value is None:
        return None
    status_value = value.strip().lower()
    if not status_value:
        return None
    if status_value not in _CICD_STATUS_FILTERS:
        allowed = ", ".join(sorted(_CICD_STATUS_FILTERS))
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
            detail=f"status 必須是: {allowed}",
        )
    return status_value
|
||
|
||
|
||
def _cicd_duration_seconds(value: Any) -> int:
    """Coerce Alertmanager duration_seconds label into a non-negative integer.

    Args:
        value: Raw label value; falsy values are treated as ``"0"``.

    Returns:
        The duration in whole seconds, clamped to ``>= 0``. Fractional values
        such as ``"12.5"`` are truncated; anything unparseable (including NaN
        and infinity) yields ``0``.
    """
    text_value = str(value or "0").strip()
    try:
        duration = int(text_value)
    except (TypeError, ValueError):
        # Labels may carry fractional seconds; fall back to float parsing
        # instead of silently reporting a zero-length run.
        try:
            duration = int(float(text_value))
        except (TypeError, ValueError, OverflowError):
            return 0
    return max(duration, 0)
|
||
|
||
|
||
def _cicd_event_needs_attention(status_value: str | None, severity: str | None) -> bool:
    """Return whether a CI/CD evidence row should be highlighted for operators.

    The comparison is case-insensitive. A row needs attention when its status
    is ``failed``/``pending`` or its severity is ``critical``/``warning``.
    """
    normalized_status = str(status_value or "").lower()
    normalized_severity = str(severity or "").lower()
    return (
        normalized_status in {"failed", "pending"}
        or normalized_severity in {"critical", "warning"}
    )
|
||
|
||
|
||
def _cicd_event_item_from_row(row: Mapping[str, Any], *, project_id: str) -> dict[str, Any]:
    """Convert one alert_operation_log CI/CD row into an operator-facing item.

    Column values on ``row`` take precedence; when a column is empty the value
    falls back to the alert ``labels`` / ``annotations`` (or the ``context``
    itself) nested under ``row["context"]``.

    Args:
        row: Result row exposed as a mapping.
        project_id: Project id stamped onto the item.

    Returns:
        A flat dict describing the CI/CD event, including the derived
        ``needs_attention`` flag.
    """
    context = _as_dict(row.get("context"))
    labels = _as_dict(context.get("labels"))
    annotations = _as_dict(context.get("annotations"))

    status_value = str(row.get("status") or labels.get("status") or "").lower() or None
    severity = str(row.get("severity") or labels.get("severity") or "").lower() or None
    summary = row.get("summary") or annotations.get("summary")
    description = row.get("description") or annotations.get("description")
    workflow_url = row.get("workflow_url") or annotations.get("workflow_url")

    return {
        "id": str(row.get("id") or ""),
        "project_id": project_id,
        "alertname": str(row.get("alertname") or labels.get("alertname") or ""),
        "stage": row.get("stage") or labels.get("stage"),
        "status": status_value,
        "severity": severity,
        "commit_sha": row.get("commit_sha") or labels.get("commit"),
        "triggered_by": row.get("triggered_by") or labels.get("triggered_by"),
        "duration_seconds": _cicd_duration_seconds(
            row.get("duration_seconds") or labels.get("duration_seconds")
        ),
        "summary": str(summary).strip() if summary else None,
        "description": str(description).strip() if description else None,
        "workflow_url": str(workflow_url).strip() if workflow_url else None,
        "alert_id": row.get("alert_id") or context.get("alert_id"),
        "source": row.get("source") or context.get("source"),
        "action_detail": row.get("action_detail"),
        "needs_attention": _cicd_event_needs_attention(status_value, severity),
        "created_at": row.get("created_at"),
    }
|
||
|
||
|
||
def _run_callback_reply_summary(
    outbound_messages: list[AwoooPOutboundMessage],
) -> dict[str, Any]:
    """Summarize Telegram detail/history callback reply delivery for Run List.

    Args:
        outbound_messages: Outbound rows of one run; only rows whose
            ``source_envelope`` yields a callback reply are counted.

    Returns:
        A ``awooop_run_callback_reply_summary_v1`` dict with per-status
        counters, the latest callback's fields and an aggregated evidence
        ``capture_status``. When no row carries a callback reply, a zeroed
        ``"no_callback"`` summary is returned.
    """
    callback_rows: list[tuple[AwoooPOutboundMessage, dict[str, Any]]] = []
    for row in outbound_messages:
        callback_reply = _outbound_callback_reply(row.source_envelope)
        if callback_reply:
            callback_rows.append((row, callback_reply))

    if not callback_rows:
        return {
            "schema_version": "awooop_run_callback_reply_summary_v1",
            "status": "no_callback",
            "total": 0,
            "sent": 0,
            "fallback_sent": 0,
            "rescue_sent": 0,
            "failed": 0,
            "needs_human": False,
            "latest_status": None,
            "latest_action": None,
            "latest_incident_id": None,
            "latest_at": None,
            "latest_provider_message_id": None,
            "capture_status": "no_callback",
            "capture_captured": 0,
            "capture_partial": 0,
            "capture_not_captured": 0,
            "latest_capture_status": None,
            "latest_capture_missing": [],
            "latest_capture_next_action": None,
        }

    # Newest first, ordered by the stringified sent/queued timestamp; rows
    # with neither timestamp compare as "" and therefore end up last.
    sorted_rows = sorted(
        callback_rows,
        key=lambda item: str(item[0].sent_at or item[0].queued_at or ""),
        reverse=True,
    )
    latest_row, latest_callback = sorted_rows[0]
    statuses = [str(callback.get("status") or "") for _, callback in sorted_rows]
    failed = statuses.count("callback_reply_failed")
    latest_status = str(latest_callback.get("status") or "")
    summary_status = _callback_reply_public_status(latest_callback)

    capture_rows = [
        _callback_reply_capture_status_from_outbound(row, callback)
        for row, callback in sorted_rows
    ]
    capture_statuses = [
        str(capture.get("status") or "observed") for capture in capture_rows
    ]
    capture_not_captured = capture_statuses.count("not_captured")
    capture_partial = capture_statuses.count("partial")
    capture_captured = capture_statuses.count("captured")
    latest_capture = capture_rows[0] if capture_rows else {}

    # Worst state wins: any gap outranks partial, which outranks fully captured.
    if capture_not_captured > 0:
        capture_status = "not_captured"
    elif capture_partial > 0:
        capture_status = "partial"
    elif capture_captured > 0 and capture_captured == len(capture_rows):
        capture_status = "captured"
    else:
        capture_status = "observed"

    return {
        "schema_version": "awooop_run_callback_reply_summary_v1",
        "status": summary_status,
        "total": len(sorted_rows),
        "sent": statuses.count("callback_reply_sent"),
        "fallback_sent": statuses.count("callback_reply_fallback_sent"),
        "rescue_sent": statuses.count("callback_reply_rescue_sent"),
        "failed": failed,
        # latest_status is statuses[0], so a failed latest reply is already
        # included in the ``failed`` count.
        "needs_human": failed > 0,
        "latest_status": latest_status or None,
        "latest_action": latest_callback.get("action"),
        "latest_incident_id": latest_callback.get("incident_id"),
        "latest_at": latest_row.sent_at or latest_row.queued_at,
        "latest_provider_message_id": latest_row.provider_message_id,
        "capture_status": capture_status,
        "capture_captured": capture_captured,
        "capture_partial": capture_partial,
        "capture_not_captured": capture_not_captured,
        "latest_capture_status": latest_capture.get("status"),
        "latest_capture_missing": latest_capture.get("missing") or [],
        "latest_capture_next_action": latest_capture.get("next_action"),
    }
|
||
|
||
|
||
def _mcp_gateway_summary_row(row: AwoooPMcpGatewayAudit) -> dict[str, Any]:
    """Convert SQLAlchemy audit rows into the truth-chain summary shape.

    Args:
        row: Gateway audit row to flatten.

    Returns:
        A plain dict of the fields the summary needs; a null ``gate_result``
        is normalized to ``{}``.
    """
    return {
        "agent_id": row.agent_id,
        "tool_name": row.tool_name,
        "result_status": row.result_status,
        "block_gate": row.block_gate,
        "gate_result": row.gate_result or {},
    }
|
||
|
||
|
||
def _as_dict(value: Any) -> dict[str, Any]:
    """Return dict payloads defensively; DB JSON fields may be null or stale.

    Anything that is not a ``dict`` (including ``None``, lists and strings)
    is replaced by a fresh empty dict.
    """
    return value if isinstance(value, dict) else {}
|
||
|
||
|
||
def _append_unique(values: list[str], candidate: Any) -> None:
    """Append non-empty string once while preserving discovery order.

    ``candidate`` is stringified and stripped first; falsy or blank
    candidates are ignored. ``values`` is mutated in place.
    """
    text_value = str(candidate or "").strip()
    if text_value and text_value not in values:
        values.append(text_value)
|
||
|
||
|
||
def _append_incident_ids_from_text(values: list[str], text_value: Any) -> None:
    """Extract incident ids from legacy text payloads.

    Every ``_INCIDENT_ID_RE`` match found in the stringified ``text_value`` is
    appended to ``values`` (in place, de-duplicated). Falsy input is a no-op.
    """
    if not text_value:
        return
    for incident_id in _INCIDENT_ID_RE.findall(str(text_value)):
        _append_unique(values, incident_id)
|
||
|
||
|
||
def _append_incident_ids_from_source_envelope(values: list[str], envelope: Any) -> None:
    """Extract incident ids from AwoooP channel event source_refs.

    Reads ``envelope["source_refs"]["incident_ids"]``; a list contributes each
    element, any other value is offered as a single candidate. Non-dict
    envelopes or ``source_refs`` contribute nothing. ``values`` is mutated in
    place and stays de-duplicated.
    """
    source_refs = _as_dict(_as_dict(envelope).get("source_refs"))
    incident_ids = source_refs.get("incident_ids")
    if isinstance(incident_ids, list):
        for incident_id in incident_ids:
            _append_unique(values, incident_id)
    else:
        # Scalar (or missing) value: _append_unique ignores falsy candidates.
        _append_unique(values, incident_ids)
|
||
|
||
|
||
def _collect_run_incident_ids(
    *,
    run: AwoooPRunState,
    inbound_events: list[AwoooPConversationEvent],
    outbound_messages: list[AwoooPOutboundMessage],
) -> list[str]:
    """Collect incident ids that tie a Run back to legacy incident evidence.

    Sources are scanned in a fixed order: the run's ``trigger_ref`` and
    ``error_detail``, then each inbound event (source envelope, content
    preview, redacted content), then each outbound message (source envelope,
    content preview, send error).

    Returns:
        De-duplicated incident ids in discovery order.
    """
    incident_ids: list[str] = []
    _append_incident_ids_from_text(incident_ids, run.trigger_ref)
    _append_incident_ids_from_text(incident_ids, run.error_detail)

    for event in inbound_events:
        _append_incident_ids_from_source_envelope(incident_ids, event.source_envelope)
        _append_incident_ids_from_text(incident_ids, event.content_preview)
        _append_incident_ids_from_text(incident_ids, event.content_redacted)

    for message in outbound_messages:
        _append_incident_ids_from_source_envelope(incident_ids, message.source_envelope)
        _append_incident_ids_from_text(incident_ids, message.content_preview)
        _append_incident_ids_from_text(incident_ids, message.send_error)

    return incident_ids
|
||
|
||
|
||
async def _load_run_message_context(
    db: Any,
    runs: list[AwoooPRunState],
    *,
    limit: int = _MAX_LIST_CONTEXT_ROWS,
) -> tuple[
    dict[UUID, list[AwoooPConversationEvent]],
    dict[UUID, list[AwoooPOutboundMessage]],
]:
    """Load list-page sidecar events needed to link runs back to incidents.

    Runs are processed in batches (see ``_iter_run_context_batches``). Inbound
    events are matched by ``run_id``, by ``provider_event_id`` equal to a
    run's ``trigger_ref``, or by ``event_id`` equal to a UUID-shaped
    ``trigger_ref``. Outbound messages are matched by ``run_id`` only.

    Args:
        db: Session-like object whose awaited ``execute`` returns a result
            exposing ``scalars().all()``.
        runs: Runs to load context for.
        limit: Row budget applied separately to inbound and outbound rows
            across all batches; negative values are treated as 0.

    Returns:
        ``(inbound_by_run, outbound_by_run)``, both keyed by run id.
    """
    if not runs:
        return {}, {}

    inbound_by_run: dict[UUID, list[AwoooPConversationEvent]] = defaultdict(list)
    outbound_by_run: dict[UUID, list[AwoooPOutboundMessage]] = defaultdict(list)
    remaining_inbound = max(int(limit), 0)
    remaining_outbound = max(int(limit), 0)

    for batch in _iter_run_context_batches(runs):
        # Both budgets spent: later batches could not add any rows.
        if remaining_inbound <= 0 and remaining_outbound <= 0:
            break

        if remaining_inbound > 0:
            inbound_filters = [AwoooPConversationEvent.run_id.in_(batch["run_ids"])]
            if batch["trigger_refs"]:
                inbound_filters.append(AwoooPConversationEvent.provider_event_id.in_(
                    batch["trigger_refs"]
                ))
            if batch["trigger_event_ids"]:
                inbound_filters.append(AwoooPConversationEvent.event_id.in_(
                    batch["trigger_event_ids"]
                ))

            inbound_result = await db.execute(
                select(AwoooPConversationEvent)
                .where(sa_or(*inbound_filters))
                .order_by(AwoooPConversationEvent.received_at.desc())
                .limit(remaining_inbound)
            )
            inbound_events = list(inbound_result.scalars().all())
            remaining_inbound = max(remaining_inbound - len(inbound_events), 0)
            for event in inbound_events:
                # Prefer the direct run link, then fall back to trigger_ref
                # matches via provider_event_id and finally via event_id.
                target_run_id = (
                    event.run_id if event.run_id in batch["run_ids_set"] else None
                )
                if target_run_id is None:
                    target_run_id = batch["trigger_ref_to_run"].get(
                        str(event.provider_event_id)
                    )
                if target_run_id is None:
                    target_run_id = batch["trigger_ref_to_run"].get(str(event.event_id))
                if target_run_id is not None:
                    inbound_by_run[target_run_id].append(event)

        if remaining_outbound > 0:
            outbound_result = await db.execute(
                select(AwoooPOutboundMessage)
                .where(AwoooPOutboundMessage.run_id.in_(batch["run_ids"]))
                .order_by(AwoooPOutboundMessage.queued_at.desc())
                .limit(remaining_outbound)
            )
            outbound_messages = list(outbound_result.scalars().all())
            remaining_outbound = max(remaining_outbound - len(outbound_messages), 0)
            for message in outbound_messages:
                outbound_by_run[message.run_id].append(message)

    return dict(inbound_by_run), dict(outbound_by_run)
|
||
|
||
|
||
def _list_filter_context_limit(candidate_count: int) -> int:
    """Return the context-row budget for a filtered list query.

    The budget is four rows per candidate, never below
    ``_MAX_LIST_CONTEXT_ROWS`` and never above 20,000.
    """
    return min(max(candidate_count * 4, _MAX_LIST_CONTEXT_ROWS), 20_000)
|
||
|
||
|
||
def _iter_run_context_batches(
    runs: list[AwoooPRunState],
) -> list[dict[str, Any]]:
    """Split run context lookups below asyncpg's bind-parameter ceiling.

    Args:
        runs: Runs to partition, ``_RUN_CONTEXT_QUERY_CHUNK_SIZE`` per batch.

    Returns:
        One dict per batch with keys ``run_ids``, ``run_ids_set``,
        ``trigger_refs`` (stringified, non-empty), ``trigger_ref_to_run`` and
        ``trigger_event_ids`` (the trigger refs that parse as UUIDs).
    """
    batches: list[dict[str, Any]] = []
    for start in range(0, len(runs), _RUN_CONTEXT_QUERY_CHUNK_SIZE):
        batch_runs = runs[start : start + _RUN_CONTEXT_QUERY_CHUNK_SIZE]
        run_ids = [run.run_id for run in batch_runs]
        trigger_refs = [str(run.trigger_ref) for run in batch_runs if run.trigger_ref]
        trigger_ref_to_run = {
            str(run.trigger_ref): run.run_id
            for run in batch_runs
            if run.trigger_ref
        }
        trigger_event_ids: list[UUID] = []
        for trigger_ref in trigger_refs:
            # Only UUID-shaped trigger refs can match an event_id column.
            try:
                trigger_event_ids.append(uuid.UUID(trigger_ref))
            except ValueError:
                continue
        batches.append({
            "run_ids": run_ids,
            "run_ids_set": set(run_ids),
            "trigger_refs": trigger_refs,
            "trigger_ref_to_run": trigger_ref_to_run,
            "trigger_event_ids": trigger_event_ids,
        })
    return batches
|
||
|
||
|
||
def _route_label_from_remediation(item: dict[str, Any]) -> str:
    """Render remediation MCP route consistently with Telegram / Work Items.

    Returns:
        ``agent_id/tool_name/required_scope`` with empty parts dropped, or
        ``"--"`` when every part is empty.
    """
    parts = (
        item.get("agent_id"),
        item.get("tool_name"),
        item.get("required_scope"),
    )
    return "/".join(str(part) for part in parts if part) or "--"
|
||
|
||
|
||
def _route_label_from_legacy_mcp(record: dict[str, Any]) -> str:
    """Render self-built/legacy MCP evidence as agent/tool/scope for list UX.

    The tool part is ``mcp_server.tool_name`` (empty pieces dropped) and the
    scope part is always ``"read"``.

    Returns:
        ``agent_role/tool/read`` with empty parts dropped.
    """
    tool = record.get("tool_name")
    server = record.get("mcp_server")
    tool_label = ".".join(str(part) for part in (server, tool) if part) or tool
    parts = (record.get("agent_role"), tool_label, "read")
    # "read" is always present, so the "--" fallback is kept only for parity
    # with _route_label_from_remediation.
    return "/".join(str(part) for part in parts if part) or "--"
|
||
|
||
|
||
def _remediation_timeline_status(item: dict[str, Any]) -> str:
    """Map a remediation evidence item to a timeline status.

    Returns:
        ``"failed"`` when ``success`` or ``allowed`` is explicitly ``False``,
        ``"success"`` when ``verification_result_preview`` equals
        ``"success"``, otherwise ``"warning"``.
    """
    if item.get("success") is False or item.get("allowed") is False:
        return "failed"
    if item.get("verification_result_preview") == "success":
        return "success"
    return "warning"
|
||
|
||
|
||
def _remediation_timeline_summary(item: dict[str, Any]) -> str:
    """Render a one-line, space-separated summary of a remediation item.

    Missing incident/mode/preview values are shown as ``--``; the result is
    truncated to 500 characters.
    """
    summary = (
        f"incident={item.get('incident_id') or '--'} "
        f"mode={item.get('mode') or '--'} "
        f"preview={item.get('verification_result_preview') or '--'} "
        f"route={_route_label_from_remediation(item)} "
        f"writes_incident={item.get('writes_incident_state')} "
        f"writes_auto_repair={item.get('writes_auto_repair_result')}"
    )
    return summary[:500]
|
||
|
||
|
||
def _legacy_mcp_timeline_status(record: dict[str, Any]) -> str:
    """Map a legacy MCP audit record to a timeline status.

    Returns:
        ``"success"`` / ``"failed"`` when ``success`` is exactly ``True`` /
        ``False``; ``"warning"`` for any other value (including missing).
    """
    outcome = record.get("success")
    if outcome is True:
        return "success"
    if outcome is False:
        return "failed"
    return "warning"
|
||
|
||
|
||
def _legacy_mcp_timeline_summary(record: dict[str, Any]) -> str:
    """Render a one-line, space-separated summary of a legacy MCP record.

    Missing values are shown as ``--``; a ``duration_ms`` of ``0`` is kept as
    ``0``. The result is truncated to 500 characters.
    """
    duration_ms = record.get("duration_ms")
    duration_label = duration_ms if duration_ms is not None else "--"
    summary = (
        f"incident={record.get('incident_id') or '--'} "
        f"agent={record.get('agent_role') or '--'} "
        f"node={record.get('flywheel_node') or '--'} "
        f"duration_ms={duration_label} "
        f"error={record.get('error_message') or '--'}"
    )
    return summary[:500]
|
||
|
||
|
||
def _run_remediation_list_summary(
    *,
    run: AwoooPRunState,
    incident_ids: list[str],
    items: list[dict[str, Any]],
    legacy_mcp_records: list[dict[str, Any]] | None = None,
    errors: list[dict[str, str]] | None = None,
) -> dict[str, Any]:
    """Summarize durable ADR-100 dry-run and MCP investigation evidence for list UX.

    Args:
        run: Run being summarized; only ``run.state`` is read.
        incident_ids: Incident ids linked to the run (passed through).
        items: Remediation evidence items; non-dict entries are ignored.
        legacy_mcp_records: Legacy MCP audit records; non-dict entries are
            ignored.
        errors: Fetch errors to surface alongside the summary.

    Returns:
        A ``awooop_run_remediation_summary_v1`` dict. ``status`` is one of
        ``no_evidence``, ``mcp_observed``, ``blocked``, ``write_observed``,
        ``read_only_dry_run`` or ``observed``, decided from the newest
        remediation item.
    """
    # Newest first, ordered by the stringified ``created_at``.
    sorted_items = sorted(
        (item for item in items if isinstance(item, dict)),
        key=lambda item: str(item.get("created_at") or ""),
        reverse=True,
    )
    sorted_mcp_records = sorted(
        (record for record in (legacy_mcp_records or []) if isinstance(record, dict)),
        key=lambda record: str(record.get("created_at") or ""),
        reverse=True,
    )
    latest = sorted_items[0] if sorted_items else {}
    latest_mcp = sorted_mcp_records[0] if sorted_mcp_records else {}
    writes_incident = latest.get("writes_incident_state")
    writes_auto_repair = latest.get("writes_auto_repair_result")

    # Remediation evidence wins over legacy MCP evidence for the route label.
    if latest:
        route = _route_label_from_remediation(latest)
    elif latest_mcp:
        route = _route_label_from_legacy_mcp(latest_mcp)
    else:
        route = "--"

    write_observed = writes_incident is True or writes_auto_repair is True
    # Read-only requires explicit False flags; missing flags do not qualify.
    is_read_only = (
        bool(latest)
        and latest.get("required_scope") == "read"
        and writes_incident is False
        and writes_auto_repair is False
    )
    mcp_total = len(sorted_mcp_records)
    mcp_success = sum(1 for record in sorted_mcp_records if record.get("success") is True)
    mcp_failed = sum(1 for record in sorted_mcp_records if record.get("success") is False)

    if not sorted_items:
        status_value = "mcp_observed" if mcp_total > 0 else "no_evidence"
    elif latest.get("success") is False or latest.get("allowed") is False:
        status_value = "blocked"
    elif write_observed:
        status_value = "write_observed"
    elif is_read_only:
        status_value = "read_only_dry_run"
    else:
        status_value = "observed"

    if sorted_items:
        source = "alert_operation_log"
    elif mcp_total > 0:
        source = "mcp_audit_log"
    else:
        source = "none"

    return {
        "schema_version": "awooop_run_remediation_summary_v1",
        "source": source,
        "incident_ids": incident_ids,
        "total": len(sorted_items),
        "evidence_total": len(sorted_items) + mcp_total,
        "status": status_value,
        "has_dry_run": bool(sorted_items),
        "has_mcp_investigation": mcp_total > 0,
        "is_read_only": is_read_only,
        "human_gate_open": run.state == "waiting_approval",
        "latest_at": latest.get("created_at"),
        "latest_preview": latest.get("verification_result_preview"),
        "latest_mode": latest.get("mode"),
        "latest_route": route,
        "latest_agent_id": latest.get("agent_id") or latest_mcp.get("agent_role"),
        "latest_tool_name": latest.get("tool_name") or latest_mcp.get("tool_name"),
        "latest_required_scope": latest.get("required_scope") or ("read" if latest_mcp else None),
        "writes_incident_state": writes_incident,
        "writes_auto_repair_result": writes_auto_repair,
        "mcp_observation_total": mcp_total,
        "mcp_observation_success": mcp_success,
        "mcp_observation_failed": mcp_failed,
        "latest_mcp_server": latest_mcp.get("mcp_server"),
        "errors": errors or [],
    }
|
||
|
||
|
||
def _safe_int(value: Any) -> int:
    """Coerce ``value`` to ``int``, returning 0 for falsy or unparseable input."""
    try:
        return int(value or 0)
    except (TypeError, ValueError):
        return 0
|
||
|
||
|
||
def _has_repair_execution_evidence(facts: dict[str, Any]) -> bool:
    """Return True when ``facts`` counts any repair execution record.

    Either ``auto_repair_execution_records`` or
    ``effective_execution_records`` being positive is sufficient.
    """
    return (
        _safe_int(facts.get("auto_repair_execution_records")) > 0
        or _safe_int(facts.get("effective_execution_records")) > 0
    )
|
||
|
||
|
||
def _has_nonrepair_operation_evidence(facts: dict[str, Any]) -> bool:
    """Return True when operations were logged but no repair was executed.

    Requires a positive ``automation_operation_records`` count and no repair
    execution evidence (see ``_has_repair_execution_evidence``).
    """
    return (
        _safe_int(facts.get("automation_operation_records")) > 0
        and not _has_repair_execution_evidence(facts)
    )
|
||
|
||
|
||
def _latest_remediation_history_item(
    history: dict[str, Any] | None,
) -> dict[str, Any]:
    """Return the first entry of ``history["items"]`` when it is a dict.

    Returns:
        The first item, or ``{}`` when ``history`` is not a dict, ``items`` is
        not a non-empty list, or the first item is not a dict.
    """
    if not isinstance(history, dict):
        return {}
    items = history.get("items")
    if not isinstance(items, list) or not items:
        return {}
    first = items[0]
    return first if isinstance(first, dict) else {}
|
||
|
||
|
||
def _remediation_evidence_state(history: dict[str, Any] | None) -> str:
    """Classify ADR-100 evidence with the same operator semantics as Telegram.

    Args:
        history: Remediation history payload with ``total``, ``status`` and
            ``items``; may be ``None``.

    Returns:
        ``"missing"`` or ``"fetch_failed"`` when there is no evidence;
        otherwise, judged on the first history item in this order:
        ``"write_observed"``, ``"blocked"``, ``"read_only"``, ``"observed"``.
    """
    if not isinstance(history, dict):
        return "missing"

    total = _safe_int(history.get("total"))
    if total <= 0:
        if history.get("status") == "fetch_failed":
            return "fetch_failed"
        return "missing"

    latest = _latest_remediation_history_item(history)
    # A recorded write outranks a blocked/failed flag on the same item.
    if latest.get("writes_incident_state") or latest.get("writes_auto_repair_result"):
        return "write_observed"
    if latest.get("allowed") is False or latest.get("success") is False:
        return "blocked"
    if (
        str(latest.get("safety_level") or "").lower() == "read_only"
        or str(latest.get("required_scope") or "").lower() == "read"
    ):
        return "read_only"
    return "observed"
|
||
|
||
|
||
def _select_status_chain_source_id(
    incident_ids: list[str],
    remediation_history: dict[str, Any] | None,
) -> str | None:
    """Pick the incident id that anchors the status chain.

    Preference order: the latest remediation item's incident id when it is
    also one of ``incident_ids``; else the first of ``incident_ids``; else the
    latest remediation incident id on its own.

    Returns:
        The chosen incident id, or ``None`` when no candidate exists.
    """
    latest_incident_id = str(
        _latest_remediation_history_item(remediation_history).get("incident_id") or ""
    ).strip()
    if latest_incident_id and latest_incident_id in incident_ids:
        return latest_incident_id
    if incident_ids:
        return incident_ids[0]
    return latest_incident_id or None
|
||
|
||
|
||
def _status_chain_mcp_section(truth_chain: dict[str, Any] | None) -> dict[str, Any]:
    """Condense the truth chain's MCP evidence for the status chain.

    Reads ``truth_chain["mcp"]["awooop_gateway"]`` and
    ``truth_chain["mcp"]["legacy"]``; any missing or non-dict level is treated
    as empty.

    Returns:
        A dict with ``gateway`` and ``legacy`` counters plus ``top_tools``: at
        most five per-tool entries, gateway tools first, de-duplicated per
        source and tool name.
    """
    mcp = truth_chain.get("mcp") if isinstance(truth_chain, dict) else {}
    if not isinstance(mcp, dict):
        mcp = {}
    gateway = mcp.get("awooop_gateway") if isinstance(mcp.get("awooop_gateway"), dict) else {}
    legacy = mcp.get("legacy") if isinstance(mcp.get("legacy"), dict) else {}

    top_tools: list[dict[str, Any]] = []
    seen_tools: set[str] = set()
    for source, summary in (("gateway", gateway), ("legacy", legacy)):
        by_tool = summary.get("by_tool") if isinstance(summary, dict) else []
        if not isinstance(by_tool, list):
            continue
        for item in by_tool:
            if not isinstance(item, dict):
                continue
            tool_name = str(item.get("tool_name") or "unknown").strip() or "unknown"
            key = f"{source}:{tool_name}"
            if key in seen_tools:
                continue
            seen_tools.add(key)
            success = _safe_int(item.get("success"))
            failed = _safe_int(item.get("failed"))
            blocked = _safe_int(item.get("blocked"))
            top_tools.append({
                "source": source,
                "tool_name": tool_name,
                # Fall back to the sum of outcomes when no total is reported.
                "total": _safe_int(item.get("total")) or (success + failed + blocked),
                "success": success,
                "failed": failed,
                "blocked": blocked,
                "last_error": item.get("last_error"),
            })
            if len(top_tools) >= 5:
                break
        # Leave the outer loop too once the cap is reached.
        if len(top_tools) >= 5:
            break

    return {
        "gateway": {
            "total": _safe_int(gateway.get("total")),
            "success": _safe_int(gateway.get("success")),
            "failed": _safe_int(gateway.get("failed")),
            "blocked": _safe_int(gateway.get("blocked")),
            "first_class_total": _safe_int(gateway.get("first_class_total")),
            "legacy_bridge_total": _safe_int(gateway.get("legacy_bridge_total")),
            "policy_enforced_total": _safe_int(gateway.get("policy_enforced_total")),
            "stage": gateway.get("stage"),
            "stage_status": gateway.get("stage_status"),
        },
        "legacy": {
            "total": _safe_int(legacy.get("total")),
            "success": _safe_int(legacy.get("success")),
            "failed": _safe_int(legacy.get("failed")),
        },
        "top_tools": top_tools,
    }
|
||
|
||
|
||
def _first_non_empty(row: Mapping[str, Any], keys: tuple[str, ...]) -> Any:
    """Return the first value among ``keys`` that is neither ``None`` nor ``""``.

    Other falsy values such as ``0`` or ``False`` count as present.

    Returns:
        The first qualifying value in key order, or ``None`` when there is none.
    """
    for key in keys:
        value = row.get(key)
        if value not in (None, ""):
            return value
    return None
|
||
|
||
|
||
def _status_chain_execution_section(truth_chain: dict[str, Any] | None) -> dict[str, Any]:
    """Flatten the ``execution`` part of a truth chain for the status-chain payload.

    A missing or malformed ``truth_chain`` (or any malformed nested value) is
    treated as empty.  The first entry of ``automation_operation_log`` is used
    as the latest operation, and at most five distinct playbook ids / paths
    are reported.  When ``execution["ansible"]`` carries no ``summary`` dict,
    the summary is computed with ``summarize_ansible_execution`` over the
    dict-shaped ansible records.
    """

    def _typed(container: dict[str, Any], key: str, expected: type, fallback: Any) -> Any:
        """Return ``container[key]`` when it has the expected type, else ``fallback``."""
        found = container.get(key)
        return found if isinstance(found, expected) else fallback

    execution = _typed(truth_chain, "execution", dict, {}) if isinstance(truth_chain, dict) else {}
    ops = _typed(execution, "automation_operation_log", list, [])
    latest_op = ops[0] if ops and isinstance(ops[0], dict) else {}

    id_fields = ("matched_playbook_id", "input_playbook_id", "output_playbook_id")
    path_fields = (
        "input_playbook_path",
        "output_playbook_path",
        "input_ansible_playbook_path",
        "output_ansible_playbook_path",
    )
    playbook_ids: list[str] = []
    playbook_paths: list[str] = []
    for entry in ops:
        if isinstance(entry, dict):
            for field in id_fields:
                _append_unique(playbook_ids, entry.get(field))
            for field in path_fields:
                _append_unique(playbook_paths, entry.get(field))

    ansible = _typed(execution, "ansible", dict, {})
    ansible_records = _typed(ansible, "records", list, [])
    latest_ansible = (
        ansible_records[0]
        if ansible_records and isinstance(ansible_records[0], dict)
        else {}
    )
    candidate_catalog = _typed(ansible, "candidate_catalog", dict, {})
    candidates = _typed(candidate_catalog, "candidates", list, [])
    ansible_summary = ansible.get("summary")
    if not isinstance(ansible_summary, dict):
        ansible_summary = summarize_ansible_execution(
            [record for record in ansible_records if isinstance(record, dict)]
        )

    # The ansible view is assembled step by step; insertion order is the
    # key order of the emitted payload.
    ansible_view: dict[str, Any] = {
        "considered": bool(ansible.get("considered")),
        "record_total": len(ansible_records),
        "candidate_count": len(candidates),
        "not_used_reason": ansible.get("not_used_reason"),
    }
    for counter in (
        "check_mode_total",
        "apply_total",
        "rollback_total",
        "pending_check_mode_total",
        "applied_success_total",
        "applied",
        "controlled_apply",
    ):
        ansible_view[counter] = ansible_summary.get(counter)
    # Summary values win; the first ansible record is the fallback.
    for summary_key, record_key in (
        ("latest_operation_type", "operation_type"),
        ("latest_status", "status"),
        ("latest_catalog_id", "catalog_id"),
        ("latest_playbook_path", "playbook_path"),
        ("latest_execution_mode", "execution_mode"),
    ):
        ansible_view[summary_key] = ansible_summary.get(summary_key) or latest_ansible.get(record_key)
    # check_mode may legitimately be False, so only None triggers the fallback.
    check_mode = ansible_summary.get("latest_check_mode")
    if check_mode is None:
        check_mode = latest_ansible.get("check_mode")
    ansible_view["latest_check_mode"] = check_mode
    for passthrough in ("latest_returncode", "latest_apply_executed", "approval_source"):
        ansible_view[passthrough] = ansible_summary.get(passthrough)
    ansible_view["candidate_playbooks"] = [
        {
            "catalog_id": candidate.get("catalog_id"),
            "playbook_path": candidate.get("playbook_path"),
            "risk_level": candidate.get("risk_level"),
            "match_score": candidate.get("match_score"),
        }
        for candidate in candidates[:3]
        if isinstance(candidate, dict)
    ]

    return {
        "operation_total": len(ops),
        "latest_operation_type": latest_op.get("operation_type"),
        "latest_status": latest_op.get("status"),
        "latest_actor": latest_op.get("actor"),
        "latest_action": _first_non_empty(latest_op, ("input_action", "output_action")),
        "latest_executor": _first_non_empty(
            latest_op,
            (
                "input_executor",
                "output_executor",
                "input_execution_backend",
                "output_execution_backend",
            ),
        ),
        "playbook_ids": playbook_ids[:5],
        "playbook_paths": playbook_paths[:5],
        "ansible": ansible_view,
    }
|
||
|
||
|
||
def _source_ref_values(envelope: Any, key: str) -> list[str]:
|
||
if not isinstance(envelope, dict):
|
||
return []
|
||
source_refs = envelope.get("source_refs")
|
||
if not isinstance(source_refs, dict):
|
||
return []
|
||
raw_values = source_refs.get(key)
|
||
if isinstance(raw_values, list):
|
||
return [str(item) for item in raw_values if str(item or "").strip()]
|
||
if raw_values not in (None, ""):
|
||
return [str(raw_values)]
|
||
return []
|
||
|
||
|
||
def _source_correlation_empty(
    incident_ids: list[str],
    *,
    status_value: str,
    missing_reason: str,
) -> dict[str, Any]:
    """Build a zeroed source-correlation summary.

    ``status_value`` is written to both ``status`` and ``verification_status``.
    One zeroed counter entry is created for every provider listed in
    ``_SOURCE_CORRELATION_PROVIDERS``; each provider gets its own dict so the
    entries can be updated independently by the caller.
    """
    zeroed_provider: dict[str, Any] = {
        "direct_ref_total": 0,
        "candidate_total": 0,
        "applied_link_total": 0,
        "latest_event_at": None,
        "latest_heartbeat_at": None,
        "latest_applied_link_at": None,
    }
    per_provider = {
        provider: dict(zeroed_provider)
        for provider in _SOURCE_CORRELATION_PROVIDERS
    }
    criteria = [
        "source_correlation_linked_stage",
        "direct_source_ref",
        "fingerprint_overlap",
        "alertname_overlap",
        "service_or_namespace_overlap",
        "severity_overlap",
    ]
    return {
        "schema_version": _SOURCE_CORRELATION_SCHEMA_VERSION,
        "status": status_value,
        "missing_reason": missing_reason,
        "incident_ids": incident_ids,
        "direct_ref_total": 0,
        "candidate_total": 0,
        "applied_link_total": 0,
        "provider_event_total": 0,
        "latest_applied_link_at": None,
        "verification_status": status_value,
        "providers": per_provider,
        "top_candidates": [],
        "matching_criteria": criteria,
    }
|
||
|
||
|
||
def _normalize_correlation_value(value: Any) -> str:
|
||
if hasattr(value, "value"):
|
||
value = value.value
|
||
return str(value or "").strip().lower()
|
||
|
||
|
||
def _append_correlation_term(values: list[str], value: Any) -> None:
    """Append the normalized form of ``value`` to ``values`` in place.

    Nothing is appended when the normalized term is a placeholder (empty,
    ``--``, ``n/a``, ``none``, ``null``, ``unknown``), is shorter than two
    characters, or is already present in ``values``.
    """
    term = _normalize_correlation_value(value)
    is_placeholder = term in {"", "--", "n/a", "none", "null", "unknown"}
    if is_placeholder or len(term) < 2 or term in values:
        return
    values.append(term)
|
||
|
||
|
||
def _intersection(left: list[str], right: list[str]) -> list[str]:
|
||
right_set = set(right)
|
||
return [item for item in left if item in right_set]
|
||
|
||
|
||
def _as_utc_naive(value: Any) -> datetime | None:
|
||
if not isinstance(value, datetime):
|
||
return None
|
||
if value.tzinfo is not None:
|
||
return value.astimezone(UTC).replace(tzinfo=None)
|
||
return value
|
||
|
||
|
||
def _iso_or_none(value: Any) -> str | None:
|
||
if hasattr(value, "isoformat"):
|
||
return value.isoformat()
|
||
if value in (None, ""):
|
||
return None
|
||
return str(value)
|
||
|
||
|
||
def _incident_correlation_context(record: IncidentRecord) -> dict[str, list[str]]:
    """Collect the matching terms of one incident for read-only source matching.

    Terms come from the record's ``alertname``, ``severity`` and
    ``affected_services`` plus, for every dict-shaped entry of
    ``record.signals``, its own fields, labels and annotations.  Every term is
    added through ``_append_correlation_term``.
    """
    namespace_label_keys = ("namespace", "kubernetes_namespace")
    target_label_keys = (
        "service",
        "service_name",
        "pod",
        "pod_name",
        "deployment",
        "deployment_name",
        "container",
        "job",
        "instance",
        "target",
        "target_resource",
        "workload",
        "app",
        "app.kubernetes.io/name",
    )
    annotation_keys = ("summary", "description")

    alertnames: list[str] = []
    severities: list[str] = []
    fingerprints: list[str] = []
    namespaces: list[str] = []
    targets: list[str] = []

    _append_correlation_term(alertnames, record.alertname)
    _append_correlation_term(severities, record.severity)
    for service in record.affected_services or []:
        _append_correlation_term(targets, service)

    for signal in record.signals or []:
        if not isinstance(signal, dict):
            continue
        labels = _as_dict(signal.get("labels"))
        annotations = _as_dict(signal.get("annotations"))

        # Annotation summary/description text is matched as alert-name terms too.
        alertname_sources = [signal.get("alert_name"), labels.get("alertname")]
        alertname_sources.extend(annotations.get(name) for name in annotation_keys)
        for candidate in alertname_sources:
            _append_correlation_term(alertnames, candidate)

        _append_correlation_term(severities, signal.get("severity"))
        for candidate in (signal.get("fingerprint"), labels.get("fingerprint")):
            _append_correlation_term(fingerprints, candidate)
        for name in namespace_label_keys:
            _append_correlation_term(namespaces, labels.get(name))
        for name in target_label_keys:
            _append_correlation_term(targets, labels.get(name))

    return {
        "incident_ids": [record.incident_id],
        "alertnames": alertnames,
        "severities": severities,
        "fingerprints": fingerprints,
        "namespaces": namespaces,
        "targets": targets,
    }
|
||
|
||
|
||
def _source_event_correlation_context(row: Mapping[str, Any]) -> dict[str, Any]:
    """Collect the matching terms of one provider source event row.

    Terms are read from the row's ``source_envelope``: its ``log_correlation``
    and ``labels`` sections, the ``signoz_alerts`` / ``fingerprints`` source
    refs and the ``summary`` / ``description`` annotations.  Row-level
    ``provider``, ``stage`` and ``provider_event_id`` take precedence over the
    values stored in the envelope.
    """
    envelope = _as_dict(row.get("source_envelope"))
    source_refs = _as_dict(envelope.get("source_refs"))
    log_correlation = _as_dict(envelope.get("log_correlation"))
    labels = _as_dict(envelope.get("labels"))
    annotations = _as_dict(envelope.get("annotations"))

    namespace_keys = ("namespace", "kubernetes_namespace")
    target_keys = (
        "target_resource",
        "service",
        "service_name",
        "pod",
        "pod_name",
        "deployment",
        "deployment_name",
        "container",
        "job",
        "instance",
        "target",
        "workload",
        "app",
        "app.kubernetes.io/name",
    )

    def _collect_pairs(bucket: list[str], names: tuple[str, ...]) -> None:
        """Add the log-correlation value, then the label value, for each name."""
        for name in names:
            _append_correlation_term(bucket, log_correlation.get(name))
            _append_correlation_term(bucket, labels.get(name))

    alertnames: list[str] = []
    _collect_pairs(alertnames, ("alertname",))
    for ref in _source_ref_values(envelope, "signoz_alerts"):
        _append_correlation_term(alertnames, ref)

    severities: list[str] = []
    _collect_pairs(severities, ("severity",))

    fingerprints: list[str] = []
    _collect_pairs(fingerprints, ("fingerprint",))
    for ref in _source_ref_values(envelope, "fingerprints"):
        _append_correlation_term(fingerprints, ref)

    namespaces: list[str] = []
    _collect_pairs(namespaces, namespace_keys)

    targets: list[str] = []
    _collect_pairs(targets, target_keys)

    # Annotation text is appended last so it follows the explicit alert names.
    for name in ("summary", "description"):
        _append_correlation_term(alertnames, annotations.get(name))

    return {
        "provider": str(row.get("provider") or envelope.get("provider") or "").lower(),
        "stage": str(row.get("stage") or envelope.get("stage") or ""),
        "provider_event_id": row.get("provider_event_id") or envelope.get("provider_event_id"),
        "received_at": row.get("received_at"),
        "source_refs": source_refs,
        "incident_ids": _source_ref_values(envelope, "incident_ids"),
        "alertnames": alertnames,
        "severities": severities,
        "fingerprints": fingerprints,
        "namespaces": namespaces,
        "targets": targets,
    }
|
||
|
||
|
||
def _score_source_correlation_event(
    incident_context: dict[str, list[str]],
    event_context: dict[str, Any],
) -> dict[str, Any]:
    """Score how well one source event matches one incident context.

    Each rule compares the same-named term list of both contexts and, on any
    overlap, adds its weight and reason.  Incident-id and fingerprint overlaps
    additionally mark the match as direct.  The reported score is capped at
    100, at most five reasons are returned, and an event is a candidate when
    it is direct or its uncapped score reaches 35.
    """
    # (context key, weight, reason label, marks the match as direct)
    rules = (
        ("incident_ids", 100, "direct_incident_ref", True),
        ("fingerprints", 80, "fingerprint_overlap", True),
        ("alertnames", 35, "alertname_overlap", False),
        ("targets", 25, "target_overlap", False),
        ("namespaces", 10, "namespace_overlap", False),
        ("severities", 5, "severity_overlap", False),
    )
    fired = [
        rule
        for rule in rules
        if _intersection(incident_context[rule[0]], event_context[rule[0]])
    ]
    total = sum(weight for _, weight, _, _ in fired)
    direct = any(marks_direct for _, _, _, marks_direct in fired)
    labels = [reason for _, _, reason, _ in fired]

    return {
        "is_direct": direct,
        "is_candidate": bool(direct or total >= 35),
        "score": min(total, 100),
        "reasons": labels[:5],
    }
|
||
|
||
|
||
def _is_source_correlation_applied_link(
|
||
event_context: dict[str, Any],
|
||
scored: dict[str, Any],
|
||
) -> bool:
|
||
"""Applied source links must be append-only events that still match directly."""
|
||
return (
|
||
str(event_context.get("stage") or "").lower() == "source_correlation_linked"
|
||
and bool(scored.get("is_direct"))
|
||
)
|
||
|
||
|
||
async def _fetch_source_correlation_summary(
    *,
    project_id: str,
    incident_ids: list[str],
) -> dict[str, Any]:
    """Fetch read-only Sentry/SigNoz evidence candidates for the incident status chain.

    Loads the requested incidents, then reads recent non-heartbeat provider
    events and the latest heartbeat per provider from
    ``awooop_conversation_event``.  Every event is scored against all incident
    contexts and its best score decides whether it counts as a direct
    reference, a candidate, or an applied link.  Only SELECT statements are
    issued.

    Returns:
        A summary shaped like ``_source_correlation_empty`` with counters,
        per-provider timestamps, an overall status and up to five top
        candidates.
    """
    if not incident_ids:
        return _source_correlation_empty(
            incident_ids,
            status_value="no_incident_context",
            missing_reason="no_incident_ids",
        )

    safe_project_id = project_id or "awoooi"
    source_rows: list[Any] = []
    heartbeat_rows: list[Any] = []
    async with get_db_context(safe_project_id) as db:
        incident_query = (
            select(IncidentRecord)
            .where(IncidentRecord.project_id == safe_project_id)
            .where(IncidentRecord.incident_id.in_(incident_ids))
        )
        incident_rows = list((await db.execute(incident_query)).scalars().all())

        if incident_rows:
            now = _utc_now_naive()
            created_times = [
                created
                for created in (_as_utc_naive(row.created_at) for row in incident_rows)
                if created is not None
            ]
            earliest_created = min(created_times, default=now)
            # Never look back further than the global lookback limit.
            window_start = max(
                earliest_created - timedelta(hours=_SOURCE_CORRELATION_PRE_WINDOW_HOURS),
                now - timedelta(days=_SOURCE_CORRELATION_LOOKBACK_DAYS),
            )
            # Constant SQL fragment (no user input): provider resolution falls
            # back from the envelope to the event-id prefix to the channel type.
            provider_sql = (
                "LOWER(COALESCE(NULLIF(source_envelope->>'provider', ''), "
                "NULLIF(split_part(provider_event_id, ':', 1), ''), channel_type))"
            )
            source_result = await db.execute(
                text(f"""
                    SELECT
                        event_id::text AS event_id,
                        project_id,
                        channel_type,
                        provider_event_id,
                        content_preview,
                        source_envelope,
                        received_at,
                        {provider_sql} AS provider,
                        LOWER(COALESCE(source_envelope->>'stage', '')) AS stage
                    FROM awooop_conversation_event
                    WHERE project_id = :project_id
                      AND {provider_sql} IN ('sentry', 'signoz')
                      AND LOWER(COALESCE(source_envelope->>'stage', '')) <> 'heartbeat'
                      AND received_at >= :window_start
                    ORDER BY received_at DESC
                    LIMIT :limit
                """),
                {
                    "project_id": safe_project_id,
                    "window_start": window_start,
                    "limit": _SOURCE_CORRELATION_EVENT_LIMIT,
                },
            )
            source_rows = list(source_result.mappings().all())

            heartbeat_result = await db.execute(
                text(f"""
                    SELECT
                        {provider_sql} AS provider,
                        MAX(received_at) AS latest_heartbeat_at
                    FROM awooop_conversation_event
                    WHERE project_id = :project_id
                      AND {provider_sql} IN ('sentry', 'signoz')
                      AND LOWER(COALESCE(source_envelope->>'stage', '')) = 'heartbeat'
                      AND received_at >= :window_start
                    GROUP BY {provider_sql}
                """),
                {
                    "project_id": safe_project_id,
                    "window_start": window_start,
                },
            )
            heartbeat_rows = list(heartbeat_result.mappings().all())

    if not incident_rows:
        return _source_correlation_empty(
            incident_ids,
            status_value="no_incident_context",
            missing_reason="incident_not_found",
        )

    contexts = [_incident_correlation_context(row) for row in incident_rows]
    summary = _source_correlation_empty(
        incident_ids,
        status_value="missing",
        missing_reason="no_matching_provider_source_event",
    )
    providers = summary["providers"]
    for heartbeat in heartbeat_rows:
        heartbeat_provider = str(heartbeat.get("provider") or "").lower()
        if heartbeat_provider in providers:
            providers[heartbeat_provider]["latest_heartbeat_at"] = _iso_or_none(
                heartbeat.get("latest_heartbeat_at")
            )

    top_candidates: list[dict[str, Any]] = []
    for row in source_rows:
        event_context = _source_event_correlation_context(row)
        provider = str(event_context.get("provider") or "").lower()
        if provider not in providers:
            continue
        provider_item = providers[provider]
        received_iso = _iso_or_none(row.get("received_at"))
        # Rows arrive newest first, so the first row seen per provider is its latest event.
        if provider_item.get("latest_event_at") is None:
            provider_item["latest_event_at"] = received_iso

        # max() keeps the first of equally scored matches, like a strict ">" scan.
        best_match = max(
            (_score_source_correlation_event(context, event_context) for context in contexts),
            key=lambda scored: scored["score"],
            default=None,
        )
        if not best_match or not best_match["is_candidate"]:
            continue

        summary["provider_event_total"] += 1
        counter = "direct_ref_total" if best_match["is_direct"] else "candidate_total"
        summary[counter] += 1
        provider_item[counter] += 1

        is_applied_link = _is_source_correlation_applied_link(event_context, best_match)
        if is_applied_link:
            summary["applied_link_total"] += 1
            provider_item["applied_link_total"] += 1
            if summary.get("latest_applied_link_at") is None:
                summary["latest_applied_link_at"] = received_iso
            if provider_item.get("latest_applied_link_at") is None:
                provider_item["latest_applied_link_at"] = received_iso

        if is_applied_link:
            link_state, verification_status = "applied", "applied_link_verified"
        elif best_match["is_direct"]:
            link_state, verification_status = "direct_ref", "direct_ref_verified"
        else:
            link_state, verification_status = "candidate", "candidate_only"

        top_candidates.append(
            {
                "provider": provider,
                "provider_event_id": str(event_context.get("provider_event_id") or ""),
                "stage": str(event_context.get("stage") or ""),
                "score": best_match["score"],
                "match_type": "direct" if best_match["is_direct"] else "candidate",
                "link_state": link_state,
                "verification_status": verification_status,
                "reasons": best_match["reasons"],
                "received_at": received_iso,
            }
        )

    # Strongest evidence wins: (status, verification_status, missing_reason).
    if summary["applied_link_total"] > 0:
        resolution = ("linked", "applied_link_verified", None)
    elif summary["direct_ref_total"] > 0:
        resolution = ("linked", "direct_ref_verified", None)
    elif summary["candidate_total"] > 0:
        resolution = ("candidate_found", "candidate_only", None)
    elif any(item.get("latest_heartbeat_at") for item in providers.values()):
        resolution = (
            "provider_fresh_no_match",
            "provider_fresh_no_match",
            "provider_heartbeat_present_but_no_incident_match",
        )
    else:
        resolution = None
    if resolution is not None:
        (
            summary["status"],
            summary["verification_status"],
            summary["missing_reason"],
        ) = resolution

    ranked = sorted(
        top_candidates,
        key=lambda item: (item.get("score") or 0, item.get("received_at") or ""),
        reverse=True,
    )
    summary["top_candidates"] = ranked[:5]
    return summary
|
||
|
||
|
||
def _status_chain_source_section(truth_chain: dict[str, Any] | None) -> dict[str, Any]:
    """Summarize the ``channel`` part of a truth chain for the status-chain payload.

    Reports inbound/outbound totals, up to five distinct inbound channel
    types, up to five source refs per known ref kind, and a compact view of
    the first inbound event and first outbound message.  Missing or malformed
    input is treated as empty.
    """
    channel = truth_chain.get("channel") if isinstance(truth_chain, dict) else {}
    if not isinstance(channel, dict):
        channel = {}

    def _rows(name: str) -> list[Any]:
        """Return ``channel[name]`` when it is a list, else an empty list."""
        found = channel.get(name)
        return found if isinstance(found, list) else []

    def _head(rows: list[Any]) -> dict[str, Any]:
        """Return the first row when it is a dict, else an empty dict."""
        return rows[0] if rows and isinstance(rows[0], dict) else {}

    inbound_events = _rows("inbound_events")
    outbound_messages = _rows("outbound_messages")

    ref_kinds = ("alert_ids", "sentry_issue_ids", "signoz_alerts", "fingerprints", "incident_ids")
    source_refs: dict[str, list[str]] = {kind: [] for kind in ref_kinds}
    inbound_channels: list[str] = []
    for event in inbound_events:
        if not isinstance(event, dict):
            continue
        _append_unique(inbound_channels, event.get("channel_type"))
        envelope = event.get("source_envelope")
        for kind, collected in source_refs.items():
            for ref in _source_ref_values(envelope, kind):
                _append_unique(collected, ref)

    latest_inbound = _head(inbound_events)
    latest_outbound = _head(outbound_messages)
    inbound_fields = (
        "channel_type",
        "provider_event_id",
        "content_type",
        "is_duplicate",
        "received_at",
    )
    outbound_fields = ("channel_type", "message_type", "send_status", "sent_at")

    return {
        "inbound_total": len(inbound_events),
        "outbound_total": len(outbound_messages),
        "inbound_channels": inbound_channels[:5],
        "refs": {kind: collected[:5] for kind, collected in source_refs.items()},
        "latest_inbound": {field: latest_inbound.get(field) for field in inbound_fields},
        "latest_outbound": {field: latest_outbound.get(field) for field in outbound_fields},
    }
|
||
|
||
|
||
def _build_awooop_status_chain(
    *,
    incident_ids: list[str],
    truth_chain: dict[str, Any] | None = None,
    remediation_history: dict[str, Any] | None = None,
    source_id: str | None = None,
    fetch_error: str | None = None,
    source_correlation: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Assemble the shared read-only status chain payload.

    Combines the truth chain (``truth_status`` and ``automation_quality``)
    with the remediation history to derive ``repair_state``, ``next_step`` and
    ``needs_human``, then attaches the MCP, execution and source sections.
    When ``source_correlation`` is given it is stored under
    ``source_refs["correlation"]``.  A truthy ``fetch_error`` adds the
    ``truth_chain_fetch_failed`` blocker.
    """
    chain = truth_chain if isinstance(truth_chain, dict) else {}
    truth_status = chain.get("truth_status")
    if not isinstance(truth_status, dict):
        truth_status = {}
    quality = chain.get("automation_quality")
    if not isinstance(quality, dict):
        quality = {}
    facts = quality.get("facts")
    if not isinstance(facts, dict):
        facts = {}

    latest = _latest_remediation_history_item(remediation_history)
    remediation_state = _remediation_evidence_state(remediation_history)
    if isinstance(remediation_history, dict):
        remediation_total = _safe_int(remediation_history.get("total"))
    else:
        remediation_total = 0
    latest_route = _route_label_from_remediation(latest) if latest else "--"

    current_stage = str(truth_status.get("current_stage") or "unknown")
    stage_status = str(truth_status.get("stage_status") or "unknown")
    verdict = str(quality.get("verdict") or "unknown")
    verification = (
        facts.get("verification_result")
        or latest.get("verification_result_preview")
        or "missing"
    )
    auto_repair_records = _safe_int(facts.get("auto_repair_execution_records"))
    operation_records = _safe_int(facts.get("automation_operation_records"))
    has_repair_execution = _has_repair_execution_evidence(facts)
    has_nonrepair_operation = _has_nonrepair_operation_evidence(facts)
    gateway_total = _safe_int(facts.get("mcp_gateway_total"))
    km_entries = _safe_int(facts.get("knowledge_entries"))
    needs_human = bool(truth_status.get("needs_human"))

    # Terminal verdicts map straight to a repair state of the same name.
    next_step_by_verdict = {
        "auto_repaired_verified": "monitor_for_regression",
        "approval_rejected_no_execution": "monitor_or_reopen_if_alert_recurs",
        "approval_expired_manual_review": "reopen_close_or_escalate_expired_approval",
    }
    # Remediation evidence is only consulted when nothing was executed.
    outcome_by_remediation = {
        "read_only": ("read_only_dry_run", "approve_or_escalate_from_awooop"),
        "write_observed": ("write_observed_manual_review", "review_write_evidence"),
        "blocked": ("blocked_manual_required", "manual_investigation"),
    }
    if verdict in next_step_by_verdict:
        repair_state = verdict
        next_step = next_step_by_verdict[verdict]
    elif has_repair_execution:
        if str(verification) == "missing":
            repair_state = "executed_pending_verification"
        else:
            repair_state = "executed"
        next_step = "verify_execution_result"
    elif has_nonrepair_operation:
        repair_state = "diagnostic_or_audit_recorded"
        next_step = "manual_review_or_collect_repair_evidence"
    elif remediation_state in outcome_by_remediation:
        repair_state, next_step = outcome_by_remediation[remediation_state]
    elif needs_human:
        repair_state = "manual_required"
        next_step = "manual_investigation"
    else:
        repair_state = "no_execution_evidence"
        next_step = "collect_evidence_or_wait"

    # Escalate after the repair state is fixed so the escalation cannot
    # influence the ``manual_required`` branch above.
    unverified_write = (
        remediation_state == "write_observed"
        and repair_state != "auto_repaired_verified"
    )
    if remediation_state in {"blocked", "fetch_failed"} or unverified_write:
        needs_human = True

    mcp_section = _status_chain_mcp_section(truth_chain)
    execution_section = _status_chain_execution_section(truth_chain)
    source_section = _status_chain_source_section(truth_chain)
    if source_correlation is not None:
        source_section["correlation"] = source_correlation

    truth_blockers = truth_status.get("blockers")
    quality_blockers = quality.get("blockers")
    raw_blockers = [
        *(truth_blockers if isinstance(truth_blockers, list) else []),
        *(quality_blockers if isinstance(quality_blockers, list) else []),
    ]
    blockers = [str(entry) for entry in raw_blockers if entry]
    if fetch_error:
        blockers.append("truth_chain_fetch_failed")

    declared_outcome = quality.get("operator_outcome")
    if isinstance(declared_outcome, dict):
        # Copy so the payload does not share the truth chain's dict.
        outcome = dict(declared_outcome)
    else:
        outcome = build_operator_outcome(
            truth_status=truth_status,
            automation_quality=quality,
            remediation_state=remediation_state,
            fetch_error=fetch_error,
            source_id=source_id,
        )
    if outcome:
        needs_human = bool(needs_human or outcome.get("needs_human"))
        next_step = str(outcome.get("next_action") or next_step)

    evidence = {
        "auto_repair_records": auto_repair_records,
        "operation_records": operation_records,
        "mcp_gateway_total": gateway_total,
        "knowledge_entries": km_entries,
        "remediation_total": remediation_total,
        "remediation_state": remediation_state,
        "latest_route": latest_route,
        "latest_mode": latest.get("mode"),
        "latest_at": latest.get("created_at"),
        "latest_preview": latest.get("verification_result_preview"),
    }
    writes = {
        "incident": latest.get("writes_incident_state"),
        "auto_repair": latest.get("writes_auto_repair_result"),
    }
    return {
        "schema_version": "awooop_status_chain_v1",
        "source": "truth_chain+adr100_history",
        "source_id": source_id,
        "incident_ids": incident_ids,
        "current_stage": current_stage,
        "stage_status": stage_status,
        "verdict": verdict,
        "repair_state": repair_state,
        "verification": str(verification),
        "needs_human": needs_human,
        "next_step": next_step,
        "operator_outcome": outcome,
        "blockers": blockers[:8],
        "fetch_error": fetch_error,
        "evidence": evidence,
        "writes": writes,
        "mcp": mcp_section,
        "execution": execution_section,
        "source_refs": source_section,
    }
|
||
|
||
|
||
async def _fetch_awooop_status_chain(
    *,
    incident_ids: list[str],
    project_id: str,
    remediation_history: dict[str, Any] | None,
) -> dict[str, Any]:
    """Fetch the truth chain and source correlation, then build the status chain.

    Both fetches are best-effort: a truth-chain failure is logged and passed
    on as ``fetch_error``, and a source-correlation failure is logged and
    replaced by an empty ``fetch_failed`` summary.  An empty ``project_id``
    falls back to ``"awoooi"`` for both fetches.
    """
    effective_project_id = project_id or "awoooi"
    source_id = _select_status_chain_source_id(incident_ids, remediation_history)

    truth_chain: dict[str, Any] | None = None
    fetch_error: str | None = None
    if source_id:
        try:
            truth_chain = await fetch_truth_chain(
                source_id=source_id,
                project_id=effective_project_id,
            )
        except Exception as truth_exc:
            # Deliberately broad: the status chain must still render without the truth chain.
            fetch_error = str(truth_exc)
            logger.warning(
                "operator_awooop_status_chain_fetch_failed",
                source_id=source_id,
                project_id=project_id,
                error=fetch_error,
            )

    try:
        source_correlation = await _fetch_source_correlation_summary(
            incident_ids=incident_ids,
            project_id=effective_project_id,
        )
    except Exception as correlation_exc:
        # Deliberately broad: correlation is optional evidence, never a hard failure.
        logger.warning(
            "operator_source_correlation_fetch_failed",
            incident_ids=incident_ids,
            project_id=project_id,
            error=str(correlation_exc),
        )
        source_correlation = _source_correlation_empty(
            incident_ids,
            status_value="fetch_failed",
            missing_reason="source_correlation_fetch_failed",
        )

    return _build_awooop_status_chain(
        incident_ids=incident_ids,
        truth_chain=truth_chain,
        remediation_history=remediation_history,
        source_id=source_id,
        fetch_error=fetch_error,
        source_correlation=source_correlation,
    )
|
||
|
||
|
||
async def get_awooop_status_chain(
    *,
    project_id: str | None,
    incident_ids: list[str],
) -> dict[str, Any]:
    """Return the shared AwoooP status chain for UI surfaces without writing state.

    Incident ids are stripped, blank entries are skipped, and every remaining
    id is checked with ``_validate_incident_id_filter`` before being collected
    without duplicates.  With no usable ids an empty status chain is returned
    and nothing is fetched.
    """
    normalized_incident_ids: list[str] = []
    for raw_incident_id in incident_ids:
        candidate = str(raw_incident_id or "").strip()
        if candidate:
            _validate_incident_id_filter(candidate)
            _append_unique(normalized_incident_ids, candidate)

    if not normalized_incident_ids:
        return _build_awooop_status_chain(incident_ids=[], source_id=None)

    history = await _fetch_run_remediation_history(
        normalized_incident_ids,
        limit=5,
    )
    return await _fetch_awooop_status_chain(
        incident_ids=normalized_incident_ids,
        project_id=project_id or "awoooi",
        remediation_history=history,
    )
|
||
|
||
|
||
def _validate_remediation_status_filter(value: str | None) -> None:
    """Reject a ``remediation_status`` filter that is not a known value.

    ``None`` means "no filter" and always passes.

    Raises:
        HTTPException: 422 listing the allowed values when ``value`` is not in
            ``_REMEDIATION_STATUS_FILTERS``.
    """
    if value is None or value in _REMEDIATION_STATUS_FILTERS:
        return
    allowed = ", ".join(sorted(_REMEDIATION_STATUS_FILTERS))
    raise HTTPException(
        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
        detail=f"remediation_status 必須是: {allowed}",
    )
|
||
|
||
|
||
def _validate_callback_reply_status_filter(value: str | None) -> None:
    """Reject a ``callback_reply_status`` filter that is not a known value.

    ``None`` means "no filter" and always passes.

    Raises:
        HTTPException: 422 listing the allowed values when ``value`` is not in
            ``_CALLBACK_REPLY_STATUS_FILTERS``.
    """
    if value is None or value in _CALLBACK_REPLY_STATUS_FILTERS:
        return
    allowed = ", ".join(sorted(_CALLBACK_REPLY_STATUS_FILTERS))
    raise HTTPException(
        status_code=422,
        detail=f"callback_reply_status 必須是: {allowed}",
    )
|
||
|
||
|
||
def _validate_callback_reply_action_filter(value: str | None) -> str | None:
    """Normalize and validate a callback action filter.

    Returns:
        The stripped, lower-cased action, or ``None`` when ``value`` is
        ``None`` or blank.

    Raises:
        HTTPException: 422 when the normalized action does not fully match
            ``_CALLBACK_REPLY_ACTION_RE``.
    """
    normalized = (value or "").strip().lower()
    if not normalized:
        return None
    if _CALLBACK_REPLY_ACTION_RE.fullmatch(normalized):
        return normalized
    raise HTTPException(
        status_code=422,
        detail="callback action 格式錯誤,僅允許 a-z、0-9、底線、冒號與短橫線",
    )
|
||
|
||
|
||
def _validate_incident_id_filter(value: str | None) -> None:
    """Reject an incident id filter that does not match the expected format.

    ``None`` means "no filter" and always passes.

    Raises:
        HTTPException: 422 when ``value`` does not fully match
            ``_INCIDENT_ID_RE``.
    """
    if value is None or _INCIDENT_ID_RE.fullmatch(value):
        return
    raise HTTPException(
        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
        detail="incident_id 格式錯誤,必須是 INC-YYYYMMDD-XXXX",
    )
|
||
|
||
|
||
def _remediation_summary_matches_status(
|
||
summary: dict[str, Any] | None,
|
||
remediation_status: str | None,
|
||
) -> bool:
|
||
if remediation_status is None:
|
||
return True
|
||
status_value = str((summary or {}).get("status") or "no_evidence")
|
||
return status_value == remediation_status
|
||
|
||
|
||
def _callback_reply_summary_matches_status(
|
||
summary: dict[str, Any] | None,
|
||
callback_reply_status: str | None,
|
||
) -> bool:
|
||
if callback_reply_status is None:
|
||
return True
|
||
status_value = str((summary or {}).get("status") or "no_callback")
|
||
if callback_reply_status == "observed":
|
||
return status_value != "no_callback"
|
||
return status_value == callback_reply_status
|
||
|
||
|
||
def _remediation_summary_matches_incident_id(
|
||
summary: dict[str, Any] | None,
|
||
incident_id: str | None,
|
||
) -> bool:
|
||
if incident_id is None:
|
||
return True
|
||
incident_ids = (summary or {}).get("incident_ids")
|
||
return isinstance(incident_ids, list) and incident_id in incident_ids
|
||
|
||
|
||
async def _build_run_remediation_summaries(
    *,
    runs: list[AwoooPRunState],
    inbound_by_run: dict[UUID, list[AwoooPConversationEvent]],
    outbound_by_run: dict[UUID, list[AwoooPOutboundMessage]],
) -> dict[UUID, dict[str, Any]]:
    """Build per-run remediation summaries for list endpoints.

    The work is done in three passes: resolve the incident ids linked to each
    run, fetch remediation history and legacy MCP rows once per distinct
    incident id, then assemble one summary per run from those lookups.

    Args:
        runs: Runs to summarize.
        inbound_by_run: Inbound conversation events keyed by run id.
        outbound_by_run: Outbound messages keyed by run id.

    Returns:
        Mapping of run id to the dict produced by
        ``_run_remediation_list_summary``; empty when ``runs`` is empty.
    """
    if not runs:
        return {}

    # Pass 1: incident ids per run, plus the de-duplicated union across runs.
    ids_per_run: dict[UUID, list[str]] = {}
    distinct_ids: list[str] = []
    for run in runs:
        linked_ids = _collect_run_incident_ids(
            run=run,
            inbound_events=inbound_by_run.get(run.run_id, []),
            outbound_messages=outbound_by_run.get(run.run_id, []),
        )
        ids_per_run[run.run_id] = linked_ids
        for incident_id in linked_ids:
            _append_unique(distinct_ids, incident_id)

    # Pass 2: one history lookup per distinct incident, one legacy MCP query.
    history_lookup: dict[str, list[dict[str, Any]]] = {}
    legacy_lookup: dict[str, list[dict[str, Any]]] = {}
    failure_lookup: dict[str, dict[str, str]] = {}
    if distinct_ids:
        from src.services.adr100_remediation_service import Adr100RemediationService

        remediation = Adr100RemediationService(record_history=False)
        for incident_id in distinct_ids:
            try:
                fetched = await remediation.history(
                    limit=_REMEDIATION_HISTORY_LIMIT,
                    incident_id=incident_id,
                )
                history_lookup[incident_id] = [
                    entry
                    for entry in fetched.get("items", [])
                    if isinstance(entry, dict)
                ]
            except Exception as exc:
                # Best effort: a failed lookup is logged and reported in the
                # summary's errors rather than failing the whole list.
                logger.warning(
                    "run_list_remediation_history_fetch_failed",
                    incident_id=incident_id,
                    error=str(exc),
                )
                failure_lookup[incident_id] = {
                    "incident_id": incident_id,
                    "error": str(exc),
                }
        # Row cap scales with the number of incidents, clamped to [100, 5000].
        legacy_row_cap = min(
            max(len(distinct_ids) * _REMEDIATION_HISTORY_LIMIT, 100),
            5_000,
        )
        legacy_lookup = await _fetch_legacy_mcp_by_incident_ids(
            distinct_ids,
            limit=legacy_row_cap,
        )

    # Pass 3: assemble each run's summary from the shared lookups.
    built: dict[UUID, dict[str, Any]] = {}
    for run in runs:
        linked_ids = ids_per_run.get(run.run_id, [])
        history_items: list[dict[str, Any]] = [
            entry
            for incident_id in linked_ids
            for entry in history_lookup.get(incident_id, [])
        ]
        legacy_rows: list[dict[str, Any]] = [
            record
            for incident_id in linked_ids
            for record in legacy_lookup.get(incident_id, [])
        ]
        failures: list[dict[str, str]] = [
            failure_lookup[incident_id]
            for incident_id in linked_ids
            if incident_id in failure_lookup
        ]
        built[run.run_id] = _run_remediation_list_summary(
            run=run,
            incident_ids=linked_ids,
            items=history_items,
            legacy_mcp_records=legacy_rows,
            errors=failures,
        )
    return built
|
||
|
||
|
||
def _timeline_sort_key(item: dict[str, Any], fallback_ts: Any) -> str:
    """Return a string sort key for a timeline item.

    Timeline timestamps may be datetime-like objects or already-formatted
    strings; both are reduced to a string so they can be sorted together.

    Args:
        item: Timeline entry; its ``"ts"`` value is used when truthy.
        fallback_ts: Timestamp used when the item has no truthy ``"ts"``.

    Returns:
        ``isoformat()`` of the chosen value when it has that method, otherwise
        its ``str()``; an empty string when no timestamp is available.
    """
    stamp = item.get("ts") or fallback_ts
    return stamp.isoformat() if hasattr(stamp, "isoformat") else str(stamp or "")
|
||
|
||
|
||
def _summarize_run_remediation_by_work_item(
    items: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Group remediation history items and count them per work item.

    Items are keyed by the first truthy value among ``work_item_id``,
    ``incident_id`` and ``id`` (stringified). The first item seen for a key
    supplies the ``latest_*`` fields; later items only increase ``count``.

    Args:
        items: Remediation history entries.

    Returns:
        One summary dict per group, in first-seen order.
    """
    grouped: dict[str, dict[str, Any]] = {}
    for entry in items:
        group_key = str(
            entry.get("work_item_id") or entry.get("incident_id") or entry.get("id")
        )
        bucket = grouped.get(group_key)
        if bucket is None:
            bucket = {
                "work_item_id": entry.get("work_item_id"),
                "incident_id": entry.get("incident_id"),
                "count": 0,
                "latest_at": entry.get("created_at"),
                "latest_preview": entry.get("verification_result_preview"),
                "latest_mode": entry.get("mode"),
                "latest_route": _route_label_from_remediation(entry),
            }
            grouped[group_key] = bucket
        bucket["count"] += 1
    return list(grouped.values())
|
||
|
||
|
||
async def _fetch_run_remediation_history(
    incident_ids: list[str],
    *,
    limit: int = _REMEDIATION_HISTORY_LIMIT,
) -> dict[str, Any]:
    """Fetch ADR-100 remediation history for the incidents linked to a run.

    History is requested per incident id from ``Adr100RemediationService``.
    A failing lookup is logged and reported under ``"errors"`` instead of
    aborting the whole fetch.

    Args:
        incident_ids: Incident ids to look up.
        limit: Per-incident fetch limit, and also the cap on returned items.

    Returns:
        An ``awooop_run_remediation_evidence_v1`` envelope. ``"total"`` counts
        every fetched item, while ``"items"`` holds at most ``limit`` of them,
        sorted by ``created_at`` descending.
    """

    def _evidence(
        ids: list[str],
        total: int,
        shown: list[dict[str, Any]],
        grouped: list[dict[str, Any]],
        failures: list[dict[str, str]],
    ) -> dict[str, Any]:
        # Single place that fixes the envelope's keys and their order.
        return {
            "schema_version": "awooop_run_remediation_evidence_v1",
            "source": "alert_operation_log",
            "incident_ids": ids,
            "total": total,
            "limit": limit,
            "items": shown,
            "by_work_item": grouped,
            "errors": failures,
        }

    if not incident_ids:
        return _evidence([], 0, [], [], [])

    from src.services.adr100_remediation_service import Adr100RemediationService

    remediation = Adr100RemediationService(record_history=False)
    collected: list[dict[str, Any]] = []
    failures: list[dict[str, str]] = []
    for incident_id in incident_ids:
        try:
            fetched = await remediation.history(limit=limit, incident_id=incident_id)
            collected.extend(
                entry for entry in fetched.get("items", []) if isinstance(entry, dict)
            )
        except Exception as exc:
            logger.warning(
                "run_remediation_history_fetch_failed",
                incident_id=incident_id,
                error=str(exc),
            )
            failures.append({"incident_id": incident_id, "error": str(exc)})

    # Newest first; ordering compares the stringified created_at values.
    collected.sort(key=lambda entry: str(entry.get("created_at") or ""), reverse=True)
    visible = collected[:limit]
    return _evidence(
        incident_ids,
        len(collected),
        visible,
        _summarize_run_remediation_by_work_item(visible),
        failures,
    )
|
||
|
||
|
||
def _legacy_mcp_record(row: MCPAuditLog) -> dict[str, Any]:
    """Copy selected columns of an ``MCPAuditLog`` row into a plain dict.

    Args:
        row: Legacy MCP audit row.

    Returns:
        Dict whose keys are the copied attribute names, in the order listed
        below.
    """
    copied_fields = (
        "id",
        "session_id",
        "flywheel_node",
        "mcp_server",
        "tool_name",
        "duration_ms",
        "success",
        "error_message",
        "incident_id",
        "agent_role",
        "created_at",
    )
    return {name: getattr(row, name) for name in copied_fields}
|
||
|
||
|
||
async def _fetch_legacy_mcp_by_incident_ids(
    incident_ids: list[str],
    *,
    limit: int,
) -> dict[str, list[dict[str, Any]]]:
    """Fetch legacy MCP audit rows grouped by incident id.

    Args:
        incident_ids: Incident ids to match against ``MCPAuditLog.incident_id``.
        limit: Maximum number of rows fetched across all incidents.

    Returns:
        Mapping of incident id to its records (newest first). Incidents with
        no rows are absent; an empty input returns ``{}`` without querying.
    """
    if not incident_ids:
        return {}

    query = (
        select(MCPAuditLog)
        .where(MCPAuditLog.incident_id.in_(incident_ids))
        .order_by(MCPAuditLog.created_at.desc())
        .limit(limit)
    )
    async with get_db_context("awoooi") as db:
        fetched = await db.execute(query)
        rows = list(fetched.scalars().all())

    grouped: dict[str, list[dict[str, Any]]] = {}
    for row in rows:
        if not row.incident_id:
            continue
        grouped.setdefault(row.incident_id, []).append(_legacy_mcp_record(row))
    return grouped
|
||
|
||
|
||
async def _fetch_run_legacy_mcp_history(
    incident_ids: list[str],
    *,
    limit: int = _MAX_TIMELINE_ITEMS,
) -> dict[str, Any]:
    """Fetch legacy MCP audit rows linked to a run through its incident ids.

    Args:
        incident_ids: Incident ids to match against ``MCPAuditLog.incident_id``.
        limit: Maximum number of rows to fetch.

    Returns:
        An ``awooop_run_legacy_mcp_evidence_v1`` envelope holding the records
        (newest first), their count and the ``_summarize_mcp`` digest. An
        empty input yields an empty envelope without querying the database.
    """
    if incident_ids:
        query = (
            select(MCPAuditLog)
            .where(MCPAuditLog.incident_id.in_(incident_ids))
            .order_by(MCPAuditLog.created_at.desc())
            .limit(limit)
        )
        async with get_db_context("awoooi") as db:
            fetched = await db.execute(query)
            rows = list(fetched.scalars().all())
        records = [_legacy_mcp_record(row) for row in rows]
        linked_ids = incident_ids
    else:
        records = []
        linked_ids = []

    return {
        "schema_version": "awooop_run_legacy_mcp_evidence_v1",
        "source": "mcp_audit_log",
        "incident_ids": linked_ids,
        "total": len(records),
        "limit": limit,
        "records": records,
        "summary": _summarize_mcp(records),
    }
|
||
|
||
|
||
async def get_run_detail(
    run_id: str,
    project_id: str | None = None,
) -> dict[str, Any]:
    """Return the handling context of a single run for the Run detail / Timeline view.

    Loads the run, its step journal, inbound events, outbound messages and
    MCP gateway calls, then adds legacy MCP rows, ADR-100 remediation history
    and the status chain resolved through the run's incident ids. Everything
    is also merged into one chronological timeline.

    Args:
        run_id: Run identifier; must parse as a UUID.
        project_id: When given, the run must belong to this project. It is
            also passed to ``get_db_context`` (``"awoooi"`` when omitted).

    Returns:
        Dict with the run payload, the per-source item lists, the evidence
        envelopes, the timeline and a ``counts`` section.

    Raises:
        HTTPException: 422 when ``run_id`` is not a valid UUID; 404 when no
            matching run exists.
    """
    try:
        run_uuid = uuid.UUID(run_id)
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"run_id 格式錯誤: {exc}",
        ) from exc

    # NOTE(review): the source this block was recovered from had its
    # indentation stripped. The session scope is assumed to end after the
    # last `db.execute` below — confirm against the repository.
    async with get_db_context(project_id or "awoooi") as db:
        run_stmt = select(AwoooPRunState).where(AwoooPRunState.run_id == run_uuid)
        if project_id is not None:
            run_stmt = run_stmt.where(AwoooPRunState.project_id == project_id)
        run_result = await db.execute(run_stmt)
        run = run_result.scalar_one_or_none()

        if run is None:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"run {run_id!r} 不存在",
            )

        # Each per-run query is capped at _MAX_TIMELINE_ITEMS rows.
        steps_result = await db.execute(
            select(AwoooPRunStepJournal)
            .where(AwoooPRunStepJournal.run_id == run_uuid)
            .order_by(AwoooPRunStepJournal.step_seq.asc())
            .limit(_MAX_TIMELINE_ITEMS)
        )
        steps = list(steps_result.scalars().all())

        # Inbound events match either by run_id or by the run's trigger_ref.
        # A trigger_ref that parses as a UUID is compared with event_id,
        # anything else with provider_event_id.
        inbound_where = [AwoooPConversationEvent.run_id == run_uuid]
        if run.trigger_ref:
            try:
                trigger_event_uuid = uuid.UUID(run.trigger_ref)
                inbound_where.append(AwoooPConversationEvent.event_id == trigger_event_uuid)
            except ValueError:
                inbound_where.append(
                    AwoooPConversationEvent.provider_event_id == run.trigger_ref
                )
        inbound_result = await db.execute(
            select(AwoooPConversationEvent)
            .where(sa_or(*inbound_where))
            .order_by(AwoooPConversationEvent.received_at.asc())
            .limit(_MAX_TIMELINE_ITEMS)
        )
        inbound_events = list(inbound_result.scalars().all())

        outbound_result = await db.execute(
            select(AwoooPOutboundMessage)
            .where(AwoooPOutboundMessage.run_id == run_uuid)
            .order_by(AwoooPOutboundMessage.queued_at.asc())
            .limit(_MAX_TIMELINE_ITEMS)
        )
        outbound_messages = list(outbound_result.scalars().all())

        mcp_result = await db.execute(
            select(AwoooPMcpGatewayAudit)
            .where(AwoooPMcpGatewayAudit.run_id == run_uuid)
            .order_by(AwoooPMcpGatewayAudit.created_at.asc())
            .limit(_MAX_TIMELINE_ITEMS)
        )
        mcp_calls = list(mcp_result.scalars().all())

    # Plain-dict projections of the ORM rows for the response body.
    run_payload = {
        "run_id": run.run_id,
        "project_id": run.project_id,
        "agent_id": run.agent_id,
        "state": run.state,
        "is_shadow": run.is_shadow,
        "trace_id": run.trace_id,
        "trigger_type": run.trigger_type,
        "trigger_ref": run.trigger_ref,
        "cost_usd": run.cost_usd,
        "step_count": run.step_count,
        "attempt_count": run.attempt_count,
        "max_attempts": run.max_attempts,
        "error_code": run.error_code,
        "error_detail": run.error_detail,
        "created_at": run.created_at,
        "started_at": run.started_at,
        "completed_at": run.completed_at,
        "timeout_at": run.timeout_at,
        "heartbeat_at": run.heartbeat_at,
    }

    step_items = [
        {
            "step_id": row.step_id,
            "step_seq": row.step_seq,
            "tool_name": row.tool_name,
            "result_status": row.result_status,
            "was_blocked": row.was_blocked,
            "block_reason": row.block_reason,
            "error_code": row.error_code,
            "latency_ms": row.latency_ms,
            "created_at": row.created_at,
            "completed_at": row.completed_at,
        }
        for row in steps
    ]

    inbound_items = [
        {
            "event_id": row.event_id,
            "channel_type": row.channel_type,
            "provider_event_id": row.provider_event_id,
            "content_preview": row.content_preview,
            "is_duplicate": row.is_duplicate,
            "received_at": row.received_at,
        }
        for row in inbound_events
    ]

    outbound_items = []
    for row in outbound_messages:
        callback_reply = _outbound_callback_reply(row.source_envelope)
        outbound_items.append({
            "message_id": row.message_id,
            "channel_type": row.channel_type,
            "message_type": row.message_type,
            "content_preview": row.content_preview,
            "send_status": row.send_status,
            "send_error": row.send_error,
            "provider_message_id": row.provider_message_id,
            "queued_at": row.queued_at,
            "sent_at": row.sent_at,
            "triggered_by_state": row.triggered_by_state,
            "callback_reply": callback_reply,
        })

    def _mcp_item(row: AwoooPMcpGatewayAudit) -> dict[str, Any]:
        # gate_result is only trusted when it is a dict; otherwise the
        # derived fields below come out as None.
        gate_result = row.gate_result if isinstance(row.gate_result, dict) else {}
        return {
            "call_id": row.call_id,
            "agent_id": row.agent_id,
            "tool_name": row.tool_name,
            "result_status": row.result_status,
            "block_gate": row.block_gate,
            "block_reason": row.block_reason,
            "latency_ms": row.latency_ms,
            "created_at": row.created_at,
            "required_scope": gate_result.get("required_scope"),
            "policy_enforced": gate_result.get("policy_enforced"),
            "is_shadow": gate_result.get("is_shadow"),
            "gate_result": gate_result,
        }

    mcp_items = [_mcp_item(row) for row in mcp_calls]
    mcp_gateway_summary = _summarize_gateway_mcp([
        _mcp_gateway_summary_row(row) for row in mcp_calls
    ])
    # Incident ids link the run to evidence stored outside the run tables.
    incident_ids = _collect_run_incident_ids(
        run=run,
        inbound_events=inbound_events,
        outbound_messages=outbound_messages,
    )
    legacy_mcp_history = await _fetch_run_legacy_mcp_history(incident_ids)
    remediation_history = await _fetch_run_remediation_history(incident_ids)
    awooop_status_chain = await _fetch_awooop_status_chain(
        incident_ids=incident_ids,
        project_id=run.project_id,
        remediation_history=remediation_history,
    )

    # Timeline: entries are appended per source and sorted once at the end.
    timeline: list[dict[str, Any]] = [
        _timeline_item(
            ts=run.created_at,
            kind="run",
            title="Run 建立",
            status=run.state,
            summary=f"{run.trigger_type or 'unknown'} → {run.agent_id}",
            metadata={"trace_id": run.trace_id, "trigger_ref": run.trigger_ref},
        )
    ]
    if run.started_at:
        timeline.append(
            _timeline_item(
                ts=run.started_at,
                kind="run",
                title="Run 開始執行",
                status="running",
                summary=run.worker_id,
            )
        )
    for row in inbound_events:
        timeline.append(
            _timeline_item(
                ts=row.received_at,
                kind="inbound",
                title=f"{row.channel_type} 入站事件",
                status="duplicate" if row.is_duplicate else "received",
                summary=row.content_preview,
                metadata={"provider_event_id": row.provider_event_id},
            )
        )
    for row in steps:
        # Steps written by the Operator Console are shown as approvals.
        is_approval_step = row.tool_name.startswith("operator_console.")
        timeline.append(
            _timeline_item(
                ts=row.completed_at or row.created_at,
                kind="approval" if is_approval_step else "step",
                title=_approval_step_title(row.tool_name, row.step_seq),
                status=row.result_status,
                summary=row.block_reason or row.error_code,
                metadata={
                    "was_blocked": row.was_blocked,
                    "latency_ms": row.latency_ms,
                },
            )
        )
    for row in mcp_calls:
        gate_result = row.gate_result if isinstance(row.gate_result, dict) else {}
        scope = gate_result.get("required_scope")
        policy_enforced = gate_result.get("policy_enforced")
        summary = row.block_reason
        if summary is None:
            # No block reason: describe the call by agent, scope and policy flag.
            summary = (
                f"agent={row.agent_id or 'unknown'}"
                f" scope={scope or 'unknown'}"
                f" policy_enforced={policy_enforced}"
            )
        timeline.append(
            _timeline_item(
                ts=row.created_at,
                kind="mcp",
                title=f"MCP: {row.tool_name}",
                status=row.result_status,
                summary=summary,
                metadata={
                    "agent_id": row.agent_id,
                    "block_gate": row.block_gate,
                    "required_scope": scope,
                    "policy_enforced": policy_enforced,
                    "latency_ms": row.latency_ms,
                },
            )
        )
    for record in legacy_mcp_history.get("records", []):
        if not isinstance(record, dict):
            continue
        # "<mcp_server>/<tool_name>", skipping empty parts; "unknown" if both are empty.
        tool_route = "/".join(
            part
            for part in (
                str(record.get("mcp_server") or ""),
                str(record.get("tool_name") or ""),
            )
            if part
        ) or "unknown"
        timeline.append(
            _timeline_item(
                ts=record.get("created_at"),
                kind="mcp",
                title=f"Legacy MCP: {tool_route}",
                status=_legacy_mcp_timeline_status(record),
                summary=_legacy_mcp_timeline_summary(record),
                metadata={
                    "incident_id": record.get("incident_id"),
                    "agent_role": record.get("agent_role"),
                    "flywheel_node": record.get("flywheel_node"),
                    "history_source": "mcp_audit_log",
                },
            )
        )
    for item in remediation_history.get("items", []):
        if not isinstance(item, dict):
            continue
        timeline.append(
            _timeline_item(
                ts=item.get("created_at"),
                kind="remediation",
                title="ADR-100 補救試跑",
                status=_remediation_timeline_status(item),
                summary=_remediation_timeline_summary(item),
                metadata={
                    "incident_id": item.get("incident_id"),
                    "work_item_id": item.get("work_item_id"),
                    "mcp_route": _route_label_from_remediation(item),
                    "writes_incident_state": item.get("writes_incident_state"),
                    "writes_auto_repair_result": item.get("writes_auto_repair_result"),
                    "history_source": "alert_operation_log",
                },
            )
        )
    for row in outbound_messages:
        callback_reply = _outbound_callback_reply(row.source_envelope)
        timeline.append(
            _timeline_item(
                ts=row.sent_at or row.queued_at,
                kind="outbound",
                title=_outbound_timeline_title(
                    row.channel_type,
                    row.message_type,
                    row.content_preview,
                    callback_reply,
                ),
                status=_outbound_timeline_status(row.send_status, callback_reply),
                summary=_outbound_timeline_summary(
                    content_preview=row.content_preview,
                    send_error=row.send_error,
                    callback_reply=callback_reply,
                ),
                metadata=_outbound_timeline_metadata(row, callback_reply),
            )
        )
    if run.completed_at:
        timeline.append(
            _timeline_item(
                ts=run.completed_at,
                kind="run",
                title="Run 結束",
                status=run.state,
                summary=run.error_detail or run.error_code,
            )
        )

    # Ascending sort, then truncate: when the merged timeline exceeds
    # _MAX_TIMELINE_ITEMS, the entries with the largest sort keys are dropped.
    timeline = sorted(
        timeline,
        key=lambda item: _timeline_sort_key(item, run.created_at),
    )[:_MAX_TIMELINE_ITEMS]

    return {
        "run": run_payload,
        "steps": step_items,
        "inbound_events": inbound_items,
        "outbound_messages": outbound_items,
        "mcp_calls": mcp_items,
        "mcp_gateway": mcp_gateway_summary,
        "mcp_legacy": legacy_mcp_history,
        "remediation_history": remediation_history,
        "awooop_status_chain": awooop_status_chain,
        "timeline": timeline,
        "counts": {
            "steps": len(step_items),
            "inbound_events": len(inbound_items),
            "outbound_messages": len(outbound_items),
            "mcp_calls": len(mcp_items),
            "legacy_mcp_calls": legacy_mcp_history.get("total", 0),
            "remediation_history": remediation_history.get("total", 0),
            "timeline": len(timeline),
        },
    }
|
||
|
||
|
||
# =============================================================================
|
||
# Channel Events
|
||
# =============================================================================
|
||
|
||
async def list_recent_channel_events(
    *,
    project_id: str | None,
    channel_type: str | None,
    provider_prefix: str | None,
    limit: int,
) -> dict[str, Any]:
    """List recent channel events for the Operator Console.

    Args:
        project_id: Restrict to this project when not ``None``.
        channel_type: Restrict to this channel type when not ``None``.
        provider_prefix: Restrict to events whose ``provider_event_id`` starts
            with this literal prefix when not ``None``.
        limit: Requested page size; clamped to ``[1, _MAX_EVENTS]``.

    Returns:
        Dict with ``"events"`` (newest first), ``"total"`` (number of events
        returned) and the effective ``"limit"``.
    """
    safe_limit = max(1, min(limit, _MAX_EVENTS))

    async with get_db_context("awoooi") as db:
        stmt = select(AwoooPConversationEvent).order_by(
            AwoooPConversationEvent.received_at.desc()
        )
        if project_id is not None:
            stmt = stmt.where(AwoooPConversationEvent.project_id == project_id)
        if channel_type is not None:
            stmt = stmt.where(AwoooPConversationEvent.channel_type == channel_type)
        if provider_prefix is not None:
            # autoescape=True makes "%" and "_" inside the prefix match
            # literally. A hand-built LIKE pattern would treat them as
            # wildcards, so a prefix such as "tg_" would also match "tgX...".
            stmt = stmt.where(
                AwoooPConversationEvent.provider_event_id.startswith(
                    provider_prefix,
                    autoescape=True,
                )
            )

        result = await db.execute(stmt.limit(safe_limit))
        rows = list(result.scalars().all())

    events = [_recent_channel_event_item(r) for r in rows]
    return {"events": events, "total": len(events), "limit": safe_limit}
|
||
|
||
|
||
# =============================================================================
|
||
# Approvals
|
||
# =============================================================================
|
||
|
||
async def list_approvals(
    project_id: str | None,
    run_id: str | None = None,
    remediation_status: str | None = None,
) -> dict[str, Any]:
    """List runs in ``waiting_approval`` state with remediation context.

    Args:
        project_id: Restrict to this project when not ``None``.
        run_id: Restrict to this run when given; must parse as a UUID.
        remediation_status: Keep only runs whose remediation summary has this
            status; validated against the allowed filter values.

    Returns:
        Dict with the approval rows under both ``"approvals"`` and
        ``"items"`` (the same list), plus ``"total"``. With a
        ``remediation_status`` filter, ``"total"`` is the filtered count.

    Raises:
        HTTPException: 422 for an invalid ``remediation_status`` or a
            ``run_id`` that is not a UUID.
    """
    _validate_remediation_status_filter(remediation_status)

    run_uuid: UUID | None = None
    if run_id:
        try:
            run_uuid = uuid.UUID(run_id)
        except ValueError as exc:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail=f"run_id 格式錯誤: {exc}",
            ) from exc

    # NOTE(review): indentation was lost in the source this was recovered
    # from. The session is assumed to cover everything that uses `db` and
    # nothing after it — confirm against the repository.
    async with get_db_context("awoooi") as db:
        stmt = (
            select(AwoooPRunState)
            .where(AwoooPRunState.state == "waiting_approval")
            .order_by(AwoooPRunState.created_at.asc())
        )
        if project_id is not None:
            stmt = stmt.where(AwoooPRunState.project_id == project_id)
        if run_uuid is not None:
            stmt = stmt.where(AwoooPRunState.run_id == run_uuid)

        count_stmt = select(func.count()).select_from(stmt.subquery())
        total_result = await db.execute(count_stmt)
        total = total_result.scalar_one()

        result = await db.execute(stmt)
        rows = list(result.scalars().all())

        inbound_by_run, outbound_by_run = await _load_run_message_context(db, rows)

    remediation_summaries = await _build_run_remediation_summaries(
        runs=rows,
        inbound_by_run=inbound_by_run,
        outbound_by_run=outbound_by_run,
    )
    if remediation_status:
        rows = [
            row
            for row in rows
            if _remediation_summary_matches_status(
                remediation_summaries.get(row.run_id),
                remediation_status,
            )
        ]
        # The remediation filter is applied in Python, so recount here.
        total = len(rows)

    # Runs sharing a project and the same incident ids reuse one status chain.
    status_chain_cache: dict[tuple[str, tuple[str, ...]], dict[str, Any]] = {}
    items = []
    for r in rows:
        summary = remediation_summaries.get(r.run_id)
        summary_incident_ids = (
            summary.get("incident_ids") if isinstance(summary, dict) else None
        )
        if not isinstance(summary_incident_ids, list):
            # A summary without a list of incident ids is treated as having
            # none, instead of failing on iteration.
            summary_incident_ids = []
        incident_ids = [
            str(incident_id)
            for incident_id in summary_incident_ids
            if isinstance(incident_id, str) and incident_id
        ]
        cache_key = (r.project_id, tuple(incident_ids))
        status_chain = status_chain_cache.get(cache_key)
        if status_chain is None:
            status_chain = await get_awooop_status_chain(
                project_id=r.project_id,
                incident_ids=incident_ids,
            )
            status_chain_cache[cache_key] = status_chain

        items.append({
            "run_id": r.run_id,
            "project_id": r.project_id,
            "agent_id": r.agent_id,
            "trigger_type": r.trigger_type,
            "trigger_ref": r.trigger_ref,
            "is_shadow": r.is_shadow,
            "created_at": r.created_at,
            "timeout_at": r.timeout_at,
            "remediation_summary": summary,
            "awooop_status_chain": status_chain,
        })
    return {"approvals": items, "total": total, "items": items}
|
||
|
||
|
||
async def decide_approval(
    run_id: str,
    project_id: str,
    decision: str,
    approver_id: str,
    reason: str | None,
) -> dict[str, Any]:
    """Approve or reject a run that is waiting for approval (ADR-116 Gate 5).

    ``decision == "approve"`` issues and records an approval token and moves
    the run to ``running``. Any other value moves the run to ``cancelled``
    with error code ``E-APPR-REJECTED``. Runs whose trigger type equals
    ``_ADR100_GATE5_PROJECTION_TRIGGER`` are never transitioned: the attempt
    is journaled and audited, then rejected with 409.

    Args:
        run_id: Run identifier; must parse as a UUID.
        project_id: Project that must own the run.
        decision: ``"approve"`` to approve; anything else is a rejection.
        approver_id: Identifier of the operator making the decision.
        reason: Optional free-text reason.

    Returns:
        Dict with ``run_id``, ``decision``, ``new_state`` and
        ``approval_token_jti`` (``None`` for rejections, or when the token
        payload could not be decoded).

    Raises:
        HTTPException: 422 for a malformed ``run_id``; 404 when the run does
            not exist for this project; 409 when the run is not in
            ``waiting_approval`` or is a projection-only Gate 5 run; 500 when
            recording the approval fails.
    """
    try:
        run_uuid = uuid.UUID(run_id)
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"run_id 格式錯誤: {exc}",
        ) from exc

    # NOTE(review): indentation was lost in the source this was recovered
    # from. The session is assumed to cover the lookup and the checks that
    # read `run`, and nothing after them — confirm against the repository.
    async with get_db_context(project_id) as db:
        result = await db.execute(
            select(AwoooPRunState).where(
                AwoooPRunState.run_id == run_uuid,
                AwoooPRunState.project_id == project_id,
            )
        )
        run = result.scalar_one_or_none()

        if run is None:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"run {run_id!r} 不存在或非此 project 所有",
            )
        if run.state != "waiting_approval":
            raise HTTPException(
                status_code=status.HTTP_409_CONFLICT,
                detail=f"run {run_id!r} 目前狀態為 {run.state!r},無法審核(需為 waiting_approval)",
            )
        is_projection_only_gate5 = run.trigger_type == _ADR100_GATE5_PROJECTION_TRIGGER

    approval_token_jti: str | None = None
    new_state: str

    if is_projection_only_gate5:
        # Projection-only rows: record the attempt, audit it, then refuse.
        # The run stays in waiting_approval.
        await _record_approval_projection_guard_step(
            run_id=run_uuid,
            project_id=project_id,
            decision=decision,
            approver_id=approver_id,
            reason=reason,
        )
        try:
            await write_audit(
                project_id=project_id,
                action=f"run.approval.{decision}.blocked",
                resource_type="run",
                resource_id=run_id,
                details={
                    "approver_id": approver_id,
                    "decision": decision,
                    "reason": reason,
                    "new_state": "waiting_approval",
                    "trigger_type": _ADR100_GATE5_PROJECTION_TRIGGER,
                    "block_reason": "adr100_runtime_replay_gate5_projection_only",
                    "execution_authorized": False,
                    "repair_executed": False,
                },
                run_id=run_id,
            )
        except Exception as exc:
            # An audit write failure is logged only; the 409 below is still raised.
            logger.warning(
                "approval_projection_guard_audit_write_failed",
                run_id=run_id,
                error=str(exc),
            )
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=(
                "adr100_runtime_replay_gate5_projection_only: "
                "此 AwoooP 簽核列只投影 legacy Gate 5 approval 與狀態鏈,"
                "尚未接上 auto_repair_executor 執行 handoff,不能直接由平台按鈕轉成 running。"
            ),
        )

    if decision == "approve":
        token = issue_approval_token(
            project_id=project_id,
            run_id=run_id,
            tool_name="operator_console_approve",
            approver_id=approver_id,
        )
        try:
            await record_approval(
                project_id=project_id,
                run_id=run_id,
                tool_name="operator_console_approve",
                approver_id=approver_id,
                token=token,
            )
        except Exception as exc:
            # The approval must be recorded before the run may transition.
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"核准記錄失敗: {exc}",
            ) from exc

        await transition(run_uuid, project_id, "running")
        new_state = "running"
        await _record_approval_decision_step(
            run_id=run_uuid,
            project_id=project_id,
            decision=decision,
            approver_id=approver_id,
            reason=reason,
        )

        import base64
        import json as _json
        try:
            # Read the "jti" claim from the token's second dot-separated
            # segment (urlsafe base64, padded to a multiple of 4). The token
            # is only decoded here, not verified.
            p_b64 = token.split(".")[1]
            padding = 4 - len(p_b64) % 4
            if padding != 4:
                p_b64 += "=" * padding
            payload = _json.loads(base64.urlsafe_b64decode(p_b64))
            approval_token_jti = payload.get("jti")
        except Exception:
            # The jti is informational; a decode failure does not undo the approval.
            approval_token_jti = None

    else:
        # NOTE(review): every value other than "approve" is treated as a
        # rejection; presumably the caller restricts `decision` — confirm.
        await transition(
            run_uuid,
            project_id,
            "cancelled",
            error_code="E-APPR-REJECTED",
            error_detail=f"operator 拒絕: approver={approver_id!r}, reason={reason!r}",
        )
        new_state = "cancelled"
        await _record_approval_decision_step(
            run_id=run_uuid,
            project_id=project_id,
            decision=decision,
            approver_id=approver_id,
            reason=reason,
        )

    try:
        await write_audit(
            project_id=project_id,
            action=f"run.approval.{decision}",
            resource_type="run",
            resource_id=run_id,
            details={
                "approver_id": approver_id,
                "decision": decision,
                "reason": reason,
                "new_state": new_state,
            },
            run_id=run_id,
        )
    except Exception as exc:
        # Audit failures are logged only; the decision has already been applied.
        logger.warning("approval_audit_write_failed", run_id=run_id, error=str(exc))

    return {
        "run_id": run_id,
        "decision": decision,
        "new_state": new_state,
        "approval_token_jti": approval_token_jti,
    }
|
||
|
||
|
||
async def _record_approval_projection_guard_step(
    *,
    run_id: UUID,
    project_id: str,
    decision: str,
    approver_id: str,
    reason: str | None,
) -> None:
    """Journal a blocked approval attempt on a projection-only Gate 5 run.

    Adds a ``failed`` / blocked step with error code
    ``E-ADR100-GATE5-PROJECTION`` after the run's current highest step
    sequence and increments the run's ``step_count``. Any failure while
    writing is logged as a warning and swallowed.

    Args:
        run_id: Run the attempt was made on.
        project_id: Project that owns the run.
        decision: Decision the operator attempted.
        approver_id: Operator who attempted it.
        reason: Optional free-text reason; ``"-"`` is recorded when empty.
    """
    block_note = _truncate_step_summary(
        "projection_only_gate5; "
        f"approver={approver_id}; decision={decision}; reason={reason or '-'}"
    )
    try:
        # NOTE(review): there is no explicit commit here; presumably
        # get_db_context commits on clean exit — confirm. The success log is
        # assumed to sit after the session block (source indentation was lost).
        async with get_db_context(project_id) as db:
            highest_seq = await db.execute(
                select(func.coalesce(func.max(AwoooPRunStepJournal.step_seq), 0)).where(
                    AwoooPRunStepJournal.run_id == run_id,
                    AwoooPRunStepJournal.project_id == project_id,
                )
            )
            next_seq = int(highest_seq.scalar_one()) + 1

            guard_step = AwoooPRunStepJournal(
                run_id=run_id,
                project_id=project_id,
                step_seq=next_seq,
                tool_name="operator_console.approval_projection_guard",
                result_status="failed",
                error_code="E-ADR100-GATE5-PROJECTION",
                was_blocked=True,
                block_reason=block_note,
                completed_at=_utc_now_naive(),
            )
            db.add(guard_step)

            bump_step_count = (
                update(AwoooPRunState)
                .where(
                    AwoooPRunState.run_id == run_id,
                    AwoooPRunState.project_id == project_id,
                )
                .values(step_count=AwoooPRunState.step_count + 1)
            )
            await db.execute(bump_step_count)

        logger.info(
            "approval_projection_guard_step_recorded",
            run_id=str(run_id),
            project_id=project_id,
            decision=decision,
            approver_id=approver_id,
        )
    except Exception as exc:
        logger.warning(
            "approval_projection_guard_step_record_failed",
            run_id=str(run_id),
            project_id=project_id,
            decision=decision,
            error=str(exc),
        )
|
||
|
||
|
||
async def _record_approval_decision_step(
    *,
    run_id: UUID,
    project_id: str,
    decision: str,
    approver_id: str,
    reason: str | None,
) -> None:
    """Write the Operator Console's manual approval decision to the Run Step Journal.

    This is a governance and observability record, not the execution gate
    itself. A write failure must not retroactively block an approve / reject
    that has already completed, otherwise the manual decision state machine
    would suffer a secondary failure. Failures are therefore logged as
    warnings and swallowed.

    Args:
        run_id: Run the decision applies to.
        project_id: Project that owns the run.
        decision: ``"approve"`` records an ``operator_console.approve`` step;
            anything else records ``operator_console.reject``.
        approver_id: Operator who made the decision.
        reason: Optional free-text reason; ``"-"`` is recorded when empty.
    """
    if decision == "approve":
        step_tool = "operator_console.approve"
    else:
        step_tool = "operator_console.reject"
    decision_note = _truncate_step_summary(
        f"approver={approver_id}; decision={decision}; reason={reason or '-'}"
    )

    try:
        # NOTE(review): there is no explicit commit here; presumably
        # get_db_context commits on clean exit — confirm. The success log is
        # assumed to sit after the session block (source indentation was lost).
        async with get_db_context(project_id) as db:
            highest_seq = await db.execute(
                select(func.coalesce(func.max(AwoooPRunStepJournal.step_seq), 0)).where(
                    AwoooPRunStepJournal.run_id == run_id,
                    AwoooPRunStepJournal.project_id == project_id,
                )
            )
            next_seq = int(highest_seq.scalar_one()) + 1

            decision_step = AwoooPRunStepJournal(
                run_id=run_id,
                project_id=project_id,
                step_seq=next_seq,
                tool_name=step_tool,
                result_status="success",
                block_reason=decision_note,
                completed_at=_utc_now_naive(),
            )
            db.add(decision_step)

            bump_step_count = (
                update(AwoooPRunState)
                .where(
                    AwoooPRunState.run_id == run_id,
                    AwoooPRunState.project_id == project_id,
                )
                .values(step_count=AwoooPRunState.step_count + 1)
            )
            await db.execute(bump_step_count)

        logger.info(
            "approval_decision_step_recorded",
            run_id=str(run_id),
            project_id=project_id,
            decision=decision,
            approver_id=approver_id,
        )
    except Exception as exc:
        logger.warning(
            "approval_decision_step_record_failed",
            run_id=str(run_id),
            project_id=project_id,
            decision=decision,
            error=str(exc),
        )
|