Files
awoooi/apps/api/src/services/platform_operator_service.py
Your Name fe74d8616e
Some checks failed
Code Review / ai-code-review (push) Successful in 14s
CD Pipeline / tests (push) Successful in 1m40s
Ansible / Reboot Recovery Contract / validate (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled
fix(api): expose controlled runtime promotion summaries
2026-06-26 23:56:24 +08:00

7246 lines
270 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
AwoooP Operator Console — Platform Operator Service
====================================================
leWOOOgo modularization: DB access is centralized in the service layer; routers do not reference get_db directly.
ADR-106: AwoooP Agent Platform
2026-05-05 ogt + Claude Sonnet 4.6
"""
from __future__ import annotations
import asyncio
import json
import os
import re
import time
import uuid
from collections import defaultdict
from collections.abc import Mapping
from datetime import UTC, datetime, timedelta
from pathlib import Path
from typing import Any, get_args
from urllib.parse import urlencode
from uuid import UUID
import httpx
import structlog
from fastapi import HTTPException, status
from sqlalchemy import func, select, text, update
from sqlalchemy import or_ as sa_or
from src.core.config import get_settings
from src.db.awooop_models import (
AwoooPContractRevision,
AwoooPConversationEvent,
AwoooPMcpGatewayAudit,
AwoooPOutboundMessage,
AwoooPRunState,
AwoooPRunStepJournal,
)
from src.db.base import get_db_context
from src.db.models import ApprovalRecord, IncidentRecord, MCPAuditLog
from src.services.audit_sink import write_audit
from src.services.awooop_ansible_audit_service import summarize_ansible_execution
from src.services.awooop_approval_token import issue_approval_token, record_approval
from src.services.awooop_truth_chain_service import (
_summarize_gateway_mcp,
_summarize_mcp,
fetch_truth_chain,
)
from src.services.ai_agent_result_capture_owner_release_approval_gate import (
load_latest_ai_agent_result_capture_owner_release_approval_gate,
)
from src.services.ai_agent_result_capture_release_verifier_preflight_gate import (
load_latest_ai_agent_result_capture_release_verifier_preflight_gate,
)
from src.services.governance_km_stale_review_service import (
query_km_stale_owner_review_completion_queue,
)
from src.services.ollama_endpoint_resolver import (
OllamaEndpointSelection,
OllamaWorkloadType,
resolve_ollama_order,
)
from src.services.ollama_failover_manager import (
OllamaEndpoint,
OllamaRoutingResult,
get_ollama_failover_manager,
)
from src.services.ollama_health_monitor import HealthReport, HealthStatus
from src.services.operator_outcome import build_operator_outcome, normalize_operator_blockers
from src.services.operator_summary_cache import (
get_cached_operator_summary_async,
store_operator_summary_async,
)
from src.services.run_state_machine import transition
from src.services.snapshot_paths import resolve_repo_root
# Module-level structured logger (structlog), named after this module.
logger = structlog.get_logger(__name__)
# --- Listing / pagination caps -------------------------------------------------
# Maximum number of contract revisions returned by list_contracts().
_MAX_CONTRACTS = 200
_DEFAULT_PER_PAGE = 50
_MAX_PER_PAGE = 200
_MAX_EVENTS = 100
_MAX_TIMELINE_ITEMS = 100
# Lower bound for the incident run-id lookup limit in list_runs().
_MAX_LIST_CONTEXT_ROWS = 500
_RUN_CONTEXT_QUERY_CHUNK_SIZE = 500
_MAX_STEP_SUMMARY_CHARS = 128
# AI route status timeouts, in seconds per their names; consumers are outside this view.
_AI_ROUTE_STATUS_SELECT_TIMEOUT_SECONDS = 12.0
_AI_ROUTE_STATUS_CONNECTIVITY_TIMEOUT_SECONDS = 2.5
_REMEDIATION_HISTORY_LIMIT = 20
_ADR100_GATE5_PROJECTION_TRIGGER = "adr100_runtime_replay_gate5"
# Cache TTLs read from the environment once at import time (default "20").
# NOTE: a non-integer environment value makes int() raise ValueError during import.
_CALLBACK_REPLY_CACHE_TTL_SECONDS = int(
os.getenv("AWOOOP_CALLBACK_REPLY_CACHE_TTL_SECONDS", "20")
)
_AI_ALERT_CARD_CACHE_TTL_SECONDS = int(
os.getenv("AWOOOP_AI_ALERT_CARD_CACHE_TTL_SECONDS", "20")
)
# Matches "INC-" + exactly 8 digits + "-" + 4 or more uppercase letters/digits,
# delimited by word boundaries.
_INCIDENT_ID_RE = re.compile(r"\bINC-\d{8}-[A-Z0-9]{4,}\b")
# Accepted values for the remediation_status list filter.
_REMEDIATION_STATUS_FILTERS = {
"mcp_observed",
"no_evidence",
"read_only_dry_run",
"write_observed",
"blocked",
"observed",
}
# Accepted values for the callback_reply_status list filter.
_CALLBACK_REPLY_STATUS_FILTERS = {
"no_callback",
"sent",
"fallback_sent",
"rescue_sent",
"failed",
"observed",
}
# Filter value -> raw status compared against source_envelope callback_reply.status.
# "no_callback" and "observed" intentionally have no raw-status equivalent here.
_CALLBACK_REPLY_RAW_STATUS_BY_FILTER = {
"sent": "callback_reply_sent",
"fallback_sent": "callback_reply_fallback_sent",
"rescue_sent": "callback_reply_rescue_sent",
"failed": "callback_reply_failed",
}
# Callback action / CI-CD stage tokens: 1-64 chars of letters, digits, "_", ":" or "-"
# (case-insensitive).
_CALLBACK_REPLY_ACTION_RE = re.compile(r"^[a-z0-9_:-]{1,64}$", re.IGNORECASE)
_CICD_STATUS_FILTERS = {"running", "success", "failed", "pending"}
_CICD_STAGE_RE = re.compile(r"^[a-z0-9_:-]{1,64}$", re.IGNORECASE)
_AI_ROUTE_STATUS_SCHEMA_VERSION = "awooop_ai_route_status_v1"
# Type arguments of OllamaWorkloadType collected into a set
# (presumably a typing.Literal of workload names — confirm in ollama_endpoint_resolver).
_AI_ROUTE_WORKLOADS = set(get_args(OllamaWorkloadType))
_AI_ROUTE_REPAIR_EVIDENCE_PROVIDER = "ai_route_repair"
_AI_ROUTE_REPAIR_EVIDENCE_STAGE = "repair_diagnosis"
# --- Source-provider correlation settings ---------------------------------------
_SOURCE_CORRELATION_SCHEMA_VERSION = "source_provider_correlation_v1"
_SOURCE_CORRELATION_PROVIDERS = ("sentry", "signoz")
_SOURCE_CORRELATION_EVENT_LIMIT = 200
_SOURCE_CORRELATION_LOOKBACK_DAYS = 7
_SOURCE_CORRELATION_PRE_WINDOW_HOURS = 2
# --- Schema version tags for callback-related payloads --------------------------
_KM_STALE_COMPLETION_CALLBACK_SCHEMA_VERSION = (
"km_stale_owner_review_completion_callback_summary_v1"
)
_CALLBACK_EVIDENCE_CAPTURE_STATUS_SCHEMA_VERSION = "callback_evidence_capture_status_v1"
_CALLBACK_REPLY_AUDIT_SUMMARY_SCHEMA_VERSION = (
"telegram_callback_reply_audit_summary_v1"
)
# =============================================================================
# Tenants
# =============================================================================
# Repository-relative paths of the committed JSON snapshots that back the
# tenant asset inventory; _load_committed_snapshot() resolves them against
# the repository root.
_PUBLIC_GATEWAY_INVENTORY_SNAPSHOT = (
"docs/security/public-gateway-preflight-inventory.snapshot.json"
)
_SOURCE_CONTROL_READINESS_SNAPSHOT = (
"docs/security/source-control-primary-readiness-gate.snapshot.json"
)
# Internal product id -> public alias profile. Public ids are sequential
# ("PRD-001", "PRD-002", ...) in declaration order, so they are generated
# from the position of each (internal id, public name) pair.
# NOTE(review): the split "agent-" + "bounty-" + "protocol" literal is kept
# as written in the original; presumably deliberate — confirm before joining.
_PUBLIC_PRODUCT_PROFILES: dict[str, dict[str, str]] = {
    internal_id: {"public_id": f"PRD-{ordinal:03d}", "public_name": public_name}
    for ordinal, (internal_id, public_name) in enumerate(
        (
            ("awoooi", "核心營運平台"),
            ("ewoooc", "行動商務產品"),
            ("2026-fifa-world-cup", "世界盃研究站"),
            ("vibework", "任務媒合產品"),
            ("agent-" + "bounty-" + "protocol", "代理賞金協議"),
            ("stockplatform", "股票研究平台"),
            ("bitan-pharmacy", "藥局網站"),
            ("tsenyang-website", "官方形象網站"),
            ("vtuber", "直播角色網站"),
            ("wooo-open-design", "設計系統"),
            ("workflow-automation", "流程自動化工具"),
            ("data-workspace", "資料工作區"),
            ("security-secrets-platform", "機密管理候選"),
            ("ai-model-gateway", "模型服務閘道"),
            ("source-control", "版本與交付工具"),
            ("observability-tooling", "監控與可觀測工具"),
        ),
        start=1,
    )
}
def _public_product_profile(product_id: str) -> dict[str, str]:
    """Return the public alias profile registered for an internal product id.

    Ids without an entry in ``_PUBLIC_PRODUCT_PROFILES`` resolve to the
    generic ``PRD-ROUTE`` placeholder profile.
    """
    registered = _PUBLIC_PRODUCT_PROFILES.get(product_id)
    if registered is not None:
        return registered
    return {"public_id": "PRD-ROUTE", "public_name": "公開路由候選"}
def _public_product_fields(product_id: str) -> dict[str, str]:
    """Return the public-facing identity fields for an internal product id.

    The public id is used for both ``product_id`` and ``project_id`` so the
    internal identifiers never appear in the response.
    """
    alias = _public_product_profile(product_id)
    public_code = alias["public_id"]
    return dict(
        product_id=public_code,
        product_name=alias["public_name"],
        project_id=public_code,
    )
# Public domain -> internal product descriptor (product_id, product_name,
# category). Domains are grouped by the product they belong to; every domain
# receives its own descriptor dict, and the resulting key order follows the
# group order below.
_DOMAIN_PRODUCT_OVERRIDES: dict[str, dict[str, str]] = {
    domain: {
        "product_id": product_id,
        "product_name": product_name,
        "category": category,
    }
    for product_id, product_name, category, domains in (
        (
            "awoooi",
            "核心營運平台",
            "core_platform",
            (
                "aiops.wooo.work",
                "awoooi.wooo.work",
                "api.awoooi.wooo.work",
                "app.awoooi.wooo.work",
                "api.aiops.wooo.work",
                "clawbot.aiops.wooo.work",
                "command.aiops.wooo.work",
                "security.wooo.work",
            ),
        ),
        ("ewoooc", "行動商務產品", "business_product", ("mo.wooo.work",)),
        ("2026-fifa-world-cup", "世界盃研究站", "business_product", ("2026fifa.wooo.work",)),
        ("vibework", "任務媒合產品", "business_product", ("vibework.wooo.work",)),
        ("agent-bounty-protocol", "代理賞金協議", "business_product", ("agent.wooo.work",)),
        ("stockplatform", "股票研究平台", "business_product", ("stock.wooo.work",)),
        ("bitan-pharmacy", "藥局網站", "business_product", ("bitan.wooo.work",)),
        (
            "tsenyang-website",
            "官方形象網站",
            "public_site",
            ("tsenyang.com", "www.tsenyang.com", "tsenyang.wooo.work"),
        ),
        ("vtuber", "直播角色網站", "public_site", ("vtuber.wooo.work",)),
        ("wooo-open-design", "設計系統", "platform_tool", ("design.wooo.work",)),
        ("data-workspace", "資料工作區", "platform_tool", ("grist.wooo.work",)),
        ("workflow-automation", "流程自動化工具", "platform_tool", ("n8n.wooo.work",)),
        ("security-secrets-platform", "機密管理候選", "platform_tool", ("vault.wooo.work",)),
        ("ai-model-gateway", "模型服務閘道", "platform_tool", ("ollama.wooo.work",)),
        (
            "source-control",
            "版本與交付工具",
            "platform_tool",
            (
                "gitea.wooo.work",
                "gitlab.wooo.work",
                "harbor.wooo.work",
                "registry.wooo.work",
            ),
        ),
        (
            "observability-tooling",
            "監控與可觀測工具",
            "platform_tool",
            (
                "sentry.wooo.work",
                "signoz.wooo.work",
                "langfuse.wooo.work",
                "monitor.wooo.work",
            ),
        ),
    )
    for domain in domains
}
# Canonical product surfaces: one descriptor per product with its internal
# ids, category, owning lane, coverage status, declared public routes and
# the source keys matched against the source-control readiness snapshot.
_CANONICAL_PRODUCT_SURFACES: tuple[dict[str, Any], ...] = (
    dict(
        product_id="awoooi",
        product_name="AWOOOI / AwoooP / IwoooS",
        project_id="awoooi",
        category="core_platform",
        surface_kind="platform_product",
        owner_lane="S4.9-S4.13",
        coverage_status="read_only_visible",
        public_routes=[
            "awoooi.wooo.work",
            "aiops.wooo.work",
            "api.awoooi.wooo.work",
            "app.awoooi.wooo.work",
            "api.aiops.wooo.work",
            "clawbot.aiops.wooo.work",
            "command.aiops.wooo.work",
            "security.wooo.work",
        ],
        source_keys=["wooo/awoooi"],
    ),
    dict(
        product_id="ewoooc",
        product_name="EwoooC / Mo",
        project_id="ewoooc",
        category="business_product",
        surface_kind="product_console",
        owner_lane="S4.10",
        coverage_status="owner_response_required",
        public_routes=["mo.wooo.work"],
        source_keys=["wooo/ewoooc / root/momo-pro-system / momo working trees"],
    ),
    dict(
        product_id="2026-fifa-world-cup",
        product_name="2026 FIFA World Cup",
        project_id="2026FIFAWorldCup",
        category="business_product",
        surface_kind="product_site",
        owner_lane="product_onboarding",
        coverage_status="owner_response_required",
        public_routes=["2026fifa.wooo.work"],
        source_keys=["2026FIFAWorldCup"],
    ),
    dict(
        product_id="vibework",
        product_name="VibeWork",
        project_id="vibework",
        category="business_product",
        surface_kind="product_site",
        owner_lane="S4.10",
        coverage_status="owner_response_required",
        public_routes=["vibework.wooo.work"],
        source_keys=["vibework"],
    ),
    dict(
        product_id="agent-bounty-protocol",
        product_name="Agent Bounty Protocol",
        project_id="agent-bounty-protocol",
        category="business_product",
        surface_kind="agent_protocol",
        owner_lane="S4.10",
        coverage_status="owner_response_required",
        public_routes=["agent.wooo.work"],
        source_keys=["agent-bounty-protocol"],
    ),
    dict(
        product_id="stockplatform",
        product_name="StockPlatform",
        project_id="stockplatform",
        category="business_product",
        surface_kind="product_site",
        owner_lane="P0-CRON-001",
        coverage_status="read_only_visible",
        public_routes=["stock.wooo.work"],
        source_keys=["stockplatform"],
    ),
    dict(
        product_id="bitan-pharmacy",
        product_name="Bitan Pharmacy",
        project_id="bitan-pharmacy",
        category="business_product",
        surface_kind="product_site",
        owner_lane="P0-CRON-002",
        coverage_status="read_only_visible",
        public_routes=["bitan.wooo.work"],
        source_keys=["bitan-pharmacy"],
    ),
    dict(
        product_id="tsenyang-website",
        product_name="TsenYang Website",
        project_id="tsenyang-website",
        category="public_site",
        surface_kind="public_site",
        owner_lane="S4.10",
        coverage_status="owner_response_required",
        public_routes=["tsenyang.com", "www.tsenyang.com", "tsenyang.wooo.work"],
        source_keys=["tsenyang-website"],
    ),
    dict(
        product_id="vtuber",
        product_name="VTuber",
        project_id="vtuber",
        category="public_site",
        surface_kind="public_site",
        owner_lane="public_gateway",
        coverage_status="read_only_visible",
        public_routes=["vtuber.wooo.work"],
        source_keys=[],
    ),
    dict(
        product_id="wooo-open-design",
        product_name="WOOO Open Design",
        project_id="open-design",
        category="platform_tool",
        surface_kind="design_system",
        owner_lane="S4.10",
        coverage_status="read_only_visible",
        public_routes=["design.wooo.work"],
        source_keys=["open-design"],
    ),
    dict(
        product_id="workflow-automation",
        product_name="Workflow Automation / n8n",
        project_id="__platform__",
        category="platform_tool",
        surface_kind="automation_tool",
        owner_lane="observability_tooling",
        coverage_status="read_only_candidate",
        public_routes=["n8n.wooo.work"],
        source_keys=[],
    ),
    dict(
        product_id="data-workspace",
        product_name="Data Workspace / Grist",
        project_id="__platform__",
        category="platform_tool",
        surface_kind="data_tool",
        owner_lane="observability_tooling",
        coverage_status="read_only_candidate",
        public_routes=["grist.wooo.work"],
        source_keys=[],
    ),
    dict(
        product_id="security-secrets-platform",
        product_name="Security / Vault",
        project_id="__platform__",
        category="platform_tool",
        surface_kind="security_tool",
        owner_lane="S4.12",
        coverage_status="read_only_candidate",
        public_routes=["vault.wooo.work"],
        source_keys=[],
    ),
    dict(
        product_id="ai-model-gateway",
        product_name="AI Model Gateway / Ollama",
        project_id="__platform__",
        category="platform_tool",
        surface_kind="ai_provider_tool",
        owner_lane="P1-004",
        coverage_status="read_only_candidate",
        public_routes=["ollama.wooo.work"],
        source_keys=[],
    ),
    dict(
        product_id="source-control",
        product_name="Source Control / DevOps",
        project_id="__platform__",
        category="platform_tool",
        surface_kind="tooling",
        owner_lane="S4.9-S4.13",
        coverage_status="read_only_visible",
        public_routes=[
            "gitea.wooo.work",
            "gitlab.wooo.work",
            "harbor.wooo.work",
            "registry.wooo.work",
        ],
        source_keys=["wooo/wooo-infra-config"],
    ),
    dict(
        product_id="observability-tooling",
        product_name="Observability / LLMOps",
        project_id="__platform__",
        category="platform_tool",
        surface_kind="tooling",
        owner_lane="P1-003",
        coverage_status="read_only_visible",
        public_routes=[
            "sentry.wooo.work",
            "signoz.wooo.work",
            "langfuse.wooo.work",
            "monitor.wooo.work",
        ],
        source_keys=[],
    ),
)
# Extra public routes that are listed even when the gateway snapshot does
# not contain them. Each triple is (domain, coverage_status, source).
_ADDITIONAL_PUBLIC_ROUTES: tuple[dict[str, Any], ...] = tuple(
    {"domain": domain, "coverage_status": coverage_status, "source": source}
    for domain, coverage_status, source in (
        ("awoooi.wooo.work", "read_only_visible", "awoooi_canonical_route"),
        ("api.awoooi.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("app.awoooi.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("api.aiops.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("clawbot.aiops.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("command.aiops.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("security.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("2026fifa.wooo.work", "owner_response_required", "cross_workspace_product_candidate"),
        ("vibework.wooo.work", "owner_response_required", "vibework_onboarding_handoff"),
        ("agent.wooo.work", "owner_response_required", "agent_bounty_onboarding_handoff"),
        ("tsenyang.wooo.work", "owner_response_required", "repo_domain_scan_candidate"),
        ("design.wooo.work", "read_only_visible", "open_design_route_candidate"),
        ("grist.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("n8n.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("vault.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("ollama.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
        ("monitor.wooo.work", "read_only_candidate", "repo_domain_scan_candidate"),
    )
)
def _load_committed_snapshot(relative_path: str) -> dict[str, Any]:
    """Load a committed JSON snapshot, resolved against the repository root.

    The load is best-effort: every failure is logged as a warning and an
    empty dict is returned, so callers can always use ``.get()`` on the
    result.

    Args:
        relative_path: Snapshot path relative to the repository root.

    Returns:
        The parsed JSON object, or ``{}`` when the file is missing,
        unreadable, not valid JSON, or its top-level value is not an object.
    """
    repo_root = resolve_repo_root(Path(__file__))
    snapshot_path = repo_root / relative_path
    try:
        payload = json.loads(snapshot_path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        logger.warning("tenant_asset_snapshot_missing", path=relative_path)
        return {}
    except json.JSONDecodeError as exc:
        logger.warning(
            "tenant_asset_snapshot_invalid_json",
            path=relative_path,
            error=str(exc),
        )
        return {}
    except (OSError, UnicodeDecodeError) as exc:
        # Permission problems, a directory at the path, or non-UTF-8 bytes
        # must not take down the read-only inventory endpoint.
        logger.warning(
            "tenant_asset_snapshot_unreadable",
            path=relative_path,
            error=str(exc),
        )
        return {}
    if not isinstance(payload, dict):
        # Valid JSON that is not an object (list, string, null, ...) would
        # break every caller that does ``snapshot.get(...)``.
        logger.warning(
            "tenant_asset_snapshot_not_object",
            path=relative_path,
            payload_type=type(payload).__name__,
        )
        return {}
    return payload
def _domain_product(domain: str) -> dict[str, str]:
    """Map a public domain to its public product id, name and category.

    Domains without an override are reported as the generic public-route
    candidate. The product id and name come from the public alias profile,
    while the category comes from the internal descriptor.
    """
    descriptor = _DOMAIN_PRODUCT_OVERRIDES.get(domain)
    if descriptor is None:
        descriptor = {
            "product_id": "public-route",
            "product_name": "公開路由候選",
            "category": "public_route",
        }
    alias = _public_product_fields(descriptor["product_id"])
    return dict(
        product_id=alias["product_id"],
        product_name=alias["product_name"],
        category=descriptor["category"],
    )
def _route_asset_from_gateway_row(row: Mapping[str, Any]) -> dict[str, Any]:
    """Convert one gateway snapshot ``route_impacts`` row into a route asset.

    Coverage is ``"verified"`` only when both the route smoke test and the
    owner response are accepted; otherwise it is
    ``"read_only_pending_smoke"``. Missing counters default to 0 and a
    missing control tier to ``"unknown"``.
    """

    def _as_count(field: str) -> int:
        # Absent, None or otherwise falsy counters are reported as 0.
        return int(row.get(field) or 0)

    domain = str(row.get("domain") or "")
    product = _domain_product(domain)
    smoke_ok = bool(row.get("route_smoke_accepted"))
    owner_ok = bool(row.get("owner_response_accepted"))
    if smoke_ok and owner_ok:
        coverage = "verified"
    else:
        coverage = "read_only_pending_smoke"

    asset: dict[str, Any] = {"domain": domain}
    asset["product_id"] = product["product_id"]
    asset["product_name"] = product["product_name"]
    asset["category"] = product["category"]
    asset["coverage_status"] = coverage
    asset["control_tier"] = row.get("control_tier") or "unknown"
    asset["upstream_count"] = _as_count("upstream_count")
    asset["admin_route_count"] = _as_count("admin_route_count")
    asset["websocket_route_count"] = _as_count("websocket_route_count")
    asset["public_route_smoke_required"] = bool(row.get("public_route_smoke_required"))
    asset["route_smoke_accepted"] = smoke_ok
    asset["owner_response_accepted"] = owner_ok
    asset["runtime_gate_count"] = 0
    asset["action_button_count"] = 0
    asset["source"] = "public_gateway_preflight_inventory"
    return asset
def _build_route_assets(public_gateway_snapshot: Mapping[str, Any]) -> list[dict[str, Any]]:
    """Build the public route asset list.

    Gateway snapshot rows come first (first occurrence of each non-empty
    domain wins), followed by every entry of ``_ADDITIONAL_PUBLIC_ROUTES``
    whose domain the snapshot did not already provide.
    """

    def _candidate_asset(extra: Mapping[str, Any]) -> dict[str, Any]:
        # Static candidates carry no gateway evidence: counters are zero,
        # acceptance flags are false and a smoke test is still required.
        domain = str(extra["domain"])
        product = _domain_product(domain)
        return {
            "domain": domain,
            "product_id": product["product_id"],
            "product_name": product["product_name"],
            "category": product["category"],
            "coverage_status": extra["coverage_status"],
            "control_tier": "candidate",
            "upstream_count": 0,
            "admin_route_count": 0,
            "websocket_route_count": 0,
            "public_route_smoke_required": True,
            "route_smoke_accepted": False,
            "owner_response_accepted": False,
            "runtime_gate_count": 0,
            "action_button_count": 0,
            "source": extra["source"],
        }

    collected: list[dict[str, Any]] = []
    gateway_domains: set[str] = set()
    for impact in public_gateway_snapshot.get("route_impacts") or []:
        if isinstance(impact, Mapping):
            asset = _route_asset_from_gateway_row(impact)
            domain = asset["domain"]
            if domain and domain not in gateway_domains:
                gateway_domains.add(domain)
                collected.append(asset)

    collected.extend(
        _candidate_asset(extra)
        for extra in _ADDITIONAL_PUBLIC_ROUTES
        if str(extra["domain"]) not in gateway_domains
    )
    return collected
def _source_scope_id(index: int) -> str:
    """Return the redacted source scope id for a position, e.g. ``SRC-007``."""
    return "SRC-" + format(index, "03d")
def _public_source_risk(value: Any) -> str:
    """Normalize a raw risk value to ``high``/``medium``/``low`` or ``unknown``.

    The value is stringified, stripped and lower-cased before matching;
    falsy values and anything outside the three known levels yield
    ``"unknown"``.
    """
    level = str(value or "").strip().lower()
    return level if level in ("high", "medium", "low") else "unknown"
def _public_source_readiness(value: Any) -> str:
    """Collapse a raw readiness state into a coarse public readiness label.

    The raw value is stringified, stripped and lower-cased, then checked
    against keyword rules in priority order; the first matching rule wins.
    Values that match no rule are reported as ``"need_owner_evidence"``.
    """
    state = str(value or "").strip().lower()
    # (label, alternatives): a rule matches when ALL keywords of ANY one
    # alternative occur in the state. Order defines precedence.
    rules = (
        ("need_refs_evidence", (("refs", "parity"),)),
        ("need_target_decision", (("target", "decision"),)),
        (
            "need_internal_remote_decision",
            (("internal_remote",), ("remote", "decision")),
        ),
        ("need_scope_review", (("scope", "review"),)),
    )
    for label, alternatives in rules:
        if any(all(keyword in state for keyword in group) for group in alternatives):
            return label
    return "need_owner_evidence"
def _public_surface_source_refs(surface: Mapping[str, Any]) -> list[str]:
    """Return redacted references, one per source key of a product surface.

    Each source key is replaced by its 1-based position (``SRCREF-001``,
    ``SRCREF-002``, ...); the key text itself is not exposed.
    """
    refs: list[str] = []
    for ordinal, _hidden_key in enumerate(surface.get("source_keys") or [], start=1):
        refs.append("SRCREF-" + format(ordinal, "03d"))
    return refs
def _source_control_match_keys(source_control_snapshot: Mapping[str, Any]) -> set[str]:
    """Collect every non-empty ``source_key`` / ``github_repo`` value.

    Rows of ``repo_readiness`` that are not mappings are ignored.
    """
    readiness_rows = source_control_snapshot.get("repo_readiness") or []
    candidates = (
        str(entry.get(field) or "")
        for entry in readiness_rows
        if isinstance(entry, Mapping)
        for field in ("source_key", "github_repo")
    )
    return {candidate for candidate in candidates if candidate}
def _build_source_repo_assets(
    source_control_snapshot: Mapping[str, Any],
) -> list[dict[str, Any]]:
    """Build redacted source repository assets from the readiness snapshot.

    Every mapping row of ``repo_readiness`` becomes one asset identified only
    by its positional scope id (``SRC-001``, ...). When the row's
    ``source_key`` or ``github_repo`` is listed in a canonical product
    surface, the asset is attributed to that product's public alias;
    otherwise the scope id doubles as product id and name.
    """
    assets: list[dict[str, Any]] = []
    readiness_rows = source_control_snapshot.get("repo_readiness") or []
    for position, entry in enumerate(readiness_rows, start=1):
        if not isinstance(entry, Mapping):
            continue
        scope_id = _source_scope_id(position)
        source_key = str(entry.get("source_key") or "")
        github_repo = str(entry.get("github_repo") or "")

        # First canonical surface that lists either identifier, if any.
        owner_surface = next(
            (
                surface
                for surface in _CANONICAL_PRODUCT_SURFACES
                if source_key in surface.get("source_keys", [])
                or github_repo in surface.get("source_keys", [])
            ),
            None,
        )
        if owner_surface is None:
            product_id = product_name = scope_id
            category = "source_repo"
        else:
            alias = _public_product_fields(str(owner_surface["product_id"]))
            product_id = alias["product_id"]
            product_name = alias["product_name"]
            category = str(owner_surface["category"])

        assets.append(
            {
                # Repository identifiers are replaced by the scope id.
                "github_repo": scope_id,
                "source_key": scope_id,
                "source_scope_id": scope_id,
                "source_namespace_redacted": True,
                "product_id": product_id,
                "product_name": product_name,
                "category": category,
                "scope_status": entry.get("scope_status") or "unknown",
                "readiness_state": _public_source_readiness(entry.get("readiness_state")),
                "risk": _public_source_risk(entry.get("risk")),
                "primary_ready": bool(entry.get("primary_ready")),
                "blocker_count": len(entry.get("blockers") or []),
                "runtime_gate_count": 0,
                "action_button_count": 0,
            }
        )
    return assets
def build_tenant_asset_inventory(tenants: list[Mapping[str, Any]]) -> dict[str, Any]:
    """Build the read-only product / route / source-control inventory.

    Combines the two committed snapshots with the canonical product surface
    table. Only ``len(tenants)`` is taken from the argument.

    Returns:
        A dict with ``schema_version``, ``mode``, ``evidence_refs``,
        ``summary``, ``products``, ``public_routes``, ``source_repos`` and
        ``boundaries``.
    """
    gateway_snapshot = _load_committed_snapshot(_PUBLIC_GATEWAY_INVENTORY_SNAPSHOT)
    source_snapshot = _load_committed_snapshot(_SOURCE_CONTROL_READINESS_SNAPSHOT)
    route_assets = _build_route_assets(gateway_snapshot)
    repo_assets = _build_source_repo_assets(source_snapshot)
    known_domains = {asset["domain"] for asset in route_assets}
    known_source_keys = _source_control_match_keys(source_snapshot)

    product_entries: list[dict[str, Any]] = []
    for surface in _CANONICAL_PRODUCT_SURFACES:
        declared_routes = surface["public_routes"]
        # Start from the surface, then overlay the public alias fields and
        # the redacted source references so internal ids are not exposed.
        entry: dict[str, Any] = dict(surface)
        entry.update(_public_product_fields(str(surface["product_id"])))
        entry["source_keys"] = _public_surface_source_refs(surface)
        entry["public_route_count"] = sum(
            1 for domain in declared_routes if domain in known_domains
        )
        entry["source_repo_count"] = sum(
            1 for key in surface["source_keys"] if key in known_source_keys
        )
        entry["missing_public_routes"] = [
            domain for domain in declared_routes if domain not in known_domains
        ]
        entry["owner_response_received_count"] = 0
        entry["owner_response_accepted_count"] = 0
        entry["runtime_gate_count"] = 0
        entry["action_button_count"] = 0
        product_entries.append(entry)

    source_totals = source_snapshot.get("summary") or {}
    gateway_totals = gateway_snapshot.get("summary") or {}
    summary = {
        "tenant_table_count": len(tenants),
        "product_surface_count": len(product_entries),
        "public_route_count": len(route_assets),
        "public_gateway_snapshot_route_count": int(
            gateway_totals.get("route_impact_count") or 0
        ),
        # Falls back to the number of repo assets built above when the
        # snapshot summary has no candidate count.
        "source_candidate_repo_count": int(
            source_totals.get("candidate_repo_count") or len(repo_assets)
        ),
        "source_in_scope_repo_count": int(source_totals.get("in_scope_repo_count") or 0),
        "source_primary_ready_count": int(source_totals.get("primary_ready_count") or 0),
        "owner_response_received_count": 0,
        "owner_response_accepted_count": 0,
        "runtime_gate_count": 0,
        "action_button_count": 0,
    }
    evidence_refs = [
        _PUBLIC_GATEWAY_INVENTORY_SNAPSHOT,
        _SOURCE_CONTROL_READINESS_SNAPSHOT,
        "docs/security/SECURITY-MIRROR-STATUS-ROLLUP.md",
    ]
    boundaries = [
        "只讀資產台帳;不修改租戶、路由、主機或專案庫。",
        "公開回應只顯示產品代號、範圍代號與繁中管控狀態;不揭露原始負責人、命名空間或完整專案庫名稱。",
        "負責人回覆尚未接受;候選範圍不得視為已核准。",
        "執行期閘門維持關閉;不啟動掃描、修復、部署或主機操作。",
        "未取得正式決策前,不建立專案庫、不同步分支或標籤、不修改工作流程或公開路由。",
    ]
    return {
        "schema_version": "awooop_tenant_asset_inventory_v1",
        "mode": "read_only_global_asset_inventory",
        "evidence_refs": evidence_refs,
        "summary": summary,
        "products": product_entries,
        "public_routes": route_assets,
        "source_repos": repo_assets,
        "boundaries": boundaries,
    }
async def list_tenants() -> dict[str, Any]:
    """List every AwoooP tenant for the Operator Console.

    Rows come from the ``awooop_operator_list_projects()`` database function
    (per the original note, this listing is not filtered by RLS). The
    response also carries the read-only asset inventory.

    Returns:
        ``{"tenants": [...], "total": <int>, "asset_inventory": {...}}``.
    """
    columns = (
        "project_id",
        "display_name",
        "migration_mode",
        "budget_limit_usd",
        "is_active",
        "created_at",
    )
    query = text("""
SELECT
project_id,
display_name,
migration_mode,
budget_limit_usd,
is_active,
created_at
FROM awooop_operator_list_projects()
""")
    async with get_db_context("awoooi") as db:
        outcome = await db.execute(query)
        tenants = [
            {column: record[column] for column in columns}
            for record in outcome.mappings().all()
        ]
    inventory = build_tenant_asset_inventory(tenants)
    return {
        "tenants": tenants,
        "total": len(tenants),
        "asset_inventory": inventory,
    }
# =============================================================================
# Contracts
# =============================================================================
async def list_contracts(
    project_id: str | None,
    lifecycle_status: str | None,
) -> dict[str, Any]:
    """List contract revisions, newest first.

    Args:
        project_id: Restrict to this project when not ``None``.
        lifecycle_status: Restrict to this lifecycle status when not ``None``.

    Returns:
        ``{"contracts": [...], "total": <int>}``. ``total`` counts every
        matching revision, while ``contracts`` holds at most
        ``_MAX_CONTRACTS`` of them.
    """
    revision = AwoooPContractRevision
    conditions = []
    if project_id is not None:
        conditions.append(revision.project_id == project_id)
    if lifecycle_status is not None:
        conditions.append(revision.lifecycle_status == lifecycle_status)

    query = select(revision).order_by(revision.created_at.desc())
    for condition in conditions:
        query = query.where(condition)

    exported_fields = (
        "revision_id",
        "contract_id",
        "contract_family",
        "lifecycle_status",
        "body_hash",
        "version_major",
        "version_minor",
        "created_at",
        "project_id",
    )
    async with get_db_context("awoooi") as db:
        # The count runs over the unlimited query so it reflects all matches.
        counted = await db.execute(select(func.count()).select_from(query.subquery()))
        total = counted.scalar_one()
        fetched = await db.execute(query.limit(_MAX_CONTRACTS))
        # Attributes are read while the DB context is still open.
        contracts = [
            {field: getattr(item, field) for field in exported_fields}
            for item in fetched.scalars().all()
        ]
    return {"contracts": contracts, "total": total}
# =============================================================================
# Runs
# =============================================================================
async def list_runs(
project_id: str | None,
state: str | None,
remediation_status: str | None,
callback_reply_status: str | None,
incident_id: str | None,
page: int,
per_page: int,
) -> dict[str, Any]:
"""List runs with project/state/evidence/callback/incident filters and pagination.

Args:
project_id: Restrict to this project when not None.
state: Restrict to this run state when not None.
remediation_status: Optional filter applied to each run's remediation summary.
callback_reply_status: Optional filter applied to each run's callback reply summary.
incident_id: Optional incident id; matching run ids are resolved before the main query.
page: Page number; the offset is (page - 1) * per_page, so 1-based paging is assumed.
per_page: Page size.

Returns:
{"runs": [...], "total": <int>, "page": page, "per_page": per_page}. Every run
entry carries its remediation_summary and callback_reply_summary.
"""
# The three summary-based filters are checked by the _validate_* helpers
# (defined elsewhere in this file) before any database work starts.
_validate_remediation_status_filter(remediation_status)
_validate_callback_reply_status_filter(callback_reply_status)
_validate_incident_id_filter(incident_id)
async with get_db_context("awoooi") as db:
# Base query: newest runs first, narrowed by the column-level filters.
stmt = select(AwoooPRunState).order_by(AwoooPRunState.created_at.desc())
if project_id is not None:
stmt = stmt.where(AwoooPRunState.project_id == project_id)
if state is not None:
stmt = stmt.where(AwoooPRunState.state == state)
if incident_id is not None:
# Resolve candidate run ids for the incident first; the lookup limit is
# at least _MAX_LIST_CONTEXT_ROWS and grows with the page size.
incident_run_ids = await _find_run_ids_for_incident_filter(
db,
project_id=project_id,
incident_id=incident_id,
limit=max(per_page * 20, _MAX_LIST_CONTEXT_ROWS),
)
# No run references the incident: return an empty page immediately.
if not incident_run_ids:
return {"runs": [], "total": 0, "page": page, "per_page": per_page}
stmt = stmt.where(AwoooPRunState.run_id.in_(incident_run_ids))
# NOTE(review): assumes page >= 1; a smaller value gives a negative offset — confirm the router validates it.
offset = (page - 1) * per_page
if remediation_status or incident_id or callback_reply_status:
# Summary-based filters cannot be expressed in SQL here: all candidate
# rows are loaded (no LIMIT), summarized, filtered in Python, and the
# page is sliced from the filtered list.
result = await db.execute(stmt)
candidate_rows = list(result.scalars().all())
context_limit = _list_filter_context_limit(len(candidate_rows))
inbound_by_run, outbound_by_run = await _load_run_message_context(
db,
candidate_rows,
limit=context_limit,
)
remediation_summaries = await _build_run_remediation_summaries(
runs=candidate_rows,
inbound_by_run=inbound_by_run,
outbound_by_run=outbound_by_run,
)
callback_reply_summaries = {
row.run_id: _run_callback_reply_summary(outbound_by_run.get(row.run_id, []))
for row in candidate_rows
}
# A row is kept only when all three matchers accept it.
filtered_rows = [
row
for row in candidate_rows
if _remediation_summary_matches_status(
remediation_summaries.get(row.run_id),
remediation_status,
)
and _remediation_summary_matches_incident_id(
remediation_summaries.get(row.run_id),
incident_id,
)
and _callback_reply_summary_matches_status(
callback_reply_summaries.get(row.run_id),
callback_reply_status,
)
]
# total counts the filtered rows, not the SQL candidates.
total = len(filtered_rows)
rows = filtered_rows[offset : offset + per_page]
else:
# No summary-based filter: count and paginate directly in SQL.
count_stmt = select(func.count()).select_from(stmt.subquery())
total_result = await db.execute(count_stmt)
total = total_result.scalar_one()
stmt = stmt.offset(offset).limit(per_page)
result = await db.execute(stmt)
rows = list(result.scalars().all())
# Message context and summaries for the rows of the returned page.
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether this part belongs to the else branch only or runs after both
# branches — confirm against the repository copy.
inbound_by_run, outbound_by_run = await _load_run_message_context(db, rows)
remediation_summaries = await _build_run_remediation_summaries(
runs=rows,
inbound_by_run=inbound_by_run,
outbound_by_run=outbound_by_run,
)
callback_reply_summaries = {
row.run_id: _run_callback_reply_summary(outbound_by_run.get(row.run_id, []))
for row in rows
}
# Serialize the page; summaries are looked up by run_id.
runs = [
{
"run_id": r.run_id,
"project_id": r.project_id,
"agent_id": r.agent_id,
"state": r.state,
"is_shadow": r.is_shadow,
"cost_usd": r.cost_usd,
"step_count": r.step_count,
"created_at": r.created_at,
"timeout_at": r.timeout_at,
"remediation_summary": remediation_summaries.get(r.run_id),
"callback_reply_summary": callback_reply_summaries.get(r.run_id),
}
for r in rows
]
return {"runs": runs, "total": total, "page": page, "per_page": per_page}
async def list_callback_replies(
    project_id: str | None,
    callback_reply_status: str | None,
    action: str | None,
    incident_id: str | None,
    page: int,
    per_page: int,
    refresh: bool = False,
) -> dict[str, Any]:
    """List Telegram detail/history callback reply evidence without changing runtime state.

    The filters are first passed through the ``_validate_*`` helpers. Unless
    ``refresh`` is true, a cached response for the same filter/page
    combination is returned when one exists. Otherwise the matching rows of
    ``awooop_outbound_message`` are counted and paged, every item is enriched
    with an AwoooP status chain and a KM stale-completion summary, and the
    response is handed to ``store_operator_summary_async``.

    Args:
        project_id: Optional project filter. When omitted no project clause is
            added to the listing, while the DB context and the audit summary
            use ``"awoooi"``.
        callback_reply_status: Optional status filter; ``"no_callback"``
            short-circuits to an empty page.
        action: Optional callback action filter; the value returned by the
            validator is compared with the lower-cased stored action.
        incident_id: Optional incident filter.
        page: 1-based page number (offset is ``(page - 1) * per_page``).
        per_page: Page size.
        refresh: Skip the cache lookup when true.

    Returns:
        Whatever ``store_operator_summary_async`` returns for a response with
        ``items``, ``total``, ``page``, ``per_page`` and ``summary``. The
        ``"no_callback"`` shortcut returns a dict without ``summary``.
    """
    _validate_callback_reply_status_filter(callback_reply_status)
    callback_action = _validate_callback_reply_action_filter(action)
    _validate_incident_id_filter(incident_id)
    normalized_project_id = project_id or "awoooi"
    if callback_reply_status == "no_callback":
        return {
            "items": [],
            "total": 0,
            "page": page,
            "per_page": per_page,
        }
    callback_summary_cache_key = {
        "project_id": project_id or "__all__",
        "callback_reply_status": callback_reply_status or "",
        "action": callback_action or "",
        "incident_id": incident_id or "",
        "page": page,
        "per_page": per_page,
    }
    if not refresh:
        cached_response = await get_cached_operator_summary_async(
            "callback_replies",
            callback_summary_cache_key,
            ttl_seconds=_CALLBACK_REPLY_CACHE_TTL_SECONDS,
        )
        if cached_response is not None:
            logger.info(
                "operator_callback_replies_cache_hit",
                project_id=normalized_project_id,
                callback_reply_status=callback_reply_status,
                action=callback_action,
                incident_id=incident_id,
                page=page,
                per_page=per_page,
                ttl_seconds=_CALLBACK_REPLY_CACHE_TTL_SECONDS,
            )
            return cached_response
    # Only fixed SQL fragments are concatenated; every user value is bound.
    where_clauses = [
        "m.source_envelope ? 'callback_reply'",
    ]
    params: dict[str, Any] = {
        "limit": per_page,
        "offset": (page - 1) * per_page,
    }
    if project_id:
        where_clauses.append("m.project_id = :project_id")
        params["project_id"] = project_id
    raw_status = _CALLBACK_REPLY_RAW_STATUS_BY_FILTER.get(
        str(callback_reply_status or "")
    )
    if raw_status:
        where_clauses.append(
            "m.source_envelope #>> '{callback_reply,status}' = :raw_status"
        )
        params["raw_status"] = raw_status
    if callback_action:
        where_clauses.append(
            "LOWER(m.source_envelope #>> '{callback_reply,action}') = :callback_action"
        )
        params["callback_action"] = callback_action
    if incident_id:
        where_clauses.append(
            "m.source_envelope #>> '{callback_reply,incident_id}' = :incident_id"
        )
        params["incident_id"] = incident_id
    where_sql = " AND ".join(where_clauses)
    count_sql = text(f"""
SELECT COUNT(*) AS total
FROM awooop_outbound_message m
WHERE {where_sql}
""")
    list_sql = text(f"""
SELECT
m.message_id,
m.project_id,
m.run_id,
m.channel_type,
m.message_type,
m.content_preview,
m.provider_message_id,
m.send_status,
m.send_error,
m.queued_at,
m.sent_at,
m.triggered_by_state,
m.source_envelope -> 'callback_reply' AS callback_reply,
m.source_envelope -> 'awooop_status_chain'
AS persisted_awooop_status_chain,
m.source_envelope -> 'km_stale_completion_summary'
AS persisted_km_stale_completion_summary,
r.agent_id,
r.state AS run_state,
r.created_at AS run_created_at
FROM awooop_outbound_message m
LEFT JOIN awooop_run_state r
ON r.project_id = m.project_id
AND r.run_id = m.run_id
WHERE {where_sql}
ORDER BY COALESCE(m.sent_at, m.queued_at) DESC, m.message_id DESC
LIMIT :limit OFFSET :offset
""")
    async with get_db_context(normalized_project_id) as db:
        count_result = await db.execute(count_sql, params)
        total = count_result.scalar_one()
        rows_result = await db.execute(list_sql, params)
        rows = list(rows_result.mappings().all())
        audit_summary = await _fetch_callback_reply_audit_summary(
            db,
            project_id=normalized_project_id,
        )
    items = [_callback_reply_event_item(row) for row in rows]
    # Per-request memoization so repeated (project, incident) pairs on one
    # page trigger a single lookup each.
    status_chain_cache: dict[tuple[str, str], dict[str, Any]] = {}
    km_completion_queue_cache: dict[str, Any] = {}
    km_completion_summary_cache: dict[tuple[str, str | None], dict[str, Any]] = {}
    for item in items:
        incident = item.get("incident_id")
        item_project_id = str(item.get("project_id") or project_id or "awoooi")
        if not incident:
            item["awooop_status_chain"] = _build_awooop_status_chain(
                incident_ids=[],
                source_id=None,
            )
            item["km_stale_completion_summary"] = (
                _empty_km_stale_completion_summary(
                    project_id=item_project_id,
                    incident_id=None,
                    status_value="no_incident",
                    reason="callback_reply_missing_incident_id",
                )
            )
            continue
        # Use a dedicated local name: rebinding ``incident_id`` here would
        # clobber the request filter that is logged after the loop.
        item_incident_id = str(incident)
        status_chain_cache_key = (item_project_id, item_incident_id)
        cached = status_chain_cache.get(status_chain_cache_key)
        if cached is not None:
            item["awooop_status_chain"] = cached
        else:
            remediation_history = await _fetch_run_remediation_history(
                [item_incident_id],
                limit=5,
            )
            chain = await _fetch_awooop_status_chain(
                incident_ids=[item_incident_id],
                project_id=item_project_id,
                remediation_history=remediation_history,
            )
            status_chain_cache[status_chain_cache_key] = chain
            item["awooop_status_chain"] = chain
        km_summary_cache_key = (item_project_id, item_incident_id)
        km_summary = km_completion_summary_cache.get(km_summary_cache_key)
        if km_summary is None:
            km_summary = await _fetch_km_stale_completion_summary_for_incident(
                project_id=item_project_id,
                incident_id=item_incident_id,
                queue_cache=km_completion_queue_cache,
            )
            km_completion_summary_cache[km_summary_cache_key] = km_summary
        item["km_stale_completion_summary"] = km_summary
    response = {
        "items": items,
        "total": total,
        "page": page,
        "per_page": per_page,
        "summary": audit_summary,
    }
    logger.info(
        "operator_callback_replies_fetched",
        project_id=normalized_project_id,
        callback_reply_status=callback_reply_status,
        action=callback_action,
        incident_id=incident_id,
        page=page,
        per_page=per_page,
        total=total,
        cache_status="miss",
        cache_ttl_seconds=_CALLBACK_REPLY_CACHE_TTL_SECONDS,
    )
    return await store_operator_summary_async(
        "callback_replies",
        callback_summary_cache_key,
        response,
        ttl_seconds=_CALLBACK_REPLY_CACHE_TTL_SECONDS,
    )
async def list_ai_alert_card_delivery_readback(
    *,
    project_id: str | None = None,
    event_type: str | None = None,
    lane: str | None = None,
    page: int = 1,
    per_page: int = 20,
    refresh: bool = False,
) -> dict[str, Any]:
    """Return the read-only delivery readback for AI automation alert cards.

    Telegram rows of ``awooop_outbound_message`` that carry an
    ``ai_automation_alert_card`` envelope are aggregated into a summary and
    listed page by page, newest first.

    Args:
        project_id: Project to read; falls back to ``"awoooi"``.
        event_type: Optional exact match on the card's ``event_type``.
        lane: Optional exact match on the card's ``lane``.
        page: 1-based page number, clamped to at least 1.
        per_page: Page size, clamped to ``1.._MAX_PER_PAGE``.
        refresh: Skip the cache lookup when true.

    Returns:
        A cached response when one is available, otherwise the value returned
        by ``store_operator_summary_async`` for a response with ``items``,
        ``total``, ``page``, ``per_page`` and ``summary``.
    """
    tenant = project_id or "awoooi"
    event_filter = str(event_type or "").strip()
    lane_filter = str(lane or "").strip()
    page_no = max(int(page or 1), 1)
    page_size = min(max(int(per_page or 20), 1), _MAX_PER_PAGE)

    cache_key = {
        "project_id": tenant,
        "event_type": event_filter,
        "lane": lane_filter,
        "page": page_no,
        "per_page": page_size,
    }
    # Fields shared by the cache-hit and cache-miss log events.
    log_fields = {
        "project_id": tenant,
        "event_type": event_filter,
        "lane": lane_filter,
        "page": page_no,
        "per_page": page_size,
    }

    if not refresh:
        cached_response = await get_cached_operator_summary_async(
            "ai_alert_card_delivery_readback",
            cache_key,
            ttl_seconds=_AI_ALERT_CARD_CACHE_TTL_SECONDS,
        )
        if cached_response is not None:
            logger.info(
                "operator_ai_alert_card_delivery_readback_cache_hit",
                **log_fields,
                ttl_seconds=_AI_ALERT_CARD_CACHE_TTL_SECONDS,
            )
            return cached_response

    # Fixed SQL fragments only; filter values travel as bind parameters.
    conditions = [
        "m.project_id = :project_id",
        "m.channel_type = 'telegram'",
        "m.source_envelope ? 'ai_automation_alert_card'",
    ]
    bind_params: dict[str, Any] = {
        "project_id": tenant,
        "limit": page_size,
        "offset": (page_no - 1) * page_size,
    }
    if event_filter:
        conditions.append(
            "m.source_envelope #>> '{ai_automation_alert_card,event_type}' = :event_type"
        )
        bind_params["event_type"] = event_filter
    if lane_filter:
        conditions.append(
            "m.source_envelope #>> '{ai_automation_alert_card,lane}' = :lane"
        )
        bind_params["lane"] = lane_filter
    where_sql = " AND ".join(conditions)

    summary_sql = text(f"""
SELECT
COUNT(*) AS total,
COUNT(*) FILTER (WHERE m.send_status = 'sent') AS sent_total,
COUNT(*) FILTER (WHERE m.send_status = 'failed') AS failed_total,
COUNT(*) FILTER (WHERE m.send_status = 'pending') AS pending_total,
COUNT(*) FILTER (WHERE m.send_status = 'shadow') AS shadow_total,
COUNT(*) FILTER (
WHERE COALESCE(
m.source_envelope #>>
'{{ai_automation_alert_card,delivery_receipt_readback_required}}',
''
) = 'true'
) AS delivery_receipt_required_total,
COUNT(*) FILTER (
WHERE COALESCE(
m.source_envelope #>>
'{{ai_automation_alert_card,runtime_write_gate_count}}',
'0'
) <> '0'
) AS runtime_write_gate_open_count,
MAX(m.sent_at) AS latest_sent_at,
MAX(m.queued_at) AS latest_queued_at
FROM awooop_outbound_message m
WHERE {where_sql}
""")
    list_sql = text(f"""
SELECT
m.message_id,
m.project_id,
m.run_id,
m.channel_type,
m.message_type,
m.provider_message_id,
m.send_status,
m.send_error,
m.queued_at,
m.sent_at,
m.triggered_by_state,
m.source_envelope -> 'ai_automation_alert_card' AS alert_card,
m.source_envelope -> 'source_refs' AS source_refs,
r.agent_id,
r.state AS run_state,
r.created_at AS run_created_at
FROM awooop_outbound_message m
LEFT JOIN awooop_run_state r
ON r.project_id = m.project_id
AND r.run_id = m.run_id
WHERE {where_sql}
ORDER BY COALESCE(m.sent_at, m.queued_at) DESC, m.message_id DESC
LIMIT :limit OFFSET :offset
""")

    async with get_db_context(tenant) as db:
        summary_result = await db.execute(summary_sql, bind_params)
        summary_row = summary_result.mappings().first() or {}
        rows_result = await db.execute(list_sql, bind_params)
        delivery_rows = list(rows_result.mappings().all())

    summary = _ai_alert_card_delivery_summary_from_row(
        summary_row,
        project_id=tenant,
        event_type=event_filter or None,
        lane=lane_filter or None,
    )
    delivery_items = [_ai_alert_card_delivery_item(row) for row in delivery_rows]
    response = {
        "items": delivery_items,
        "total": summary["total"],
        "page": page_no,
        "per_page": page_size,
        "summary": summary,
    }
    logger.info(
        "operator_ai_alert_card_delivery_readback_fetched",
        **log_fields,
        total=summary["total"],
        cache_status="miss",
        cache_ttl_seconds=_AI_ALERT_CARD_CACHE_TTL_SECONDS,
    )
    return await store_operator_summary_async(
        "ai_alert_card_delivery_readback",
        cache_key,
        response,
        ttl_seconds=_AI_ALERT_CARD_CACHE_TTL_SECONDS,
    )
def _ai_alert_card_delivery_summary_from_row(
    row: Mapping[str, Any],
    *,
    project_id: str,
    event_type: str | None,
    lane: str | None,
) -> dict[str, Any]:
    """Build the public delivery summary from one aggregate SQL row.

    Counts are read through ``_safe_int``. The ``status`` field is chosen in
    this order: any failed delivery, then any pending delivery, then "no
    rows at all", otherwise plain "observed".

    Args:
        row: Mapping with the aggregate columns (``total``, ``sent_total``, ...).
        project_id: Project echoed into the summary.
        event_type: Event-type filter echoed into the summary (may be ``None``).
        lane: Lane filter echoed into the summary (may be ``None``).

    Returns:
        The summary dict, including ``runtime_write_allowed`` which is true
        when the open write-gate count is positive.
    """
    total = _safe_int(row.get("total"))
    sent = _safe_int(row.get("sent_total"))
    failed = _safe_int(row.get("failed_total"))
    pending = _safe_int(row.get("pending_total"))
    shadow = _safe_int(row.get("shadow_total"))
    gate_open = _safe_int(row.get("runtime_write_gate_open_count"))
    receipt_required = _safe_int(row.get("delivery_receipt_required_total"))

    # Failures outrank pending deliveries, which outrank the empty/observed split.
    if failed > 0:
        delivery_status = "delivery_failure_observed"
    elif pending > 0:
        delivery_status = "delivery_pending_observed"
    elif total == 0:
        delivery_status = "no_delivery_receipt"
    else:
        delivery_status = "observed"

    summary: dict[str, Any] = {
        "schema_version": "awooop_ai_alert_card_delivery_readback_v1",
        "project_id": project_id,
        "event_type": event_type,
        "lane": lane,
        "status": delivery_status,
        "total": total,
        "sent_total": sent,
        "failed_total": failed,
        "pending_total": pending,
        "shadow_total": shadow,
        "delivery_receipt_required_total": receipt_required,
        "runtime_write_gate_open_count": gate_open,
        "runtime_write_allowed": gate_open > 0,
        "latest_sent_at": row.get("latest_sent_at"),
        "latest_queued_at": row.get("latest_queued_at"),
        "production_write_count": 0,
    }
    return summary
def _ai_alert_card_delivery_item(row: Mapping[str, Any]) -> dict[str, Any]:
    """Turn one AI alert-card outbound row into a delivery-evidence item.

    Args:
        row: Mapping with the outbound-message columns plus the ``alert_card``
            and ``source_refs`` JSON values and the joined run columns.

    Returns:
        A dict with the message columns, the normalized alert-card fields and
        a ``run_detail_href`` that is ``None`` unless both run id and project
        id are present.
    """
    card = _as_dict(row.get("alert_card"))
    refs = _as_dict(row.get("source_refs"))
    run_id = row.get("run_id")
    project_id = str(row.get("project_id") or "")
    gate_count = _safe_int(card.get("runtime_write_gate_count"))

    # Prefer the send timestamp; fall back to the queue timestamp.
    event_at = row.get("sent_at") or row.get("queued_at")

    gates = card.get("gates")
    if not isinstance(gates, list):
        gates = []

    run_detail_href = None
    if run_id and project_id:
        run_detail_href = f"/awooop/runs/{run_id}?project_id={project_id}"

    item: dict[str, Any] = {
        "message_id": row.get("message_id"),
        "run_id": run_id,
        "project_id": project_id,
        "event_at": event_at,
    }
    # Columns copied straight from the row.
    for column in (
        "channel_type",
        "message_type",
        "send_status",
        "send_error",
        "provider_message_id",
        "triggered_by_state",
    ):
        item[column] = row.get(column)
    # Text fields of the card, coerced to strings with an empty default.
    for field in ("event_type", "lane", "target"):
        item[field] = str(card.get(field) or "")
    item["gates"] = gates
    item["runtime_write_gate_count"] = gate_count
    item["runtime_write_allowed"] = gate_count > 0
    item["candidate_only"] = bool(card.get("candidate_only"))
    item["controlled_playbook_queue"] = bool(card.get("controlled_playbook_queue"))
    item["runtime_write_gate_state"] = str(
        card.get("runtime_write_gate_state") or "unknown"
    )
    item["delivery_receipt_readback_required"] = bool(
        card.get("delivery_receipt_readback_required")
    )
    item["source_refs"] = refs
    item["run_state"] = row.get("run_state")
    item["agent_id"] = row.get("agent_id")
    item["run_created_at"] = row.get("run_created_at")
    item["run_detail_href"] = run_detail_href
    return item
async def _fetch_callback_reply_audit_summary(
    db: Any,
    *,
    project_id: str,
) -> dict[str, Any]:
    """Aggregate Telegram outbound-mirror and callback-evidence coverage for a project.

    One SQL statement with three CTEs is executed:

    * ``outbound`` - the project's Telegram rows of ``awooop_outbound_message``
      plus ``has_trace_ref``, true when any ``source_refs`` entry is a
      non-empty JSON array.
    * ``trace_gap_cutoff`` - latest timestamp of a reply-markup message that
      has no trace ref.
    * ``inbound_callbacks`` - counts of Telegram ``callback_query`` rows in
      ``awooop_conversation_event``.

    NOTE(review): the final SELECT cross-joins ``outbound`` with the two
    single-row CTEs, so when the project has no Telegram outbound rows the
    ``inbound_callback_*`` aggregates come back NULL - confirm that is the
    intended result.

    Args:
        db: Session-like object exposing an awaitable ``execute``.
        project_id: Project whose rows are summarized.

    Returns:
        The dict produced by ``_callback_reply_audit_summary_from_row`` for
        the single aggregate row.
    """
    audit_sql = text("""
WITH outbound AS (
SELECT
m.*,
EXISTS (
SELECT 1
FROM jsonb_each(
CASE
WHEN jsonb_typeof(
COALESCE(
m.source_envelope -> 'source_refs',
'{}'::jsonb
)
) = 'object'
THEN COALESCE(
m.source_envelope -> 'source_refs',
'{}'::jsonb
)
ELSE '{}'::jsonb
END
) AS refs(key, value)
WHERE jsonb_typeof(refs.value) = 'array'
AND refs.value <> '[]'::jsonb
) AS has_trace_ref
FROM awooop_outbound_message m
WHERE m.project_id = :project_id
AND m.channel_type = 'telegram'
),
trace_gap_cutoff AS (
SELECT
MAX(COALESCE(sent_at, queued_at))
AS latest_missing_trace_ref_at
FROM outbound
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
AND NOT has_trace_ref
),
inbound_callbacks AS (
SELECT
COUNT(*) AS inbound_callback_total,
COUNT(*) FILTER (
WHERE received_at >= NOW() - INTERVAL '24 hours'
) AS inbound_callback_recent_24h_total,
MAX(received_at) AS inbound_callback_latest_at
FROM awooop_conversation_event
WHERE project_id = :project_id
AND channel_type = 'telegram'
AND content_type = 'callback_query'
)
SELECT
COUNT(*) AS outbound_total,
COUNT(*) FILTER (
WHERE source_envelope <> '{}'::jsonb
) AS outbound_source_envelope_total,
COUNT(*) FILTER (
WHERE source_envelope ? 'source_refs'
) AS outbound_source_refs_total,
COUNT(*) FILTER (
WHERE has_trace_ref
) AS outbound_trace_ref_total,
COUNT(*) FILTER (
WHERE COALESCE(
source_envelope #> '{source_refs,incident_ids}',
'[]'::jsonb
) <> '[]'::jsonb
) AS outbound_incident_ref_total,
COUNT(*) FILTER (
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
) AS outbound_reply_markup_total,
COUNT(*) FILTER (
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
AND COALESCE(
source_envelope #> '{source_refs,incident_ids}',
'[]'::jsonb
) = '[]'::jsonb
) AS outbound_reply_markup_missing_incident_ref_total,
COUNT(*) FILTER (
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
AND COALESCE(
source_envelope #> '{source_refs,incident_ids}',
'[]'::jsonb
) = '[]'::jsonb
AND COALESCE(sent_at, queued_at) >= NOW() - INTERVAL '1 hour'
) AS outbound_reply_markup_missing_incident_ref_recent_1h_total,
COUNT(*) FILTER (
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
AND COALESCE(
source_envelope #> '{source_refs,incident_ids}',
'[]'::jsonb
) = '[]'::jsonb
AND COALESCE(sent_at, queued_at) >= NOW() - INTERVAL '24 hours'
) AS outbound_reply_markup_missing_incident_ref_recent_24h_total,
MAX(COALESCE(sent_at, queued_at)) FILTER (
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
AND COALESCE(
source_envelope #> '{source_refs,incident_ids}',
'[]'::jsonb
) = '[]'::jsonb
) AS outbound_reply_markup_missing_incident_ref_latest_sent_at,
COUNT(*) FILTER (
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
AND NOT has_trace_ref
) AS outbound_reply_markup_missing_trace_ref_total,
COUNT(*) FILTER (
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
AND NOT has_trace_ref
AND COALESCE(sent_at, queued_at) >= NOW() - INTERVAL '1 hour'
) AS outbound_reply_markup_missing_trace_ref_recent_1h_total,
COUNT(*) FILTER (
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
AND NOT has_trace_ref
AND COALESCE(sent_at, queued_at) >= NOW() - INTERVAL '24 hours'
) AS outbound_reply_markup_missing_trace_ref_recent_24h_total,
MAX(COALESCE(sent_at, queued_at)) FILTER (
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
AND NOT has_trace_ref
) AS outbound_reply_markup_missing_trace_ref_latest_sent_at,
COUNT(*) FILTER (
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
AND has_trace_ref
AND trace_gap_cutoff.latest_missing_trace_ref_at
IS NOT NULL
AND COALESCE(sent_at, queued_at)
> trace_gap_cutoff.latest_missing_trace_ref_at
) AS outbound_reply_markup_trace_ref_after_gap_total,
MIN(COALESCE(sent_at, queued_at)) FILTER (
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
AND has_trace_ref
AND trace_gap_cutoff.latest_missing_trace_ref_at
IS NOT NULL
AND COALESCE(sent_at, queued_at)
> trace_gap_cutoff.latest_missing_trace_ref_at
) AS outbound_reply_markup_trace_ref_after_gap_first_sent_at,
MAX(COALESCE(sent_at, queued_at)) FILTER (
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
AND has_trace_ref
AND trace_gap_cutoff.latest_missing_trace_ref_at
IS NOT NULL
AND COALESCE(sent_at, queued_at)
> trace_gap_cutoff.latest_missing_trace_ref_at
) AS outbound_reply_markup_trace_ref_after_gap_latest_sent_at,
COALESCE((
SELECT jsonb_agg(
jsonb_build_object(
'prefix', prefix,
'total', total,
'recent_24h_total', recent_24h_total,
'first_sent_at', first_sent_at,
'last_sent_at', last_sent_at
)
ORDER BY total DESC, prefix ASC
)
FROM (
SELECT
COALESCE(
NULLIF(
source_envelope #>>
'{reply_markup,buttons,0,callback_prefix}',
''
),
'unknown'
) AS prefix,
COUNT(*) AS total,
COUNT(*) FILTER (
WHERE COALESCE(sent_at, queued_at)
>= NOW() - INTERVAL '24 hours'
) AS recent_24h_total,
MIN(COALESCE(sent_at, queued_at)) AS first_sent_at,
MAX(COALESCE(sent_at, queued_at)) AS last_sent_at
FROM outbound
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
AND COALESCE(
source_envelope #> '{source_refs,incident_ids}',
'[]'::jsonb
) = '[]'::jsonb
GROUP BY 1
ORDER BY total DESC, prefix ASC
LIMIT 5
) missing_prefixes
), '[]'::jsonb)
AS outbound_reply_markup_missing_incident_ref_top_prefixes,
COALESCE((
SELECT jsonb_agg(
jsonb_build_object(
'prefix', prefix,
'total', total,
'recent_24h_total', recent_24h_total,
'first_sent_at', first_sent_at,
'last_sent_at', last_sent_at
)
ORDER BY total DESC, prefix ASC
)
FROM (
SELECT
COALESCE(
NULLIF(
source_envelope #>>
'{reply_markup,buttons,0,callback_prefix}',
''
),
'unknown'
) AS prefix,
COUNT(*) AS total,
COUNT(*) FILTER (
WHERE COALESCE(sent_at, queued_at)
>= NOW() - INTERVAL '24 hours'
) AS recent_24h_total,
MIN(COALESCE(sent_at, queued_at)) AS first_sent_at,
MAX(COALESCE(sent_at, queued_at)) AS last_sent_at
FROM outbound
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
AND NOT has_trace_ref
GROUP BY 1
ORDER BY total DESC, prefix ASC
LIMIT 5
) missing_trace_prefixes
), '[]'::jsonb)
AS outbound_reply_markup_missing_trace_ref_top_prefixes,
COUNT(*) FILTER (
WHERE send_status = 'failed'
) AS outbound_failed_total,
COUNT(*) FILTER (
WHERE source_envelope ? 'callback_reply'
) AS callback_total,
COUNT(*) FILTER (
WHERE source_envelope #>> '{callback_reply,status}'
= 'callback_reply_sent'
) AS callback_sent_total,
COUNT(*) FILTER (
WHERE source_envelope #>> '{callback_reply,status}'
= 'callback_reply_fallback_sent'
) AS callback_fallback_total,
COUNT(*) FILTER (
WHERE source_envelope #>> '{callback_reply,status}'
= 'callback_reply_rescue_sent'
) AS callback_rescue_total,
COUNT(*) FILTER (
WHERE source_envelope #>> '{callback_reply,status}'
= 'callback_reply_failed'
) AS callback_failed_total,
COUNT(*) FILTER (
WHERE LOWER(source_envelope #>> '{callback_reply,action}')
= 'detail'
) AS callback_detail_total,
COUNT(*) FILTER (
WHERE LOWER(source_envelope #>> '{callback_reply,action}')
= 'history'
) AS callback_history_total,
COUNT(*) FILTER (
WHERE source_envelope ? 'callback_reply'
AND source_envelope ? 'awooop_status_chain'
AND source_envelope ? 'km_stale_completion_summary'
) AS callback_snapshot_captured_total,
COUNT(*) FILTER (
WHERE source_envelope ? 'callback_reply'
AND (
source_envelope ? 'awooop_status_chain'
OR source_envelope ? 'km_stale_completion_summary'
)
AND NOT (
source_envelope ? 'awooop_status_chain'
AND source_envelope ? 'km_stale_completion_summary'
)
) AS callback_snapshot_partial_total,
COUNT(*) FILTER (
WHERE source_envelope ? 'callback_reply'
AND NOT (
source_envelope ? 'awooop_status_chain'
OR source_envelope ? 'km_stale_completion_summary'
)
) AS callback_snapshot_missing_total,
COUNT(DISTINCT source_envelope #>> '{callback_reply,incident_id}')
FILTER (
WHERE source_envelope ? 'callback_reply'
AND COALESCE(
source_envelope #>> '{callback_reply,incident_id}',
''
) <> ''
) AS callback_incident_total,
MAX(inbound_callbacks.inbound_callback_total)
AS inbound_callback_total,
MAX(inbound_callbacks.inbound_callback_recent_24h_total)
AS inbound_callback_recent_24h_total,
MAX(inbound_callbacks.inbound_callback_latest_at)
AS inbound_callback_latest_at,
MAX(COALESCE(sent_at, queued_at)) AS latest_outbound_at,
MAX(COALESCE(sent_at, queued_at)) FILTER (
WHERE source_envelope ? 'callback_reply'
) AS latest_callback_at
FROM outbound
CROSS JOIN trace_gap_cutoff
CROSS JOIN inbound_callbacks
""")
    bind_params = {"project_id": project_id}
    result = await db.execute(audit_sql, bind_params)
    # The statement is a plain aggregate without GROUP BY, so exactly one row.
    aggregate_row = result.mappings().one()
    return _callback_reply_audit_summary_from_row(
        aggregate_row,
        project_id=project_id,
    )
def _callback_reply_audit_summary_from_row(
    row: Mapping[str, Any],
    *,
    project_id: str,
) -> dict[str, Any]:
    """Shape the aggregate SQL row into the public callback-evidence audit summary.

    Besides copying the counters (through ``_safe_int``) and timestamps, three
    derived verdicts are added: the trace-ref gap status/action and its
    recovery status, the inbound callback mirror status/action, and the
    overall ``snapshot_status`` / ``next_action`` pair.

    Args:
        row: Mapping holding the aggregate columns.
        project_id: Project echoed into the summary.

    Returns:
        The summary dict tagged with ``_CALLBACK_REPLY_AUDIT_SUMMARY_SCHEMA_VERSION``.
    """

    def count(column: str) -> int:
        # Every counter column goes through the same integer coercion.
        return _safe_int(row.get(column))

    outbound_total = count("outbound_total")
    callback_total = count("callback_total")
    captured = count("callback_snapshot_captured_total")
    partial = count("callback_snapshot_partial_total")
    missing = count("callback_snapshot_missing_total")
    outbound_incident_refs = count("outbound_incident_ref_total")

    incident_gap_prefixes = _reply_markup_gap_prefixes_from_value(
        row.get("outbound_reply_markup_missing_incident_ref_top_prefixes")
    )
    trace_gap_prefixes = _reply_markup_gap_prefixes_from_value(
        row.get("outbound_reply_markup_missing_trace_ref_top_prefixes")
    )

    missing_trace_total = count("outbound_reply_markup_missing_trace_ref_total")
    missing_trace_recent_1h = count(
        "outbound_reply_markup_missing_trace_ref_recent_1h_total"
    )
    missing_trace_recent_24h = count(
        "outbound_reply_markup_missing_trace_ref_recent_24h_total"
    )
    trace_gap_status, trace_gap_next_action = _trace_ref_gap_decision(
        total=missing_trace_total,
        recent_1h=missing_trace_recent_1h,
        recent_24h=missing_trace_recent_24h,
    )
    trace_ref_after_gap_total = count(
        "outbound_reply_markup_trace_ref_after_gap_total"
    )
    trace_gap_recovery_status = _trace_ref_gap_recovery_status(
        missing_total=missing_trace_total,
        after_gap_total=trace_ref_after_gap_total,
    )

    inbound_callback_total = count("inbound_callback_total")
    inbound_callback_recent_24h_total = count("inbound_callback_recent_24h_total")

    # Inbound mirror verdict: any inbound callback row means capture works.
    if inbound_callback_total > 0:
        inbound_mirror = ("capturing", "none")
    elif callback_total > 0:
        inbound_mirror = (
            "reply_only_gap",
            "press_any_telegram_callback_after_rollout",
        )
    else:
        inbound_mirror = (
            "no_callback_observed",
            "press_any_telegram_callback_after_rollout",
        )
    inbound_callback_mirror_status, inbound_callback_next_action = inbound_mirror

    # Snapshot verdict: first matching rule wins.
    if callback_total <= 0:
        snapshot = ("no_callback", "press_telegram_detail_or_history")
    elif captured > 0 and (missing > 0 or partial > 0):
        snapshot = ("partial", "review_legacy_callback_snapshot_gap")
    elif partial > 0:
        snapshot = ("partial", "press_telegram_detail_or_history_after_rollout")
    elif missing > 0:
        snapshot = (
            "not_captured",
            "press_telegram_detail_or_history_after_rollout",
        )
    elif outbound_total > 0 and outbound_incident_refs == 0:
        snapshot = ("captured", "review_outbound_source_refs")
    else:
        snapshot = ("captured", "none")
    snapshot_status, next_action = snapshot

    return {
        "schema_version": _CALLBACK_REPLY_AUDIT_SUMMARY_SCHEMA_VERSION,
        "project_id": project_id,
        "outbound_total": outbound_total,
        "outbound_source_envelope_total": count("outbound_source_envelope_total"),
        "outbound_source_refs_total": count("outbound_source_refs_total"),
        "outbound_trace_ref_total": count("outbound_trace_ref_total"),
        "outbound_incident_ref_total": outbound_incident_refs,
        "outbound_reply_markup_total": count("outbound_reply_markup_total"),
        "outbound_reply_markup_missing_incident_ref_total": count(
            "outbound_reply_markup_missing_incident_ref_total"
        ),
        "outbound_reply_markup_missing_incident_ref_recent_1h_total": count(
            "outbound_reply_markup_missing_incident_ref_recent_1h_total"
        ),
        "outbound_reply_markup_missing_incident_ref_recent_24h_total": count(
            "outbound_reply_markup_missing_incident_ref_recent_24h_total"
        ),
        "outbound_reply_markup_missing_incident_ref_latest_sent_at": row.get(
            "outbound_reply_markup_missing_incident_ref_latest_sent_at"
        ),
        "outbound_reply_markup_missing_trace_ref_total": missing_trace_total,
        "outbound_reply_markup_missing_trace_ref_recent_1h_total": (
            missing_trace_recent_1h
        ),
        "outbound_reply_markup_missing_trace_ref_recent_24h_total": (
            missing_trace_recent_24h
        ),
        "outbound_reply_markup_missing_trace_ref_latest_sent_at": row.get(
            "outbound_reply_markup_missing_trace_ref_latest_sent_at"
        ),
        "outbound_reply_markup_trace_ref_gap_status": trace_gap_status,
        "outbound_reply_markup_trace_ref_gap_next_action": trace_gap_next_action,
        "outbound_reply_markup_trace_ref_after_gap_total": (
            trace_ref_after_gap_total
        ),
        "outbound_reply_markup_trace_ref_after_gap_first_sent_at": row.get(
            "outbound_reply_markup_trace_ref_after_gap_first_sent_at"
        ),
        "outbound_reply_markup_trace_ref_after_gap_latest_sent_at": row.get(
            "outbound_reply_markup_trace_ref_after_gap_latest_sent_at"
        ),
        "outbound_reply_markup_trace_ref_gap_recovery_status": (
            trace_gap_recovery_status
        ),
        "outbound_reply_markup_missing_incident_ref_top_prefixes": (
            incident_gap_prefixes
        ),
        "outbound_reply_markup_missing_trace_ref_top_prefixes": (
            trace_gap_prefixes
        ),
        "outbound_failed_total": count("outbound_failed_total"),
        "callback_total": callback_total,
        "callback_sent_total": count("callback_sent_total"),
        "callback_fallback_total": count("callback_fallback_total"),
        "callback_rescue_total": count("callback_rescue_total"),
        "callback_failed_total": count("callback_failed_total"),
        "callback_detail_total": count("callback_detail_total"),
        "callback_history_total": count("callback_history_total"),
        "callback_snapshot_captured_total": captured,
        "callback_snapshot_partial_total": partial,
        "callback_snapshot_missing_total": missing,
        "callback_incident_total": count("callback_incident_total"),
        "inbound_callback_total": inbound_callback_total,
        "inbound_callback_recent_24h_total": inbound_callback_recent_24h_total,
        "inbound_callback_latest_at": row.get("inbound_callback_latest_at"),
        "inbound_callback_mirror_status": inbound_callback_mirror_status,
        "inbound_callback_next_action": inbound_callback_next_action,
        "snapshot_status": snapshot_status,
        "next_action": next_action,
        "latest_outbound_at": row.get("latest_outbound_at"),
        "latest_callback_at": row.get("latest_callback_at"),
    }
def _trace_ref_gap_decision(
*,
total: int,
recent_1h: int,
recent_24h: int,
) -> tuple[str, str]:
"""Classify reply_markup messages without any source_refs into operator actions."""
if total <= 0:
return "clean", "none"
if recent_1h > 0:
return "active_gap", "inspect_recent_outbound_source_refs"
if recent_24h > 0:
return "recent_backlog", "watch_24h_decay"
return "legacy_backlog", "backfill_or_archive_legacy_callbacks"
def _trace_ref_gap_recovery_status(
*,
missing_total: int,
after_gap_total: int,
) -> str:
"""Describe whether traced reply_markup messages resumed after the last gap."""
if missing_total <= 0:
return "not_needed"
if after_gap_total > 0:
return "recovered_after_gap"
return "no_recovery_signal"
def _reply_markup_gap_prefixes_from_value(value: Any) -> list[dict[str, Any]]:
if not isinstance(value, list):
return []
prefixes: list[dict[str, Any]] = []
for item in value:
if not isinstance(item, Mapping):
continue
prefix = str(item.get("prefix") or "unknown").strip() or "unknown"
prefixes.append({
"prefix": prefix[:80],
"total": _safe_int(item.get("total")),
"recent_24h_total": _safe_int(item.get("recent_24h_total")),
"first_sent_at": item.get("first_sent_at"),
"last_sent_at": item.get("last_sent_at"),
})
if len(prefixes) >= 5:
break
return prefixes
async def _fetch_km_stale_completion_summary_for_incident(
    *,
    project_id: str,
    incident_id: str | None,
    queue_cache: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Look up read-only KM owner-review completion context for one incident.

    Args:
        project_id: Project to query; falls back to ``"awoooi"`` when empty.
        incident_id: Incident to summarize; blank or ``None`` yields the
            ``"no_incident"`` placeholder summary.
        queue_cache: Optional per-project cache of the completion queue. It is
            filled on a successful fetch so later calls can reuse the queue.

    Returns:
        The summary built from the queue, or a placeholder summary with
        status ``"fetch_failed"`` when loading the queue raised.
    """
    effective_project = project_id or "awoooi"
    incident_key = str(incident_id or "").strip() or None
    if not incident_key:
        return _empty_km_stale_completion_summary(
            project_id=effective_project,
            incident_id=None,
            status_value="no_incident",
            reason="callback_reply_missing_incident_id",
        )

    queues = {} if queue_cache is None else queue_cache
    completion_queue = queues.get(effective_project)
    if completion_queue is None:
        try:
            completion_queue = await query_km_stale_owner_review_completion_queue(
                project_id=effective_project,
                status_bucket="all",
                limit=100,
            )
        except Exception as exc:
            # Best effort: a failed queue lookup degrades to a placeholder
            # summary instead of failing the caller. Failures are not cached.
            logger.warning(
                "operator_km_stale_completion_summary_fetch_failed",
                project_id=effective_project,
                incident_id=incident_key,
                error=str(exc),
            )
            return _empty_km_stale_completion_summary(
                project_id=effective_project,
                incident_id=incident_key,
                status_value="fetch_failed",
                reason="km_stale_completion_queue_fetch_failed",
            )
        queues[effective_project] = completion_queue

    return _build_km_stale_completion_summary(
        queue=completion_queue,
        project_id=effective_project,
        incident_id=incident_key,
    )
async def list_cicd_events(
    *,
    project_id: str | None,
    stage: str | None,
    status_filter: str | None,
    limit: int,
) -> dict[str, Any]:
    """List CI/CD notification evidence read from ``alert_operation_log``.

    Args:
        project_id: Optional project filter; any value other than ``"awoooi"``
            yields an empty result.
        stage: Optional stage filter, passed through ``_validate_cicd_stage_filter``.
        status_filter: Optional status filter, passed through
            ``_validate_cicd_status_filter``.
        limit: Requested row count, clamped to ``1..50``.

    Returns:
        ``{"items": [...], "total": <number of returned items>, "limit": <clamped limit>}``.
    """
    capped_limit = min(max(limit, 1), 50)
    stage_value = _validate_cicd_stage_filter(stage)
    status_value = _validate_cicd_status_filter(status_filter)

    # alert_operation_log is currently a legacy/global evidence table and
    # CI/CD notifications belong only to AWOOOI production, so a non-awoooi
    # project filter returns an empty set to avoid misleading the
    # multi-tenant UI.
    if project_id and project_id != "awoooi":
        return {"items": [], "total": 0, "limit": capped_limit}

    # NOTE(review): in LIKE, `_` matches any single character, so 'CI_%' also
    # accepts names such as 'CIx...'. Confirm whether a literal "CI_" prefix
    # was intended.
    filters = [
        "event_type = 'ALERT_RECEIVED'",
        "actor = 'alertmanager'",
        """
COALESCE(
context #>> '{labels,alertname}',
context ->> 'alertname',
''
) LIKE 'CI_%'
""",
    ]
    bind_params: dict[str, Any] = {"limit": capped_limit}
    if stage_value:
        filters.append(
            "LOWER(COALESCE(context #>> '{labels,stage}', '')) = :stage"
        )
        bind_params["stage"] = stage_value
    if status_value:
        filters.append(
            "LOWER(COALESCE(context #>> '{labels,status}', '')) = :status"
        )
        bind_params["status"] = status_value
    where_sql = " AND ".join(filters)

    sql = text(f"""
SELECT
id,
action_detail,
success,
created_at,
context,
COALESCE(
context #>> '{{labels,alertname}}',
context ->> 'alertname',
''
) AS alertname,
context #>> '{{labels,stage}}' AS stage,
context #>> '{{labels,status}}' AS status,
context #>> '{{labels,severity}}' AS severity,
context #>> '{{labels,commit}}' AS commit_sha,
context #>> '{{labels,triggered_by}}' AS triggered_by,
context #>> '{{labels,duration_seconds}}' AS duration_seconds,
context #>> '{{annotations,summary}}' AS summary,
context #>> '{{annotations,description}}' AS description,
context #>> '{{annotations,workflow_url}}' AS workflow_url,
context ->> 'alert_id' AS alert_id,
context ->> 'source' AS source
FROM alert_operation_log
WHERE {where_sql}
ORDER BY created_at DESC, id DESC
LIMIT :limit
""")

    async with get_db_context("awoooi") as db:
        result = await db.execute(sql, bind_params)
        event_rows = list(result.mappings().all())

    item_project = project_id or "awoooi"
    items = []
    for event_row in event_rows:
        items.append(_cicd_event_item_from_row(event_row, project_id=item_project))
    return {"items": items, "total": len(items), "limit": capped_limit}
async def get_ai_route_status(
    workload_type: str | None = None,
) -> dict[str, Any]:
    """Return the read-only AI/Ollama provider routing status for the Operator Console.

    The failover manager is asked to select a provider under a timeout. When
    that selection times out or raises, the response is built from the
    lightweight connectivity read model instead; both failure paths are logged.

    Args:
        workload_type: Optional workload name; validated and defaulted by
            ``_validate_ai_route_workload``.

    Returns:
        The route status payload (selected provider, fallback chain, health,
        lane state) with ``repair_evidence`` attached.

    Raises:
        HTTPException: Propagated from ``_validate_ai_route_workload`` for an
            unsupported workload.
    """
    workload = _validate_ai_route_workload(workload_type)
    policy_order = _ai_route_policy_order(workload)
    checked_at = _utc_now_naive()
    try:
        route = await asyncio.wait_for(
            get_ollama_failover_manager().select_provider(task_type=workload),
            timeout=_AI_ROUTE_STATUS_SELECT_TIMEOUT_SECONDS,
        )
    except TimeoutError:
        logger.warning(
            "ai_route_status_check_timeout",
            workload_type=workload,
            timeout_seconds=_AI_ROUTE_STATUS_SELECT_TIMEOUT_SECONDS,
        )
        return await _ai_route_lightweight_status_from_policy(
            workload=workload,
            policy_order=policy_order,
            checked_at=checked_at,
            route_reason="route_check_timeout",
            route_error=(
                f"route status timed out after "
                f"{_AI_ROUTE_STATUS_SELECT_TIMEOUT_SECONDS:g}s"
            ),
        )
    except Exception as exc:
        # Log the unexpected failure so it is visible server-side, mirroring
        # the timeout branch, before degrading to the lightweight read model.
        logger.warning(
            "ai_route_status_check_failed",
            workload_type=workload,
            error=str(exc),
            error_type=type(exc).__name__,
        )
        return await _ai_route_lightweight_status_from_policy(
            workload=workload,
            policy_order=policy_order,
            checked_at=checked_at,
            route_reason="route_check_failed",
            route_error=str(exc),
        )
    health = _ai_route_health_map(route)
    response = {
        "schema_version": _AI_ROUTE_STATUS_SCHEMA_VERSION,
        "workload_type": workload,
        "policy_order": policy_order,
        "selected_provider": route.primary.provider_name,
        "selected_url": route.primary.url or None,
        "selected_model": route.primary.model,
        # The selected provider is priority 1, so fallbacks start at 2.
        "fallback_chain": [
            _ai_route_runtime_endpoint_item(endpoint, priority=index + 2)
            for index, endpoint in enumerate(route.fallback_chain)
        ],
        "route_reason": route.routing_reason,
        "route_source": "ollama_failover_manager",
        "route_error": None,
        "health": health,
        "checked_at": checked_at,
    }
    response.update(_ai_route_lane_state(
        policy_order=policy_order,
        selected_provider=route.primary.provider_name,
        health=health,
    ))
    return await _ai_route_response_with_repair_evidence(response)
def _validate_ai_route_workload(workload_type: str | None) -> OllamaWorkloadType:
    """Normalize the workload filter and reject values outside the allowed set.

    A missing or blank value falls back to ``"deep_rca"``. Unsupported values
    raise an HTTP 422 listing the allowed workloads.
    """
    requested = str(workload_type or "deep_rca").strip()
    workload = requested if requested else "deep_rca"
    if workload in _AI_ROUTE_WORKLOADS:
        return workload  # type: ignore[return-value]
    allowed = ", ".join(sorted(_AI_ROUTE_WORKLOADS))
    raise HTTPException(
        status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
        detail=f"Unsupported workload_type: {workload}. Allowed: {allowed}",
    )
def _ai_route_policy_order(workload: OllamaWorkloadType) -> list[dict[str, Any]]:
    """Expose the configured policy order for one workload.

    The Ollama endpoints come first, in the order returned by
    ``resolve_ollama_order``, followed by a final Gemini cloud fallback entry.
    Priorities are 1-based.
    """
    ordered: list[dict[str, Any]] = []
    for priority, endpoint in enumerate(resolve_ollama_order(workload), start=1):
        ordered.append(_ai_route_policy_endpoint_item(endpoint, priority=priority))
    gemini_entry = {
        "priority": len(ordered) + 1,
        "provider_name": "gemini",
        "url": None,
        "workload_type": workload,
        "reason": "final_cloud_fallback_after_all_ollama_endpoints",
        "role": "final_fallback",
        "runtime": "cloud",
    }
    ordered.append(gemini_entry)
    return ordered
async def _ai_route_lightweight_status_from_policy(
    *,
    workload: OllamaWorkloadType,
    policy_order: list[dict[str, Any]],
    checked_at: datetime,
    route_reason: str,
    route_error: str,
) -> dict[str, Any]:
    """Fallback read model for route status; never changes the execution router.

    Probes every Ollama endpoint of the workload concurrently and reports the
    first one that is not offline as selected. If none is reachable the
    response names Gemini as the selected provider.

    Args:
        workload: Validated workload name.
        policy_order: Policy entries, echoed back and used for lane state.
        checked_at: Timestamp stamped on the response.
        route_reason: Why the primary route check was not used; prefixed onto
            the reported ``route_reason``.
        route_error: Error text from the primary route check. It is only
            reported when the probes themselves fail; the successful fallback
            responses set ``route_error`` to ``None``.

    Returns:
        The route status payload with ``repair_evidence`` attached.
    """
    endpoints = list(resolve_ollama_order(workload))
    try:
        # Probe all endpoints concurrently; results keep endpoint order.
        reports = await asyncio.gather(
            *[_ai_route_probe_connectivity(endpoint) for endpoint in endpoints],
        )
    except Exception as exc:
        logger.warning(
            "ai_route_status_lightweight_probe_failed",
            workload_type=workload,
            route_reason=route_reason,
            error=str(exc),
        )
        # Probing itself failed: report the route as unavailable.
        response = _ai_route_unavailable_status(
            workload=workload,
            policy_order=policy_order,
            checked_at=checked_at,
            route_reason=route_reason,
            route_error=route_error,
            route_source="ollama_failover_manager",
        )
        return await _ai_route_response_with_repair_evidence(response)
    health_by_provider = {
        endpoint.provider_name: _ai_route_health_item(report)
        for endpoint, report in zip(endpoints, reports, strict=False)
    }
    # Index of the first endpoint whose probe is not OFFLINE, if any.
    selected_index = next(
        (
            index
            for index, report in enumerate(reports)
            if report.status != HealthStatus.OFFLINE
        ),
        None,
    )
    if selected_index is None:
        # Every Ollama endpoint is offline: report the Gemini final fallback.
        response = {
            "schema_version": _AI_ROUTE_STATUS_SCHEMA_VERSION,
            "workload_type": workload,
            "policy_order": policy_order,
            "selected_provider": "gemini",
            "selected_url": None,
            "selected_model": None,
            "fallback_chain": [],
            "route_reason": (
                f"{route_reason}; lightweight connectivity found all Ollama "
                "endpoints offline; final fallback policy is Gemini"
            ),
            "route_source": "lightweight_connectivity_fallback",
            "route_error": None,
            "health": health_by_provider,
            "checked_at": checked_at,
        }
        response.update(_ai_route_lane_state(
            policy_order=policy_order,
            selected_provider="gemini",
            health=health_by_provider,
        ))
        return await _ai_route_response_with_repair_evidence(response)
    selected = endpoints[selected_index]
    # The probe does not identify a model, so the health-check model is reported.
    model = get_settings().OLLAMA_HEALTH_CHECK_MODEL
    # Endpoints after the selected one keep their 1-based policy priority
    # (enumerate starts at the endpoint's 0-based index, hence index + 1).
    fallback_chain = [
        _ai_route_runtime_policy_endpoint_item(
            endpoint,
            priority=index + 1,
            model=model,
        )
        for index, endpoint in enumerate(endpoints[selected_index + 1 :], start=selected_index + 1)
    ]
    # Gemini always closes the chain, one priority after the last endpoint.
    fallback_chain.append({
        "priority": len(endpoints) + 1,
        "provider_name": "gemini",
        "url": None,
        "model": None,
        "runtime": "cloud",
    })
    response = {
        "schema_version": _AI_ROUTE_STATUS_SCHEMA_VERSION,
        "workload_type": workload,
        "policy_order": policy_order,
        "selected_provider": selected.provider_name,
        "selected_url": selected.url,
        "selected_model": model,
        "fallback_chain": fallback_chain,
        "route_reason": (
            f"{route_reason}; lightweight connectivity selected "
            f"{selected.provider_name}"
        ),
        "route_source": "lightweight_connectivity_fallback",
        "route_error": None,
        "health": health_by_provider,
        "checked_at": checked_at,
    }
    response.update(_ai_route_lane_state(
        policy_order=policy_order,
        selected_provider=selected.provider_name,
        health=health_by_provider,
    ))
    return await _ai_route_response_with_repair_evidence(response)
async def _ai_route_probe_connectivity(
    endpoint: OllamaEndpointSelection,
) -> HealthReport:
    """Cheap read-only /api/tags probe for Operator Console status fallback.

    Never raises: an endpoint without a URL, a non-200 response, and any
    exception during the request are all reported as ``OFFLINE`` with a
    reason string. Only HTTP 200 is reported as ``HEALTHY``.
    """
    if not endpoint.url:
        # Nothing to probe when the endpoint has no URL.
        return HealthReport(
            status=HealthStatus.OFFLINE,
            host=endpoint.url,
            reason="no_ollama_endpoint_url",
        )
    start = time.perf_counter()
    try:
        async with httpx.AsyncClient(
            timeout=httpx.Timeout(_AI_ROUTE_STATUS_CONNECTIVITY_TIMEOUT_SECONDS),
        ) as client:
            response = await client.get(f"{endpoint.url.rstrip('/')}/api/tags")
        # Elapsed wall time in milliseconds.
        latency_ms = (time.perf_counter() - start) * 1000
        if response.status_code == 200:
            return HealthReport(
                status=HealthStatus.HEALTHY,
                host=endpoint.url,
                latency_ms=latency_ms,
                reason="status_only_connectivity_ok",
            )
        # Any other status code counts as offline; the code is kept in the reason.
        return HealthReport(
            status=HealthStatus.OFFLINE,
            host=endpoint.url,
            latency_ms=latency_ms,
            reason=f"status_only_connectivity_http_{response.status_code}",
        )
    except Exception as exc:
        # Best-effort probe: report the exception class instead of raising.
        latency_ms = (time.perf_counter() - start) * 1000
        return HealthReport(
            status=HealthStatus.OFFLINE,
            host=endpoint.url,
            latency_ms=latency_ms,
            reason=f"status_only_connectivity_error:{type(exc).__name__}",
        )
def _ai_route_runtime_policy_endpoint_item(
endpoint: OllamaEndpointSelection,
*,
priority: int,
model: str,
) -> dict[str, Any]:
return {
"priority": priority,
"provider_name": endpoint.provider_name,
"url": endpoint.url or None,
"model": model,
"runtime": "ollama",
}
def _ai_route_unavailable_status(
    *,
    workload: OllamaWorkloadType,
    policy_order: list[dict[str, Any]],
    checked_at: datetime,
    route_reason: str,
    route_error: str,
    route_source: str,
) -> dict[str, Any]:
    """Build the route status payload for the case where no provider is selected.

    No provider, URL, model, fallback chain or health data is reported; the
    lane state is derived with no selected provider.
    """
    status_payload: dict[str, Any] = {
        "schema_version": _AI_ROUTE_STATUS_SCHEMA_VERSION,
        "workload_type": workload,
        "policy_order": policy_order,
        "selected_provider": None,
        "selected_url": None,
        "selected_model": None,
        "fallback_chain": [],
        "route_reason": route_reason,
        "route_source": route_source,
        "route_error": route_error,
        "health": {},
        "checked_at": checked_at,
    }
    lane_state = _ai_route_lane_state(
        policy_order=policy_order,
        selected_provider=None,
        health={},
    )
    for key, value in lane_state.items():
        status_payload[key] = value
    return status_payload
async def _ai_route_response_with_repair_evidence(
    response: dict[str, Any],
) -> dict[str, Any]:
    """Attach the latest read-only repair evidence when the lane is degraded.

    ``repair_evidence`` is always set on the (mutated) response. It stays
    ``None`` unless ``lane_mode`` is degraded failover, cloud fallback, or
    unavailable.
    """
    degraded_modes = {"degraded_failover", "cloud_fallback", "unavailable"}
    response["repair_evidence"] = None
    if response.get("lane_mode") in degraded_modes:
        provider = _ai_route_repair_evidence_target(response)
        evidence = await _latest_ai_route_repair_evidence(target_provider=provider)
        response["repair_evidence"] = evidence
    return response
def _ai_route_repair_evidence_target(response: Mapping[str, Any]) -> str | None:
skipped_lanes = response.get("skipped_lanes")
if isinstance(skipped_lanes, list):
for lane in skipped_lanes:
if not isinstance(lane, dict):
continue
provider_name = str(lane.get("provider_name") or "").strip()
if provider_name and lane.get("action_required") is True:
return provider_name
for lane in skipped_lanes:
if isinstance(lane, dict):
provider_name = str(lane.get("provider_name") or "").strip()
if provider_name:
return provider_name
policy_order = response.get("policy_order")
if isinstance(policy_order, list):
for item in policy_order:
if not isinstance(item, dict):
continue
if item.get("runtime") == "ollama":
provider_name = str(item.get("provider_name") or "").strip()
if provider_name:
return provider_name
return None
async def _latest_ai_route_repair_evidence(
    *,
    project_id: str = "awoooi",
    target_provider: str | None = None,
) -> dict[str, Any] | None:
    """Fetch the newest AI route repair diagnosis stored in AwoooP event DB.

    When ``target_provider`` is given, the lookup is first scoped to that
    target resource and then retried unscoped if nothing matched. Any failure
    is logged and reported as ``None``.
    """
    base_params: dict[str, Any] = {
        "project_id": project_id,
        "provider": _AI_ROUTE_REPAIR_EVIDENCE_PROVIDER,
        "stage": _AI_ROUTE_REPAIR_EVIDENCE_STAGE,
    }
    # Each attempt is a (target clause, bind parameters) pair, tried in order.
    attempts: list[tuple[str, dict[str, Any]]] = []
    if target_provider:
        scoped_clause = """
AND COALESCE(
NULLIF(source_envelope #>> '{log_correlation,target_resource}', ''),
NULLIF(source_envelope #>> '{extra,payload,target_resource}', '')
) = :target_provider
"""
        attempts.append(
            (scoped_clause, {**base_params, "target_provider": target_provider})
        )
    attempts.append(("", base_params))
    try:
        for clause, query_params in attempts:
            found = await _fetch_latest_ai_route_repair_evidence(
                params=query_params,
                target_clause=clause,
            )
            if found is not None:
                return found
        return None
    except Exception as exc:
        logger.warning(
            "ai_route_repair_evidence_fetch_failed",
            project_id=project_id,
            target_provider=target_provider,
            error=str(exc),
        )
        return None
async def _fetch_latest_ai_route_repair_evidence(
    *,
    params: dict[str, Any],
    target_clause: str,
) -> dict[str, Any] | None:
    """Run the repair-evidence lookup and project the newest matching row.

    Args:
        params: Bind parameters for the statement (``project_id``,
            ``provider``, ``stage`` and whatever ``target_clause`` binds).
        target_clause: Extra SQL predicate formatted into the WHERE clause,
            or an empty string. It is interpolated as text, so it must be a
            trusted fragment rather than user input.

    Returns:
        The projected evidence item, or ``None`` when no row matches.
    """
    # The provider is taken from the envelope, else from the prefix of
    # provider_event_id before the first ':', else from channel_type; a
    # missing stage is treated as 'received'.
    sql = text(f"""
SELECT
event_id,
run_id,
provider_event_id,
source_envelope,
provider_ts,
received_at
FROM awooop_conversation_event
WHERE project_id = :project_id
AND LOWER(COALESCE(
NULLIF(source_envelope->>'provider', ''),
NULLIF(split_part(provider_event_id, ':', 1), ''),
channel_type
)) = :provider
AND LOWER(COALESCE(NULLIF(source_envelope->>'stage', ''), 'received')) = :stage
{target_clause}
ORDER BY received_at DESC, event_id DESC
LIMIT 1
""")
    async with get_db_context("awoooi") as db:
        result = await db.execute(sql, params)
        row = result.mappings().first()
    return _ai_route_repair_evidence_item(row) if row else None
def _ai_route_repair_evidence_item(
    row: Mapping[str, Any],
) -> dict[str, Any]:
    """Project one route-repair event row into a compact operator-facing view.

    Fields are read from the row's ``source_envelope`` (its ``extra.payload``
    and ``log_correlation`` sections), then the derived work item, playbook
    recommendation and owner action are attached.
    """
    envelope = _as_dict(row.get("source_envelope"))
    payload = _as_dict(_as_dict(envelope.get("extra")).get("payload"))
    correlation = _as_dict(envelope.get("log_correlation"))
    live_probe = _as_dict(payload.get("live_probe"))
    observed_state = _as_dict(payload.get("observed_state"))
    side_effects = _ai_route_repair_side_effects(payload.get("side_effects"))
    provider_event_id = row.get("provider_event_id")

    schema_version = payload.get("schema_version") or envelope.get("schema_version")
    if not schema_version:
        schema_version = "ai_route_repair_evidence_projection_v1"

    provider = envelope.get("provider")
    if not provider:
        # Fall back to the prefix of provider_event_id before the first ':'.
        provider = str(provider_event_id or "").split(":", 1)[0]

    target_resource = (
        correlation.get("target_resource")
        or payload.get("target_resource")
        or observed_state.get("target_resource")
    )

    evidence: dict[str, Any] = {
        "schema_version": schema_version,
        "provider": provider,
        "stage": envelope.get("stage") or _AI_ROUTE_REPAIR_EVIDENCE_STAGE,
        "provider_event_id": provider_event_id,
        "conversation_event_id": _string_or_none(row.get("event_id")),
        "run_id": _string_or_none(row.get("run_id")),
        "alertname": correlation.get("alertname"),
        "severity": correlation.get("severity"),
        "fingerprint": correlation.get("fingerprint"),
        "target_resource": target_resource,
        "observed_state": observed_state,
        "live_probe": live_probe,
        "access_blockers": _as_string_list(payload.get("access_blockers")),
        "side_effects": side_effects,
        "source_ref_count": _source_ref_count(envelope),
        "provider_ts": row.get("provider_ts"),
        "received_at": row.get("received_at"),
    }
    # Order matters: the owner action reads the two entries added before it.
    derived = (
        ("work_item", _ai_route_repair_work_item),
        ("playbook_recommendation", _ai_route_repair_playbook_recommendation),
        ("owner_action", _ai_route_repair_owner_action),
    )
    for key, build in derived:
        evidence[key] = build(evidence)
    return evidence
def _ai_route_repair_side_effects(value: Any) -> dict[str, bool | None]:
    """Reduce a side-effects payload to its four known boolean flags.

    Each flag is kept only when it is a real ``bool``; anything else
    (including a missing key) becomes ``None``.
    """
    flags = _as_dict(value)
    flag_names = (
        "incident_created",
        "telegram_sent",
        "approval_created",
        "runtime_route_changed",
    )
    return {name: _bool_or_none(flags.get(name)) for name in flag_names}
def _as_string_list(value: Any) -> list[str]:
if isinstance(value, list):
return [str(item) for item in value if str(item or "").strip()]
if value not in (None, ""):
return [str(value)]
return []
def _string_or_none(value: Any) -> str | None:
if value in (None, ""):
return None
return str(value)
def _bool_or_none(value: Any) -> bool | None:
return value if isinstance(value, bool) else None
def _source_ref_count(envelope: Any) -> int:
    """Count the source references carried by an envelope's ``source_refs``.

    List values contribute one per truthy, non-blank item; any other value
    contributes one unless it is ``None`` or the empty string.
    """

    def _weight(value: Any) -> int:
        if isinstance(value, list):
            return sum(1 for entry in value if str(entry or "").strip())
        return 0 if value in (None, "") else 1

    refs = _as_dict(_as_dict(envelope).get("source_refs"))
    return sum(_weight(value) for value in refs.values())
def _recent_event_source_summary(row: AwoooPConversationEvent) -> dict[str, Any]:
    """Summarize the source envelope of a recent channel event.

    Only a fixed set of envelope fields is copied. The Telegram callback and
    log-correlation sections are included only when present and non-empty.
    """
    envelope = _as_dict(row.source_envelope)
    extra = _as_dict(envelope.get("extra"))
    callback = _as_dict(
        extra.get("telegram_callback_query")
        or envelope.get("telegram_callback_query")
    )
    correlation = _as_dict(envelope.get("log_correlation"))

    provider = envelope.get("provider")
    if not provider:
        # Prefix of provider_event_id before the first ':', else channel type.
        provider = (
            str(row.provider_event_id or "").split(":", 1)[0]
            or row.channel_type
        )

    summary: dict[str, Any] = {
        "schema_version": "awooop_recent_event_source_summary_v1",
        "provider": provider,
        "stage": envelope.get("stage"),
        "provider_event_id": row.provider_event_id,
        "source_ref_count": _source_ref_count(envelope),
        "redaction_version": envelope.get("redaction_version"),
    }

    if callback:
        # (output key, envelope key) pairs rendered as optional strings.
        text_fields = (
            ("action", "callback_action"),
            ("callback_ref", "callback_ref"),
            ("incident_id", "incident_id"),
            ("approval_id", "approval_id"),
            ("message_id", "message_id"),
        )
        callback_view: dict[str, Any] = {
            public_key: _string_or_none(callback.get(source_key))
            for public_key, source_key in text_fields
        }
        callback_view["username_present"] = _bool_or_none(
            callback.get("username_present")
        )
        summary["telegram_callback_query"] = callback_view

    if correlation:
        correlation_keys = (
            "alertname",
            "severity",
            "namespace",
            "target_resource",
            "fingerprint",
        )
        summary["log_correlation"] = {
            key: _string_or_none(correlation.get(key))
            for key in correlation_keys
        }
    return summary
def _recent_channel_event_item(row: AwoooPConversationEvent) -> dict[str, Any]:
    """Project one recent channel event row into the Operator Console DTO.

    Row attributes are copied one-to-one and the source summary is appended.
    """
    copied_attributes = (
        "event_id",
        "project_id",
        "channel_type",
        "provider_event_id",
        "channel_chat_id",
        "run_id",
        "content_type",
        "content_preview",
        "is_duplicate",
        "received_at",
    )
    item: dict[str, Any] = {name: getattr(row, name) for name in copied_attributes}
    item["source_summary"] = _recent_event_source_summary(row)
    return item
def _ai_route_repair_work_item(evidence: Mapping[str, Any]) -> dict[str, Any]:
    """Derive the read-only work item for a route-repair evidence record.

    The item is ``open`` and needs a human when access blockers are listed;
    otherwise it is ``watching``. All write flags are reported as ``False``.
    """
    target = str(evidence.get("target_resource") or "unknown").strip()
    has_blockers = bool(_as_string_list(evidence.get("access_blockers")))
    if has_blockers:
        status_value = "open"
        next_step = "restore_primary_ollama_lane_access"
        reason = "primary_lane_unavailable"
    else:
        status_value = "watching"
        next_step = "continue_route_monitoring"
        reason = "primary_lane_observed"
    return {
        "schema_version": "awooop_ai_route_repair_work_item_v1",
        "work_item_id": f"ai-route-repair:{target or 'unknown'}",
        "status": status_value,
        "kind": "ai_route_primary_lane_repair",
        "next_step": next_step,
        "reason": reason,
        "needs_human": has_blockers,
        "owner": "cloud_sre_operator",
        "target_resource": target or None,
        "target_href": "/awooop/runs",
        "decision_effect": "none",
        "safety_level": "read_only_work_item_projection",
        "writes_incident_state": False,
        "writes_auto_repair_result": False,
        "writes_runtime_route": False,
    }
def _ai_route_repair_playbook_recommendation(
    evidence: Mapping[str, Any],
) -> dict[str, Any]:
    """Build the candidate recovery playbook from blockers and live-probe data.

    Steps are emitted in a fixed order, each only when its blocker or probe
    condition holds; the final verification step is always present. The
    recommendation is never marked safe to auto-execute.
    """
    blockers = set(_as_string_list(evidence.get("access_blockers")))
    probe = _as_dict(evidence.get("live_probe"))

    def _refused(blocker: str, probe_key: str) -> bool:
        return blocker in blockers or probe.get(probe_key) == "connection_refused"

    manual = "manual_or_approved"
    readback = "read_only_verification"
    # (applies, step, scope, mode) in the order steps should appear.
    candidates = (
        (
            any(name.startswith("gcloud_") for name in blockers),
            "verify_cloud_control_plane_access",
            "gcp_compute_read",
            manual,
        ),
        (
            _refused("gcp_a_ssh_refused", "gcp_a_direct_22"),
            "restore_gcp_a_os_access",
            "gcp_serial_console_or_os_login",
            manual,
        ),
        (
            _refused("gcp_a_ollama_11434_refused", "gcp_a_direct_11434"),
            "restore_ollama_service_on_gcp_a",
            "systemd_ollama",
            manual,
        ),
        (
            probe.get("proxy_110_11435") == "http_502",
            "verify_110_proxy_after_gcp_a_recovery",
            "nginx_proxy_readback",
            readback,
        ),
        (
            True,
            "verify_ai_route_status_returns_primary",
            "awooop_ai_route_status",
            readback,
        ),
    )
    steps: list[dict[str, Any]] = [
        {"step": step, "scope": scope, "mode": mode}
        for applies, step, scope, mode in candidates
        if applies
    ]
    return {
        "schema_version": "awooop_ai_route_playbook_recommendation_v1",
        "playbook_id": "ai_route_primary_lane_recovery",
        "status": "candidate_from_live_evidence",
        "safe_to_auto_execute": False,
        "requires_approval": True,
        "decision_effect": "none",
        "steps": steps,
    }
def _ai_route_repair_owner_action(evidence: Mapping[str, Any]) -> dict[str, Any]:
    """Summarize who owns the repair and what the next step is.

    The next step and blocking reason come from the evidence's ``work_item``;
    the playbook id comes from its ``playbook_recommendation``.
    """
    work_item = _as_dict(evidence.get("work_item"))
    playbook = _as_dict(evidence.get("playbook_recommendation"))
    next_step = work_item.get("next_step") or "continue_route_monitoring"
    blocking_reason = work_item.get("reason") or "unknown"
    return {
        "schema_version": "awooop_ai_route_owner_action_v1",
        "lead_agent": "Hermes",
        "supporting_agents": ["OpenClaw", "ElephantAlpha"],
        "human_owner": "Cloud/SRE owner",
        "automation_state": "blocked_by_external_cloud_or_os_access",
        "next_step": next_step,
        "playbook_id": playbook.get("playbook_id"),
        "safe_to_auto_repair": False,
        "blocking_reason": blocking_reason,
    }
def _ai_route_lane_state(
    *,
    policy_order: list[dict[str, Any]],
    selected_provider: str | None,
    health: dict[str, dict[str, Any]],
) -> dict[str, Any]:
    """Describe the failover lane state separately from the policy labels.

    The active lane is the policy entry matching ``selected_provider``; the
    skipped lanes are the Ollama entries ranked before it. The lane mode is
    one of ``unavailable``, ``cloud_fallback``, ``degraded_failover`` or
    ``primary``.
    """
    selected_index: int | None = None
    for position, entry in enumerate(policy_order):
        if entry.get("provider_name") == selected_provider:
            selected_index = position
            break

    if selected_index is None:
        active_item = None
        preceding: list[dict[str, Any]] = []
    else:
        active_item = policy_order[selected_index]
        preceding = policy_order[:selected_index]

    skipped_lanes = [
        _ai_route_lane_item(entry, health.get(str(entry.get("provider_name"))))
        for entry in preceding
        if entry.get("runtime") == "ollama"
    ]

    # (lane_mode, human_required, action, reason)
    if not selected_provider or active_item is None:
        verdict = ("unavailable", True, "inspect_ai_router", "no_active_provider")
    elif active_item.get("runtime") == "cloud":
        verdict = (
            "cloud_fallback",
            True,
            "restore_ollama_lanes",
            "all_ollama_lanes_unavailable",
        )
    elif skipped_lanes:
        verdict = (
            "degraded_failover",
            True,
            "repair_skipped_primary_lane",
            "fallback_lane_active",
        )
    else:
        verdict = ("primary", False, "monitor", "primary_lane_active")
    lane_mode, human_required, action, reason = verdict

    active_lane = None
    if active_item:
        active_lane = _ai_route_lane_item(
            active_item,
            health.get(str(active_item.get("provider_name"))),
        )

    return {
        "lane_mode": lane_mode,
        "active_lane": active_lane,
        "skipped_lanes": skipped_lanes,
        "operator_action": {
            "human_required": human_required,
            "action": action,
            "reason": reason,
        },
    }
def _ai_route_lane_item(
item: dict[str, Any],
health_item: dict[str, Any] | None,
) -> dict[str, Any]:
return {
"priority": item.get("priority"),
"provider_name": item.get("provider_name"),
"role": item.get("role"),
"runtime": item.get("runtime"),
"url": item.get("url"),
"health_status": (health_item or {}).get("status", "not_checked"),
"reason": (health_item or {}).get("reason") or item.get("reason"),
"action_required": (health_item or {}).get("status") not in {
"healthy",
"not_checked",
None,
},
}
def _ai_route_policy_endpoint_item(
endpoint: OllamaEndpointSelection,
*,
priority: int,
) -> dict[str, Any]:
role = {
"ollama_gcp_a": "primary",
"ollama_gcp_b": "secondary",
"ollama_local": "local_fallback",
}.get(endpoint.provider_name, "ollama")
return {
"priority": priority,
"provider_name": endpoint.provider_name,
"url": endpoint.url,
"workload_type": endpoint.workload_type,
"reason": endpoint.reason,
"role": role,
"runtime": "ollama",
}
def _ai_route_runtime_endpoint_item(
endpoint: OllamaEndpoint,
*,
priority: int,
) -> dict[str, Any]:
return {
"priority": priority,
"provider_name": endpoint.provider_name,
"url": endpoint.url or None,
"model": endpoint.model,
"runtime": "ollama" if endpoint.provider_name.startswith("ollama") else "cloud",
}
def _ai_route_health_map(route: OllamaRoutingResult) -> dict[str, dict[str, Any]]:
    """Convert failover health reports into a provider-keyed map for the UI.

    The GCP-A report is always converted. The GCP-B and local entries use
    their report when one is present and a "not checked" placeholder
    otherwise.
    """
    health: dict[str, dict[str, Any]] = {
        "ollama_gcp_a": _ai_route_health_item(route.health_gcp_a),
    }
    optional_reports = (
        ("ollama_gcp_b", "health_gcp_b"),
        ("ollama_local", "health_local"),
    )
    for provider_name, attribute in optional_reports:
        report = getattr(route, attribute)
        health[provider_name] = (
            _ai_route_health_item(report)
            if report
            else _ai_route_not_checked_health_item()
        )
    return health
def _ai_route_health_item(report: HealthReport) -> dict[str, Any]:
payload = report.to_dict()
payload["checked"] = True
return payload
def _ai_route_not_checked_health_item() -> dict[str, Any]:
return {
"status": "not_checked",
"host": "",
"latency_ms": None,
"reason": "standby_not_checked_primary_healthy",
"checked_at": None,
"from_cache": False,
"checked": False,
}
def _timeline_item(
*,
ts: Any,
kind: str,
title: str,
status: str,
summary: str | None = None,
metadata: dict[str, Any] | None = None,
) -> dict[str, Any]:
"""Build one Operator Console timeline item."""
return {
"ts": ts,
"kind": kind,
"title": title,
"status": status,
"summary": summary,
"metadata": metadata or {},
}
def _utc_now_naive() -> datetime:
"""回傳與 AwoooP timestamp-without-timezone 欄位相容的 UTC 時間。"""
return datetime.now(UTC).replace(tzinfo=None)
def _truncate_step_summary(value: str | None) -> str | None:
    """Compress a step summary into a short single-line string.

    Keeps the summary within the length the DB column and the frontend
    timeline expect. Falsy input yields ``None``. Whitespace runs are
    collapsed to single spaces; text no longer than
    ``_MAX_STEP_SUMMARY_CHARS`` is returned unchanged, longer text is cut to
    ``_MAX_STEP_SUMMARY_CHARS - 1`` characters.
    """
    if not value:
        return None
    # split()/join collapses every whitespace run, newlines included.
    compact = " ".join(str(value).split())
    if len(compact) <= _MAX_STEP_SUMMARY_CHARS:
        return compact
    # NOTE(review): one character of the budget is left unused, presumably
    # reserved for a truncation marker; none is appended here — confirm.
    return f"{compact[: _MAX_STEP_SUMMARY_CHARS - 1]}"
def _approval_step_title(tool_name: str, step_seq: int) -> str:
"""將 operator_console.* step 轉成人能一眼理解的 timeline 標題。"""
if tool_name == "operator_console.approve":
return f"人工審批 {step_seq}: 核准"
if tool_name == "operator_console.reject":
return f"人工審批 {step_seq}: 拒絕"
return f"Step {step_seq}: {tool_name}"
def _outbound_timeline_title(
    channel_type: str,
    message_type: str,
    content_preview: str | None,
    callback_reply: dict[str, Any] | None = None,
) -> str:
    """Classify a legacy Telegram outbound message into an operator-readable title.

    Precedence: a callback reply (when given) determines the title; otherwise
    the first matching marker found in ``content_preview`` does; otherwise
    the title falls back to a label for ``message_type`` (or the raw
    ``message_type`` when it is unknown). Every title starts with the
    upper-cased channel type.
    """
    channel = channel_type.upper()
    preview = content_preview or ""
    if callback_reply:
        action = str(callback_reply.get("action") or "").strip()
        # This local `status` shadows the module-level fastapi `status` import
        # for the rest of this function only.
        status = str(callback_reply.get("status") or "").strip()
        # Unknown actions are shown as is; a blank action becomes "Callback".
        action_label = {
            "detail": "詳情",
            "history": "歷史",
        }.get(action, action or "Callback")
        # Unknown statuses are shown as is; a blank status gets a default label.
        status_label = {
            "callback_reply_sent": "已送出",
            "callback_reply_fallback_sent": " fallback 已送出",
            "callback_reply_rescue_sent": "救援已送出",
            "callback_reply_failed": "送出失敗",
        }.get(status, status or "已記錄")
        return f"{channel}{action_label}回覆{status_label}"
    # Marker checks are ordered; the first match wins.
    if "RUNBOOK REVIEW" in preview:
        return f"{channel}Runbook 待人工審核"
    if "[AWOOOI CI/CD]" in preview or "AWOOOI CI/CD" in preview:
        return f"{channel}CI/CD 狀態通知"
    if "AI 治理警報" in preview:
        return f"{channel}AI 治理警報"
    if "AI RETRY QUEUED" in preview or "AI 自動修復失敗" in preview:
        return f"{channel}AI 自動修復失敗,已排入重試"
    if "HANDOFF REQUIRED" in preview:
        return f"{channel}Break-glass 人工接手"
    if "AUTO RESOLVED" in preview or "AI 自動修復完成" in preview:
        return f"{channel}AI 自動修復完成"
    if "ESCALATION" in preview or "事故升級" in preview:
        return f"{channel}:事故升級通知"
    if "ACTION REQUIRED" in preview:
        return f"{channel}:告警審批卡"
    # No marker matched: label by message type, else show the raw type.
    fallback = {
        "approval_request": "人工審批請求",
        "error": "錯誤回覆",
        "final": "處置結果",
        "interim": "漸進式狀態回饋",
    }.get(message_type, message_type)
    return f"{channel}{fallback}"
def _outbound_callback_reply(source_envelope: Any) -> dict[str, Any] | None:
"""Extract Telegram callback reply evidence from outbound source envelope."""
if not isinstance(source_envelope, dict):
return None
callback_reply = source_envelope.get("callback_reply")
return callback_reply if isinstance(callback_reply, dict) else None
def _callback_reply_public_status(callback_reply: dict[str, Any]) -> str:
"""Map raw Telegram callback reply result into the Operator Console filter value."""
raw_status = str(callback_reply.get("status") or "")
return {
"callback_reply_sent": "sent",
"callback_reply_fallback_sent": "fallback_sent",
"callback_reply_rescue_sent": "rescue_sent",
"callback_reply_failed": "failed",
}.get(raw_status, "observed")
def _callback_reply_evidence_capture_status(
    *,
    callback_reply: Mapping[str, Any],
    persisted_awooop_status_chain: dict[str, Any] | None,
    persisted_km_stale_completion_summary: dict[str, Any] | None,
    event_at: Any,
) -> dict[str, Any]:
    """Explain whether callback-time evidence snapshots were persisted.

    The status is ``captured`` when both snapshots are present, ``partial``
    when only one is, and ``not_captured`` when neither is. For the last
    case the reason distinguishes a failed reply delivery from a callback
    that has no snapshot for other reasons.
    """
    snapshots = (
        ("awooop_status_chain", persisted_awooop_status_chain),
        ("km_stale_completion_summary", persisted_km_stale_completion_summary),
    )
    captured: list[str] = [name for name, snapshot in snapshots if snapshot]
    missing: list[str] = [name for name, snapshot in snapshots if not snapshot]

    retry_hint = "press_telegram_detail_or_history_after_rollout"
    if not missing:
        status_value, reason, next_action = "captured", "ok", "none"
    elif captured:
        status_value = "partial"
        reason = "partial_snapshot_rollout_transition"
        next_action = retry_hint
    else:
        status_value = "not_captured"
        delivery_failed = (
            str(callback_reply.get("status") or "") == "callback_reply_failed"
        )
        if delivery_failed:
            reason = "callback_reply_delivery_failed_snapshot_missing"
        else:
            reason = "legacy_callback_before_snapshot_rollout"
        next_action = retry_hint

    return {
        "schema_version": _CALLBACK_EVIDENCE_CAPTURE_STATUS_SCHEMA_VERSION,
        "status": status_value,
        "reason": reason,
        "action": str(callback_reply.get("action") or "").strip() or None,
        "captured": captured,
        "missing": missing,
        "snapshot_rollout": "t167_t169",
        "next_action": next_action,
        "event_at": event_at,
    }
def _callback_reply_capture_status_from_outbound(
    row: AwoooPOutboundMessage,
    callback_reply: Mapping[str, Any],
) -> dict[str, Any]:
    """Build the capture status straight from one outbound message row.

    Snapshots are read from the row's source envelope (empty ones count as
    absent); the event time is ``sent_at``, falling back to ``queued_at``.
    """
    envelope = _as_dict(row.source_envelope)
    status_chain = _as_dict(envelope.get("awooop_status_chain")) or None
    km_summary = _as_dict(envelope.get("km_stale_completion_summary")) or None
    event_at = row.sent_at or row.queued_at
    return _callback_reply_evidence_capture_status(
        callback_reply=callback_reply,
        persisted_awooop_status_chain=status_chain,
        persisted_km_stale_completion_summary=km_summary,
        event_at=event_at,
    )
def _callback_reply_event_item(row: Mapping[str, Any]) -> dict[str, Any]:
    """Convert one callback-reply outbound row into a read-only evidence item.

    A human is flagged as needed only when the public status is ``failed``.
    The run detail link is produced only when both a run id and a project id
    are present.
    """

    def _clean_or_none(raw: Any) -> str | None:
        return str(raw or "").strip() or None

    callback_reply = _as_dict(row.get("callback_reply"))
    project_id = str(row.get("project_id") or "")
    run_id = row.get("run_id")
    status_value = _callback_reply_public_status(callback_reply)
    event_at = row.get("sent_at") or row.get("queued_at")
    # Empty snapshots are treated the same as missing ones.
    status_chain = _as_dict(row.get("persisted_awooop_status_chain")) or None
    km_summary = (
        _as_dict(row.get("persisted_km_stale_completion_summary")) or None
    )
    capture_status = _callback_reply_evidence_capture_status(
        callback_reply=callback_reply,
        persisted_awooop_status_chain=status_chain,
        persisted_km_stale_completion_summary=km_summary,
        event_at=event_at,
    )

    run_detail_href = None
    if run_id and project_id:
        run_detail_href = f"/awooop/runs/{run_id}?project_id={project_id}"

    # Columns copied from the row unchanged, in output order.
    passthrough = (
        "channel_type",
        "message_type",
        "send_status",
        "send_error",
        "provider_message_id",
        "triggered_by_state",
        "content_preview",
        "run_state",
        "agent_id",
        "run_created_at",
    )
    return {
        "message_id": row.get("message_id"),
        "run_id": run_id,
        "project_id": project_id,
        "status": status_value,
        "needs_human": status_value == "failed",
        "action": _clean_or_none(callback_reply.get("action")),
        "incident_id": _clean_or_none(callback_reply.get("incident_id")),
        "event_at": event_at,
        **{column: row.get(column) for column in passthrough},
        "callback_reply": callback_reply,
        "persisted_awooop_status_chain": status_chain,
        "persisted_km_stale_completion_summary": km_summary,
        "evidence_capture_status": capture_status,
        "run_detail_href": run_detail_href,
    }
def _empty_km_stale_completion_summary(
    *,
    project_id: str,
    incident_id: str | None,
    status_value: str,
    reason: str | None = None,
) -> dict[str, Any]:
    """Build an all-zero KM owner-review summary for callback evidence.

    It has the same keys as a populated summary but with zero counters, no
    related items, and a work item derived from the given status and reason.
    """
    zeroed_counters = (
        "total",
        "returned",
        "pending_count",
        "ready_count",
        "blocked_count",
        "completed_count",
        "failed_count",
    )
    summary: dict[str, Any] = {
        "schema_version": _KM_STALE_COMPLETION_CALLBACK_SCHEMA_VERSION,
        "project_id": project_id,
        "incident_id": incident_id,
        "status": status_value,
        "missing_reason": reason,
    }
    summary.update(dict.fromkeys(zeroed_counters, 0))
    summary.update(
        writes_on_read=False,
        manual_review_required=True,
        batch_writes_allowed=False,
        items_truncated=False,
        related_total=0,
        related_items=[],
    )
    summary["work_item"] = _km_stale_callback_owner_review_work_item(
        project_id=project_id,
        incident_id=incident_id,
        status_value=status_value,
        reason=reason,
    )
    return summary
def _object_field(payload: Any, name: str, default: Any = None) -> Any:
if isinstance(payload, Mapping):
return payload.get(name, default)
return getattr(payload, name, default)
def _object_int_field(payload: Any, name: str) -> int:
    """Read ``name`` from a mapping or object and coerce it to ``int``.

    Missing, falsy, or non-convertible values yield ``0``; this helper never
    raises for a bad field value.
    """
    try:
        return int(_object_field(payload, name, 0) or 0)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers infinite floats/Decimals, which int() rejects
        # with a different exception than ordinary bad values.
        return 0
def _build_km_stale_completion_summary(
    *,
    queue: Any,
    project_id: str,
    incident_id: str,
) -> dict[str, Any]:
    """Summarize the KM owner-review completion queue for a single incident.

    Queue items whose ``related_incident_id`` (stripped) equals
    ``incident_id`` are collected; the summary reports how many matched, at
    most the first three of them, and the queue-level counters.
    """
    copied_fields = (
        "entry_id",
        "title",
        "dispatch_id",
        "governance_event_id",
        "readiness",
        "workflow_stage",
        "next_action",
        "priority_tier",
        "recommended_completion_outcome",
    )
    matches: list[dict[str, Any]] = []
    for candidate in list(_object_field(queue, "items", []) or []):
        linked_incident = str(
            _object_field(candidate, "related_incident_id") or ""
        ).strip()
        if linked_incident == incident_id:
            entry: dict[str, Any] = {
                field: _object_field(candidate, field) for field in copied_fields
            }
            entry["can_preview"] = bool(_object_field(candidate, "can_preview", False))
            matches.append(entry)

    # Status and reason are derived once and shared with the work-item helper.
    if matches:
        status_value = "matched_owner_review"
        missing_reason = None
    else:
        status_value = "no_related_owner_review"
        missing_reason = "no_matching_completion_item"

    total = _object_int_field(queue, "total")
    returned = _object_int_field(queue, "returned")
    summary: dict[str, Any] = {
        "schema_version": _KM_STALE_COMPLETION_CALLBACK_SCHEMA_VERSION,
        "project_id": project_id,
        "incident_id": incident_id,
        "status": status_value,
        "missing_reason": missing_reason,
        "total": total,
        "returned": returned,
    }
    for counter in (
        "pending_count",
        "ready_count",
        "blocked_count",
        "completed_count",
        "failed_count",
    ):
        summary[counter] = _object_int_field(queue, counter)
    summary["writes_on_read"] = bool(_object_field(queue, "writes_on_read", False))
    summary["manual_review_required"] = bool(
        _object_field(queue, "manual_review_required", True)
    )
    summary["batch_writes_allowed"] = bool(
        _object_field(queue, "batch_writes_allowed", False)
    )
    summary["items_truncated"] = total > returned
    summary["related_total"] = len(matches)
    summary["related_items"] = matches[:3]
    summary["work_item"] = _km_stale_callback_owner_review_work_item(
        project_id=project_id,
        incident_id=incident_id,
        status_value=status_value,
        reason=missing_reason,
    )
    return summary
def _km_stale_callback_owner_review_work_item(
*,
project_id: str,
incident_id: str | None,
status_value: str,
reason: str | None,
) -> dict[str, Any] | None:
"""Generate a read-only Work Items link for callback evidence gaps."""
if not incident_id or status_value != "no_related_owner_review":
return None
work_item_id = f"km-callback-owner-review:{project_id}:{incident_id}"
target_query = urlencode(
{
"project_id": project_id,
"incident_id": incident_id,
"callback_reply_status": "sent",
}
)
work_item_query = urlencode(
{
"project_id": project_id,
"work_item_id": work_item_id,
"incident_id": incident_id,
}
)
return {
"schema_version": "km_stale_callback_owner_review_work_item_v1",
"work_item_id": work_item_id,
"kind": "km_stale_callback_owner_review",
"status": "open",
"project_id": project_id,
"incident_id": incident_id,
"reason": reason or "no_matching_completion_item",
"title": "Telegram callback incident has no matching KM owner-review item",
"next_step": "review_or_queue_km_owner_review",
"target_surface": "awooop_runs_callback_evidence",
"target_href": f"/awooop/runs?{target_query}",
"work_item_href": f"/awooop/work-items?{work_item_query}",
"triage": {
"schema_version": "km_stale_callback_owner_review_triage_v1",
"flow_stage": "callback_observed_owner_review_link_missing",
"ai_lead_agent": "Hermes",
"supporting_agents": ["OpenClaw", "ElephantAlpha"],
"automation_state": "manual_owner_review_required",
"safe_to_auto_repair": False,
"blocking_reason": reason or "no_matching_completion_item",
"matching_strategy": "related_incident_id_exact_match",
"already_done": [
"callback_reply_persisted",
"completion_queue_checked",
"generated_read_only_work_item",
],
"next_actions": [
"review_runs_callback_evidence",
"queue_matching_km_stale_candidate",
"complete_owner_review_after_owner_approval",
],
},
"writes_on_read": False,
"manual_review_required": True,
"batch_writes_allowed": False,
}
def _outbound_timeline_status(
send_status: str,
callback_reply: dict[str, Any] | None,
) -> str:
"""Prefer callback delivery status when the outbound row records one."""
if callback_reply:
status = callback_reply.get("status")
if isinstance(status, str) and status:
return status
return send_status
def _outbound_timeline_summary(
*,
content_preview: str | None,
send_error: str | None,
callback_reply: dict[str, Any] | None,
) -> str | None:
"""Summarize callback reply state without forcing operators to inspect raw JSON."""
if not callback_reply:
return content_preview or send_error
parts = [
f"callback={callback_reply.get('action') or '--'}",
f"incident={callback_reply.get('incident_id') or '--'}",
f"status={callback_reply.get('status') or '--'}",
]
parse_mode = callback_reply.get("parse_mode")
if parse_mode:
parts.append(f"parse_mode={parse_mode}")
error = callback_reply.get("error")
if error:
parts.append(f"error={error}")
if content_preview:
parts.append(str(content_preview))
return " · ".join(parts)
def _outbound_timeline_metadata(
row: AwoooPOutboundMessage,
callback_reply: dict[str, Any] | None,
) -> dict[str, Any]:
"""Build compact outbound metadata with callback fields first when present."""
metadata: dict[str, Any] = {}
if callback_reply:
metadata.update({
"callback_status": callback_reply.get("status"),
"callback_action": callback_reply.get("action"),
"callback_incident_id": callback_reply.get("incident_id"),
"callback_parse_mode": callback_reply.get("parse_mode"),
})
metadata.update({
"message_type": row.message_type,
"provider_message_id": row.provider_message_id,
"triggered_by_state": row.triggered_by_state,
})
return metadata
def _validate_cicd_stage_filter(value: str | None) -> str | None:
    """Normalize a CI/CD stage filter and reject anything outside the allowed pattern.

    ``None`` and blank input mean "no filter" and return ``None``. Otherwise
    the trimmed, lower-cased value must fully match ``_CICD_STAGE_RE``.

    Raises:
        HTTPException: 422 when the normalized value does not match.
    """
    stage = None if value is None else value.strip().lower()
    if not stage:
        return None
    if _CICD_STAGE_RE.fullmatch(stage):
        return stage
    raise HTTPException(
        status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
        detail="stage 格式錯誤,僅允許 a-z、0-9、底線、冒號與短橫線",
    )
def _validate_cicd_status_filter(value: str | None) -> str | None:
    """Normalize a CI/CD status filter and check it against the allowed set.

    ``None`` and blank input mean "no filter" and return ``None``. Otherwise
    the trimmed, lower-cased value must be a member of ``_CICD_STATUS_FILTERS``.

    Raises:
        HTTPException: 422 listing the allowed values when the input is not one of them.
    """
    status_value = None if value is None else value.strip().lower()
    if not status_value:
        return None
    if status_value in _CICD_STATUS_FILTERS:
        return status_value
    allowed = ", ".join(sorted(_CICD_STATUS_FILTERS))
    raise HTTPException(
        status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
        detail=f"status 必須是: {allowed}",
    )
def _cicd_duration_seconds(value: Any) -> int:
"""Coerce Alertmanager duration_seconds label into a non-negative integer."""
try:
duration = int(str(value or "0"))
except (TypeError, ValueError):
return 0
return max(duration, 0)
def _cicd_event_needs_attention(status_value: str | None, severity: str | None) -> bool:
"""Return whether a CI/CD evidence row should be highlighted for operators."""
normalized_status = str(status_value or "").lower()
normalized_severity = str(severity or "").lower()
return normalized_status in {"failed", "pending"} or normalized_severity in {
"critical",
"warning",
}
def _cicd_event_item_from_row(row: Mapping[str, Any], *, project_id: str) -> dict[str, Any]:
    """Turn one CI/CD log row into the item shape shown to operators.

    Each field prefers the row's own column and falls back to the alert
    ``labels``, ``annotations`` or ``context`` stored under ``row["context"]``.
    Status and severity are lower-cased, text fields are stripped, and
    ``needs_attention`` is derived from status and severity.
    """
    context = _as_dict(row.get("context"))
    labels = _as_dict(context.get("labels"))
    annotations = _as_dict(context.get("annotations"))

    def _lowered(column: str) -> str | None:
        # Empty results collapse to None so the UI can tell "unset" from "".
        return str(row.get(column) or labels.get(column) or "").lower() or None

    def _annotated_text(column: str) -> str | None:
        raw = row.get(column) or annotations.get(column)
        return str(raw).strip() if raw else None

    status_value = _lowered("status")
    severity = _lowered("severity")
    raw_duration = row.get("duration_seconds") or labels.get("duration_seconds")

    item: dict[str, Any] = {
        "id": str(row.get("id") or ""),
        "project_id": project_id,
        "alertname": str(row.get("alertname") or labels.get("alertname") or ""),
        "stage": row.get("stage") or labels.get("stage"),
        "status": status_value,
        "severity": severity,
        "commit_sha": row.get("commit_sha") or labels.get("commit"),
        "triggered_by": row.get("triggered_by") or labels.get("triggered_by"),
        "duration_seconds": _cicd_duration_seconds(raw_duration),
        "summary": _annotated_text("summary"),
        "description": _annotated_text("description"),
        "workflow_url": _annotated_text("workflow_url"),
        "alert_id": row.get("alert_id") or context.get("alert_id"),
        "source": row.get("source") or context.get("source"),
        "action_detail": row.get("action_detail"),
        "needs_attention": _cicd_event_needs_attention(status_value, severity),
        "created_at": row.get("created_at"),
    }
    return item
def _run_callback_reply_summary(
    outbound_messages: list[AwoooPOutboundMessage],
) -> dict[str, Any]:
    """Summarize callback reply delivery across a run's outbound messages.

    Only messages whose source envelope yields callback reply data are
    counted. The summary reports per-status totals, the newest callback
    (ordered by ``sent_at``, falling back to ``queued_at``) and an aggregated
    evidence-capture status. With no callback rows a fixed ``no_callback``
    shape is returned.
    """
    schema_version = "awooop_run_callback_reply_summary_v1"
    paired: list[tuple[AwoooPOutboundMessage, dict[str, Any]]] = [
        (message, reply)
        for message in outbound_messages
        if (reply := _outbound_callback_reply(message.source_envelope))
    ]
    if not paired:
        return {
            "schema_version": schema_version,
            "status": "no_callback",
            "total": 0,
            "sent": 0,
            "fallback_sent": 0,
            "rescue_sent": 0,
            "failed": 0,
            "needs_human": False,
            "latest_status": None,
            "latest_action": None,
            "latest_incident_id": None,
            "latest_at": None,
            "latest_provider_message_id": None,
            "capture_status": "no_callback",
            "capture_captured": 0,
            "capture_partial": 0,
            "capture_not_captured": 0,
            "latest_capture_status": None,
            "latest_capture_missing": [],
            "latest_capture_next_action": None,
        }

    # Newest first; timestamps are compared through their string form.
    paired.sort(
        key=lambda pair: str(pair[0].sent_at or pair[0].queued_at or ""),
        reverse=True,
    )
    newest_row, newest_reply = paired[0]
    reply_statuses = [str(entry.get("status") or "") for _, entry in paired]
    failed_total = reply_statuses.count("callback_reply_failed")
    newest_status = str(newest_reply.get("status") or "")
    public_status = _callback_reply_public_status(newest_reply)

    captures = [
        _callback_reply_capture_status_from_outbound(message, entry)
        for message, entry in paired
    ]
    capture_labels = [str(capture.get("status") or "observed") for capture in captures]
    missed_total = capture_labels.count("not_captured")
    partial_total = capture_labels.count("partial")
    captured_total = capture_labels.count("captured")
    newest_capture = captures[0] if captures else {}

    # The worst capture state across all callbacks wins.
    if missed_total > 0:
        overall_capture = "not_captured"
    elif partial_total > 0:
        overall_capture = "partial"
    elif captured_total > 0 and captured_total == len(captures):
        overall_capture = "captured"
    else:
        overall_capture = "observed"

    return {
        "schema_version": schema_version,
        "status": public_status,
        "total": len(paired),
        "sent": reply_statuses.count("callback_reply_sent"),
        "fallback_sent": reply_statuses.count("callback_reply_fallback_sent"),
        "rescue_sent": reply_statuses.count("callback_reply_rescue_sent"),
        "failed": failed_total,
        "needs_human": failed_total > 0 or newest_status == "callback_reply_failed",
        "latest_status": newest_status or None,
        "latest_action": newest_reply.get("action"),
        "latest_incident_id": newest_reply.get("incident_id"),
        "latest_at": newest_row.sent_at or newest_row.queued_at,
        "latest_provider_message_id": newest_row.provider_message_id,
        "capture_status": overall_capture,
        "capture_captured": captured_total,
        "capture_partial": partial_total,
        "capture_not_captured": missed_total,
        "latest_capture_status": newest_capture.get("status"),
        "latest_capture_missing": newest_capture.get("missing") or [],
        "latest_capture_next_action": newest_capture.get("next_action"),
    }
def _mcp_gateway_summary_row(row: AwoooPMcpGatewayAudit) -> dict[str, Any]:
"""Convert SQLAlchemy audit rows into the truth-chain summary shape."""
return {
"agent_id": row.agent_id,
"tool_name": row.tool_name,
"result_status": row.result_status,
"block_gate": row.block_gate,
"gate_result": row.gate_result or {},
}
def _as_dict(value: Any) -> dict[str, Any]:
"""Return dict payloads defensively; DB JSON fields may be null or stale."""
return value if isinstance(value, dict) else {}
def _append_unique(values: list[str], candidate: Any) -> None:
"""Append non-empty string once while preserving discovery order."""
text_value = str(candidate or "").strip()
if text_value and text_value not in values:
values.append(text_value)
def _append_incident_ids_from_text(values: list[str], text_value: Any) -> None:
    """Append every incident id that ``_INCIDENT_ID_RE`` finds in ``text_value``.

    Falsy input is ignored; ids are added to ``values`` once each, in match order.
    """
    if text_value:
        for match in _INCIDENT_ID_RE.findall(str(text_value)):
            _append_unique(values, match)
def _append_incident_ids_from_source_envelope(values: list[str], envelope: Any) -> None:
    """Append incident ids found under ``envelope["source_refs"]["incident_ids"]``.

    A list contributes each element; any other value is offered as a single
    candidate. Non-dict envelopes and missing keys contribute nothing.
    """
    refs = _as_dict(_as_dict(envelope).get("source_refs"))
    raw_ids = refs.get("incident_ids")
    candidates = raw_ids if isinstance(raw_ids, list) else [raw_ids]
    for candidate in candidates:
        _append_unique(values, candidate)
def _collect_run_incident_ids(
    *,
    run: AwoooPRunState,
    inbound_events: list[AwoooPConversationEvent],
    outbound_messages: list[AwoooPOutboundMessage],
) -> list[str]:
    """Gather the incident ids referenced by a run and its message sidecars.

    Ids are taken, in order, from the run's trigger reference and error
    detail, then from each inbound event and each outbound message: the source
    envelope first, then the text fields. The result has no duplicates.
    """
    found: list[str] = []
    for run_text in (run.trigger_ref, run.error_detail):
        _append_incident_ids_from_text(found, run_text)

    sidecar_groups = (
        (inbound_events, ("content_preview", "content_redacted")),
        (outbound_messages, ("content_preview", "send_error")),
    )
    for sidecars, text_attributes in sidecar_groups:
        for sidecar in sidecars:
            _append_incident_ids_from_source_envelope(found, sidecar.source_envelope)
            for attribute in text_attributes:
                _append_incident_ids_from_text(found, getattr(sidecar, attribute))
    return found
async def _find_run_ids_for_incident_filter(
    db: Any,
    *,
    project_id: str | None,
    incident_id: str,
    limit: int,
) -> list[UUID]:
    """Pre-filter run ids for a single incident before loading list context.

    The old list filter loaded every run for a project and then searched message
    sidecars in Python. That becomes too expensive on production-sized history
    and can turn a single incident drilldown into a gateway timeout.

    Runs are matched through the run row itself, inbound conversation events
    and outbound messages, then ordered by their most recent matching
    timestamp. At most ``limit`` ids are returned (a limit below 1 is raised to 1).

    The incident id is caller-supplied, so LIKE metacharacters in it are
    escaped before it is embedded in the ILIKE pattern. Unescaped, an id such
    as ``%`` would match every row and an ``_`` would match any character.
    """
    # Backslash is PostgreSQL's default LIKE escape character; escape it first
    # so the escapes added for % and _ are not themselves re-escaped.
    like_literal = (
        incident_id.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    )
    params: dict[str, Any] = {
        "incident_id": incident_id,
        "incident_like": f"%{like_literal}%",
        "limit": max(int(limit), 1),
    }
    project_run_where = ""
    event_project_where = ""
    outbound_project_where = ""
    if project_id is not None:
        params["project_id"] = project_id
        project_run_where = "AND r.project_id = :project_id"
        event_project_where = "AND e.project_id = :project_id"
        outbound_project_where = "AND m.project_id = :project_id"
    # Only the fixed project predicates above are interpolated into the SQL;
    # every caller-supplied value travels as a bind parameter.
    query = text(f"""
        WITH matched AS (
            SELECT r.run_id::text AS run_id, r.created_at AS ts
            FROM awooop_run_state r
            WHERE (
                r.trigger_ref ILIKE :incident_like
                OR r.error_detail ILIKE :incident_like
            )
            {project_run_where}
            UNION ALL
            SELECT e.run_id::text AS run_id, e.received_at AS ts
            FROM awooop_conversation_event e
            WHERE e.run_id IS NOT NULL
            AND (
                e.source_envelope #> '{{source_refs,incident_ids}}' ? :incident_id
                OR e.content_preview ILIKE :incident_like
                OR e.content_redacted ILIKE :incident_like
                OR e.provider_event_id ILIKE :incident_like
            )
            {event_project_where}
            UNION ALL
            SELECT m.run_id::text AS run_id, COALESCE(m.sent_at, m.queued_at) AS ts
            FROM awooop_outbound_message m
            WHERE (
                m.source_envelope #> '{{source_refs,incident_ids}}' ? :incident_id
                OR m.source_envelope #> '{{awooop_status_chain,incident_ids}}' ? :incident_id
                OR m.source_envelope #>> '{{awooop_status_chain,source_id}}' = :incident_id
                OR m.source_envelope #>> '{{callback_reply,incident_id}}' = :incident_id
                OR m.content_preview ILIKE :incident_like
                OR m.content_redacted ILIKE :incident_like
                OR m.send_error ILIKE :incident_like
            )
            {outbound_project_where}
        )
        SELECT run_id
        FROM matched
        WHERE run_id IS NOT NULL
        GROUP BY run_id
        ORDER BY MAX(ts) DESC
        LIMIT :limit
    """)
    result = await db.execute(query, params)
    run_ids: list[UUID] = []
    for row in result.mappings().all():
        raw_run_id = str(row.get("run_id") or "")
        try:
            run_id = uuid.UUID(raw_run_id)
        except ValueError:
            # Skip rows whose run_id text is not a valid UUID.
            continue
        if run_id not in run_ids:
            run_ids.append(run_id)
    return run_ids
async def _load_run_message_context(
    db: Any,
    runs: list[AwoooPRunState],
    *,
    limit: int = _MAX_LIST_CONTEXT_ROWS,
) -> tuple[
    dict[UUID, list[AwoooPConversationEvent]],
    dict[UUID, list[AwoooPOutboundMessage]],
]:
    """Load list-page sidecar events needed to link runs back to incidents.

    Runs are processed in the batches produced by ``_iter_run_context_batches``.
    ``limit`` is a row budget applied separately to inbound events and to
    outbound messages and shared across all batches: each query is capped at
    the rows still remaining, and querying stops once a budget reaches zero.

    Returns:
        A pair ``(inbound_by_run, outbound_by_run)`` of plain dicts keyed by
        run id. Both are empty when ``runs`` is empty.
    """
    if not runs:
        return {}, {}
    inbound_by_run: dict[UUID, list[AwoooPConversationEvent]] = defaultdict(list)
    outbound_by_run: dict[UUID, list[AwoooPOutboundMessage]] = defaultdict(list)
    # Negative limits are treated as "load nothing".
    remaining_inbound = max(int(limit), 0)
    remaining_outbound = max(int(limit), 0)
    for batch in _iter_run_context_batches(runs):
        if remaining_inbound > 0:
            # An inbound event belongs to the batch when its run_id matches, or
            # when a run's trigger_ref equals its provider_event_id or event_id.
            inbound_filters = [AwoooPConversationEvent.run_id.in_(batch["run_ids"])]
            if batch["trigger_refs"]:
                inbound_filters.append(AwoooPConversationEvent.provider_event_id.in_(
                    batch["trigger_refs"]
                ))
            if batch["trigger_event_ids"]:
                inbound_filters.append(AwoooPConversationEvent.event_id.in_(
                    batch["trigger_event_ids"]
                ))
            inbound_result = await db.execute(
                select(AwoooPConversationEvent)
                .where(sa_or(*inbound_filters))
                .order_by(AwoooPConversationEvent.received_at.desc())
                .limit(remaining_inbound)
            )
            inbound_events = list(inbound_result.scalars().all())
            remaining_inbound = max(remaining_inbound - len(inbound_events), 0)
            for event in inbound_events:
                # Resolve the owning run: direct run_id first, then the
                # trigger_ref lookup by provider_event_id, then by event_id.
                target_run_id = (
                    event.run_id if event.run_id in batch["run_ids_set"] else None
                )
                if target_run_id is None:
                    target_run_id = batch["trigger_ref_to_run"].get(
                        str(event.provider_event_id)
                    )
                if target_run_id is None:
                    target_run_id = batch["trigger_ref_to_run"].get(str(event.event_id))
                # Events that resolve to no run in this batch are dropped.
                if target_run_id is not None:
                    inbound_by_run[target_run_id].append(event)
        if remaining_outbound > 0:
            outbound_result = await db.execute(
                select(AwoooPOutboundMessage)
                .where(AwoooPOutboundMessage.run_id.in_(batch["run_ids"]))
                .order_by(AwoooPOutboundMessage.queued_at.desc())
                .limit(remaining_outbound)
            )
            outbound_messages = list(outbound_result.scalars().all())
            remaining_outbound = max(remaining_outbound - len(outbound_messages), 0)
            for message in outbound_messages:
                outbound_by_run[message.run_id].append(message)
    # Convert to plain dicts so a missing key does not silently create an entry.
    return dict(inbound_by_run), dict(outbound_by_run)
def _list_filter_context_limit(candidate_count: int) -> int:
    """Return the sidecar row budget for a filtered list of ``candidate_count`` runs.

    Four rows per candidate, never below ``_MAX_LIST_CONTEXT_ROWS`` and never
    above 20,000.
    """
    lower_bounded = max(candidate_count * 4, _MAX_LIST_CONTEXT_ROWS)
    return min(lower_bounded, 20_000)
def _iter_run_context_batches(
    runs: list[AwoooPRunState],
) -> list[dict[str, Any]]:
    """Split runs into lookup batches of ``_RUN_CONTEXT_QUERY_CHUNK_SIZE``.

    Each batch carries the run ids (as list and set), the non-empty trigger
    references as strings, a trigger-reference-to-run-id map, and the trigger
    references that parse as UUIDs.
    """
    chunk_size = _RUN_CONTEXT_QUERY_CHUNK_SIZE
    batches: list[dict[str, Any]] = []
    for offset in range(0, len(runs), chunk_size):
        window = runs[offset : offset + chunk_size]
        run_ids = [run.run_id for run in window]
        ref_pairs = [
            (str(run.trigger_ref), run.run_id) for run in window if run.trigger_ref
        ]
        trigger_refs = [ref for ref, _ in ref_pairs]

        trigger_event_ids: list[UUID] = []
        for ref in trigger_refs:
            try:
                parsed = uuid.UUID(ref)
            except ValueError:
                # Not every trigger reference is an event UUID.
                continue
            trigger_event_ids.append(parsed)

        batches.append({
            "run_ids": run_ids,
            "run_ids_set": set(run_ids),
            "trigger_refs": trigger_refs,
            # When two runs share a trigger reference the later run wins.
            "trigger_ref_to_run": dict(ref_pairs),
            "trigger_event_ids": trigger_event_ids,
        })
    return batches
def _route_label_from_remediation(item: dict[str, Any]) -> str:
"""Render remediation MCP route consistently with Telegram / Work Items."""
return "/".join(
str(part)
for part in (
item.get("agent_id"),
item.get("tool_name"),
item.get("required_scope"),
)
if part
) or "--"
def _route_label_from_legacy_mcp(record: dict[str, Any]) -> str:
"""Render self-built/legacy MCP evidence as agent/tool/scope for list UX."""
tool = record.get("tool_name")
server = record.get("mcp_server")
tool_label = ".".join(str(part) for part in (server, tool) if part) or tool
return "/".join(
str(part)
for part in (
record.get("agent_role"),
tool_label,
"read",
)
if part
) or "--"
def _remediation_timeline_status(item: dict[str, Any]) -> str:
if item.get("success") is False or item.get("allowed") is False:
return "failed"
if item.get("verification_result_preview") == "success":
return "success"
return "warning"
def _remediation_timeline_summary(item: dict[str, Any]) -> str:
    """Render a remediation record as space-separated ``key=value`` pairs.

    Missing incident, mode and preview values are shown as ``--``; the result
    is truncated to 500 characters.
    """
    fragments = [
        f"incident={item.get('incident_id') or '--'}",
        f"mode={item.get('mode') or '--'}",
        f"preview={item.get('verification_result_preview') or '--'}",
        f"route={_route_label_from_remediation(item)}",
        f"writes_incident={item.get('writes_incident_state')}",
        f"writes_auto_repair={item.get('writes_auto_repair_result')}",
    ]
    return " ".join(fragments)[:500]
def _legacy_mcp_timeline_status(record: dict[str, Any]) -> str:
if record.get("success") is True:
return "success"
if record.get("success") is False:
return "failed"
return "warning"
def _legacy_mcp_timeline_summary(record: dict[str, Any]) -> str:
return (
f"incident={record.get('incident_id') or '--'} "
f"agent={record.get('agent_role') or '--'} "
f"node={record.get('flywheel_node') or '--'} "
f"duration_ms={record.get('duration_ms') if record.get('duration_ms') is not None else '--'} "
f"error={record.get('error_message') or '--'}"
)[:500]
def _run_remediation_list_summary(
    *,
    run: AwoooPRunState,
    incident_ids: list[str],
    items: list[dict[str, Any]],
    legacy_mcp_records: list[dict[str, Any]] | None = None,
    errors: list[dict[str, str]] | None = None,
) -> dict[str, Any]:
    """Condense dry-run and MCP investigation evidence into one list-row summary.

    Non-dict entries are ignored. Both evidence lists are ordered newest first
    by the string form of ``created_at``; the newest dry-run item drives the
    status, route and write flags, and the newest MCP record fills in the
    route, agent and tool only when the dry-run item does not supply them.
    """

    def _newest_first(rows: list[Any]) -> list[dict[str, Any]]:
        usable = [row for row in rows if isinstance(row, dict)]
        usable.sort(key=lambda row: str(row.get("created_at") or ""), reverse=True)
        return usable

    dry_runs = _newest_first(items)
    mcp_rows = _newest_first(legacy_mcp_records or [])
    latest = dry_runs[0] if dry_runs else {}
    latest_mcp = mcp_rows[0] if mcp_rows else {}

    writes_incident = latest.get("writes_incident_state")
    writes_auto_repair = latest.get("writes_auto_repair_result")

    if latest:
        route = _route_label_from_remediation(latest)
    elif latest_mcp:
        route = _route_label_from_legacy_mcp(latest_mcp)
    else:
        route = "--"

    write_observed = writes_incident is True or writes_auto_repair is True
    # Read-only requires an explicit read scope and both write flags explicitly False.
    is_read_only = (
        bool(latest)
        and latest.get("required_scope") == "read"
        and writes_incident is False
        and writes_auto_repair is False
    )

    mcp_total = len(mcp_rows)
    mcp_success = sum(1 for record in mcp_rows if record.get("success") is True)
    mcp_failed = sum(1 for record in mcp_rows if record.get("success") is False)

    if dry_runs:
        if latest.get("success") is False or latest.get("allowed") is False:
            status_value = "blocked"
        elif write_observed:
            status_value = "write_observed"
        elif is_read_only:
            status_value = "read_only_dry_run"
        else:
            status_value = "observed"
    else:
        status_value = "mcp_observed" if mcp_total > 0 else "no_evidence"

    if dry_runs:
        source = "alert_operation_log"
    elif mcp_total > 0:
        source = "mcp_audit_log"
    else:
        source = "none"

    return {
        "schema_version": "awooop_run_remediation_summary_v1",
        "source": source,
        "incident_ids": incident_ids,
        "total": len(dry_runs),
        "evidence_total": len(dry_runs) + mcp_total,
        "status": status_value,
        "has_dry_run": bool(dry_runs),
        "has_mcp_investigation": mcp_total > 0,
        "is_read_only": is_read_only,
        "human_gate_open": run.state == "waiting_approval",
        "latest_at": latest.get("created_at"),
        "latest_preview": latest.get("verification_result_preview"),
        "latest_mode": latest.get("mode"),
        "latest_route": route,
        "latest_agent_id": latest.get("agent_id") or latest_mcp.get("agent_role"),
        "latest_tool_name": latest.get("tool_name") or latest_mcp.get("tool_name"),
        "latest_required_scope": latest.get("required_scope") or ("read" if latest_mcp else None),
        "writes_incident_state": writes_incident,
        "writes_auto_repair_result": writes_auto_repair,
        "mcp_observation_total": mcp_total,
        "mcp_observation_success": mcp_success,
        "mcp_observation_failed": mcp_failed,
        "latest_mcp_server": latest_mcp.get("mcp_server"),
        "errors": errors or [],
    }
def _safe_int(value: Any) -> int:
try:
return int(value or 0)
except (TypeError, ValueError):
return 0
def _has_repair_execution_evidence(facts: dict[str, Any]) -> bool:
    """Tell whether ``facts`` records at least one repair execution.

    True when either ``auto_repair_execution_records`` or
    ``effective_execution_records`` is a positive count.
    """
    return any(
        _safe_int(facts.get(counter)) > 0
        for counter in ("auto_repair_execution_records", "effective_execution_records")
    )
def _has_nonrepair_operation_evidence(facts: dict[str, Any]) -> bool:
    """Tell whether ``facts`` shows automation operations but no repair execution.

    True when ``automation_operation_records`` is positive and no repair
    execution evidence is present.
    """
    if _safe_int(facts.get("automation_operation_records")) <= 0:
        return False
    return not _has_repair_execution_evidence(facts)
def _status_chain_ansible_dry_run_only(
    execution_section: dict[str, Any],
    facts: dict[str, Any],
) -> bool:
    """Tell whether the Ansible evidence consists of check-mode runs only.

    True when ``execution_section["ansible"]`` is a dict with at least one
    check-mode run, no apply runs, neither ``applied`` nor
    ``controlled_apply`` set, and ``facts`` records no auto-repair executions.
    """
    ansible = execution_section.get("ansible")
    if not isinstance(ansible, dict):
        return False
    if _safe_int(ansible.get("check_mode_total")) <= 0:
        return False
    if _safe_int(ansible.get("apply_total")) != 0:
        return False
    if bool(ansible.get("applied")) or bool(ansible.get("controlled_apply")):
        return False
    return _safe_int(facts.get("auto_repair_execution_records")) == 0
def _compact_release_items(
items: Any,
*,
id_field: str,
summary_field: str,
limit: int = 5,
) -> list[dict[str, Any]]:
if not isinstance(items, list):
return []
compacted: list[dict[str, Any]] = []
for item in items:
if not isinstance(item, dict):
continue
compacted.append(
{
"id": item.get(id_field),
"display_name": item.get("display_name"),
"owner_agent": item.get("owner_agent"),
"status": item.get("status"),
"summary": item.get(summary_field),
"runtime_write_allowed": item.get("runtime_write_allowed", False),
}
)
if len(compacted) >= limit:
break
return compacted
def _owner_release_package_bridge() -> dict[str, Any]:
    """Expose the latest owner-release approval gate snapshot as a read model.

    When loading the snapshot raises, a ``snapshot_unavailable`` shape with
    zero counts and the truncated error text is returned instead of
    propagating. Otherwise the rollup counts, up to five compact packets and
    up to three compact operator actions are reported.
    """
    schema_version = "awooop_owner_release_package_bridge_v1"
    # (output key, rollup key) pairs, in output order.
    count_sources = (
        ("packet_count", "owner_release_approval_packet_count"),
        ("approval_required_count", "approval_required_packet_count"),
        ("blocked_count", "blocked_packet_count"),
        ("owner_release_approved_count", "owner_release_approved_count"),
        ("maintenance_window_approved_count", "maintenance_window_approved_count"),
        ("rollback_owner_confirmed_count", "rollback_owner_confirmed_count"),
        ("operator_action_count", "operator_action_count"),
    )
    try:
        payload = load_latest_ai_agent_result_capture_owner_release_approval_gate()
    except Exception as exc:  # pragma: no cover - fail-closed read model guard
        unavailable: dict[str, Any] = {
            "schema_version": schema_version,
            "status": "snapshot_unavailable",
            "source_schema_version": None,
            "source_task_id": None,
            "runtime_authority": "read_model_unavailable_no_write",
        }
        unavailable.update(dict.fromkeys((name for name, _ in count_sources), 0))
        unavailable.update(packets=[], operator_actions=[], error=str(exc)[:160])
        return unavailable

    raw_rollups = payload.get("rollups")
    rollups = raw_rollups if isinstance(raw_rollups, dict) else {}
    raw_program = payload.get("program_status")
    program_status = raw_program if isinstance(raw_program, dict) else {}

    bridge: dict[str, Any] = {
        "schema_version": schema_version,
        "status": "owner_review_required",
        "source_schema_version": payload.get("schema_version"),
        "source_task_id": program_status.get("current_task_id"),
        "runtime_authority": program_status.get("runtime_authority"),
    }
    for name, rollup_key in count_sources:
        bridge[name] = _safe_int(rollups.get(rollup_key))
    bridge["packets"] = _compact_release_items(
        payload.get("owner_release_approval_packets"),
        id_field="packet_id",
        summary_field="approval_summary",
    )
    bridge["operator_actions"] = _compact_release_items(
        payload.get("operator_actions"),
        id_field="action_id",
        summary_field="operator_instruction",
        limit=3,
    )
    return bridge
def _release_verifier_package_bridge() -> dict[str, Any]:
    """Expose the latest release-verifier preflight gate snapshot as a read model.

    When loading the snapshot raises, a ``snapshot_unavailable`` shape with
    zero counts and the truncated error text is returned instead of
    propagating. Otherwise the rollup counts, up to five compact verifier
    gates and up to three compact operator actions are reported.
    """
    schema_version = "awooop_release_verifier_package_bridge_v1"
    # (output key, rollup key) pairs, in output order.
    count_sources = (
        ("verifier_count", "release_verifier_preflight_count"),
        ("rollback_count", "rollback_verifier_preflight_count"),
        ("maintenance_hold_count", "maintenance_window_verifier_hold_count"),
        ("live_apply_hold_count", "live_apply_verifier_hold_count"),
        ("owner_release_authorized_count", "owner_release_authorized_count"),
        ("post_release_verifier_ready_count", "post_release_verifier_ready_count"),
        ("live_apply_release_pass_count", "live_apply_release_pass_count"),
        ("operator_action_count", "operator_action_count"),
    )
    try:
        payload = load_latest_ai_agent_result_capture_release_verifier_preflight_gate()
    except Exception as exc:  # pragma: no cover - fail-closed read model guard
        unavailable: dict[str, Any] = {
            "schema_version": schema_version,
            "status": "snapshot_unavailable",
            "source_schema_version": None,
            "source_task_id": None,
            "runtime_authority": "read_model_unavailable_no_write",
        }
        unavailable.update(dict.fromkeys((name for name, _ in count_sources), 0))
        unavailable.update(verifier_gates=[], operator_actions=[], error=str(exc)[:160])
        return unavailable

    raw_rollups = payload.get("rollups")
    rollups = raw_rollups if isinstance(raw_rollups, dict) else {}
    raw_program = payload.get("program_status")
    program_status = raw_program if isinstance(raw_program, dict) else {}

    bridge: dict[str, Any] = {
        "schema_version": schema_version,
        "status": "preflight_review_required",
        "source_schema_version": payload.get("schema_version"),
        "source_task_id": program_status.get("current_task_id"),
        "runtime_authority": program_status.get("runtime_authority"),
    }
    for name, rollup_key in count_sources:
        bridge[name] = _safe_int(rollups.get(rollup_key))
    bridge["verifier_gates"] = _compact_release_items(
        payload.get("release_verifier_preflight_gates"),
        id_field="readback_id",
        summary_field="readback_summary",
    )
    bridge["operator_actions"] = _compact_release_items(
        payload.get("operator_actions"),
        id_field="action_id",
        summary_field="operator_instruction",
        limit=3,
    )
    return bridge
def _apply_gate_closure_tasks(
    *,
    source_ref: str,
    safe_source_ref: str,
    catalog_id: str,
    owner_release_package: dict[str, Any],
    verifier_package: dict[str, Any],
    controlled_apply_allowed: bool,
) -> list[dict[str, Any]]:
    """List the five closure tasks that follow an apply-gate decision.

    Each task's status and next step take one of two values depending on
    ``controlled_apply_allowed``, and every task carries that flag unchanged
    as ``runtime_write_allowed``.
    """

    def _pick(when_allowed: str, otherwise: str) -> str:
        return when_allowed if controlled_apply_allowed else otherwise

    def _task(
        key: str,
        status_value: str,
        owner_agent: str,
        source_asset_id: str,
        work_item_prefix: str,
        summary: str,
        next_step: str,
    ) -> dict[str, Any]:
        return {
            "key": key,
            "status": status_value,
            "owner_agent": owner_agent,
            "source_asset_id": source_asset_id,
            "work_item_id": f"{work_item_prefix}:awoooi:{safe_source_ref}",
            "summary": summary,
            "next_step": next_step,
            "runtime_write_allowed": controlled_apply_allowed,
        }

    owner_release_asset = "agent-result-capture-owner-release-approval-gate:P2-131"
    verifier_asset = "agent-result-capture-release-verifier-preflight-gate:P2-136"
    packet_count = _safe_int(owner_release_package.get("packet_count"))
    verifier_count = _safe_int(verifier_package.get("verifier_count"))
    verifier_ready = _safe_int(verifier_package.get("post_release_verifier_ready_count"))

    return [
        _task(
            "owner_release_packet_review",
            _pick(
                "ai_policy_receipt_recorded",
                "ai_repair_required_before_policy_receipt",
            ),
            "openclaw",
            owner_release_asset,
            "owner-release-review",
            f"packets={packet_count}; owner_policy=low_medium_high_auto_authorized",
            _pick(
                "record_autonomous_policy_receipt_and_watch_stop_conditions",
                "wait_for_ai_playbook_or_transport_repair",
            ),
        ),
        _task(
            "maintenance_window_rollback_owner",
            _pick(
                "ai_selected_with_rollback_context",
                "waiting_for_repaired_check_mode",
            ),
            "hermes",
            owner_release_asset,
            "maintenance-rollback-review",
            "maintenance_window=standard_auto_window; "
            "rollback_owner=sre_oncall_or_service_owner",
            _pick(
                "prepare_automatic_rollback_and_context",
                "rerun_check_mode_after_repair",
            ),
        ),
        _task(
            "controlled_execution_authorization",
            _pick(
                "controlled_apply_authorized",
                "ai_repair_required_before_runtime_gate",
            ),
            "ansible_controlled_apply_worker",
            f"ansible-apply-candidate:{catalog_id}",
            "controlled-execution-gate",
            _pick(
                f"incident={source_ref}; runtime_gate=controlled_apply",
                f"incident={source_ref}; runtime_gate=repair_required",
            ),
            _pick(
                "execute_allowlisted_apply_after_check_mode",
                "ai_repairs_playbook_or_transport_then_retries_check_mode",
            ),
        ),
        _task(
            "post_apply_verifier_preflight",
            _pick(
                "queued_after_controlled_apply",
                "waiting_for_controlled_apply",
            ),
            "openclaw",
            verifier_asset,
            "post-apply-verifier",
            f"verifier={verifier_count}; ready={verifier_ready}",
            _pick(
                "run_post_apply_verifier_after_apply",
                "wait_for_successful_check_mode_and_apply",
            ),
        ),
        _task(
            "km_playbook_trust_writeback_plan",
            _pick(
                "queued_after_verifier_passes",
                "waiting_for_verifier",
            ),
            "hermes",
            f"playbook-trust-update-candidate:{catalog_id}",
            "km-playbook-writeback",
            "km_write=0; playbook_trust_write=0",
            # The next step of the writeback task does not depend on the gate.
            "prepare_km_and_playbook_writeback_after_verified_execution",
        ),
    ]
def _apply_gate_controlled_execution_preflight(
    *,
    source_ref: str,
    safe_source_ref: str,
    catalog_id: str,
    check_mode_playbook: Any,
    apply_playbook: Any,
    dry_run_passed: bool,
    owner_release_package: dict[str, Any],
    verifier_package: dict[str, Any],
) -> dict[str, Any]:
    """Report whether the allowlisted Ansible apply route may execute.

    The route is authorised when check-mode passed, ``catalog_id`` is
    non-empty and ``apply_playbook`` is neither blank nor the ``"--"``
    placeholder. ``owner_release_package`` is accepted but not read here.

    Returns:
        An ``awooop_controlled_execution_preflight_v1`` payload holding the
        verdict, the single candidate route, the prerequisite rows and their
        ready/blocked tallies.
    """
    verifier_ready = _safe_int(verifier_package.get("post_release_verifier_ready_count"))
    normalized_apply_playbook = str(apply_playbook or "").strip()
    controlled_apply_allowed = bool(
        dry_run_passed
        and catalog_id
        and normalized_apply_playbook
        and normalized_apply_playbook != "--"
    )

    # Everything that flips together with the verdict is decided in one place.
    if controlled_apply_allowed:
        overall_status = "controlled_apply_allowed"
        next_action = "controlled_apply_worker_executes_after_check_mode"
        route_status = "allowed"
        blocker = None
        route_total = 1
    else:
        overall_status = "ai_playbook_repair_required"
        next_action = "ai_repairs_playbook_or_transport_then_retries_check_mode"
        route_status = "route_missing"
        blocker = "check_mode_failed_or_route_missing"
        route_total = 0

    def _row(key: str, status_value: str, detail: str) -> dict[str, Any]:
        # Every prerequisite is mandatory, so "required" is always True.
        return {
            "key": key,
            "status": status_value,
            "detail": detail,
            "required": True,
        }

    def _ready_or_waiting(ready_status: str) -> str:
        return ready_status if controlled_apply_allowed else "waiting"

    prerequisites = [
        _row(
            "dry_run_passed",
            "passed" if dry_run_passed else "blocked_missing_dry_run",
            f"dry_run_passed={str(dry_run_passed).lower()}",
        ),
        _row(
            "allowlisted_route_candidate",
            "controlled_apply_allowed" if controlled_apply_allowed else "route_missing",
            f"catalog={catalog_id}",
        ),
        _row(
            "owner_release_receipt",
            _ready_or_waiting("auto_waived_by_risk_policy"),
            "low_medium_high_auto_authorized",
        ),
        _row(
            "maintenance_window",
            _ready_or_waiting("ai_selected_standard_window"),
            "auto_window_with_rollback_and_verifier",
        ),
        _row(
            "rollback_owner",
            _ready_or_waiting("ai_prefilled"),
            "rollback_owner=sre_oncall_or_service_owner",
        ),
        _row(
            "post_apply_verifier",
            "passed" if verifier_ready > 0 else "blocked_missing_post_apply_verifier",
            f"ready={verifier_ready}",
        ),
        _row(
            "km_playbook_writeback",
            "blocked_until_verified_execution",
            "km_write=0; playbook_trust_write=0",
        ),
    ]

    # Statuses that count as "ready" when tallying the prerequisites.
    ready_statuses = {
        "passed",
        "candidate_ready_no_runtime_authority",
        "controlled_apply_allowed",
        "auto_waived_by_risk_policy",
        "ai_selected_standard_window",
        "ai_prefilled",
    }
    total_count = len(prerequisites)
    ready_count = len(
        [entry for entry in prerequisites if entry["status"] in ready_statuses]
    )

    route = {
        "route_id": f"ansible-allowlisted-apply:{catalog_id}",
        "transport": "ansible",
        "status": route_status,
        "source_asset_id": f"ansible-apply-candidate:{catalog_id}",
        "check_mode_playbook_path": check_mode_playbook,
        "apply_playbook_path": apply_playbook,
        "allowed": controlled_apply_allowed,
        "blocker": blocker,
    }
    return {
        "schema_version": "awooop_controlled_execution_preflight_v1",
        "status": overall_status,
        "source_id": source_ref,
        "work_item_id": f"controlled-execution-gate:awoooi:{safe_source_ref}",
        "runtime_execution_authorized": controlled_apply_allowed,
        "runtime_write_allowed": controlled_apply_allowed,
        "allowed_route_count": route_total,
        "candidate_route_count": route_total,
        "ready_count": ready_count,
        "total_count": total_count,
        "blocked_count": total_count - ready_count,
        "next_action": next_action,
        "blocked_reason": blocker,
        "routes": [route],
        "prerequisites": prerequisites,
        "forbidden_until_released": [
            "critical_or_break_glass_apply",
            "secret_read",
            "database_migration",
            "stateful_restore_or_prune",
            "node_drain",
            "reboot",
        ],
    }
def _apply_gate_execution_release_contract(
    *,
    source_ref: str,
    safe_source_ref: str,
    catalog_id: str,
    check_mode_playbook: Any,
    apply_playbook: Any,
    dry_run_passed: bool,
    facts: dict[str, Any],
    controlled_execution_preflight: dict[str, Any],
) -> dict[str, Any]:
    """Build the AI-filled release contract used by controlled apply routes.

    Args:
        source_ref: Identifier copied into ``source_id`` and ``incident_ref``.
        safe_source_ref: Form of ``source_ref`` embedded in work-item and
            asset ids.
        catalog_id: Catalog id embedded in the route id and dry-run evidence.
        check_mode_playbook: Passed through as ``check_mode_playbook_path``.
        apply_playbook: Passed through as ``apply_playbook_path``.
        dry_run_passed: Whether check-mode evidence may be marked prefilled.
        facts: Only ``mcp_gateway_total`` is read, to decide whether MCP
            evidence exists.
        controlled_execution_preflight: Preflight payload; its
            ``runtime_execution_authorized`` flag drives every authorised /
            repair-required branch below, and its ``work_item_id`` is linked.

    Returns:
        An ``awooop_execution_release_contract_v1`` dict with the contract
        field rows, the nested owner-release draft, ready/blocked tallies and
        the follow-up steps.
    """
    route_id = f"ansible-allowlisted-apply:{catalog_id}"
    # MCP evidence counts as present once the gateway total fact is positive.
    mcp_evidence_ready = _safe_int(facts.get("mcp_gateway_total")) > 0
    # The contract never re-derives the verdict; it mirrors the preflight flag.
    controlled_apply_allowed = bool(controlled_execution_preflight.get("runtime_execution_authorized"))
    owner_release_draft = {
        "schema_version": "awooop_owner_release_draft_v1",
        "status": "ai_prefilled_auto_authorized" if controlled_apply_allowed else "ai_prefilled_repair_required",
        "source_id": source_ref,
        "work_item_id": f"owner-release-draft:awoooi:{safe_source_ref}",
        # NOTE(review): hard-coded 9 while draft_fields below lists six
        # entries — confirm which fields this count is meant to cover.
        "ai_prefilled_count": 9,
        "human_decision_count": 0 if controlled_apply_allowed else 1,
        "runtime_execution_authorized": controlled_apply_allowed,
        "runtime_write_allowed": controlled_apply_allowed,
        "human_only_fields": [] if controlled_apply_allowed else ["critical_break_glass_override"],
        "draft_fields": [
            {
                "key": "maintenance_window",
                "status": "ai_selected_auto_policy",
                "value": f"maintenance-window-review:awoooi:{safe_source_ref}",
                "reason": "AI selected the standard maintenance window under the current autonomous policy.",
            },
            {
                "key": "rollback_owner",
                "status": "ai_selected_auto_policy",
                "value": "sre_oncall_or_service_owner_required",
                "reason": "AI nominated the rollback role from the standard on-call/service-owner policy.",
            },
            {
                "key": "blast_radius",
                "status": "ai_prefilled_auto_policy",
                "value": f"target={safe_source_ref}; route={route_id}; write_scope=single_allowlisted_apply",
                "reason": "Blast radius is limited to the selected allowlisted route.",
            },
            {
                "key": "post_apply_verifier",
                "status": "ai_queued_after_controlled_apply",
                "value": f"verifier-plan:{safe_source_ref}",
                "reason": "Verifier plan is prepared from the same source evidence and runs after controlled apply.",
            },
            {
                "key": "km_writeback_owner",
                "status": "ai_prefilled_after_verified_execution",
                "value": "hermes",
                "reason": "Hermes prepares KM writeback only after verified execution.",
            },
            {
                "key": "playbook_trust_owner",
                "status": "ai_prefilled_after_verified_execution",
                "value": "openclaw_nemotron_review",
                "reason": "Trust writeback waits for verifier output and reviewer scoring.",
            },
        ],
        "next_action": (
            "controlled_apply_worker_executes"
            if controlled_apply_allowed
            else "ai_repairs_playbook_or_transport_then_retries"
        ),
    }
    # Contract rows. Only dry_run_evidence, mcp_evidence_refs and
    # owner_approval_receipt can be blocked; the remaining rows always carry
    # a status that is listed in ready_statuses below.
    field_rows = [
        {
            "key": "incident_ref",
            "status": "prefilled",
            "value": source_ref,
            "required": True,
            "next_step": "verify_incident_still_firing_or_recurring",
        },
        {
            "key": "route_id",
            "status": "prefilled",
            "value": route_id,
            "required": True,
            "next_step": "confirm_route_scope_matches_target",
        },
        {
            "key": "dry_run_evidence",
            "status": "prefilled" if dry_run_passed else "blocked_missing_dry_run",
            "value": f"ansible-check-mode:{catalog_id}" if dry_run_passed else "--",
            "required": True,
            "next_step": "rerun_check_mode_if_evidence_is_stale",
        },
        {
            "key": "mcp_evidence_refs",
            "status": "prefilled" if mcp_evidence_ready else "blocked_missing_mcp_evidence",
            "value": f"mcp-evidence:{safe_source_ref}" if mcp_evidence_ready else "--",
            "required": True,
            "next_step": "collect_fresh_mcp_evidence_refs",
        },
        {
            "key": "owner_approval_receipt",
            "status": "auto_waived_by_current_owner_policy" if controlled_apply_allowed else "blocked_missing_break_glass",
            "value": "owner_policy_low_medium_high_auto_authorized" if controlled_apply_allowed else "--",
            "required": True,
            "next_step": "record_autonomous_policy_receipt" if controlled_apply_allowed else "collect_break_glass_receipt",
        },
        {
            "key": "maintenance_window",
            "status": "ai_selected_auto_policy",
            "value": f"maintenance-window-review:awoooi:{safe_source_ref}",
            "required": True,
            "next_step": "use_standard_ai_selected_maintenance_window",
        },
        {
            "key": "rollback_owner",
            "status": "ai_selected_auto_policy",
            "value": "sre_oncall_or_service_owner_required",
            "required": True,
            "next_step": "use_standard_ai_selected_rollback_owner",
        },
        {
            "key": "blast_radius",
            "status": "ai_prefilled_auto_policy",
            "value": f"target={safe_source_ref}; route={route_id}; write_scope=single_allowlisted_apply",
            "required": True,
            "next_step": "record_ai_prefilled_blast_radius",
        },
        {
            "key": "post_apply_verifier",
            "status": "ai_queued_after_controlled_apply",
            "value": f"verifier-plan:{safe_source_ref}",
            "required": True,
            "next_step": "run_verifier_after_controlled_apply",
        },
        {
            "key": "km_writeback_owner",
            "status": "ai_prefilled_after_verified_execution",
            "value": "hermes",
            "required": True,
            "next_step": "assign_km_writeback_owner_after_verified_execution",
        },
        {
            "key": "playbook_trust_owner",
            "status": "ai_prefilled_after_verified_execution",
            "value": "openclaw_nemotron_review",
            "required": True,
            "next_step": "assign_playbook_trust_owner_after_verified_execution",
        },
    ]
    # Statuses counted as ready. Some entries ("passed",
    # "candidate_ready_no_runtime_authority", the two "*_owner_review_required"
    # values) are not produced by any row built in this function.
    ready_statuses = {
        "prefilled",
        "passed",
        "candidate_ready_no_runtime_authority",
        "auto_waived_by_current_owner_policy",
        "ai_suggested_owner_review_required",
        "ai_prefilled_owner_review_required",
        "ai_selected_auto_policy",
        "ai_prefilled_auto_policy",
        "ai_queued_after_controlled_apply",
        "ai_prefilled_after_verified_execution",
    }
    ready_count = sum(1 for row in field_rows if row["status"] in ready_statuses)
    # Anything that is not ready is reported as blocked.
    blocked_count = len(field_rows) - ready_count
    return {
        "schema_version": "awooop_execution_release_contract_v1",
        "status": "controlled_apply_auto_authorized" if controlled_apply_allowed else "draft_prefilled_needs_ai_repair",
        "source_id": source_ref,
        "work_item_id": f"execution-release-contract:awoooi:{safe_source_ref}",
        "route_id": route_id,
        "runtime_execution_authorized": controlled_apply_allowed,
        "runtime_write_allowed": controlled_apply_allowed,
        "ready_count": ready_count,
        "total_count": len(field_rows),
        "blocked_count": blocked_count,
        "blocked_reason": None if controlled_apply_allowed else "check_mode_or_route_not_ready",
        "check_mode_playbook_path": check_mode_playbook,
        "apply_playbook_path": apply_playbook,
        # Back-reference to the preflight payload this contract was built from.
        "controlled_preflight_work_item_id": controlled_execution_preflight.get(
            "work_item_id"
        ),
        "fields": field_rows,
        "owner_release_draft": owner_release_draft,
        "next_steps": [
            {
                "key": "owner_release",
                "owner_agent": "openclaw",
                "work_item_id": f"owner-release-review:awoooi:{safe_source_ref}",
                "summary": "OpenClaw records autonomous policy receipt and watches stop conditions",
                "runtime_write_allowed": controlled_apply_allowed,
            },
            {
                "key": "maintenance_rollback",
                "owner_agent": "hermes",
                "work_item_id": f"maintenance-rollback-review:awoooi:{safe_source_ref}",
                "summary": "Hermes prepares automatic rollback owner and maintenance context",
                "runtime_write_allowed": controlled_apply_allowed,
            },
            {
                "key": "verifier_release",
                "owner_agent": "openclaw",
                "work_item_id": f"post-apply-verifier:awoooi:{safe_source_ref}",
                "summary": "OpenClaw runs post-apply verifier after controlled apply",
                "runtime_write_allowed": controlled_apply_allowed,
            },
            {
                "key": "learning_writeback",
                "owner_agent": "hermes",
                "work_item_id": f"km-playbook-writeback:awoooi:{safe_source_ref}",
                "summary": "prepare KM and PlayBook trust writeback after verified execution",
                "runtime_write_allowed": controlled_apply_allowed,
            },
        ],
        "forbidden_until_contract_complete": [
            "critical_or_break_glass_apply",
            "secret_read",
            "database_migration",
            "stateful_restore_or_prune",
            "node_drain",
            "reboot",
        ],
    }
def _status_chain_ansible_apply_gate_handoff(
    *,
    ansible_dry_run_only: bool,
    execution_section: dict[str, Any],
    facts: dict[str, Any],
    incident_ids: list[str],
    source_id: str | None,
    verification: str,
) -> dict[str, Any] | None:
    """Build the apply-gate handoff payload for an Ansible check-mode run.

    Args:
        ansible_dry_run_only: When falsy no handoff is produced.
        execution_section: Dict whose ``"ansible"`` entry supplies the latest
            check-mode status, return code, playbook path and candidates.
        facts: Read for ``mcp_gateway_total`` and ``knowledge_entries``.
        incident_ids: The first entry is the fallback source reference.
        source_id: Preferred source reference when truthy.
        verification: Verification state; ``verified``, ``success`` and
            ``healthy`` (case-insensitive) mark the verifier as ready.

    Returns:
        ``None`` when ``ansible_dry_run_only`` is falsy or the execution
        section has no ``ansible`` dict; otherwise an
        ``awooop_automation_handoff_v1`` dict with closure readiness, the
        controlled-execution preflight and the execution release contract.
    """
    if not ansible_dry_run_only:
        return None
    ansible = execution_section.get("ansible")
    if not isinstance(ansible, dict):
        return None
    candidate_playbooks = (
        ansible.get("candidate_playbooks")
        if isinstance(ansible.get("candidate_playbooks"), list)
        else []
    )
    first_candidate = (
        candidate_playbooks[0]
        if candidate_playbooks and isinstance(candidate_playbooks[0], dict)
        else {}
    )
    source_ref = source_id or (incident_ids[0] if incident_ids else "unknown")
    # Keep only alphanumerics, "-" and "_" so the reference can be embedded in
    # work-item / asset ids; fall back to "unknown" if nothing is left.
    safe_source_ref = "".join(
        ch if ch.isalnum() or ch in {"-", "_"} else "-"
        for ch in str(source_ref)
    ).strip("-") or "unknown"
    # Because of the literal fallback, catalog_id is always truthy here.
    catalog_id = (
        ansible.get("latest_catalog_id")
        or first_candidate.get("catalog_id")
        or "ansible-candidate"
    )
    check_mode_playbook = ansible.get("latest_playbook_path") or "--"
    # Prefer the candidate's own path; otherwise derive the apply playbook
    # from the check-mode path by dropping the "-readonly" suffix.
    apply_playbook = (
        first_candidate.get("playbook_path")
        or str(check_mode_playbook).replace("-readonly.yml", ".yml")
        or "--"
    )
    latest_status = str(ansible.get("latest_status") or "unknown").lower()
    # `or ""` folds both a missing return code and an integer 0 into "".
    latest_returncode = str(ansible.get("latest_returncode") or "")
    dry_run_passed = latest_status == "success" and latest_returncode in {"", "0"}
    verifier_ready = str(verification).lower() in {"verified", "success", "healthy"}
    mcp_evidence_ready = _safe_int(facts.get("mcp_gateway_total")) > 0
    # Same readiness rule the controlled-execution preflight applies: check-mode
    # passed, a catalog id exists and the apply playbook is a real path.
    controlled_apply_candidate_ready = (
        dry_run_passed
        and bool(catalog_id)
        and str(apply_playbook or "").strip()
        and str(apply_playbook or "").strip() != "--"
    )
    closure_gates = [
        {
            "key": "mcp_evidence",
            "status": "passed" if mcp_evidence_ready else "warning",
            "detail": f"mcp={_safe_int(facts.get('mcp_gateway_total'))}",
            "asset_id": f"mcp-evidence:{safe_source_ref}",
        },
        {
            "key": "dry_run",
            "status": "passed" if dry_run_passed else "warning",
            "detail": (
                f"check={_safe_int(ansible.get('check_mode_total'))}; "
                f"rc={ansible.get('latest_returncode') if ansible.get('latest_returncode') is not None else '--'}"
            ),
            "asset_id": f"ansible-check-mode:{catalog_id}",
        },
        {
            # NOTE(review): this gate is unconditionally "passed", even when
            # catalog_id fell back to the "ansible-candidate" default —
            # confirm that is intended.
            "key": "apply_candidate",
            "status": "passed",
            "detail": f"catalog={catalog_id}",
            "asset_id": f"ansible-apply-candidate:{catalog_id}",
        },
        {
            "key": "owner_release",
            "status": (
                "auto_waived_by_owner_policy"
                if controlled_apply_candidate_ready
                else "blocked"
            ),
            "detail": (
                "owner_policy=low_medium_high_auto_authorized"
                if controlled_apply_candidate_ready
                else "owner_release_receipt=0"
            ),
            "asset_id": f"owner-release-approval:{safe_source_ref}",
        },
        {
            "key": "controlled_execution",
            "status": (
                "controlled_apply_authorized"
                if controlled_apply_candidate_ready
                else "blocked"
            ),
            "detail": (
                "runtime_gate=controlled_apply"
                if controlled_apply_candidate_ready
                else "runtime_gate=ai_repair_required"
            ),
            "asset_id": f"controlled-execution:{safe_source_ref}",
        },
        {
            # An already-verified state wins; otherwise the verifier is queued
            # behind the controlled apply, or blocked when no apply can run.
            "key": "post_apply_verifier",
            "status": (
                "passed"
                if verifier_ready
                else "queued_after_controlled_apply"
                if controlled_apply_candidate_ready
                else "blocked"
            ),
            "detail": (
                f"verification={verification or 'missing'}"
                if verifier_ready
                else "verifier=queued_after_controlled_apply"
                if controlled_apply_candidate_ready
                else f"verification={verification or 'missing'}"
            ),
            "asset_id": f"verifier-plan:{safe_source_ref}",
        },
        {
            "key": "km_writeback",
            "status": (
                "queued_after_verifier"
                if controlled_apply_candidate_ready
                else "blocked"
            ),
            "detail": (
                "km_writeback=queued_after_verifier"
                if controlled_apply_candidate_ready
                else f"km={_safe_int(facts.get('knowledge_entries'))}"
            ),
            "asset_id": f"km-writeback-candidate:{safe_source_ref}",
        },
        {
            "key": "playbook_trust",
            "status": (
                "queued_after_verifier"
                if controlled_apply_candidate_ready
                else "blocked"
            ),
            "detail": (
                "trust_writeback=queued_after_verifier"
                if controlled_apply_candidate_ready
                else "trust_writeback=0"
            ),
            "asset_id": f"playbook-trust-update-candidate:{catalog_id}",
        },
    ]
    # Gate statuses that count towards closure completion. "warning" gates are
    # counted neither as ready nor as blocked.
    closure_ready_statuses = {
        "passed",
        "auto_waived_by_owner_policy",
        "controlled_apply_authorized",
        "queued_after_controlled_apply",
        "queued_after_verifier",
    }
    closure_ready_count = sum(
        1 for gate in closure_gates if gate["status"] in closure_ready_statuses
    )
    closure_total_count = len(closure_gates)
    closure_blocked_count = sum(1 for gate in closure_gates if gate["status"] == "blocked")
    # closure_gates is a fixed, non-empty literal, so the division is safe.
    closure_completion_percent = int(round((closure_ready_count / closure_total_count) * 100))
    owner_release_package = _owner_release_package_bridge()
    verifier_package = _release_verifier_package_bridge()
    closure_tasks = _apply_gate_closure_tasks(
        source_ref=str(source_ref),
        safe_source_ref=safe_source_ref,
        catalog_id=str(catalog_id),
        owner_release_package=owner_release_package,
        verifier_package=verifier_package,
        controlled_apply_allowed=bool(controlled_apply_candidate_ready),
    )
    controlled_execution_preflight = _apply_gate_controlled_execution_preflight(
        source_ref=str(source_ref),
        safe_source_ref=safe_source_ref,
        catalog_id=str(catalog_id),
        check_mode_playbook=check_mode_playbook,
        apply_playbook=apply_playbook,
        dry_run_passed=dry_run_passed,
        owner_release_package=owner_release_package,
        verifier_package=verifier_package,
    )
    execution_release_contract = _apply_gate_execution_release_contract(
        source_ref=str(source_ref),
        safe_source_ref=safe_source_ref,
        catalog_id=str(catalog_id),
        check_mode_playbook=check_mode_playbook,
        apply_playbook=apply_playbook,
        dry_run_passed=dry_run_passed,
        facts=facts,
        controlled_execution_preflight=controlled_execution_preflight,
    )
    # From here on the preflight's runtime_execution_authorized flag is the
    # single source for every authorised / repair-required branch.
    return {
        "schema_version": "awooop_automation_handoff_v1",
        "kind": "ansible_check_mode_apply_gate",
        "status": (
            "controlled_apply_auto_authorized"
            if controlled_execution_preflight.get("runtime_execution_authorized")
            else "ai_playbook_repair_required"
        ),
        "source_id": source_ref,
        "work_item_id": f"ansible-apply-gate:awoooi:{safe_source_ref}",
        "decision_effect": (
            "controlled_apply_authorized"
            if controlled_execution_preflight.get("runtime_execution_authorized")
            else "repair_playbook_or_transport"
        ),
        "runtime_execution_authorized": bool(
            controlled_execution_preflight.get("runtime_execution_authorized")
        ),
        "writes_runtime_state": bool(
            controlled_execution_preflight.get("runtime_write_allowed")
        ),
        "owner_review_gate": "waived_for_low_medium_high_by_current_owner_policy",
        "next_action": (
            "wait_for_controlled_apply_worker_and_verifier"
            if controlled_execution_preflight.get("runtime_execution_authorized")
            else "ai_repairs_playbook_or_transport_then_retries_check_mode"
        ),
        "asset_ids": {
            "dry_run": f"ansible-check-mode:{catalog_id}",
            "apply_candidate": f"ansible-apply-candidate:{catalog_id}",
            "verifier": f"verifier-plan:{safe_source_ref}",
        },
        "closure_readiness": {
            "schema_version": "awooop_apply_gate_closure_readiness_v1",
            "status": (
                "controlled_apply_auto_authorized"
                if controlled_execution_preflight.get("runtime_execution_authorized")
                else "ai_playbook_repair_required"
            ),
            "completion_percent": closure_completion_percent,
            "ready_count": closure_ready_count,
            "total_count": closure_total_count,
            "blocked_count": closure_blocked_count,
            "runtime_execution_authorized": bool(
                controlled_execution_preflight.get("runtime_execution_authorized")
            ),
            "writes_runtime_state": bool(
                controlled_execution_preflight.get("runtime_write_allowed")
            ),
            "next_action": (
                "controlled_apply_worker_executes"
                if controlled_execution_preflight.get("runtime_execution_authorized")
                else "ai_repairs_playbook_or_transport_then_retries"
            ),
            "blocked_reason": (
                None
                if controlled_execution_preflight.get("runtime_execution_authorized")
                else "check_mode_failed_or_route_missing"
            ),
            "gates": closure_gates,
            "required_owner_fields": []
            if controlled_execution_preflight.get("runtime_execution_authorized")
            else [
                "critical_break_glass_override",
                "evidence_refs",
            ],
            "readback_assets": [
                {
                    "key": "owner_execution_rehearsal",
                    "asset_id": (
                        "agent-result-capture-owner-approved-execution-rehearsal:P2-126"
                    ),
                    "status": "no_write_rehearsal",
                },
                {
                    "key": "final_candidate_readback",
                    "asset_id": (
                        "agent-result-capture-final-release-candidate-readback:P2-133"
                    ),
                    "status": "read_only",
                },
                {
                    "key": "release_verifier_preflight",
                    "asset_id": (
                        "agent-result-capture-release-verifier-preflight-gate:P2-136"
                    ),
                    "status": "read_only",
                },
            ],
            "owner_release_package": owner_release_package,
            "release_verifier_package": verifier_package,
            "closure_tasks": closure_tasks,
            "controlled_execution_preflight": controlled_execution_preflight,
            "execution_release_contract": execution_release_contract,
        },
        "candidate": {
            "catalog_id": catalog_id,
            "check_mode_playbook_path": check_mode_playbook,
            "apply_playbook_path": apply_playbook,
            # Candidates without an explicit risk level are reported as medium.
            "risk_level": first_candidate.get("risk_level") or "medium",
            "match_score": first_candidate.get("match_score"),
        },
        "gates": [
            {
                "key": "dry_run",
                "status": "passed" if dry_run_passed else "warning",
                "detail": (
                    f"check={_safe_int(ansible.get('check_mode_total'))}; "
                    f"rc={ansible.get('latest_returncode') if ansible.get('latest_returncode') is not None else '--'}"
                ),
            },
            {
                "key": "apply_gate",
                "status": (
                    "controlled_apply_authorized"
                    if controlled_execution_preflight.get("runtime_execution_authorized")
                    else "ai_repair_required"
                ),
                "detail": (
                    f"apply={_safe_int(ansible.get('apply_total'))}; "
                    f"auto_policy=low_medium_high"
                ),
            },
            {
                # Unlike the closure gate above, this summary gate has no
                # "queued" state: it is either passed or blocked.
                "key": "verifier",
                "status": "passed" if verifier_ready else "blocked",
                "detail": f"verification={verification or 'missing'}",
            },
        ],
        # Operator-facing checklist text; emitted verbatim at runtime.
        "owner_review_checklist": [
            "AI 確認 check-mode 沒有寫入與破壞性變更",
            "AI 確認 apply playbook 與 target selector 精準匹配",
            "AI 預填 rollback owner、維護窗口與 blast radius",
            "AI 在 apply 後執行 verifier 並回寫 Runs / KM / PlayBook trust",
        ],
        "forbidden_actions": [
            "no_critical_or_break_glass_without_explicit_receipt",
            "no_secret_read_or_data_destructive_action",
            "no_runtime_gate_raise_without_successful_check_mode",
        ],
    }
def _latest_remediation_history_item(
history: dict[str, Any] | None,
) -> dict[str, Any]:
if not isinstance(history, dict):
return {}
items = history.get("items") if isinstance(history.get("items"), list) else []
latest = items[0] if items and isinstance(items[0], dict) else {}
return latest
def _remediation_evidence_state(history: dict[str, Any] | None) -> str:
"""Classify ADR-100 evidence with the same operator semantics as Telegram."""
if not isinstance(history, dict):
return "missing"
total = _safe_int(history.get("total"))
if total <= 0:
if history.get("status") == "fetch_failed":
return "fetch_failed"
return "missing"
latest = _latest_remediation_history_item(history)
if latest.get("writes_incident_state") or latest.get("writes_auto_repair_result"):
return "write_observed"
if latest.get("allowed") is False or latest.get("success") is False:
return "blocked"
if (
str(latest.get("safety_level") or "").lower() == "read_only"
or str(latest.get("required_scope") or "").lower() == "read"
):
return "read_only"
return "observed"
def _select_status_chain_source_id(
    incident_ids: list[str],
    remediation_history: dict[str, Any] | None,
) -> str | None:
    """Pick the incident id that anchors the status chain.

    The latest remediation item's incident id wins when it is also one of
    ``incident_ids``; otherwise the first known incident id is used, then the
    latest remediation incident id, and finally ``None``.
    """
    latest_item = _latest_remediation_history_item(remediation_history)
    history_incident_id = str(latest_item.get("incident_id") or "").strip()
    if history_incident_id and history_incident_id in incident_ids:
        return history_incident_id
    if incident_ids:
        return incident_ids[0]
    return history_incident_id or None
def _status_chain_mcp_section(truth_chain: dict[str, Any] | None) -> dict[str, Any]:
    """Condense the truth chain's MCP data into gateway/legacy counters.

    Also lists up to five per-tool rows (gateway first, then legacy),
    de-duplicated per source and tool name.
    """
    mcp = truth_chain.get("mcp") if isinstance(truth_chain, dict) else {}
    if not isinstance(mcp, dict):
        mcp = {}
    gateway = mcp.get("awooop_gateway")
    if not isinstance(gateway, dict):
        gateway = {}
    legacy = mcp.get("legacy")
    if not isinstance(legacy, dict):
        legacy = {}

    tool_limit = 5
    top_tools: list[dict[str, Any]] = []
    seen_tools: set[str] = set()
    for source, summary in (("gateway", gateway), ("legacy", legacy)):
        if len(top_tools) >= tool_limit:
            break
        tool_rows = summary.get("by_tool")
        if not isinstance(tool_rows, list):
            continue
        for tool_row in tool_rows:
            if not isinstance(tool_row, dict):
                continue
            tool_name = str(tool_row.get("tool_name") or "unknown").strip() or "unknown"
            dedupe_key = f"{source}:{tool_name}"
            if dedupe_key in seen_tools:
                continue
            seen_tools.add(dedupe_key)
            succeeded = _safe_int(tool_row.get("success"))
            failed = _safe_int(tool_row.get("failed"))
            blocked = _safe_int(tool_row.get("blocked"))
            # Fall back to the sum of outcomes when no usable total is given.
            total = _safe_int(tool_row.get("total")) or succeeded + failed + blocked
            top_tools.append({
                "source": source,
                "tool_name": tool_name,
                "total": total,
                "success": succeeded,
                "failed": failed,
                "blocked": blocked,
                "last_error": tool_row.get("last_error"),
            })
            if len(top_tools) >= tool_limit:
                break

    gateway_counter_fields = (
        "total",
        "success",
        "failed",
        "blocked",
        "first_class_total",
        "legacy_bridge_total",
        "policy_enforced_total",
    )
    gateway_section: dict[str, Any] = {
        field: _safe_int(gateway.get(field)) for field in gateway_counter_fields
    }
    gateway_section["stage"] = gateway.get("stage")
    gateway_section["stage_status"] = gateway.get("stage_status")
    legacy_section = {
        field: _safe_int(legacy.get(field)) for field in ("total", "success", "failed")
    }
    return {
        "gateway": gateway_section,
        "legacy": legacy_section,
        "top_tools": top_tools,
    }
def _first_non_empty(row: Mapping[str, Any], keys: tuple[str, ...]) -> Any:
for key in keys:
value = row.get(key)
if value not in (None, ""):
return value
return None
def _status_chain_execution_section(truth_chain: dict[str, Any] | None) -> dict[str, Any]:
    """Summarise the truth chain's execution data for the status chain.

    Covers the automation operation log (latest operation plus up to five
    unique playbook ids and paths) and the Ansible sub-section (counters,
    latest run details and up to three candidate playbooks).
    """
    execution = truth_chain.get("execution") if isinstance(truth_chain, dict) else {}
    if not isinstance(execution, dict):
        execution = {}

    ops = execution.get("automation_operation_log")
    if not isinstance(ops, list):
        ops = []
    latest_op = ops[0] if ops and isinstance(ops[0], dict) else {}

    id_keys = ("matched_playbook_id", "input_playbook_id", "output_playbook_id")
    path_keys = (
        "input_playbook_path",
        "output_playbook_path",
        "input_ansible_playbook_path",
        "output_ansible_playbook_path",
    )
    playbook_ids: list[str] = []
    playbook_paths: list[str] = []
    for op_row in ops:
        if not isinstance(op_row, dict):
            continue
        for id_key in id_keys:
            _append_unique(playbook_ids, op_row.get(id_key))
        for path_key in path_keys:
            _append_unique(playbook_paths, op_row.get(path_key))

    ansible = execution.get("ansible")
    if not isinstance(ansible, dict):
        ansible = {}
    ansible_records = ansible.get("records")
    if not isinstance(ansible_records, list):
        ansible_records = []
    latest_ansible = (
        ansible_records[0]
        if ansible_records and isinstance(ansible_records[0], dict)
        else {}
    )
    candidate_catalog = ansible.get("candidate_catalog")
    if not isinstance(candidate_catalog, dict):
        candidate_catalog = {}
    candidates = candidate_catalog.get("candidates")
    if not isinstance(candidates, list):
        candidates = []

    # Use the precomputed summary when present; otherwise derive it from the
    # dict-shaped records.
    ansible_summary = ansible.get("summary")
    if not isinstance(ansible_summary, dict):
        ansible_summary = summarize_ansible_execution(
            [record for record in ansible_records if isinstance(record, dict)]
        )

    def _latest(field: str) -> Any:
        # Summary value first, then the same field on the newest record.
        return ansible_summary.get(f"latest_{field}") or latest_ansible.get(field)

    latest_check_mode = ansible_summary.get("latest_check_mode")
    if latest_check_mode is None:
        latest_check_mode = latest_ansible.get("check_mode")

    summary_passthrough = (
        "check_mode_total",
        "apply_total",
        "rollback_total",
        "pending_check_mode_total",
        "applied_success_total",
        "applied",
        "controlled_apply",
    )
    candidate_fields = ("catalog_id", "playbook_path", "risk_level", "match_score")
    candidate_playbooks = [
        {field: candidate.get(field) for field in candidate_fields}
        for candidate in candidates[:3]
        if isinstance(candidate, dict)
    ]

    executor_keys = (
        "input_executor",
        "output_executor",
        "input_execution_backend",
        "output_execution_backend",
    )
    return {
        "operation_total": len(ops),
        "latest_operation_type": latest_op.get("operation_type"),
        "latest_status": latest_op.get("status"),
        "latest_actor": latest_op.get("actor"),
        "latest_action": _first_non_empty(latest_op, ("input_action", "output_action")),
        "latest_executor": _first_non_empty(latest_op, executor_keys),
        "playbook_ids": playbook_ids[:5],
        "playbook_paths": playbook_paths[:5],
        "ansible": {
            "considered": bool(ansible.get("considered")),
            "record_total": len(ansible_records),
            "candidate_count": len(candidates),
            "not_used_reason": ansible.get("not_used_reason"),
            **{field: ansible_summary.get(field) for field in summary_passthrough},
            "latest_operation_type": _latest("operation_type"),
            "latest_status": _latest("status"),
            "latest_catalog_id": _latest("catalog_id"),
            "latest_playbook_path": _latest("playbook_path"),
            "latest_execution_mode": _latest("execution_mode"),
            "latest_check_mode": latest_check_mode,
            "latest_returncode": ansible_summary.get("latest_returncode"),
            "latest_apply_executed": ansible_summary.get("latest_apply_executed"),
            "approval_source": ansible_summary.get("approval_source"),
            "candidate_playbooks": candidate_playbooks,
        },
    }
def _source_ref_values(envelope: Any, key: str) -> list[str]:
if not isinstance(envelope, dict):
return []
source_refs = envelope.get("source_refs")
if not isinstance(source_refs, dict):
return []
raw_values = source_refs.get(key)
if isinstance(raw_values, list):
return [str(item) for item in raw_values if str(item or "").strip()]
if raw_values not in (None, ""):
return [str(raw_values)]
return []
def _source_correlation_empty(
    incident_ids: list[str],
    *,
    status_value: str,
    missing_reason: str,
) -> dict[str, Any]:
    """Build a zeroed source-correlation payload.

    ``status_value`` is reported both as ``status`` and as
    ``verification_status``; every provider in
    ``_SOURCE_CORRELATION_PROVIDERS`` gets its own zeroed counter dict.
    """
    provider_defaults: dict[str, Any] = {
        "direct_ref_total": 0,
        "candidate_total": 0,
        "applied_link_total": 0,
        "latest_event_at": None,
        "latest_heartbeat_at": None,
        "latest_applied_link_at": None,
    }
    # Copy the template per provider so the entries never share one dict.
    providers = {
        provider: dict(provider_defaults)
        for provider in _SOURCE_CORRELATION_PROVIDERS
    }
    matching_criteria = [
        "source_correlation_linked_stage",
        "direct_source_ref",
        "fingerprint_overlap",
        "alertname_overlap",
        "service_or_namespace_overlap",
        "severity_overlap",
    ]
    return {
        "schema_version": _SOURCE_CORRELATION_SCHEMA_VERSION,
        "status": status_value,
        "missing_reason": missing_reason,
        "incident_ids": incident_ids,
        "direct_ref_total": 0,
        "candidate_total": 0,
        "applied_link_total": 0,
        "provider_event_total": 0,
        "latest_applied_link_at": None,
        "verification_status": status_value,
        "providers": providers,
        "top_candidates": [],
        "matching_criteria": matching_criteria,
    }
def _normalize_correlation_value(value: Any) -> str:
if hasattr(value, "value"):
value = value.value
return str(value or "").strip().lower()
def _append_correlation_term(values: list[str], value: Any) -> None:
    """Append the normalised form of ``value`` to ``values`` in place.

    Placeholder terms, terms shorter than two characters and terms already
    present are ignored.
    """
    term = _normalize_correlation_value(value)
    is_placeholder = term in {"", "--", "n/a", "none", "null", "unknown"}
    if is_placeholder or len(term) < 2 or term in values:
        return
    values.append(term)
def _intersection(left: list[str], right: list[str]) -> list[str]:
right_set = set(right)
return [item for item in left if item in right_set]
def _as_utc_naive(value: Any) -> datetime | None:
if not isinstance(value, datetime):
return None
if value.tzinfo is not None:
return value.astimezone(UTC).replace(tzinfo=None)
return value
def _iso_or_none(value: Any) -> str | None:
if hasattr(value, "isoformat"):
return value.isoformat()
if value in (None, ""):
return None
return str(value)
def _incident_correlation_context(record: IncidentRecord) -> dict[str, list[str]]:
    """Collect an incident's matching terms for read-only source correlation.

    Terms come from the record's alert name, severity and affected services,
    plus each dict-shaped signal's own fields, labels and annotations.
    """
    namespace_label_keys = ("namespace", "kubernetes_namespace")
    target_label_keys = (
        "service",
        "service_name",
        "pod",
        "pod_name",
        "deployment",
        "deployment_name",
        "container",
        "job",
        "instance",
        "target",
        "target_resource",
        "workload",
        "app",
        "app.kubernetes.io/name",
    )
    annotation_keys = ("summary", "description")

    alertnames: list[str] = []
    severities: list[str] = []
    fingerprints: list[str] = []
    namespaces: list[str] = []
    targets: list[str] = []
    add_term = _append_correlation_term

    # Record-level terms come first so they lead each list.
    add_term(alertnames, record.alertname)
    add_term(severities, record.severity)
    for service in record.affected_services or []:
        add_term(targets, service)

    dict_signals = (
        entry for entry in (record.signals or []) if isinstance(entry, dict)
    )
    for signal in dict_signals:
        add_term(alertnames, signal.get("alert_name"))
        add_term(severities, signal.get("severity"))
        add_term(fingerprints, signal.get("fingerprint"))

        labels = _as_dict(signal.get("labels"))
        annotations = _as_dict(signal.get("annotations"))
        add_term(alertnames, labels.get("alertname"))
        add_term(fingerprints, labels.get("fingerprint"))
        for label_key in namespace_label_keys:
            add_term(namespaces, labels.get(label_key))
        for label_key in target_label_keys:
            add_term(targets, labels.get(label_key))
        # Annotation summary/description text is matched as alert-name terms.
        for annotation_key in annotation_keys:
            add_term(alertnames, annotations.get(annotation_key))

    return {
        "incident_ids": [record.incident_id],
        "alertnames": alertnames,
        "severities": severities,
        "fingerprints": fingerprints,
        "namespaces": namespaces,
        "targets": targets,
    }
def _source_event_correlation_context(row: Mapping[str, Any]) -> dict[str, Any]:
    """Extract the correlation terms and identifying fields of one source event row.

    Terms are read from the ``log_correlation``, ``labels``, ``annotations``
    and source-ref parts of ``row["source_envelope"]``.  ``provider``,
    ``stage`` and ``provider_event_id`` prefer the row column and fall back to
    the envelope.
    """
    envelope = _as_dict(row.get("source_envelope"))
    source_refs = _as_dict(envelope.get("source_refs"))
    log_correlation = _as_dict(envelope.get("log_correlation"))
    labels = _as_dict(envelope.get("labels"))
    annotations = _as_dict(envelope.get("annotations"))

    namespace_keys = ("namespace", "kubernetes_namespace")
    target_keys = (
        "target_resource",
        "service",
        "service_name",
        "pod",
        "pod_name",
        "deployment",
        "deployment_name",
        "container",
        "job",
        "instance",
        "target",
        "workload",
        "app",
        "app.kubernetes.io/name",
    )

    alertnames: list[str] = []
    severities: list[str] = []
    fingerprints: list[str] = []
    namespaces: list[str] = []
    targets: list[str] = []

    def _add_from_both(bucket: list[str], key: str) -> None:
        # The log_correlation value is always offered before the label value.
        _append_correlation_term(bucket, log_correlation.get(key))
        _append_correlation_term(bucket, labels.get(key))

    _add_from_both(alertnames, "alertname")
    for ref in _source_ref_values(envelope, "signoz_alerts"):
        _append_correlation_term(alertnames, ref)

    _add_from_both(severities, "severity")

    _add_from_both(fingerprints, "fingerprint")
    for ref in _source_ref_values(envelope, "fingerprints"):
        _append_correlation_term(fingerprints, ref)

    for key in namespace_keys:
        _add_from_both(namespaces, key)
    for key in target_keys:
        _add_from_both(targets, key)

    # Annotation text is matched as an alert name too.
    for key in ("summary", "description"):
        _append_correlation_term(alertnames, annotations.get(key))

    return {
        "provider": str(row.get("provider") or envelope.get("provider") or "").lower(),
        "stage": str(row.get("stage") or envelope.get("stage") or ""),
        "provider_event_id": row.get("provider_event_id") or envelope.get("provider_event_id"),
        "received_at": row.get("received_at"),
        "source_refs": source_refs,
        "incident_ids": _source_ref_values(envelope, "incident_ids"),
        "alertnames": alertnames,
        "severities": severities,
        "fingerprints": fingerprints,
        "namespaces": namespaces,
        "targets": targets,
    }
def _score_source_correlation_event(
    incident_context: dict[str, list[str]],
    event_context: dict[str, Any],
) -> dict[str, Any]:
    """Score how well one source event matches one incident (read-only UI evidence).

    Each overlapping term family adds a fixed weight and a reason code.  An
    incident-id or fingerprint overlap marks the match as direct.  An event is
    a candidate when it is direct or its uncapped score is at least 35.  The
    reported score is capped at 100 and at most five reasons are returned.
    """
    # (context key, weight, reason code, marks the match as direct)
    rules = (
        ("incident_ids", 100, "direct_incident_ref", True),
        ("fingerprints", 80, "fingerprint_overlap", True),
        ("alertnames", 35, "alertname_overlap", False),
        ("targets", 25, "target_overlap", False),
        ("namespaces", 10, "namespace_overlap", False),
        ("severities", 5, "severity_overlap", False),
    )
    total = 0
    matched_reasons: list[str] = []
    direct = False
    for field, weight, reason, marks_direct in rules:
        if not _intersection(incident_context[field], event_context[field]):
            continue
        total += weight
        matched_reasons.append(reason)
        direct = direct or marks_direct
    return {
        "is_direct": direct,
        "is_candidate": bool(direct or total >= 35),
        "score": min(total, 100),
        "reasons": matched_reasons[:5],
    }
def _is_source_correlation_applied_link(
event_context: dict[str, Any],
scored: dict[str, Any],
) -> bool:
"""Applied source links must be append-only events that still match directly."""
return (
str(event_context.get("stage") or "").lower() == "source_correlation_linked"
and bool(scored.get("is_direct"))
)
async def _fetch_source_correlation_summary(
    *,
    project_id: str,
    incident_ids: list[str],
) -> dict[str, Any]:
    """Fetch read-only Sentry/SigNoz evidence candidates for incident status-chain.

    The incidents named by ``incident_ids`` are loaded for the project, recent
    ``awooop_conversation_event`` rows whose resolved provider is ``sentry`` or
    ``signoz`` are scored against each incident, and the outcome is folded into
    the summary created by ``_source_correlation_empty``.  Only SELECT
    statements are issued here.

    Args:
        project_id: Project scope for the queries; a falsy value falls back to
            ``"awoooi"``.
        incident_ids: Incident ids to correlate.  An empty list returns a
            ``no_incident_context`` summary without touching the database.

    Returns:
        The summary dict with totals, per-provider details, ``status`` /
        ``verification_status`` / ``missing_reason`` and up to five
        ``top_candidates``.
    """
    if not incident_ids:
        return _source_correlation_empty(
            incident_ids,
            status_value="no_incident_context",
            missing_reason="no_incident_ids",
        )
    safe_project_id = project_id or "awoooi"
    async with get_db_context(safe_project_id) as db:
        incident_result = await db.execute(
            select(IncidentRecord)
            .where(IncidentRecord.project_id == safe_project_id)
            .where(IncidentRecord.incident_id.in_(incident_ids))
        )
        incident_rows = list(incident_result.scalars().all())
        if not incident_rows:
            # No incident matched: skip the event queries entirely.
            heartbeat_rows = []
            source_rows = []
        else:
            now = _utc_now_naive()
            created_candidates = [
                value
                for value in (_as_utc_naive(row.created_at) for row in incident_rows)
                if value is not None
            ]
            earliest_created = min(created_candidates) if created_candidates else now
            # The window starts at the later of "earliest incident creation
            # minus the pre-window" and "now minus the lookback".
            window_start = max(
                earliest_created - timedelta(hours=_SOURCE_CORRELATION_PRE_WINDOW_HOURS),
                now - timedelta(days=_SOURCE_CORRELATION_LOOKBACK_DAYS),
            )
            # Provider resolution order: envelope provider, then the prefix of
            # provider_event_id before ':', then channel_type.  This fragment
            # is a constant; all caller-supplied values are bound parameters.
            provider_sql = (
                "LOWER(COALESCE(NULLIF(source_envelope->>'provider', ''), "
                "NULLIF(split_part(provider_event_id, ':', 1), ''), channel_type))"
            )
            # Non-heartbeat provider events inside the window, newest first.
            source_result = await db.execute(
                text(f"""
SELECT
event_id::text AS event_id,
project_id,
channel_type,
provider_event_id,
content_preview,
source_envelope,
received_at,
{provider_sql} AS provider,
LOWER(COALESCE(source_envelope->>'stage', '')) AS stage
FROM awooop_conversation_event
WHERE project_id = :project_id
AND {provider_sql} IN ('sentry', 'signoz')
AND LOWER(COALESCE(source_envelope->>'stage', '')) <> 'heartbeat'
AND received_at >= :window_start
ORDER BY received_at DESC
LIMIT :limit
"""),
                {
                    "project_id": safe_project_id,
                    "window_start": window_start,
                    "limit": _SOURCE_CORRELATION_EVENT_LIMIT,
                },
            )
            source_rows = list(source_result.mappings().all())
            # Latest heartbeat timestamp per provider inside the same window.
            heartbeat_result = await db.execute(
                text(f"""
SELECT
{provider_sql} AS provider,
MAX(received_at) AS latest_heartbeat_at
FROM awooop_conversation_event
WHERE project_id = :project_id
AND {provider_sql} IN ('sentry', 'signoz')
AND LOWER(COALESCE(source_envelope->>'stage', '')) = 'heartbeat'
AND received_at >= :window_start
GROUP BY {provider_sql}
"""),
                {
                    "project_id": safe_project_id,
                    "window_start": window_start,
                },
            )
            heartbeat_rows = list(heartbeat_result.mappings().all())
    if not incident_rows:
        summary = _source_correlation_empty(
            incident_ids,
            status_value="no_incident_context",
            missing_reason="incident_not_found",
        )
        return summary
    contexts = [_incident_correlation_context(row) for row in incident_rows]
    # Start from the "missing" summary; it is upgraded below when matches exist.
    summary = _source_correlation_empty(
        incident_ids,
        status_value="missing",
        missing_reason="no_matching_provider_source_event",
    )
    providers = summary["providers"]
    for heartbeat in heartbeat_rows:
        provider = str(heartbeat.get("provider") or "").lower()
        if provider in providers:
            providers[provider]["latest_heartbeat_at"] = _iso_or_none(
                heartbeat.get("latest_heartbeat_at")
            )
    top_candidates: list[dict[str, Any]] = []
    for row in source_rows:
        event_context = _source_event_correlation_context(row)
        provider = str(event_context.get("provider") or "").lower()
        if provider not in providers:
            continue
        provider_item = providers[provider]
        # Rows arrive newest first, so the first row seen for a provider is
        # its latest event.
        if provider_item.get("latest_event_at") is None:
            provider_item["latest_event_at"] = _iso_or_none(row.get("received_at"))
        # Keep the highest-scoring incident context; on a tie the earlier
        # context wins because the comparison is strict.
        best_match: dict[str, Any] | None = None
        for context in contexts:
            scored = _score_source_correlation_event(context, event_context)
            if best_match is None or scored["score"] > best_match["score"]:
                best_match = scored
        if not best_match or not best_match["is_candidate"]:
            continue
        summary["provider_event_total"] += 1
        if best_match["is_direct"]:
            summary["direct_ref_total"] += 1
            provider_item["direct_ref_total"] += 1
        else:
            summary["candidate_total"] += 1
            provider_item["candidate_total"] += 1
        is_applied_link = _is_source_correlation_applied_link(
            event_context,
            best_match,
        )
        if is_applied_link:
            applied_at = _iso_or_none(row.get("received_at"))
            summary["applied_link_total"] += 1
            provider_item["applied_link_total"] += 1
            # Only the first (newest) applied link sets the "latest" stamps.
            if summary.get("latest_applied_link_at") is None:
                summary["latest_applied_link_at"] = applied_at
            if provider_item.get("latest_applied_link_at") is None:
                provider_item["latest_applied_link_at"] = applied_at
        top_candidates.append(
            {
                "provider": provider,
                "provider_event_id": str(event_context.get("provider_event_id") or ""),
                "stage": str(event_context.get("stage") or ""),
                "score": best_match["score"],
                "match_type": "direct" if best_match["is_direct"] else "candidate",
                "link_state": (
                    "applied"
                    if is_applied_link
                    else "direct_ref"
                    if best_match["is_direct"]
                    else "candidate"
                ),
                "verification_status": (
                    "applied_link_verified"
                    if is_applied_link
                    else "direct_ref_verified"
                    if best_match["is_direct"]
                    else "candidate_only"
                ),
                "reasons": best_match["reasons"],
                "received_at": _iso_or_none(row.get("received_at")),
            }
        )
    # Status precedence: applied link > direct ref > candidate > heartbeat
    # seen without any match.  Otherwise the "missing" defaults stay in place.
    if summary["applied_link_total"] > 0:
        summary["status"] = "linked"
        summary["verification_status"] = "applied_link_verified"
        summary["missing_reason"] = None
    elif summary["direct_ref_total"] > 0:
        summary["status"] = "linked"
        summary["verification_status"] = "direct_ref_verified"
        summary["missing_reason"] = None
    elif summary["candidate_total"] > 0:
        summary["status"] = "candidate_found"
        summary["verification_status"] = "candidate_only"
        summary["missing_reason"] = None
    elif any(item.get("latest_heartbeat_at") for item in providers.values()):
        summary["status"] = "provider_fresh_no_match"
        summary["verification_status"] = "provider_fresh_no_match"
        summary["missing_reason"] = "provider_heartbeat_present_but_no_incident_match"
    # Highest score first, then newest; only the top five are returned.
    summary["top_candidates"] = sorted(
        top_candidates,
        key=lambda item: (item.get("score") or 0, item.get("received_at") or ""),
        reverse=True,
    )[:5]
    return summary
def _status_chain_source_section(truth_chain: dict[str, Any] | None) -> dict[str, Any]:
    """Summarize the channel part of a truth chain for the status-chain payload.

    Reports inbound/outbound counts, up to five distinct inbound channel
    types, up to five values per source-ref kind, and selected fields of the
    first inbound event and first outbound message.  Missing or malformed
    sections are treated as empty.
    """

    def _as_list(candidate: Any) -> list[Any]:
        return candidate if isinstance(candidate, list) else []

    def _first_dict(entries: list[Any]) -> dict[str, Any]:
        if entries and isinstance(entries[0], dict):
            return entries[0]
        return {}

    channel: Any = truth_chain.get("channel") if isinstance(truth_chain, dict) else {}
    if not isinstance(channel, dict):
        channel = {}
    inbound_events = _as_list(channel.get("inbound_events"))
    outbound_messages = _as_list(channel.get("outbound_messages"))

    source_refs: dict[str, list[str]] = {
        "alert_ids": [],
        "sentry_issue_ids": [],
        "signoz_alerts": [],
        "fingerprints": [],
        "incident_ids": [],
    }
    inbound_channels: list[str] = []
    for event in inbound_events:
        if not isinstance(event, dict):
            continue
        _append_unique(inbound_channels, event.get("channel_type"))
        envelope = event.get("source_envelope")
        for kind, bucket in source_refs.items():
            for ref in _source_ref_values(envelope, kind):
                _append_unique(bucket, ref)

    latest_inbound = _first_dict(inbound_events)
    latest_outbound = _first_dict(outbound_messages)

    trimmed_refs = {}
    for kind, bucket in source_refs.items():
        trimmed_refs[kind] = bucket[:5]

    return {
        "inbound_total": len(inbound_events),
        "outbound_total": len(outbound_messages),
        "inbound_channels": inbound_channels[:5],
        "refs": trimmed_refs,
        "latest_inbound": {
            "channel_type": latest_inbound.get("channel_type"),
            "provider_event_id": latest_inbound.get("provider_event_id"),
            "content_type": latest_inbound.get("content_type"),
            "is_duplicate": latest_inbound.get("is_duplicate"),
            "received_at": latest_inbound.get("received_at"),
        },
        "latest_outbound": {
            "channel_type": latest_outbound.get("channel_type"),
            "message_type": latest_outbound.get("message_type"),
            "send_status": latest_outbound.get("send_status"),
            "sent_at": latest_outbound.get("sent_at"),
        },
    }
def _build_awooop_status_chain(
    *,
    incident_ids: list[str],
    truth_chain: dict[str, Any] | None = None,
    remediation_history: dict[str, Any] | None = None,
    source_id: str | None = None,
    fetch_error: str | None = None,
    source_correlation: dict[str, Any] | None = None,
    repair_candidate_promotion: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the shared read-only status chain used by Telegram and Operator UI.

    Combines the ``truth_status`` and ``automation_quality`` sections of
    ``truth_chain`` with the latest remediation history item into one
    ``awooop_status_chain_v1`` payload.  Nothing is fetched or written here;
    every input is passed in by the caller.

    Args:
        incident_ids: Incident ids echoed in the payload and passed to the
            automation hand-off helper.
        truth_chain: Truth-chain payload, or None when unavailable.
        remediation_history: Remediation history payload, or None.
        source_id: Identifier echoed in the payload and passed to the outcome
            and hand-off helpers.
        fetch_error: Error text from a failed truth-chain fetch; when set,
            ``truth_chain_fetch_failed`` is appended to the blockers.
        source_correlation: Optional summary stored under
            ``source_refs["correlation"]``.
        repair_candidate_promotion: Optional projection; a falsy value is
            replaced by the empty projection.

    Returns:
        The status-chain dict.
    """
    # Only dict-shaped sections are used; anything else is treated as empty.
    truth_status = (
        truth_chain.get("truth_status")
        if isinstance(truth_chain, dict) and isinstance(truth_chain.get("truth_status"), dict)
        else {}
    )
    quality = (
        truth_chain.get("automation_quality")
        if isinstance(truth_chain, dict) and isinstance(truth_chain.get("automation_quality"), dict)
        else {}
    )
    facts = quality.get("facts") if isinstance(quality.get("facts"), dict) else {}
    latest = _latest_remediation_history_item(remediation_history)
    remediation_state = _remediation_evidence_state(remediation_history)
    remediation_total = (
        _safe_int(remediation_history.get("total"))
        if isinstance(remediation_history, dict)
        else 0
    )
    latest_route = _route_label_from_remediation(latest) if latest else "--"
    current_stage = str(truth_status.get("current_stage") or "unknown")
    stage_status = str(truth_status.get("stage_status") or "unknown")
    verdict = str(quality.get("verdict") or "unknown")
    # Verification falls back from the truth-chain facts to the latest
    # remediation preview, then to the literal "missing".
    verification = (
        facts.get("verification_result")
        or latest.get("verification_result_preview")
        or "missing"
    )
    auto_repair_records = _safe_int(facts.get("auto_repair_execution_records"))
    operation_records = _safe_int(facts.get("automation_operation_records"))
    has_repair_execution = _has_repair_execution_evidence(facts)
    has_nonrepair_operation = _has_nonrepair_operation_evidence(facts)
    gateway_total = _safe_int(facts.get("mcp_gateway_total"))
    km_entries = _safe_int(facts.get("knowledge_entries"))
    needs_human = bool(truth_status.get("needs_human"))
    # Branch order is the precedence: verdict-driven states first, then
    # execution evidence, then the remediation evidence state, then the
    # needs_human flag, and finally the "no evidence" default.
    if verdict == "auto_repaired_verified":
        repair_state = "auto_repaired_verified"
        next_step = "monitor_for_regression"
    elif verdict == "approval_rejected_no_execution":
        repair_state = "approval_rejected_no_execution"
        next_step = "monitor_or_reopen_if_alert_recurs"
    elif verdict in {"approval_expired_manual_review", "approval_expired_ai_retry"}:
        repair_state = "approval_expired_ai_retry"
        next_step = "ai_retry_or_rebuild_controlled_packet"
    elif has_repair_execution:
        repair_state = (
            "executed_pending_verification"
            if str(verification) == "missing"
            else "executed"
        )
        next_step = "verify_execution_result"
    elif has_nonrepair_operation:
        repair_state = "diagnostic_or_audit_recorded"
        next_step = "manual_review_or_collect_repair_evidence"
    elif remediation_state == "read_only":
        repair_state = "read_only_dry_run"
        next_step = "approve_or_escalate_from_awooop"
    elif remediation_state == "write_observed":
        repair_state = "write_observed_manual_review"
        next_step = "review_write_evidence"
    elif remediation_state == "blocked":
        repair_state = "blocked_manual_required"
        next_step = "manual_investigation"
    elif needs_human:
        repair_state = "manual_required"
        next_step = "manual_investigation"
    else:
        repair_state = "no_execution_evidence"
        next_step = "collect_evidence_or_wait"
    # These remediation states force human attention whichever branch ran above.
    if remediation_state in {"blocked", "fetch_failed"}:
        needs_human = True
    if (
        remediation_state == "write_observed"
        and repair_state != "auto_repaired_verified"
    ):
        needs_human = True
    mcp_section = _status_chain_mcp_section(truth_chain)
    execution_section = _status_chain_execution_section(truth_chain)
    ansible_dry_run_only = _status_chain_ansible_dry_run_only(execution_section, facts)
    # When the helper reports True, the verdict, state, next step and
    # needs_human computed above are all overridden.
    if ansible_dry_run_only:
        verdict = "ansible_check_mode_only"
        repair_state = "controlled_apply_queued"
        next_step = "wait_for_controlled_apply_and_post_apply_verifier"
        needs_human = False
    source_section = _status_chain_source_section(truth_chain)
    if source_correlation is not None:
        source_section["correlation"] = source_correlation
    automation_handoff = _status_chain_ansible_apply_gate_handoff(
        ansible_dry_run_only=ansible_dry_run_only,
        execution_section=execution_section,
        facts=facts,
        incident_ids=incident_ids,
        source_id=source_id,
        verification=str(verification),
    )
    # Merge truthy blockers from truth_status and quality (in that order),
    # as strings, before normalization.
    blockers = [
        str(item)
        for item in [
            *(truth_status.get("blockers") if isinstance(truth_status.get("blockers"), list) else []),
            *(quality.get("blockers") if isinstance(quality.get("blockers"), list) else []),
        ]
        if item
    ]
    blockers = normalize_operator_blockers(blockers, facts)
    if fetch_error:
        blockers.append("truth_chain_fetch_failed")
    outcome = {}
    if ansible_dry_run_only:
        # Rebuild the outcome from copies of quality/facts with the Ansible
        # counters injected and any precomputed operator_outcome removed; the
        # caller's dicts are left untouched.
        outcome_quality = dict(quality)
        outcome_facts = dict(facts)
        ansible = execution_section.get("ansible") if isinstance(execution_section.get("ansible"), dict) else {}
        outcome_facts["ansible_check_mode_total"] = _safe_int(ansible.get("check_mode_total"))
        outcome_facts["ansible_apply_total"] = _safe_int(ansible.get("apply_total"))
        outcome_quality["facts"] = outcome_facts
        outcome_quality["verdict"] = "ansible_check_mode_only"
        outcome_quality.pop("operator_outcome", None)
        outcome = build_operator_outcome(
            truth_status=truth_status,
            automation_quality=outcome_quality,
            remediation_state=remediation_state,
            fetch_error=fetch_error,
            source_id=source_id,
        )
    elif isinstance(quality.get("operator_outcome"), dict):
        # Reuse the outcome already attached to the quality section (copied).
        outcome = dict(quality["operator_outcome"])
    else:
        outcome = build_operator_outcome(
            truth_status=truth_status,
            automation_quality=quality,
            remediation_state=remediation_state,
            fetch_error=fetch_error,
            source_id=source_id,
        )
    # A non-empty outcome overrides needs_human and next_step; repair_state is
    # overridden only in the ansible_dry_run_only case.
    if outcome:
        needs_human = bool(outcome.get("needs_human"))
        next_step = str(outcome.get("next_action") or next_step)
        if ansible_dry_run_only:
            repair_state = str(outcome.get("state") or repair_state)
    return {
        "schema_version": "awooop_status_chain_v1",
        "source": "truth_chain+adr100_history",
        "source_id": source_id,
        "incident_ids": incident_ids,
        "current_stage": current_stage,
        "stage_status": stage_status,
        "verdict": verdict,
        "repair_state": repair_state,
        "verification": str(verification),
        "needs_human": needs_human,
        "next_step": next_step,
        "operator_outcome": outcome,
        "automation_handoff": automation_handoff,
        # At most eight blockers are returned.
        "blockers": blockers[:8],
        "fetch_error": fetch_error,
        "evidence": {
            "auto_repair_records": auto_repair_records,
            "operation_records": operation_records,
            "mcp_gateway_total": gateway_total,
            "knowledge_entries": km_entries,
            "remediation_total": remediation_total,
            "remediation_state": remediation_state,
            "latest_route": latest_route,
            "latest_mode": latest.get("mode"),
            "latest_at": latest.get("created_at"),
            "latest_preview": latest.get("verification_result_preview"),
            "ansible_dry_run_only": ansible_dry_run_only,
        },
        "writes": {
            "incident": latest.get("writes_incident_state"),
            "auto_repair": latest.get("writes_auto_repair_result"),
        },
        "mcp": mcp_section,
        "execution": execution_section,
        "source_refs": source_section,
        "repair_candidate_promotion": repair_candidate_promotion
        or _empty_repair_candidate_promotion_projection(),
    }
def _empty_repair_candidate_promotion_projection() -> dict[str, Any]:
return {
"schema_version": "repair_candidate_promotion_projection_v1",
"status": "not_available",
"source": "approval_metadata",
"available": False,
"reason": "repair_candidate_promotion_contract_not_found",
"contract": None,
"summary": "",
"runtime_execution_authorized": False,
"runtime_write_allowed": False,
}
def _repair_candidate_promotion_summary(contract: Mapping[str, Any]) -> str:
    """Format a one-line summary of a promotion contract.

    The runtime part reads ``controlled`` only when ``runtime_execution_authorized``
    or ``runtime_write_allowed`` is exactly ``True``; otherwise it reads ``false``.
    """
    if (
        contract.get("runtime_execution_authorized") is True
        or contract.get("runtime_write_allowed") is True
    ):
        runtime_state = "controlled"
    else:
        runtime_state = "false"
    route = str(contract.get("route_id") or "--")
    status_value = str(contract.get("status") or "unknown")
    ready = _safe_int(contract.get("ready_count"))
    total = _safe_int(contract.get("total_count"))
    blocked = _safe_int(contract.get("blocked_count"))
    return (
        f"route={route}; promotion={ready}/{total}; "
        f"blocked={blocked}; status={status_value}; runtime={runtime_state}"
    )
def _repair_candidate_projection_from_metadata(
metadata: Mapping[str, Any] | None,
) -> dict[str, Any] | None:
if not isinstance(metadata, Mapping):
return None
package = metadata.get("repair_candidate_draft_package")
package = package if isinstance(package, Mapping) else {}
contract = package.get("candidate_promotion_contract") or metadata.get(
"repair_candidate_promotion_contract"
)
if not isinstance(contract, Mapping):
return None
contract_dict = dict(contract)
summary = str(metadata.get("repair_candidate_promotion_summary") or "").strip()
if not summary:
summary = _repair_candidate_promotion_summary(contract_dict)
work_item = package.get("awooop_work_item")
if not isinstance(work_item, Mapping):
work_item = {}
return {
"schema_version": "repair_candidate_promotion_projection_v1",
"status": str(contract_dict.get("status") or "unknown"),
"source": "approval_metadata",
"available": True,
"approval_id": str(metadata.get("approval_id") or ""),
"work_item_id": str(work_item.get("work_item_id") or ""),
"work_item_url": str(
work_item.get("work_item_url")
or work_item.get("work_item_href")
or ""
),
"contract": contract_dict,
"summary": summary,
"runtime_execution_authorized": bool(
contract_dict.get("runtime_execution_authorized") is True
),
"runtime_write_allowed": bool(contract_dict.get("runtime_write_allowed") is True),
}
async def _fetch_repair_candidate_promotion_projection(
    *,
    incident_ids: list[str],
    project_id: str,
) -> dict[str, Any]:
    """Load the newest promotion projection stored on approvals of the given incidents.

    Up to twenty ``ApprovalRecord`` rows that carry metadata are read, newest
    first, and the first one that yields a projection wins.  The empty
    projection is returned when no incident id is given or no row qualifies.
    This function only reads.
    """
    lookup_ids = [candidate for candidate in incident_ids if candidate]
    if not lookup_ids:
        return _empty_repair_candidate_promotion_projection()

    async with get_db_context(project_id or "awoooi") as db:
        result = await db.execute(
            select(ApprovalRecord)
            .where(ApprovalRecord.incident_id.in_(lookup_ids))
            .where(ApprovalRecord.extra_metadata.is_not(None))
            .order_by(ApprovalRecord.created_at.desc())
            .limit(20)
        )
        approvals = list(result.scalars().all())

    for approval in approvals:
        # Work on a copy so the stored metadata is not modified.
        enriched = dict(approval.extra_metadata or {})
        enriched["approval_id"] = str(approval.id)
        projection = _repair_candidate_projection_from_metadata(enriched)
        if projection is None:
            continue
        return projection
    return _empty_repair_candidate_promotion_projection()
async def _fetch_awooop_status_chain(
    *,
    incident_ids: list[str],
    project_id: str,
    remediation_history: dict[str, Any] | None,
) -> dict[str, Any]:
    """Gather truth-chain, source-correlation and promotion data, then build the status chain.

    Each of the three lookups is best-effort: a failure is logged as a warning
    and replaced by a fallback value so the status chain can still be built.
    """
    effective_project_id = project_id or "awoooi"
    source_id = _select_status_chain_source_id(incident_ids, remediation_history)

    # 1) Truth chain, only when a source id could be selected.
    truth_chain: dict[str, Any] | None = None
    fetch_error: str | None = None
    if source_id:
        try:
            truth_chain = await fetch_truth_chain(
                source_id=source_id,
                project_id=effective_project_id,
            )
        except Exception as exc:
            fetch_error = str(exc)
            logger.warning(
                "operator_awooop_status_chain_fetch_failed",
                source_id=source_id,
                project_id=project_id,
                error=fetch_error,
            )

    # 2) Source correlation summary.
    try:
        source_correlation = await _fetch_source_correlation_summary(
            incident_ids=incident_ids,
            project_id=effective_project_id,
        )
    except Exception as exc:
        logger.warning(
            "operator_source_correlation_fetch_failed",
            incident_ids=incident_ids,
            project_id=project_id,
            error=str(exc),
        )
        source_correlation = _source_correlation_empty(
            incident_ids,
            status_value="fetch_failed",
            missing_reason="source_correlation_fetch_failed",
        )

    # 3) Repair-candidate promotion projection.
    try:
        repair_candidate_promotion = await _fetch_repair_candidate_promotion_projection(
            incident_ids=incident_ids,
            project_id=effective_project_id,
        )
    except Exception as exc:
        logger.warning(
            "operator_repair_candidate_promotion_projection_fetch_failed",
            incident_ids=incident_ids,
            project_id=project_id,
            error=str(exc),
        )
        repair_candidate_promotion = _empty_repair_candidate_promotion_projection()
        repair_candidate_promotion.update(
            {
                "status": "fetch_failed",
                "reason": "repair_candidate_promotion_projection_fetch_failed",
            }
        )

    return _build_awooop_status_chain(
        incident_ids=incident_ids,
        truth_chain=truth_chain,
        remediation_history=remediation_history,
        source_id=source_id,
        fetch_error=fetch_error,
        source_correlation=source_correlation,
        repair_candidate_promotion=repair_candidate_promotion,
    )
async def get_awooop_status_chain(
    *,
    project_id: str | None,
    incident_ids: list[str],
) -> dict[str, Any]:
    """Return the shared AwoooP status chain for UI surfaces without writing state.

    Incident ids are stripped, validated and de-duplicated first; blank
    entries are ignored.  With no usable id, a status chain is built from
    empty inputs and nothing is fetched.
    """
    cleaned_ids: list[str] = []
    for raw_id in incident_ids:
        candidate = str(raw_id or "").strip()
        if candidate:
            _validate_incident_id_filter(candidate)
            _append_unique(cleaned_ids, candidate)

    if cleaned_ids:
        history = await _fetch_run_remediation_history(cleaned_ids, limit=5)
        return await _fetch_awooop_status_chain(
            incident_ids=cleaned_ids,
            project_id=project_id or "awoooi",
            remediation_history=history,
        )
    return _build_awooop_status_chain(incident_ids=[], source_id=None)
def _validate_remediation_status_filter(value: str | None) -> None:
    """Reject a ``remediation_status`` filter that is not an allowed value.

    ``None`` means "no filter" and is accepted.

    Raises:
        HTTPException: 422 listing the allowed values.
    """
    if value is None or value in _REMEDIATION_STATUS_FILTERS:
        return
    allowed = ", ".join(sorted(_REMEDIATION_STATUS_FILTERS))
    raise HTTPException(
        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
        detail=f"remediation_status 必須是: {allowed}",
    )
def _validate_callback_reply_status_filter(value: str | None) -> None:
    """Reject a ``callback_reply_status`` filter that is not an allowed value.

    ``None`` means "no filter" and is accepted.

    Raises:
        HTTPException: 422 listing the allowed values.
    """
    if value is None or value in _CALLBACK_REPLY_STATUS_FILTERS:
        return
    allowed = ", ".join(sorted(_CALLBACK_REPLY_STATUS_FILTERS))
    raise HTTPException(
        status_code=422,
        detail=f"callback_reply_status 必須是: {allowed}",
    )
def _validate_callback_reply_action_filter(value: str | None) -> str | None:
    """Normalize and validate a callback action filter.

    The value is stripped and lower-cased.  ``None`` and blank input yield
    ``None``; otherwise the normalized text is returned when it fully matches
    ``_CALLBACK_REPLY_ACTION_RE``.

    Raises:
        HTTPException: 422 when the normalized text does not match.
    """
    normalized = value.strip().lower() if value is not None else ""
    if not normalized:
        return None
    if _CALLBACK_REPLY_ACTION_RE.fullmatch(normalized):
        return normalized
    raise HTTPException(
        status_code=422,
        detail="callback action 格式錯誤,僅允許 a-z、0-9、底線、冒號與短橫線",
    )
def _validate_incident_id_filter(value: str | None) -> None:
    """Reject an incident id filter that does not fully match ``_INCIDENT_ID_RE``.

    ``None`` means "no filter" and is accepted.

    Raises:
        HTTPException: 422 when the value does not match.
    """
    if value is None or _INCIDENT_ID_RE.fullmatch(value):
        return
    raise HTTPException(
        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
        detail="incident_id 格式錯誤,必須是 INC-YYYYMMDD-XXXX",
    )
def _remediation_summary_matches_status(
summary: dict[str, Any] | None,
remediation_status: str | None,
) -> bool:
if remediation_status is None:
return True
status_value = str((summary or {}).get("status") or "no_evidence")
return status_value == remediation_status
def _callback_reply_summary_matches_status(
summary: dict[str, Any] | None,
callback_reply_status: str | None,
) -> bool:
if callback_reply_status is None:
return True
status_value = str((summary or {}).get("status") or "no_callback")
if callback_reply_status == "observed":
return status_value != "no_callback"
return status_value == callback_reply_status
def _remediation_summary_matches_incident_id(
summary: dict[str, Any] | None,
incident_id: str | None,
) -> bool:
if incident_id is None:
return True
incident_ids = (summary or {}).get("incident_ids")
return isinstance(incident_ids, list) and incident_id in incident_ids
async def _build_run_remediation_summaries(
    *,
    runs: list[AwoooPRunState],
    inbound_by_run: dict[UUID, list[AwoooPConversationEvent]],
    outbound_by_run: dict[UUID, list[AwoooPOutboundMessage]],
) -> dict[UUID, dict[str, Any]]:
    """Build one remediation summary per run for list endpoints, without writing state.

    Incident ids are collected per run, remediation history is fetched once
    per distinct incident id, legacy MCP rows are fetched in one query, and
    the pieces are then regrouped per run.  A failed history fetch is logged
    and reported in that run's ``errors`` rather than raised.
    """
    if not runs:
        return {}

    ids_by_run: dict[UUID, list[str]] = {}
    distinct_ids: list[str] = []
    for run in runs:
        collected = _collect_run_incident_ids(
            run=run,
            inbound_events=inbound_by_run.get(run.run_id, []),
            outbound_messages=outbound_by_run.get(run.run_id, []),
        )
        ids_by_run[run.run_id] = collected
        for collected_id in collected:
            _append_unique(distinct_ids, collected_id)

    history_cache: dict[str, list[dict[str, Any]]] = {}
    legacy_cache: dict[str, list[dict[str, Any]]] = {}
    error_cache: dict[str, dict[str, str]] = {}
    if distinct_ids:
        from src.services.adr100_remediation_service import Adr100RemediationService

        service = Adr100RemediationService(record_history=False)
        for incident_id in distinct_ids:
            try:
                history = await service.history(
                    limit=_REMEDIATION_HISTORY_LIMIT,
                    incident_id=incident_id,
                )
                history_cache[incident_id] = [
                    entry
                    for entry in history.get("items", [])
                    if isinstance(entry, dict)
                ]
            except Exception as exc:
                logger.warning(
                    "run_list_remediation_history_fetch_failed",
                    incident_id=incident_id,
                    error=str(exc),
                )
                error_cache[incident_id] = {
                    "incident_id": incident_id,
                    "error": str(exc),
                }
        # The row limit scales with the number of incidents, clamped to 100..5000.
        legacy_limit = min(max(len(distinct_ids) * _REMEDIATION_HISTORY_LIMIT, 100), 5_000)
        legacy_cache = await _fetch_legacy_mcp_by_incident_ids(
            distinct_ids,
            limit=legacy_limit,
        )

    summaries: dict[UUID, dict[str, Any]] = {}
    for run in runs:
        linked_ids = ids_by_run.get(run.run_id, [])
        history_items = [
            entry for linked in linked_ids for entry in history_cache.get(linked, [])
        ]
        legacy_rows = [
            entry for linked in linked_ids for entry in legacy_cache.get(linked, [])
        ]
        fetch_errors = [error_cache[linked] for linked in linked_ids if linked in error_cache]
        summaries[run.run_id] = _run_remediation_list_summary(
            run=run,
            incident_ids=linked_ids,
            items=history_items,
            legacy_mcp_records=legacy_rows,
            errors=fetch_errors,
        )
    return summaries
def _timeline_sort_key(item: dict[str, Any], fallback_ts: Any) -> str:
"""Normalize mixed DB datetime / ISO string timestamps for timeline sorting."""
value = item.get("ts") or fallback_ts
if hasattr(value, "isoformat"):
return value.isoformat()
return str(value or "")
def _summarize_run_remediation_by_work_item(
    items: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Group remediation history items and count the items in each group.

    The grouping key is ``work_item_id`` when truthy, else ``incident_id``
    when truthy, else ``id``, converted with ``str``.  The descriptive fields
    of a group come from the first item seen for its key, and groups are
    returned in first-seen order.
    """
    groups: dict[str, dict[str, Any]] = {}
    for entry in items:
        group_key = str(
            entry.get("work_item_id") or entry.get("incident_id") or entry.get("id")
        )
        group = groups.get(group_key)
        if group is None:
            group = {
                "work_item_id": entry.get("work_item_id"),
                "incident_id": entry.get("incident_id"),
                "count": 0,
                "latest_at": entry.get("created_at"),
                "latest_preview": entry.get("verification_result_preview"),
                "latest_mode": entry.get("mode"),
                "latest_route": _route_label_from_remediation(entry),
            }
            groups[group_key] = group
        group["count"] += 1
    return list(groups.values())
async def _fetch_run_remediation_history(
    incident_ids: list[str],
    *,
    limit: int = _REMEDIATION_HISTORY_LIMIT,
) -> dict[str, Any]:
    """Fetch durable ADR-100 remediation dry-run evidence linked to run incidents.

    History is requested once per incident id.  The merged items are ordered
    by ``created_at`` descending (compared as strings); ``total`` counts all
    merged items while ``items`` and ``by_work_item`` cover only the first
    ``limit``.  A failed fetch is logged and listed in ``errors``.
    """

    def _payload(
        ids: list[str],
        total: int,
        visible: list[dict[str, Any]],
        grouped: list[dict[str, Any]],
        failures: list[dict[str, str]],
    ) -> dict[str, Any]:
        return {
            "schema_version": "awooop_run_remediation_evidence_v1",
            "source": "alert_operation_log",
            "incident_ids": ids,
            "total": total,
            "limit": limit,
            "items": visible,
            "by_work_item": grouped,
            "errors": failures,
        }

    if not incident_ids:
        # No incident ids: return the empty payload without creating the service.
        return _payload([], 0, [], [], [])

    from src.services.adr100_remediation_service import Adr100RemediationService

    service = Adr100RemediationService(record_history=False)
    collected: list[dict[str, Any]] = []
    failures: list[dict[str, str]] = []
    for incident_id in incident_ids:
        try:
            history = await service.history(limit=limit, incident_id=incident_id)
            collected.extend(
                entry
                for entry in history.get("items", [])
                if isinstance(entry, dict)
            )
        except Exception as exc:
            logger.warning(
                "run_remediation_history_fetch_failed",
                incident_id=incident_id,
                error=str(exc),
            )
            failures.append({"incident_id": incident_id, "error": str(exc)})

    newest_first = sorted(
        collected,
        key=lambda entry: str(entry.get("created_at") or ""),
        reverse=True,
    )
    visible_items = newest_first[:limit]
    return _payload(
        incident_ids,
        len(newest_first),
        visible_items,
        _summarize_run_remediation_by_work_item(visible_items),
        failures,
    )
def _legacy_mcp_record(row: MCPAuditLog) -> dict[str, Any]:
return {
"id": row.id,
"session_id": row.session_id,
"flywheel_node": row.flywheel_node,
"mcp_server": row.mcp_server,
"tool_name": row.tool_name,
"duration_ms": row.duration_ms,
"success": row.success,
"error_message": row.error_message,
"incident_id": row.incident_id,
"agent_role": row.agent_role,
"created_at": row.created_at,
}
async def _fetch_legacy_mcp_by_incident_ids(
    incident_ids: list[str],
    *,
    limit: int,
) -> dict[str, list[dict[str, Any]]]:
    """Fetch legacy/self-built MCP audit rows and group them by incident id.

    At most ``limit`` rows are read in total, newest first, so each group
    keeps that order.  An empty ``incident_ids`` returns an empty dict
    without querying.
    """
    if not incident_ids:
        return {}

    query = (
        select(MCPAuditLog)
        .where(MCPAuditLog.incident_id.in_(incident_ids))
        .order_by(MCPAuditLog.created_at.desc())
        .limit(limit)
    )
    async with get_db_context("awoooi") as db:
        result = await db.execute(query)
        audit_rows = list(result.scalars().all())

    grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for audit_row in audit_rows:
        if not audit_row.incident_id:
            continue
        grouped[audit_row.incident_id].append(_legacy_mcp_record(audit_row))
    return dict(grouped)
async def _fetch_run_legacy_mcp_history(
    incident_ids: list[str],
    *,
    limit: int = _MAX_TIMELINE_ITEMS,
) -> dict[str, Any]:
    """Build the legacy/self-built MCP evidence envelope for a run.

    Audit rows are linked to the run only through ``incident_ids``. When no
    incident id is known, the database is not queried and an empty envelope
    is returned. ``total`` is the number of records returned, which is at
    most ``limit``.
    """
    linked_ids: list[str] = []
    records: list[dict[str, Any]] = []

    if incident_ids:
        linked_ids = incident_ids
        newest_first = (
            select(MCPAuditLog)
            .where(MCPAuditLog.incident_id.in_(incident_ids))
            .order_by(MCPAuditLog.created_at.desc())
            .limit(limit)
        )
        async with get_db_context("awoooi") as db:
            audit_rows = list((await db.execute(newest_first)).scalars().all())
        records = [_legacy_mcp_record(audit_row) for audit_row in audit_rows]

    return {
        "schema_version": "awooop_run_legacy_mcp_evidence_v1",
        "source": "mcp_audit_log",
        "incident_ids": linked_ids,
        "total": len(records),
        "limit": limit,
        "records": records,
        "summary": _summarize_mcp(records),
    }
async def get_run_detail(
    run_id: str,
    project_id: str | None = None,
) -> dict[str, Any]:
    """Return the handling context of one run for the AwoooP Run detail / Timeline view.

    Loads the run, its step journal, inbound events, outbound messages and
    gateway MCP audit rows, then enriches them with legacy MCP history,
    remediation history and the AwoooP status chain, all resolved through
    the incident ids collected from the run and its messages.

    Args:
        run_id: Run identifier; must parse as a UUID.
        project_id: When given, the run must also belong to this project and
            the value is passed to ``get_db_context``. When omitted,
            ``"awoooi"`` is used for the context and no project filter is
            applied.

    Returns:
        A dict with the keys ``run``, ``steps``, ``inbound_events``,
        ``outbound_messages``, ``mcp_calls``, ``mcp_gateway``, ``mcp_legacy``,
        ``remediation_history``, ``awooop_status_chain``, ``timeline`` and
        ``counts``.

    Raises:
        HTTPException: 422 when ``run_id`` is not a valid UUID; 404 when no
            matching run exists.
    """
    try:
        run_uuid = uuid.UUID(run_id)
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"run_id 格式錯誤: {exc}",
        ) from exc

    async with get_db_context(project_id or "awoooi") as db:
        run_stmt = select(AwoooPRunState).where(AwoooPRunState.run_id == run_uuid)
        if project_id is not None:
            run_stmt = run_stmt.where(AwoooPRunState.project_id == project_id)
        run_result = await db.execute(run_stmt)
        run = run_result.scalar_one_or_none()
        if run is None:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"run {run_id!r} 不存在",
            )

        # Every per-table query below is capped at _MAX_TIMELINE_ITEMS rows.
        steps_result = await db.execute(
            select(AwoooPRunStepJournal)
            .where(AwoooPRunStepJournal.run_id == run_uuid)
            .order_by(AwoooPRunStepJournal.step_seq.asc())
            .limit(_MAX_TIMELINE_ITEMS)
        )
        steps = list(steps_result.scalars().all())

        # Inbound events match either by run_id or by the run's trigger_ref.
        # A trigger_ref that parses as a UUID is compared against event_id,
        # anything else against provider_event_id.
        inbound_where = [AwoooPConversationEvent.run_id == run_uuid]
        if run.trigger_ref:
            try:
                trigger_event_uuid = uuid.UUID(run.trigger_ref)
                inbound_where.append(AwoooPConversationEvent.event_id == trigger_event_uuid)
            except ValueError:
                inbound_where.append(
                    AwoooPConversationEvent.provider_event_id == run.trigger_ref
                )
        inbound_result = await db.execute(
            select(AwoooPConversationEvent)
            .where(sa_or(*inbound_where))
            .order_by(AwoooPConversationEvent.received_at.asc())
            .limit(_MAX_TIMELINE_ITEMS)
        )
        inbound_events = list(inbound_result.scalars().all())

        outbound_result = await db.execute(
            select(AwoooPOutboundMessage)
            .where(AwoooPOutboundMessage.run_id == run_uuid)
            .order_by(AwoooPOutboundMessage.queued_at.asc())
            .limit(_MAX_TIMELINE_ITEMS)
        )
        outbound_messages = list(outbound_result.scalars().all())

        mcp_result = await db.execute(
            select(AwoooPMcpGatewayAudit)
            .where(AwoooPMcpGatewayAudit.run_id == run_uuid)
            .order_by(AwoooPMcpGatewayAudit.created_at.asc())
            .limit(_MAX_TIMELINE_ITEMS)
        )
        mcp_calls = list(mcp_result.scalars().all())

    # NOTE(review): everything below works on the lists materialized above;
    # this assumes row attributes stay readable once the context has exited
    # (e.g. no expire-on-commit) — confirm against get_db_context.
    run_payload = {
        "run_id": run.run_id,
        "project_id": run.project_id,
        "agent_id": run.agent_id,
        "state": run.state,
        "is_shadow": run.is_shadow,
        "trace_id": run.trace_id,
        "trigger_type": run.trigger_type,
        "trigger_ref": run.trigger_ref,
        "cost_usd": run.cost_usd,
        "step_count": run.step_count,
        "attempt_count": run.attempt_count,
        "max_attempts": run.max_attempts,
        "error_code": run.error_code,
        "error_detail": run.error_detail,
        "created_at": run.created_at,
        "started_at": run.started_at,
        "completed_at": run.completed_at,
        "timeout_at": run.timeout_at,
        "heartbeat_at": run.heartbeat_at,
    }
    step_items = [
        {
            "step_id": row.step_id,
            "step_seq": row.step_seq,
            "tool_name": row.tool_name,
            "result_status": row.result_status,
            "was_blocked": row.was_blocked,
            "block_reason": row.block_reason,
            "error_code": row.error_code,
            "latency_ms": row.latency_ms,
            "created_at": row.created_at,
            "completed_at": row.completed_at,
        }
        for row in steps
    ]
    inbound_items = [
        {
            "event_id": row.event_id,
            "channel_type": row.channel_type,
            "provider_event_id": row.provider_event_id,
            "content_preview": row.content_preview,
            "is_duplicate": row.is_duplicate,
            "received_at": row.received_at,
        }
        for row in inbound_events
    ]
    outbound_items = []
    for row in outbound_messages:
        callback_reply = _outbound_callback_reply(row.source_envelope)
        outbound_items.append({
            "message_id": row.message_id,
            "channel_type": row.channel_type,
            "message_type": row.message_type,
            "content_preview": row.content_preview,
            "send_status": row.send_status,
            "send_error": row.send_error,
            "provider_message_id": row.provider_message_id,
            "queued_at": row.queued_at,
            "sent_at": row.sent_at,
            "triggered_by_state": row.triggered_by_state,
            "callback_reply": callback_reply,
        })

    def _mcp_item(row: AwoooPMcpGatewayAudit) -> dict[str, Any]:
        # A gate_result that is not a dict is treated as empty, so the three
        # lifted fields below become None instead of raising.
        gate_result = row.gate_result if isinstance(row.gate_result, dict) else {}
        return {
            "call_id": row.call_id,
            "agent_id": row.agent_id,
            "tool_name": row.tool_name,
            "result_status": row.result_status,
            "block_gate": row.block_gate,
            "block_reason": row.block_reason,
            "latency_ms": row.latency_ms,
            "created_at": row.created_at,
            "required_scope": gate_result.get("required_scope"),
            "policy_enforced": gate_result.get("policy_enforced"),
            "is_shadow": gate_result.get("is_shadow"),
            "gate_result": gate_result,
        }

    mcp_items = [_mcp_item(row) for row in mcp_calls]
    mcp_gateway_summary = _summarize_gateway_mcp([
        _mcp_gateway_summary_row(row) for row in mcp_calls
    ])

    # Legacy MCP rows, remediation history and the status chain are all
    # looked up through the incident ids collected from the run and messages.
    incident_ids = _collect_run_incident_ids(
        run=run,
        inbound_events=inbound_events,
        outbound_messages=outbound_messages,
    )
    legacy_mcp_history = await _fetch_run_legacy_mcp_history(incident_ids)
    remediation_history = await _fetch_run_remediation_history(incident_ids)
    awooop_status_chain = await _fetch_awooop_status_chain(
        incident_ids=incident_ids,
        project_id=run.project_id,
        remediation_history=remediation_history,
    )

    # The timeline is assembled source by source and sorted once at the end.
    timeline: list[dict[str, Any]] = [
        _timeline_item(
            ts=run.created_at,
            kind="run",
            title="Run 建立",
            status=run.state,
            summary=f"{run.trigger_type or 'unknown'}{run.agent_id}",
            metadata={"trace_id": run.trace_id, "trigger_ref": run.trigger_ref},
        )
    ]
    if run.started_at:
        timeline.append(
            _timeline_item(
                ts=run.started_at,
                kind="run",
                title="Run 開始執行",
                status="running",
                summary=run.worker_id,
            )
        )
    for row in inbound_events:
        timeline.append(
            _timeline_item(
                ts=row.received_at,
                kind="inbound",
                title=f"{row.channel_type} 入站事件",
                status="duplicate" if row.is_duplicate else "received",
                summary=row.content_preview,
                metadata={"provider_event_id": row.provider_event_id},
            )
        )
    for row in steps:
        # Steps whose tool name starts with "operator_console." are shown as
        # approval entries rather than ordinary steps.
        is_approval_step = row.tool_name.startswith("operator_console.")
        timeline.append(
            _timeline_item(
                ts=row.completed_at or row.created_at,
                kind="approval" if is_approval_step else "step",
                title=_approval_step_title(row.tool_name, row.step_seq),
                status=row.result_status,
                summary=row.block_reason or row.error_code,
                metadata={
                    "was_blocked": row.was_blocked,
                    "latency_ms": row.latency_ms,
                },
            )
        )
    for row in mcp_calls:
        gate_result = row.gate_result if isinstance(row.gate_result, dict) else {}
        scope = gate_result.get("required_scope")
        policy_enforced = gate_result.get("policy_enforced")
        # Prefer the block reason; otherwise synthesize an agent/scope/policy line.
        summary = row.block_reason
        if summary is None:
            summary = (
                f"agent={row.agent_id or 'unknown'}"
                f" scope={scope or 'unknown'}"
                f" policy_enforced={policy_enforced}"
            )
        timeline.append(
            _timeline_item(
                ts=row.created_at,
                kind="mcp",
                title=f"MCP: {row.tool_name}",
                status=row.result_status,
                summary=summary,
                metadata={
                    "agent_id": row.agent_id,
                    "block_gate": row.block_gate,
                    "required_scope": scope,
                    "policy_enforced": policy_enforced,
                    "latency_ms": row.latency_ms,
                },
            )
        )
    for record in legacy_mcp_history.get("records", []):
        if not isinstance(record, dict):
            continue
        # "<mcp_server>/<tool_name>", skipping empty parts; "unknown" when both are empty.
        tool_route = "/".join(
            part
            for part in (
                str(record.get("mcp_server") or ""),
                str(record.get("tool_name") or ""),
            )
            if part
        ) or "unknown"
        timeline.append(
            _timeline_item(
                ts=record.get("created_at"),
                kind="mcp",
                title=f"Legacy MCP: {tool_route}",
                status=_legacy_mcp_timeline_status(record),
                summary=_legacy_mcp_timeline_summary(record),
                metadata={
                    "incident_id": record.get("incident_id"),
                    "agent_role": record.get("agent_role"),
                    "flywheel_node": record.get("flywheel_node"),
                    "history_source": "mcp_audit_log",
                },
            )
        )
    for item in remediation_history.get("items", []):
        if not isinstance(item, dict):
            continue
        timeline.append(
            _timeline_item(
                ts=item.get("created_at"),
                kind="remediation",
                title="ADR-100 補救試跑",
                status=_remediation_timeline_status(item),
                summary=_remediation_timeline_summary(item),
                metadata={
                    "incident_id": item.get("incident_id"),
                    "work_item_id": item.get("work_item_id"),
                    "mcp_route": _route_label_from_remediation(item),
                    "writes_incident_state": item.get("writes_incident_state"),
                    "writes_auto_repair_result": item.get("writes_auto_repair_result"),
                    "history_source": "alert_operation_log",
                },
            )
        )
    for row in outbound_messages:
        callback_reply = _outbound_callback_reply(row.source_envelope)
        timeline.append(
            _timeline_item(
                # Messages that were never sent fall back to their queue time.
                ts=row.sent_at or row.queued_at,
                kind="outbound",
                title=_outbound_timeline_title(
                    row.channel_type,
                    row.message_type,
                    row.content_preview,
                    callback_reply,
                ),
                status=_outbound_timeline_status(row.send_status, callback_reply),
                summary=_outbound_timeline_summary(
                    content_preview=row.content_preview,
                    send_error=row.send_error,
                    callback_reply=callback_reply,
                ),
                metadata=_outbound_timeline_metadata(row, callback_reply),
            )
        )
    if run.completed_at:
        timeline.append(
            _timeline_item(
                ts=run.completed_at,
                kind="run",
                title="Run 結束",
                status=run.state,
                summary=run.error_detail or run.error_code,
            )
        )

    # Sort the merged entries, then cap the list; entries beyond the cap are dropped.
    # NOTE(review): run.created_at is presumably the fallback for entries
    # without a timestamp — confirm in _timeline_sort_key.
    timeline = sorted(
        timeline,
        key=lambda item: _timeline_sort_key(item, run.created_at),
    )[:_MAX_TIMELINE_ITEMS]

    return {
        "run": run_payload,
        "steps": step_items,
        "inbound_events": inbound_items,
        "outbound_messages": outbound_items,
        "mcp_calls": mcp_items,
        "mcp_gateway": mcp_gateway_summary,
        "mcp_legacy": legacy_mcp_history,
        "remediation_history": remediation_history,
        "awooop_status_chain": awooop_status_chain,
        "timeline": timeline,
        "counts": {
            "steps": len(step_items),
            "inbound_events": len(inbound_items),
            "outbound_messages": len(outbound_items),
            "mcp_calls": len(mcp_items),
            "legacy_mcp_calls": legacy_mcp_history.get("total", 0),
            "remediation_history": remediation_history.get("total", 0),
            "timeline": len(timeline),
        },
    }
# =============================================================================
# Channel Events
# =============================================================================
async def list_recent_channel_events(
    *,
    project_id: str | None,
    channel_type: str | None,
    provider_prefix: str | None,
    limit: int,
) -> dict[str, Any]:
    """List the most recent channel events for the Operator Console.

    The console uses these rows to show convergence / mirroring context.

    Args:
        project_id: Restrict to one project when not ``None``.
        channel_type: Restrict to one channel type when not ``None``.
        provider_prefix: When not ``None``, keep only events whose
            ``provider_event_id`` starts with this literal prefix.
        limit: Requested page size; clamped to ``1 .. _MAX_EVENTS``.

    Returns:
        ``{"events": [...], "total": <number of events returned>,
        "limit": <clamped limit>}``, newest event first.
    """
    safe_limit = max(1, min(limit, _MAX_EVENTS))
    async with get_db_context("awoooi") as db:
        stmt = select(AwoooPConversationEvent).order_by(
            AwoooPConversationEvent.received_at.desc()
        )
        if project_id is not None:
            stmt = stmt.where(AwoooPConversationEvent.project_id == project_id)
        if channel_type is not None:
            stmt = stmt.where(AwoooPConversationEvent.channel_type == channel_type)
        if provider_prefix is not None:
            # autoescape keeps "%" and "_" in the caller's prefix literal.
            # Building the LIKE pattern by hand would let them act as
            # wildcards, so "alert_" would also match "alertX...".
            stmt = stmt.where(
                AwoooPConversationEvent.provider_event_id.startswith(
                    provider_prefix,
                    autoescape=True,
                )
            )
        result = await db.execute(stmt.limit(safe_limit))
        rows = list(result.scalars().all())

    events = [_recent_channel_event_item(r) for r in rows]
    return {"events": events, "total": len(events), "limit": safe_limit}
# =============================================================================
# Approvals
# =============================================================================
async def list_approvals(
    project_id: str | None,
    run_id: str | None = None,
    remediation_status: str | None = None,
) -> dict[str, Any]:
    """List runs in ``waiting_approval`` state, oldest first.

    Args:
        project_id: Restrict to one project when not ``None``.
        run_id: Restrict to one run when given; must parse as a UUID.
        remediation_status: Optional filter applied in memory against each
            run's remediation summary, after the rows have been loaded.

    Returns:
        ``{"approvals": items, "total": n, "items": items}``. ``approvals``
        and ``items`` are the same list. Each item carries the run's basic
        fields plus ``remediation_summary`` and ``awooop_status_chain``.

    Raises:
        HTTPException: 422 when ``run_id`` is not a valid UUID. The status
            filter is checked by ``_validate_remediation_status_filter``.
    """
    _validate_remediation_status_filter(remediation_status)

    run_uuid: UUID | None = None
    if run_id:
        try:
            run_uuid = uuid.UUID(run_id)
        except ValueError as exc:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail=f"run_id 格式錯誤: {exc}",
            ) from exc

    async with get_db_context("awoooi") as db:
        stmt = (
            select(AwoooPRunState)
            .where(AwoooPRunState.state == "waiting_approval")
            .order_by(AwoooPRunState.created_at.asc())
        )
        if project_id is not None:
            stmt = stmt.where(AwoooPRunState.project_id == project_id)
        if run_uuid is not None:
            stmt = stmt.where(AwoooPRunState.run_id == run_uuid)

        # NOTE(review): the row query below has no LIMIT/OFFSET, so this
        # count matches len(rows) barring a concurrent write; it could be
        # dropped unless pagination is planned.
        count_stmt = select(func.count()).select_from(stmt.subquery())
        total_result = await db.execute(count_stmt)
        total = total_result.scalar_one()

        result = await db.execute(stmt)
        rows = list(result.scalars().all())
        inbound_by_run, outbound_by_run = await _load_run_message_context(db, rows)

    remediation_summaries = await _build_run_remediation_summaries(
        runs=rows,
        inbound_by_run=inbound_by_run,
        outbound_by_run=outbound_by_run,
    )
    if remediation_status:
        rows = [
            row
            for row in rows
            if _remediation_summary_matches_status(
                remediation_summaries.get(row.run_id),
                remediation_status,
            )
        ]
        total = len(rows)

    # Runs with the same project and incident ids reuse one status-chain
    # lookup instead of repeating the call.
    status_chain_cache: dict[tuple[str, tuple[str, ...]], dict[str, Any]] = {}
    items = []
    for r in rows:
        summary = remediation_summaries.get(r.run_id)
        # A dict summary without "incident_ids" (or with None there) must be
        # treated as "no incidents"; iterating None would raise TypeError.
        summary_incident_ids = (
            summary.get("incident_ids") if isinstance(summary, dict) else None
        ) or []
        incident_ids = [
            incident_id
            for incident_id in summary_incident_ids
            if isinstance(incident_id, str) and incident_id
        ]
        cache_key = (r.project_id, tuple(incident_ids))
        status_chain = status_chain_cache.get(cache_key)
        if status_chain is None:
            status_chain = await get_awooop_status_chain(
                project_id=r.project_id,
                incident_ids=incident_ids,
            )
            status_chain_cache[cache_key] = status_chain
        items.append({
            "run_id": r.run_id,
            "project_id": r.project_id,
            "agent_id": r.agent_id,
            "trigger_type": r.trigger_type,
            "trigger_ref": r.trigger_ref,
            "is_shadow": r.is_shadow,
            "created_at": r.created_at,
            "timeout_at": r.timeout_at,
            "remediation_summary": summary,
            "awooop_status_chain": status_chain,
        })
    return {"approvals": items, "total": total, "items": items}
async def decide_approval(
    run_id: str,
    project_id: str,
    decision: str,
    approver_id: str,
    reason: str | None,
) -> dict[str, Any]:
    """Approve or reject a run that is waiting for approval (ADR-116 Gate 5).

    Args:
        run_id: Run identifier; must parse as a UUID.
        project_id: Project the run must belong to.
        decision: ``"approve"`` approves the run. Any other value takes the
            reject branch, so callers are expected to validate it.
        approver_id: Identity of the operator making the decision.
        reason: Optional free-text reason, recorded in the journal and audit.

    Returns:
        ``{"run_id", "decision", "new_state", "approval_token_jti"}``.
        ``new_state`` is ``"running"`` after an approve and ``"cancelled"``
        after a reject. ``approval_token_jti`` is ``None`` on reject or when
        the token payload cannot be decoded.

    Raises:
        HTTPException: 422 when ``run_id`` is not a UUID; 404 when the run
            does not exist in this project; 409 when the run is not in
            ``waiting_approval`` or is a projection-only Gate 5 row; 500
            when recording the approval fails.
    """
    try:
        run_uuid = uuid.UUID(run_id)
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"run_id 格式錯誤: {exc}",
        ) from exc

    async with get_db_context(project_id) as db:
        result = await db.execute(
            select(AwoooPRunState).where(
                AwoooPRunState.run_id == run_uuid,
                AwoooPRunState.project_id == project_id,
            )
        )
        run = result.scalar_one_or_none()

    if run is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"run {run_id!r} 不存在或非此 project 所有",
        )
    if run.state != "waiting_approval":
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"run {run_id!r} 目前狀態為 {run.state!r},無法審核(需為 waiting_approval",
        )

    is_projection_only_gate5 = run.trigger_type == _ADR100_GATE5_PROJECTION_TRIGGER
    approval_token_jti: str | None = None
    new_state: str

    if is_projection_only_gate5:
        # Projection-only rows never change state: journal the attempt,
        # try to audit it, then always answer 409.
        await _record_approval_projection_guard_step(
            run_id=run_uuid,
            project_id=project_id,
            decision=decision,
            approver_id=approver_id,
            reason=reason,
        )
        try:
            await write_audit(
                project_id=project_id,
                action=f"run.approval.{decision}.blocked",
                resource_type="run",
                resource_id=run_id,
                details={
                    "approver_id": approver_id,
                    "decision": decision,
                    "reason": reason,
                    "new_state": "waiting_approval",
                    "trigger_type": _ADR100_GATE5_PROJECTION_TRIGGER,
                    "block_reason": "adr100_runtime_replay_gate5_projection_only",
                    "execution_authorized": False,
                    "repair_executed": False,
                },
                run_id=run_id,
            )
        except Exception as exc:
            # An audit failure is only logged; the 409 below is still raised.
            logger.warning(
                "approval_projection_guard_audit_write_failed",
                run_id=run_id,
                error=str(exc),
            )
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=(
                "adr100_runtime_replay_gate5_projection_only: "
                "此 AwoooP 簽核列只投影 legacy Gate 5 approval 與狀態鏈,"
                "尚未接上 auto_repair_executor 執行 handoff不能直接由平台按鈕轉成 running。"
            ),
        )

    if decision == "approve":
        token = issue_approval_token(
            project_id=project_id,
            run_id=run_id,
            tool_name="operator_console_approve",
            approver_id=approver_id,
        )
        try:
            await record_approval(
                project_id=project_id,
                run_id=run_id,
                tool_name="operator_console_approve",
                approver_id=approver_id,
                token=token,
            )
        except Exception as exc:
            # If the approval cannot be recorded, fail before any state transition.
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"核准記錄失敗: {exc}",
            ) from exc
        await transition(run_uuid, project_id, "running")
        new_state = "running"
        await _record_approval_decision_step(
            run_id=run_uuid,
            project_id=project_id,
            decision=decision,
            approver_id=approver_id,
            reason=reason,
        )
        import base64
        import json as _json
        try:
            # NOTE(review): presumably a JWT-style token whose second
            # dot-separated segment is the base64url payload — confirm
            # against issue_approval_token.
            p_b64 = token.split(".")[1]
            # Restore the "=" padding that the segment was stripped of.
            padding = 4 - len(p_b64) % 4
            if padding != 4:
                p_b64 += "=" * padding
            payload = _json.loads(base64.urlsafe_b64decode(p_b64))
            approval_token_jti = payload.get("jti")
        except Exception:
            # The jti is informational only; a decode failure yields None.
            approval_token_jti = None
    else:
        await transition(
            run_uuid,
            project_id,
            "cancelled",
            error_code="E-APPR-REJECTED",
            error_detail=f"operator 拒絕: approver={approver_id!r}, reason={reason!r}",
        )
        new_state = "cancelled"
        await _record_approval_decision_step(
            run_id=run_uuid,
            project_id=project_id,
            decision=decision,
            approver_id=approver_id,
            reason=reason,
        )

    try:
        await write_audit(
            project_id=project_id,
            action=f"run.approval.{decision}",
            resource_type="run",
            resource_id=run_id,
            details={
                "approver_id": approver_id,
                "decision": decision,
                "reason": reason,
                "new_state": new_state,
            },
            run_id=run_id,
        )
    except Exception as exc:
        # An audit failure is only logged; the decision has already been applied.
        logger.warning("approval_audit_write_failed", run_id=run_id, error=str(exc))

    return {
        "run_id": run_id,
        "decision": decision,
        "new_state": new_state,
        "approval_token_jti": approval_token_jti,
    }
async def _record_approval_projection_guard_step(
    *,
    run_id: UUID,
    project_id: str,
    decision: str,
    approver_id: str,
    reason: str | None,
) -> None:
    """Journal a blocked approval attempt on a projection-only Gate 5 run.

    Appends a ``failed`` / blocked step (tool
    ``operator_console.approval_projection_guard``) after the run's current
    highest ``step_seq`` and increments the run's ``step_count``. This is
    best effort: any exception is logged as a warning and swallowed.
    """
    blocked_summary = _truncate_step_summary(
        "projection_only_gate5; "
        f"approver={approver_id}; decision={decision}; reason={reason or '-'}"
    )
    try:
        latest_seq_query = select(
            func.coalesce(func.max(AwoooPRunStepJournal.step_seq), 0)
        ).where(
            AwoooPRunStepJournal.run_id == run_id,
            AwoooPRunStepJournal.project_id == project_id,
        )
        bump_step_count = (
            update(AwoooPRunState)
            .where(
                AwoooPRunState.run_id == run_id,
                AwoooPRunState.project_id == project_id,
            )
            .values(step_count=AwoooPRunState.step_count + 1)
        )
        async with get_db_context(project_id) as db:
            latest_seq = (await db.execute(latest_seq_query)).scalar_one()
            guard_step = AwoooPRunStepJournal(
                run_id=run_id,
                project_id=project_id,
                step_seq=int(latest_seq) + 1,
                tool_name="operator_console.approval_projection_guard",
                result_status="failed",
                error_code="E-ADR100-GATE5-PROJECTION",
                was_blocked=True,
                block_reason=blocked_summary,
                completed_at=_utc_now_naive(),
            )
            db.add(guard_step)
            await db.execute(bump_step_count)
        logger.info(
            "approval_projection_guard_step_recorded",
            run_id=str(run_id),
            project_id=project_id,
            decision=decision,
            approver_id=approver_id,
        )
    except Exception as exc:
        # The caller raises its 409 regardless, so a journal failure is only logged.
        logger.warning(
            "approval_projection_guard_step_record_failed",
            run_id=str(run_id),
            project_id=project_id,
            decision=decision,
            error=str(exc),
        )
async def _record_approval_decision_step(
    *,
    run_id: UUID,
    project_id: str,
    decision: str,
    approver_id: str,
    reason: str | None,
) -> None:
    """Write the Operator Console's human approval decision to the Run Step Journal.

    This is a governance / observability record, not the execution gate
    itself. A write failure must never block an approve / reject that has
    already completed, because that would add a second failure to the human
    decision state machine. Any exception is therefore logged as a warning
    and swallowed.
    """
    if decision == "approve":
        tool_name = "operator_console.approve"
    else:
        tool_name = "operator_console.reject"
    decision_summary = _truncate_step_summary(
        f"approver={approver_id}; decision={decision}; reason={reason or '-'}"
    )
    try:
        latest_seq_query = select(
            func.coalesce(func.max(AwoooPRunStepJournal.step_seq), 0)
        ).where(
            AwoooPRunStepJournal.run_id == run_id,
            AwoooPRunStepJournal.project_id == project_id,
        )
        bump_step_count = (
            update(AwoooPRunState)
            .where(
                AwoooPRunState.run_id == run_id,
                AwoooPRunState.project_id == project_id,
            )
            .values(step_count=AwoooPRunState.step_count + 1)
        )
        async with get_db_context(project_id) as db:
            latest_seq = (await db.execute(latest_seq_query)).scalar_one()
            decision_step = AwoooPRunStepJournal(
                run_id=run_id,
                project_id=project_id,
                step_seq=int(latest_seq) + 1,
                tool_name=tool_name,
                result_status="success",
                block_reason=decision_summary,
                completed_at=_utc_now_naive(),
            )
            db.add(decision_step)
            await db.execute(bump_step_count)
        logger.info(
            "approval_decision_step_recorded",
            run_id=str(run_id),
            project_id=project_id,
            decision=decision,
            approver_id=approver_id,
        )
    except Exception as exc:
        logger.warning(
            "approval_decision_step_record_failed",
            run_id=str(run_id),
            project_id=project_id,
            decision=decision,
            error=str(exc),
        )