Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 28s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
4412 lines
179 KiB
Python
4412 lines
179 KiB
Python
"""Current AI Agent autonomous runtime control plane.
|
||
|
||
This read model is the current directive layer. Historical P2 snapshots can
|
||
still describe earlier no-send / no-live states, but this payload states what
|
||
the product should enforce now: low, medium, and high risk routes may proceed
|
||
through controlled automation when allowlist, check-mode, verifier, rollback,
|
||
KM, and Telegram receipts are present.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from collections.abc import Iterable, Mapping
|
||
from datetime import UTC, datetime
|
||
from typing import Any
|
||
|
||
from sqlalchemy import text
|
||
|
||
from src.core.config import settings
|
||
from src.core.logging import get_logger
|
||
from src.db.base import get_db_context
|
||
from src.services.ai_agent_log_controlled_writeback_consumer_readback import (
|
||
load_latest_ai_agent_log_controlled_writeback_consumer_readback,
|
||
)
|
||
from src.services.ai_agent_log_controlled_writeback_dispatch import (
|
||
OPERATION_TYPE as LOG_CONTROLLED_WRITEBACK_DISPATCH_OPERATION_TYPE,
|
||
)
|
||
from src.services.ai_agent_log_controlled_writeback_executor_readback import (
|
||
load_latest_ai_agent_log_controlled_writeback_executor_readback,
|
||
)
|
||
from src.services.report_generation_service import (
|
||
DAILY_REPORT_HOUR_TAIPEI,
|
||
MONTHLY_REPORT_DAY_TAIPEI,
|
||
MONTHLY_REPORT_HOUR_TAIPEI,
|
||
WEEKLY_REPORT_HOUR_TAIPEI,
|
||
WEEKLY_REPORT_WEEKDAY_TAIPEI,
|
||
)
|
||
|
||
# NOTE(review): the identifiers from _SCHEMA_VERSION through
# _DEFAULT_LOOKBACK_HOURS are not referenced in the part of the file reviewed
# here; their consumers are presumably further down -- confirm before changing.
_SCHEMA_VERSION = "ai_agent_autonomous_runtime_control_v1"
_RUNTIME_AUTHORITY = "current_owner_directive_controlled_ai_automation"
_DEPLOY_READBACK_MARKER = "p2_416_d1n_autonomous_runtime_control_prod_readback_v2"
_DEPLOY_ATTEMPT_NOTE = "cd_internal_control_plane_readback_retry_20260628_2"
_LIVE_READBACK_SCHEMA_VERSION = "ai_agent_autonomous_runtime_receipt_readback_v1"
_DEFAULT_PROJECT_ID = "awoooi"
_DEFAULT_LOOKBACK_HOURS = 24
# CD cancel-stale-cd no-op triggers must not change runtime payloads.
# Operation types that _operation_counts pre-seeds with zeroed buckets and
# that _build_log_integration_taxonomy sums over for its executor totals.
_EXECUTOR_OPERATION_TYPES = (
    "ansible_candidate_matched",
    "ansible_check_mode_executed",
    "ansible_apply_executed",
    "ansible_learning_writeback_recorded",
    "ansible_rollback_executed",
    "ansible_execution_skipped",
    LOG_CONTROLLED_WRITEBACK_DISPATCH_OPERATION_TYPE,
)
# (needle, replacement) pairs applied in order, via str.replace, by
# redact_autonomous_runtime_control_public_terms.
_PUBLIC_VALUE_REDACTIONS = (
    ("raw_payload", "source_payload"),
    ("raw payload", "source payload"),
)

# Module-level logger; the loaders below call logger.warning with keyword fields.
logger = get_logger(__name__)
|
||
|
||
|
||
def _allowed_risk_levels() -> list[str]:
    """Return the configured controlled-apply risk levels, normalized and sorted.

    The setting ``AWOOOP_ANSIBLE_CONTROLLED_APPLY_ALLOWED_RISK_LEVELS`` is read
    as a comma-separated string. Each entry is stripped and lower-cased, blank
    entries are dropped, duplicates collapse, and the result is sorted.
    """
    configured = settings.AWOOOP_ANSIBLE_CONTROLLED_APPLY_ALLOWED_RISK_LEVELS or ""
    levels: set[str] = set()
    for token in str(configured).split(","):
        normalized = token.strip().lower()
        if normalized:
            levels.add(normalized)
    return sorted(levels)
|
||
|
||
|
||
def _utc_iso(value: Any) -> str | None:
|
||
if value is None:
|
||
return None
|
||
if isinstance(value, datetime):
|
||
if value.tzinfo is None:
|
||
value = value.replace(tzinfo=UTC)
|
||
return value.astimezone(UTC).isoformat()
|
||
return str(value)
|
||
|
||
|
||
def _row_mapping(row: Mapping[str, Any] | Any) -> dict[str, Any]:
|
||
if isinstance(row, Mapping):
|
||
return dict(row)
|
||
mapping = getattr(row, "_mapping", None)
|
||
if mapping is not None:
|
||
return dict(mapping)
|
||
return dict(row)
|
||
|
||
|
||
def redact_autonomous_runtime_control_public_terms(value: Any) -> Any:
    """Redact display strings that would imply raw prompt/payload exposure.

    Strings have every ``(needle, replacement)`` pair from
    ``_PUBLIC_VALUE_REDACTIONS`` applied in order. Lists and dicts are rebuilt
    recursively (dict keys are left untouched, only values are processed).
    Every other value is returned unchanged.
    """
    if isinstance(value, dict):
        return {
            key: redact_autonomous_runtime_control_public_terms(item)
            for key, item in value.items()
        }
    if isinstance(value, list):
        return [redact_autonomous_runtime_control_public_terms(item) for item in value]
    if not isinstance(value, str):
        return value
    cleaned = value
    for needle, replacement in _PUBLIC_VALUE_REDACTIONS:
        cleaned = cleaned.replace(needle, replacement)
    return cleaned
|
||
|
||
|
||
def _int_value(value: Any) -> int:
    """Coerce *value* to ``int``, returning 0 when that is not possible.

    Falsy inputs (``None``, ``""``, ``0``) become 0. Values that ``int()``
    rejects yield 0 instead of raising. That includes non-finite numbers:
    ``int(float("inf"))`` raises ``OverflowError``, which is now handled along
    with ``TypeError`` and ``ValueError``.
    """
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        return 0
|
||
|
||
|
||
def _sanitize_latest_rows(
|
||
rows: Iterable[Mapping[str, Any] | Any],
|
||
*,
|
||
allowed_keys: tuple[str, ...],
|
||
time_keys: tuple[str, ...] = ("created_at", "collected_at", "queued_at", "sent_at"),
|
||
limit: int = 5,
|
||
) -> list[dict[str, Any]]:
|
||
clean_rows: list[dict[str, Any]] = []
|
||
for row in rows:
|
||
item = _row_mapping(row)
|
||
clean: dict[str, Any] = {}
|
||
for key in allowed_keys:
|
||
if key not in item:
|
||
continue
|
||
value = item.get(key)
|
||
clean[key] = _utc_iso(value) if key in time_keys else value
|
||
clean_rows.append(clean)
|
||
if len(clean_rows) >= limit:
|
||
break
|
||
return clean_rows
|
||
|
||
|
||
def _operation_counts(
    rows: Iterable[Mapping[str, Any] | Any],
) -> dict[str, dict[str, Any]]:
    """Aggregate pre-counted rows into per-operation-type buckets.

    Every type in ``_EXECUTOR_OPERATION_TYPES`` starts with a zeroed bucket so
    it appears in the result even without rows. Each row contributes its
    ``total`` and ``recent`` counts to the bucket of its ``operation_type``
    (``"unknown"`` when missing), and its ``total`` to that bucket's
    ``by_status`` entry for the row's ``status`` (``"unknown"`` when missing).
    """

    def _empty_bucket() -> dict[str, Any]:
        return {"total": 0, "recent": 0, "by_status": {}}

    counts = {name: _empty_bucket() for name in _EXECUTOR_OPERATION_TYPES}
    for row in rows:
        record = _row_mapping(row)
        op_name = str(record.get("operation_type") or "unknown")
        status_name = str(record.get("status") or "unknown")
        if op_name not in counts:
            counts[op_name] = _empty_bucket()
        bucket = counts[op_name]
        row_total = _int_value(record.get("total"))
        row_recent = _int_value(record.get("recent"))
        bucket["total"] += row_total
        bucket["recent"] += row_recent
        status_totals = bucket["by_status"]
        status_totals[status_name] = status_totals.get(status_name, 0) + row_total
    return counts
|
||
|
||
|
||
def _status_counts(
|
||
rows: Iterable[Mapping[str, Any] | Any],
|
||
*,
|
||
status_key: str,
|
||
) -> dict[str, Any]:
|
||
by_status: dict[str, int] = {}
|
||
recent_by_status: dict[str, int] = {}
|
||
total = 0
|
||
recent = 0
|
||
for row in rows:
|
||
item = _row_mapping(row)
|
||
status = str(item.get(status_key) or "unknown")
|
||
row_total = _int_value(item.get("total"))
|
||
row_recent = _int_value(item.get("recent"))
|
||
by_status[status] = by_status.get(status, 0) + row_total
|
||
recent_by_status[status] = recent_by_status.get(status, 0) + row_recent
|
||
total += row_total
|
||
recent += row_recent
|
||
return {
|
||
"total": total,
|
||
"recent": recent,
|
||
"by_status": by_status,
|
||
"recent_by_status": recent_by_status,
|
||
}
|
||
|
||
|
||
def _status_total(summary: Mapping[str, Any] | None, *statuses: str) -> int:
|
||
if not isinstance(summary, Mapping):
|
||
return 0
|
||
if not statuses:
|
||
return _int_value(summary.get("total"))
|
||
by_status = summary.get("by_status")
|
||
if not isinstance(by_status, Mapping):
|
||
return 0
|
||
return sum(_int_value(by_status.get(status)) for status in statuses)
|
||
|
||
|
||
def _status_recent(summary: Mapping[str, Any] | None, *statuses: str) -> int:
|
||
if not isinstance(summary, Mapping):
|
||
return 0
|
||
if not statuses:
|
||
return _int_value(summary.get("recent"))
|
||
by_status = summary.get("recent_by_status")
|
||
if not isinstance(by_status, Mapping):
|
||
return 0
|
||
return sum(_int_value(by_status.get(status)) for status in statuses)
|
||
|
||
|
||
def _load_log_controlled_writeback_executor_readback() -> dict[str, Any]:
    """Attach the LOG/KM/RAG/MCP/PlayBook executor queue without dispatching it.

    Returns whatever ``load_latest_ai_agent_log_controlled_writeback_executor_readback``
    produces. If that call raises, a warning carrying the exception type name
    is logged and a static "blocked" payload is returned instead, so the caller
    always receives a dictionary.
    """
    try:
        return load_latest_ai_agent_log_controlled_writeback_executor_readback()
    except Exception as exc:  # pragma: no cover - keeps runtime control API visible
        failure_name = type(exc).__name__
        logger.warning(
            "log_controlled_writeback_executor_readback_failed",
            error_type=failure_name,
        )

        # Fallback payload, assembled section by section in the same key order.
        readback_section = {
            "workplan_id": "P1-LOG-CONTROLLED-WRITEBACK-EXECUTOR",
            "workplan_title": "LOG feedback controlled writeback executor and AI Agent consumption readback",
            "safe_next_step": "repair_log_controlled_writeback_executor_readback_then_retry",
            "error_type": failure_name,
        }
        executor_policy = {
            "executor_route": "ai_agent_metadata_writeback_executor",
            "low_medium_high_controlled_apply_enabled": True,
            "owner_review_required_for_low_medium_high": False,
            "critical_break_glass_required": True,
            "target_selector_required": True,
            "source_of_truth_diff_required": True,
            "check_mode_required": True,
            "rollback_required": True,
            "post_apply_verifier_required": True,
        }
        consumption_context = {
            "context_id": "ai-agent-log-controlled-writeback-consumption-v1",
            "consumable_by": [
                "ai_agent_autonomous_runtime_control",
                "awooop_work_items",
                "alert_triage_loop",
                "km_rag_playbook_learning_loop",
                "mcp_audit_context_loop",
            ],
            "evidence_chain": [
                "/api/v1/agents/agent-log-intelligence-integration-readback",
                "/api/v1/agents/agent-log-feedback-receipt-dry-run",
                "/api/v1/agents/agent-log-post-write-verifier-dry-run",
                "/api/v1/agents/agent-log-controlled-writeback-plan-readback",
            ],
            "next_action_queue": [],
            "learning_feedback_targets": [],
            "raw_payload_required": False,
        }
        rollups = {
            "source_writeback_plan_count": 0,
            "execution_batch_count": 0,
            "ready_execution_batch_count": 0,
            "target_count": 0,
            "target_selector_count": 0,
            "source_of_truth_diff_count": 0,
            "check_mode_ready_count": 0,
            "rollback_ready_count": 0,
            "post_apply_verifier_ready_count": 0,
            "controlled_executor_dispatch_ready": False,
            "controlled_apply_enabled_by_policy": True,
            "runtime_dispatch_performed": False,
        }
        operation_boundaries = {
            "executor_readback_only": True,
            "controlled_apply_enabled_by_policy": True,
            "executor_dispatch_performed": False,
            "km_write_performed": False,
            "rag_index_write_performed": False,
            "playbook_trust_write_performed": False,
            "mcp_tool_call_performed": False,
            "agent_runtime_action_performed": False,
            "workflow_trigger_performed": False,
            "raw_log_payload_persisted": False,
            "secret_value_collection_allowed": False,
            "github_api_used": False,
        }
        return {
            "schema_version": "ai_agent_log_controlled_writeback_executor_readback_v1",
            "priority": "P1-LOG-KM-RAG-MCP-PLAYBOOK",
            "scope": "ai_agent_log_controlled_writeback_executor",
            "status": "blocked_waiting_log_controlled_writeback_executor_readback",
            "readback": readback_section,
            "executor_policy": executor_policy,
            "execution_batches": [],
            "agent_consumption_context": consumption_context,
            "rollups": rollups,
            "active_blockers": ["log_controlled_writeback_executor_readback_unavailable"],
            "operation_boundaries": operation_boundaries,
        }
|
||
|
||
|
||
def _fallback_log_controlled_writeback_consumer_readback(
    error_type: str | None = None,
) -> dict[str, Any]:
    """Build the static "blocked" consumer-readback payload.

    Args:
        error_type: Optional exception type name. When truthy it is stored as
            the last key of the payload's ``readback`` section.

    Returns:
        A fresh dictionary on every call: no consumer bindings, zeroed rollups
        apart from the fixed ``target_count`` of 6, and every "performed"
        boundary flag set to ``False``.
    """
    readback_section: dict[str, Any] = {
        "workplan_id": "P1-LOG-CONTROLLED-WRITEBACK-CONSUMER-READBACK",
        "workplan_title": (
            "LOG metadata ledger receipts consumable by KM / RAG / PlayBook / "
            "MCP / verifier / AI Agent context"
        ),
        "source_operation_type": LOG_CONTROLLED_WRITEBACK_DISPATCH_OPERATION_TYPE,
        "source_executor_route": "ai_agent_metadata_writeback_executor",
        "safe_next_step": "repair_log_controlled_writeback_consumer_readback_then_retry",
    }
    if error_type:
        readback_section["error_type"] = error_type

    controlled_consume = {
        "mode": "blocked_waiting_consumer_readback",
        "controlled_consume_allowed": False,
        "owner_review_required_for_low_medium_high": False,
        "critical_break_glass_required": True,
        "target_selector_required": True,
        "source_of_truth_diff_required": True,
        "check_mode_required": True,
        "rollback_required": True,
        "post_apply_verifier_required": True,
        "runtime_target_write_performed": False,
    }
    rollups = {
        "target_count": 6,
        "dispatch_ledger_row_count": 0,
        "consumer_binding_count": 0,
        "ready_consumer_binding_count": 0,
        "ready_target_count": 0,
        "metadata_only_receipt_count": 0,
        "post_apply_verifier_ref_count": 0,
        "controlled_consumer_readback_ready": False,
        "runtime_target_write_performed": False,
    }
    operation_boundaries = {
        "consumer_readback_only": True,
        "metadata_ledger_read_performed": False,
        "km_write_performed": False,
        "rag_index_write_performed": False,
        "playbook_trust_write_performed": False,
        "mcp_tool_call_performed": False,
        "agent_runtime_action_performed": False,
        "telegram_send_performed": False,
        "workflow_trigger_performed": False,
        "raw_log_payload_persisted": False,
        "secret_value_collection_allowed": False,
        "github_api_used": False,
    }
    return {
        "schema_version": "ai_agent_log_controlled_writeback_consumer_readback_v1",
        "priority": "P1-LOG-KM-RAG-MCP-PLAYBOOK",
        "scope": "ai_agent_log_controlled_writeback_consumer_readback",
        "status": "blocked_waiting_controlled_writeback_consumer_receipts",
        "readback": readback_section,
        "controlled_consume": controlled_consume,
        "consumer_bindings": [],
        "target_rollups": [],
        "rollups": rollups,
        "active_blockers": ["log_controlled_writeback_consumer_readback_unavailable"],
        "operation_boundaries": operation_boundaries,
    }
|
||
|
||
|
||
async def _load_log_controlled_writeback_consumer_readback(
    *,
    project_id: str,
) -> dict[str, Any]:
    """Attach LOG consumer bindings without writing KM/RAG/PlayBook/MCP targets.

    Awaits ``load_latest_ai_agent_log_controlled_writeback_consumer_readback``
    for *project_id*. If it raises, a warning with the project id and the
    exception type name is logged and the static fallback payload (tagged with
    that type name) is returned instead.
    """
    try:
        consumer_readback = await load_latest_ai_agent_log_controlled_writeback_consumer_readback(
            project_id=project_id,
        )
    except Exception as exc:  # pragma: no cover - keeps runtime control API visible
        failure_name = type(exc).__name__
        logger.warning(
            "log_controlled_writeback_consumer_readback_failed",
            project_id=project_id,
            error_type=failure_name,
        )
        return _fallback_log_controlled_writeback_consumer_readback(
            error_type=failure_name,
        )
    return consumer_readback
|
||
|
||
|
||
def _trace_stage(
|
||
*,
|
||
stage_id: str,
|
||
display_name: str,
|
||
source_tables: list[str],
|
||
total: int,
|
||
recent: int,
|
||
required_for_closed_loop: bool,
|
||
feeds_learning: bool,
|
||
public_safe: bool = True,
|
||
next_action_if_missing: str | None = None,
|
||
) -> dict[str, Any]:
|
||
present = total > 0
|
||
return {
|
||
"stage_id": stage_id,
|
||
"display_name": display_name,
|
||
"source_tables": source_tables,
|
||
"recorded": present,
|
||
"record_quality": "recorded" if present else "missing",
|
||
"total": max(0, total),
|
||
"recent": max(0, recent),
|
||
"required_for_closed_loop": required_for_closed_loop,
|
||
"feeds_learning": feeds_learning,
|
||
"public_safe": public_safe,
|
||
"next_action_if_missing": None if present else next_action_if_missing,
|
||
}
|
||
|
||
|
||
def _trace_total(summary: Mapping[str, Any] | None, *operation_types: str) -> int:
|
||
if not isinstance(summary, Mapping):
|
||
return 0
|
||
if not operation_types:
|
||
return _int_value(summary.get("total"))
|
||
return sum(
|
||
_int_value((summary.get(operation_type) or {}).get("total"))
|
||
for operation_type in operation_types
|
||
)
|
||
|
||
|
||
def _trace_recent(summary: Mapping[str, Any] | None, *operation_types: str) -> int:
|
||
if not isinstance(summary, Mapping):
|
||
return 0
|
||
if not operation_types:
|
||
return _int_value(summary.get("recent"))
|
||
return sum(
|
||
_int_value((summary.get(operation_type) or {}).get("recent"))
|
||
for operation_type in operation_types
|
||
)
|
||
|
||
|
||
def _build_trace_ledger(
    *,
    operation_summary: Mapping[str, Any],
    auto_repair_summary: Mapping[str, Any],
    verifier_summary: Mapping[str, Any],
    km_summary: Mapping[str, Any],
    telegram_summary: Mapping[str, Any],
    mcp_gateway_summary: Mapping[str, Any],
    legacy_mcp_summary: Mapping[str, Any],
    service_log_summary: Mapping[str, Any],
    executor_log_summary: Mapping[str, Any],
    timeline_summary: Mapping[str, Any],
    playbook_trust_summary: Mapping[str, Any],
    latest_flow_closure: Mapping[str, Any],
    loop_ledger: Mapping[str, Any],
) -> dict[str, Any]:
    """Build the full public-safe AI automation trace ledger.

    Each summary is turned into one stage via ``_trace_stage``, using
    ``_trace_total`` / ``_trace_recent`` for the counts. The MCP stage adds the
    gateway and legacy summaries together. The returned dictionary holds the
    stage list plus roll-ups: how many stages are recorded, which stages
    required for a closed loop are missing, which stages feed learning, and
    whether ``latest_flow_closure`` / ``loop_ledger`` report ``closed`` as
    ``True``.
    """

    def _window_counts(summary: Mapping[str, Any], *operation_types: str) -> dict[str, int]:
        # total/recent keyword arguments for _trace_stage, read from one summary.
        return {
            "total": _trace_total(summary, *operation_types),
            "recent": _trace_recent(summary, *operation_types),
        }

    mcp_total = _trace_total(mcp_gateway_summary) + _trace_total(legacy_mcp_summary)
    mcp_recent = _trace_recent(mcp_gateway_summary) + _trace_recent(legacy_mcp_summary)

    stages = [
        _trace_stage(
            stage_id="mcp_context",
            display_name="MCP sensor / tool context",
            source_tables=["awooop_mcp_gateway_audit", "mcp_audit_log"],
            total=mcp_total,
            recent=mcp_recent,
            required_for_closed_loop=False,
            feeds_learning=True,
            next_action_if_missing="record_mcp_gateway_or_legacy_mcp_audit_for_every_ai_decision",
        ),
        _trace_stage(
            stage_id="service_log_evidence",
            display_name="Sanitized service / package log evidence",
            source_tables=["incident_evidence.recent_logs", "incident_evidence.evidence_summary"],
            required_for_closed_loop=False,
            feeds_learning=True,
            next_action_if_missing="collect_sanitized_service_log_evidence_before_ai_decision",
            **_window_counts(service_log_summary),
        ),
        _trace_stage(
            stage_id="candidate",
            display_name="AI candidate / playbook match",
            source_tables=["automation_operation_log"],
            required_for_closed_loop=True,
            feeds_learning=True,
            next_action_if_missing="candidate_backfill_worker_enqueue_allowlisted_playbook",
            **_window_counts(operation_summary, "ansible_candidate_matched"),
        ),
        _trace_stage(
            stage_id="check_mode",
            display_name="No-write check-mode / dry-run",
            source_tables=["automation_operation_log"],
            required_for_closed_loop=True,
            feeds_learning=True,
            next_action_if_missing="ansible_check_mode_worker_claims_candidate",
            **_window_counts(operation_summary, "ansible_check_mode_executed"),
        ),
        _trace_stage(
            stage_id="executor_log_projection",
            display_name="Executor stdout / stderr / dry-run projection",
            source_tables=[
                "automation_operation_log.output",
                "automation_operation_log.error",
                "automation_operation_log.stderr_feed_back",
                "automation_operation_log.dry_run_result",
            ],
            required_for_closed_loop=False,
            feeds_learning=True,
            next_action_if_missing="persist_sanitized_executor_log_projection_for_failed_or_applied_actions",
            **_window_counts(executor_log_summary),
        ),
        _trace_stage(
            stage_id="controlled_apply",
            display_name="Controlled apply execution",
            source_tables=["automation_operation_log"],
            required_for_closed_loop=True,
            feeds_learning=True,
            next_action_if_missing="controlled_apply_worker_waits_for_check_mode_success",
            **_window_counts(operation_summary, "ansible_apply_executed"),
        ),
        _trace_stage(
            stage_id="auto_repair_execution_receipt",
            display_name="Auto-repair execution receipt",
            source_tables=["auto_repair_executions"],
            required_for_closed_loop=True,
            feeds_learning=True,
            next_action_if_missing="receipt_backfill_records_auto_repair_execution",
            **_window_counts(auto_repair_summary),
        ),
        _trace_stage(
            stage_id="post_apply_verifier",
            display_name="Post-apply verifier evidence",
            source_tables=["incident_evidence"],
            required_for_closed_loop=True,
            feeds_learning=True,
            next_action_if_missing="post_apply_verifier_writes_incident_evidence",
            **_window_counts(verifier_summary),
        ),
        _trace_stage(
            stage_id="rag_km_learning",
            display_name="RAG / KM / PlayBook learning writeback",
            source_tables=["knowledge_entries"],
            required_for_closed_loop=True,
            feeds_learning=True,
            next_action_if_missing="hermes_writes_km_playbook_trust_candidate",
            **_window_counts(km_summary),
        ),
        _trace_stage(
            stage_id="playbook_trust",
            display_name="PlayBook trust / success-failure learning",
            source_tables=[
                "playbooks.trust_score",
                "playbooks.success_count",
                "playbooks.failure_count",
                "playbooks.review_required",
            ],
            required_for_closed_loop=False,
            feeds_learning=True,
            next_action_if_missing="write_playbook_trust_delta_after_verified_execution",
            **_window_counts(playbook_trust_summary),
        ),
        _trace_stage(
            stage_id="timeline_projection",
            display_name="Operator timeline projection",
            source_tables=["timeline_events"],
            required_for_closed_loop=False,
            feeds_learning=True,
            next_action_if_missing="project_ai_runtime_stage_to_timeline_events",
            **_window_counts(timeline_summary),
        ),
        _trace_stage(
            stage_id="telegram_receipt",
            display_name="Telegram Gateway receipt",
            source_tables=["awooop_outbound_message"],
            required_for_closed_loop=True,
            feeds_learning=False,
            next_action_if_missing="live_apply_gateway_sends_controlled_apply_result_receipt",
            **_window_counts(telegram_summary),
        ),
    ]

    closed_loop_stages = [entry for entry in stages if entry["required_for_closed_loop"]]
    missing_stage_ids = [
        str(entry["stage_id"]) for entry in closed_loop_stages if entry["recorded"] is not True
    ]
    recorded_total = len([entry for entry in stages if entry["recorded"] is True])
    learning_stage_ids = [
        str(entry["stage_id"]) for entry in stages if entry["feeds_learning"] is True
    ]

    return {
        "schema_version": "ai_agent_autonomous_trace_ledger_v1",
        "purpose": (
            "把 AI 自動化每個節點的 public-safe receipt 收斂成同一份 ledger；"
            "這些紀錄是後續 RAG、KM、PlayBook trust 與報告學習的依據。"
        ),
        "latest_flow_closed": latest_flow_closure.get("closed") is True,
        "latest_loop_closed": loop_ledger.get("closed") is True,
        "stage_count": len(stages),
        "recorded_stage_count": recorded_total,
        "required_stage_count": len(closed_loop_stages),
        "missing_required_stage_ids": missing_stage_ids,
        "learning_source_stage_ids": learning_stage_ids,
        "public_safety": {
            "reads_raw_sessions": False,
            "stores_secret_values": False,
            "stores_unredacted_telegram_payload": False,
            "stores_internal_reasoning": False,
        },
        "stages": stages,
    }
|
||
|
||
|
||
def _build_log_integration_taxonomy(
    *,
    operation_summary: Mapping[str, Any],
    auto_repair_summary: Mapping[str, Any],
    verifier_summary: Mapping[str, Any],
    km_summary: Mapping[str, Any],
    telegram_summary: Mapping[str, Any],
    mcp_gateway_summary: Mapping[str, Any],
    legacy_mcp_summary: Mapping[str, Any],
    service_log_summary: Mapping[str, Any],
    executor_log_summary: Mapping[str, Any],
    timeline_summary: Mapping[str, Any],
    playbook_trust_summary: Mapping[str, Any],
) -> dict[str, Any]:
    """Expose how logs are normalized, labeled, grouped, and fed to agents.

    Builds one entry per log source family with its total/recent counts, then
    derives the sorted union of label dimensions and roll-up counters. The
    executor family reports the larger of the summed
    ``_EXECUTOR_OPERATION_TYPES`` counts and the executor-log summary counts.
    """

    def _family(
        *,
        family_id: str,
        tables: list[str],
        event_schema: str,
        labels: list[str],
        total: int,
        recent: int,
        raw_policy: str,
        if_empty: str,
        feeds_learning: bool = True,
    ) -> dict[str, Any]:
        # One source-family entry; key order matches the emitted payload.
        return {
            "source_family_id": family_id,
            "source_tables": tables,
            "normalized_event_schema": event_schema,
            "label_dimensions": labels,
            "total": total,
            "recent": recent,
            "feeds_learning": feeds_learning,
            "public_safe": True,
            "raw_payload_policy": raw_policy,
            "next_action_if_empty": if_empty,
        }

    executor_ops_total = sum(
        _trace_total(operation_summary, op_type) for op_type in _EXECUTOR_OPERATION_TYPES
    )
    executor_ops_recent = sum(
        _trace_recent(operation_summary, op_type) for op_type in _EXECUTOR_OPERATION_TYPES
    )

    source_families = [
        _family(
            family_id="mcp_gateway_tool_calls",
            tables=["awooop_mcp_gateway_audit"],
            event_schema="ToolCallEvidence",
            labels=["project", "run", "trace", "agent", "tool", "policy_gate"],
            total=_trace_total(mcp_gateway_summary),
            recent=_trace_recent(mcp_gateway_summary),
            raw_policy="hash_only_no_raw_input_output",
            if_empty="route_first_class_tools_through_awooop_mcp_gateway",
        ),
        _family(
            family_id="legacy_mcp_tool_calls",
            tables=["mcp_audit_log"],
            event_schema="LegacyToolCallEvidence",
            labels=["incident", "session_ref", "flywheel_node", "agent", "tool"],
            total=_trace_total(legacy_mcp_summary),
            recent=_trace_recent(legacy_mcp_summary),
            raw_policy="bridge_to_gateway_hash_or_redacted_summary",
            if_empty="keep_legacy_bridge_until_all_callers_use_gateway",
        ),
        _family(
            family_id="service_package_logs",
            tables=[
                "incident_evidence.recent_logs",
                "incident_evidence.evidence_summary",
                "incident_evidence.anomaly_context",
            ],
            event_schema="ServiceLogEvidence",
            labels=["project", "product", "website", "service", "package", "incident"],
            total=_trace_total(service_log_summary),
            recent=_trace_recent(service_log_summary),
            raw_policy="sanitized_summary_only",
            if_empty="collect_sanitized_service_package_logs_before_decision",
        ),
        _family(
            family_id="executor_operation_logs",
            tables=["automation_operation_log"],
            event_schema="ExecutorOperationEvidence",
            labels=[
                "project",
                "service",
                "package",
                "tool",
                "incident",
                "operation",
                "playbook",
                "risk",
            ],
            total=max(executor_ops_total, _trace_total(executor_log_summary)),
            recent=max(executor_ops_recent, _trace_recent(executor_log_summary)),
            raw_policy="stdout_stderr_tail_or_structured_result_only",
            if_empty="persist_executor_operation_log_for_candidate_check_apply",
        ),
        _family(
            family_id="auto_repair_receipts",
            tables=["auto_repair_executions"],
            event_schema="RepairExecutionReceipt",
            labels=["incident", "service", "playbook", "risk", "result"],
            total=_trace_total(auto_repair_summary),
            recent=_trace_recent(auto_repair_summary),
            raw_policy="execution_step_refs_not_raw_secrets",
            if_empty="write_auto_repair_execution_receipt_after_apply",
        ),
        _family(
            family_id="post_apply_verifier",
            tables=["incident_evidence.post_execution_state"],
            event_schema="VerifierEvidence",
            labels=["incident", "operation", "playbook", "service", "result"],
            total=_trace_total(verifier_summary),
            recent=_trace_recent(verifier_summary),
            raw_policy="post_state_summary_redacted_refs_only",
            if_empty="run_post_apply_verifier_for_each_apply",
        ),
        _family(
            family_id="rag_km_entries",
            tables=["knowledge_entries"],
            event_schema="KnowledgeWritebackEvidence",
            labels=["project", "incident", "playbook", "path_type", "status"],
            total=_trace_total(km_summary),
            recent=_trace_recent(km_summary),
            raw_policy="curated_summary_and_refs_only",
            if_empty="write_km_entry_after_verifier",
        ),
        _family(
            family_id="playbook_trust_signals",
            tables=["playbooks"],
            event_schema="PlayBookTrustSignal",
            labels=["project", "playbook", "status", "trust_band", "review_required"],
            total=_trace_total(playbook_trust_summary),
            recent=_trace_recent(playbook_trust_summary),
            raw_policy="aggregate_trust_counters_only",
            if_empty="write_trust_delta_after_verified_execution",
        ),
        _family(
            family_id="operator_timeline_projection",
            tables=["timeline_events"],
            event_schema="OperatorTimelineEvent",
            labels=["incident", "event_type", "status", "actor", "actor_role"],
            total=_trace_total(timeline_summary),
            recent=_trace_recent(timeline_summary),
            raw_policy="short_public_safe_status_projection",
            if_empty="project_ai_runtime_stage_to_timeline_events",
        ),
        _family(
            family_id="telegram_delivery_receipts",
            tables=["awooop_outbound_message"],
            event_schema="NotificationReceipt",
            labels=["project", "channel", "incident", "action", "send_status"],
            total=_trace_total(telegram_summary),
            recent=_trace_recent(telegram_summary),
            raw_policy="provider_message_ref_no_unredacted_payload",
            if_empty="send_controlled_apply_result_via_gateway",
            feeds_learning=False,
        ),
    ]

    # Union of every family's label dimensions plus the implicit family label.
    seen_dimensions = {"source_family"}
    for family in source_families:
        seen_dimensions.update(str(dimension) for dimension in family["label_dimensions"])
    label_dimensions = sorted(seen_dimensions)

    family_count = len(source_families)
    active_family_count = len(
        [family for family in source_families if _int_value(family["total"]) > 0]
    )
    learning_family_count = len(
        [family for family in source_families if family["feeds_learning"] is True]
    )

    return {
        "schema_version": "ai_agent_log_integration_taxonomy_v1",
        "purpose": (
            "將專案、產品、網站、服務、套件、工具與通知來源的 log "
            "統一轉成可貼標、可分群、可回放、可餵 RAG/KM/PlayBook 的 evidence。"
        ),
        "normalized_event_flow": [
            "collect_source_log_or_receipt",
            "redact_and_hash_sensitive_fields",
            "assign_labels",
            "correlate_incident_operation_playbook",
            "write_trace_ledger",
            "retrieve_similar_context_via_rag",
            "select_or_repair_playbook",
            "run_check_mode_then_controlled_apply",
            "verify_and_write_learning_back",
        ],
        "label_dimensions": label_dimensions,
        "required_label_dimensions": [
            "project",
            "product",
            "website",
            "source_family",
            "incident",
            "operation",
            "service",
            "package",
            "tool",
            "playbook",
        ],
        "source_families": source_families,
        "rollups": {
            "source_family_count": family_count,
            "active_source_family_count": active_family_count,
            "inactive_source_family_count": family_count - active_family_count,
            "label_dimension_count": len(label_dimensions),
            "classified_event_total": sum(
                _int_value(family["total"]) for family in source_families
            ),
            "recent_classified_event_total": sum(
                _int_value(family["recent"]) for family in source_families
            ),
            "learning_source_family_count": learning_family_count,
        },
        "public_safety": {
            "raw_secret_collection_allowed": False,
            "raw_session_collection_allowed": False,
            "unredacted_payload_storage_allowed": False,
            "internal_reasoning_storage_allowed": False,
        },
    }
|
||
|
||
|
||
def _decision_wiring_stage(
    *,
    stage_id: str,
    display_name: str,
    evidence_sources: list[str],
    total: int,
    recent: int,
    required_for_decision_wiring: bool,
    feeds_next_stage: str,
    next_action_if_missing: str,
) -> dict[str, Any]:
    """Describe one decision-wiring stage.

    The stage is ``present`` when *total* is positive; in that case
    ``next_action_if_missing`` is reported as ``None``, otherwise the supplied
    action is kept. Negative *total* / *recent* values are reported as 0.
    """
    has_evidence = total > 0
    follow_up = next_action_if_missing
    if has_evidence:
        follow_up = None
    return {
        "stage_id": stage_id,
        "display_name": display_name,
        "evidence_sources": evidence_sources,
        "present": has_evidence,
        "total": max(0, total),
        "recent": max(0, recent),
        "required_for_decision_wiring": required_for_decision_wiring,
        "feeds_next_stage": feeds_next_stage,
        "next_action_if_missing": follow_up,
    }
|
||
|
||
|
||
def _build_agent_decision_wiring(
    *,
    operation_summary: Mapping[str, Any],
    verifier_summary: Mapping[str, Any],
    km_summary: Mapping[str, Any],
    mcp_gateway_summary: Mapping[str, Any],
    legacy_mcp_summary: Mapping[str, Any],
    service_log_summary: Mapping[str, Any],
    timeline_summary: Mapping[str, Any],
    playbook_trust_summary: Mapping[str, Any],
    log_integration_taxonomy: Mapping[str, Any],
    loop_ledger: Mapping[str, Any],
    latest_flow_closure: Mapping[str, Any],
) -> dict[str, Any]:
    """Summarize live evidence-to-execution wiring for the AI Agent path.

    Six required stages are assembled in pipeline order: labeled evidence,
    RAG context, PlayBook candidate, check-mode, controlled apply / rollback,
    and post-apply verifier. The payload ``status`` is ``"completed"`` only
    when every required stage has a positive total.

    Args:
        operation_summary: Summary queried per ``ansible_*`` operation type.
        verifier_summary: Summary backing the post-apply verifier stage.
        km_summary: Summary counted towards the RAG context stage.
        mcp_gateway_summary: Summary counted towards labeled evidence.
        legacy_mcp_summary: Summary counted towards labeled evidence.
        service_log_summary: Summary counted towards labeled evidence.
        timeline_summary: Summary counted towards labeled evidence.
        playbook_trust_summary: Summary counted towards the RAG context stage.
        log_integration_taxonomy: Taxonomy payload; only its ``rollups``
            mapping is read (a non-mapping value is treated as empty).
        loop_ledger: Read for its ``closed`` flag.
        latest_flow_closure: Read for its ``closed`` flag.

    Returns:
        The ``ai_agent_decision_wiring_readback_v1`` payload with stages,
        missing stage ids, runtime switches, and rollup counters.
    """

    taxonomy_rollups = log_integration_taxonomy.get("rollups")
    if not isinstance(taxonomy_rollups, Mapping):
        taxonomy_rollups = {}
    source_family_count = _int_value(taxonomy_rollups.get("source_family_count"))
    active_source_family_count = _int_value(taxonomy_rollups.get("active_source_family_count"))
    # The evidence stage only counts once every known source family is active.
    all_sources_active = source_family_count > 0 and active_source_family_count == source_family_count
    evidence_total = (
        _trace_total(mcp_gateway_summary)
        + _trace_total(legacy_mcp_summary)
        + _trace_total(service_log_summary)
        + _trace_total(timeline_summary)
    )
    evidence_recent = (
        _trace_recent(mcp_gateway_summary)
        + _trace_recent(legacy_mcp_summary)
        + _trace_recent(service_log_summary)
        + _trace_recent(timeline_summary)
    )
    rag_context_total = _trace_total(km_summary) + _trace_total(playbook_trust_summary)
    rag_context_recent = _trace_recent(km_summary) + _trace_recent(playbook_trust_summary)
    candidate_total = _trace_total(operation_summary, "ansible_candidate_matched")
    candidate_recent = _trace_recent(operation_summary, "ansible_candidate_matched")
    check_mode_total = _trace_total(operation_summary, "ansible_check_mode_executed")
    check_mode_recent = _trace_recent(operation_summary, "ansible_check_mode_executed")
    apply_total = _trace_total(operation_summary, "ansible_apply_executed")
    apply_recent = _trace_recent(operation_summary, "ansible_apply_executed")
    rollback_total = _trace_total(operation_summary, "ansible_rollback_executed")
    # Needed so the apply/rollback stage reports ``recent`` over the same
    # evidence sources as its ``total``.
    rollback_recent = _trace_recent(operation_summary, "ansible_rollback_executed")
    verifier_total = _trace_total(verifier_summary)
    verifier_recent = _trace_recent(verifier_summary)

    stages = [
        _decision_wiring_stage(
            stage_id="labeled_evidence_sources",
            display_name="Labeled log / MCP / timeline evidence available",
            evidence_sources=["log_integration_taxonomy", "mcp", "service_logs", "timeline_events"],
            total=evidence_total if all_sources_active else 0,
            recent=evidence_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="rag_context_retrieval",
            next_action_if_missing="keep_p1a_source_family_ingestion_active_until_10_of_10",
        ),
        _decision_wiring_stage(
            stage_id="rag_context_retrieval",
            display_name="RAG / KM / PlayBook trust context available",
            evidence_sources=["knowledge_entries", "playbooks"],
            total=rag_context_total,
            recent=rag_context_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="playbook_candidate_selection",
            next_action_if_missing="retrieve_similar_km_entries_and_playbook_trust_before_candidate",
        ),
        _decision_wiring_stage(
            stage_id="playbook_candidate_selection",
            display_name="Allowlisted PlayBook candidate selected",
            evidence_sources=["automation_operation_log:ansible_candidate_matched"],
            total=candidate_total,
            recent=candidate_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="check_mode_dry_run",
            next_action_if_missing="candidate_backfill_worker_enqueue_allowlisted_playbook",
        ),
        _decision_wiring_stage(
            stage_id="check_mode_dry_run",
            display_name="Check-mode / dry-run receipt recorded",
            evidence_sources=["automation_operation_log:ansible_check_mode_executed"],
            total=check_mode_total,
            recent=check_mode_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="controlled_apply_boundary",
            next_action_if_missing="ansible_check_mode_worker_claims_candidate",
        ),
        _decision_wiring_stage(
            stage_id="controlled_apply_boundary",
            display_name="Controlled apply / rollback boundary recorded",
            evidence_sources=[
                "automation_operation_log:ansible_apply_executed",
                "automation_operation_log:ansible_rollback_executed",
            ],
            total=apply_total + rollback_total,
            # Fix: previously only apply events were counted as recent even
            # though the total (and evidence sources) include rollbacks.
            recent=apply_recent + rollback_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="post_apply_verifier",
            next_action_if_missing="controlled_apply_worker_waits_for_check_mode_success",
        ),
        _decision_wiring_stage(
            stage_id="post_apply_verifier",
            display_name="Post-apply verifier receipt recorded",
            evidence_sources=["incident_evidence"],
            total=verifier_total,
            recent=verifier_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="learning_writeback",
            next_action_if_missing="post_apply_verifier_writes_incident_evidence",
        ),
    ]
    missing_required = [
        str(stage["stage_id"])
        for stage in stages
        if stage["required_for_decision_wiring"] is True and stage["present"] is not True
    ]
    present_required_count = sum(
        1
        for stage in stages
        if stage["required_for_decision_wiring"] is True and stage["present"] is True
    )
    required_count = sum(1 for stage in stages if stage["required_for_decision_wiring"] is True)
    # Either closure source is enough to report an observed closed loop.
    closed_loop_observed = bool(
        loop_ledger.get("closed") is True
        or latest_flow_closure.get("closed") is True
    )
    return {
        "schema_version": "ai_agent_decision_wiring_readback_v1",
        "status": "completed" if not missing_required else "in_progress",
        "stages": stages,
        "missing_required_stage_ids": missing_required,
        "runtime_switches": {
            "candidate_backfill_worker_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CANDIDATE_BACKFILL_WORKER),
            "check_mode_worker_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER),
            "controlled_apply_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CONTROLLED_APPLY),
            "allowed_risk_levels": _allowed_risk_levels(),
        },
        "closed_loop_observed": closed_loop_observed,
        "public_safety": {
            "stores_raw_logs": False,
            "stores_secret_values": False,
            "executes_on_read": False,
            "critical_break_glass_still_required": True,
        },
        "rollups": {
            "stage_count": len(stages),
            "required_stage_count": required_count,
            "required_stage_present_count": present_required_count,
            "required_stage_missing_count": len(missing_required),
            "evidence_event_total": evidence_total,
            "rag_context_total": rag_context_total,
            "candidate_total": candidate_total,
            "check_mode_total": check_mode_total,
            "controlled_apply_total": apply_total,
            "rollback_total": rollback_total,
            "verifier_total": verifier_total,
        },
    }
|
||
|
||
|
||
def _learning_loop_stage(
|
||
*,
|
||
stage_id: str,
|
||
display_name: str,
|
||
evidence_sources: list[str],
|
||
total: int,
|
||
recent: int,
|
||
required_for_learning_loop: bool,
|
||
writes_runtime_state: bool,
|
||
next_action_if_missing: str,
|
||
) -> dict[str, Any]:
|
||
present = total > 0
|
||
return {
|
||
"stage_id": stage_id,
|
||
"display_name": display_name,
|
||
"evidence_sources": evidence_sources,
|
||
"present": present,
|
||
"total": max(0, total),
|
||
"recent": max(0, recent),
|
||
"required_for_learning_loop": required_for_learning_loop,
|
||
"writes_runtime_state": writes_runtime_state,
|
||
"next_action_if_missing": None if present else next_action_if_missing,
|
||
}
|
||
|
||
|
||
def _build_learning_loop_readback(
    *,
    operation_summary: Mapping[str, Any],
    verifier_summary: Mapping[str, Any],
    km_summary: Mapping[str, Any],
    playbook_trust_summary: Mapping[str, Any],
    log_integration_taxonomy: Mapping[str, Any],
    agent_decision_wiring: Mapping[str, Any],
    latest_flow_closure: Mapping[str, Any],
    latest_failure_classification: Mapping[str, Any],
    controlled_retry_package: Mapping[str, Any],
    loop_ledger: Mapping[str, Any],
) -> dict[str, Any]:
    """Expose the verified execution to KM/PlayBook learning loop.

    Seven required stages are assembled; the payload ``status`` is
    ``"completed"`` only when each of them has a positive total.

    Args:
        operation_summary: Summary queried for the
            ``ansible_learning_writeback_recorded`` operation type.
        verifier_summary: Summary backing the verified-execution stage.
        km_summary: Summary backing the KM writeback stage.
        playbook_trust_summary: Summary backing the PlayBook trust stage.
        log_integration_taxonomy: Taxonomy payload; only its ``rollups``
            mapping is read (a non-mapping value is treated as empty).
        agent_decision_wiring: Decision-wiring payload; its ``status`` gates
            the next-decision stage.
        latest_flow_closure: Read for ``closed``, ``has_post_apply_verifier``
            and ``has_km_writeback`` flags.
        latest_failure_classification: Read for its ``classification`` value.
        controlled_retry_package: Read for its ``schema_version`` value.
        loop_ledger: Read for its ``closed`` flag.

    Returns:
        The ``ai_agent_learning_loop_readback_v1`` payload with stages,
        missing stage ids, and rollup counters.
    """

    taxonomy_rollups = log_integration_taxonomy.get("rollups")
    if not isinstance(taxonomy_rollups, Mapping):
        taxonomy_rollups = {}
    learning_source_family_count = _int_value(
        taxonomy_rollups.get("learning_source_family_count")
    )
    classified_event_total = _int_value(taxonomy_rollups.get("classified_event_total"))
    recent_classified_event_total = _int_value(
        taxonomy_rollups.get("recent_classified_event_total")
    )
    verifier_total = _trace_total(verifier_summary)
    verifier_recent = _trace_recent(verifier_summary)
    km_total = _trace_total(km_summary)
    km_recent = _trace_recent(km_summary)
    learning_writeback_total = _trace_total(
        operation_summary,
        "ansible_learning_writeback_recorded",
    )
    learning_writeback_recent = _trace_recent(
        operation_summary,
        "ansible_learning_writeback_recorded",
    )
    trust_total = _trace_total(playbook_trust_summary)
    trust_recent = _trace_recent(playbook_trust_summary)
    # Fix: a missing classification (None) used to pass the old
    # ``not in {"", ...}`` test and be reported as classified feedback, and an
    # unhashable value raised TypeError. Require a truthy classification that
    # is not the "nothing observed" sentinel.
    classification = latest_failure_classification.get("classification")
    repair_feedback_ready = bool(
        classification
        and classification != "no_controlled_apply_observed"
        and controlled_retry_package.get("schema_version")
        == "ai_agent_controlled_retry_package_v1"
    )
    learned_context_ready = bool(
        verifier_total > 0
        and km_total > 0
        and learning_writeback_total > 0
        and trust_total > 0
        and learning_source_family_count > 0
        and repair_feedback_ready
    )
    # The next decision may consume learned context only after decision wiring
    # completes and at least one closure / learned-context signal is present.
    next_decision_ready = bool(
        agent_decision_wiring.get("status") == "completed"
        and (
            loop_ledger.get("closed") is True
            or latest_flow_closure.get("closed") is True
            or learned_context_ready
        )
    )
    stages = [
        _learning_loop_stage(
            stage_id="verified_execution_outcome",
            display_name="Verified execution outcome available",
            evidence_sources=["incident_evidence.post_execution_state"],
            total=verifier_total
            if latest_flow_closure.get("has_post_apply_verifier") is True
            else 0,
            recent=verifier_recent,
            required_for_learning_loop=True,
            writes_runtime_state=True,
            next_action_if_missing="run_post_apply_verifier_and_attach_apply_op_id",
        ),
        _learning_loop_stage(
            stage_id="km_learning_writeback",
            display_name="KM learning writeback recorded",
            evidence_sources=["knowledge_entries"],
            total=km_total
            if latest_flow_closure.get("has_km_writeback") is True
            else 0,
            recent=km_recent,
            required_for_learning_loop=True,
            writes_runtime_state=True,
            next_action_if_missing="write_verified_execution_summary_to_km",
        ),
        _learning_loop_stage(
            stage_id="learning_repair_record",
            display_name="Learning repository repair result recorded",
            evidence_sources=[
                "automation_operation_log:ansible_learning_writeback_recorded",
                "learning_repository",
            ],
            total=learning_writeback_total,
            recent=learning_writeback_recent,
            required_for_learning_loop=True,
            writes_runtime_state=True,
            next_action_if_missing="record_learning_repair_result_after_verifier",
        ),
        _learning_loop_stage(
            stage_id="playbook_trust_delta",
            display_name="PlayBook trust signal available",
            evidence_sources=["playbooks"],
            total=trust_total,
            recent=trust_recent,
            required_for_learning_loop=True,
            writes_runtime_state=True,
            next_action_if_missing="write_playbook_trust_delta_after_verifier",
        ),
        _learning_loop_stage(
            stage_id="similar_case_context",
            display_name="Similar-case context sources active",
            evidence_sources=["log_integration_taxonomy", "knowledge_entries", "playbooks"],
            total=classified_event_total if learning_source_family_count > 0 else 0,
            recent=recent_classified_event_total,
            required_for_learning_loop=True,
            writes_runtime_state=False,
            next_action_if_missing="activate_learning_source_families_for_similar_case_retrieval",
        ),
        _learning_loop_stage(
            stage_id="repair_candidate_feedback",
            display_name="Repair or no-repair feedback classified",
            evidence_sources=["latest_failure_classification", "controlled_retry_package"],
            total=1 if repair_feedback_ready else 0,
            recent=1 if repair_feedback_ready else 0,
            required_for_learning_loop=True,
            writes_runtime_state=False,
            next_action_if_missing="classify_latest_apply_result_and_prepare_retry_package",
        ),
        _learning_loop_stage(
            stage_id="next_decision_context",
            display_name="Next decision can consume learned context",
            evidence_sources=["agent_decision_wiring", "autonomous_execution_loop_ledger"],
            total=1 if next_decision_ready else 0,
            recent=1 if next_decision_ready else 0,
            required_for_learning_loop=True,
            writes_runtime_state=False,
            next_action_if_missing="complete_decision_wiring_and_execution_loop_before_learning_release",
        ),
    ]
    missing_required = [
        str(stage["stage_id"])
        for stage in stages
        if stage["required_for_learning_loop"] is True and stage["present"] is not True
    ]
    present_required_count = sum(
        1
        for stage in stages
        if stage["required_for_learning_loop"] is True and stage["present"] is True
    )
    required_count = sum(1 for stage in stages if stage["required_for_learning_loop"] is True)
    return {
        "schema_version": "ai_agent_learning_loop_readback_v1",
        "status": "completed" if not missing_required else "in_progress",
        "stages": stages,
        "missing_required_stage_ids": missing_required,
        "public_safety": {
            "stores_raw_logs": False,
            "stores_secret_values": False,
            "stores_unredacted_telegram_payload": False,
            "executes_on_read": False,
            "critical_break_glass_still_required": True,
        },
        "rollups": {
            "stage_count": len(stages),
            "required_stage_count": required_count,
            "required_stage_present_count": present_required_count,
            "required_stage_missing_count": len(missing_required),
            "verified_execution_total": verifier_total,
            "km_writeback_total": km_total,
            "learning_writeback_total": learning_writeback_total,
            "learning_writeback_recent": learning_writeback_recent,
            "playbook_trust_total": trust_total,
            "learning_source_family_count": learning_source_family_count,
            "similar_case_source_total": classified_event_total,
            "repair_feedback_ready_count": 1 if repair_feedback_ready else 0,
            "next_decision_ready_count": 1 if next_decision_ready else 0,
        },
    }
|
||
|
||
|
||
def _alert_noise_stage(
|
||
*,
|
||
stage_id: str,
|
||
display_name: str,
|
||
evidence_sources: list[str],
|
||
total: int,
|
||
recent: int,
|
||
required_for_noise_reduction: bool,
|
||
feeds_controlled_queue: bool,
|
||
next_action_if_missing: str,
|
||
) -> dict[str, Any]:
|
||
present = total > 0
|
||
return {
|
||
"stage_id": stage_id,
|
||
"display_name": display_name,
|
||
"evidence_sources": evidence_sources,
|
||
"present": present,
|
||
"total": max(0, total),
|
||
"recent": max(0, recent),
|
||
"required_for_noise_reduction": required_for_noise_reduction,
|
||
"feeds_controlled_queue": feeds_controlled_queue,
|
||
"next_action_if_missing": None if present else next_action_if_missing,
|
||
}
|
||
|
||
|
||
def _build_alert_noise_reduction_readback(
    *,
    alert_operation_summary: Mapping[str, Any],
    alertmanager_event_summary: Mapping[str, Any],
    grouped_alert_summary: Mapping[str, Any],
    operation_summary: Mapping[str, Any],
    agent_decision_wiring: Mapping[str, Any],
    learning_loop: Mapping[str, Any],
) -> dict[str, Any]:
    """Report alert-storm convergence, suppression, and AI routing receipts.

    Five required stages plus one optional break-glass stage are assembled;
    the payload ``status`` is ``"completed"`` only when every required stage
    has a positive total.
    """

    # Key groups shared by the matching total / recent lookups below.
    controlled_operation_types = (
        "ansible_candidate_matched",
        "ansible_check_mode_executed",
        "ansible_apply_executed",
    )
    controlled_alert_statuses = (
        "AUTO_REPAIR_TRIGGERED",
        "EXECUTION_STARTED",
        "EXECUTION_COMPLETED",
        "NOTIFICATION_CLASSIFIED",
    )
    guardrail_statuses = (
        "GUARDRAIL_BLOCKED",
        "STATE_GUARD_BLOCKED",
        "ESCALATED",
        "SILENCED",
    )

    # Intake receipts come from both the operation log and conversation events.
    received_from_operations = _status_total(alert_operation_summary, "ALERT_RECEIVED")
    received_from_events = _status_total(alertmanager_event_summary, "received")
    alert_received_total = received_from_operations + received_from_events
    recent_received_from_operations = _status_recent(alert_operation_summary, "ALERT_RECEIVED")
    recent_received_from_events = _status_recent(alertmanager_event_summary, "received")
    alert_received_recent = recent_received_from_operations + recent_received_from_events

    converged_duplicate_total = _status_total(alertmanager_event_summary, "converged")
    converged_duplicate_recent = _status_recent(alertmanager_event_summary, "converged")
    llm_inflight_suppressed_total = _status_total(
        alertmanager_event_summary, "llm_inflight_suppressed"
    )
    llm_inflight_suppressed_recent = _status_recent(
        alertmanager_event_summary, "llm_inflight_suppressed"
    )
    grouped_child_total = _status_total(grouped_alert_summary, "grouped_child_alert")
    grouped_child_recent = _status_recent(grouped_alert_summary, "grouped_child_alert")

    duplicate_convergence_total = (
        converged_duplicate_total + llm_inflight_suppressed_total + grouped_child_total
    )
    duplicate_convergence_recent = (
        converged_duplicate_recent + llm_inflight_suppressed_recent + grouped_child_recent
    )

    controlled_route_total = _trace_total(
        operation_summary, *controlled_operation_types
    ) + _status_total(alert_operation_summary, *controlled_alert_statuses)
    controlled_route_recent = _trace_recent(
        operation_summary, *controlled_operation_types
    ) + _status_recent(alert_operation_summary, *controlled_alert_statuses)

    guardrail_total = _status_total(alert_operation_summary, *guardrail_statuses)
    guardrail_recent = _status_recent(alert_operation_summary, *guardrail_statuses)

    decision_complete = agent_decision_wiring.get("status") == "completed"
    learning_complete = learning_loop.get("status") == "completed"
    # Learning feedback only counts when the learning loop is complete and
    # there is suppressed-alert evidence to learn from.
    learning_feedback_total = (
        1 if learning_complete and duplicate_convergence_total > 0 else 0
    )
    learning_feedback_recent = (
        1 if learning_complete and duplicate_convergence_recent > 0 else 0
    )

    intake_stage = _alert_noise_stage(
        stage_id="alert_intake_receipts",
        display_name="Alertmanager receipts recorded",
        evidence_sources=[
            "alert_operation_log:ALERT_RECEIVED",
            "awooop_conversation_event:received",
        ],
        total=alert_received_total,
        recent=alert_received_recent,
        required_for_noise_reduction=True,
        feeds_controlled_queue=True,
        next_action_if_missing="record_alertmanager_received_events_before_any_notification_or_ai_route",
    )
    convergence_stage = _alert_noise_stage(
        stage_id="duplicate_convergence",
        display_name="Duplicate and recurring alerts converge",
        evidence_sources=[
            "awooop_conversation_event:converged",
            "awooop_conversation_event:llm_inflight_suppressed",
            "awooop_conversation_event:alert-group",
        ],
        total=duplicate_convergence_total,
        recent=duplicate_convergence_recent,
        required_for_noise_reduction=True,
        feeds_controlled_queue=True,
        next_action_if_missing="enable_converged_fingerprint_and_grouped_child_alert_receipts",
    )
    suppression_stage = _alert_noise_stage(
        stage_id="notification_suppression",
        display_name="Telegram flood is suppressed into parent/digest receipts",
        evidence_sources=[
            "awooop_conversation_event:alert-group",
            "telegram_gateway:grouped_alert_digest_dedup",
        ],
        total=grouped_child_total + llm_inflight_suppressed_total,
        recent=grouped_child_recent + llm_inflight_suppressed_recent,
        required_for_noise_reduction=True,
        feeds_controlled_queue=False,
        next_action_if_missing="write_grouped_child_alert_event_or_inflight_suppression_receipt",
    )
    routing_stage = _alert_noise_stage(
        stage_id="ai_controlled_routing",
        display_name="Alerts route to AI controlled candidate/check/apply queue",
        evidence_sources=[
            "automation_operation_log:ansible_candidate_matched",
            "automation_operation_log:ansible_check_mode_executed",
            "automation_operation_log:ansible_apply_executed",
            "alert_operation_log:AUTO_REPAIR_TRIGGERED",
        ],
        # The total is gated on completed decision wiring; recent is not.
        total=controlled_route_total if decision_complete else 0,
        recent=controlled_route_recent,
        required_for_noise_reduction=True,
        feeds_controlled_queue=True,
        next_action_if_missing="route_repeated_non_critical_alerts_to_controlled_candidate_check_apply",
    )
    learning_stage = _alert_noise_stage(
        stage_id="learning_feedback",
        display_name="Suppressed alert patterns feed KM/RAG/PlayBook learning",
        evidence_sources=[
            "alert_noise_reduction",
            "ai_agent_learning_loop_readback",
        ],
        total=learning_feedback_total,
        recent=learning_feedback_recent,
        required_for_noise_reduction=True,
        feeds_controlled_queue=True,
        next_action_if_missing="keep_p1c_learning_loop_complete_before_closing_alert_noise_reduction",
    )
    break_glass_stage = _alert_noise_stage(
        stage_id="break_glass_boundary",
        display_name="Critical / guardrail cases remain isolated from default alert routing",
        evidence_sources=[
            "alert_operation_log:GUARDRAIL_BLOCKED",
            "alert_operation_log:ESCALATED",
            "current_policy:critical_break_glass_required",
        ],
        total=guardrail_total,
        recent=guardrail_recent,
        required_for_noise_reduction=False,
        feeds_controlled_queue=False,
        next_action_if_missing="record_guardrail_or_break_glass_receipts_only_for_true_hard_blockers",
    )
    stages = [
        intake_stage,
        convergence_stage,
        suppression_stage,
        routing_stage,
        learning_stage,
        break_glass_stage,
    ]

    required_stages = [
        stage for stage in stages if stage["required_for_noise_reduction"] is True
    ]
    missing_required = [
        str(stage["stage_id"])
        for stage in required_stages
        if stage["present"] is not True
    ]
    present_required_count = len(
        [stage for stage in required_stages if stage["present"] is True]
    )
    required_count = len(required_stages)

    status = "in_progress" if missing_required else "completed"
    return {
        "schema_version": "ai_agent_alert_noise_reduction_readback_v1",
        "status": status,
        "stages": stages,
        "missing_required_stage_ids": missing_required,
        "routing_policy": {
            "manual_default_route_allowed": False,
            "low_medium_high_alerts_route_to_ai_controlled_queue": True,
            "critical_break_glass_still_required": True,
            "telegram_child_alert_flood_allowed": False,
        },
        "public_safety": {
            "stores_raw_alert_payload": False,
            "stores_secret_values": False,
            "executes_on_read": False,
            "reads_raw_sessions": False,
        },
        "rollups": {
            "stage_count": len(stages),
            "required_stage_count": required_count,
            "required_stage_present_count": present_required_count,
            "required_stage_missing_count": len(missing_required),
            "alert_received_total": alert_received_total,
            "alert_received_recent": alert_received_recent,
            "converged_duplicate_total": converged_duplicate_total,
            "llm_inflight_suppressed_total": llm_inflight_suppressed_total,
            "grouped_child_alert_total": grouped_child_total,
            "suppressed_alert_total": duplicate_convergence_total,
            "suppressed_alert_recent": duplicate_convergence_recent,
            "controlled_route_total": controlled_route_total,
            "controlled_route_recent": controlled_route_recent,
            "break_glass_or_guardrail_total": guardrail_total,
        },
    }
|
||
|
||
|
||
def _build_host_sustained_load_controlled_automation_readback() -> dict[str, Any]:
|
||
"""Expose the sustained CPU/load automation contract as a first-class lane."""
|
||
|
||
action_classes = [
|
||
{
|
||
"class_id": "orphan_browser_smoke_runaway_process",
|
||
"alertnames": [
|
||
"HostLoadAverageSustainedHigh",
|
||
"HostOrphanBrowserSmokeHighCpu",
|
||
],
|
||
"classifier": "host-sustained-load-controller.py:controlled_orphan_browser_remediation_ready",
|
||
"controlled_action": "host-runaway-process-remediation.py dry-run then gated SIGTERM",
|
||
"controlled_apply_allowed": True,
|
||
"post_apply_verifier": "host-sustained-load-controller.py --json",
|
||
"rollback": "no persistent host mutation; workload can be re-run",
|
||
"forbidden_actions": [
|
||
"SIGKILL",
|
||
"docker_restart",
|
||
"systemctl_restart",
|
||
"nginx_reload",
|
||
"firewall_change",
|
||
"reboot",
|
||
],
|
||
},
|
||
{
|
||
"class_id": "ci_runner_load_saturation",
|
||
"alertnames": [
|
||
"HostLoadAverageSustainedHigh",
|
||
"HostCiRunnerLoadSaturation",
|
||
],
|
||
"classifier": "host-sustained-load-controller.py:controlled_ci_runner_saturation_guarded",
|
||
"controlled_action": "keep runner pressure gate fail-closed; prepare stale-run drain/cancel packet only after queue verifier",
|
||
"controlled_apply_allowed": True,
|
||
"post_apply_verifier": "read-public-gitea-actions-queue.py + non110/110 runner readiness verifier",
|
||
"rollback": "do not restore legacy or generic runner labels; re-run CD after pressure clears",
|
||
"forbidden_actions": [
|
||
"legacy_runner_restore",
|
||
"generic_runner_label_restore",
|
||
"process_kill_for_legitimate_ci",
|
||
"warn_only_pressure_gate",
|
||
],
|
||
},
|
||
{
|
||
"class_id": "memory_or_swap_pressure",
|
||
"alertnames": ["HostLoadAverageSustainedHigh", "HostOutOfMemory"],
|
||
"classifier": "host-sustained-load-controller.py:blocked_memory_or_swap_pressure_requires_service_playbook",
|
||
"controlled_action": "route to service-specific memory/cgroup playbook with check-mode diff",
|
||
"controlled_apply_allowed": False,
|
||
"post_apply_verifier": "service-specific health and load readback",
|
||
"rollback": "service-specific resource rollback",
|
||
"forbidden_actions": [
|
||
"blind_limit_reduction",
|
||
"docker_restart_without_service_playbook",
|
||
"destructive_prune",
|
||
],
|
||
},
|
||
{
|
||
"class_id": "unknown_sustained_load",
|
||
"alertnames": ["HostLoadAverageSustainedHigh"],
|
||
"classifier": "host-sustained-load-controller.py:blocked_unknown_sustained_load_requires_source_specific_playbook",
|
||
"controlled_action": "run host-sustained-load-evidence.py then select or generate a source-specific PlayBook",
|
||
"controlled_apply_allowed": False,
|
||
"post_apply_verifier": "host-sustained-load-evidence.py readback plus source-specific verifier before closure",
|
||
"rollback": "source-specific rollback required before apply",
|
||
"forbidden_actions": [
|
||
"generic_kill",
|
||
"generic_docker_restart",
|
||
"generic_systemd_restart",
|
||
"secret_collection",
|
||
],
|
||
},
|
||
]
|
||
required_assets = [
|
||
{
|
||
"asset_id": "host_sustained_load_controller",
|
||
"path": "scripts/ops/host-sustained-load-controller.py",
|
||
"purpose": "classify sustained load and emit the controlled automation packet",
|
||
"ready": True,
|
||
},
|
||
{
|
||
"asset_id": "host_sustained_load_sanitized_evidence",
|
||
"path": "scripts/ops/host-sustained-load-evidence.py",
|
||
"purpose": "collect sanitized process-family and container evidence for source-specific PlayBooks",
|
||
"ready": True,
|
||
},
|
||
{
|
||
"asset_id": "host_runaway_process_exporter",
|
||
"path": "scripts/ops/host-runaway-process-exporter.py",
|
||
"purpose": "publish read-only load/root-cause metrics",
|
||
"ready": True,
|
||
},
|
||
{
|
||
"asset_id": "orphan_browser_remediation_helper",
|
||
"path": "scripts/ops/host-runaway-process-remediation.py",
|
||
"purpose": "dry-run and controlled SIGTERM for allowlisted orphan browser process groups",
|
||
"ready": True,
|
||
},
|
||
{
|
||
"asset_id": "prometheus_alert_route",
|
||
"path": "ops/monitoring/alerts-unified.yml:HostLoadAverageSustainedHigh",
|
||
"purpose": "route sustained load alerts to the controller instead of generic SSH top",
|
||
"ready": True,
|
||
},
|
||
{
|
||
"asset_id": "ai_agent_work_item_readback",
|
||
"path": "/api/v1/agents/agent-autonomous-runtime-control",
|
||
"purpose": "make this lane visible in work_item_progress and rollups",
|
||
"ready": True,
|
||
},
|
||
]
|
||
return {
|
||
"schema_version": "host_sustained_load_controlled_automation_readback_v1",
|
||
"status": "completed",
|
||
"current_work_item_id": "P1-D2-host-sustained-load-controlled-automation",
|
||
"problem_statement": (
|
||
"HostLoadAverageSustainedHigh must not stop at alerting; it must "
|
||
"classify root cause, produce a controlled action packet, run a "
|
||
"post-apply verifier, and write back learning evidence."
|
||
),
|
||
"action_classes": action_classes,
|
||
"required_assets": required_assets,
|
||
"control_flow": [
|
||
"alert_received",
|
||
"read_textfile_metrics",
|
||
"classify_root_cause",
|
||
"emit_controlled_packet",
|
||
"dry_run_or_check_mode",
|
||
"controlled_apply_when_allowlisted",
|
||
"post_apply_verifier",
|
||
"km_playbook_telegram_receipt_writeback",
|
||
],
|
||
"operation_boundaries": {
|
||
"executes_on_read": False,
|
||
"secret_value_read": False,
|
||
"raw_session_read": False,
|
||
"raw_runner_registration_read": False,
|
||
"critical_break_glass_still_required": True,
|
||
"legacy_runner_restore_allowed": False,
|
||
"generic_runner_label_restore_allowed": False,
|
||
},
|
||
"rollups": {
|
||
"action_class_count": len(action_classes),
|
||
"controlled_apply_class_count": sum(
|
||
1 for item in action_classes if item["controlled_apply_allowed"] is True
|
||
),
|
||
"required_asset_count": len(required_assets),
|
||
"ready_asset_count": sum(1 for item in required_assets if item["ready"] is True),
|
||
"forbidden_action_count": sum(
|
||
len(item["forbidden_actions"]) for item in action_classes
|
||
),
|
||
},
|
||
}
|
||
|
||
|
||
def _build_ui_productization_readback() -> dict[str, Any]:
|
||
"""Expose the concrete AwoooP product UI surfaces used to track this work."""
|
||
|
||
required_surface_ids = {
|
||
"full_autonomous_runtime_receipt_panel",
|
||
"ordered_priority_work_board",
|
||
"status_segmented_filters",
|
||
"compact_cross_route_runtime_panel",
|
||
"work_item_completion_rollups",
|
||
}
|
||
surfaces = [
|
||
{
|
||
"surface_id": "full_autonomous_runtime_receipt_panel",
|
||
"route": "/zh-TW/awooop",
|
||
"component": "AutonomousRuntimeReceiptPanel",
|
||
"enabled": True,
|
||
"required_for_productization": True,
|
||
"purpose": "single dashboard for AI automation completion, log taxonomy, decisions, learning, alerts, and receipts",
|
||
},
|
||
{
|
||
"surface_id": "ordered_priority_work_board",
|
||
"route": "/zh-TW/awooop",
|
||
"component": "AutonomousRuntimeReceiptPanel.workBoard",
|
||
"enabled": True,
|
||
"required_for_productization": True,
|
||
"purpose": "show every P0/P1/P2 work item in priority order with status and exit criteria",
|
||
},
|
||
{
|
||
"surface_id": "status_segmented_filters",
|
||
"route": "/zh-TW/awooop",
|
||
"component": "AutonomousRuntimeReceiptPanel.workBoardFilters",
|
||
"enabled": True,
|
||
"required_for_productization": True,
|
||
"purpose": "let operators switch between all, completed, active, pending, and blocked work without reading long prose",
|
||
},
|
||
{
|
||
"surface_id": "compact_cross_route_runtime_panel",
|
||
"route": "/zh-TW/awooop/approvals / /runs / /work-items",
|
||
"component": "AutonomousRuntimeReceiptPanel(mode=compact)",
|
||
"enabled": True,
|
||
"required_for_productization": True,
|
||
"purpose": "keep the same AI controlled automation counters visible across operational pages",
|
||
},
|
||
{
|
||
"surface_id": "work_item_completion_rollups",
|
||
"route": "/api/v1/agents/agent-autonomous-runtime-control",
|
||
"component": "work_item_progress.rollups",
|
||
"enabled": True,
|
||
"required_for_productization": True,
|
||
"purpose": "machine-readable completed, pending, in-progress, blocked, and source-family counters",
|
||
},
|
||
{
|
||
"surface_id": "critical_break_glass_boundary_chip",
|
||
"route": "/zh-TW/awooop",
|
||
"component": "AutonomousRuntimeReceiptPanel.policyRail",
|
||
"enabled": True,
|
||
"required_for_productization": False,
|
||
"purpose": "keep critical break-glass visible without making manual handling the default outcome",
|
||
},
|
||
]
|
||
present_required = [
|
||
surface["surface_id"]
|
||
for surface in surfaces
|
||
if surface["enabled"] and surface["surface_id"] in required_surface_ids
|
||
]
|
||
missing_required = sorted(required_surface_ids - set(present_required))
|
||
return {
|
||
"schema_version": "ai_agent_ui_productization_readback_v1",
|
||
"status": "completed" if not missing_required else "in_progress",
|
||
"surfaces": surfaces,
|
||
"missing_required_surface_ids": missing_required,
|
||
"public_safety": {
|
||
"uses_secret_values": False,
|
||
"reads_raw_sessions": False,
|
||
"uses_github_surface": False,
|
||
"manual_default_outcome_allowed": False,
|
||
},
|
||
"rollups": {
|
||
"surface_count": len(surfaces),
|
||
"required_surface_count": len(required_surface_ids),
|
||
"required_surface_present_count": len(present_required),
|
||
"required_surface_missing_count": len(missing_required),
|
||
"route_count": 4,
|
||
"segmented_filter_count": 5,
|
||
},
|
||
}
|
||
|
||
|
||
def _build_multi_product_taxonomy_contract(
|
||
log_integration_taxonomy: Mapping[str, Any],
|
||
) -> dict[str, Any]:
|
||
"""Publish the shared taxonomy contract for AWOOOI-managed products."""
|
||
|
||
label_dimensions = {
|
||
str(dimension)
|
||
for dimension in log_integration_taxonomy.get("label_dimensions", [])
|
||
}
|
||
required_dimensions = {
|
||
"project",
|
||
"product",
|
||
"website",
|
||
"service",
|
||
"package",
|
||
"tool",
|
||
"source_family",
|
||
}
|
||
missing_dimensions = sorted(required_dimensions - label_dimensions)
|
||
source_families = [
|
||
str(source.get("source_family_id"))
|
||
for source in log_integration_taxonomy.get("source_families", [])
|
||
if isinstance(source, Mapping) and source.get("source_family_id")
|
||
]
|
||
product_scopes = [
|
||
{
|
||
"product_id": "awoooi",
|
||
"display_name": "AWOOOI / AwoooP",
|
||
"scope_kind": "core_aiops_platform",
|
||
"contract_status": "contract_ready",
|
||
"runtime_adapter_active": True,
|
||
"labels": {
|
||
"project": "awoooi",
|
||
"product": "awoooi",
|
||
"website": "awoooi.wooo.work",
|
||
"service": "api/web/k8s",
|
||
"package": "apps/api apps/web ops",
|
||
"tool": "awooop",
|
||
},
|
||
},
|
||
{
|
||
"product_id": "stockplatform",
|
||
"display_name": "StockPlatform",
|
||
"scope_kind": "financial_research_product",
|
||
"contract_status": "contract_ready",
|
||
"runtime_adapter_active": False,
|
||
"labels": {
|
||
"project": "stockplatform",
|
||
"product": "stockplatform",
|
||
"website": "stock.wooo.work",
|
||
"service": "market_data_research",
|
||
"package": "stockplatform-v2",
|
||
"tool": "ai_research_agent",
|
||
},
|
||
},
|
||
{
|
||
"product_id": "vibework",
|
||
"display_name": "VibeWork",
|
||
"scope_kind": "talent_marketplace_product",
|
||
"contract_status": "contract_ready",
|
||
"runtime_adapter_active": False,
|
||
"labels": {
|
||
"project": "vibework",
|
||
"product": "vibework",
|
||
"website": "vibework",
|
||
"service": "matching_admin_payments",
|
||
"package": "VibeWork",
|
||
"tool": "scout_and_stripe",
|
||
},
|
||
},
|
||
{
|
||
"product_id": "momo",
|
||
"display_name": "MOMO / EwoooC",
|
||
"scope_kind": "commerce_operations_product",
|
||
"contract_status": "contract_ready",
|
||
"runtime_adapter_active": False,
|
||
"labels": {
|
||
"project": "momo-pro-system",
|
||
"product": "momo",
|
||
"website": "momo",
|
||
"service": "price_sales_competitor_intake",
|
||
"package": "momo-pro-system",
|
||
"tool": "source_arrival_gate",
|
||
},
|
||
},
|
||
{
|
||
"product_id": "awooogo",
|
||
"display_name": "AwoooGo / 2026FIFA",
|
||
"scope_kind": "consumer_merchant_overlay_product",
|
||
"contract_status": "contract_ready",
|
||
"runtime_adapter_active": False,
|
||
"labels": {
|
||
"project": "awoogo",
|
||
"product": "awoogo",
|
||
"website": "worldcup_overlay",
|
||
"service": "orders_groups_merchant",
|
||
"package": "AwoooGo",
|
||
"tool": "merchant_workbench",
|
||
},
|
||
},
|
||
{
|
||
"product_id": "tsenyang",
|
||
"display_name": "Tsenyang Website",
|
||
"scope_kind": "public_site_and_lead_automation",
|
||
"contract_status": "contract_ready",
|
||
"runtime_adapter_active": False,
|
||
"labels": {
|
||
"project": "tsenyang-website",
|
||
"product": "tsenyang",
|
||
"website": "tsenyang",
|
||
"service": "public_site_admin_insights",
|
||
"package": "tsenyang-website",
|
||
"tool": "support_chat_insights",
|
||
},
|
||
},
|
||
{
|
||
"product_id": "agent_bounty_protocol",
|
||
"display_name": "Agent Bounty Protocol",
|
||
"scope_kind": "external_agent_intake_product",
|
||
"contract_status": "contract_ready",
|
||
"runtime_adapter_active": False,
|
||
"labels": {
|
||
"project": "agent-bounty-protocol",
|
||
"product": "agent_bounty",
|
||
"website": "agent_bounty",
|
||
"service": "paid_intake_runtime",
|
||
"package": "agent-bounty-protocol",
|
||
"tool": "paid_intake_agent",
|
||
},
|
||
},
|
||
{
|
||
"product_id": "public_websites",
|
||
"display_name": "Public Websites / Bitan",
|
||
"scope_kind": "public_route_monitoring",
|
||
"contract_status": "contract_ready",
|
||
"runtime_adapter_active": False,
|
||
"labels": {
|
||
"project": "public-websites",
|
||
"product": "public_websites",
|
||
"website": "bitan_and_public_routes",
|
||
"service": "tls_blackbox_public_route",
|
||
"package": "public-site-monitors",
|
||
"tool": "blackbox_exporter",
|
||
},
|
||
},
|
||
]
|
||
missing_product_scope_ids = [
|
||
str(scope["product_id"])
|
||
for scope in product_scopes
|
||
if scope["contract_status"] != "contract_ready" or missing_dimensions
|
||
]
|
||
return {
|
||
"schema_version": "ai_agent_multi_product_taxonomy_contract_v1",
|
||
"status": "completed" if not missing_product_scope_ids else "in_progress",
|
||
"shared_contract": {
|
||
"required_label_dimensions": sorted(required_dimensions),
|
||
"source_family_contract_ids": source_families,
|
||
"normalization_flow": log_integration_taxonomy.get("normalized_event_flow") or [],
|
||
},
|
||
"product_scopes": product_scopes,
|
||
"missing_required_dimension_ids": missing_dimensions,
|
||
"missing_product_scope_ids": missing_product_scope_ids,
|
||
"public_safety": {
|
||
"raw_secret_collection_allowed": False,
|
||
"raw_session_collection_allowed": False,
|
||
"external_product_runtime_write_enabled_on_read": False,
|
||
"github_surface_required": False,
|
||
},
|
||
"rollups": {
|
||
"product_scope_count": len(product_scopes),
|
||
"contract_ready_product_scope_count": sum(
|
||
1 for scope in product_scopes if scope["contract_status"] == "contract_ready"
|
||
),
|
||
"runtime_adapter_active_count": sum(
|
||
1 for scope in product_scopes if scope["runtime_adapter_active"] is True
|
||
),
|
||
"required_dimension_count": len(required_dimensions),
|
||
"missing_required_dimension_count": len(missing_dimensions),
|
||
"source_family_contract_count": len(source_families),
|
||
},
|
||
}
|
||
|
||
|
||
def _build_work_item_progress(
    *,
    trace_ledger: Mapping[str, Any],
    log_integration_taxonomy: Mapping[str, Any],
    log_controlled_writeback_executor: Mapping[str, Any],
    log_controlled_writeback_consumer: Mapping[str, Any],
    agent_decision_wiring: Mapping[str, Any],
    learning_loop: Mapping[str, Any],
    alert_noise_reduction: Mapping[str, Any],
    host_sustained_load_automation: Mapping[str, Any],
    ui_productization: Mapping[str, Any],
    multi_product_taxonomy: Mapping[str, Any],
    db_read_status: str,
) -> dict[str, Any]:
    """Build ordered work items that the UI and agent can keep advancing.

    Each argument is a readback payload; only its ``schema_version``,
    ``status``, ``rollups`` and a few list fields are read. Work items from
    P1-B onwards form a chain: an item is ``completed`` when its own readback
    is ready, ``in_progress`` when the previous item in the chain is done,
    and ``pending`` otherwise.

    Returns:
        A payload with ``ordered_items`` (the fixed P0/P1/P2 list),
        ``source_family_items`` (one per mapping in the taxonomy's
        ``source_families`` list) and ``rollups`` counting items by status.
    """

    def _rollups_of(payload: Mapping[str, Any]) -> Mapping[str, Any]:
        # A missing or malformed "rollups" value degrades to an empty mapping.
        rollups = payload.get("rollups")
        return rollups if isinstance(rollups, Mapping) else {}

    def _list_of(payload: Mapping[str, Any], key: str) -> list[Any]:
        # Only real lists are trusted; anything else is treated as empty.
        value = payload.get(key)
        return value if isinstance(value, list) else []

    def _readback_completed(
        payload: Mapping[str, Any],
        schema_version: str,
        missing_count: int,
    ) -> bool:
        return (
            payload.get("schema_version") == schema_version
            and payload.get("status") == "completed"
            and missing_count == 0
        )

    def _chained_status(done: bool, predecessor_done: bool) -> str:
        if done:
            return "completed"
        return "in_progress" if predecessor_done else "pending"

    taxonomy_rollups = _rollups_of(log_integration_taxonomy)
    source_families = _list_of(log_integration_taxonomy, "source_families")
    inactive_source_count = _int_value(taxonomy_rollups.get("inactive_source_family_count"))
    missing_required = _list_of(trace_ledger, "missing_required_stage_ids")

    # NOTE(review): P1-A keys only on the inactive counter, so it also reads
    # as completed when the taxonomy rollups are absent - presumably
    # _int_value maps None to 0; confirm this is the intended default.
    p1a_completed = inactive_source_count == 0

    decision_wiring_missing = _int_value(
        _rollups_of(agent_decision_wiring).get("required_stage_missing_count")
    )
    p1b_completed = _readback_completed(
        agent_decision_wiring,
        "ai_agent_decision_wiring_readback_v1",
        decision_wiring_missing,
    )

    learning_loop_missing = _int_value(
        _rollups_of(learning_loop).get("required_stage_missing_count")
    )
    p1c_completed = _readback_completed(
        learning_loop,
        "ai_agent_learning_loop_readback_v1",
        learning_loop_missing,
    )

    alert_noise_missing = _int_value(
        _rollups_of(alert_noise_reduction).get("required_stage_missing_count")
    )
    p1d_completed = _readback_completed(
        alert_noise_reduction,
        "ai_agent_alert_noise_reduction_readback_v1",
        alert_noise_missing,
    )

    host_load_rollups = _rollups_of(host_sustained_load_automation)
    host_required_assets = _int_value(host_load_rollups.get("required_asset_count"))
    host_ready_assets = _int_value(host_load_rollups.get("ready_asset_count"))
    host_apply_classes = _int_value(host_load_rollups.get("controlled_apply_class_count"))
    host_load_ready = (
        host_sustained_load_automation.get("schema_version")
        == "host_sustained_load_controlled_automation_readback_v1"
        and host_sustained_load_automation.get("status") == "completed"
        and host_required_assets == host_ready_assets
        and host_apply_classes >= 1
    )

    log_executor_rollups = _rollups_of(log_controlled_writeback_executor)
    log_executor_blockers = _list_of(log_controlled_writeback_executor, "active_blockers")
    log_executor_ready = (
        log_controlled_writeback_executor.get("schema_version")
        == "ai_agent_log_controlled_writeback_executor_readback_v1"
        and log_controlled_writeback_executor.get("status")
        == "controlled_writeback_executor_ready"
        and log_executor_rollups.get("controlled_executor_dispatch_ready") is True
        and not log_executor_blockers
    )

    log_consumer_rollups = _rollups_of(log_controlled_writeback_consumer)
    log_consumer_blockers = _list_of(log_controlled_writeback_consumer, "active_blockers")
    log_consumer_ready = (
        log_controlled_writeback_consumer.get("schema_version")
        == "ai_agent_log_controlled_writeback_consumer_readback_v1"
        and log_controlled_writeback_consumer.get("status")
        == "controlled_writeback_consumer_readback_ready"
        and log_consumer_rollups.get("controlled_consumer_readback_ready") is True
        and not log_consumer_blockers
    )

    ui_surface_missing = _int_value(
        _rollups_of(ui_productization).get("required_surface_missing_count")
    )
    p2a_completed = _readback_completed(
        ui_productization,
        "ai_agent_ui_productization_readback_v1",
        ui_surface_missing,
    )

    # Missing product scopes and missing label dimensions both count against P2-B.
    multi_product_missing = len(
        _list_of(multi_product_taxonomy, "missing_product_scope_ids")
    ) + _int_value(_rollups_of(multi_product_taxonomy).get("missing_required_dimension_count"))
    p2b_completed = _readback_completed(
        multi_product_taxonomy,
        "ai_agent_multi_product_taxonomy_contract_v1",
        multi_product_missing,
    )

    deployed_readback_complete = (
        db_read_status == "ok"
        and trace_ledger.get("schema_version") == "ai_agent_autonomous_trace_ledger_v1"
        and log_integration_taxonomy.get("schema_version") == "ai_agent_log_integration_taxonomy_v1"
    )

    ordered_items = [
        {
            "work_item_id": "P0-A-runtime-truth",
            "priority": "P0-A",
            "title": "Controlled apply runtime truth readback",
            "status": "completed",
            "exit_criteria": "production API reports db_read_status=ok and live executor receipts",
        },
        {
            "work_item_id": "P0-B-trace-ledger",
            "priority": "P0-B",
            "title": "Trace ledger for MCP/log/executor/verifier/KM/PlayBook/Telegram",
            "status": "completed" if not missing_required else "in_progress",
            "exit_criteria": "trace_ledger exposes required closed-loop stages and missing_required_stage_ids",
        },
        {
            "work_item_id": "P0-C-log-taxonomy",
            "priority": "P0-C",
            "title": "Project/product/site/service/package/tool log taxonomy",
            "status": "completed",
            "exit_criteria": "log_integration_taxonomy lists source families, labels, and public-safety policy",
        },
        {
            "work_item_id": "P0-D-ui-visibility",
            "priority": "P0-D",
            "title": "AwoooP UI shows automation loop and log integration progress",
            "status": "completed",
            "exit_criteria": "AwoooP, Approvals, Runs, and Work Items show trace/log taxonomy panel",
        },
        {
            "work_item_id": "P0-E-verification-deploy",
            "priority": "P0-E",
            "title": "Focused verification and production deploy marker readback",
            "status": "completed" if deployed_readback_complete else "in_progress",
            "exit_criteria": "deploy marker includes this code and production API exposes trace_ledger/log_integration_taxonomy",
            "blocker": None if deployed_readback_complete else "waiting_for_successful_gitea_cd_deploy_marker",
        },
        {
            "work_item_id": "P1-A-ingestion-coverage",
            "priority": "P1-A",
            "title": "Collector and sanitizer coverage for all source families",
            "status": "completed" if p1a_completed else "in_progress",
            "exit_criteria": "all source families have active sanitized classified events",
            "remaining_source_family_count": inactive_source_count,
        },
        {
            "work_item_id": "P1-B-agent-decision-wiring",
            "priority": "P1-B",
            "title": "RAG retrieval to PlayBook select/repair/check-mode/apply/verifier",
            "status": _chained_status(p1b_completed, p1a_completed),
            "exit_criteria": "AI Agent consumes labeled evidence and emits target selector, dry-run, apply, verifier, rollback",
            "remaining_decision_wiring_stage_count": decision_wiring_missing,
        },
        {
            "work_item_id": "P1-C-learning-loop",
            "priority": "P1-C",
            "title": "KM / PlayBook trust learning loop",
            "status": _chained_status(p1c_completed, p1b_completed),
            "exit_criteria": "verified execution updates KM entries, trust delta, similar-case clusters, and repair candidates",
            "remaining_learning_loop_stage_count": learning_loop_missing,
        },
        {
            "work_item_id": "P1-D-alert-noise-reduction",
            "priority": "P1-D",
            "title": "Alert grouping and AI controlled workflow routing",
            "status": _chained_status(p1d_completed, p1c_completed),
            "exit_criteria": "repeated alerts are clustered, deduped, routed to controlled automation, and no longer default to manual handling",
            "remaining_alert_noise_stage_count": alert_noise_missing,
        },
        {
            "work_item_id": "P1-D2-host-sustained-load-controlled-automation",
            "priority": "P1-D2",
            "title": "CPU sustained-load alerts classify and run AI controlled remediation",
            "status": _chained_status(host_load_ready, p1d_completed),
            "exit_criteria": "HostLoadAverageSustainedHigh routes to classifier, dry-run/check-mode, controlled apply packet, verifier, and KM/PlayBook writeback",
            "controlled_action_class_count": host_apply_classes,
            "ready_asset_count": host_ready_assets,
            "required_asset_count": host_required_assets,
        },
        {
            "work_item_id": "P1-E-log-controlled-writeback-executor",
            "priority": "P1-E",
            "title": "LOG feedback executor queue for KM / RAG / MCP / PlayBook",
            "status": _chained_status(log_executor_ready, host_load_ready),
            "exit_criteria": "executor readback exposes ready batches, target selectors, source diffs, rollback, verifier, and next-action queue",
            # Clamped at zero so a ready count above the total never goes negative.
            "remaining_executor_batch_count": max(
                0,
                _int_value(log_executor_rollups.get("execution_batch_count"))
                - _int_value(log_executor_rollups.get("ready_execution_batch_count")),
            ),
            "active_blocker_count": len(log_executor_blockers),
        },
        {
            "work_item_id": "P1-F-log-controlled-writeback-consumer",
            "priority": "P1-F",
            "title": "LOG metadata receipts consumable by KM / RAG / MCP / PlayBook / AI Agent",
            "status": _chained_status(log_consumer_ready, log_executor_ready),
            "exit_criteria": "runtime-control exposes ready consumer bindings for all LOG metadata writeback targets",
            "remaining_consumer_binding_count": max(
                0,
                _int_value(log_consumer_rollups.get("target_count"))
                - _int_value(log_consumer_rollups.get("ready_target_count")),
            ),
            "active_blocker_count": len(log_consumer_blockers),
        },
        {
            "work_item_id": "P2-A-ui-ux-productization",
            "priority": "P2-A",
            "title": "Professional product UI replacing text-heavy surfaces",
            "status": _chained_status(p2a_completed, log_consumer_ready),
            "exit_criteria": "AI automation status is shown as dense dashboard controls, filters, counters, and action rails",
            "remaining_ui_surface_count": ui_surface_missing,
        },
        {
            "work_item_id": "P2-B-multi-product-expansion",
            "priority": "P2-B",
            "title": "Reuse taxonomy across AWOOOI products/projects",
            "status": _chained_status(p2b_completed, p2a_completed),
            "exit_criteria": "StockPlatform, VibeWork, MOMO, AwoooGo, and other products report the same log taxonomy contract",
            "remaining_product_scope_count": multi_product_missing,
        },
    ]

    source_family_items = []
    for source in source_families:
        if not isinstance(source, Mapping):
            continue
        total = _int_value(source.get("total"))
        has_events = total > 0
        source_family_items.append({
            "work_item_id": f"P1-A-source-{source.get('source_family_id')}",
            "priority": "P1-A",
            "source_family_id": source.get("source_family_id"),
            "title": f"Ingest and label {source.get('source_family_id')}",
            "status": "completed" if has_events else "not_started",
            "label_dimensions": source.get("label_dimensions") or [],
            "next_controlled_action": (
                "keep_learning_and_quality_checks"
                if has_events
                else source.get("next_action_if_empty")
            ),
        })

    all_items = [*ordered_items, *source_family_items]
    by_status: dict[str, int] = {}
    for item in all_items:
        status = str(item.get("status") or "unknown")
        by_status[status] = by_status.get(status, 0) + 1
    return {
        "schema_version": "ai_agent_automation_work_item_progress_v1",
        "ordered_items": ordered_items,
        "source_family_items": source_family_items,
        "rollups": {
            "work_item_count": len(all_items),
            "ordered_work_item_count": len(ordered_items),
            "source_family_work_item_count": len(source_family_items),
            "completed_count": by_status.get("completed", 0),
            "in_progress_count": by_status.get("in_progress", 0),
            "pending_count": by_status.get("pending", 0),
            "blocked_count": by_status.get("blocked", 0),
            "not_started_count": by_status.get("not_started", 0),
            "by_status": by_status,
        },
    }
|
||
|
||
|
||
def _first_operation(
|
||
rows: Iterable[Mapping[str, Any]],
|
||
operation_type: str,
|
||
) -> dict[str, Any] | None:
|
||
for row in rows:
|
||
if str(row.get("operation_type") or "") == operation_type:
|
||
return dict(row)
|
||
return None
|
||
|
||
|
||
def _operation_by_id(
|
||
rows: Iterable[Mapping[str, Any]],
|
||
op_id: Any,
|
||
) -> dict[str, Any] | None:
|
||
needle = str(op_id or "")
|
||
if not needle:
|
||
return None
|
||
for row in rows:
|
||
if str(row.get("op_id") or "") == needle:
|
||
return dict(row)
|
||
return None
|
||
|
||
|
||
def _stage_status(row: Mapping[str, Any] | None, *, fallback_status: str | None = None) -> str:
|
||
if row is None:
|
||
return fallback_status or "missing"
|
||
return str(row.get("status") or row.get("result_status") or fallback_status or "present")
|
||
|
||
|
||
def _loop_stage(
|
||
*,
|
||
stage_id: str,
|
||
receipt_source: str,
|
||
present: bool,
|
||
status: str,
|
||
ref_id: str | None,
|
||
writes_runtime_state: bool,
|
||
next_action_if_missing: str,
|
||
) -> dict[str, Any]:
|
||
return {
|
||
"stage_id": stage_id,
|
||
"receipt_source": receipt_source,
|
||
"present": present,
|
||
"status": status,
|
||
"ref_id": ref_id,
|
||
"writes_runtime_state": writes_runtime_state,
|
||
"next_action_if_missing": None if present else next_action_if_missing,
|
||
}
|
||
|
||
|
||
def _autonomous_execution_loop_ledger(
    *,
    project_id: str,
    operation_latest_rows: Iterable[Mapping[str, Any] | Any],
    verifier_latest_rows: Iterable[Mapping[str, Any] | Any],
    km_latest_rows: Iterable[Mapping[str, Any] | Any],
    telegram_latest_rows: Iterable[Mapping[str, Any] | Any],
    auto_repair_latest_rows: Iterable[Mapping[str, Any] | Any],
    latest_flow_closure: Mapping[str, Any],
    latest_failure_classification: Mapping[str, Any],
    controlled_retry_package: Mapping[str, Any],
) -> dict[str, Any]:
    """Build the operation-id ledger that proves whether the runtime loop closed.

    The ledger is anchored on the first ``ansible_apply_executed`` row, then
    walks back to its check-mode and candidate operations through the
    ``check_mode_op_id`` / ``parent_op_id`` / ``source_candidate_op_id``
    references. Verifier, KM, Telegram and auto-repair receipts are then
    matched against that anchor.

    Returns:
        A payload with the resolved operation ids, one entry per stage,
        the ids of missing stages, ``closed``, ``execution_state`` and the
        next executor action.
    """

    operation_rows = [_row_mapping(row) for row in operation_latest_rows]
    verifier_rows = [_row_mapping(row) for row in verifier_latest_rows]
    km_rows = [_row_mapping(row) for row in km_latest_rows]
    telegram_rows = [_row_mapping(row) for row in telegram_latest_rows]
    auto_repair_rows = [_row_mapping(row) for row in auto_repair_latest_rows]

    latest_apply = _first_operation(operation_rows, "ansible_apply_executed")
    if latest_apply is not None:
        # With an apply row, only the check-mode run it points at is accepted.
        latest_check = _operation_by_id(
            operation_rows,
            latest_apply.get("check_mode_op_id") or latest_apply.get("parent_op_id"),
        )
    else:
        latest_check = _first_operation(operation_rows, "ansible_check_mode_executed")

    source_candidate_op_id = None
    if latest_check is not None:
        source_candidate_op_id = latest_check.get("parent_op_id") or latest_check.get("source_candidate_op_id")
    if latest_apply is not None and not source_candidate_op_id:
        source_candidate_op_id = latest_apply.get("source_candidate_op_id")
    latest_candidate = _operation_by_id(operation_rows, source_candidate_op_id)
    if latest_candidate is None and latest_apply is None and latest_check is None:
        # Nothing downstream exists yet, so fall back to the first candidate row.
        latest_candidate = _first_operation(operation_rows, "ansible_candidate_matched")

    anchor = latest_apply or latest_check or latest_candidate or {}
    apply_op_id = str((latest_apply or {}).get("op_id") or "")
    check_mode_op_id = str(
        (latest_check or {}).get("op_id")
        or (latest_apply or {}).get("check_mode_op_id")
        or (latest_apply or {}).get("parent_op_id")
        or ""
    )
    candidate_op_id = str(
        (latest_candidate or {}).get("op_id")
        or source_candidate_op_id
        or ""
    )
    incident_id = str(anchor.get("incident_id") or "")
    catalog_id = str(anchor.get("catalog_id") or "")
    playbook_path = str(anchor.get("playbook_path") or "")

    verifier = next(
        (
            row
            for row in verifier_rows
            if apply_op_id and str(row.get("apply_op_id") or "") == apply_op_id
        ),
        None,
    )
    # KM receipts are keyed by the first eight characters of the apply op id.
    km_path_type = f"ansible_apply_receipt:{apply_op_id[:8]}" if apply_op_id else ""
    km = next(
        (
            row
            for row in km_rows
            if (
                km_path_type
                and str(row.get("path_type") or "") == km_path_type
            )
            or (
                incident_id
                and str(row.get("related_incident_id") or "") == incident_id
            )
        ),
        None,
    )
    # Without an incident id, any sent controlled_apply_result message matches.
    telegram = next(
        (
            row
            for row in telegram_rows
            if str(row.get("send_status") or "") == "sent"
            and str(row.get("action") or "") == "controlled_apply_result"
            and (
                not incident_id
                or str(row.get("incident_id") or "") == incident_id
            )
        ),
        None,
    )
    # Auto-repair rows are matched by the apply op id appearing in their step text.
    auto_repair = next(
        (
            row
            for row in auto_repair_rows
            if apply_op_id
            and apply_op_id
            in str(row.get("executed_steps_text") or row.get("executed_steps") or "")
        ),
        None,
    )

    # Candidate and check-mode count as present when only their id is known
    # from a downstream row, even if the row itself was not found.
    candidate_present = bool(latest_candidate or candidate_op_id)
    check_present = bool(latest_check or check_mode_op_id)
    apply_present = latest_apply is not None
    auto_repair_present = auto_repair is not None
    verifier_present = verifier is not None
    km_present = km is not None
    telegram_present = telegram is not None

    stages = [
        _loop_stage(
            stage_id="candidate",
            receipt_source="automation_operation_log:ansible_candidate_matched",
            present=candidate_present,
            status=_stage_status(latest_candidate, fallback_status="inferred_from_check_mode")
            if candidate_present
            else "missing",
            ref_id=candidate_op_id or None,
            writes_runtime_state=False,
            next_action_if_missing="candidate_backfill_worker_enqueue_allowlisted_playbook",
        ),
        _loop_stage(
            stage_id="check_mode",
            receipt_source="automation_operation_log:ansible_check_mode_executed",
            present=check_present,
            status=_stage_status(latest_check, fallback_status="inferred_from_apply_parent")
            if check_present
            else "missing",
            ref_id=check_mode_op_id or None,
            writes_runtime_state=False,
            next_action_if_missing="ansible_check_mode_worker_claims_candidate",
        ),
        _loop_stage(
            stage_id="controlled_apply",
            receipt_source="automation_operation_log:ansible_apply_executed",
            present=apply_present,
            status=_stage_status(latest_apply),
            ref_id=apply_op_id or None,
            writes_runtime_state=True,
            next_action_if_missing="controlled_apply_worker_waits_for_check_mode_success",
        ),
        _loop_stage(
            stage_id="auto_repair_execution_receipt",
            receipt_source="auto_repair_executions:ansible_controlled_apply",
            present=auto_repair_present,
            status=str((auto_repair or {}).get("result_status") or "missing"),
            ref_id=str((auto_repair or {}).get("id") or "") or None,
            writes_runtime_state=True,
            next_action_if_missing="receipt_backfill_records_auto_repair_execution",
        ),
        _loop_stage(
            stage_id="post_apply_verifier",
            receipt_source="incident_evidence.post_execution_state",
            present=verifier_present,
            status=str((verifier or {}).get("verification_result") or "missing"),
            ref_id=str((verifier or {}).get("id") or "") or None,
            writes_runtime_state=True,
            next_action_if_missing="post_apply_verifier_writes_incident_evidence",
        ),
        _loop_stage(
            stage_id="km_playbook_writeback",
            receipt_source="knowledge_entries:ansible_apply_receipt",
            present=km_present,
            status=str((km or {}).get("status") or "missing"),
            ref_id=str((km or {}).get("id") or "") or None,
            writes_runtime_state=True,
            next_action_if_missing="hermes_writes_km_playbook_trust_candidate",
        ),
        _loop_stage(
            stage_id="telegram_receipt",
            receipt_source="awooop_outbound_message:controlled_apply_result",
            present=telegram_present,
            status=str((telegram or {}).get("send_status") or "missing"),
            ref_id=str((telegram or {}).get("message_id") or "") or None,
            writes_runtime_state=True,
            next_action_if_missing="live_apply_gateway_sends_controlled_apply_result_receipt",
        ),
    ]
    missing_stage_ids = [
        str(stage["stage_id"])
        for stage in stages
        if stage["present"] is not True
    ]
    # Closure needs an apply op id, an auto-repair receipt, and the flow
    # closure readback agreeing that the flow is closed.
    closed = bool(
        apply_op_id
        and auto_repair_present
        and latest_flow_closure.get("closed") is True
    )

    classification = str(latest_failure_classification.get("classification") or "")
    if not candidate_present and not check_present and not apply_present:
        execution_state = "waiting_for_candidate"
        next_executor_action = "candidate_backfill_worker_waits_for_matching_incident"
    elif not apply_present:
        execution_state = "executor_in_progress_or_waiting"
        next_executor_action = "continue_candidate_to_check_mode_to_apply"
    elif closed and classification == "latest_controlled_apply_closed_success":
        execution_state = "closed_success"
        next_executor_action = "keep_receipt_chain_closed"
    elif closed:
        execution_state = "closed_failed_apply_repair_ready"
        next_executor_action = str(
            controlled_retry_package.get("next_ai_action")
            or "run_no_write_check_mode_replay"
        )
    elif "telegram_receipt" in missing_stage_ids:
        execution_state = "open_waiting_for_live_gateway_receipt"
        next_executor_action = "do_not_fake_send_backfill_wait_for_live_apply_gateway"
    else:
        execution_state = "open_missing_internal_receipts"
        next_executor_action = "backfill_missing_auto_repair_verifier_km_receipts"

    return {
        "schema_version": "ai_agent_autonomous_execution_loop_ledger_v1",
        "project_id": project_id,
        "operation_id": apply_op_id or check_mode_op_id or candidate_op_id or None,
        "root_candidate_op_id": candidate_op_id or None,
        "check_mode_op_id": check_mode_op_id or None,
        "apply_op_id": apply_op_id or None,
        "incident_id": incident_id or None,
        "catalog_id": catalog_id or None,
        "playbook_path": playbook_path or None,
        "execution_state": execution_state,
        "closed": closed,
        "missing_stage_ids": missing_stage_ids,
        "next_executor_action": next_executor_action,
        "stages": stages,
        "safety_contract": {
            "writes_on_read": False,
            "backfill_may_write_auto_repair_verifier_km": True,
            "backfill_may_send_telegram": False,
            "live_apply_may_send_telegram_gateway_receipt": True,
            "reads_raw_sessions": False,
            "reads_secret_values": False,
        },
    }
|
||
|
||
|
||
def _latest_flow_closure(
    *,
    operation_latest_rows: Iterable[Mapping[str, Any] | Any],
    verifier_latest_rows: Iterable[Mapping[str, Any] | Any],
    km_latest_rows: Iterable[Mapping[str, Any] | Any],
    telegram_latest_rows: Iterable[Mapping[str, Any] | Any],
) -> dict[str, Any]:
    """Report whether the first ``ansible_apply_executed`` row has all receipts.

    The flow is ``closed`` only when a post-apply verifier row, a KM
    writeback row and a sent Telegram ``controlled_apply_result`` message
    are all found for that apply. ``missing`` lists the absent receipts;
    with no apply row at all it lists the apply itself plus all three.
    """
    operation_rows = [_row_mapping(row) for row in operation_latest_rows]
    verifier_rows = [_row_mapping(row) for row in verifier_latest_rows]
    km_rows = [_row_mapping(row) for row in km_latest_rows]
    telegram_rows = [_row_mapping(row) for row in telegram_latest_rows]
    latest_apply = next(
        (
            row
            for row in operation_rows
            if str(row.get("operation_type") or "") == "ansible_apply_executed"
        ),
        None,
    )
    if latest_apply is None:
        return {
            "apply_op_id": None,
            "incident_id": None,
            "has_post_apply_verifier": False,
            "has_km_writeback": False,
            "has_telegram_receipt": False,
            "closed": False,
            "missing": [
                "ansible_apply_executed",
                "post_apply_verifier",
                "km_writeback",
                "telegram_receipt",
            ],
        }

    apply_op_id = str(latest_apply.get("op_id") or "")
    incident_id = str(latest_apply.get("incident_id") or "")
    km_path_type = f"ansible_apply_receipt:{apply_op_id[:8]}" if apply_op_id else ""
    # An empty apply op id must never match: otherwise "" == "" would pair
    # the apply with any verifier row that lacks an apply_op_id.
    has_verifier = bool(apply_op_id) and any(
        str(row.get("apply_op_id") or "") == apply_op_id
        for row in verifier_rows
    )
    # Same guard for the KM path type, which is empty without an apply op id.
    has_km = any(
        (
            km_path_type
            and str(row.get("path_type") or "") == km_path_type
        )
        or (
            incident_id
            and str(row.get("related_incident_id") or "") == incident_id
        )
        for row in km_rows
    )
    # Without an incident id, any sent controlled_apply_result message matches.
    has_telegram = any(
        str(row.get("send_status") or "") == "sent"
        and str(row.get("action") or "") == "controlled_apply_result"
        and (
            not incident_id
            or str(row.get("incident_id") or "") == incident_id
        )
        for row in telegram_rows
    )
    missing = [
        name
        for name, present in (
            ("post_apply_verifier", has_verifier),
            ("km_writeback", has_km),
            ("telegram_receipt", has_telegram),
        )
        if not present
    ]
    return {
        "apply_op_id": apply_op_id or None,
        "incident_id": incident_id or None,
        "has_post_apply_verifier": has_verifier,
        "has_km_writeback": has_km,
        "has_telegram_receipt": has_telegram,
        "closed": not missing,
        "missing": missing,
    }
|
||
|
||
|
||
def _latest_failure_classification(
|
||
*,
|
||
operation_latest_rows: Iterable[Mapping[str, Any] | Any],
|
||
verifier_latest_rows: Iterable[Mapping[str, Any] | Any],
|
||
latest_flow_closure: Mapping[str, Any],
|
||
) -> dict[str, Any]:
|
||
"""Classify the newest controlled apply outcome without exposing command output."""
|
||
|
||
operation_rows = [_row_mapping(row) for row in operation_latest_rows]
|
||
verifier_rows = [_row_mapping(row) for row in verifier_latest_rows]
|
||
latest_apply = next(
|
||
(
|
||
row
|
||
for row in operation_rows
|
||
if str(row.get("operation_type") or "") == "ansible_apply_executed"
|
||
),
|
||
None,
|
||
)
|
||
if latest_apply is None:
|
||
return {
|
||
"schema_version": "ai_agent_executor_failure_classification_v1",
|
||
"classification": "no_controlled_apply_observed",
|
||
"action": "wait_for_controlled_apply_receipt",
|
||
"target_selector": {},
|
||
"evidence": {
|
||
"latest_flow_closed": False,
|
||
"output_tail_in_readback": False,
|
||
"unredacted_output_required": False,
|
||
},
|
||
}
|
||
|
||
apply_op_id = str(latest_apply.get("op_id") or "")
|
||
incident_id = str(latest_apply.get("incident_id") or "")
|
||
returncode = _int_value(latest_apply.get("returncode"))
|
||
verifier = next(
|
||
(
|
||
row
|
||
for row in verifier_rows
|
||
if apply_op_id and str(row.get("apply_op_id") or "") == apply_op_id
|
||
),
|
||
{},
|
||
)
|
||
verification_result = str(verifier.get("verification_result") or "").lower()
|
||
latest_flow_closed = latest_flow_closure.get("closed") is True
|
||
|
||
if returncode == 0 and verification_result in {"success", ""} and latest_flow_closed:
|
||
classification = "latest_controlled_apply_closed_success"
|
||
action = "keep_receipt_chain_closed"
|
||
elif returncode == 0:
|
||
classification = "controlled_apply_success_receipt_gap"
|
||
action = "backfill_missing_verifier_km_or_telegram_receipt"
|
||
elif latest_flow_closed:
|
||
classification = "closed_failed_apply_requires_ai_repair"
|
||
action = "queue_check_mode_replay_and_playbook_repair_candidate"
|
||
else:
|
||
classification = "failed_apply_receipt_gap_requires_backfill_then_repair"
|
||
action = "backfill_missing_receipts_then_queue_repair_candidate"
|
||
|
||
return {
|
||
"schema_version": "ai_agent_executor_failure_classification_v1",
|
||
"classification": classification,
|
||
"action": action,
|
||
"target_selector": {
|
||
"incident_id": incident_id or None,
|
||
"apply_op_id": apply_op_id or None,
|
||
"parent_op_id": latest_apply.get("parent_op_id"),
|
||
"catalog_id": latest_apply.get("catalog_id"),
|
||
"playbook_path": latest_apply.get("playbook_path"),
|
||
"execution_mode": latest_apply.get("execution_mode"),
|
||
},
|
||
"evidence": {
|
||
"operation_status": latest_apply.get("status"),
|
||
"returncode": latest_apply.get("returncode"),
|
||
"verification_result": verification_result or None,
|
||
"latest_flow_closed": latest_flow_closed,
|
||
"has_post_apply_verifier": latest_flow_closure.get("has_post_apply_verifier") is True,
|
||
"has_km_writeback": latest_flow_closure.get("has_km_writeback") is True,
|
||
"has_telegram_receipt": latest_flow_closure.get("has_telegram_receipt") is True,
|
||
"output_tail_in_readback": False,
|
||
"unredacted_output_required": False,
|
||
},
|
||
"safe_next_steps": [
|
||
"run_no_write_check_mode_replay",
|
||
"extract_sanitized_failed_task_summary",
|
||
"write_km_playbook_repair_candidate",
|
||
"retry_controlled_apply_only_after_check_mode_passes",
|
||
],
|
||
}
|
||
|
||
|
||
def _controlled_retry_package(classification: Mapping[str, Any]) -> dict[str, Any]:
|
||
"""Build the next no-write repair package from the public failure classification."""
|
||
|
||
target_selector = classification.get("target_selector")
|
||
if not isinstance(target_selector, Mapping):
|
||
target_selector = {}
|
||
apply_op_id = str(target_selector.get("apply_op_id") or "")
|
||
repair_required = classification.get("classification") in {
|
||
"closed_failed_apply_requires_ai_repair",
|
||
"failed_apply_receipt_gap_requires_backfill_then_repair",
|
||
}
|
||
return {
|
||
"schema_version": "ai_agent_controlled_retry_package_v1",
|
||
"package_id": (
|
||
f"ansible_retry:{apply_op_id[:8]}"
|
||
if repair_required and apply_op_id
|
||
else None
|
||
),
|
||
"status": (
|
||
"ready_for_no_write_check_mode_replay"
|
||
if repair_required
|
||
else "not_required_for_latest_apply"
|
||
),
|
||
"target_selector": dict(target_selector),
|
||
"source_of_truth": {
|
||
"catalog_id": target_selector.get("catalog_id"),
|
||
"playbook_path": target_selector.get("playbook_path"),
|
||
"source_diff_required_before_retry": True,
|
||
"failed_task_summary_required": True,
|
||
},
|
||
"preflight": {
|
||
"no_write_check_mode_replay_required": repair_required,
|
||
"reuse_parent_check_mode_op_id": target_selector.get("parent_op_id"),
|
||
"unredacted_output_required": False,
|
||
"secret_value_collection_allowed": False,
|
||
},
|
||
"apply_gate": {
|
||
"controlled_apply_retry_allowed_now": False,
|
||
"opens_legacy_runner": False,
|
||
"requires_check_mode_success_before_apply": repair_required,
|
||
},
|
||
"rollback": {
|
||
"rollback_candidate_required": repair_required,
|
||
"destructive_rollback_allowed": False,
|
||
"rollback_plan_source": "playbook_repair_candidate_after_failed_task_summary",
|
||
},
|
||
"post_apply": {
|
||
"post_apply_verifier_required": repair_required,
|
||
"km_playbook_trust_writeback_required": repair_required,
|
||
"telegram_receipt_required": repair_required,
|
||
},
|
||
"next_ai_action": (
|
||
"run_no_write_check_mode_replay"
|
||
if repair_required
|
||
else "keep_latest_apply_receipts"
|
||
),
|
||
}
|
||
|
||
|
||
def classify_deploy_control_plane_observation(
|
||
*,
|
||
run_status: str,
|
||
is_latest_deploy_intent: bool,
|
||
active_task_container_count: int,
|
||
production_marker_hit: bool,
|
||
latest_flow_closed: bool,
|
||
runner_capacity_ok: bool,
|
||
runner_forbidden_label_count: int,
|
||
) -> dict[str, Any]:
|
||
"""Classify CD/run noise into an internal PlayBook decision."""
|
||
|
||
normalized_status = str(run_status or "unknown").strip().lower()
|
||
has_active_task = active_task_container_count > 0
|
||
runner_lane_safe = runner_capacity_ok and runner_forbidden_label_count == 0
|
||
production_truth_ok = production_marker_hit and latest_flow_closed
|
||
|
||
if not is_latest_deploy_intent:
|
||
classification = "superseded_run_skip"
|
||
action = "skip_cd_work_and_attach_to_superseded_intent"
|
||
elif production_truth_ok and normalized_status == "success":
|
||
classification = "deploy_succeeded_marker_hit"
|
||
action = "close_deploy_intent_and_write_receipts"
|
||
elif normalized_status == "running" and has_active_task and runner_lane_safe:
|
||
classification = "running_with_controlled_task"
|
||
action = "continue_observing_without_restarting_runner"
|
||
elif normalized_status == "running" and not has_active_task and production_truth_ok:
|
||
classification = "running_no_container_stale_ui"
|
||
action = "treat_gitea_spinner_as_stale_and_keep_production_truth"
|
||
elif normalized_status == "failure" and production_truth_ok:
|
||
classification = "failed_run_superseded_by_marker_hit"
|
||
action = "record_non_blocking_failure_and_keep_current_marker"
|
||
elif normalized_status == "failure":
|
||
classification = "real_failure_requires_playbook_repair"
|
||
action = "open_cd_repair_playbook_with_target_selector_and_verifier"
|
||
elif not runner_lane_safe:
|
||
classification = "runner_lane_guardrail_violation"
|
||
action = "fail_closed_runner_lane_and_open_repair_playbook"
|
||
else:
|
||
classification = "waiting_for_controlled_observation"
|
||
action = "wait_for_mcp_observation_or_deploy_intent_update"
|
||
|
||
return {
|
||
"schema_version": "ai_agent_deploy_control_plane_decision_v1",
|
||
"classification": classification,
|
||
"action": action,
|
||
"inputs": {
|
||
"run_status": normalized_status,
|
||
"is_latest_deploy_intent": is_latest_deploy_intent,
|
||
"active_task_container_count": max(0, active_task_container_count),
|
||
"production_marker_hit": production_marker_hit,
|
||
"latest_flow_closed": latest_flow_closed,
|
||
"runner_capacity_ok": runner_capacity_ok,
|
||
"runner_forbidden_label_count": max(0, runner_forbidden_label_count),
|
||
},
|
||
"internal_writeback": {
|
||
"mcp_event_type": "deploy_run_observation",
|
||
"rag_context_required": True,
|
||
"km_writeback_required": True,
|
||
"playbook_route_required": True,
|
||
"log_projection_required": True,
|
||
"telegram_receipt_required": classification in {
|
||
"deploy_succeeded_marker_hit",
|
||
"real_failure_requires_playbook_repair",
|
||
"runner_lane_guardrail_violation",
|
||
},
|
||
},
|
||
"safety_boundary": {
|
||
"reads_raw_sessions": False,
|
||
"reads_secret_values": False,
|
||
"opens_legacy_runner": False,
|
||
"uses_force_push": False,
|
||
"writes_runtime_state": classification in {
|
||
"deploy_succeeded_marker_hit",
|
||
"real_failure_requires_playbook_repair",
|
||
"runner_lane_guardrail_violation",
|
||
},
|
||
},
|
||
}
|
||
|
||
|
||
def _control_plane_integration() -> dict[str, Any]:
    """Describe the internal MCP / RAG / KM / PlayBook / log control loop.

    The payload is static apart from ``classifier_examples``, which are
    produced by running ``classify_deploy_control_plane_observation`` on three
    fixed scenarios (success, running with no container, plain failure).
    """

    # (run_status, production_marker_hit, latest_flow_closed) per example; all
    # other classifier inputs are identical across the examples.
    example_scenarios = (
        ("success", True, True),
        ("running", True, True),
        ("failure", False, False),
    )
    classifier_examples = [
        classify_deploy_control_plane_observation(
            run_status=status,
            is_latest_deploy_intent=True,
            active_task_container_count=0,
            production_marker_hit=marker_hit,
            latest_flow_closed=flow_closed,
            runner_capacity_ok=True,
            runner_forbidden_label_count=0,
        )
        for status, marker_hit, flow_closed in example_scenarios
    ]

    mcp_sensors = [
        {
            "sensor_id": "gitea_actions_run_observer",
            "normalized_event": "RunObservation",
            "raw_secret_access_allowed": False,
        },
        {
            "sensor_id": "controlled_runner_lane_observer",
            "normalized_event": "RunnerLaneState",
            "raw_runner_token_access_allowed": False,
        },
        {
            "sensor_id": "production_marker_observer",
            "normalized_event": "ProductionTruthSnapshot",
            "raw_session_access_allowed": False,
        },
        {
            "sensor_id": "browser_smoke_observer",
            "normalized_event": "FrontendTruthSnapshot",
            "raw_conversation_access_allowed": False,
        },
    ]
    rag_context_queries = [
        "runner_pressure_buildkit_stockplatform_collision",
        "controlled_cd_lane_capacity_label_guardrails",
        "autonomous_runtime_marker_receipt_contract",
    ]
    # NOTE(review): the classifier's fallback class
    # "waiting_for_controlled_observation" is not listed here - confirm
    # whether that omission is intentional.
    playbook_decision_classes = [
        "deploy_succeeded_marker_hit",
        "running_with_controlled_task",
        "running_no_container_stale_ui",
        "superseded_run_skip",
        "failed_run_superseded_by_marker_hit",
        "real_failure_requires_playbook_repair",
        "runner_lane_guardrail_violation",
    ]

    km_writeback_contract = {
        "knowledge_entry_path_type": "deploy_control_plane_decision:<deploy_intent_id>",
        "required_refs": [
            "deploy_intent_id",
            "target_sha",
            "gitea_run_id",
            "production_marker",
            "latest_flow_closure",
            "runner_lane_state",
        ],
        "stores_raw_logs": False,
        "stores_secret_values": False,
    }
    log_projection_contract = {
        "timeline_event_type": "ai_agent_deploy_control_plane_decision",
        "logbook_projection": "summary_only_after_verifier",
        "raw_html_or_long_log_allowed": False,
    }

    return {
        "schema_version": "ai_agent_autonomous_runtime_internal_loop_v1",
        "status": "mcp_rag_km_playbook_log_control_loop_declared",
        "purpose": (
            "把 Gitea run、runner lane、production marker、browser smoke 與 executor receipt "
            "先收斂成內部事件,再由 PlayBook decision 推進或跳過。"
        ),
        "mcp_sensors": mcp_sensors,
        "rag_context_queries": rag_context_queries,
        "playbook_decision_classes": playbook_decision_classes,
        "km_writeback_contract": km_writeback_contract,
        "log_projection_contract": log_projection_contract,
        "classifier_examples": classifier_examples,
        # Counts are taken from the lists above (currently 4, 3, 7 and 3).
        "rollups": {
            "mcp_sensor_count": len(mcp_sensors),
            "rag_context_query_count": len(rag_context_queries),
            "playbook_decision_class_count": len(playbook_decision_classes),
            "classifier_example_count": len(classifier_examples),
        },
    }
|
||
|
||
|
||
def build_runtime_receipt_readback_from_rows(
    *,
    project_id: str = _DEFAULT_PROJECT_ID,
    lookback_hours: int = _DEFAULT_LOOKBACK_HOURS,
    db_read_status: str = "ok",
    operation_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    operation_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    auto_repair_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    auto_repair_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    verifier_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    verifier_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    km_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    km_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    telegram_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    telegram_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    mcp_gateway_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    legacy_mcp_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    service_log_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    executor_log_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    timeline_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    playbook_trust_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    alert_operation_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    alertmanager_event_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    grouped_alert_event_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    log_controlled_writeback_consumer: Mapping[str, Any] | None = None,
    error_type: str | None = None,
) -> dict[str, Any]:
    """Assemble the live executor receipt readback from caller-supplied rows.

    The ``*_count_rows`` inputs are reduced to per-source summaries, the
    ``*_latest_rows`` inputs feed the closure / failure / retry analysis and
    are exposed through key allow-lists. The log controlled-writeback executor
    readback is obtained via ``_load_log_controlled_writeback_executor_readback``;
    the consumer readback is taken from ``log_controlled_writeback_consumer``
    when it is a mapping and from the fallback helper otherwise.

    When ``error_type`` is truthy an ``error`` entry with that type and a
    fixed message is added to the result.
    """

    # Materialise the "latest" iterables once: each list is handed to several
    # helpers below, which a one-shot iterator would not survive.
    latest_operations = list(operation_latest_rows)
    latest_auto_repairs = list(auto_repair_latest_rows)
    latest_verifiers = list(verifier_latest_rows)
    latest_km_entries = list(km_latest_rows)
    latest_telegrams = list(telegram_latest_rows)

    # Per-source count summaries.
    operation_summary = _operation_counts(operation_count_rows)
    auto_repair_summary = _status_counts(auto_repair_count_rows, status_key="result_status")
    verifier_summary = _status_counts(verifier_count_rows, status_key="verification_result")
    km_summary = _status_counts(km_count_rows, status_key="status")
    telegram_summary = _status_counts(telegram_count_rows, status_key="send_status")
    mcp_gateway_summary = _status_counts(mcp_gateway_count_rows, status_key="status")
    legacy_mcp_summary = _status_counts(legacy_mcp_count_rows, status_key="status")
    service_log_summary = _status_counts(service_log_count_rows, status_key="status")
    executor_log_summary = _status_counts(executor_log_count_rows, status_key="status")
    timeline_summary = _status_counts(timeline_count_rows, status_key="status")
    playbook_trust_summary = _status_counts(playbook_trust_count_rows, status_key="status")
    alert_operation_summary = _status_counts(alert_operation_count_rows, status_key="event_type")
    alertmanager_event_summary = _status_counts(alertmanager_event_count_rows, status_key="stage")
    grouped_alert_summary = _status_counts(grouped_alert_event_count_rows, status_key="status")

    # Latest-apply analysis: closure -> failure classification -> retry package.
    flow_closure = _latest_flow_closure(
        operation_latest_rows=latest_operations,
        verifier_latest_rows=latest_verifiers,
        km_latest_rows=latest_km_entries,
        telegram_latest_rows=latest_telegrams,
    )
    failure_classification = _latest_failure_classification(
        operation_latest_rows=latest_operations,
        verifier_latest_rows=latest_verifiers,
        latest_flow_closure=flow_closure,
    )
    retry_package = _controlled_retry_package(failure_classification)
    loop_ledger = _autonomous_execution_loop_ledger(
        project_id=project_id,
        operation_latest_rows=latest_operations,
        verifier_latest_rows=latest_verifiers,
        km_latest_rows=latest_km_entries,
        telegram_latest_rows=latest_telegrams,
        auto_repair_latest_rows=latest_auto_repairs,
        latest_flow_closure=flow_closure,
        latest_failure_classification=failure_classification,
        controlled_retry_package=retry_package,
    )

    # The trace ledger and the log taxonomy take the same eleven summaries.
    pipeline_summaries = {
        "operation_summary": operation_summary,
        "auto_repair_summary": auto_repair_summary,
        "verifier_summary": verifier_summary,
        "km_summary": km_summary,
        "telegram_summary": telegram_summary,
        "mcp_gateway_summary": mcp_gateway_summary,
        "legacy_mcp_summary": legacy_mcp_summary,
        "service_log_summary": service_log_summary,
        "executor_log_summary": executor_log_summary,
        "timeline_summary": timeline_summary,
        "playbook_trust_summary": playbook_trust_summary,
    }
    trace_ledger = _build_trace_ledger(
        **pipeline_summaries,
        latest_flow_closure=flow_closure,
        loop_ledger=loop_ledger,
    )
    log_integration_taxonomy = _build_log_integration_taxonomy(**pipeline_summaries)

    agent_decision_wiring = _build_agent_decision_wiring(
        operation_summary=operation_summary,
        verifier_summary=verifier_summary,
        km_summary=km_summary,
        mcp_gateway_summary=mcp_gateway_summary,
        legacy_mcp_summary=legacy_mcp_summary,
        service_log_summary=service_log_summary,
        timeline_summary=timeline_summary,
        playbook_trust_summary=playbook_trust_summary,
        log_integration_taxonomy=log_integration_taxonomy,
        loop_ledger=loop_ledger,
        latest_flow_closure=flow_closure,
    )
    learning_loop = _build_learning_loop_readback(
        operation_summary=operation_summary,
        verifier_summary=verifier_summary,
        km_summary=km_summary,
        playbook_trust_summary=playbook_trust_summary,
        log_integration_taxonomy=log_integration_taxonomy,
        agent_decision_wiring=agent_decision_wiring,
        latest_flow_closure=flow_closure,
        latest_failure_classification=failure_classification,
        controlled_retry_package=retry_package,
        loop_ledger=loop_ledger,
    )
    alert_noise_reduction = _build_alert_noise_reduction_readback(
        alert_operation_summary=alert_operation_summary,
        alertmanager_event_summary=alertmanager_event_summary,
        grouped_alert_summary=grouped_alert_summary,
        operation_summary=operation_summary,
        agent_decision_wiring=agent_decision_wiring,
        learning_loop=learning_loop,
    )
    host_sustained_load_automation = _build_host_sustained_load_controlled_automation_readback()
    ui_productization = _build_ui_productization_readback()
    multi_product_taxonomy = _build_multi_product_taxonomy_contract(log_integration_taxonomy)
    writeback_executor = _load_log_controlled_writeback_executor_readback()

    # Anything that is not a mapping (including the default None) is replaced
    # by the fallback consumer readback.
    if isinstance(log_controlled_writeback_consumer, Mapping):
        writeback_consumer = log_controlled_writeback_consumer
    else:
        writeback_consumer = _fallback_log_controlled_writeback_consumer_readback()

    work_item_progress = _build_work_item_progress(
        trace_ledger=trace_ledger,
        log_integration_taxonomy=log_integration_taxonomy,
        log_controlled_writeback_executor=writeback_executor,
        log_controlled_writeback_consumer=writeback_consumer,
        agent_decision_wiring=agent_decision_wiring,
        learning_loop=learning_loop,
        alert_noise_reduction=alert_noise_reduction,
        host_sustained_load_automation=host_sustained_load_automation,
        ui_productization=ui_productization,
        multi_product_taxonomy=multi_product_taxonomy,
        db_read_status=db_read_status,
    )
    apply_summary = operation_summary.get("ansible_apply_executed") or {}

    # Allow-lists of keys exposed for each family of "latest" rows.
    operation_keys = (
        "op_id",
        "parent_op_id",
        "operation_type",
        "status",
        "actor",
        "incident_id",
        "catalog_id",
        "playbook_path",
        "execution_mode",
        "source_candidate_op_id",
        "check_mode_op_id",
        "risk_level",
        "controlled_apply_allowed",
        "returncode",
        "duration_ms",
        "created_at",
    )
    auto_repair_keys = (
        "id",
        "incident_id",
        "catalog_id",
        "playbook_name",
        "result_status",
        "triggered_by",
        "risk_level",
        "execution_time_ms",
        "created_at",
    )
    verifier_keys = (
        "id",
        "incident_id",
        "matched_playbook_id",
        "verification_result",
        "apply_op_id",
        "catalog_id",
        "playbook_path",
        "returncode",
        "collected_at",
    )
    km_keys = (
        "id",
        "title",
        "related_incident_id",
        "related_playbook_id",
        "path_type",
        "status",
        "created_by",
        "created_at",
    )
    telegram_keys = (
        "message_id",
        "run_id",
        "message_type",
        "send_status",
        "provider_message_id",
        "incident_id",
        "action",
        "queued_at",
        "sent_at",
    )

    readback = {
        "schema_version": _LIVE_READBACK_SCHEMA_VERSION,
        "project_id": project_id,
        # A falsy lookback falls back to the default; never report less than 1.
        "lookback_hours": max(1, int(lookback_hours or _DEFAULT_LOOKBACK_HOURS)),
        "db_read_status": db_read_status,
        "writes_on_read": False,
        "ansible_operations": {
            "counts": operation_summary,
            "latest": _sanitize_latest_rows(latest_operations, allowed_keys=operation_keys),
        },
        "auto_repair_execution_receipt": {
            **auto_repair_summary,
            "latest": _sanitize_latest_rows(latest_auto_repairs, allowed_keys=auto_repair_keys),
        },
        "ansible_apply_executed": {
            "total": _int_value(apply_summary.get("total")),
            "recent": _int_value(apply_summary.get("recent")),
            "by_status": apply_summary.get("by_status") or {},
        },
        "post_apply_verifier": {
            **verifier_summary,
            "latest": _sanitize_latest_rows(latest_verifiers, allowed_keys=verifier_keys),
        },
        "km_writeback": {
            **km_summary,
            "latest": _sanitize_latest_rows(latest_km_entries, allowed_keys=km_keys),
        },
        "telegram_receipt": {
            **telegram_summary,
            "latest": _sanitize_latest_rows(latest_telegrams, allowed_keys=telegram_keys),
        },
        "mcp_context": {
            "gateway": mcp_gateway_summary,
            "legacy": legacy_mcp_summary,
            # Gateway and legacy MCP figures are added together.
            "total": _trace_total(mcp_gateway_summary) + _trace_total(legacy_mcp_summary),
            "recent": _trace_recent(mcp_gateway_summary) + _trace_recent(legacy_mcp_summary),
        },
        "service_log_evidence": service_log_summary,
        "executor_log_projection": executor_log_summary,
        "timeline_projection": timeline_summary,
        "playbook_trust": playbook_trust_summary,
        "latest_flow_closure": flow_closure,
        "latest_failure_classification": failure_classification,
        "controlled_retry_package": retry_package,
        "autonomous_execution_loop_ledger": loop_ledger,
        "trace_ledger": trace_ledger,
        "log_integration_taxonomy": log_integration_taxonomy,
        "log_controlled_writeback_executor": writeback_executor,
        "log_controlled_writeback_consumer": dict(writeback_consumer),
        "agent_decision_wiring": agent_decision_wiring,
        "learning_loop": learning_loop,
        "alert_noise_reduction": alert_noise_reduction,
        "host_sustained_load_automation": host_sustained_load_automation,
        "ui_productization": ui_productization,
        "multi_product_taxonomy": multi_product_taxonomy,
        "work_item_progress": work_item_progress,
    }

    if error_type:
        # Only the error type is echoed; the message is a fixed string.
        readback["error"] = {
            "type": error_type,
            "message": "runtime receipt DB read failed; see API logs",
        }
    return readback
|
||
|
||
|
||
def _attach_runtime_receipt_readback(
|
||
payload: dict[str, Any],
|
||
readback: dict[str, Any],
|
||
) -> dict[str, Any]:
|
||
payload["runtime_receipt_readback"] = readback
|
||
rollups = payload.setdefault("rollups", {})
|
||
log_executor = readback.get("log_controlled_writeback_executor")
|
||
if not isinstance(log_executor, Mapping):
|
||
log_executor = {}
|
||
log_executor_rollups = log_executor.get("rollups")
|
||
if not isinstance(log_executor_rollups, Mapping):
|
||
log_executor_rollups = {}
|
||
log_executor_context = log_executor.get("agent_consumption_context")
|
||
if not isinstance(log_executor_context, Mapping):
|
||
log_executor_context = {}
|
||
log_executor_queue = log_executor_context.get("next_action_queue")
|
||
if not isinstance(log_executor_queue, list):
|
||
log_executor_queue = []
|
||
log_executor_current_blocker_queue = log_executor_context.get(
|
||
"current_blocker_execution_queue"
|
||
)
|
||
if not isinstance(log_executor_current_blocker_queue, list):
|
||
log_executor_current_blocker_queue = []
|
||
log_executor_blockers = log_executor.get("active_blockers")
|
||
if not isinstance(log_executor_blockers, list):
|
||
log_executor_blockers = []
|
||
log_consumer = readback.get("log_controlled_writeback_consumer")
|
||
if not isinstance(log_consumer, Mapping):
|
||
log_consumer = {}
|
||
log_consumer_rollups = log_consumer.get("rollups")
|
||
if not isinstance(log_consumer_rollups, Mapping):
|
||
log_consumer_rollups = {}
|
||
log_consumer_blockers = log_consumer.get("active_blockers")
|
||
if not isinstance(log_consumer_blockers, list):
|
||
log_consumer_blockers = []
|
||
operation_counts = (readback.get("ansible_operations") or {}).get("counts")
|
||
if not isinstance(operation_counts, Mapping):
|
||
operation_counts = {}
|
||
log_dispatch_summary = (
|
||
operation_counts.get(LOG_CONTROLLED_WRITEBACK_DISPATCH_OPERATION_TYPE) or {}
|
||
)
|
||
rollups.update({
|
||
"live_ansible_apply_executed_count": _int_value(
|
||
readback.get("ansible_apply_executed", {}).get("total")
|
||
),
|
||
"live_auto_repair_execution_receipt_count": _int_value(
|
||
readback.get("auto_repair_execution_receipt", {}).get("total")
|
||
),
|
||
"live_post_apply_verifier_count": _int_value(
|
||
readback.get("post_apply_verifier", {}).get("total")
|
||
),
|
||
"live_km_writeback_count": _int_value(
|
||
readback.get("km_writeback", {}).get("total")
|
||
),
|
||
"live_telegram_receipt_count": _int_value(
|
||
readback.get("telegram_receipt", {}).get("total")
|
||
),
|
||
"live_executor_latest_flow_closed_count": (
|
||
1
|
||
if (readback.get("latest_flow_closure") or {}).get("closed") is True
|
||
else 0
|
||
),
|
||
"live_autonomous_execution_loop_closed_count": (
|
||
1
|
||
if (readback.get("autonomous_execution_loop_ledger") or {}).get("closed") is True
|
||
else 0
|
||
),
|
||
"live_executor_latest_apply_repair_required_count": (
|
||
1
|
||
if (
|
||
(readback.get("latest_failure_classification") or {}).get("classification")
|
||
in {
|
||
"closed_failed_apply_requires_ai_repair",
|
||
"failed_apply_receipt_gap_requires_backfill_then_repair",
|
||
}
|
||
)
|
||
else 0
|
||
),
|
||
"live_executor_retry_package_ready_count": (
|
||
1
|
||
if (readback.get("controlled_retry_package") or {}).get("status")
|
||
== "ready_for_no_write_check_mode_replay"
|
||
else 0
|
||
),
|
||
"live_mcp_context_count": _int_value(readback.get("mcp_context", {}).get("total")),
|
||
"live_service_log_evidence_count": _int_value(
|
||
readback.get("service_log_evidence", {}).get("total")
|
||
),
|
||
"live_executor_log_projection_count": _int_value(
|
||
readback.get("executor_log_projection", {}).get("total")
|
||
),
|
||
"live_timeline_projection_count": _int_value(
|
||
readback.get("timeline_projection", {}).get("total")
|
||
),
|
||
"live_playbook_trust_signal_count": _int_value(
|
||
readback.get("playbook_trust", {}).get("total")
|
||
),
|
||
"live_trace_recorded_stage_count": _int_value(
|
||
readback.get("trace_ledger", {}).get("recorded_stage_count")
|
||
),
|
||
"live_trace_required_missing_count": len(
|
||
(readback.get("trace_ledger") or {}).get("missing_required_stage_ids") or []
|
||
),
|
||
"live_log_source_family_count": _int_value(
|
||
((readback.get("log_integration_taxonomy") or {}).get("rollups") or {}).get(
|
||
"source_family_count"
|
||
)
|
||
),
|
||
"live_log_active_source_family_count": _int_value(
|
||
((readback.get("log_integration_taxonomy") or {}).get("rollups") or {}).get(
|
||
"active_source_family_count"
|
||
)
|
||
),
|
||
"live_log_label_dimension_count": _int_value(
|
||
((readback.get("log_integration_taxonomy") or {}).get("rollups") or {}).get(
|
||
"label_dimension_count"
|
||
)
|
||
),
|
||
"live_log_classified_event_total": _int_value(
|
||
((readback.get("log_integration_taxonomy") or {}).get("rollups") or {}).get(
|
||
"classified_event_total"
|
||
)
|
||
),
|
||
"live_log_recent_classified_event_total": _int_value(
|
||
((readback.get("log_integration_taxonomy") or {}).get("rollups") or {}).get(
|
||
"recent_classified_event_total"
|
||
)
|
||
),
|
||
"live_log_controlled_writeback_executor_batch_count": _int_value(
|
||
log_executor_rollups.get("execution_batch_count")
|
||
),
|
||
"live_log_controlled_writeback_executor_ready_batch_count": _int_value(
|
||
log_executor_rollups.get("ready_execution_batch_count")
|
||
),
|
||
"live_log_controlled_writeback_executor_ready_count": (
|
||
1
|
||
if log_executor.get("status") == "controlled_writeback_executor_ready"
|
||
else 0
|
||
),
|
||
"live_log_controlled_writeback_executor_blocker_count": len(
|
||
log_executor_blockers
|
||
),
|
||
"live_log_controlled_writeback_next_action_queue_count": len(
|
||
log_executor_queue
|
||
),
|
||
"live_log_controlled_writeback_current_blocker_queue_count": len(
|
||
log_executor_current_blocker_queue
|
||
),
|
||
"live_log_controlled_writeback_current_blocker_control_path_blocked_count": (
|
||
_int_value(
|
||
log_executor_rollups.get("current_blocker_control_path_blocked_count")
|
||
)
|
||
),
|
||
"live_log_controlled_writeback_current_blocker_local_recovery_package_count": (
|
||
_int_value(
|
||
log_executor_rollups.get("current_blocker_local_recovery_package_count")
|
||
)
|
||
),
|
||
"live_log_controlled_writeback_dispatch_count": _int_value(
|
||
log_dispatch_summary.get("total")
|
||
),
|
||
"live_log_controlled_writeback_recent_dispatch_count": _int_value(
|
||
log_dispatch_summary.get("recent")
|
||
),
|
||
"live_log_controlled_writeback_consumer_binding_count": _int_value(
|
||
log_consumer_rollups.get("consumer_binding_count")
|
||
),
|
||
"live_log_controlled_writeback_consumer_ready_binding_count": _int_value(
|
||
log_consumer_rollups.get("ready_consumer_binding_count")
|
||
),
|
||
"live_log_controlled_writeback_consumer_ready_target_count": _int_value(
|
||
log_consumer_rollups.get("ready_target_count")
|
||
),
|
||
"live_log_controlled_writeback_consumer_ready_count": (
|
||
1
|
||
if log_consumer.get("status")
|
||
== "controlled_writeback_consumer_readback_ready"
|
||
else 0
|
||
),
|
||
"live_log_controlled_writeback_consumer_blocker_count": len(
|
||
log_consumer_blockers
|
||
),
|
||
"live_log_controlled_writeback_consumer_metadata_only_count": _int_value(
|
||
log_consumer_rollups.get("metadata_only_receipt_count")
|
||
),
|
||
"live_log_controlled_writeback_consumer_verifier_ref_count": _int_value(
|
||
log_consumer_rollups.get("post_apply_verifier_ref_count")
|
||
),
|
||
"live_log_controlled_writeback_km_consumer_binding_count": _int_value(
|
||
log_consumer_rollups.get("km_consumer_binding_count")
|
||
),
|
||
"live_log_controlled_writeback_rag_consumer_binding_count": _int_value(
|
||
log_consumer_rollups.get("rag_consumer_binding_count")
|
||
),
|
||
"live_log_controlled_writeback_playbook_consumer_binding_count": _int_value(
|
||
log_consumer_rollups.get("playbook_consumer_binding_count")
|
||
),
|
||
"live_log_controlled_writeback_mcp_consumer_binding_count": _int_value(
|
||
log_consumer_rollups.get("mcp_consumer_binding_count")
|
||
),
|
||
"live_log_controlled_writeback_verifier_consumer_binding_count": _int_value(
|
||
log_consumer_rollups.get("verifier_consumer_binding_count")
|
||
),
|
||
"live_log_controlled_writeback_ai_agent_consumer_binding_count": _int_value(
|
||
log_consumer_rollups.get("ai_agent_consumer_binding_count")
|
||
),
|
||
"live_agent_decision_wiring_stage_count": _int_value(
|
||
((readback.get("agent_decision_wiring") or {}).get("rollups") or {}).get(
|
||
"stage_count"
|
||
)
|
||
),
|
||
"live_agent_decision_wiring_required_present_count": _int_value(
|
||
((readback.get("agent_decision_wiring") or {}).get("rollups") or {}).get(
|
||
"required_stage_present_count"
|
||
)
|
||
),
|
||
"live_agent_decision_wiring_required_missing_count": _int_value(
|
||
((readback.get("agent_decision_wiring") or {}).get("rollups") or {}).get(
|
||
"required_stage_missing_count"
|
||
)
|
||
),
|
||
"live_agent_decision_wiring_complete_count": (
|
||
1
|
||
if (readback.get("agent_decision_wiring") or {}).get("status") == "completed"
|
||
else 0
|
||
),
|
||
"live_learning_loop_stage_count": _int_value(
|
||
((readback.get("learning_loop") or {}).get("rollups") or {}).get(
|
||
"stage_count"
|
||
)
|
||
),
|
||
"live_learning_loop_required_present_count": _int_value(
|
||
((readback.get("learning_loop") or {}).get("rollups") or {}).get(
|
||
"required_stage_present_count"
|
||
)
|
||
),
|
||
"live_learning_loop_required_missing_count": _int_value(
|
||
((readback.get("learning_loop") or {}).get("rollups") or {}).get(
|
||
"required_stage_missing_count"
|
||
)
|
||
),
|
||
"live_learning_loop_complete_count": (
|
||
1
|
||
if (readback.get("learning_loop") or {}).get("status") == "completed"
|
||
else 0
|
||
),
|
||
"live_learning_loop_similar_case_source_count": _int_value(
|
||
((readback.get("learning_loop") or {}).get("rollups") or {}).get(
|
||
"similar_case_source_total"
|
||
)
|
||
),
|
||
"live_alert_noise_stage_count": _int_value(
|
||
((readback.get("alert_noise_reduction") or {}).get("rollups") or {}).get(
|
||
"stage_count"
|
||
)
|
||
),
|
||
"live_alert_noise_required_present_count": _int_value(
|
||
((readback.get("alert_noise_reduction") or {}).get("rollups") or {}).get(
|
||
"required_stage_present_count"
|
||
)
|
||
),
|
||
"live_alert_noise_required_missing_count": _int_value(
|
||
((readback.get("alert_noise_reduction") or {}).get("rollups") or {}).get(
|
||
"required_stage_missing_count"
|
||
)
|
||
),
|
||
"live_alert_noise_complete_count": (
|
||
1
|
||
if (readback.get("alert_noise_reduction") or {}).get("status") == "completed"
|
||
else 0
|
||
),
|
||
"live_alert_noise_suppressed_count": _int_value(
|
||
((readback.get("alert_noise_reduction") or {}).get("rollups") or {}).get(
|
||
"suppressed_alert_total"
|
||
)
|
||
),
|
||
"live_alert_noise_controlled_route_count": _int_value(
|
||
((readback.get("alert_noise_reduction") or {}).get("rollups") or {}).get(
|
||
"controlled_route_total"
|
||
)
|
||
),
|
||
"live_host_sustained_load_action_class_count": _int_value(
|
||
((readback.get("host_sustained_load_automation") or {}).get("rollups") or {}).get(
|
||
"action_class_count"
|
||
)
|
||
),
|
||
"live_host_sustained_load_controlled_apply_class_count": _int_value(
|
||
((readback.get("host_sustained_load_automation") or {}).get("rollups") or {}).get(
|
||
"controlled_apply_class_count"
|
||
)
|
||
),
|
||
"live_host_sustained_load_ready_asset_count": _int_value(
|
||
((readback.get("host_sustained_load_automation") or {}).get("rollups") or {}).get(
|
||
"ready_asset_count"
|
||
)
|
||
),
|
||
"live_host_sustained_load_complete_count": (
|
||
1
|
||
if (readback.get("host_sustained_load_automation") or {}).get("status")
|
||
== "completed"
|
||
else 0
|
||
),
|
||
"live_ui_productization_surface_count": _int_value(
|
||
((readback.get("ui_productization") or {}).get("rollups") or {}).get(
|
||
"surface_count"
|
||
)
|
||
),
|
||
"live_ui_productization_required_present_count": _int_value(
|
||
((readback.get("ui_productization") or {}).get("rollups") or {}).get(
|
||
"required_surface_present_count"
|
||
)
|
||
),
|
||
"live_ui_productization_required_missing_count": _int_value(
|
||
((readback.get("ui_productization") or {}).get("rollups") or {}).get(
|
||
"required_surface_missing_count"
|
||
)
|
||
),
|
||
"live_ui_productization_complete_count": (
|
||
1
|
||
if (readback.get("ui_productization") or {}).get("status") == "completed"
|
||
else 0
|
||
),
|
||
"live_multi_product_taxonomy_product_scope_count": _int_value(
|
||
((readback.get("multi_product_taxonomy") or {}).get("rollups") or {}).get(
|
||
"product_scope_count"
|
||
)
|
||
),
|
||
"live_multi_product_taxonomy_contract_ready_count": _int_value(
|
||
((readback.get("multi_product_taxonomy") or {}).get("rollups") or {}).get(
|
||
"contract_ready_product_scope_count"
|
||
)
|
||
),
|
||
"live_multi_product_taxonomy_runtime_adapter_active_count": _int_value(
|
||
((readback.get("multi_product_taxonomy") or {}).get("rollups") or {}).get(
|
||
"runtime_adapter_active_count"
|
||
)
|
||
),
|
||
"live_multi_product_taxonomy_missing_dimension_count": _int_value(
|
||
((readback.get("multi_product_taxonomy") or {}).get("rollups") or {}).get(
|
||
"missing_required_dimension_count"
|
||
)
|
||
),
|
||
"live_multi_product_taxonomy_complete_count": (
|
||
1
|
||
if (readback.get("multi_product_taxonomy") or {}).get("status") == "completed"
|
||
else 0
|
||
),
|
||
"live_work_item_count": _int_value(
|
||
((readback.get("work_item_progress") or {}).get("rollups") or {}).get(
|
||
"work_item_count"
|
||
)
|
||
),
|
||
"live_work_item_completed_count": _int_value(
|
||
((readback.get("work_item_progress") or {}).get("rollups") or {}).get(
|
||
"completed_count"
|
||
)
|
||
),
|
||
"live_work_item_in_progress_count": _int_value(
|
||
((readback.get("work_item_progress") or {}).get("rollups") or {}).get(
|
||
"in_progress_count"
|
||
)
|
||
),
|
||
"live_work_item_pending_count": _int_value(
|
||
((readback.get("work_item_progress") or {}).get("rollups") or {}).get(
|
||
"pending_count"
|
||
)
|
||
),
|
||
"live_work_item_blocked_count": _int_value(
|
||
((readback.get("work_item_progress") or {}).get("rollups") or {}).get(
|
||
"blocked_count"
|
||
)
|
||
),
|
||
})
|
||
return payload
|
||
|
||
|
||
def build_ai_agent_autonomous_runtime_control() -> dict[str, Any]:
    """Assemble the current AI Agent autonomy control-plane readback.

    The payload is built from static directive data (report cadences, executor
    receipt catalogue, hard blockers, legacy overrides, agent roles), runtime
    switches read from ``settings`` and rollups derived from those sections.
    A receipt readback built with ``db_read_status="not_queried"`` is attached
    and the payload is passed through ``_validate_payload`` before returning.

    Returns:
        The validated control-plane payload.

    Raises:
        ValueError: Propagated from ``_validate_payload`` when the payload does
            not satisfy the directive contract.
    """

    allowed_risks = _allowed_risk_levels()

    def _cadence_entry(cadence: str, display_name: str, schedule: str) -> dict[str, Any]:
        # All cadences share the same delivery flags; only the cadence name,
        # its label and its schedule text vary.
        return {
            "cadence": cadence,
            "display_name": display_name,
            "schedule": schedule,
            "worker": f"report_generation_service.run_{cadence}_report_loop",
            "telegram_gateway_delivery_enabled": True,
            "direct_bot_api_allowed": False,
            "receipt_source": f"{cadence}_report_sent log + Telegram Gateway result",
        }

    # Weekday 4 gets the dedicated Friday label; any other configured weekday
    # is rendered with a generic label that echoes the raw weekday number.
    if WEEKLY_REPORT_WEEKDAY_TAIPEI == 4:
        weekly_schedule = f"每週五 {WEEKLY_REPORT_HOUR_TAIPEI:02d}:00 台北時間"
    else:
        weekly_schedule = (
            f"每週 weekday={WEEKLY_REPORT_WEEKDAY_TAIPEI} {WEEKLY_REPORT_HOUR_TAIPEI:02d}:00 台北時間"
        )

    report_cadences = [
        _cadence_entry("daily", "日報", f"每日 {DAILY_REPORT_HOUR_TAIPEI:02d}:00 台北時間"),
        _cadence_entry("weekly", "週報", weekly_schedule),
        _cadence_entry(
            "monthly",
            "月報",
            f"每月 {MONTHLY_REPORT_DAY_TAIPEI} 日 {MONTHLY_REPORT_HOUR_TAIPEI:02d}:00 台北時間",
        ),
    ]

    # (operation_type, owner_agent, purpose, writes_runtime_state)
    receipt_specs = (
        (
            "ansible_candidate_matched",
            "Hermes",
            "把修復候選寫入 executor 可認領佇列",
            False,
        ),
        (
            "ansible_check_mode_executed",
            "AwoooP Ansible check-mode worker",
            "執行 ansible-playbook --check --diff 並留下乾跑收據",
            False,
        ),
        (
            "ansible_apply_executed",
            "AwoooP controlled apply worker",
            "check-mode 通過後,對 allowlisted low / medium / high PlayBook 受控 apply",
            True,
        ),
        (
            "incident_evidence.post_execution_state",
            "post_apply_verifier",
            "apply 後寫入 verifier 結果與 post-execution evidence",
            True,
        ),
        (
            "knowledge_entries",
            "Hermes",
            "把已驗證執行沉澱成 KM / PlayBook trust 候選",
            True,
        ),
        (
            LOG_CONTROLLED_WRITEBACK_DISPATCH_OPERATION_TYPE,
            "ai_agent_metadata_writeback_executor",
            "把 LOG feedback batch 寫入 metadata-only controlled dispatch ledger",
            True,
        ),
    )
    executor_receipts = [
        {
            "operation_type": operation_type,
            "owner_agent": owner_agent,
            "purpose": purpose,
            "writes_runtime_state": writes_runtime_state,
        }
        for operation_type, owner_agent, purpose, writes_runtime_state in receipt_specs
    ]

    hard_blockers = [
        "secret_token_private_key_cookie_session_auth_header_cleartext",
        "drop_truncate_restore_prune_destructive_database_operation",
        "reboot_node_drain_irreversible_firewall_or_host_lockout",
        "credentialed_exploit_or_external_active_scan",
        "new_paid_provider_cost_ceiling_or_provider_switch_without_replay_shadow_canary",
        "force_push_delete_repo_refs_or_visibility_change",
        "critical_or_break_glass_route_without_explicit_break_glass_contract",
    ]

    # (legacy_area, current_effect, new_behavior)
    override_specs = (
        (
            "report_status_board_no_live_send",
            "overridden",
            "日報 / 週報 / 月報透過 Telegram Gateway 排程派送",
        ),
        (
            "report_live_delivery_owner_review_required",
            "overridden",
            "報告派送走低/中/高風險自動化政策;critical 才 break-glass",
        ),
        (
            "high_risk_owner_review_queue",
            "overridden_for_high_non_critical",
            "high 風險允許 controlled apply;critical / hard blocker 仍不自動",
        ),
        (
            "telegram_no_send_preview_only",
            "overridden",
            "用 Telegram Gateway 實送報告與 actionable receipt;不直接暴露 Bot API",
        ),
    )
    legacy_overrides = [
        {
            "legacy_area": legacy_area,
            "current_effect": current_effect,
            "new_behavior": new_behavior,
        }
        for legacy_area, current_effect, new_behavior in override_specs
    ]

    control_plane_integration = _control_plane_integration()
    integration_rollups = control_plane_integration["rollups"]

    program_status = {
        "current_task_id": "P2-416-D1N",
        "status": "current_directive_control_plane_active",
        "runtime_authority": _RUNTIME_AUTHORITY,
        "deploy_readback_marker": _DEPLOY_READBACK_MARKER,
        "deploy_attempt_note": _DEPLOY_ATTEMPT_NOTE,
        "legacy_no_send_no_live_rules_overridden": True,
        "implementation_completion_percent": 100,
        "status_note": (
            "目前有效規則:low / medium / high 風險由 AI Agent 在 allowlist、"
            "Ansible check-mode、verifier、rollback、KM 與 Telegram receipt 下受控自動處理。"
        ),
    }

    current_policy = {
        "low_risk_controlled_apply_allowed": "low" in allowed_risks,
        "medium_risk_controlled_apply_allowed": "medium" in allowed_risks,
        "high_risk_controlled_apply_allowed": "high" in allowed_risks,
        "critical_break_glass_required": True,
        "owner_review_required_for_low_medium_high": False,
        "direct_bot_api_allowed": False,
        "telegram_gateway_required": True,
        "post_apply_verifier_required": True,
        "km_learning_writeback_required": True,
    }

    runtime_switches = {
        "ansible_check_mode_worker_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER),
        "ansible_controlled_apply_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CONTROLLED_APPLY),
        "ansible_controlled_apply_allowed_risk_levels": allowed_risks,
        "ansible_check_mode_interval_seconds": settings.AWOOOP_ANSIBLE_CHECK_MODE_INTERVAL_SECONDS,
        "ansible_check_mode_batch_limit": settings.AWOOOP_ANSIBLE_CHECK_MODE_BATCH_LIMIT,
        "ansible_check_mode_timeout_seconds": settings.AWOOOP_ANSIBLE_CHECK_MODE_TIMEOUT_SECONDS,
        "ansible_controlled_apply_timeout_seconds": settings.AWOOOP_ANSIBLE_CONTROLLED_APPLY_TIMEOUT_SECONDS,
    }

    # (agent_id, role, current_job)
    role_specs = (
        (
            "openclaw",
            "仲裁 / hard blocker / replay-shadow-canary gate",
            "只阻擋真正 critical 與 hard blocker,不再用身份保護舊架構",
        ),
        (
            "hermes",
            "報告 / Telegram digest / KM 與 PlayBook trust writeback",
            "日週月報、收據摘要與 verifier 後學習沉澱",
        ),
        (
            "nemotron",
            "市場技術雷達 / no-write replay / challenger scorecard",
            "用市場與回放數據挑戰 OpenClaw / provider / Agent 組合",
        ),
        (
            "awooop_ansible_worker",
            "executor",
            "candidate → check-mode → controlled apply → verifier → KM",
        ),
        (
            "telegram_ops",
            "Telegram Gateway receipt",
            "群組報告、actionable receipt、失敗告警;不展示敏感值或未脫敏資料",
        ),
    )
    agent_roles = [
        {"agent_id": agent_id, "role": role, "current_job": current_job}
        for agent_id, role, current_job in role_specs
    ]

    # The executor status reflects whether controlled apply is switched on in config.
    if settings.ENABLE_AWOOOP_ANSIBLE_CONTROLLED_APPLY:
        executor_status = "check_mode_then_apply_enabled"
    else:
        executor_status = "check_mode_only_by_config"

    controlled_executor = {
        "status": executor_status,
        "operation_receipts": executor_receipts,
        "required_flow": [
            "allowlisted_candidate",
            "ansible_check_mode_success",
            "controlled_apply",
            "post_apply_verifier",
            "auto_repair_execution_receipt",
            "km_learning_writeback",
            "telegram_receipt_or_alert",
        ],
    }

    visibility_contract = {
        "frontend_displays_runtime_truth": True,
        "work_window_transcript_display_allowed": False,
        "prompt_body_display_allowed": False,
        "internal_reasoning_display_allowed": False,
        "sensitive_value_display_allowed": False,
        "telegram_unredacted_payload_display_allowed": False,
        "lan_topology_redaction_required": True,
    }

    gateway_enabled_cadences = [
        entry for entry in report_cadences if entry["telegram_gateway_delivery_enabled"]
    ]
    runtime_writing_receipts = [
        receipt for receipt in executor_receipts if receipt["writes_runtime_state"]
    ]
    automated_tiers = [tier for tier in ("low", "medium", "high") if tier in allowed_risks]

    rollups = {
        "automated_risk_tier_count": len(automated_tiers),
        "hard_blocker_count": len(hard_blockers),
        "report_cadence_enabled_count": len(report_cadences),
        "telegram_gateway_delivery_enabled_count": len(gateway_enabled_cadences),
        "direct_bot_api_allowed_count": 0,
        "controlled_executor_operation_receipt_count": len(executor_receipts),
        "runtime_write_receipt_type_count": len(runtime_writing_receipts),
        "legacy_policy_overridden_count": len(legacy_overrides),
        "mcp_sensor_count": integration_rollups["mcp_sensor_count"],
        "rag_context_query_count": integration_rollups["rag_context_query_count"],
        "playbook_decision_class_count": integration_rollups["playbook_decision_class_count"],
        "deploy_control_classifier_example_count": integration_rollups["classifier_example_count"],
    }

    payload = {
        "schema_version": _SCHEMA_VERSION,
        "generated_at": datetime.now(UTC).isoformat(),
        "program_status": program_status,
        "current_policy": current_policy,
        "runtime_switches": runtime_switches,
        "agent_roles": agent_roles,
        "report_delivery": {
            "status": "telegram_gateway_delivery_enabled",
            "cadences": report_cadences,
        },
        "controlled_executor": controlled_executor,
        "control_plane_integration": control_plane_integration,
        "legacy_policy_overrides": legacy_overrides,
        "hard_blockers": hard_blockers,
        "visibility_contract": visibility_contract,
        "rollups": rollups,
    }

    # No DB is touched here: attach a readback marked "not_queried".
    not_queried_readback = build_runtime_receipt_readback_from_rows(
        project_id=_DEFAULT_PROJECT_ID,
        db_read_status="not_queried",
    )
    _attach_runtime_receipt_readback(payload, not_queried_readback)
    _validate_payload(payload)
    return payload
|
||
|
||
|
||
async def load_ai_agent_autonomous_runtime_receipt_readback(
    *,
    project_id: str = _DEFAULT_PROJECT_ID,
    lookback_hours: int = _DEFAULT_LOOKBACK_HOURS,
    limit: int = 20,
) -> dict[str, Any]:
    """Read live executor receipts without sending messages or mutating runtime state.

    Ten required receipt queries (operation log, auto-repair, verifier, KM and
    Telegram counts / latest rows) run first. If any of them fails, or the DB
    context cannot be used, the function logs a warning and returns a readback
    whose ``db_read_status`` is ``"unavailable"``. The auxiliary trace counts
    are best-effort: each one runs in its own SAVEPOINT so that a failing query
    degrades to its fallback SQL (when given) or to an empty row list without
    poisoning the queries that follow it.

    Args:
        project_id: Project whose DB context is opened; also bound as
            ``:project_id``.
        lookback_hours: Bound as ``:lookback_hours``. Falsy values fall back to
            ``_DEFAULT_LOOKBACK_HOURS``; the value is clamped to at least 1.
        limit: Bound as ``:limit``. Falsy values fall back to 20; the value is
            clamped to at least 1.

    Returns:
        The mapping produced by ``build_runtime_receipt_readback_from_rows``,
        with ``db_read_status`` of ``"ok"`` or ``"unavailable"``.
    """

    params = {
        "project_id": project_id,
        "lookback_hours": max(1, int(lookback_hours or _DEFAULT_LOOKBACK_HOURS)),
        "limit": max(1, int(limit or 20)),
    }
    try:
        async with get_db_context(project_id) as db:
            await db.execute(text("SET LOCAL statement_timeout = '5000ms'"))

            async def _rows(sql: str) -> list[Mapping[str, Any]]:
                # Required queries: any exception propagates to the outer handler.
                return (await db.execute(text(sql), params)).mappings().all()

            async def _isolated_rows(sql: str) -> list[Mapping[str, Any]]:
                # On PostgreSQL a failed statement aborts the enclosing
                # transaction, so without a SAVEPOINT the fallback SQL and every
                # later query would fail as well. begin_nested() rolls back only
                # this statement on error.
                # NOTE(review): assumes ``db`` is a SQLAlchemy async session or
                # connection; objects without begin_nested() keep the previous
                # direct-execute behaviour.
                begin_nested = getattr(db, "begin_nested", None)
                if begin_nested is None:
                    return await _rows(sql)
                async with begin_nested():
                    return await _rows(sql)

            async def _safe_aux_rows(
                query_name: str,
                sql: str,
                fallback_sql: str | None = None,
            ) -> list[Mapping[str, Any]]:
                try:
                    return await _isolated_rows(sql)
                except Exception as exc:  # pragma: no cover - depends on live schema drift
                    logger.warning(
                        "ai_agent_autonomous_runtime_trace_aux_read_failed",
                        project_id=project_id,
                        query_name=query_name,
                        error_type=type(exc).__name__,
                    )
                if fallback_sql:
                    try:
                        return await _isolated_rows(fallback_sql)
                    except Exception as fallback_exc:  # pragma: no cover - live schema drift
                        logger.warning(
                            "ai_agent_autonomous_runtime_trace_aux_fallback_failed",
                            project_id=project_id,
                            query_name=query_name,
                            error_type=type(fallback_exc).__name__,
                        )
                return []

            operation_counts = await _rows(_RUNTIME_OPERATION_COUNTS_SQL)
            operation_latest = await _rows(_RUNTIME_OPERATION_LATEST_SQL)
            auto_repair_counts = await _rows(_RUNTIME_AUTO_REPAIR_COUNTS_SQL)
            auto_repair_latest = await _rows(_RUNTIME_AUTO_REPAIR_LATEST_SQL)
            verifier_counts = await _rows(_RUNTIME_VERIFIER_COUNTS_SQL)
            verifier_latest = await _rows(_RUNTIME_VERIFIER_LATEST_SQL)
            km_counts = await _rows(_RUNTIME_KM_COUNTS_SQL)
            km_latest = await _rows(_RUNTIME_KM_LATEST_SQL)
            telegram_counts = await _rows(_RUNTIME_TELEGRAM_COUNTS_SQL)
            telegram_latest = await _rows(_RUNTIME_TELEGRAM_LATEST_SQL)

            mcp_gateway_counts = await _safe_aux_rows(
                "mcp_gateway_counts",
                _RUNTIME_MCP_GATEWAY_COUNTS_SQL,
            )
            legacy_mcp_counts = await _safe_aux_rows(
                "legacy_mcp_counts",
                _RUNTIME_LEGACY_MCP_COUNTS_SQL,
            )
            service_log_counts = await _safe_aux_rows(
                "service_log_counts",
                _RUNTIME_SERVICE_LOG_COUNTS_SQL,
            )
            executor_log_counts = await _safe_aux_rows(
                "executor_log_counts",
                _RUNTIME_EXECUTOR_LOG_COUNTS_SQL,
            )
            timeline_counts = await _safe_aux_rows(
                "timeline_counts",
                _RUNTIME_TIMELINE_COUNTS_SQL,
                _RUNTIME_TIMELINE_COUNTS_FALLBACK_SQL,
            )
            playbook_trust_counts = await _safe_aux_rows(
                "playbook_trust_counts",
                _RUNTIME_PLAYBOOK_TRUST_COUNTS_SQL,
                _RUNTIME_PLAYBOOK_TRUST_COUNTS_FALLBACK_SQL,
            )
            alert_operation_counts = await _safe_aux_rows(
                "alert_operation_counts",
                _RUNTIME_ALERT_OPERATION_COUNTS_SQL,
            )
            alertmanager_event_counts = await _safe_aux_rows(
                "alertmanager_event_counts",
                _RUNTIME_ALERTMANAGER_EVENT_COUNTS_SQL,
            )
            grouped_alert_event_counts = await _safe_aux_rows(
                "grouped_alert_event_counts",
                _RUNTIME_GROUPED_ALERT_EVENT_COUNTS_SQL,
            )
            log_controlled_writeback_consumer = (
                await _load_log_controlled_writeback_consumer_readback(
                    project_id=project_id,
                )
            )
    except Exception as exc:
        # Top-level boundary: report the failure type and degrade to an
        # "unavailable" readback instead of raising to the caller.
        logger.warning(
            "ai_agent_autonomous_runtime_receipt_readback_failed",
            project_id=project_id,
            error_type=type(exc).__name__,
        )
        return build_runtime_receipt_readback_from_rows(
            project_id=project_id,
            lookback_hours=params["lookback_hours"],
            db_read_status="unavailable",
            error_type=type(exc).__name__,
        )

    return build_runtime_receipt_readback_from_rows(
        project_id=project_id,
        lookback_hours=params["lookback_hours"],
        db_read_status="ok",
        operation_count_rows=operation_counts,
        operation_latest_rows=operation_latest,
        auto_repair_count_rows=auto_repair_counts,
        auto_repair_latest_rows=auto_repair_latest,
        verifier_count_rows=verifier_counts,
        verifier_latest_rows=verifier_latest,
        km_count_rows=km_counts,
        km_latest_rows=km_latest,
        telegram_count_rows=telegram_counts,
        telegram_latest_rows=telegram_latest,
        mcp_gateway_count_rows=mcp_gateway_counts,
        legacy_mcp_count_rows=legacy_mcp_counts,
        service_log_count_rows=service_log_counts,
        executor_log_count_rows=executor_log_counts,
        timeline_count_rows=timeline_counts,
        playbook_trust_count_rows=playbook_trust_counts,
        alert_operation_count_rows=alert_operation_counts,
        alertmanager_event_count_rows=alertmanager_event_counts,
        grouped_alert_event_count_rows=grouped_alert_event_counts,
        log_controlled_writeback_consumer=log_controlled_writeback_consumer,
    )
|
||
|
||
|
||
async def build_ai_agent_autonomous_runtime_control_with_live_readback(
    *,
    project_id: str = _DEFAULT_PROJECT_ID,
    lookback_hours: int = _DEFAULT_LOOKBACK_HOURS,
) -> dict[str, Any]:
    """Return the control-plane payload with a live DB receipt readback attached.

    The static payload is built first, then the receipt readback loaded for
    ``project_id`` / ``lookback_hours`` is attached to it and the combined
    payload is validated again.

    Args:
        project_id: Project passed to the receipt readback loader.
        lookback_hours: Lookback window passed to the receipt readback loader.

    Returns:
        The validated control-plane payload carrying the loaded readback.

    Raises:
        ValueError: Propagated from ``_validate_payload``.
    """

    control_payload = build_ai_agent_autonomous_runtime_control()
    live_readback = await load_ai_agent_autonomous_runtime_receipt_readback(
        lookback_hours=lookback_hours,
        project_id=project_id,
    )
    # Attaching again supersedes the readback the static builder attached.
    _attach_runtime_receipt_readback(control_payload, live_readback)
    _validate_payload(control_payload)
    return control_payload
|
||
|
||
|
||
# Per (operation_type, status) totals from automation_operation_log for the
# Ansible executor operation types. 'km_linked' rows whose input marks them as
# a log controlled-writeback dispatch are reported under
# 'log_controlled_writeback_dispatched'. "recent" counts rows inside the
# :lookback_hours window.
_RUNTIME_OPERATION_COUNTS_SQL = """
SELECT
    CASE
        WHEN operation_type = 'km_linked'
         AND input ->> 'semantic_operation_type' = 'log_controlled_writeback_dispatched'
        THEN 'log_controlled_writeback_dispatched'
        ELSE operation_type
    END AS operation_type,
    status,
    count(*) AS total,
    count(*) FILTER (
        WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM automation_operation_log
WHERE (
    operation_type IN (
        'ansible_candidate_matched',
        'ansible_check_mode_executed',
        'ansible_apply_executed',
        'ansible_learning_writeback_recorded',
        'ansible_rollback_executed',
        'ansible_execution_skipped',
        'log_controlled_writeback_dispatched'
    )
    OR (
        operation_type = 'km_linked'
        AND input ->> 'semantic_operation_type' = 'log_controlled_writeback_dispatched'
    )
)
GROUP BY 1, status
ORDER BY 1, status
"""


# Newest :limit rows for the same operation filter as the counts query, with
# selected fields extracted from the input / output / dry_run_result JSON.
_RUNTIME_OPERATION_LATEST_SQL = """
SELECT
    op_id::text AS op_id,
    parent_op_id::text AS parent_op_id,
    CASE
        WHEN operation_type = 'km_linked'
         AND input ->> 'semantic_operation_type' = 'log_controlled_writeback_dispatched'
        THEN 'log_controlled_writeback_dispatched'
        ELSE operation_type
    END AS operation_type,
    status,
    actor,
    coalesce(incident_id::text, input ->> 'incident_id') AS incident_id,
    input ->> 'catalog_id' AS catalog_id,
    coalesce(input ->> 'apply_playbook_path', input ->> 'playbook_path') AS playbook_path,
    input ->> 'execution_mode' AS execution_mode,
    input ->> 'source_candidate_op_id' AS source_candidate_op_id,
    input ->> 'check_mode_op_id' AS check_mode_op_id,
    input ->> 'risk_level' AS risk_level,
    input ->> 'controlled_apply_allowed' AS controlled_apply_allowed,
    coalesce(output ->> 'returncode', dry_run_result ->> 'returncode') AS returncode,
    duration_ms,
    created_at
FROM automation_operation_log
WHERE (
    operation_type IN (
        'ansible_candidate_matched',
        'ansible_check_mode_executed',
        'ansible_apply_executed',
        'ansible_learning_writeback_recorded',
        'ansible_rollback_executed',
        'ansible_execution_skipped',
        'log_controlled_writeback_dispatched'
    )
    OR (
        operation_type = 'km_linked'
        AND input ->> 'semantic_operation_type' = 'log_controlled_writeback_dispatched'
    )
)
ORDER BY created_at DESC
LIMIT :limit
"""


# Success / failed totals of auto_repair_executions rows triggered by
# 'ansible_controlled_apply'.
_RUNTIME_AUTO_REPAIR_COUNTS_SQL = """
SELECT
    CASE WHEN success THEN 'success' ELSE 'failed' END AS result_status,
    count(*) AS total,
    count(*) FILTER (
        WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM auto_repair_executions
WHERE triggered_by = 'ansible_controlled_apply'
GROUP BY CASE WHEN success THEN 'success' ELSE 'failed' END
ORDER BY result_status
"""


# Newest :limit auto_repair_executions rows triggered by 'ansible_controlled_apply'.
_RUNTIME_AUTO_REPAIR_LATEST_SQL = """
SELECT
    id,
    incident_id,
    playbook_id AS catalog_id,
    playbook_name,
    CASE WHEN success THEN 'success' ELSE 'failed' END AS result_status,
    executed_steps::text AS executed_steps_text,
    triggered_by,
    risk_level,
    execution_time_ms,
    created_at
FROM auto_repair_executions
WHERE triggered_by = 'ansible_controlled_apply'
ORDER BY created_at DESC
LIMIT :limit
"""


# Totals per verification_result (NULL reported as 'missing') for
# incident_evidence rows whose post_execution_state carries an apply_op_id.
_RUNTIME_VERIFIER_COUNTS_SQL = """
SELECT
    coalesce(verification_result, 'missing') AS verification_result,
    count(*) AS total,
    count(*) FILTER (
        WHERE collected_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM incident_evidence
WHERE post_execution_state ->> 'apply_op_id' IS NOT NULL
GROUP BY coalesce(verification_result, 'missing')
ORDER BY verification_result
"""


# Newest :limit incident_evidence rows that carry an apply_op_id.
_RUNTIME_VERIFIER_LATEST_SQL = """
SELECT
    id,
    incident_id,
    matched_playbook_id,
    coalesce(verification_result, 'missing') AS verification_result,
    post_execution_state ->> 'apply_op_id' AS apply_op_id,
    post_execution_state ->> 'catalog_id' AS catalog_id,
    post_execution_state ->> 'playbook_path' AS playbook_path,
    post_execution_state ->> 'returncode' AS returncode,
    collected_at
FROM incident_evidence
WHERE post_execution_state ->> 'apply_op_id' IS NOT NULL
ORDER BY collected_at DESC
LIMIT :limit
"""


# Totals per status for the project's knowledge_entries that are either
# 'ansible_apply_receipt:' path types or tagged with ansible_controlled_apply.
_RUNTIME_KM_COUNTS_SQL = """
SELECT
    status,
    count(*) AS total,
    count(*) FILTER (
        WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM knowledge_entries
WHERE project_id = :project_id
  AND (
      path_type LIKE 'ansible_apply_receipt:%'
      OR tags::text LIKE '%ansible_controlled_apply%'
  )
GROUP BY status
ORDER BY status
"""


# Newest :limit knowledge_entries rows matching the same filter as the KM counts.
_RUNTIME_KM_LATEST_SQL = """
SELECT
    id,
    title,
    related_incident_id,
    related_playbook_id,
    path_type,
    status,
    created_by,
    created_at
FROM knowledge_entries
WHERE project_id = :project_id
  AND (
      path_type LIKE 'ansible_apply_receipt:%'
      OR tags::text LIKE '%ansible_controlled_apply%'
  )
ORDER BY created_at DESC
LIMIT :limit
"""


# Totals per send_status for the project's Telegram outbound messages whose
# callback_reply action is 'controlled_apply_result'.
_RUNTIME_TELEGRAM_COUNTS_SQL = """
SELECT
    send_status,
    count(*) AS total,
    count(*) FILTER (
        WHERE queued_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM awooop_outbound_message
WHERE project_id = :project_id
  AND channel_type = 'telegram'
  AND source_envelope #>> '{callback_reply,action}' = 'controlled_apply_result'
GROUP BY send_status
ORDER BY send_status
"""


# Newest :limit Telegram outbound messages matching the same filter as the
# Telegram counts.
_RUNTIME_TELEGRAM_LATEST_SQL = """
SELECT
    message_id::text AS message_id,
    run_id::text AS run_id,
    message_type,
    send_status,
    provider_message_id,
    source_envelope #>> '{callback_reply,incident_id}' AS incident_id,
    source_envelope #>> '{callback_reply,action}' AS action,
    queued_at,
    sent_at
FROM awooop_outbound_message
WHERE project_id = :project_id
  AND channel_type = 'telegram'
  AND source_envelope #>> '{callback_reply,action}' = 'controlled_apply_result'
ORDER BY queued_at DESC
LIMIT :limit
"""
|
||
|
||
|
||
_RUNTIME_MCP_GATEWAY_COUNTS_SQL = """
|
||
SELECT
|
||
coalesce(result_status, 'unknown') AS status,
|
||
count(*) AS total,
|
||
count(*) FILTER (
|
||
WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
|
||
) AS recent
|
||
FROM awooop_mcp_gateway_audit
|
||
WHERE project_id = :project_id
|
||
GROUP BY coalesce(result_status, 'unknown')
|
||
ORDER BY status
|
||
"""
|
||
|
||
|
||
_RUNTIME_LEGACY_MCP_COUNTS_SQL = """
|
||
SELECT
|
||
CASE
|
||
WHEN success IS TRUE THEN 'success'
|
||
WHEN success IS FALSE THEN 'failed'
|
||
ELSE 'unknown'
|
||
END AS status,
|
||
count(*) AS total,
|
||
count(*) FILTER (
|
||
WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
|
||
) AS recent
|
||
FROM mcp_audit_log
|
||
GROUP BY
|
||
CASE
|
||
WHEN success IS TRUE THEN 'success'
|
||
WHEN success IS FALSE THEN 'failed'
|
||
ELSE 'unknown'
|
||
END
|
||
ORDER BY status
|
||
"""
|
||
|
||
|
||
_RUNTIME_SERVICE_LOG_COUNTS_SQL = """
|
||
SELECT
|
||
'sanitized_recent_logs' AS status,
|
||
count(*) AS total,
|
||
count(*) FILTER (
|
||
WHERE collected_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
|
||
) AS recent
|
||
FROM incident_evidence
|
||
WHERE recent_logs IS NOT NULL
|
||
OR evidence_summary IS NOT NULL
|
||
OR mcp_health IS NOT NULL
|
||
OR anomaly_context IS NOT NULL
|
||
"""
|
||
|
||
|
||
_RUNTIME_EXECUTOR_LOG_COUNTS_SQL = """
|
||
SELECT
|
||
coalesce(status, 'unknown') AS status,
|
||
count(*) AS total,
|
||
count(*) FILTER (
|
||
WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
|
||
) AS recent
|
||
FROM automation_operation_log
|
||
WHERE operation_type IN (
|
||
'ansible_candidate_matched',
|
||
'ansible_check_mode_executed',
|
||
'ansible_apply_executed',
|
||
'ansible_rollback_executed',
|
||
'ansible_execution_skipped'
|
||
)
|
||
AND (
|
||
output IS NOT NULL
|
||
OR error IS NOT NULL
|
||
OR stderr_feed_back IS NOT NULL
|
||
OR dry_run_result IS NOT NULL
|
||
)
|
||
GROUP BY coalesce(status, 'unknown')
|
||
ORDER BY status
|
||
"""
|
||
|
||
|
||
_RUNTIME_TIMELINE_COUNTS_SQL = """
|
||
SELECT
|
||
coalesce(status, 'unknown') AS status,
|
||
count(*) AS total,
|
||
count(*) FILTER (
|
||
WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
|
||
) AS recent
|
||
FROM timeline_events
|
||
WHERE event_type IS NOT NULL
|
||
OR actor IS NOT NULL
|
||
OR actor_role IS NOT NULL
|
||
GROUP BY coalesce(status, 'unknown')
|
||
ORDER BY status
|
||
"""
|
||
|
||
|
||
_RUNTIME_TIMELINE_COUNTS_FALLBACK_SQL = """
|
||
SELECT
|
||
'timeline_event' AS status,
|
||
count(*) AS total,
|
||
0 AS recent
|
||
FROM timeline_events
|
||
"""
|
||
|
||
|
||
_RUNTIME_PLAYBOOK_TRUST_COUNTS_SQL = """
|
||
SELECT
|
||
CASE
|
||
WHEN review_required IS TRUE THEN 'review_required'
|
||
WHEN trust_score >= 0.8 THEN 'high_trust'
|
||
WHEN trust_score < 0.3 THEN 'low_trust'
|
||
WHEN success_count > 0 OR failure_count > 0 THEN 'learning_active'
|
||
ELSE 'seeded_not_used'
|
||
END AS status,
|
||
count(*) AS total,
|
||
count(*) FILTER (
|
||
WHERE updated_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
|
||
) AS recent
|
||
FROM playbooks
|
||
GROUP BY
|
||
CASE
|
||
WHEN review_required IS TRUE THEN 'review_required'
|
||
WHEN trust_score >= 0.8 THEN 'high_trust'
|
||
WHEN trust_score < 0.3 THEN 'low_trust'
|
||
WHEN success_count > 0 OR failure_count > 0 THEN 'learning_active'
|
||
ELSE 'seeded_not_used'
|
||
END
|
||
ORDER BY status
|
||
"""
|
||
|
||
|
||
_RUNTIME_PLAYBOOK_TRUST_COUNTS_FALLBACK_SQL = """
|
||
SELECT
|
||
'cataloged' AS status,
|
||
count(*) AS total,
|
||
0 AS recent
|
||
FROM playbooks
|
||
"""
|
||
|
||
|
||
_RUNTIME_ALERT_OPERATION_COUNTS_SQL = """
|
||
SELECT
|
||
event_type,
|
||
count(*) AS total,
|
||
count(*) FILTER (
|
||
WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
|
||
) AS recent
|
||
FROM alert_operation_log
|
||
WHERE event_type IN (
|
||
'ALERT_RECEIVED',
|
||
'AUTO_REPAIR_TRIGGERED',
|
||
'EXECUTION_STARTED',
|
||
'EXECUTION_COMPLETED',
|
||
'NOTIFICATION_CLASSIFIED',
|
||
'GUARDRAIL_BLOCKED',
|
||
'STATE_GUARD_BLOCKED',
|
||
'SILENCED',
|
||
'ESCALATED'
|
||
)
|
||
GROUP BY event_type
|
||
ORDER BY event_type
|
||
"""
|
||
|
||
|
||
_RUNTIME_ALERTMANAGER_EVENT_COUNTS_SQL = """
|
||
SELECT
|
||
COALESCE(NULLIF(source_envelope ->> 'stage', ''), 'unknown') AS stage,
|
||
count(*) AS total,
|
||
count(*) FILTER (
|
||
WHERE received_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
|
||
) AS recent
|
||
FROM awooop_conversation_event
|
||
WHERE project_id = :project_id
|
||
AND COALESCE(
|
||
NULLIF(source_envelope ->> 'provider', ''),
|
||
platform_subject_id,
|
||
''
|
||
) = 'alertmanager'
|
||
GROUP BY COALESCE(NULLIF(source_envelope ->> 'stage', ''), 'unknown')
|
||
ORDER BY stage
|
||
"""
|
||
|
||
|
||
_RUNTIME_GROUPED_ALERT_EVENT_COUNTS_SQL = """
|
||
SELECT
|
||
'grouped_child_alert' AS status,
|
||
count(*) AS total,
|
||
count(*) FILTER (
|
||
WHERE received_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
|
||
) AS recent
|
||
FROM awooop_conversation_event
|
||
WHERE project_id = :project_id
|
||
AND channel_chat_id LIKE 'alert-group:%'
|
||
"""
|
||
|
||
|
||
def _validate_payload(payload: dict[str, Any]) -> None:
|
||
if payload.get("schema_version") != _SCHEMA_VERSION:
|
||
raise ValueError(f"schema_version must be {_SCHEMA_VERSION}")
|
||
status = payload.get("program_status") or {}
|
||
if status.get("runtime_authority") != _RUNTIME_AUTHORITY:
|
||
raise ValueError(f"runtime_authority must be {_RUNTIME_AUTHORITY}")
|
||
if status.get("deploy_readback_marker") != _DEPLOY_READBACK_MARKER:
|
||
raise ValueError(f"deploy_readback_marker must be {_DEPLOY_READBACK_MARKER}")
|
||
if status.get("deploy_attempt_note") != _DEPLOY_ATTEMPT_NOTE:
|
||
raise ValueError(f"deploy_attempt_note must be {_DEPLOY_ATTEMPT_NOTE}")
|
||
policy = payload.get("current_policy") or {}
|
||
for key in (
|
||
"low_risk_controlled_apply_allowed",
|
||
"medium_risk_controlled_apply_allowed",
|
||
"high_risk_controlled_apply_allowed",
|
||
"telegram_gateway_required",
|
||
"post_apply_verifier_required",
|
||
"km_learning_writeback_required",
|
||
):
|
||
if policy.get(key) is not True:
|
||
raise ValueError(f"current_policy.{key} must be true")
|
||
if policy.get("owner_review_required_for_low_medium_high") is not False:
|
||
raise ValueError("owner_review_required_for_low_medium_high must be false")
|
||
if policy.get("direct_bot_api_allowed") is not False:
|
||
raise ValueError("direct_bot_api_allowed must be false")
|
||
visibility = payload.get("visibility_contract") or {}
|
||
for key in (
|
||
"work_window_transcript_display_allowed",
|
||
"prompt_body_display_allowed",
|
||
"internal_reasoning_display_allowed",
|
||
"sensitive_value_display_allowed",
|
||
"telegram_unredacted_payload_display_allowed",
|
||
):
|
||
if visibility.get(key) is not False:
|
||
raise ValueError(f"visibility_contract.{key} must remain false")
|