Files
awoooi/apps/api/src/services/ai_agent_autonomous_runtime_control.py
Your Name 9ca6eec2ee
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 2m47s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
feat(awooop): close autonomous learning loop readback
2026-06-29 16:52:13 +08:00

3022 lines
120 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Current AI Agent autonomous runtime control plane.
This read model is the current directive layer. Historical P2 snapshots can
still describe earlier no-send / no-live states, but this payload states what
the product should enforce now: low, medium, and high risk routes may proceed
through controlled automation when allowlist, check-mode, verifier, rollback,
KM, and Telegram receipts are present.
"""
from __future__ import annotations
from collections.abc import Iterable, Mapping
from datetime import UTC, datetime
from typing import Any
from sqlalchemy import text
from src.core.config import settings
from src.core.logging import get_logger
from src.db.base import get_db_context
from src.services.report_generation_service import (
DAILY_REPORT_HOUR_TAIPEI,
MONTHLY_REPORT_DAY_TAIPEI,
MONTHLY_REPORT_HOUR_TAIPEI,
WEEKLY_REPORT_HOUR_TAIPEI,
WEEKLY_REPORT_WEEKDAY_TAIPEI,
)
# Version / marker strings for this read model. Their consumers are outside
# the portion of the file reviewed here.
# NOTE(review): confirm where each one is emitted before editing a value.
_SCHEMA_VERSION = "ai_agent_autonomous_runtime_control_v1"
_RUNTIME_AUTHORITY = "current_owner_directive_controlled_ai_automation"
_DEPLOY_READBACK_MARKER = "p2_416_d1n_autonomous_runtime_control_prod_readback_v2"
_DEPLOY_ATTEMPT_NOTE = "cd_internal_control_plane_readback_retry_20260628_2"
_LIVE_READBACK_SCHEMA_VERSION = "ai_agent_autonomous_runtime_receipt_readback_v1"
# Presumably the default project scope and lookback window for the receipt
# readback queries — TODO confirm against the query code.
_DEFAULT_PROJECT_ID = "awoooi"
_DEFAULT_LOOKBACK_HOURS = 24
# CD cancel-stale-cd no-op triggers must not change runtime payloads.
# Operation types that `_operation_counts` always pre-seeds with a zeroed
# bucket and that `_build_log_integration_taxonomy` sums over.
_EXECUTOR_OPERATION_TYPES = (
    "ansible_candidate_matched",
    "ansible_check_mode_executed",
    "ansible_apply_executed",
    "ansible_learning_writeback_recorded",
    "ansible_rollback_executed",
    "ansible_execution_skipped",
)
# Module-level logger keyed by this module's import name.
logger = get_logger(__name__)
def _allowed_risk_levels() -> list[str]:
    """Return the risk levels configured for controlled apply.

    The ``AWOOOP_ANSIBLE_CONTROLLED_APPLY_ALLOWED_RISK_LEVELS`` setting is read
    as a comma-separated string; a falsy setting yields an empty list.

    Returns:
        The distinct, whitespace-stripped, lower-cased entries in sorted order.
    """
    configured = str(settings.AWOOOP_ANSIBLE_CONTROLLED_APPLY_ALLOWED_RISK_LEVELS or "")
    levels: set[str] = set()
    for token in configured.split(","):
        level = token.strip()
        if level:  # skip blanks produced by stray or trailing commas
            levels.add(level.lower())
    return sorted(levels)
def _utc_iso(value: Any) -> str | None:
if value is None:
return None
if isinstance(value, datetime):
if value.tzinfo is None:
value = value.replace(tzinfo=UTC)
return value.astimezone(UTC).isoformat()
return str(value)
def _row_mapping(row: Mapping[str, Any] | Any) -> dict[str, Any]:
if isinstance(row, Mapping):
return dict(row)
mapping = getattr(row, "_mapping", None)
if mapping is not None:
return dict(mapping)
return dict(row)
def _int_value(value: Any) -> int:
try:
return int(value or 0)
except (TypeError, ValueError):
return 0
def _sanitize_latest_rows(
rows: Iterable[Mapping[str, Any] | Any],
*,
allowed_keys: tuple[str, ...],
time_keys: tuple[str, ...] = ("created_at", "collected_at", "queued_at", "sent_at"),
limit: int = 5,
) -> list[dict[str, Any]]:
clean_rows: list[dict[str, Any]] = []
for row in rows:
item = _row_mapping(row)
clean: dict[str, Any] = {}
for key in allowed_keys:
if key not in item:
continue
value = item.get(key)
clean[key] = _utc_iso(value) if key in time_keys else value
clean_rows.append(clean)
if len(clean_rows) >= limit:
break
return clean_rows
def _operation_counts(
    rows: Iterable[Mapping[str, Any] | Any],
) -> dict[str, dict[str, Any]]:
    """Aggregate per-(operation_type, status) rows into per-operation buckets.

    Every type in ``_EXECUTOR_OPERATION_TYPES`` gets a bucket even when no row
    mentions it; other operation types get one on first sight. A missing or
    falsy ``operation_type`` / ``status`` is recorded as ``"unknown"``.

    Args:
        rows: Rows exposing ``operation_type``, ``status``, ``total`` and
            ``recent``.

    Returns:
        ``{operation_type: {"total", "recent", "by_status"}}`` where
        ``by_status`` maps each status to the sum of its row totals.
    """

    def _empty_bucket() -> dict[str, Any]:
        return {"total": 0, "recent": 0, "by_status": {}}

    counts: dict[str, dict[str, Any]] = {
        known_type: _empty_bucket() for known_type in _EXECUTOR_OPERATION_TYPES
    }
    for row in rows:
        record = _row_mapping(row)
        op_name = str(record.get("operation_type") or "unknown")
        status_name = str(record.get("status") or "unknown")
        row_total = _int_value(record.get("total"))
        row_recent = _int_value(record.get("recent"))

        if op_name not in counts:
            counts[op_name] = _empty_bucket()
        bucket = counts[op_name]
        bucket["total"] += row_total
        bucket["recent"] += row_recent
        status_tally = bucket["by_status"]
        status_tally[status_name] = status_tally.get(status_name, 0) + row_total
    return counts
def _status_counts(
    rows: Iterable[Mapping[str, Any] | Any],
    *,
    status_key: str,
) -> dict[str, Any]:
    """Aggregate per-status rows into overall and per-status counters.

    Args:
        rows: Rows exposing ``total``, ``recent`` and the status column.
        status_key: Name of the column holding the status; a missing or falsy
            status is recorded as ``"unknown"``.

    Returns:
        ``{"total", "recent", "by_status"}`` where ``by_status`` maps each
        status to the sum of its row totals.
    """
    tally: dict[str, int] = {}
    recent_sum = 0
    for row in rows:
        record = _row_mapping(row)
        label = str(record.get(status_key) or "unknown")
        tally[label] = tally.get(label, 0) + _int_value(record.get("total"))
        recent_sum += _int_value(record.get("recent"))
    # Each row total lands in exactly one status, so the grand total is the
    # sum of the per-status tallies.
    return {
        "total": sum(tally.values()),
        "recent": recent_sum,
        "by_status": tally,
    }
def _trace_stage(
*,
stage_id: str,
display_name: str,
source_tables: list[str],
total: int,
recent: int,
required_for_closed_loop: bool,
feeds_learning: bool,
public_safe: bool = True,
next_action_if_missing: str | None = None,
) -> dict[str, Any]:
present = total > 0
return {
"stage_id": stage_id,
"display_name": display_name,
"source_tables": source_tables,
"recorded": present,
"record_quality": "recorded" if present else "missing",
"total": max(0, total),
"recent": max(0, recent),
"required_for_closed_loop": required_for_closed_loop,
"feeds_learning": feeds_learning,
"public_safe": public_safe,
"next_action_if_missing": None if present else next_action_if_missing,
}
def _trace_total(summary: Mapping[str, Any] | None, *operation_types: str) -> int:
if not isinstance(summary, Mapping):
return 0
if not operation_types:
return _int_value(summary.get("total"))
return sum(
_int_value((summary.get(operation_type) or {}).get("total"))
for operation_type in operation_types
)
def _trace_recent(summary: Mapping[str, Any] | None, *operation_types: str) -> int:
if not isinstance(summary, Mapping):
return 0
if not operation_types:
return _int_value(summary.get("recent"))
return sum(
_int_value((summary.get(operation_type) or {}).get("recent"))
for operation_type in operation_types
)
def _build_trace_ledger(
    *,
    operation_summary: Mapping[str, Any],
    auto_repair_summary: Mapping[str, Any],
    verifier_summary: Mapping[str, Any],
    km_summary: Mapping[str, Any],
    telegram_summary: Mapping[str, Any],
    mcp_gateway_summary: Mapping[str, Any],
    legacy_mcp_summary: Mapping[str, Any],
    service_log_summary: Mapping[str, Any],
    executor_log_summary: Mapping[str, Any],
    timeline_summary: Mapping[str, Any],
    playbook_trust_summary: Mapping[str, Any],
    latest_flow_closure: Mapping[str, Any],
    loop_ledger: Mapping[str, Any],
) -> dict[str, Any]:
    """Build the full public-safe AI automation trace ledger.

    Each summary argument is reduced to a ``(total, recent)`` pair and turned
    into one stage record via ``_trace_stage``. ``operation_summary`` is read
    per operation type; every other summary is read from its top-level
    counters. The gateway and legacy MCP summaries are added together into a
    single ``mcp_context`` stage.

    Returns:
        The ledger payload: the ordered stage list plus counts of recorded and
        required stages, the ids of required stages not yet recorded, the ids
        of stages that feed learning, and the two ``closed`` flags taken from
        ``latest_flow_closure`` and ``loop_ledger``.
    """

    def _whole(summary: Mapping[str, Any]) -> tuple[int, int]:
        # (total, recent) from the summary's own top-level counters.
        return _trace_total(summary), _trace_recent(summary)

    def _operation(operation_type: str) -> tuple[int, int]:
        # (total, recent) for one operation-type bucket of operation_summary.
        return (
            _trace_total(operation_summary, operation_type),
            _trace_recent(operation_summary, operation_type),
        )

    gateway_counts = _whole(mcp_gateway_summary)
    legacy_counts = _whole(legacy_mcp_summary)
    mcp_counts = (
        gateway_counts[0] + legacy_counts[0],
        gateway_counts[1] + legacy_counts[1],
    )

    # (stage_id, display_name, source_tables, (total, recent),
    #  required_for_closed_loop, feeds_learning, next_action_if_missing)
    stage_plan: list[tuple[str, str, list[str], tuple[int, int], bool, bool, str]] = [
        (
            "mcp_context",
            "MCP sensor / tool context",
            ["awooop_mcp_gateway_audit", "mcp_audit_log"],
            mcp_counts,
            False,
            True,
            "record_mcp_gateway_or_legacy_mcp_audit_for_every_ai_decision",
        ),
        (
            "service_log_evidence",
            "Sanitized service / package log evidence",
            ["incident_evidence.recent_logs", "incident_evidence.evidence_summary"],
            _whole(service_log_summary),
            False,
            True,
            "collect_sanitized_service_log_evidence_before_ai_decision",
        ),
        (
            "candidate",
            "AI candidate / playbook match",
            ["automation_operation_log"],
            _operation("ansible_candidate_matched"),
            True,
            True,
            "candidate_backfill_worker_enqueue_allowlisted_playbook",
        ),
        (
            "check_mode",
            "No-write check-mode / dry-run",
            ["automation_operation_log"],
            _operation("ansible_check_mode_executed"),
            True,
            True,
            "ansible_check_mode_worker_claims_candidate",
        ),
        (
            "executor_log_projection",
            "Executor stdout / stderr / dry-run projection",
            [
                "automation_operation_log.output",
                "automation_operation_log.error",
                "automation_operation_log.stderr_feed_back",
                "automation_operation_log.dry_run_result",
            ],
            _whole(executor_log_summary),
            False,
            True,
            "persist_sanitized_executor_log_projection_for_failed_or_applied_actions",
        ),
        (
            "controlled_apply",
            "Controlled apply execution",
            ["automation_operation_log"],
            _operation("ansible_apply_executed"),
            True,
            True,
            "controlled_apply_worker_waits_for_check_mode_success",
        ),
        (
            "auto_repair_execution_receipt",
            "Auto-repair execution receipt",
            ["auto_repair_executions"],
            _whole(auto_repair_summary),
            True,
            True,
            "receipt_backfill_records_auto_repair_execution",
        ),
        (
            "post_apply_verifier",
            "Post-apply verifier evidence",
            ["incident_evidence"],
            _whole(verifier_summary),
            True,
            True,
            "post_apply_verifier_writes_incident_evidence",
        ),
        (
            "rag_km_learning",
            "RAG / KM / PlayBook learning writeback",
            ["knowledge_entries"],
            _whole(km_summary),
            True,
            True,
            "hermes_writes_km_playbook_trust_candidate",
        ),
        (
            "playbook_trust",
            "PlayBook trust / success-failure learning",
            [
                "playbooks.trust_score",
                "playbooks.success_count",
                "playbooks.failure_count",
                "playbooks.review_required",
            ],
            _whole(playbook_trust_summary),
            False,
            True,
            "write_playbook_trust_delta_after_verified_execution",
        ),
        (
            "timeline_projection",
            "Operator timeline projection",
            ["timeline_events"],
            _whole(timeline_summary),
            False,
            True,
            "project_ai_runtime_stage_to_timeline_events",
        ),
        (
            "telegram_receipt",
            "Telegram Gateway receipt",
            ["awooop_outbound_message"],
            _whole(telegram_summary),
            True,
            False,
            "live_apply_gateway_sends_controlled_apply_result_receipt",
        ),
    ]
    stages = [
        _trace_stage(
            stage_id=stage_id,
            display_name=display_name,
            source_tables=tables,
            total=counts[0],
            recent=counts[1],
            required_for_closed_loop=required,
            feeds_learning=learns,
            next_action_if_missing=follow_up,
        )
        for stage_id, display_name, tables, counts, required, learns, follow_up in stage_plan
    ]

    # Single pass over the stages to derive every rollup.
    required_stage_count = 0
    recorded_count = 0
    missing_required: list[str] = []
    learning_stage_ids: list[str] = []
    for stage in stages:
        recorded = stage["recorded"] is True
        if recorded:
            recorded_count += 1
        if stage["required_for_closed_loop"]:
            required_stage_count += 1
            if not recorded:
                missing_required.append(str(stage["stage_id"]))
        if stage["feeds_learning"] is True:
            learning_stage_ids.append(str(stage["stage_id"]))

    # NOTE(review): the two literals below join with no separator between
    # "ledger" and the next sentence — confirm whether punctuation was lost.
    purpose = (
        "把 AI 自動化每個節點的 public-safe receipt 收斂成同一份 ledger"
        "這些紀錄是後續 RAG、KM、PlayBook trust 與報告學習的依據。"
    )
    return {
        "schema_version": "ai_agent_autonomous_trace_ledger_v1",
        "purpose": purpose,
        "latest_flow_closed": latest_flow_closure.get("closed") is True,
        "latest_loop_closed": loop_ledger.get("closed") is True,
        "stage_count": len(stages),
        "recorded_stage_count": recorded_count,
        "required_stage_count": required_stage_count,
        "missing_required_stage_ids": missing_required,
        "learning_source_stage_ids": learning_stage_ids,
        "public_safety": {
            "reads_raw_sessions": False,
            "stores_secret_values": False,
            "stores_unredacted_telegram_payload": False,
            "stores_internal_reasoning": False,
        },
        "stages": stages,
    }
def _build_log_integration_taxonomy(
    *,
    operation_summary: Mapping[str, Any],
    auto_repair_summary: Mapping[str, Any],
    verifier_summary: Mapping[str, Any],
    km_summary: Mapping[str, Any],
    telegram_summary: Mapping[str, Any],
    mcp_gateway_summary: Mapping[str, Any],
    legacy_mcp_summary: Mapping[str, Any],
    service_log_summary: Mapping[str, Any],
    executor_log_summary: Mapping[str, Any],
    timeline_summary: Mapping[str, Any],
    playbook_trust_summary: Mapping[str, Any],
) -> dict[str, Any]:
    """Expose how logs are normalized, labeled, grouped, and fed to agents.

    Builds one record per log source family carrying its tables, normalized
    schema name, label dimensions and ``total`` / ``recent`` counters, then
    derives rollups over all families. A family is "active" when its total is
    positive.

    Returns:
        The taxonomy payload: static flow and label metadata, the ordered
        ``source_families`` list, and the ``rollups`` computed from it.
    """

    def _family(
        family_id: str,
        tables: list[str],
        schema: str,
        labels: list[str],
        total: int,
        recent: int,
        payload_policy: str,
        when_empty: str,
        *,
        learns: bool = True,
    ) -> dict[str, Any]:
        # One source-family record; every family is reported as public_safe.
        return {
            "source_family_id": family_id,
            "source_tables": tables,
            "normalized_event_schema": schema,
            "label_dimensions": labels,
            "total": total,
            "recent": recent,
            "feeds_learning": learns,
            "public_safe": True,
            "raw_payload_policy": payload_policy,
            "next_action_if_empty": when_empty,
        }

    operation_total = 0
    operation_recent = 0
    for operation_type in _EXECUTOR_OPERATION_TYPES:
        operation_total += _trace_total(operation_summary, operation_type)
        operation_recent += _trace_recent(operation_summary, operation_type)

    source_families = [
        _family(
            "mcp_gateway_tool_calls",
            ["awooop_mcp_gateway_audit"],
            "ToolCallEvidence",
            ["project", "run", "trace", "agent", "tool", "policy_gate"],
            _trace_total(mcp_gateway_summary),
            _trace_recent(mcp_gateway_summary),
            "hash_only_no_raw_input_output",
            "route_first_class_tools_through_awooop_mcp_gateway",
        ),
        _family(
            "legacy_mcp_tool_calls",
            ["mcp_audit_log"],
            "LegacyToolCallEvidence",
            ["incident", "session_ref", "flywheel_node", "agent", "tool"],
            _trace_total(legacy_mcp_summary),
            _trace_recent(legacy_mcp_summary),
            "bridge_to_gateway_hash_or_redacted_summary",
            "keep_legacy_bridge_until_all_callers_use_gateway",
        ),
        _family(
            "service_package_logs",
            [
                "incident_evidence.recent_logs",
                "incident_evidence.evidence_summary",
                "incident_evidence.anomaly_context",
            ],
            "ServiceLogEvidence",
            ["project", "product", "website", "service", "package", "incident"],
            _trace_total(service_log_summary),
            _trace_recent(service_log_summary),
            "sanitized_summary_only",
            "collect_sanitized_service_package_logs_before_decision",
        ),
        _family(
            "executor_operation_logs",
            ["automation_operation_log"],
            "ExecutorOperationEvidence",
            [
                "project",
                "service",
                "package",
                "tool",
                "incident",
                "operation",
                "playbook",
                "risk",
            ],
            # Report whichever view of the executor log has seen more events.
            max(operation_total, _trace_total(executor_log_summary)),
            max(operation_recent, _trace_recent(executor_log_summary)),
            "stdout_stderr_tail_or_structured_result_only",
            "persist_executor_operation_log_for_candidate_check_apply",
        ),
        _family(
            "auto_repair_receipts",
            ["auto_repair_executions"],
            "RepairExecutionReceipt",
            ["incident", "service", "playbook", "risk", "result"],
            _trace_total(auto_repair_summary),
            _trace_recent(auto_repair_summary),
            "execution_step_refs_not_raw_secrets",
            "write_auto_repair_execution_receipt_after_apply",
        ),
        _family(
            "post_apply_verifier",
            ["incident_evidence.post_execution_state"],
            "VerifierEvidence",
            ["incident", "operation", "playbook", "service", "result"],
            _trace_total(verifier_summary),
            _trace_recent(verifier_summary),
            "post_state_summary_redacted_refs_only",
            "run_post_apply_verifier_for_each_apply",
        ),
        _family(
            "rag_km_entries",
            ["knowledge_entries"],
            "KnowledgeWritebackEvidence",
            ["project", "incident", "playbook", "path_type", "status"],
            _trace_total(km_summary),
            _trace_recent(km_summary),
            "curated_summary_and_refs_only",
            "write_km_entry_after_verifier",
        ),
        _family(
            "playbook_trust_signals",
            ["playbooks"],
            "PlayBookTrustSignal",
            ["project", "playbook", "status", "trust_band", "review_required"],
            _trace_total(playbook_trust_summary),
            _trace_recent(playbook_trust_summary),
            "aggregate_trust_counters_only",
            "write_trust_delta_after_verified_execution",
        ),
        _family(
            "operator_timeline_projection",
            ["timeline_events"],
            "OperatorTimelineEvent",
            ["incident", "event_type", "status", "actor", "actor_role"],
            _trace_total(timeline_summary),
            _trace_recent(timeline_summary),
            "short_public_safe_status_projection",
            "project_ai_runtime_stage_to_timeline_events",
        ),
        _family(
            "telegram_delivery_receipts",
            ["awooop_outbound_message"],
            "NotificationReceipt",
            ["project", "channel", "incident", "action", "send_status"],
            _trace_total(telegram_summary),
            _trace_recent(telegram_summary),
            "provider_message_ref_no_unredacted_payload",
            "send_controlled_apply_result_via_gateway",
            learns=False,
        ),
    ]

    # Derive every rollup in one pass over the family records.
    distinct_labels: set[str] = set()
    active_source_count = 0
    learning_family_count = 0
    event_total = 0
    recent_event_total = 0
    for family in source_families:
        distinct_labels.update(str(label) for label in family["label_dimensions"])
        family_total = _int_value(family["total"])
        if family_total > 0:
            active_source_count += 1
        event_total += family_total
        recent_event_total += _int_value(family["recent"])
        if family["feeds_learning"] is True:
            learning_family_count += 1
    label_dimensions = sorted(distinct_labels)
    family_count = len(source_families)

    return {
        "schema_version": "ai_agent_log_integration_taxonomy_v1",
        "purpose": (
            "將專案、產品、網站、服務、套件、工具與通知來源的 log "
            "統一轉成可貼標、可分群、可回放、可餵 RAG/KM/PlayBook 的 evidence。"
        ),
        "normalized_event_flow": [
            "collect_source_log_or_receipt",
            "redact_and_hash_sensitive_fields",
            "assign_labels",
            "correlate_incident_operation_playbook",
            "write_trace_ledger",
            "retrieve_similar_context_via_rag",
            "select_or_repair_playbook",
            "run_check_mode_then_controlled_apply",
            "verify_and_write_learning_back",
        ],
        "label_dimensions": label_dimensions,
        "required_label_dimensions": [
            "project",
            "source_family",
            "incident",
            "operation",
            "service",
            "tool",
            "playbook",
        ],
        "source_families": source_families,
        "rollups": {
            "source_family_count": family_count,
            "active_source_family_count": active_source_count,
            "inactive_source_family_count": family_count - active_source_count,
            "label_dimension_count": len(label_dimensions),
            "classified_event_total": event_total,
            "recent_classified_event_total": recent_event_total,
            "learning_source_family_count": learning_family_count,
        },
        "public_safety": {
            "raw_secret_collection_allowed": False,
            "raw_session_collection_allowed": False,
            "unredacted_payload_storage_allowed": False,
            "internal_reasoning_storage_allowed": False,
        },
    }
def _decision_wiring_stage(
*,
stage_id: str,
display_name: str,
evidence_sources: list[str],
total: int,
recent: int,
required_for_decision_wiring: bool,
feeds_next_stage: str,
next_action_if_missing: str,
) -> dict[str, Any]:
present = total > 0
return {
"stage_id": stage_id,
"display_name": display_name,
"evidence_sources": evidence_sources,
"present": present,
"total": max(0, total),
"recent": max(0, recent),
"required_for_decision_wiring": required_for_decision_wiring,
"feeds_next_stage": feeds_next_stage,
"next_action_if_missing": None if present else next_action_if_missing,
}
def _build_agent_decision_wiring(
    *,
    operation_summary: Mapping[str, Any],
    verifier_summary: Mapping[str, Any],
    km_summary: Mapping[str, Any],
    mcp_gateway_summary: Mapping[str, Any],
    legacy_mcp_summary: Mapping[str, Any],
    service_log_summary: Mapping[str, Any],
    timeline_summary: Mapping[str, Any],
    playbook_trust_summary: Mapping[str, Any],
    log_integration_taxonomy: Mapping[str, Any],
    loop_ledger: Mapping[str, Any],
    latest_flow_closure: Mapping[str, Any],
) -> dict[str, Any]:
    """Summarize live evidence-to-execution wiring for the AI Agent path.

    Produces six ordered stages (labeled evidence, RAG context, candidate
    selection, check-mode, controlled apply / rollback, post-apply verifier),
    each built by ``_decision_wiring_stage``. All six are required; the
    readback status is ``"completed"`` only when none is missing.

    Args:
        operation_summary: Per-operation-type buckets with ``total`` / ``recent``.
        verifier_summary, km_summary, mcp_gateway_summary, legacy_mcp_summary,
        service_log_summary, timeline_summary, playbook_trust_summary:
            Flat summaries with top-level ``total`` / ``recent``.
        log_integration_taxonomy: Taxonomy payload; only its ``rollups`` are
            read, and a non-mapping ``rollups`` is treated as empty.
        loop_ledger, latest_flow_closure: Their ``closed`` flags decide
            ``closed_loop_observed``.

    Returns:
        The readback payload: stages, missing required stage ids, runtime
        switches read from ``settings``, and rollup counters.
    """
    taxonomy_rollups = log_integration_taxonomy.get("rollups")
    if not isinstance(taxonomy_rollups, Mapping):
        taxonomy_rollups = {}
    source_family_count = _int_value(taxonomy_rollups.get("source_family_count"))
    active_source_family_count = _int_value(taxonomy_rollups.get("active_source_family_count"))
    all_sources_active = source_family_count > 0 and active_source_family_count == source_family_count

    evidence_total = (
        _trace_total(mcp_gateway_summary)
        + _trace_total(legacy_mcp_summary)
        + _trace_total(service_log_summary)
        + _trace_total(timeline_summary)
    )
    evidence_recent = (
        _trace_recent(mcp_gateway_summary)
        + _trace_recent(legacy_mcp_summary)
        + _trace_recent(service_log_summary)
        + _trace_recent(timeline_summary)
    )
    rag_context_total = _trace_total(km_summary) + _trace_total(playbook_trust_summary)
    rag_context_recent = _trace_recent(km_summary) + _trace_recent(playbook_trust_summary)
    candidate_total = _trace_total(operation_summary, "ansible_candidate_matched")
    candidate_recent = _trace_recent(operation_summary, "ansible_candidate_matched")
    check_mode_total = _trace_total(operation_summary, "ansible_check_mode_executed")
    check_mode_recent = _trace_recent(operation_summary, "ansible_check_mode_executed")
    apply_total = _trace_total(operation_summary, "ansible_apply_executed")
    apply_recent = _trace_recent(operation_summary, "ansible_apply_executed")
    rollback_total = _trace_total(operation_summary, "ansible_rollback_executed")
    rollback_recent = _trace_recent(operation_summary, "ansible_rollback_executed")
    verifier_total = _trace_total(verifier_summary)
    verifier_recent = _trace_recent(verifier_summary)

    stages = [
        _decision_wiring_stage(
            stage_id="labeled_evidence_sources",
            display_name="Labeled log / MCP / timeline evidence available",
            evidence_sources=["log_integration_taxonomy", "mcp", "service_logs", "timeline_events"],
            # The stage only counts as present once every source family in
            # the taxonomy is active; `recent` is reported ungated.
            total=evidence_total if all_sources_active else 0,
            recent=evidence_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="rag_context_retrieval",
            next_action_if_missing="keep_p1a_source_family_ingestion_active_until_10_of_10",
        ),
        _decision_wiring_stage(
            stage_id="rag_context_retrieval",
            display_name="RAG / KM / PlayBook trust context available",
            evidence_sources=["knowledge_entries", "playbooks"],
            total=rag_context_total,
            recent=rag_context_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="playbook_candidate_selection",
            next_action_if_missing="retrieve_similar_km_entries_and_playbook_trust_before_candidate",
        ),
        _decision_wiring_stage(
            stage_id="playbook_candidate_selection",
            display_name="Allowlisted PlayBook candidate selected",
            evidence_sources=["automation_operation_log:ansible_candidate_matched"],
            total=candidate_total,
            recent=candidate_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="check_mode_dry_run",
            next_action_if_missing="candidate_backfill_worker_enqueue_allowlisted_playbook",
        ),
        _decision_wiring_stage(
            stage_id="check_mode_dry_run",
            display_name="Check-mode / dry-run receipt recorded",
            evidence_sources=["automation_operation_log:ansible_check_mode_executed"],
            total=check_mode_total,
            recent=check_mode_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="controlled_apply_boundary",
            next_action_if_missing="ansible_check_mode_worker_claims_candidate",
        ),
        _decision_wiring_stage(
            stage_id="controlled_apply_boundary",
            display_name="Controlled apply / rollback boundary recorded",
            evidence_sources=[
                "automation_operation_log:ansible_apply_executed",
                "automation_operation_log:ansible_rollback_executed",
            ],
            total=apply_total + rollback_total,
            # Count rollbacks in `recent` too, so both counters of this stage
            # cover the same two operation types.
            recent=apply_recent + rollback_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="post_apply_verifier",
            next_action_if_missing="controlled_apply_worker_waits_for_check_mode_success",
        ),
        _decision_wiring_stage(
            stage_id="post_apply_verifier",
            display_name="Post-apply verifier receipt recorded",
            evidence_sources=["incident_evidence"],
            total=verifier_total,
            recent=verifier_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="learning_writeback",
            next_action_if_missing="post_apply_verifier_writes_incident_evidence",
        ),
    ]

    required_stages = [
        stage for stage in stages if stage["required_for_decision_wiring"] is True
    ]
    missing_required = [
        str(stage["stage_id"])
        for stage in required_stages
        if stage["present"] is not True
    ]
    present_required_count = sum(
        1 for stage in required_stages if stage["present"] is True
    )
    # Either closure signal is enough to report an observed closed loop.
    closed_loop_observed = bool(
        loop_ledger.get("closed") is True
        or latest_flow_closure.get("closed") is True
    )
    return {
        "schema_version": "ai_agent_decision_wiring_readback_v1",
        "status": "completed" if not missing_required else "in_progress",
        "stages": stages,
        "missing_required_stage_ids": missing_required,
        "runtime_switches": {
            "candidate_backfill_worker_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CANDIDATE_BACKFILL_WORKER),
            "check_mode_worker_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER),
            "controlled_apply_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CONTROLLED_APPLY),
            "allowed_risk_levels": _allowed_risk_levels(),
        },
        "closed_loop_observed": closed_loop_observed,
        "public_safety": {
            "stores_raw_logs": False,
            "stores_secret_values": False,
            "executes_on_read": False,
            "critical_break_glass_still_required": True,
        },
        "rollups": {
            "stage_count": len(stages),
            "required_stage_count": len(required_stages),
            "required_stage_present_count": present_required_count,
            "required_stage_missing_count": len(missing_required),
            "evidence_event_total": evidence_total,
            "rag_context_total": rag_context_total,
            "candidate_total": candidate_total,
            "check_mode_total": check_mode_total,
            "controlled_apply_total": apply_total,
            "rollback_total": rollback_total,
            "verifier_total": verifier_total,
        },
    }
def _learning_loop_stage(
*,
stage_id: str,
display_name: str,
evidence_sources: list[str],
total: int,
recent: int,
required_for_learning_loop: bool,
writes_runtime_state: bool,
next_action_if_missing: str,
) -> dict[str, Any]:
present = total > 0
return {
"stage_id": stage_id,
"display_name": display_name,
"evidence_sources": evidence_sources,
"present": present,
"total": max(0, total),
"recent": max(0, recent),
"required_for_learning_loop": required_for_learning_loop,
"writes_runtime_state": writes_runtime_state,
"next_action_if_missing": None if present else next_action_if_missing,
}
def _build_learning_loop_readback(
    *,
    operation_summary: Mapping[str, Any],
    verifier_summary: Mapping[str, Any],
    km_summary: Mapping[str, Any],
    playbook_trust_summary: Mapping[str, Any],
    log_integration_taxonomy: Mapping[str, Any],
    agent_decision_wiring: Mapping[str, Any],
    latest_flow_closure: Mapping[str, Any],
    latest_failure_classification: Mapping[str, Any],
    controlled_retry_package: Mapping[str, Any],
    loop_ledger: Mapping[str, Any],
) -> dict[str, Any]:
    """Expose the verified execution to KM/PlayBook learning loop.

    Produces seven ordered stages, each built by ``_learning_loop_stage`` and
    all required; the readback status is ``"completed"`` only when none is
    missing.

    Args:
        operation_summary: Per-operation-type buckets; only
            ``ansible_learning_writeback_recorded`` is read.
        verifier_summary, km_summary, playbook_trust_summary: Flat summaries
            with top-level ``total`` / ``recent``.
        log_integration_taxonomy: Only its ``rollups`` are read; a non-mapping
            ``rollups`` is treated as empty.
        agent_decision_wiring: Its ``status`` gates the next-decision stage.
        latest_flow_closure: Its ``has_post_apply_verifier`` and
            ``has_km_writeback`` flags gate the first two stages.
        latest_failure_classification: Its ``classification`` must be a real
            classification for repair feedback to count as ready.
        controlled_retry_package: Must carry the expected ``schema_version``.
        loop_ledger: Its ``closed`` flag gates the next-decision stage.

    Returns:
        The readback payload: stages, missing required stage ids and rollups.
    """
    taxonomy_rollups = log_integration_taxonomy.get("rollups")
    if not isinstance(taxonomy_rollups, Mapping):
        taxonomy_rollups = {}
    learning_source_family_count = _int_value(
        taxonomy_rollups.get("learning_source_family_count")
    )
    classified_event_total = _int_value(taxonomy_rollups.get("classified_event_total"))
    recent_classified_event_total = _int_value(
        taxonomy_rollups.get("recent_classified_event_total")
    )
    verifier_total = _trace_total(verifier_summary)
    verifier_recent = _trace_recent(verifier_summary)
    km_total = _trace_total(km_summary)
    km_recent = _trace_recent(km_summary)
    learning_writeback_total = _trace_total(
        operation_summary,
        "ansible_learning_writeback_recorded",
    )
    learning_writeback_recent = _trace_recent(
        operation_summary,
        "ansible_learning_writeback_recorded",
    )
    trust_total = _trace_total(playbook_trust_summary)
    trust_recent = _trace_recent(playbook_trust_summary)

    # Normalize before the membership test: a missing (None) classification
    # must count as "not classified", and a non-hashable value must not raise.
    classification = str(latest_failure_classification.get("classification") or "")
    repair_feedback_ready = bool(
        classification not in {"", "no_controlled_apply_observed"}
        and controlled_retry_package.get("schema_version")
        == "ai_agent_controlled_retry_package_v1"
    )
    next_decision_ready = bool(
        agent_decision_wiring.get("status") == "completed"
        and loop_ledger.get("closed") is True
    )

    stages = [
        _learning_loop_stage(
            stage_id="verified_execution_outcome",
            display_name="Verified execution outcome available",
            evidence_sources=["incident_evidence.post_execution_state"],
            # Only present when the latest flow itself has a verifier receipt;
            # `recent` is reported ungated.
            total=verifier_total
            if latest_flow_closure.get("has_post_apply_verifier") is True
            else 0,
            recent=verifier_recent,
            required_for_learning_loop=True,
            writes_runtime_state=True,
            next_action_if_missing="run_post_apply_verifier_and_attach_apply_op_id",
        ),
        _learning_loop_stage(
            stage_id="km_learning_writeback",
            display_name="KM learning writeback recorded",
            evidence_sources=["knowledge_entries"],
            total=km_total
            if latest_flow_closure.get("has_km_writeback") is True
            else 0,
            recent=km_recent,
            required_for_learning_loop=True,
            writes_runtime_state=True,
            next_action_if_missing="write_verified_execution_summary_to_km",
        ),
        _learning_loop_stage(
            stage_id="learning_repair_record",
            display_name="Learning repository repair result recorded",
            evidence_sources=[
                "automation_operation_log:ansible_learning_writeback_recorded",
                "learning_repository",
            ],
            total=learning_writeback_total,
            recent=learning_writeback_recent,
            required_for_learning_loop=True,
            writes_runtime_state=True,
            next_action_if_missing="record_learning_repair_result_after_verifier",
        ),
        _learning_loop_stage(
            stage_id="playbook_trust_delta",
            display_name="PlayBook trust signal available",
            evidence_sources=["playbooks"],
            total=trust_total,
            recent=trust_recent,
            required_for_learning_loop=True,
            writes_runtime_state=True,
            next_action_if_missing="write_playbook_trust_delta_after_verifier",
        ),
        _learning_loop_stage(
            stage_id="similar_case_context",
            display_name="Similar-case context sources active",
            evidence_sources=["log_integration_taxonomy", "knowledge_entries", "playbooks"],
            total=classified_event_total if learning_source_family_count > 0 else 0,
            recent=recent_classified_event_total,
            required_for_learning_loop=True,
            writes_runtime_state=False,
            next_action_if_missing="activate_learning_source_families_for_similar_case_retrieval",
        ),
        _learning_loop_stage(
            stage_id="repair_candidate_feedback",
            display_name="Repair or no-repair feedback classified",
            evidence_sources=["latest_failure_classification", "controlled_retry_package"],
            total=1 if repair_feedback_ready else 0,
            recent=1 if repair_feedback_ready else 0,
            required_for_learning_loop=True,
            writes_runtime_state=False,
            next_action_if_missing="classify_latest_apply_result_and_prepare_retry_package",
        ),
        _learning_loop_stage(
            stage_id="next_decision_context",
            display_name="Next decision can consume learned context",
            evidence_sources=["agent_decision_wiring", "autonomous_execution_loop_ledger"],
            total=1 if next_decision_ready else 0,
            recent=1 if next_decision_ready else 0,
            required_for_learning_loop=True,
            writes_runtime_state=False,
            next_action_if_missing="complete_decision_wiring_and_execution_loop_before_learning_release",
        ),
    ]

    required_stages = [
        stage for stage in stages if stage["required_for_learning_loop"] is True
    ]
    missing_required = [
        str(stage["stage_id"])
        for stage in required_stages
        if stage["present"] is not True
    ]
    present_required_count = sum(
        1 for stage in required_stages if stage["present"] is True
    )
    return {
        "schema_version": "ai_agent_learning_loop_readback_v1",
        "status": "completed" if not missing_required else "in_progress",
        "stages": stages,
        "missing_required_stage_ids": missing_required,
        "public_safety": {
            "stores_raw_logs": False,
            "stores_secret_values": False,
            "stores_unredacted_telegram_payload": False,
            "executes_on_read": False,
            "critical_break_glass_still_required": True,
        },
        "rollups": {
            "stage_count": len(stages),
            "required_stage_count": len(required_stages),
            "required_stage_present_count": present_required_count,
            "required_stage_missing_count": len(missing_required),
            "verified_execution_total": verifier_total,
            "km_writeback_total": km_total,
            "learning_writeback_total": learning_writeback_total,
            "learning_writeback_recent": learning_writeback_recent,
            "playbook_trust_total": trust_total,
            "learning_source_family_count": learning_source_family_count,
            "similar_case_source_total": classified_event_total,
            "repair_feedback_ready_count": 1 if repair_feedback_ready else 0,
            "next_decision_ready_count": 1 if next_decision_ready else 0,
        },
    }
def _build_work_item_progress(
    *,
    trace_ledger: Mapping[str, Any],
    log_integration_taxonomy: Mapping[str, Any],
    agent_decision_wiring: Mapping[str, Any],
    learning_loop: Mapping[str, Any],
    db_read_status: str,
) -> dict[str, Any]:
    """Assemble the prioritized work-item board and its status rollups.

    The board has two parts: a fixed, ordered list of P0/P1/P2 items whose
    statuses are derived from the supplied readbacks, and one P1-A item per
    source family listed in the taxonomy. Completion is chained: P1-B can only
    complete when P1-A has, and P1-C only when P1-B has.

    Args:
        trace_ledger: Trace ledger payload; its ``missing_required_stage_ids``
            and ``schema_version`` are read.
        log_integration_taxonomy: Taxonomy payload; its ``rollups``,
            ``source_families`` and ``schema_version`` are read.
        agent_decision_wiring: Decision wiring payload; its ``rollups`` and
            ``schema_version`` are read.
        learning_loop: Learning loop payload; its ``rollups`` and
            ``schema_version`` are read.
        db_read_status: Status string of the DB read; ``"ok"`` is required for
            the deploy readback item to complete.

    Returns:
        A dict with ``ordered_items``, ``source_family_items`` and ``rollups``
        (counts per status across both lists).
    """

    def _rollups_of(payload: Mapping[str, Any]) -> Mapping[str, Any]:
        # Anything that is not a mapping is treated as "no rollups".
        candidate = payload.get("rollups")
        return candidate if isinstance(candidate, Mapping) else {}

    def _two_state(done: bool) -> str:
        return "completed" if done else "in_progress"

    def _three_state(done: bool, prerequisite_done: bool) -> str:
        # "pending" means the prerequisite item has not completed yet.
        if done:
            return "completed"
        return "in_progress" if prerequisite_done else "pending"

    def _source_family_item(source: Mapping[str, Any]) -> dict[str, Any]:
        family_id = source.get("source_family_id")
        has_events = _int_value(source.get("total")) > 0
        if has_events:
            next_action = "keep_learning_and_quality_checks"
        else:
            next_action = source.get("next_action_if_empty")
        return {
            "work_item_id": f"P1-A-source-{family_id}",
            "priority": "P1-A",
            "source_family_id": family_id,
            "title": f"Ingest and label {family_id}",
            "status": "completed" if has_events else "not_started",
            "label_dimensions": source.get("label_dimensions") or [],
            "next_controlled_action": next_action,
        }

    # --- Inputs, normalised to safe container types -------------------------
    raw_families = log_integration_taxonomy.get("source_families")
    families = raw_families if isinstance(raw_families, list) else []
    raw_missing = trace_ledger.get("missing_required_stage_ids")
    trace_missing = raw_missing if isinstance(raw_missing, list) else []

    inactive_source_count = _int_value(
        _rollups_of(log_integration_taxonomy).get("inactive_source_family_count")
    )
    decision_wiring_missing = _int_value(
        _rollups_of(agent_decision_wiring).get("required_stage_missing_count")
    )
    learning_loop_missing = _int_value(
        _rollups_of(learning_loop).get("required_stage_missing_count")
    )

    # --- Chained completion flags -------------------------------------------
    p1a_completed = inactive_source_count == 0
    p1b_completed = (
        p1a_completed
        and agent_decision_wiring.get("schema_version") == "ai_agent_decision_wiring_readback_v1"
        and decision_wiring_missing == 0
    )
    p1c_completed = (
        p1b_completed
        and learning_loop.get("schema_version") == "ai_agent_learning_loop_readback_v1"
        and learning_loop_missing == 0
    )
    deployed_readback_complete = (
        db_read_status == "ok"
        and trace_ledger.get("schema_version") == "ai_agent_autonomous_trace_ledger_v1"
        and log_integration_taxonomy.get("schema_version") == "ai_agent_log_integration_taxonomy_v1"
    )
    deploy_blocker = (
        None
        if deployed_readback_complete
        else "waiting_for_successful_gitea_cd_deploy_marker"
    )

    ordered_items = [
        {
            "work_item_id": "P0-A-runtime-truth",
            "priority": "P0-A",
            "title": "Controlled apply runtime truth readback",
            "status": "completed",
            "exit_criteria": "production API reports db_read_status=ok and live executor receipts",
        },
        {
            "work_item_id": "P0-B-trace-ledger",
            "priority": "P0-B",
            "title": "Trace ledger for MCP/log/executor/verifier/KM/PlayBook/Telegram",
            "status": _two_state(not trace_missing),
            "exit_criteria": "trace_ledger exposes required closed-loop stages and missing_required_stage_ids",
        },
        {
            "work_item_id": "P0-C-log-taxonomy",
            "priority": "P0-C",
            "title": "Project/product/site/service/package/tool log taxonomy",
            "status": "completed",
            "exit_criteria": "log_integration_taxonomy lists source families, labels, and public-safety policy",
        },
        {
            "work_item_id": "P0-D-ui-visibility",
            "priority": "P0-D",
            "title": "AwoooP UI shows automation loop and log integration progress",
            "status": "completed",
            "exit_criteria": "AwoooP, Approvals, Runs, and Work Items show trace/log taxonomy panel",
        },
        {
            "work_item_id": "P0-E-verification-deploy",
            "priority": "P0-E",
            "title": "Focused verification and production deploy marker readback",
            "status": _two_state(deployed_readback_complete),
            "exit_criteria": "deploy marker includes this code and production API exposes trace_ledger/log_integration_taxonomy",
            "blocker": deploy_blocker,
        },
        {
            "work_item_id": "P1-A-ingestion-coverage",
            "priority": "P1-A",
            "title": "Collector and sanitizer coverage for all source families",
            "status": _two_state(p1a_completed),
            "exit_criteria": "all source families have active sanitized classified events",
            "remaining_source_family_count": inactive_source_count,
        },
        {
            "work_item_id": "P1-B-agent-decision-wiring",
            "priority": "P1-B",
            "title": "RAG retrieval to PlayBook select/repair/check-mode/apply/verifier",
            "status": _three_state(p1b_completed, p1a_completed),
            "exit_criteria": "AI Agent consumes labeled evidence and emits target selector, dry-run, apply, verifier, rollback",
            "remaining_decision_wiring_stage_count": decision_wiring_missing,
        },
        {
            "work_item_id": "P1-C-learning-loop",
            "priority": "P1-C",
            "title": "KM / PlayBook trust learning loop",
            "status": _three_state(p1c_completed, p1b_completed),
            "exit_criteria": "verified execution updates KM entries, trust delta, similar-case clusters, and repair candidates",
            "remaining_learning_loop_stage_count": learning_loop_missing,
        },
        {
            "work_item_id": "P1-D-alert-noise-reduction",
            "priority": "P1-D",
            "title": "Alert grouping and AI controlled workflow routing",
            "status": "pending",
            "exit_criteria": "repeated alerts are clustered, deduped, routed to controlled automation, and no longer default to manual handling",
        },
        {
            "work_item_id": "P2-A-ui-ux-productization",
            "priority": "P2-A",
            "title": "Professional product UI replacing text-heavy surfaces",
            "status": "pending",
            "exit_criteria": "AI automation status is shown as dense dashboard controls, filters, counters, and action rails",
        },
        {
            "work_item_id": "P2-B-multi-product-expansion",
            "priority": "P2-B",
            "title": "Reuse taxonomy across AWOOOI products/projects",
            "status": "pending",
            "exit_criteria": "StockPlatform, VibeWork, MOMO, AwoooGo, and other products report the same log taxonomy contract",
        },
    ]

    # Non-mapping entries in source_families are silently ignored.
    source_family_items = [
        _source_family_item(source)
        for source in families
        if isinstance(source, Mapping)
    ]

    all_items = [*ordered_items, *source_family_items]
    by_status: dict[str, int] = {}
    for item in all_items:
        status_key = str(item.get("status") or "unknown")
        by_status.setdefault(status_key, 0)
        by_status[status_key] += 1

    return {
        "schema_version": "ai_agent_automation_work_item_progress_v1",
        "ordered_items": ordered_items,
        "source_family_items": source_family_items,
        "rollups": {
            "work_item_count": len(all_items),
            "ordered_work_item_count": len(ordered_items),
            "source_family_work_item_count": len(source_family_items),
            "completed_count": by_status.get("completed", 0),
            "in_progress_count": by_status.get("in_progress", 0),
            "pending_count": by_status.get("pending", 0),
            "blocked_count": by_status.get("blocked", 0),
            "not_started_count": by_status.get("not_started", 0),
            "by_status": by_status,
        },
    }
def _first_operation(
rows: Iterable[Mapping[str, Any]],
operation_type: str,
) -> dict[str, Any] | None:
for row in rows:
if str(row.get("operation_type") or "") == operation_type:
return dict(row)
return None
def _operation_by_id(
rows: Iterable[Mapping[str, Any]],
op_id: Any,
) -> dict[str, Any] | None:
needle = str(op_id or "")
if not needle:
return None
for row in rows:
if str(row.get("op_id") or "") == needle:
return dict(row)
return None
def _stage_status(row: Mapping[str, Any] | None, *, fallback_status: str | None = None) -> str:
if row is None:
return fallback_status or "missing"
return str(row.get("status") or row.get("result_status") or fallback_status or "present")
def _loop_stage(
*,
stage_id: str,
receipt_source: str,
present: bool,
status: str,
ref_id: str | None,
writes_runtime_state: bool,
next_action_if_missing: str,
) -> dict[str, Any]:
return {
"stage_id": stage_id,
"receipt_source": receipt_source,
"present": present,
"status": status,
"ref_id": ref_id,
"writes_runtime_state": writes_runtime_state,
"next_action_if_missing": None if present else next_action_if_missing,
}
def _autonomous_execution_loop_ledger(
    *,
    project_id: str,
    operation_latest_rows: Iterable[Mapping[str, Any] | Any],
    verifier_latest_rows: Iterable[Mapping[str, Any] | Any],
    km_latest_rows: Iterable[Mapping[str, Any] | Any],
    telegram_latest_rows: Iterable[Mapping[str, Any] | Any],
    auto_repair_latest_rows: Iterable[Mapping[str, Any] | Any],
    latest_flow_closure: Mapping[str, Any],
    latest_failure_classification: Mapping[str, Any],
    controlled_retry_package: Mapping[str, Any],
) -> dict[str, Any]:
    """Build the operation-id ledger that proves whether the runtime loop closed.

    The ledger follows one chain: candidate -> check-mode -> controlled apply,
    then looks for the receipts that belong to that apply (auto-repair
    execution, post-apply verifier, KM writeback, Telegram receipt).

    Args:
        project_id: Echoed into the returned payload.
        operation_latest_rows: Operation log rows; the first row of a given
            ``operation_type`` is used as the latest one.
            NOTE(review): this presumes rows arrive newest-first - confirm
            against the query that produces them.
        verifier_latest_rows: Rows matched on ``apply_op_id``.
        km_latest_rows: Rows matched on ``path_type`` or ``related_incident_id``.
        telegram_latest_rows: Rows matched on ``send_status``, ``action`` and
            ``incident_id``.
        auto_repair_latest_rows: Rows matched by finding the apply op id inside
            their ``executed_steps_text`` / ``executed_steps``.
        latest_flow_closure: Its ``closed`` flag gates the ledger's ``closed``.
        latest_failure_classification: Its ``classification`` selects between
            the two closed execution states.
        controlled_retry_package: Its ``next_ai_action`` is used as the next
            executor action for a closed-but-failed apply.

    Returns:
        A dict with the resolved op ids, ``execution_state``, ``closed``,
        ``missing_stage_ids``, ``next_executor_action``, the per-stage list
        and a static ``safety_contract``.
    """
    operation_rows = [_row_mapping(row) for row in operation_latest_rows]
    verifier_rows = [_row_mapping(row) for row in verifier_latest_rows]
    km_rows = [_row_mapping(row) for row in km_latest_rows]
    telegram_rows = [_row_mapping(row) for row in telegram_latest_rows]
    auto_repair_rows = [_row_mapping(row) for row in auto_repair_latest_rows]
    # Resolve the chain backwards from the apply row when there is one;
    # otherwise fall back to the first check-mode row.
    latest_apply = _first_operation(operation_rows, "ansible_apply_executed")
    latest_check = None
    latest_candidate = None
    if latest_apply is not None:
        # The apply row points at its check-mode run via check_mode_op_id,
        # with parent_op_id as the fallback link.
        latest_check = _operation_by_id(
            operation_rows,
            latest_apply.get("check_mode_op_id") or latest_apply.get("parent_op_id"),
        )
    else:
        latest_check = _first_operation(operation_rows, "ansible_check_mode_executed")
    # The candidate id comes from the check-mode row first, then from the
    # apply row if the check-mode row did not provide one.
    source_candidate_op_id = None
    if latest_check is not None:
        source_candidate_op_id = latest_check.get("parent_op_id") or latest_check.get("source_candidate_op_id")
    if latest_apply is not None and not source_candidate_op_id:
        source_candidate_op_id = latest_apply.get("source_candidate_op_id")
    latest_candidate = _operation_by_id(operation_rows, source_candidate_op_id)
    # Only when nothing downstream exists is a bare candidate row picked up.
    if latest_candidate is None and latest_apply is None and latest_check is None:
        latest_candidate = _first_operation(operation_rows, "ansible_candidate_matched")
    # The furthest-progressed row supplies incident/catalog/playbook metadata.
    anchor = latest_apply or latest_check or latest_candidate or {}
    apply_op_id = str((latest_apply or {}).get("op_id") or "")
    # Ids may be known from a referencing row even when the referenced row
    # itself was not found in operation_rows.
    check_mode_op_id = str(
        (latest_check or {}).get("op_id")
        or (latest_apply or {}).get("check_mode_op_id")
        or (latest_apply or {}).get("parent_op_id")
        or ""
    )
    candidate_op_id = str(
        (latest_candidate or {}).get("op_id")
        or source_candidate_op_id
        or ""
    )
    incident_id = str(anchor.get("incident_id") or "")
    catalog_id = str(anchor.get("catalog_id") or "")
    playbook_path = str(anchor.get("playbook_path") or "")
    # Verifier rows only match a non-empty apply op id.
    verifier = next(
        (
            row
            for row in verifier_rows
            if apply_op_id and str(row.get("apply_op_id") or "") == apply_op_id
        ),
        None,
    )
    # The KM path type embeds the first 8 characters of the apply op id.
    km_path_type = f"ansible_apply_receipt:{apply_op_id[:8]}" if apply_op_id else ""
    # A KM row matches either by that path type or by the incident id.
    km = next(
        (
            row
            for row in km_rows
            if (
                km_path_type
                and str(row.get("path_type") or "") == km_path_type
            )
            or (
                incident_id
                and str(row.get("related_incident_id") or "") == incident_id
            )
        ),
        None,
    )
    # Only sent "controlled_apply_result" messages count; when no incident id
    # is known, the incident filter is skipped and any such message matches.
    telegram = next(
        (
            row
            for row in telegram_rows
            if str(row.get("send_status") or "") == "sent"
            and str(row.get("action") or "") == "controlled_apply_result"
            and (
                not incident_id
                or str(row.get("incident_id") or "") == incident_id
            )
        ),
        None,
    )
    # Auto-repair rows are linked by substring: the apply op id must appear in
    # the stringified executed steps.
    auto_repair = next(
        (
            row
            for row in auto_repair_rows
            if apply_op_id
            and apply_op_id
            in str(row.get("executed_steps_text") or row.get("executed_steps") or "")
        ),
        None,
    )
    # Candidate and check-mode count as present when only their id is known.
    candidate_present = bool(latest_candidate or candidate_op_id)
    check_present = bool(latest_check or check_mode_op_id)
    apply_present = latest_apply is not None
    auto_repair_present = auto_repair is not None
    verifier_present = verifier is not None
    km_present = km is not None
    telegram_present = telegram is not None
    stages = [
        _loop_stage(
            stage_id="candidate",
            receipt_source="automation_operation_log:ansible_candidate_matched",
            present=candidate_present,
            status=_stage_status(latest_candidate, fallback_status="inferred_from_check_mode")
            if candidate_present
            else "missing",
            ref_id=candidate_op_id or None,
            writes_runtime_state=False,
            next_action_if_missing="candidate_backfill_worker_enqueue_allowlisted_playbook",
        ),
        _loop_stage(
            stage_id="check_mode",
            receipt_source="automation_operation_log:ansible_check_mode_executed",
            present=check_present,
            status=_stage_status(latest_check, fallback_status="inferred_from_apply_parent")
            if check_present
            else "missing",
            ref_id=check_mode_op_id or None,
            writes_runtime_state=False,
            next_action_if_missing="ansible_check_mode_worker_claims_candidate",
        ),
        _loop_stage(
            stage_id="controlled_apply",
            receipt_source="automation_operation_log:ansible_apply_executed",
            present=apply_present,
            status=_stage_status(latest_apply),
            ref_id=apply_op_id or None,
            writes_runtime_state=True,
            next_action_if_missing="controlled_apply_worker_waits_for_check_mode_success",
        ),
        _loop_stage(
            stage_id="auto_repair_execution_receipt",
            receipt_source="auto_repair_executions:ansible_controlled_apply",
            present=auto_repair_present,
            status=str((auto_repair or {}).get("result_status") or "missing"),
            ref_id=str((auto_repair or {}).get("id") or "") or None,
            writes_runtime_state=True,
            next_action_if_missing="receipt_backfill_records_auto_repair_execution",
        ),
        _loop_stage(
            stage_id="post_apply_verifier",
            receipt_source="incident_evidence.post_execution_state",
            present=verifier_present,
            status=str((verifier or {}).get("verification_result") or "missing"),
            ref_id=str((verifier or {}).get("id") or "") or None,
            writes_runtime_state=True,
            next_action_if_missing="post_apply_verifier_writes_incident_evidence",
        ),
        _loop_stage(
            stage_id="km_playbook_writeback",
            receipt_source="knowledge_entries:ansible_apply_receipt",
            present=km_present,
            status=str((km or {}).get("status") or "missing"),
            ref_id=str((km or {}).get("id") or "") or None,
            writes_runtime_state=True,
            next_action_if_missing="hermes_writes_km_playbook_trust_candidate",
        ),
        _loop_stage(
            stage_id="telegram_receipt",
            receipt_source="awooop_outbound_message:controlled_apply_result",
            present=telegram_present,
            status=str((telegram or {}).get("send_status") or "missing"),
            ref_id=str((telegram or {}).get("message_id") or "") or None,
            writes_runtime_state=True,
            next_action_if_missing="live_apply_gateway_sends_controlled_apply_result_receipt",
        ),
    ]
    missing_stage_ids = [
        str(stage["stage_id"])
        for stage in stages
        if stage["present"] is not True
    ]
    # Closure needs an apply op id, an auto-repair receipt, and the caller's
    # flow closure flag; verifier/KM/Telegram presence is taken from that flag
    # rather than from the local matches above.
    closed = bool(
        apply_op_id
        and auto_repair_present
        and latest_flow_closure.get("closed") is True
    )
    classification = str(latest_failure_classification.get("classification") or "")
    # State ladder: branch order matters, earlier branches win.
    if not candidate_present and not check_present and not apply_present:
        execution_state = "waiting_for_candidate"
        next_executor_action = "candidate_backfill_worker_waits_for_matching_incident"
    elif not apply_present:
        execution_state = "executor_in_progress_or_waiting"
        next_executor_action = "continue_candidate_to_check_mode_to_apply"
    elif closed and classification == "latest_controlled_apply_closed_success":
        execution_state = "closed_success"
        next_executor_action = "keep_receipt_chain_closed"
    elif closed:
        # Closed with any other classification is reported as a failed apply
        # that is ready for repair.
        execution_state = "closed_failed_apply_repair_ready"
        next_executor_action = str(
            controlled_retry_package.get("next_ai_action")
            or "run_no_write_check_mode_replay"
        )
    elif "telegram_receipt" in missing_stage_ids:
        execution_state = "open_waiting_for_live_gateway_receipt"
        next_executor_action = "do_not_fake_send_backfill_wait_for_live_apply_gateway"
    else:
        execution_state = "open_missing_internal_receipts"
        next_executor_action = "backfill_missing_auto_repair_verifier_km_receipts"
    return {
        "schema_version": "ai_agent_autonomous_execution_loop_ledger_v1",
        "project_id": project_id,
        # The most advanced known op id identifies the operation.
        "operation_id": apply_op_id or check_mode_op_id or candidate_op_id or None,
        "root_candidate_op_id": candidate_op_id or None,
        "check_mode_op_id": check_mode_op_id or None,
        "apply_op_id": apply_op_id or None,
        "incident_id": incident_id or None,
        "catalog_id": catalog_id or None,
        "playbook_path": playbook_path or None,
        "execution_state": execution_state,
        "closed": closed,
        "missing_stage_ids": missing_stage_ids,
        "next_executor_action": next_executor_action,
        "stages": stages,
        # Static declaration; none of these flags is computed from the rows.
        "safety_contract": {
            "writes_on_read": False,
            "backfill_may_write_auto_repair_verifier_km": True,
            "backfill_may_send_telegram": False,
            "live_apply_may_send_telegram_gateway_receipt": True,
            "reads_raw_sessions": False,
            "reads_secret_values": False,
        },
    }
def _latest_flow_closure(
    *,
    operation_latest_rows: Iterable[Mapping[str, Any] | Any],
    verifier_latest_rows: Iterable[Mapping[str, Any] | Any],
    km_latest_rows: Iterable[Mapping[str, Any] | Any],
    telegram_latest_rows: Iterable[Mapping[str, Any] | Any],
) -> dict[str, Any]:
    """Report whether the latest controlled apply has all follow-up receipts.

    The first ``ansible_apply_executed`` row in ``operation_latest_rows`` is
    taken as the latest apply. The flow is closed when a post-apply verifier
    row, a KM writeback row and a sent Telegram receipt are all found for it.

    Args:
        operation_latest_rows: Operation log rows.
        verifier_latest_rows: Rows matched on ``apply_op_id``.
        km_latest_rows: Rows matched on ``path_type`` or ``related_incident_id``.
        telegram_latest_rows: Rows matched on ``send_status``, ``action`` and
            ``incident_id``.

    Returns:
        A dict with ``apply_op_id``, ``incident_id``, the three ``has_*``
        booleans, ``closed`` and the list of ``missing`` receipt names.
    """
    operation_rows = [_row_mapping(row) for row in operation_latest_rows]
    verifier_rows = [_row_mapping(row) for row in verifier_latest_rows]
    km_rows = [_row_mapping(row) for row in km_latest_rows]
    telegram_rows = [_row_mapping(row) for row in telegram_latest_rows]
    latest_apply = next(
        (
            row
            for row in operation_rows
            if str(row.get("operation_type") or "") == "ansible_apply_executed"
        ),
        None,
    )
    if latest_apply is None:
        return {
            "apply_op_id": None,
            "incident_id": None,
            "has_post_apply_verifier": False,
            "has_km_writeback": False,
            "has_telegram_receipt": False,
            "closed": False,
            "missing": [
                "ansible_apply_executed",
                "post_apply_verifier",
                "km_writeback",
                "telegram_receipt",
            ],
        }
    apply_op_id = str(latest_apply.get("op_id") or "")
    incident_id = str(latest_apply.get("incident_id") or "")
    # The KM path type embeds the first 8 characters of the apply op id.
    km_path_type = f"ansible_apply_receipt:{apply_op_id[:8]}" if apply_op_id else ""
    # An empty apply op id must never match: otherwise any verifier row with a
    # blank apply_op_id would be counted as this apply's receipt. This mirrors
    # the guarded matching in _autonomous_execution_loop_ledger.
    has_verifier = bool(apply_op_id) and any(
        str(row.get("apply_op_id") or "") == apply_op_id
        for row in verifier_rows
    )
    # Same guard for the KM path type: "" == "" must not count as a writeback.
    has_km = any(
        bool(
            (
                km_path_type
                and str(row.get("path_type") or "") == km_path_type
            )
            or (
                incident_id
                and str(row.get("related_incident_id") or "") == incident_id
            )
        )
        for row in km_rows
    )
    # Only sent "controlled_apply_result" messages count; when no incident id
    # is known the incident filter is skipped.
    has_telegram = any(
        str(row.get("send_status") or "") == "sent"
        and str(row.get("action") or "") == "controlled_apply_result"
        and (
            not incident_id
            or str(row.get("incident_id") or "") == incident_id
        )
        for row in telegram_rows
    )
    missing = [
        name
        for name, present in (
            ("post_apply_verifier", has_verifier),
            ("km_writeback", has_km),
            ("telegram_receipt", has_telegram),
        )
        if not present
    ]
    return {
        "apply_op_id": apply_op_id or None,
        "incident_id": incident_id or None,
        "has_post_apply_verifier": has_verifier,
        "has_km_writeback": has_km,
        "has_telegram_receipt": has_telegram,
        "closed": not missing,
        "missing": missing,
    }
def _latest_failure_classification(
    *,
    operation_latest_rows: Iterable[Mapping[str, Any] | Any],
    verifier_latest_rows: Iterable[Mapping[str, Any] | Any],
    latest_flow_closure: Mapping[str, Any],
) -> dict[str, Any]:
    """Classify the newest controlled apply outcome without exposing command output.

    The first ``ansible_apply_executed`` row is classified from three inputs:
    its return code, the verification result of the verifier row that carries
    the same apply op id, and the ``closed`` flag of ``latest_flow_closure``.

    Returns:
        A dict with ``classification``, ``action``, a ``target_selector``
        identifying the apply, an ``evidence`` summary and, when an apply was
        observed, the list of ``safe_next_steps``.
    """
    operation_rows = [_row_mapping(entry) for entry in operation_latest_rows]
    verifier_rows = [_row_mapping(entry) for entry in verifier_latest_rows]

    apply_row = None
    for candidate in operation_rows:
        if str(candidate.get("operation_type") or "") == "ansible_apply_executed":
            apply_row = candidate
            break

    if apply_row is None:
        return {
            "schema_version": "ai_agent_executor_failure_classification_v1",
            "classification": "no_controlled_apply_observed",
            "action": "wait_for_controlled_apply_receipt",
            "target_selector": {},
            "evidence": {
                "latest_flow_closed": False,
                "output_tail_in_readback": False,
                "unredacted_output_required": False,
            },
        }

    apply_op_id = str(apply_row.get("op_id") or "")
    incident_id = str(apply_row.get("incident_id") or "")
    returncode = _int_value(apply_row.get("returncode"))

    # Verifier rows are only considered when the apply has a non-empty op id.
    verifier_row = {}
    if apply_op_id:
        for candidate in verifier_rows:
            if str(candidate.get("apply_op_id") or "") == apply_op_id:
                verifier_row = candidate
                break
    verification_result = str(verifier_row.get("verification_result") or "").lower()
    flow_closed = latest_flow_closure.get("closed") is True

    if returncode == 0:
        # An empty verification result is accepted alongside "success".
        if flow_closed and verification_result in {"success", ""}:
            classification = "latest_controlled_apply_closed_success"
            action = "keep_receipt_chain_closed"
        else:
            classification = "controlled_apply_success_receipt_gap"
            action = "backfill_missing_verifier_km_or_telegram_receipt"
    elif flow_closed:
        classification = "closed_failed_apply_requires_ai_repair"
        action = "queue_check_mode_replay_and_playbook_repair_candidate"
    else:
        classification = "failed_apply_receipt_gap_requires_backfill_then_repair"
        action = "backfill_missing_receipts_then_queue_repair_candidate"

    target_selector = {
        "incident_id": incident_id or None,
        "apply_op_id": apply_op_id or None,
        "parent_op_id": apply_row.get("parent_op_id"),
        "catalog_id": apply_row.get("catalog_id"),
        "playbook_path": apply_row.get("playbook_path"),
        "execution_mode": apply_row.get("execution_mode"),
    }
    evidence = {
        "operation_status": apply_row.get("status"),
        "returncode": apply_row.get("returncode"),
        "verification_result": verification_result or None,
        "latest_flow_closed": flow_closed,
        "has_post_apply_verifier": latest_flow_closure.get("has_post_apply_verifier") is True,
        "has_km_writeback": latest_flow_closure.get("has_km_writeback") is True,
        "has_telegram_receipt": latest_flow_closure.get("has_telegram_receipt") is True,
        "output_tail_in_readback": False,
        "unredacted_output_required": False,
    }
    return {
        "schema_version": "ai_agent_executor_failure_classification_v1",
        "classification": classification,
        "action": action,
        "target_selector": target_selector,
        "evidence": evidence,
        "safe_next_steps": [
            "run_no_write_check_mode_replay",
            "extract_sanitized_failed_task_summary",
            "write_km_playbook_repair_candidate",
            "retry_controlled_apply_only_after_check_mode_passes",
        ],
    }
def _controlled_retry_package(classification: Mapping[str, Any]) -> dict[str, Any]:
"""Build the next no-write repair package from the public failure classification."""
target_selector = classification.get("target_selector")
if not isinstance(target_selector, Mapping):
target_selector = {}
apply_op_id = str(target_selector.get("apply_op_id") or "")
repair_required = classification.get("classification") in {
"closed_failed_apply_requires_ai_repair",
"failed_apply_receipt_gap_requires_backfill_then_repair",
}
return {
"schema_version": "ai_agent_controlled_retry_package_v1",
"package_id": (
f"ansible_retry:{apply_op_id[:8]}"
if repair_required and apply_op_id
else None
),
"status": (
"ready_for_no_write_check_mode_replay"
if repair_required
else "not_required_for_latest_apply"
),
"target_selector": dict(target_selector),
"source_of_truth": {
"catalog_id": target_selector.get("catalog_id"),
"playbook_path": target_selector.get("playbook_path"),
"source_diff_required_before_retry": True,
"failed_task_summary_required": True,
},
"preflight": {
"no_write_check_mode_replay_required": repair_required,
"reuse_parent_check_mode_op_id": target_selector.get("parent_op_id"),
"unredacted_output_required": False,
"secret_value_collection_allowed": False,
},
"apply_gate": {
"controlled_apply_retry_allowed_now": False,
"opens_legacy_runner": False,
"requires_check_mode_success_before_apply": repair_required,
},
"rollback": {
"rollback_candidate_required": repair_required,
"destructive_rollback_allowed": False,
"rollback_plan_source": "playbook_repair_candidate_after_failed_task_summary",
},
"post_apply": {
"post_apply_verifier_required": repair_required,
"km_playbook_trust_writeback_required": repair_required,
"telegram_receipt_required": repair_required,
},
"next_ai_action": (
"run_no_write_check_mode_replay"
if repair_required
else "keep_latest_apply_receipts"
),
}
def classify_deploy_control_plane_observation(
    *,
    run_status: str,
    is_latest_deploy_intent: bool,
    active_task_container_count: int,
    production_marker_hit: bool,
    latest_flow_closed: bool,
    runner_capacity_ok: bool,
    runner_forbidden_label_count: int,
) -> dict[str, Any]:
    """Turn one CD run observation into an internal PlayBook decision.

    The run status is normalised (stripped, lower-cased, ``"unknown"`` when
    empty) and then checked against an ordered list of rules; the first rule
    that applies determines the classification and the action.

    Args:
        run_status: Status of the observed run.
        is_latest_deploy_intent: False marks the run as superseded.
        active_task_container_count: Number of active task containers.
        production_marker_hit: Whether the production marker was observed.
        latest_flow_closed: Whether the latest apply flow is closed.
        runner_capacity_ok: Whether the runner lane has capacity.
        runner_forbidden_label_count: Count of forbidden labels on the runner.

    Returns:
        A dict with ``classification``, ``action``, the normalised ``inputs``
        (counts clamped to zero or above), the ``internal_writeback``
        requirements and the ``safety_boundary`` flags.
    """
    normalized_status = str(run_status or "unknown").strip().lower()
    task_active = active_task_container_count > 0
    lane_safe = runner_capacity_ok and runner_forbidden_label_count == 0
    truth_ok = production_marker_hit and latest_flow_closed
    is_running = normalized_status == "running"
    is_failure = normalized_status == "failure"

    def _decide() -> tuple[str, str]:
        # Rule order is significant: the first matching rule wins.
        if not is_latest_deploy_intent:
            return (
                "superseded_run_skip",
                "skip_cd_work_and_attach_to_superseded_intent",
            )
        if truth_ok and normalized_status == "success":
            return (
                "deploy_succeeded_marker_hit",
                "close_deploy_intent_and_write_receipts",
            )
        if is_running and task_active and lane_safe:
            return (
                "running_with_controlled_task",
                "continue_observing_without_restarting_runner",
            )
        if is_running and not task_active and truth_ok:
            return (
                "running_no_container_stale_ui",
                "treat_gitea_spinner_as_stale_and_keep_production_truth",
            )
        if is_failure and truth_ok:
            return (
                "failed_run_superseded_by_marker_hit",
                "record_non_blocking_failure_and_keep_current_marker",
            )
        if is_failure:
            return (
                "real_failure_requires_playbook_repair",
                "open_cd_repair_playbook_with_target_selector_and_verifier",
            )
        if not lane_safe:
            return (
                "runner_lane_guardrail_violation",
                "fail_closed_runner_lane_and_open_repair_playbook",
            )
        return (
            "waiting_for_controlled_observation",
            "wait_for_mcp_observation_or_deploy_intent_update",
        )

    classification, action = _decide()

    # The same three classifications require a Telegram receipt and are the
    # only ones flagged as writing runtime state.
    state_changing = classification in {
        "deploy_succeeded_marker_hit",
        "real_failure_requires_playbook_repair",
        "runner_lane_guardrail_violation",
    }

    inputs = {
        "run_status": normalized_status,
        "is_latest_deploy_intent": is_latest_deploy_intent,
        "active_task_container_count": max(0, active_task_container_count),
        "production_marker_hit": production_marker_hit,
        "latest_flow_closed": latest_flow_closed,
        "runner_capacity_ok": runner_capacity_ok,
        "runner_forbidden_label_count": max(0, runner_forbidden_label_count),
    }
    internal_writeback = {
        "mcp_event_type": "deploy_run_observation",
        "rag_context_required": True,
        "km_writeback_required": True,
        "playbook_route_required": True,
        "log_projection_required": True,
        "telegram_receipt_required": state_changing,
    }
    safety_boundary = {
        "reads_raw_sessions": False,
        "reads_secret_values": False,
        "opens_legacy_runner": False,
        "uses_force_push": False,
        "writes_runtime_state": state_changing,
    }
    return {
        "schema_version": "ai_agent_deploy_control_plane_decision_v1",
        "classification": classification,
        "action": action,
        "inputs": inputs,
        "internal_writeback": internal_writeback,
        "safety_boundary": safety_boundary,
    }
def _control_plane_integration() -> dict[str, Any]:
    """Describe the internal MCP / RAG / KM / PlayBook / log control loop.

    The payload is a static declaration: the sensors, RAG context queries,
    PlayBook decision classes and the KM / log projection contracts, plus
    three worked classifier examples produced by
    ``classify_deploy_control_plane_observation``.

    Returns:
        The declaration dict. Every count in ``rollups`` is derived from the
        list it summarises, so the counts cannot drift from the content.
    """
    classifier_examples = [
        # Successful run with the production marker hit.
        classify_deploy_control_plane_observation(
            run_status="success",
            is_latest_deploy_intent=True,
            active_task_container_count=0,
            production_marker_hit=True,
            latest_flow_closed=True,
            runner_capacity_ok=True,
            runner_forbidden_label_count=0,
        ),
        # Run still reported as running although no task container is active.
        classify_deploy_control_plane_observation(
            run_status="running",
            is_latest_deploy_intent=True,
            active_task_container_count=0,
            production_marker_hit=True,
            latest_flow_closed=True,
            runner_capacity_ok=True,
            runner_forbidden_label_count=0,
        ),
        # Failed run without a production marker.
        classify_deploy_control_plane_observation(
            run_status="failure",
            is_latest_deploy_intent=True,
            active_task_container_count=0,
            production_marker_hit=False,
            latest_flow_closed=False,
            runner_capacity_ok=True,
            runner_forbidden_label_count=0,
        ),
    ]
    mcp_sensors = [
        {
            "sensor_id": "gitea_actions_run_observer",
            "normalized_event": "RunObservation",
            "raw_secret_access_allowed": False,
        },
        {
            "sensor_id": "controlled_runner_lane_observer",
            "normalized_event": "RunnerLaneState",
            "raw_runner_token_access_allowed": False,
        },
        {
            "sensor_id": "production_marker_observer",
            "normalized_event": "ProductionTruthSnapshot",
            "raw_session_access_allowed": False,
        },
        {
            "sensor_id": "browser_smoke_observer",
            "normalized_event": "FrontendTruthSnapshot",
            "raw_conversation_access_allowed": False,
        },
    ]
    rag_context_queries = [
        "runner_pressure_buildkit_stockplatform_collision",
        "controlled_cd_lane_capacity_label_guardrails",
        "autonomous_runtime_marker_receipt_contract",
    ]
    playbook_decision_classes = [
        "deploy_succeeded_marker_hit",
        "running_with_controlled_task",
        "running_no_container_stale_ui",
        "superseded_run_skip",
        "failed_run_superseded_by_marker_hit",
        "real_failure_requires_playbook_repair",
        "runner_lane_guardrail_violation",
    ]
    return {
        "schema_version": "ai_agent_autonomous_runtime_internal_loop_v1",
        "status": "mcp_rag_km_playbook_log_control_loop_declared",
        "purpose": (
            "把 Gitea run、runner lane、production marker、browser smoke 與 executor receipt "
            "先收斂成內部事件,再由 PlayBook decision 推進或跳過。"
        ),
        "mcp_sensors": mcp_sensors,
        "rag_context_queries": rag_context_queries,
        "playbook_decision_classes": playbook_decision_classes,
        "km_writeback_contract": {
            "knowledge_entry_path_type": "deploy_control_plane_decision:<deploy_intent_id>",
            "required_refs": [
                "deploy_intent_id",
                "target_sha",
                "gitea_run_id",
                "production_marker",
                "latest_flow_closure",
                "runner_lane_state",
            ],
            "stores_raw_logs": False,
            "stores_secret_values": False,
        },
        "log_projection_contract": {
            "timeline_event_type": "ai_agent_deploy_control_plane_decision",
            "logbook_projection": "summary_only_after_verifier",
            "raw_html_or_long_log_allowed": False,
        },
        "classifier_examples": classifier_examples,
        "rollups": {
            # Derived with len() instead of hard-coded literals so adding or
            # removing an entry above keeps the rollups truthful.
            "mcp_sensor_count": len(mcp_sensors),
            "rag_context_query_count": len(rag_context_queries),
            "playbook_decision_class_count": len(playbook_decision_classes),
            "classifier_example_count": len(classifier_examples),
        },
    }
def build_runtime_receipt_readback_from_rows(
    *,
    project_id: str = _DEFAULT_PROJECT_ID,
    lookback_hours: int = _DEFAULT_LOOKBACK_HOURS,
    db_read_status: str = "ok",
    operation_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    operation_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    auto_repair_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    auto_repair_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    verifier_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    verifier_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    km_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    km_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    telegram_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    telegram_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    mcp_gateway_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    legacy_mcp_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    service_log_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    executor_log_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    timeline_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    playbook_trust_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    error_type: str | None = None,
) -> dict[str, Any]:
    """Assemble the live executor receipt readback from rows the caller already fetched.

    The ``*_count_rows`` iterables are reduced to per-status summaries and the
    ``*_latest_rows`` iterables are filtered down to an explicit key allowlist
    before being exposed. The derived sections (flow closure, failure
    classification, retry package, loop ledger, trace ledger, log taxonomy,
    decision wiring, learning loop, work-item progress) come from the module's
    private builders.

    Args:
        project_id: Project identifier echoed into the readback and the loop ledger.
        lookback_hours: Window size reported in the payload; clamped to at least 1.
        db_read_status: Caller-supplied read status, echoed verbatim.
        error_type: When truthy, an ``error`` section carrying this type name is added.

    Returns:
        The readback dictionary, with ``writes_on_read`` always ``False``.
    """
    # Materialise the "latest" iterables once: each list feeds several helpers below.
    latest_operations = list(operation_latest_rows)
    latest_auto_repairs = list(auto_repair_latest_rows)
    latest_verifiers = list(verifier_latest_rows)
    latest_km_entries = list(km_latest_rows)
    latest_telegram = list(telegram_latest_rows)

    # Keyed by the keyword names the downstream builders expect, so the mapping can be
    # unpacked directly into their calls.
    summaries: dict[str, Any] = {
        "operation_summary": _operation_counts(operation_count_rows),
        "auto_repair_summary": _status_counts(auto_repair_count_rows, status_key="result_status"),
        "verifier_summary": _status_counts(verifier_count_rows, status_key="verification_result"),
        "km_summary": _status_counts(km_count_rows, status_key="status"),
        "telegram_summary": _status_counts(telegram_count_rows, status_key="send_status"),
        "mcp_gateway_summary": _status_counts(mcp_gateway_count_rows, status_key="status"),
        "legacy_mcp_summary": _status_counts(legacy_mcp_count_rows, status_key="status"),
        "service_log_summary": _status_counts(service_log_count_rows, status_key="status"),
        "executor_log_summary": _status_counts(executor_log_count_rows, status_key="status"),
        "timeline_summary": _status_counts(timeline_count_rows, status_key="status"),
        "playbook_trust_summary": _status_counts(playbook_trust_count_rows, status_key="status"),
    }

    def _pick(*names: str) -> dict[str, Any]:
        """Select a subset of the summaries for builders that take only some of them."""
        return {name: summaries[name] for name in names}

    def _with_latest(summary: Mapping[str, Any], rows: list[Any], keys: tuple[str, ...]) -> dict[str, Any]:
        """Merge a status summary with its allowlisted latest rows."""
        return {**summary, "latest": _sanitize_latest_rows(rows, allowed_keys=keys)}

    flow_closure = _latest_flow_closure(
        operation_latest_rows=latest_operations,
        verifier_latest_rows=latest_verifiers,
        km_latest_rows=latest_km_entries,
        telegram_latest_rows=latest_telegram,
    )
    failure_classification = _latest_failure_classification(
        operation_latest_rows=latest_operations,
        verifier_latest_rows=latest_verifiers,
        latest_flow_closure=flow_closure,
    )
    retry_package = _controlled_retry_package(failure_classification)
    loop_ledger = _autonomous_execution_loop_ledger(
        project_id=project_id,
        operation_latest_rows=latest_operations,
        verifier_latest_rows=latest_verifiers,
        km_latest_rows=latest_km_entries,
        telegram_latest_rows=latest_telegram,
        auto_repair_latest_rows=latest_auto_repairs,
        latest_flow_closure=flow_closure,
        latest_failure_classification=failure_classification,
        controlled_retry_package=retry_package,
    )
    trace_ledger = _build_trace_ledger(
        **summaries,
        latest_flow_closure=flow_closure,
        loop_ledger=loop_ledger,
    )
    log_integration_taxonomy = _build_log_integration_taxonomy(**summaries)
    agent_decision_wiring = _build_agent_decision_wiring(
        **_pick(
            "operation_summary",
            "verifier_summary",
            "km_summary",
            "mcp_gateway_summary",
            "legacy_mcp_summary",
            "service_log_summary",
            "timeline_summary",
            "playbook_trust_summary",
        ),
        log_integration_taxonomy=log_integration_taxonomy,
        loop_ledger=loop_ledger,
        latest_flow_closure=flow_closure,
    )
    learning_loop = _build_learning_loop_readback(
        **_pick(
            "operation_summary",
            "verifier_summary",
            "km_summary",
            "playbook_trust_summary",
        ),
        log_integration_taxonomy=log_integration_taxonomy,
        agent_decision_wiring=agent_decision_wiring,
        latest_flow_closure=flow_closure,
        latest_failure_classification=failure_classification,
        controlled_retry_package=retry_package,
        loop_ledger=loop_ledger,
    )
    work_item_progress = _build_work_item_progress(
        trace_ledger=trace_ledger,
        log_integration_taxonomy=log_integration_taxonomy,
        agent_decision_wiring=agent_decision_wiring,
        learning_loop=learning_loop,
        db_read_status=db_read_status,
    )

    operation_summary = summaries["operation_summary"]
    gateway_summary = summaries["mcp_gateway_summary"]
    legacy_summary = summaries["legacy_mcp_summary"]
    apply_summary = operation_summary.get("ansible_apply_executed") or {}

    readback = {
        "schema_version": _LIVE_READBACK_SCHEMA_VERSION,
        "project_id": project_id,
        "lookback_hours": max(1, int(lookback_hours or _DEFAULT_LOOKBACK_HOURS)),
        "db_read_status": db_read_status,
        "writes_on_read": False,
        "ansible_operations": {
            "counts": operation_summary,
            "latest": _sanitize_latest_rows(
                latest_operations,
                allowed_keys=(
                    "op_id",
                    "parent_op_id",
                    "operation_type",
                    "status",
                    "actor",
                    "incident_id",
                    "catalog_id",
                    "playbook_path",
                    "execution_mode",
                    "source_candidate_op_id",
                    "check_mode_op_id",
                    "risk_level",
                    "controlled_apply_allowed",
                    "returncode",
                    "duration_ms",
                    "created_at",
                ),
            ),
        },
        "auto_repair_execution_receipt": _with_latest(
            summaries["auto_repair_summary"],
            latest_auto_repairs,
            (
                "id",
                "incident_id",
                "catalog_id",
                "playbook_name",
                "result_status",
                "triggered_by",
                "risk_level",
                "execution_time_ms",
                "created_at",
            ),
        ),
        "ansible_apply_executed": {
            "total": _int_value(apply_summary.get("total")),
            "recent": _int_value(apply_summary.get("recent")),
            "by_status": apply_summary.get("by_status") or {},
        },
        "post_apply_verifier": _with_latest(
            summaries["verifier_summary"],
            latest_verifiers,
            (
                "id",
                "incident_id",
                "matched_playbook_id",
                "verification_result",
                "apply_op_id",
                "catalog_id",
                "playbook_path",
                "returncode",
                "collected_at",
            ),
        ),
        "km_writeback": _with_latest(
            summaries["km_summary"],
            latest_km_entries,
            (
                "id",
                "title",
                "related_incident_id",
                "related_playbook_id",
                "path_type",
                "status",
                "created_by",
                "created_at",
            ),
        ),
        "telegram_receipt": _with_latest(
            summaries["telegram_summary"],
            latest_telegram,
            (
                "message_id",
                "run_id",
                "message_type",
                "send_status",
                "provider_message_id",
                "incident_id",
                "action",
                "queued_at",
                "sent_at",
            ),
        ),
        "mcp_context": {
            "gateway": gateway_summary,
            "legacy": legacy_summary,
            "total": _trace_total(gateway_summary) + _trace_total(legacy_summary),
            "recent": _trace_recent(gateway_summary) + _trace_recent(legacy_summary),
        },
        "service_log_evidence": summaries["service_log_summary"],
        "executor_log_projection": summaries["executor_log_summary"],
        "timeline_projection": summaries["timeline_summary"],
        "playbook_trust": summaries["playbook_trust_summary"],
        "latest_flow_closure": flow_closure,
        "latest_failure_classification": failure_classification,
        "controlled_retry_package": retry_package,
        "autonomous_execution_loop_ledger": loop_ledger,
        "trace_ledger": trace_ledger,
        "log_integration_taxonomy": log_integration_taxonomy,
        "agent_decision_wiring": agent_decision_wiring,
        "learning_loop": learning_loop,
        "work_item_progress": work_item_progress,
    }
    if error_type:
        # Only the exception type name is surfaced; the message is a fixed pointer to the logs.
        readback["error"] = {
            "type": error_type,
            "message": "runtime receipt DB read failed; see API logs",
        }
    return readback
def _attach_runtime_receipt_readback(
    payload: dict[str, Any],
    readback: dict[str, Any],
) -> dict[str, Any]:
    """Attach a receipt readback to ``payload`` and mirror its counts into ``rollups``.

    ``payload`` is mutated in place: ``runtime_receipt_readback`` is set to
    ``readback`` and the ``live_*`` keys are written into ``payload["rollups"]``
    (created when absent), overwriting any previous ``live_*`` values.

    Every section of ``readback`` is read through the same None-tolerant
    accessor, so a missing section and a section explicitly set to ``None``
    both contribute zero instead of raising ``AttributeError``.

    Args:
        payload: Control-plane payload to update.
        readback: Readback dictionary whose sections are summarised.

    Returns:
        The same ``payload`` object, for call chaining.
    """

    def _section(name: str) -> Any:
        """Return the named readback section, or an empty dict when missing/None."""
        return readback.get(name) or {}

    def _total(name: str) -> int:
        """Read the integer ``total`` of a summary section."""
        return _int_value(_section(name).get("total"))

    def _rollup(name: str, key: str) -> int:
        """Read an integer from the ``rollups`` sub-dict of a section."""
        return _int_value((_section(name).get("rollups") or {}).get(key))

    failure_class = _section("latest_failure_classification").get("classification")
    repair_required = failure_class in {
        "closed_failed_apply_requires_ai_repair",
        "failed_apply_receipt_gap_requires_backfill_then_repair",
    }
    retry_ready = (
        _section("controlled_retry_package").get("status")
        == "ready_for_no_write_check_mode_replay"
    )

    payload["runtime_receipt_readback"] = readback
    rollups = payload.setdefault("rollups", {})
    # Key order is kept stable because it determines the serialised field order.
    rollups.update({
        "live_ansible_apply_executed_count": _total("ansible_apply_executed"),
        "live_auto_repair_execution_receipt_count": _total("auto_repair_execution_receipt"),
        "live_post_apply_verifier_count": _total("post_apply_verifier"),
        "live_km_writeback_count": _total("km_writeback"),
        "live_telegram_receipt_count": _total("telegram_receipt"),
        # Flags are only counted when the value is literally True, not merely truthy.
        "live_executor_latest_flow_closed_count": int(
            _section("latest_flow_closure").get("closed") is True
        ),
        "live_autonomous_execution_loop_closed_count": int(
            _section("autonomous_execution_loop_ledger").get("closed") is True
        ),
        "live_executor_latest_apply_repair_required_count": int(repair_required),
        "live_executor_retry_package_ready_count": int(retry_ready),
        "live_mcp_context_count": _total("mcp_context"),
        "live_service_log_evidence_count": _total("service_log_evidence"),
        "live_executor_log_projection_count": _total("executor_log_projection"),
        "live_timeline_projection_count": _total("timeline_projection"),
        "live_playbook_trust_signal_count": _total("playbook_trust"),
        "live_trace_recorded_stage_count": _int_value(
            _section("trace_ledger").get("recorded_stage_count")
        ),
        "live_trace_required_missing_count": len(
            _section("trace_ledger").get("missing_required_stage_ids") or []
        ),
        "live_log_source_family_count": _rollup(
            "log_integration_taxonomy", "source_family_count"
        ),
        "live_log_active_source_family_count": _rollup(
            "log_integration_taxonomy", "active_source_family_count"
        ),
        "live_log_label_dimension_count": _rollup(
            "log_integration_taxonomy", "label_dimension_count"
        ),
        "live_log_classified_event_total": _rollup(
            "log_integration_taxonomy", "classified_event_total"
        ),
        "live_log_recent_classified_event_total": _rollup(
            "log_integration_taxonomy", "recent_classified_event_total"
        ),
        "live_agent_decision_wiring_stage_count": _rollup(
            "agent_decision_wiring", "stage_count"
        ),
        "live_agent_decision_wiring_required_present_count": _rollup(
            "agent_decision_wiring", "required_stage_present_count"
        ),
        "live_agent_decision_wiring_required_missing_count": _rollup(
            "agent_decision_wiring", "required_stage_missing_count"
        ),
        "live_agent_decision_wiring_complete_count": int(
            _section("agent_decision_wiring").get("status") == "completed"
        ),
        "live_learning_loop_stage_count": _rollup("learning_loop", "stage_count"),
        "live_learning_loop_required_present_count": _rollup(
            "learning_loop", "required_stage_present_count"
        ),
        "live_learning_loop_required_missing_count": _rollup(
            "learning_loop", "required_stage_missing_count"
        ),
        "live_learning_loop_complete_count": int(
            _section("learning_loop").get("status") == "completed"
        ),
        # The rollup key and the source key intentionally differ (count vs total).
        "live_learning_loop_similar_case_source_count": _rollup(
            "learning_loop", "similar_case_source_total"
        ),
        "live_work_item_count": _rollup("work_item_progress", "work_item_count"),
        "live_work_item_completed_count": _rollup("work_item_progress", "completed_count"),
        "live_work_item_in_progress_count": _rollup("work_item_progress", "in_progress_count"),
        "live_work_item_pending_count": _rollup("work_item_progress", "pending_count"),
        "live_work_item_blocked_count": _rollup("work_item_progress", "blocked_count"),
    })
    return payload
def build_ai_agent_autonomous_runtime_control() -> dict[str, Any]:
    """Build the current AI Agent autonomy control-plane readback.

    The payload is assembled from static declarations (report cadences,
    executor receipt types, hard blockers, legacy overrides, agent roles) plus
    the runtime switches read from ``settings``. A receipt readback built from
    no rows, with ``db_read_status="not_queried"``, is attached so the payload
    shape matches the live variant without touching the database.

    Returns:
        The validated control-plane payload.

    Raises:
        ValueError: Propagated from ``_validate_payload``. The per-tier policy
            flags are derived from the configured allowed risk levels, and the
            validator requires low, medium and high to all be allowed.
    """
    allowed_risks = _allowed_risk_levels()
    report_cadences = [
        {
            "cadence": "daily",
            "display_name": "日報",
            "schedule": f"每日 {DAILY_REPORT_HOUR_TAIPEI:02d}:00 台北時間",
            "worker": "report_generation_service.run_daily_report_loop",
            "telegram_gateway_delivery_enabled": True,
            "direct_bot_api_allowed": False,
            "receipt_source": "daily_report_sent log + Telegram Gateway result",
        },
        {
            "cadence": "weekly",
            "display_name": "週報",
            # Weekday 4 gets the friendly Friday label; any other value falls back to
            # the raw weekday number.
            "schedule": (
                f"每週五 {WEEKLY_REPORT_HOUR_TAIPEI:02d}:00 台北時間"
                if WEEKLY_REPORT_WEEKDAY_TAIPEI == 4
                else f"每週 weekday={WEEKLY_REPORT_WEEKDAY_TAIPEI} {WEEKLY_REPORT_HOUR_TAIPEI:02d}:00 台北時間"
            ),
            "worker": "report_generation_service.run_weekly_report_loop",
            "telegram_gateway_delivery_enabled": True,
            "direct_bot_api_allowed": False,
            "receipt_source": "weekly_report_sent log + Telegram Gateway result",
        },
        {
            "cadence": "monthly",
            "display_name": "月報",
            # Day-of-month and hour need a separator; rendered back to back they read
            # as a single number (day 1, hour 9 -> "109:00").
            "schedule": f"每月 {MONTHLY_REPORT_DAY_TAIPEI} 日 {MONTHLY_REPORT_HOUR_TAIPEI:02d}:00 台北時間",
            "worker": "report_generation_service.run_monthly_report_loop",
            "telegram_gateway_delivery_enabled": True,
            "direct_bot_api_allowed": False,
            "receipt_source": "monthly_report_sent log + Telegram Gateway result",
        },
    ]
    executor_receipts = [
        {
            "operation_type": "ansible_candidate_matched",
            "owner_agent": "Hermes",
            "purpose": "把修復候選寫入 executor 可認領佇列",
            "writes_runtime_state": False,
        },
        {
            "operation_type": "ansible_check_mode_executed",
            "owner_agent": "AwoooP Ansible check-mode worker",
            "purpose": "執行 ansible-playbook --check --diff 並留下乾跑收據",
            "writes_runtime_state": False,
        },
        {
            "operation_type": "ansible_apply_executed",
            "owner_agent": "AwoooP controlled apply worker",
            "purpose": "check-mode 通過後,對 allowlisted low / medium / high PlayBook 受控 apply",
            "writes_runtime_state": True,
        },
        {
            "operation_type": "incident_evidence.post_execution_state",
            "owner_agent": "post_apply_verifier",
            "purpose": "apply 後寫入 verifier 結果與 post-execution evidence",
            "writes_runtime_state": True,
        },
        {
            "operation_type": "knowledge_entries",
            "owner_agent": "Hermes",
            "purpose": "把已驗證執行沉澱成 KM / PlayBook trust 候選",
            "writes_runtime_state": True,
        },
    ]
    hard_blockers = [
        "secret_token_private_key_cookie_session_auth_header_cleartext",
        "drop_truncate_restore_prune_destructive_database_operation",
        "reboot_node_drain_irreversible_firewall_or_host_lockout",
        "credentialed_exploit_or_external_active_scan",
        "new_paid_provider_cost_ceiling_or_provider_switch_without_replay_shadow_canary",
        "force_push_delete_repo_refs_or_visibility_change",
        "critical_or_break_glass_route_without_explicit_break_glass_contract",
    ]
    legacy_overrides = [
        {
            "legacy_area": "report_status_board_no_live_send",
            "current_effect": "overridden",
            "new_behavior": "日報 / 週報 / 月報透過 Telegram Gateway 排程派送",
        },
        {
            "legacy_area": "report_live_delivery_owner_review_required",
            "current_effect": "overridden",
            "new_behavior": "報告派送走低/中/高風險自動化政策critical 才 break-glass",
        },
        {
            "legacy_area": "high_risk_owner_review_queue",
            "current_effect": "overridden_for_high_non_critical",
            "new_behavior": "high 風險允許 controlled applycritical / hard blocker 仍不自動",
        },
        {
            "legacy_area": "telegram_no_send_preview_only",
            "current_effect": "overridden",
            "new_behavior": "用 Telegram Gateway 實送報告與 actionable receipt不直接暴露 Bot API",
        },
    ]
    control_plane_integration = _control_plane_integration()
    integration_rollups = control_plane_integration["rollups"]
    payload = {
        "schema_version": _SCHEMA_VERSION,
        "generated_at": datetime.now(UTC).isoformat(),
        "program_status": {
            "current_task_id": "P2-416-D1N",
            "status": "current_directive_control_plane_active",
            "runtime_authority": _RUNTIME_AUTHORITY,
            "deploy_readback_marker": _DEPLOY_READBACK_MARKER,
            "deploy_attempt_note": _DEPLOY_ATTEMPT_NOTE,
            "legacy_no_send_no_live_rules_overridden": True,
            "implementation_completion_percent": 88,
            "status_note": (
                "目前有效規則low / medium / high 風險由 AI Agent 在 allowlist、"
                "Ansible check-mode、verifier、rollback、KM 與 Telegram receipt 下受控自動處理。"
            ),
        },
        "current_policy": {
            "low_risk_controlled_apply_allowed": "low" in allowed_risks,
            "medium_risk_controlled_apply_allowed": "medium" in allowed_risks,
            "high_risk_controlled_apply_allowed": "high" in allowed_risks,
            "critical_break_glass_required": True,
            "owner_review_required_for_low_medium_high": False,
            "direct_bot_api_allowed": False,
            "telegram_gateway_required": True,
            "post_apply_verifier_required": True,
            "km_learning_writeback_required": True,
        },
        "runtime_switches": {
            "ansible_check_mode_worker_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER),
            "ansible_controlled_apply_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CONTROLLED_APPLY),
            "ansible_controlled_apply_allowed_risk_levels": allowed_risks,
            "ansible_check_mode_interval_seconds": settings.AWOOOP_ANSIBLE_CHECK_MODE_INTERVAL_SECONDS,
            "ansible_check_mode_batch_limit": settings.AWOOOP_ANSIBLE_CHECK_MODE_BATCH_LIMIT,
            "ansible_check_mode_timeout_seconds": settings.AWOOOP_ANSIBLE_CHECK_MODE_TIMEOUT_SECONDS,
            "ansible_controlled_apply_timeout_seconds": settings.AWOOOP_ANSIBLE_CONTROLLED_APPLY_TIMEOUT_SECONDS,
        },
        "agent_roles": [
            {
                "agent_id": "openclaw",
                "role": "仲裁 / hard blocker / replay-shadow-canary gate",
                "current_job": "只阻擋真正 critical 與 hard blocker不再用身份保護舊架構",
            },
            {
                "agent_id": "hermes",
                "role": "報告 / Telegram digest / KM 與 PlayBook trust writeback",
                "current_job": "日週月報、收據摘要與 verifier 後學習沉澱",
            },
            {
                "agent_id": "nemotron",
                "role": "市場技術雷達 / no-write replay / challenger scorecard",
                "current_job": "用市場與回放數據挑戰 OpenClaw / provider / Agent 組合",
            },
            {
                "agent_id": "awooop_ansible_worker",
                "role": "executor",
                "current_job": "candidate → check-mode → controlled apply → verifier → KM",
            },
            {
                "agent_id": "telegram_ops",
                "role": "Telegram Gateway receipt",
                "current_job": "群組報告、actionable receipt、失敗告警不展示敏感值或未脫敏資料",
            },
        ],
        "report_delivery": {
            "status": "telegram_gateway_delivery_enabled",
            "cadences": report_cadences,
        },
        "controlled_executor": {
            "status": "check_mode_then_apply_enabled"
            if settings.ENABLE_AWOOOP_ANSIBLE_CONTROLLED_APPLY
            else "check_mode_only_by_config",
            "operation_receipts": executor_receipts,
            "required_flow": [
                "allowlisted_candidate",
                "ansible_check_mode_success",
                "controlled_apply",
                "post_apply_verifier",
                "auto_repair_execution_receipt",
                "km_learning_writeback",
                "telegram_receipt_or_alert",
            ],
        },
        "control_plane_integration": control_plane_integration,
        "legacy_policy_overrides": legacy_overrides,
        "hard_blockers": hard_blockers,
        "visibility_contract": {
            "frontend_displays_runtime_truth": True,
            "work_window_transcript_display_allowed": False,
            "prompt_body_display_allowed": False,
            "internal_reasoning_display_allowed": False,
            "sensitive_value_display_allowed": False,
            "telegram_unredacted_payload_display_allowed": False,
            "lan_topology_redaction_required": True,
        },
        "rollups": {
            "automated_risk_tier_count": sum(1 for risk in ("low", "medium", "high") if risk in allowed_risks),
            "hard_blocker_count": len(hard_blockers),
            "report_cadence_enabled_count": len(report_cadences),
            "telegram_gateway_delivery_enabled_count": sum(
                1 for item in report_cadences if item["telegram_gateway_delivery_enabled"]
            ),
            # Derived from the cadence rows (currently 0) so the rollup cannot drift
            # from the declarations above.
            "direct_bot_api_allowed_count": sum(
                1 for item in report_cadences if item["direct_bot_api_allowed"]
            ),
            "controlled_executor_operation_receipt_count": len(executor_receipts),
            "runtime_write_receipt_type_count": sum(
                1 for item in executor_receipts if item["writes_runtime_state"]
            ),
            "legacy_policy_overridden_count": len(legacy_overrides),
            "mcp_sensor_count": integration_rollups["mcp_sensor_count"],
            "rag_context_query_count": integration_rollups["rag_context_query_count"],
            "playbook_decision_class_count": integration_rollups["playbook_decision_class_count"],
            "deploy_control_classifier_example_count": integration_rollups["classifier_example_count"],
        },
    }
    # Attach an empty readback so the synchronous payload has the same shape as the
    # live one; "not_queried" marks that no database read was attempted.
    _attach_runtime_receipt_readback(
        payload,
        build_runtime_receipt_readback_from_rows(
            project_id=_DEFAULT_PROJECT_ID,
            db_read_status="not_queried",
        ),
    )
    _validate_payload(payload)
    return payload
async def load_ai_agent_autonomous_runtime_receipt_readback(
    *,
    project_id: str = _DEFAULT_PROJECT_ID,
    lookback_hours: int = _DEFAULT_LOOKBACK_HOURS,
    limit: int = 20,
) -> dict[str, Any]:
    """Read live executor receipts without sending messages or mutating runtime state.

    Two classes of query run on one database context:

    * Core receipt queries (operations, auto-repair, verifier, KM, Telegram).
      Any failure here degrades the whole readback to
      ``db_read_status="unavailable"``.
    * Auxiliary trace queries (MCP audit, logs, timeline, playbook trust).
      These are best-effort: a failure is logged and yields an empty row list,
      optionally after trying a simpler fallback statement.

    Each auxiliary attempt runs inside its own SAVEPOINT. On PostgreSQL a
    failed statement aborts the surrounding transaction, so without the
    savepoint the fallback statement and every later auxiliary query would
    fail as well.

    Args:
        project_id: Project whose receipts are read.
        lookback_hours: Size of the "recent" window; clamped to at least 1.
        limit: Maximum number of "latest" rows per source; clamped to at least 1.

    Returns:
        The readback built by ``build_runtime_receipt_readback_from_rows``.
        This function does not raise on database errors; it reports them
        through ``db_read_status`` and the ``error`` section.
    """
    params = {
        "project_id": project_id,
        "lookback_hours": max(1, int(lookback_hours or _DEFAULT_LOOKBACK_HOURS)),
        "limit": max(1, int(limit or 20)),
    }
    try:
        async with get_db_context(project_id) as db:
            # Bound every statement in this read so it cannot hang the request.
            await db.execute(text("SET LOCAL statement_timeout = '5000ms'"))

            async def _core_rows(sql: str) -> list[Mapping[str, Any]]:
                """Run a required query; errors propagate to the outer handler."""
                return (await db.execute(text(sql), params)).mappings().all()

            async def _savepoint_rows(sql: str) -> list[Mapping[str, Any]]:
                """Run a query in a SAVEPOINT so a failure leaves the transaction usable."""
                async with db.begin_nested():
                    return (await db.execute(text(sql), params)).mappings().all()

            async def _safe_aux_rows(
                query_name: str,
                sql: str,
                fallback_sql: str | None = None,
            ) -> list[Mapping[str, Any]]:
                """Run a best-effort query, then its fallback, else return no rows."""
                try:
                    return await _savepoint_rows(sql)
                except Exception as exc:  # pragma: no cover - depends on live schema drift
                    logger.warning(
                        "ai_agent_autonomous_runtime_trace_aux_read_failed",
                        project_id=project_id,
                        query_name=query_name,
                        error_type=type(exc).__name__,
                    )
                if fallback_sql:
                    try:
                        return await _savepoint_rows(fallback_sql)
                    except Exception as fallback_exc:  # pragma: no cover - live schema drift
                        logger.warning(
                            "ai_agent_autonomous_runtime_trace_aux_fallback_failed",
                            project_id=project_id,
                            query_name=query_name,
                            error_type=type(fallback_exc).__name__,
                        )
                return []

            operation_counts = await _core_rows(_RUNTIME_OPERATION_COUNTS_SQL)
            operation_latest = await _core_rows(_RUNTIME_OPERATION_LATEST_SQL)
            auto_repair_counts = await _core_rows(_RUNTIME_AUTO_REPAIR_COUNTS_SQL)
            auto_repair_latest = await _core_rows(_RUNTIME_AUTO_REPAIR_LATEST_SQL)
            verifier_counts = await _core_rows(_RUNTIME_VERIFIER_COUNTS_SQL)
            verifier_latest = await _core_rows(_RUNTIME_VERIFIER_LATEST_SQL)
            km_counts = await _core_rows(_RUNTIME_KM_COUNTS_SQL)
            km_latest = await _core_rows(_RUNTIME_KM_LATEST_SQL)
            telegram_counts = await _core_rows(_RUNTIME_TELEGRAM_COUNTS_SQL)
            telegram_latest = await _core_rows(_RUNTIME_TELEGRAM_LATEST_SQL)

            mcp_gateway_counts = await _safe_aux_rows(
                "mcp_gateway_counts",
                _RUNTIME_MCP_GATEWAY_COUNTS_SQL,
            )
            legacy_mcp_counts = await _safe_aux_rows(
                "legacy_mcp_counts",
                _RUNTIME_LEGACY_MCP_COUNTS_SQL,
            )
            service_log_counts = await _safe_aux_rows(
                "service_log_counts",
                _RUNTIME_SERVICE_LOG_COUNTS_SQL,
            )
            executor_log_counts = await _safe_aux_rows(
                "executor_log_counts",
                _RUNTIME_EXECUTOR_LOG_COUNTS_SQL,
            )
            timeline_counts = await _safe_aux_rows(
                "timeline_counts",
                _RUNTIME_TIMELINE_COUNTS_SQL,
                _RUNTIME_TIMELINE_COUNTS_FALLBACK_SQL,
            )
            playbook_trust_counts = await _safe_aux_rows(
                "playbook_trust_counts",
                _RUNTIME_PLAYBOOK_TRUST_COUNTS_SQL,
                _RUNTIME_PLAYBOOK_TRUST_COUNTS_FALLBACK_SQL,
            )
    except Exception as exc:
        # Boundary handler: the readback must still render when the DB is down, so
        # only the exception type is logged and reported.
        logger.warning(
            "ai_agent_autonomous_runtime_receipt_readback_failed",
            project_id=project_id,
            error_type=type(exc).__name__,
        )
        return build_runtime_receipt_readback_from_rows(
            project_id=project_id,
            lookback_hours=params["lookback_hours"],
            db_read_status="unavailable",
            error_type=type(exc).__name__,
        )
    return build_runtime_receipt_readback_from_rows(
        project_id=project_id,
        lookback_hours=params["lookback_hours"],
        db_read_status="ok",
        operation_count_rows=operation_counts,
        operation_latest_rows=operation_latest,
        auto_repair_count_rows=auto_repair_counts,
        auto_repair_latest_rows=auto_repair_latest,
        verifier_count_rows=verifier_counts,
        verifier_latest_rows=verifier_latest,
        km_count_rows=km_counts,
        km_latest_rows=km_latest,
        telegram_count_rows=telegram_counts,
        telegram_latest_rows=telegram_latest,
        mcp_gateway_count_rows=mcp_gateway_counts,
        legacy_mcp_count_rows=legacy_mcp_counts,
        service_log_count_rows=service_log_counts,
        executor_log_count_rows=executor_log_counts,
        timeline_count_rows=timeline_counts,
        playbook_trust_count_rows=playbook_trust_counts,
    )
async def build_ai_agent_autonomous_runtime_control_with_live_readback(
    *,
    project_id: str = _DEFAULT_PROJECT_ID,
    lookback_hours: int = _DEFAULT_LOOKBACK_HOURS,
) -> dict[str, Any]:
    """Return the control-plane payload with the live DB receipt readback attached.

    The static payload is built first, then its placeholder readback is
    replaced by the one loaded for ``project_id`` / ``lookback_hours``, and the
    result is validated again before being returned.
    """
    control_plane = build_ai_agent_autonomous_runtime_control()
    live_readback = await load_ai_agent_autonomous_runtime_receipt_readback(
        project_id=project_id,
        lookback_hours=lookback_hours,
    )
    # Overwrites both the "runtime_receipt_readback" section and the live_* rollups.
    _attach_runtime_receipt_readback(control_plane, live_readback)
    _validate_payload(control_plane)
    return control_plane
# Totals per (operation_type, status) for the Ansible executor operation types
# recorded in automation_operation_log. `recent` counts only rows created within
# the last :lookback_hours hours. Bind parameter: :lookback_hours.
# The statement carries no project_id predicate of its own.
_RUNTIME_OPERATION_COUNTS_SQL = """
SELECT
operation_type,
status,
count(*) AS total,
count(*) FILTER (
WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
) AS recent
FROM automation_operation_log
WHERE operation_type IN (
'ansible_candidate_matched',
'ansible_check_mode_executed',
'ansible_apply_executed',
'ansible_learning_writeback_recorded',
'ansible_rollback_executed',
'ansible_execution_skipped'
)
GROUP BY operation_type, status
ORDER BY operation_type, status
"""
# Newest :limit rows for the same operation types, most recent first.
# Identifier columns are cast to text; catalog, playbook, execution-mode and risk
# fields are extracted from the JSON `input` column, and `returncode` falls back
# from `output` to `dry_run_result`. Bind parameter: :limit.
_RUNTIME_OPERATION_LATEST_SQL = """
SELECT
op_id::text AS op_id,
parent_op_id::text AS parent_op_id,
operation_type,
status,
actor,
coalesce(incident_id::text, input ->> 'incident_id') AS incident_id,
input ->> 'catalog_id' AS catalog_id,
coalesce(input ->> 'apply_playbook_path', input ->> 'playbook_path') AS playbook_path,
input ->> 'execution_mode' AS execution_mode,
input ->> 'source_candidate_op_id' AS source_candidate_op_id,
input ->> 'check_mode_op_id' AS check_mode_op_id,
input ->> 'risk_level' AS risk_level,
input ->> 'controlled_apply_allowed' AS controlled_apply_allowed,
coalesce(output ->> 'returncode', dry_run_result ->> 'returncode') AS returncode,
duration_ms,
created_at
FROM automation_operation_log
WHERE operation_type IN (
'ansible_candidate_matched',
'ansible_check_mode_executed',
'ansible_apply_executed',
'ansible_learning_writeback_recorded',
'ansible_rollback_executed',
'ansible_execution_skipped'
)
ORDER BY created_at DESC
LIMIT :limit
"""
# Success/failed totals for auto_repair_executions rows triggered by
# 'ansible_controlled_apply'. The boolean `success` column is mapped to the
# 'success' / 'failed' labels exposed as `result_status`.
# Bind parameter: :lookback_hours.
_RUNTIME_AUTO_REPAIR_COUNTS_SQL = """
SELECT
CASE WHEN success THEN 'success' ELSE 'failed' END AS result_status,
count(*) AS total,
count(*) FILTER (
WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
) AS recent
FROM auto_repair_executions
WHERE triggered_by = 'ansible_controlled_apply'
GROUP BY CASE WHEN success THEN 'success' ELSE 'failed' END
ORDER BY result_status
"""
# Newest :limit controlled-apply auto-repair rows, most recent first.
# `playbook_id` is exposed as `catalog_id`. `executed_steps_text` is selected here
# but is not in the key allowlist that build_runtime_receipt_readback_from_rows
# applies to these rows before publishing them.
_RUNTIME_AUTO_REPAIR_LATEST_SQL = """
SELECT
id,
incident_id,
playbook_id AS catalog_id,
playbook_name,
CASE WHEN success THEN 'success' ELSE 'failed' END AS result_status,
executed_steps::text AS executed_steps_text,
triggered_by,
risk_level,
execution_time_ms,
created_at
FROM auto_repair_executions
WHERE triggered_by = 'ansible_controlled_apply'
ORDER BY created_at DESC
LIMIT :limit
"""
# Totals per verification_result for incident_evidence rows whose
# post_execution_state carries an 'apply_op_id'. A NULL verification_result is
# reported under the label 'missing'. The recency window uses `collected_at`.
# Bind parameter: :lookback_hours.
_RUNTIME_VERIFIER_COUNTS_SQL = """
SELECT
coalesce(verification_result, 'missing') AS verification_result,
count(*) AS total,
count(*) FILTER (
WHERE collected_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
) AS recent
FROM incident_evidence
WHERE post_execution_state ->> 'apply_op_id' IS NOT NULL
GROUP BY coalesce(verification_result, 'missing')
ORDER BY verification_result
"""
# Newest :limit verifier evidence rows (same filter), most recent first.
# The apply op id, catalog id, playbook path and returncode are extracted from
# the JSON `post_execution_state` column. Bind parameter: :limit.
_RUNTIME_VERIFIER_LATEST_SQL = """
SELECT
id,
incident_id,
matched_playbook_id,
coalesce(verification_result, 'missing') AS verification_result,
post_execution_state ->> 'apply_op_id' AS apply_op_id,
post_execution_state ->> 'catalog_id' AS catalog_id,
post_execution_state ->> 'playbook_path' AS playbook_path,
post_execution_state ->> 'returncode' AS returncode,
collected_at
FROM incident_evidence
WHERE post_execution_state ->> 'apply_op_id' IS NOT NULL
ORDER BY collected_at DESC
LIMIT :limit
"""
# Totals per status for this project's knowledge_entries that relate to
# controlled apply: either path_type starts with 'ansible_apply_receipt:' or the
# text form of `tags` contains 'ansible_controlled_apply'.
# Bind parameters: :project_id, :lookback_hours.
_RUNTIME_KM_COUNTS_SQL = """
SELECT
status,
count(*) AS total,
count(*) FILTER (
WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
) AS recent
FROM knowledge_entries
WHERE project_id = :project_id
AND (
path_type LIKE 'ansible_apply_receipt:%'
OR tags::text LIKE '%ansible_controlled_apply%'
)
GROUP BY status
ORDER BY status
"""
# Newest :limit knowledge entries matching the same filter, most recent first.
# Bind parameters: :project_id, :limit.
_RUNTIME_KM_LATEST_SQL = """
SELECT
id,
title,
related_incident_id,
related_playbook_id,
path_type,
status,
created_by,
created_at
FROM knowledge_entries
WHERE project_id = :project_id
AND (
path_type LIKE 'ansible_apply_receipt:%'
OR tags::text LIKE '%ansible_controlled_apply%'
)
ORDER BY created_at DESC
LIMIT :limit
"""
# Totals per send_status for this project's outbound Telegram messages whose
# source_envelope.callback_reply.action is 'controlled_apply_result'.
# The recency window uses `queued_at`.
# Bind parameters: :project_id, :lookback_hours.
_RUNTIME_TELEGRAM_COUNTS_SQL = """
SELECT
send_status,
count(*) AS total,
count(*) FILTER (
WHERE queued_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
) AS recent
FROM awooop_outbound_message
WHERE project_id = :project_id
AND channel_type = 'telegram'
AND source_envelope #>> '{callback_reply,action}' = 'controlled_apply_result'
GROUP BY send_status
ORDER BY send_status
"""
# Newest :limit controlled-apply Telegram messages (same filter), ordered by
# `queued_at` descending. incident_id and action are read from
# source_envelope.callback_reply. Bind parameters: :project_id, :limit.
_RUNTIME_TELEGRAM_LATEST_SQL = """
SELECT
message_id::text AS message_id,
run_id::text AS run_id,
message_type,
send_status,
provider_message_id,
source_envelope #>> '{callback_reply,incident_id}' AS incident_id,
source_envelope #>> '{callback_reply,action}' AS action,
queued_at,
sent_at
FROM awooop_outbound_message
WHERE project_id = :project_id
AND channel_type = 'telegram'
AND source_envelope #>> '{callback_reply,action}' = 'controlled_apply_result'
ORDER BY queued_at DESC
LIMIT :limit
"""
# Auxiliary (best-effort) query: totals per result_status in the MCP gateway
# audit table for this project; NULL statuses are labelled 'unknown'.
# Bind parameters: :project_id, :lookback_hours.
_RUNTIME_MCP_GATEWAY_COUNTS_SQL = """
SELECT
coalesce(result_status, 'unknown') AS status,
count(*) AS total,
count(*) FILTER (
WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
) AS recent
FROM awooop_mcp_gateway_audit
WHERE project_id = :project_id
GROUP BY coalesce(result_status, 'unknown')
ORDER BY status
"""
# Auxiliary (best-effort) query: totals for the legacy mcp_audit_log table,
# bucketed from the tri-state `success` column into success / failed / unknown.
# Unlike the gateway query above, this statement has no project_id predicate.
# Bind parameter: :lookback_hours.
_RUNTIME_LEGACY_MCP_COUNTS_SQL = """
SELECT
CASE
WHEN success IS TRUE THEN 'success'
WHEN success IS FALSE THEN 'failed'
ELSE 'unknown'
END AS status,
count(*) AS total,
count(*) FILTER (
WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
) AS recent
FROM mcp_audit_log
GROUP BY
CASE
WHEN success IS TRUE THEN 'success'
WHEN success IS FALSE THEN 'failed'
ELSE 'unknown'
END
ORDER BY status
"""
# Auxiliary (best-effort) query: a single aggregate row, labelled with the fixed
# status 'sanitized_recent_logs', counting incident_evidence rows that have any
# of recent_logs, evidence_summary, mcp_health or anomaly_context populated.
# The recency window uses `collected_at`. Bind parameter: :lookback_hours.
_RUNTIME_SERVICE_LOG_COUNTS_SQL = """
SELECT
'sanitized_recent_logs' AS status,
count(*) AS total,
count(*) FILTER (
WHERE collected_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
) AS recent
FROM incident_evidence
WHERE recent_logs IS NOT NULL
OR evidence_summary IS NOT NULL
OR mcp_health IS NOT NULL
OR anomaly_context IS NOT NULL
"""
# Auxiliary (best-effort) query: totals per status for executor operations that
# carry some log-like payload (output, error, stderr_feed_back or
# dry_run_result). The operation-type list here does not include
# 'ansible_learning_writeback_recorded', which the operation count/latest
# queries do include. Bind parameter: :lookback_hours.
_RUNTIME_EXECUTOR_LOG_COUNTS_SQL = """
SELECT
coalesce(status, 'unknown') AS status,
count(*) AS total,
count(*) FILTER (
WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
) AS recent
FROM automation_operation_log
WHERE operation_type IN (
'ansible_candidate_matched',
'ansible_check_mode_executed',
'ansible_apply_executed',
'ansible_rollback_executed',
'ansible_execution_skipped'
)
AND (
output IS NOT NULL
OR error IS NOT NULL
OR stderr_feed_back IS NOT NULL
OR dry_run_result IS NOT NULL
)
GROUP BY coalesce(status, 'unknown')
ORDER BY status
"""
# Auxiliary (best-effort) query: totals per status for timeline_events rows that
# have an event_type, actor or actor_role; NULL statuses are labelled 'unknown'.
# Bind parameter: :lookback_hours.
_RUNTIME_TIMELINE_COUNTS_SQL = """
SELECT
coalesce(status, 'unknown') AS status,
count(*) AS total,
count(*) FILTER (
WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
) AS recent
FROM timeline_events
WHERE event_type IS NOT NULL
OR actor IS NOT NULL
OR actor_role IS NOT NULL
GROUP BY coalesce(status, 'unknown')
ORDER BY status
"""
# Fallback used by the loader when the query above raises: it references no
# column of timeline_events, returns one row labelled 'timeline_event' with the
# table-wide count, and always reports `recent` as 0.
_RUNTIME_TIMELINE_COUNTS_FALLBACK_SQL = """
SELECT
'timeline_event' AS status,
count(*) AS total,
0 AS recent
FROM timeline_events
"""
# Auxiliary (best-effort) query: buckets every row of `playbooks` into one trust
# label. CASE branches are evaluated in order, so precedence is:
# review_required > high_trust (trust_score >= 0.8) > low_trust (trust_score < 0.3)
# > learning_active (any success/failure count) > seeded_not_used.
# The recency window uses `updated_at`. Bind parameter: :lookback_hours.
_RUNTIME_PLAYBOOK_TRUST_COUNTS_SQL = """
SELECT
CASE
WHEN review_required IS TRUE THEN 'review_required'
WHEN trust_score >= 0.8 THEN 'high_trust'
WHEN trust_score < 0.3 THEN 'low_trust'
WHEN success_count > 0 OR failure_count > 0 THEN 'learning_active'
ELSE 'seeded_not_used'
END AS status,
count(*) AS total,
count(*) FILTER (
WHERE updated_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
) AS recent
FROM playbooks
GROUP BY
CASE
WHEN review_required IS TRUE THEN 'review_required'
WHEN trust_score >= 0.8 THEN 'high_trust'
WHEN trust_score < 0.3 THEN 'low_trust'
WHEN success_count > 0 OR failure_count > 0 THEN 'learning_active'
ELSE 'seeded_not_used'
END
ORDER BY status
"""
# Fallback used by the loader when the query above raises: it references no
# column of `playbooks`, returns one row labelled 'cataloged' with the table-wide
# count, and always reports `recent` as 0.
_RUNTIME_PLAYBOOK_TRUST_COUNTS_FALLBACK_SQL = """
SELECT
'cataloged' AS status,
count(*) AS total,
0 AS recent
FROM playbooks
"""
def _validate_payload(payload: dict[str, Any]) -> None:
    """Reject a control-plane payload that violates the pinned directive invariants.

    Checks run in a fixed order and stop at the first violation: schema
    version, the pinned ``program_status`` markers, policy flags that must be
    ``True``, policy flags that must be ``False``, then visibility flags that
    must stay ``False``. Flags are compared by identity, so truthy or falsy
    stand-ins (``1``, ``0``, ``None``) do not pass.

    Raises:
        ValueError: Naming the first field that does not hold its required value.
    """
    if payload.get("schema_version") != _SCHEMA_VERSION:
        raise ValueError(f"schema_version must be {_SCHEMA_VERSION}")

    program_status = payload.get("program_status") or {}
    pinned_markers = (
        ("runtime_authority", _RUNTIME_AUTHORITY),
        ("deploy_readback_marker", _DEPLOY_READBACK_MARKER),
        ("deploy_attempt_note", _DEPLOY_ATTEMPT_NOTE),
    )
    for field, expected in pinned_markers:
        if program_status.get(field) != expected:
            raise ValueError(f"{field} must be {expected}")

    current_policy = payload.get("current_policy") or {}
    must_be_true = (
        "low_risk_controlled_apply_allowed",
        "medium_risk_controlled_apply_allowed",
        "high_risk_controlled_apply_allowed",
        "telegram_gateway_required",
        "post_apply_verifier_required",
        "km_learning_writeback_required",
    )
    for flag in must_be_true:
        if current_policy.get(flag) is not True:
            raise ValueError(f"current_policy.{flag} must be true")
    must_be_false = (
        "owner_review_required_for_low_medium_high",
        "direct_bot_api_allowed",
    )
    for flag in must_be_false:
        if current_policy.get(flag) is not False:
            raise ValueError(f"{flag} must be false")

    visibility_contract = payload.get("visibility_contract") or {}
    never_displayed = (
        "work_window_transcript_display_allowed",
        "prompt_body_display_allowed",
        "internal_reasoning_display_allowed",
        "sensitive_value_display_allowed",
        "telegram_unredacted_payload_display_allowed",
    )
    for flag in never_displayed:
        if visibility_contract.get(flag) is not False:
            raise ValueError(f"visibility_contract.{flag} must remain false")