Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 2m47s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
3022 lines
120 KiB
Python
3022 lines
120 KiB
Python
"""Current AI Agent autonomous runtime control plane.
|
||
|
||
This read model is the current directive layer. Historical P2 snapshots can
|
||
still describe earlier no-send / no-live states, but this payload states what
|
||
the product should enforce now: low, medium, and high risk routes may proceed
|
||
through controlled automation when allowlist, check-mode, verifier, rollback,
|
||
KM, and Telegram receipts are present.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from collections.abc import Iterable, Mapping
|
||
from datetime import UTC, datetime
|
||
from typing import Any
|
||
|
||
from sqlalchemy import text
|
||
|
||
from src.core.config import settings
|
||
from src.core.logging import get_logger
|
||
from src.db.base import get_db_context
|
||
from src.services.report_generation_service import (
|
||
DAILY_REPORT_HOUR_TAIPEI,
|
||
MONTHLY_REPORT_DAY_TAIPEI,
|
||
MONTHLY_REPORT_HOUR_TAIPEI,
|
||
WEEKLY_REPORT_HOUR_TAIPEI,
|
||
WEEKLY_REPORT_WEEKDAY_TAIPEI,
|
||
)
|
||
|
||
# Schema / authority identifiers for this read model.
# NOTE(review): the places where these are stamped into the payload are not
# visible in this part of the file — confirm against the payload builder.
_SCHEMA_VERSION = "ai_agent_autonomous_runtime_control_v1"
_RUNTIME_AUTHORITY = "current_owner_directive_controlled_ai_automation"

# Deployment readback markers.
# NOTE(review): presumably echoed back so a deploy can be confirmed from the
# served payload — verify at the use site.
_DEPLOY_READBACK_MARKER = "p2_416_d1n_autonomous_runtime_control_prod_readback_v2"
_DEPLOY_ATTEMPT_NOTE = "cd_internal_control_plane_readback_retry_20260628_2"
_LIVE_READBACK_SCHEMA_VERSION = "ai_agent_autonomous_runtime_receipt_readback_v1"

# Query defaults.
# NOTE(review): presumably the project scope and the "recent" window (in
# hours) used by the receipt queries — TODO confirm, the queries are not
# visible here.
_DEFAULT_PROJECT_ID = "awoooi"
_DEFAULT_LOOKBACK_HOURS = 24

# CD cancel-stale-cd no-op triggers must not change runtime payloads.
# Executor operation types that always get a bucket in `_operation_counts`
# (zeroed when no rows exist) and that `_build_log_integration_taxonomy`
# sums to size the executor source family.
_EXECUTOR_OPERATION_TYPES = (
    "ansible_candidate_matched",
    "ansible_check_mode_executed",
    "ansible_apply_executed",
    "ansible_learning_writeback_recorded",
    "ansible_rollback_executed",
    "ansible_execution_skipped",
)

# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)
|
||
|
||
|
||
def _allowed_risk_levels() -> list[str]:
    """Return the configured controlled-apply risk levels in normalized form.

    The ``AWOOOP_ANSIBLE_CONTROLLED_APPLY_ALLOWED_RISK_LEVELS`` setting is read
    as a comma-separated string. Each entry is trimmed and lower-cased, blank
    entries and duplicates are dropped, and the result is sorted.
    """
    configured = settings.AWOOOP_ANSIBLE_CONTROLLED_APPLY_ALLOWED_RISK_LEVELS or ""
    levels: set[str] = set()
    for token in str(configured).split(","):
        normalized = token.strip().lower()
        if normalized:
            levels.add(normalized)
    return sorted(levels)
|
||
|
||
|
||
def _utc_iso(value: Any) -> str | None:
|
||
if value is None:
|
||
return None
|
||
if isinstance(value, datetime):
|
||
if value.tzinfo is None:
|
||
value = value.replace(tzinfo=UTC)
|
||
return value.astimezone(UTC).isoformat()
|
||
return str(value)
|
||
|
||
|
||
def _row_mapping(row: Mapping[str, Any] | Any) -> dict[str, Any]:
|
||
if isinstance(row, Mapping):
|
||
return dict(row)
|
||
mapping = getattr(row, "_mapping", None)
|
||
if mapping is not None:
|
||
return dict(mapping)
|
||
return dict(row)
|
||
|
||
|
||
def _int_value(value: Any) -> int:
|
||
try:
|
||
return int(value or 0)
|
||
except (TypeError, ValueError):
|
||
return 0
|
||
|
||
|
||
def _sanitize_latest_rows(
|
||
rows: Iterable[Mapping[str, Any] | Any],
|
||
*,
|
||
allowed_keys: tuple[str, ...],
|
||
time_keys: tuple[str, ...] = ("created_at", "collected_at", "queued_at", "sent_at"),
|
||
limit: int = 5,
|
||
) -> list[dict[str, Any]]:
|
||
clean_rows: list[dict[str, Any]] = []
|
||
for row in rows:
|
||
item = _row_mapping(row)
|
||
clean: dict[str, Any] = {}
|
||
for key in allowed_keys:
|
||
if key not in item:
|
||
continue
|
||
value = item.get(key)
|
||
clean[key] = _utc_iso(value) if key in time_keys else value
|
||
clean_rows.append(clean)
|
||
if len(clean_rows) >= limit:
|
||
break
|
||
return clean_rows
|
||
|
||
|
||
def _operation_counts(
    rows: Iterable[Mapping[str, Any] | Any],
) -> dict[str, dict[str, Any]]:
    """Aggregate grouped operation rows into per-operation-type counters.

    Every type in ``_EXECUTOR_OPERATION_TYPES`` gets a bucket even when no row
    mentions it. Types outside that tuple get a bucket on first sight. Each
    bucket holds ``total``, ``recent`` and a ``by_status`` breakdown keyed by
    the row's ``status``; a missing or falsy type or status is counted under
    ``"unknown"``.
    """

    def _empty_bucket() -> dict[str, Any]:
        return {"total": 0, "recent": 0, "by_status": {}}

    summary: dict[str, dict[str, Any]] = {}
    for known_type in _EXECUTOR_OPERATION_TYPES:
        summary[known_type] = _empty_bucket()

    for raw in rows:
        record = _row_mapping(raw)
        op_key = str(record.get("operation_type") or "unknown")
        status_label = str(record.get("status") or "unknown")
        if op_key not in summary:
            summary[op_key] = _empty_bucket()
        entry = summary[op_key]

        row_total = _int_value(record.get("total"))
        row_recent = _int_value(record.get("recent"))
        entry["total"] += row_total
        entry["recent"] += row_recent

        status_totals = entry["by_status"]
        status_totals[status_label] = status_totals.get(status_label, 0) + row_total
    return summary
|
||
|
||
|
||
def _status_counts(
|
||
rows: Iterable[Mapping[str, Any] | Any],
|
||
*,
|
||
status_key: str,
|
||
) -> dict[str, Any]:
|
||
by_status: dict[str, int] = {}
|
||
total = 0
|
||
recent = 0
|
||
for row in rows:
|
||
item = _row_mapping(row)
|
||
status = str(item.get(status_key) or "unknown")
|
||
row_total = _int_value(item.get("total"))
|
||
by_status[status] = by_status.get(status, 0) + row_total
|
||
total += row_total
|
||
recent += _int_value(item.get("recent"))
|
||
return {
|
||
"total": total,
|
||
"recent": recent,
|
||
"by_status": by_status,
|
||
}
|
||
|
||
|
||
def _trace_stage(
|
||
*,
|
||
stage_id: str,
|
||
display_name: str,
|
||
source_tables: list[str],
|
||
total: int,
|
||
recent: int,
|
||
required_for_closed_loop: bool,
|
||
feeds_learning: bool,
|
||
public_safe: bool = True,
|
||
next_action_if_missing: str | None = None,
|
||
) -> dict[str, Any]:
|
||
present = total > 0
|
||
return {
|
||
"stage_id": stage_id,
|
||
"display_name": display_name,
|
||
"source_tables": source_tables,
|
||
"recorded": present,
|
||
"record_quality": "recorded" if present else "missing",
|
||
"total": max(0, total),
|
||
"recent": max(0, recent),
|
||
"required_for_closed_loop": required_for_closed_loop,
|
||
"feeds_learning": feeds_learning,
|
||
"public_safe": public_safe,
|
||
"next_action_if_missing": None if present else next_action_if_missing,
|
||
}
|
||
|
||
|
||
def _trace_total(summary: Mapping[str, Any] | None, *operation_types: str) -> int:
|
||
if not isinstance(summary, Mapping):
|
||
return 0
|
||
if not operation_types:
|
||
return _int_value(summary.get("total"))
|
||
return sum(
|
||
_int_value((summary.get(operation_type) or {}).get("total"))
|
||
for operation_type in operation_types
|
||
)
|
||
|
||
|
||
def _trace_recent(summary: Mapping[str, Any] | None, *operation_types: str) -> int:
|
||
if not isinstance(summary, Mapping):
|
||
return 0
|
||
if not operation_types:
|
||
return _int_value(summary.get("recent"))
|
||
return sum(
|
||
_int_value((summary.get(operation_type) or {}).get("recent"))
|
||
for operation_type in operation_types
|
||
)
|
||
|
||
|
||
def _build_trace_ledger(
    *,
    operation_summary: Mapping[str, Any],
    auto_repair_summary: Mapping[str, Any],
    verifier_summary: Mapping[str, Any],
    km_summary: Mapping[str, Any],
    telegram_summary: Mapping[str, Any],
    mcp_gateway_summary: Mapping[str, Any],
    legacy_mcp_summary: Mapping[str, Any],
    service_log_summary: Mapping[str, Any],
    executor_log_summary: Mapping[str, Any],
    timeline_summary: Mapping[str, Any],
    playbook_trust_summary: Mapping[str, Any],
    latest_flow_closure: Mapping[str, Any],
    loop_ledger: Mapping[str, Any],
) -> dict[str, Any]:
    """Assemble the public-safe trace ledger for the AI automation pipeline.

    Twelve stages are emitted in a fixed order. Each is built with
    ``_trace_stage`` from the counters of one summary (or, for executor
    operations, one operation type inside ``operation_summary``). The result
    also reports which closed-loop-required stages have no records and which
    stages feed learning.
    """

    def summary_stage(
        stage_id: str,
        display_name: str,
        source_tables: list[str],
        summary: Mapping[str, Any],
        *,
        required: bool,
        next_action: str,
        feeds_learning: bool = True,
    ) -> dict[str, Any]:
        # Stage whose counters are the summary's own top-level total / recent.
        return _trace_stage(
            stage_id=stage_id,
            display_name=display_name,
            source_tables=source_tables,
            total=_trace_total(summary),
            recent=_trace_recent(summary),
            required_for_closed_loop=required,
            feeds_learning=feeds_learning,
            next_action_if_missing=next_action,
        )

    def operation_stage(
        stage_id: str,
        display_name: str,
        operation_type: str,
        next_action: str,
    ) -> dict[str, Any]:
        # Stage backed by one operation type in automation_operation_log;
        # all of these are required for the closed loop and feed learning.
        return _trace_stage(
            stage_id=stage_id,
            display_name=display_name,
            source_tables=["automation_operation_log"],
            total=_trace_total(operation_summary, operation_type),
            recent=_trace_recent(operation_summary, operation_type),
            required_for_closed_loop=True,
            feeds_learning=True,
            next_action_if_missing=next_action,
        )

    stages = [
        # MCP context merges the gateway audit with the legacy audit log.
        _trace_stage(
            stage_id="mcp_context",
            display_name="MCP sensor / tool context",
            source_tables=["awooop_mcp_gateway_audit", "mcp_audit_log"],
            total=_trace_total(mcp_gateway_summary) + _trace_total(legacy_mcp_summary),
            recent=_trace_recent(mcp_gateway_summary) + _trace_recent(legacy_mcp_summary),
            required_for_closed_loop=False,
            feeds_learning=True,
            next_action_if_missing="record_mcp_gateway_or_legacy_mcp_audit_for_every_ai_decision",
        ),
        summary_stage(
            "service_log_evidence",
            "Sanitized service / package log evidence",
            ["incident_evidence.recent_logs", "incident_evidence.evidence_summary"],
            service_log_summary,
            required=False,
            next_action="collect_sanitized_service_log_evidence_before_ai_decision",
        ),
        operation_stage(
            "candidate",
            "AI candidate / playbook match",
            "ansible_candidate_matched",
            "candidate_backfill_worker_enqueue_allowlisted_playbook",
        ),
        operation_stage(
            "check_mode",
            "No-write check-mode / dry-run",
            "ansible_check_mode_executed",
            "ansible_check_mode_worker_claims_candidate",
        ),
        summary_stage(
            "executor_log_projection",
            "Executor stdout / stderr / dry-run projection",
            [
                "automation_operation_log.output",
                "automation_operation_log.error",
                "automation_operation_log.stderr_feed_back",
                "automation_operation_log.dry_run_result",
            ],
            executor_log_summary,
            required=False,
            next_action="persist_sanitized_executor_log_projection_for_failed_or_applied_actions",
        ),
        operation_stage(
            "controlled_apply",
            "Controlled apply execution",
            "ansible_apply_executed",
            "controlled_apply_worker_waits_for_check_mode_success",
        ),
        summary_stage(
            "auto_repair_execution_receipt",
            "Auto-repair execution receipt",
            ["auto_repair_executions"],
            auto_repair_summary,
            required=True,
            next_action="receipt_backfill_records_auto_repair_execution",
        ),
        summary_stage(
            "post_apply_verifier",
            "Post-apply verifier evidence",
            ["incident_evidence"],
            verifier_summary,
            required=True,
            next_action="post_apply_verifier_writes_incident_evidence",
        ),
        summary_stage(
            "rag_km_learning",
            "RAG / KM / PlayBook learning writeback",
            ["knowledge_entries"],
            km_summary,
            required=True,
            next_action="hermes_writes_km_playbook_trust_candidate",
        ),
        summary_stage(
            "playbook_trust",
            "PlayBook trust / success-failure learning",
            [
                "playbooks.trust_score",
                "playbooks.success_count",
                "playbooks.failure_count",
                "playbooks.review_required",
            ],
            playbook_trust_summary,
            required=False,
            next_action="write_playbook_trust_delta_after_verified_execution",
        ),
        summary_stage(
            "timeline_projection",
            "Operator timeline projection",
            ["timeline_events"],
            timeline_summary,
            required=False,
            next_action="project_ai_runtime_stage_to_timeline_events",
        ),
        summary_stage(
            "telegram_receipt",
            "Telegram Gateway receipt",
            ["awooop_outbound_message"],
            telegram_summary,
            required=True,
            feeds_learning=False,
            next_action="live_apply_gateway_sends_controlled_apply_result_receipt",
        ),
    ]

    required_stages = [entry for entry in stages if entry["required_for_closed_loop"]]
    missing_ids = [
        str(entry["stage_id"]) for entry in required_stages if entry["recorded"] is not True
    ]
    learning_ids = [
        str(entry["stage_id"]) for entry in stages if entry["feeds_learning"] is True
    ]
    recorded_total = len([entry for entry in stages if entry["recorded"] is True])

    return {
        "schema_version": "ai_agent_autonomous_trace_ledger_v1",
        "purpose": (
            "把 AI 自動化每個節點的 public-safe receipt 收斂成同一份 ledger;"
            "這些紀錄是後續 RAG、KM、PlayBook trust 與報告學習的依據。"
        ),
        "latest_flow_closed": latest_flow_closure.get("closed") is True,
        "latest_loop_closed": loop_ledger.get("closed") is True,
        "stage_count": len(stages),
        "recorded_stage_count": recorded_total,
        "required_stage_count": len(required_stages),
        "missing_required_stage_ids": missing_ids,
        "learning_source_stage_ids": learning_ids,
        "public_safety": {
            "reads_raw_sessions": False,
            "stores_secret_values": False,
            "stores_unredacted_telegram_payload": False,
            "stores_internal_reasoning": False,
        },
        "stages": stages,
    }
|
||
|
||
|
||
def _build_log_integration_taxonomy(
    *,
    operation_summary: Mapping[str, Any],
    auto_repair_summary: Mapping[str, Any],
    verifier_summary: Mapping[str, Any],
    km_summary: Mapping[str, Any],
    telegram_summary: Mapping[str, Any],
    mcp_gateway_summary: Mapping[str, Any],
    legacy_mcp_summary: Mapping[str, Any],
    service_log_summary: Mapping[str, Any],
    executor_log_summary: Mapping[str, Any],
    timeline_summary: Mapping[str, Any],
    playbook_trust_summary: Mapping[str, Any],
) -> dict[str, Any]:
    """Describe how each log source family is normalized, labeled and counted.

    Ten source families are listed in a fixed order, each with its source
    tables, normalized schema name, label dimensions, counters and payload
    policy. Rollups derived from those entries (active family count, event
    totals, distinct label dimensions) are returned alongside them.
    """

    def family(
        source_family_id: str,
        source_tables: list[str],
        schema_name: str,
        labels: list[str],
        total: int,
        recent: int,
        payload_policy: str,
        next_action: str,
        *,
        feeds_learning: bool = True,
    ) -> dict[str, Any]:
        # One taxonomy entry; every family is declared public-safe.
        return {
            "source_family_id": source_family_id,
            "source_tables": source_tables,
            "normalized_event_schema": schema_name,
            "label_dimensions": labels,
            "total": total,
            "recent": recent,
            "feeds_learning": feeds_learning,
            "public_safe": True,
            "raw_payload_policy": payload_policy,
            "next_action_if_empty": next_action,
        }

    # Executor operations are counted across all known operation types.
    operation_total = 0
    operation_recent = 0
    for operation_type in _EXECUTOR_OPERATION_TYPES:
        operation_total += _trace_total(operation_summary, operation_type)
        operation_recent += _trace_recent(operation_summary, operation_type)

    source_families = [
        family(
            "mcp_gateway_tool_calls",
            ["awooop_mcp_gateway_audit"],
            "ToolCallEvidence",
            ["project", "run", "trace", "agent", "tool", "policy_gate"],
            _trace_total(mcp_gateway_summary),
            _trace_recent(mcp_gateway_summary),
            "hash_only_no_raw_input_output",
            "route_first_class_tools_through_awooop_mcp_gateway",
        ),
        family(
            "legacy_mcp_tool_calls",
            ["mcp_audit_log"],
            "LegacyToolCallEvidence",
            ["incident", "session_ref", "flywheel_node", "agent", "tool"],
            _trace_total(legacy_mcp_summary),
            _trace_recent(legacy_mcp_summary),
            "bridge_to_gateway_hash_or_redacted_summary",
            "keep_legacy_bridge_until_all_callers_use_gateway",
        ),
        family(
            "service_package_logs",
            [
                "incident_evidence.recent_logs",
                "incident_evidence.evidence_summary",
                "incident_evidence.anomaly_context",
            ],
            "ServiceLogEvidence",
            ["project", "product", "website", "service", "package", "incident"],
            _trace_total(service_log_summary),
            _trace_recent(service_log_summary),
            "sanitized_summary_only",
            "collect_sanitized_service_package_logs_before_decision",
        ),
        family(
            "executor_operation_logs",
            ["automation_operation_log"],
            "ExecutorOperationEvidence",
            [
                "project",
                "service",
                "package",
                "tool",
                "incident",
                "operation",
                "playbook",
                "risk",
            ],
            # Take whichever view of the executor log reports more events.
            max(operation_total, _trace_total(executor_log_summary)),
            max(operation_recent, _trace_recent(executor_log_summary)),
            "stdout_stderr_tail_or_structured_result_only",
            "persist_executor_operation_log_for_candidate_check_apply",
        ),
        family(
            "auto_repair_receipts",
            ["auto_repair_executions"],
            "RepairExecutionReceipt",
            ["incident", "service", "playbook", "risk", "result"],
            _trace_total(auto_repair_summary),
            _trace_recent(auto_repair_summary),
            "execution_step_refs_not_raw_secrets",
            "write_auto_repair_execution_receipt_after_apply",
        ),
        family(
            "post_apply_verifier",
            ["incident_evidence.post_execution_state"],
            "VerifierEvidence",
            ["incident", "operation", "playbook", "service", "result"],
            _trace_total(verifier_summary),
            _trace_recent(verifier_summary),
            "post_state_summary_redacted_refs_only",
            "run_post_apply_verifier_for_each_apply",
        ),
        family(
            "rag_km_entries",
            ["knowledge_entries"],
            "KnowledgeWritebackEvidence",
            ["project", "incident", "playbook", "path_type", "status"],
            _trace_total(km_summary),
            _trace_recent(km_summary),
            "curated_summary_and_refs_only",
            "write_km_entry_after_verifier",
        ),
        family(
            "playbook_trust_signals",
            ["playbooks"],
            "PlayBookTrustSignal",
            ["project", "playbook", "status", "trust_band", "review_required"],
            _trace_total(playbook_trust_summary),
            _trace_recent(playbook_trust_summary),
            "aggregate_trust_counters_only",
            "write_trust_delta_after_verified_execution",
        ),
        family(
            "operator_timeline_projection",
            ["timeline_events"],
            "OperatorTimelineEvent",
            ["incident", "event_type", "status", "actor", "actor_role"],
            _trace_total(timeline_summary),
            _trace_recent(timeline_summary),
            "short_public_safe_status_projection",
            "project_ai_runtime_stage_to_timeline_events",
        ),
        family(
            "telegram_delivery_receipts",
            ["awooop_outbound_message"],
            "NotificationReceipt",
            ["project", "channel", "incident", "action", "send_status"],
            _trace_total(telegram_summary),
            _trace_recent(telegram_summary),
            "provider_message_ref_no_unredacted_payload",
            "send_controlled_apply_result_via_gateway",
            feeds_learning=False,
        ),
    ]

    distinct_labels: set[str] = set()
    for entry in source_families:
        distinct_labels.update(str(label) for label in entry["label_dimensions"])
    label_dimensions = sorted(distinct_labels)

    family_totals = [_int_value(entry["total"]) for entry in source_families]
    family_recents = [_int_value(entry["recent"]) for entry in source_families]
    family_count = len(source_families)
    active_count = len([value for value in family_totals if value > 0])
    learning_count = len(
        [entry for entry in source_families if entry["feeds_learning"] is True]
    )

    return {
        "schema_version": "ai_agent_log_integration_taxonomy_v1",
        "purpose": (
            "將專案、產品、網站、服務、套件、工具與通知來源的 log "
            "統一轉成可貼標、可分群、可回放、可餵 RAG/KM/PlayBook 的 evidence。"
        ),
        "normalized_event_flow": [
            "collect_source_log_or_receipt",
            "redact_and_hash_sensitive_fields",
            "assign_labels",
            "correlate_incident_operation_playbook",
            "write_trace_ledger",
            "retrieve_similar_context_via_rag",
            "select_or_repair_playbook",
            "run_check_mode_then_controlled_apply",
            "verify_and_write_learning_back",
        ],
        "label_dimensions": label_dimensions,
        "required_label_dimensions": [
            "project",
            "source_family",
            "incident",
            "operation",
            "service",
            "tool",
            "playbook",
        ],
        "source_families": source_families,
        "rollups": {
            "source_family_count": family_count,
            "active_source_family_count": active_count,
            "inactive_source_family_count": family_count - active_count,
            "label_dimension_count": len(label_dimensions),
            "classified_event_total": sum(family_totals),
            "recent_classified_event_total": sum(family_recents),
            "learning_source_family_count": learning_count,
        },
        "public_safety": {
            "raw_secret_collection_allowed": False,
            "raw_session_collection_allowed": False,
            "unredacted_payload_storage_allowed": False,
            "internal_reasoning_storage_allowed": False,
        },
    }
|
||
|
||
|
||
def _decision_wiring_stage(
|
||
*,
|
||
stage_id: str,
|
||
display_name: str,
|
||
evidence_sources: list[str],
|
||
total: int,
|
||
recent: int,
|
||
required_for_decision_wiring: bool,
|
||
feeds_next_stage: str,
|
||
next_action_if_missing: str,
|
||
) -> dict[str, Any]:
|
||
present = total > 0
|
||
return {
|
||
"stage_id": stage_id,
|
||
"display_name": display_name,
|
||
"evidence_sources": evidence_sources,
|
||
"present": present,
|
||
"total": max(0, total),
|
||
"recent": max(0, recent),
|
||
"required_for_decision_wiring": required_for_decision_wiring,
|
||
"feeds_next_stage": feeds_next_stage,
|
||
"next_action_if_missing": None if present else next_action_if_missing,
|
||
}
|
||
|
||
|
||
def _build_agent_decision_wiring(
    *,
    operation_summary: Mapping[str, Any],
    verifier_summary: Mapping[str, Any],
    km_summary: Mapping[str, Any],
    mcp_gateway_summary: Mapping[str, Any],
    legacy_mcp_summary: Mapping[str, Any],
    service_log_summary: Mapping[str, Any],
    timeline_summary: Mapping[str, Any],
    playbook_trust_summary: Mapping[str, Any],
    log_integration_taxonomy: Mapping[str, Any],
    loop_ledger: Mapping[str, Any],
    latest_flow_closure: Mapping[str, Any],
) -> dict[str, Any]:
    """Summarize live evidence-to-execution wiring for the AI Agent path.

    Six required stages are reported in pipeline order: labeled evidence,
    RAG context, PlayBook candidate, check-mode, controlled apply / rollback,
    and post-apply verifier. ``status`` is ``"completed"`` only when none of
    them is missing.

    Args:
        operation_summary: Per-operation-type counters, as produced by
            ``_operation_counts``.
        verifier_summary, km_summary, mcp_gateway_summary, legacy_mcp_summary,
        service_log_summary, timeline_summary, playbook_trust_summary:
            Summaries read through ``_trace_total`` / ``_trace_recent``.
        log_integration_taxonomy: Taxonomy payload; only its ``rollups`` are
            read, and a non-mapping value is treated as empty.
        loop_ledger, latest_flow_closure: Closure payloads; a ``closed`` value
            of exactly ``True`` on either marks the loop as observed closed.

    Returns:
        The decision-wiring readback payload: stages, missing stage ids,
        runtime switches read from ``settings``, and rollup counters.
    """

    taxonomy_rollups = log_integration_taxonomy.get("rollups")
    if not isinstance(taxonomy_rollups, Mapping):
        taxonomy_rollups = {}
    source_family_count = _int_value(taxonomy_rollups.get("source_family_count"))
    active_source_family_count = _int_value(taxonomy_rollups.get("active_source_family_count"))
    all_sources_active = source_family_count > 0 and active_source_family_count == source_family_count

    evidence_total = (
        _trace_total(mcp_gateway_summary)
        + _trace_total(legacy_mcp_summary)
        + _trace_total(service_log_summary)
        + _trace_total(timeline_summary)
    )
    evidence_recent = (
        _trace_recent(mcp_gateway_summary)
        + _trace_recent(legacy_mcp_summary)
        + _trace_recent(service_log_summary)
        + _trace_recent(timeline_summary)
    )
    rag_context_total = _trace_total(km_summary) + _trace_total(playbook_trust_summary)
    rag_context_recent = _trace_recent(km_summary) + _trace_recent(playbook_trust_summary)
    candidate_total = _trace_total(operation_summary, "ansible_candidate_matched")
    candidate_recent = _trace_recent(operation_summary, "ansible_candidate_matched")
    check_mode_total = _trace_total(operation_summary, "ansible_check_mode_executed")
    check_mode_recent = _trace_recent(operation_summary, "ansible_check_mode_executed")
    apply_total = _trace_total(operation_summary, "ansible_apply_executed")
    apply_recent = _trace_recent(operation_summary, "ansible_apply_executed")
    rollback_total = _trace_total(operation_summary, "ansible_rollback_executed")
    rollback_recent = _trace_recent(operation_summary, "ansible_rollback_executed")
    verifier_total = _trace_total(verifier_summary)
    verifier_recent = _trace_recent(verifier_summary)

    stages = [
        _decision_wiring_stage(
            stage_id="labeled_evidence_sources",
            display_name="Labeled log / MCP / timeline evidence available",
            evidence_sources=["log_integration_taxonomy", "mcp", "service_logs", "timeline_events"],
            # The stage only counts as present once every source family in
            # the taxonomy is active, so the total is zeroed until then.
            total=evidence_total if all_sources_active else 0,
            recent=evidence_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="rag_context_retrieval",
            next_action_if_missing="keep_p1a_source_family_ingestion_active_until_10_of_10",
        ),
        _decision_wiring_stage(
            stage_id="rag_context_retrieval",
            display_name="RAG / KM / PlayBook trust context available",
            evidence_sources=["knowledge_entries", "playbooks"],
            total=rag_context_total,
            recent=rag_context_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="playbook_candidate_selection",
            next_action_if_missing="retrieve_similar_km_entries_and_playbook_trust_before_candidate",
        ),
        _decision_wiring_stage(
            stage_id="playbook_candidate_selection",
            display_name="Allowlisted PlayBook candidate selected",
            evidence_sources=["automation_operation_log:ansible_candidate_matched"],
            total=candidate_total,
            recent=candidate_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="check_mode_dry_run",
            next_action_if_missing="candidate_backfill_worker_enqueue_allowlisted_playbook",
        ),
        _decision_wiring_stage(
            stage_id="check_mode_dry_run",
            display_name="Check-mode / dry-run receipt recorded",
            evidence_sources=["automation_operation_log:ansible_check_mode_executed"],
            total=check_mode_total,
            recent=check_mode_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="controlled_apply_boundary",
            next_action_if_missing="ansible_check_mode_worker_claims_candidate",
        ),
        _decision_wiring_stage(
            stage_id="controlled_apply_boundary",
            display_name="Controlled apply / rollback boundary recorded",
            evidence_sources=[
                "automation_operation_log:ansible_apply_executed",
                "automation_operation_log:ansible_rollback_executed",
            ],
            total=apply_total + rollback_total,
            # The total covers applies and rollbacks, so recent must too;
            # otherwise a recent rollback-only window reads as no activity.
            recent=apply_recent + rollback_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="post_apply_verifier",
            next_action_if_missing="controlled_apply_worker_waits_for_check_mode_success",
        ),
        _decision_wiring_stage(
            stage_id="post_apply_verifier",
            display_name="Post-apply verifier receipt recorded",
            evidence_sources=["incident_evidence"],
            total=verifier_total,
            recent=verifier_recent,
            required_for_decision_wiring=True,
            feeds_next_stage="learning_writeback",
            next_action_if_missing="post_apply_verifier_writes_incident_evidence",
        ),
    ]

    required_stages = [stage for stage in stages if stage["required_for_decision_wiring"] is True]
    missing_required = [
        str(stage["stage_id"]) for stage in required_stages if stage["present"] is not True
    ]
    present_required_count = sum(1 for stage in required_stages if stage["present"] is True)
    closed_loop_observed = bool(
        loop_ledger.get("closed") is True
        or latest_flow_closure.get("closed") is True
    )

    return {
        "schema_version": "ai_agent_decision_wiring_readback_v1",
        "status": "completed" if not missing_required else "in_progress",
        "stages": stages,
        "missing_required_stage_ids": missing_required,
        "runtime_switches": {
            "candidate_backfill_worker_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CANDIDATE_BACKFILL_WORKER),
            "check_mode_worker_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER),
            "controlled_apply_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CONTROLLED_APPLY),
            "allowed_risk_levels": _allowed_risk_levels(),
        },
        "closed_loop_observed": closed_loop_observed,
        "public_safety": {
            "stores_raw_logs": False,
            "stores_secret_values": False,
            "executes_on_read": False,
            "critical_break_glass_still_required": True,
        },
        "rollups": {
            "stage_count": len(stages),
            "required_stage_count": len(required_stages),
            "required_stage_present_count": present_required_count,
            "required_stage_missing_count": len(missing_required),
            # Reported ungated: this is the raw evidence count even when the
            # labeled-evidence stage itself is zeroed.
            "evidence_event_total": evidence_total,
            "rag_context_total": rag_context_total,
            "candidate_total": candidate_total,
            "check_mode_total": check_mode_total,
            "controlled_apply_total": apply_total,
            "rollback_total": rollback_total,
            "verifier_total": verifier_total,
        },
    }
|
||
|
||
|
||
def _learning_loop_stage(
|
||
*,
|
||
stage_id: str,
|
||
display_name: str,
|
||
evidence_sources: list[str],
|
||
total: int,
|
||
recent: int,
|
||
required_for_learning_loop: bool,
|
||
writes_runtime_state: bool,
|
||
next_action_if_missing: str,
|
||
) -> dict[str, Any]:
|
||
present = total > 0
|
||
return {
|
||
"stage_id": stage_id,
|
||
"display_name": display_name,
|
||
"evidence_sources": evidence_sources,
|
||
"present": present,
|
||
"total": max(0, total),
|
||
"recent": max(0, recent),
|
||
"required_for_learning_loop": required_for_learning_loop,
|
||
"writes_runtime_state": writes_runtime_state,
|
||
"next_action_if_missing": None if present else next_action_if_missing,
|
||
}
|
||
|
||
|
||
def _build_learning_loop_readback(
    *,
    operation_summary: Mapping[str, Any],
    verifier_summary: Mapping[str, Any],
    km_summary: Mapping[str, Any],
    playbook_trust_summary: Mapping[str, Any],
    log_integration_taxonomy: Mapping[str, Any],
    agent_decision_wiring: Mapping[str, Any],
    latest_flow_closure: Mapping[str, Any],
    latest_failure_classification: Mapping[str, Any],
    controlled_retry_package: Mapping[str, Any],
    loop_ledger: Mapping[str, Any],
) -> dict[str, Any]:
    """Expose the verified execution to KM/PlayBook learning loop.

    Builds the ``ai_agent_learning_loop_readback_v1`` payload: seven required
    stages, the ids of the stages that are still missing, a static
    public-safety block, and rollup counters. ``status`` is ``completed`` only
    when no required stage is missing.

    Args:
        operation_summary: Counter summary read for the
            ``ansible_learning_writeback_recorded`` operation type.
        verifier_summary: Counter summary for post-apply verifier evidence.
        km_summary: Counter summary for KM writebacks.
        playbook_trust_summary: Counter summary for PlayBook trust signals.
        log_integration_taxonomy: Taxonomy payload; only its ``rollups``
            mapping is read (ignored when it is not a mapping).
        agent_decision_wiring: Decision-wiring readback; only ``status`` is read.
        latest_flow_closure: Closure flags for the latest apply; gates the
            verifier and KM stage totals.
        latest_failure_classification: Classification payload for the latest apply.
        controlled_retry_package: Retry package; only ``schema_version`` is read.
        loop_ledger: Execution loop ledger; only ``closed`` is read.

    Returns:
        The learning-loop readback dictionary.
    """

    taxonomy_rollups = log_integration_taxonomy.get("rollups")
    if not isinstance(taxonomy_rollups, Mapping):
        taxonomy_rollups = {}
    learning_source_family_count = _int_value(
        taxonomy_rollups.get("learning_source_family_count")
    )
    classified_event_total = _int_value(taxonomy_rollups.get("classified_event_total"))
    recent_classified_event_total = _int_value(
        taxonomy_rollups.get("recent_classified_event_total")
    )
    verifier_total = _trace_total(verifier_summary)
    verifier_recent = _trace_recent(verifier_summary)
    km_total = _trace_total(km_summary)
    km_recent = _trace_recent(km_summary)
    learning_writeback_total = _trace_total(
        operation_summary,
        "ansible_learning_writeback_recorded",
    )
    learning_writeback_recent = _trace_recent(
        operation_summary,
        "ansible_learning_writeback_recorded",
    )
    trust_total = _trace_total(playbook_trust_summary)
    trust_recent = _trace_recent(playbook_trust_summary)

    # Normalise before the membership test: a missing/None classification must
    # count as "nothing classified" rather than slipping past the `not in`
    # check, and str() also keeps an unexpected unhashable value from raising.
    failure_classification = str(latest_failure_classification.get("classification") or "")
    repair_feedback_ready = (
        failure_classification not in {"", "no_controlled_apply_observed"}
        and controlled_retry_package.get("schema_version")
        == "ai_agent_controlled_retry_package_v1"
    )
    next_decision_ready = bool(
        agent_decision_wiring.get("status") == "completed"
        and loop_ledger.get("closed") is True
    )
    stages = [
        _learning_loop_stage(
            stage_id="verified_execution_outcome",
            display_name="Verified execution outcome available",
            evidence_sources=["incident_evidence.post_execution_state"],
            # Only count verifier evidence when it belongs to the latest apply.
            total=verifier_total
            if latest_flow_closure.get("has_post_apply_verifier") is True
            else 0,
            recent=verifier_recent,
            required_for_learning_loop=True,
            writes_runtime_state=True,
            next_action_if_missing="run_post_apply_verifier_and_attach_apply_op_id",
        ),
        _learning_loop_stage(
            stage_id="km_learning_writeback",
            display_name="KM learning writeback recorded",
            evidence_sources=["knowledge_entries"],
            # Same gating as above, for the KM writeback of the latest apply.
            total=km_total
            if latest_flow_closure.get("has_km_writeback") is True
            else 0,
            recent=km_recent,
            required_for_learning_loop=True,
            writes_runtime_state=True,
            next_action_if_missing="write_verified_execution_summary_to_km",
        ),
        _learning_loop_stage(
            stage_id="learning_repair_record",
            display_name="Learning repository repair result recorded",
            evidence_sources=[
                "automation_operation_log:ansible_learning_writeback_recorded",
                "learning_repository",
            ],
            total=learning_writeback_total,
            recent=learning_writeback_recent,
            required_for_learning_loop=True,
            writes_runtime_state=True,
            next_action_if_missing="record_learning_repair_result_after_verifier",
        ),
        _learning_loop_stage(
            stage_id="playbook_trust_delta",
            display_name="PlayBook trust signal available",
            evidence_sources=["playbooks"],
            total=trust_total,
            recent=trust_recent,
            required_for_learning_loop=True,
            writes_runtime_state=True,
            next_action_if_missing="write_playbook_trust_delta_after_verifier",
        ),
        _learning_loop_stage(
            stage_id="similar_case_context",
            display_name="Similar-case context sources active",
            evidence_sources=["log_integration_taxonomy", "knowledge_entries", "playbooks"],
            # Classified events only count once a learning source family exists.
            total=classified_event_total if learning_source_family_count > 0 else 0,
            recent=recent_classified_event_total,
            required_for_learning_loop=True,
            writes_runtime_state=False,
            next_action_if_missing="activate_learning_source_families_for_similar_case_retrieval",
        ),
        _learning_loop_stage(
            stage_id="repair_candidate_feedback",
            display_name="Repair or no-repair feedback classified",
            evidence_sources=["latest_failure_classification", "controlled_retry_package"],
            total=1 if repair_feedback_ready else 0,
            recent=1 if repair_feedback_ready else 0,
            required_for_learning_loop=True,
            writes_runtime_state=False,
            next_action_if_missing="classify_latest_apply_result_and_prepare_retry_package",
        ),
        _learning_loop_stage(
            stage_id="next_decision_context",
            display_name="Next decision can consume learned context",
            evidence_sources=["agent_decision_wiring", "autonomous_execution_loop_ledger"],
            total=1 if next_decision_ready else 0,
            recent=1 if next_decision_ready else 0,
            required_for_learning_loop=True,
            writes_runtime_state=False,
            next_action_if_missing="complete_decision_wiring_and_execution_loop_before_learning_release",
        ),
    ]
    missing_required = [
        str(stage["stage_id"])
        for stage in stages
        if stage["required_for_learning_loop"] is True and stage["present"] is not True
    ]
    present_required_count = sum(
        1
        for stage in stages
        if stage["required_for_learning_loop"] is True and stage["present"] is True
    )
    required_count = sum(1 for stage in stages if stage["required_for_learning_loop"] is True)
    return {
        "schema_version": "ai_agent_learning_loop_readback_v1",
        "status": "completed" if not missing_required else "in_progress",
        "stages": stages,
        "missing_required_stage_ids": missing_required,
        "public_safety": {
            "stores_raw_logs": False,
            "stores_secret_values": False,
            "stores_unredacted_telegram_payload": False,
            "executes_on_read": False,
            "critical_break_glass_still_required": True,
        },
        "rollups": {
            "stage_count": len(stages),
            "required_stage_count": required_count,
            "required_stage_present_count": present_required_count,
            "required_stage_missing_count": len(missing_required),
            "verified_execution_total": verifier_total,
            "km_writeback_total": km_total,
            "learning_writeback_total": learning_writeback_total,
            "learning_writeback_recent": learning_writeback_recent,
            "playbook_trust_total": trust_total,
            "learning_source_family_count": learning_source_family_count,
            "similar_case_source_total": classified_event_total,
            "repair_feedback_ready_count": 1 if repair_feedback_ready else 0,
            "next_decision_ready_count": 1 if next_decision_ready else 0,
        },
    }
|
||
|
||
|
||
def _build_work_item_progress(
    *,
    trace_ledger: Mapping[str, Any],
    log_integration_taxonomy: Mapping[str, Any],
    agent_decision_wiring: Mapping[str, Any],
    learning_loop: Mapping[str, Any],
    db_read_status: str,
) -> dict[str, Any]:
    """Build ordered work items that the UI and agent can keep advancing.

    Produces the fixed P0/P1/P2 roadmap entries, one extra P1-A entry per
    taxonomy source family, and per-status rollup counters. The P1 items are
    chained: P1-B can only complete after P1-A, and P1-C only after P1-B.
    """

    def _mapping_or_empty(value: Any) -> Mapping[str, Any]:
        # Malformed (non-mapping) sections are treated as empty.
        return value if isinstance(value, Mapping) else {}

    def _list_or_empty(value: Any) -> list[Any]:
        # Malformed (non-list) sections are treated as empty.
        return value if isinstance(value, list) else []

    taxonomy_counters = _mapping_or_empty(log_integration_taxonomy.get("rollups"))
    families = _list_or_empty(log_integration_taxonomy.get("source_families"))
    inactive_source_count = _int_value(taxonomy_counters.get("inactive_source_family_count"))
    ledger_gaps = _list_or_empty(trace_ledger.get("missing_required_stage_ids"))
    decision_counters = _mapping_or_empty(agent_decision_wiring.get("rollups"))
    decision_wiring_missing = _int_value(decision_counters.get("required_stage_missing_count"))
    learning_counters = _mapping_or_empty(learning_loop.get("rollups"))
    learning_loop_missing = _int_value(learning_counters.get("required_stage_missing_count"))

    # Completion gates; each P1 gate requires the previous one.
    p1a_completed = inactive_source_count == 0
    p1b_completed = (
        p1a_completed
        and agent_decision_wiring.get("schema_version") == "ai_agent_decision_wiring_readback_v1"
        and decision_wiring_missing == 0
    )
    p1c_completed = (
        p1b_completed
        and learning_loop.get("schema_version") == "ai_agent_learning_loop_readback_v1"
        and learning_loop_missing == 0
    )
    deployed_readback_complete = (
        db_read_status == "ok"
        and trace_ledger.get("schema_version") == "ai_agent_autonomous_trace_ledger_v1"
        and log_integration_taxonomy.get("schema_version") == "ai_agent_log_integration_taxonomy_v1"
    )

    trace_ledger_status = "in_progress" if ledger_gaps else "completed"
    deploy_status = "completed" if deployed_readback_complete else "in_progress"
    deploy_blocker = (
        None if deployed_readback_complete else "waiting_for_successful_gitea_cd_deploy_marker"
    )
    p1a_status = "completed" if p1a_completed else "in_progress"
    if p1b_completed:
        p1b_status = "completed"
    elif p1a_completed:
        p1b_status = "in_progress"
    else:
        p1b_status = "pending"
    if p1c_completed:
        p1c_status = "completed"
    elif p1b_completed:
        p1c_status = "in_progress"
    else:
        p1c_status = "pending"

    ordered_items = [
        {
            "work_item_id": "P0-A-runtime-truth",
            "priority": "P0-A",
            "title": "Controlled apply runtime truth readback",
            "status": "completed",
            "exit_criteria": "production API reports db_read_status=ok and live executor receipts",
        },
        {
            "work_item_id": "P0-B-trace-ledger",
            "priority": "P0-B",
            "title": "Trace ledger for MCP/log/executor/verifier/KM/PlayBook/Telegram",
            "status": trace_ledger_status,
            "exit_criteria": "trace_ledger exposes required closed-loop stages and missing_required_stage_ids",
        },
        {
            "work_item_id": "P0-C-log-taxonomy",
            "priority": "P0-C",
            "title": "Project/product/site/service/package/tool log taxonomy",
            "status": "completed",
            "exit_criteria": "log_integration_taxonomy lists source families, labels, and public-safety policy",
        },
        {
            "work_item_id": "P0-D-ui-visibility",
            "priority": "P0-D",
            "title": "AwoooP UI shows automation loop and log integration progress",
            "status": "completed",
            "exit_criteria": "AwoooP, Approvals, Runs, and Work Items show trace/log taxonomy panel",
        },
        {
            "work_item_id": "P0-E-verification-deploy",
            "priority": "P0-E",
            "title": "Focused verification and production deploy marker readback",
            "status": deploy_status,
            "exit_criteria": "deploy marker includes this code and production API exposes trace_ledger/log_integration_taxonomy",
            "blocker": deploy_blocker,
        },
        {
            "work_item_id": "P1-A-ingestion-coverage",
            "priority": "P1-A",
            "title": "Collector and sanitizer coverage for all source families",
            "status": p1a_status,
            "exit_criteria": "all source families have active sanitized classified events",
            "remaining_source_family_count": inactive_source_count,
        },
        {
            "work_item_id": "P1-B-agent-decision-wiring",
            "priority": "P1-B",
            "title": "RAG retrieval to PlayBook select/repair/check-mode/apply/verifier",
            "status": p1b_status,
            "exit_criteria": "AI Agent consumes labeled evidence and emits target selector, dry-run, apply, verifier, rollback",
            "remaining_decision_wiring_stage_count": decision_wiring_missing,
        },
        {
            "work_item_id": "P1-C-learning-loop",
            "priority": "P1-C",
            "title": "KM / PlayBook trust learning loop",
            "status": p1c_status,
            "exit_criteria": "verified execution updates KM entries, trust delta, similar-case clusters, and repair candidates",
            "remaining_learning_loop_stage_count": learning_loop_missing,
        },
        {
            "work_item_id": "P1-D-alert-noise-reduction",
            "priority": "P1-D",
            "title": "Alert grouping and AI controlled workflow routing",
            "status": "pending",
            "exit_criteria": "repeated alerts are clustered, deduped, routed to controlled automation, and no longer default to manual handling",
        },
        {
            "work_item_id": "P2-A-ui-ux-productization",
            "priority": "P2-A",
            "title": "Professional product UI replacing text-heavy surfaces",
            "status": "pending",
            "exit_criteria": "AI automation status is shown as dense dashboard controls, filters, counters, and action rails",
        },
        {
            "work_item_id": "P2-B-multi-product-expansion",
            "priority": "P2-B",
            "title": "Reuse taxonomy across AWOOOI products/projects",
            "status": "pending",
            "exit_criteria": "StockPlatform, VibeWork, MOMO, AwoooGo, and other products report the same log taxonomy contract",
        },
    ]

    # One derived P1-A item per well-formed source family entry.
    source_family_items = []
    for family in families:
        if not isinstance(family, Mapping):
            continue
        family_id = family.get("source_family_id")
        has_events = _int_value(family.get("total")) > 0
        if has_events:
            family_status = "completed"
            next_action = "keep_learning_and_quality_checks"
        else:
            family_status = "not_started"
            next_action = family.get("next_action_if_empty")
        source_family_items.append({
            "work_item_id": f"P1-A-source-{family_id}",
            "priority": "P1-A",
            "source_family_id": family_id,
            "title": f"Ingest and label {family_id}",
            "status": family_status,
            "label_dimensions": family.get("label_dimensions") or [],
            "next_controlled_action": next_action,
        })

    # Tally statuses across both lists, roadmap items first.
    status_counts: dict[str, int] = {}
    for entry in (*ordered_items, *source_family_items):
        label = str(entry.get("status") or "unknown")
        status_counts[label] = status_counts.get(label, 0) + 1

    return {
        "schema_version": "ai_agent_automation_work_item_progress_v1",
        "ordered_items": ordered_items,
        "source_family_items": source_family_items,
        "rollups": {
            "work_item_count": len(ordered_items) + len(source_family_items),
            "ordered_work_item_count": len(ordered_items),
            "source_family_work_item_count": len(source_family_items),
            "completed_count": status_counts.get("completed", 0),
            "in_progress_count": status_counts.get("in_progress", 0),
            "pending_count": status_counts.get("pending", 0),
            "blocked_count": status_counts.get("blocked", 0),
            "not_started_count": status_counts.get("not_started", 0),
            "by_status": status_counts,
        },
    }
|
||
|
||
|
||
def _first_operation(
|
||
rows: Iterable[Mapping[str, Any]],
|
||
operation_type: str,
|
||
) -> dict[str, Any] | None:
|
||
for row in rows:
|
||
if str(row.get("operation_type") or "") == operation_type:
|
||
return dict(row)
|
||
return None
|
||
|
||
|
||
def _operation_by_id(
|
||
rows: Iterable[Mapping[str, Any]],
|
||
op_id: Any,
|
||
) -> dict[str, Any] | None:
|
||
needle = str(op_id or "")
|
||
if not needle:
|
||
return None
|
||
for row in rows:
|
||
if str(row.get("op_id") or "") == needle:
|
||
return dict(row)
|
||
return None
|
||
|
||
|
||
def _stage_status(row: Mapping[str, Any] | None, *, fallback_status: str | None = None) -> str:
|
||
if row is None:
|
||
return fallback_status or "missing"
|
||
return str(row.get("status") or row.get("result_status") or fallback_status or "present")
|
||
|
||
|
||
def _loop_stage(
|
||
*,
|
||
stage_id: str,
|
||
receipt_source: str,
|
||
present: bool,
|
||
status: str,
|
||
ref_id: str | None,
|
||
writes_runtime_state: bool,
|
||
next_action_if_missing: str,
|
||
) -> dict[str, Any]:
|
||
return {
|
||
"stage_id": stage_id,
|
||
"receipt_source": receipt_source,
|
||
"present": present,
|
||
"status": status,
|
||
"ref_id": ref_id,
|
||
"writes_runtime_state": writes_runtime_state,
|
||
"next_action_if_missing": None if present else next_action_if_missing,
|
||
}
|
||
|
||
|
||
def _autonomous_execution_loop_ledger(
    *,
    project_id: str,
    operation_latest_rows: Iterable[Mapping[str, Any] | Any],
    verifier_latest_rows: Iterable[Mapping[str, Any] | Any],
    km_latest_rows: Iterable[Mapping[str, Any] | Any],
    telegram_latest_rows: Iterable[Mapping[str, Any] | Any],
    auto_repair_latest_rows: Iterable[Mapping[str, Any] | Any],
    latest_flow_closure: Mapping[str, Any],
    latest_failure_classification: Mapping[str, Any],
    controlled_retry_package: Mapping[str, Any],
) -> dict[str, Any]:
    """Build the operation-id ledger that proves whether the runtime loop closed.

    Resolves the candidate -> check-mode -> apply chain from the operation
    rows, looks up the matching auto-repair, verifier, KM and Telegram
    receipts, and reports each as a stage together with the overall
    ``execution_state`` and the next executor action.
    """

    operations = [_row_mapping(entry) for entry in operation_latest_rows]
    verifier_receipts = [_row_mapping(entry) for entry in verifier_latest_rows]
    km_receipts = [_row_mapping(entry) for entry in km_latest_rows]
    telegram_receipts = [_row_mapping(entry) for entry in telegram_latest_rows]
    repair_receipts = [_row_mapping(entry) for entry in auto_repair_latest_rows]

    # The apply row anchors the chain; without one, fall back to the first
    # check-mode row in the list.
    apply_row = _first_operation(operations, "ansible_apply_executed")
    if apply_row is None:
        check_row = _first_operation(operations, "ansible_check_mode_executed")
    else:
        check_row = _operation_by_id(
            operations,
            apply_row.get("check_mode_op_id") or apply_row.get("parent_op_id"),
        )

    # Walk back to the candidate via the check-mode row, then via the apply row.
    source_candidate_op_id = None
    if check_row is not None:
        source_candidate_op_id = check_row.get("parent_op_id") or check_row.get("source_candidate_op_id")
    if apply_row is not None and not source_candidate_op_id:
        source_candidate_op_id = apply_row.get("source_candidate_op_id")
    candidate_row = _operation_by_id(operations, source_candidate_op_id)
    if candidate_row is None and apply_row is None and check_row is None:
        candidate_row = _first_operation(operations, "ansible_candidate_matched")

    apply_fields = apply_row or {}
    check_fields = check_row or {}
    candidate_fields = candidate_row or {}
    anchor = apply_row or check_row or candidate_row or {}

    apply_op_id = str(apply_fields.get("op_id") or "")
    check_mode_op_id = str(
        check_fields.get("op_id")
        or apply_fields.get("check_mode_op_id")
        or apply_fields.get("parent_op_id")
        or ""
    )
    candidate_op_id = str(candidate_fields.get("op_id") or source_candidate_op_id or "")
    incident_id = str(anchor.get("incident_id") or "")
    catalog_id = str(anchor.get("catalog_id") or "")
    playbook_path = str(anchor.get("playbook_path") or "")
    km_path_type = f"ansible_apply_receipt:{apply_op_id[:8]}" if apply_op_id else ""

    def _verifies_apply(receipt: Any) -> Any:
        # Verifier evidence must reference the exact apply op id.
        return apply_op_id and str(receipt.get("apply_op_id") or "") == apply_op_id

    def _km_for_apply(receipt: Any) -> Any:
        # Match by the apply receipt path type, or by the incident.
        return (
            km_path_type
            and str(receipt.get("path_type") or "") == km_path_type
        ) or (
            incident_id
            and str(receipt.get("related_incident_id") or "") == incident_id
        )

    def _telegram_for_apply(receipt: Any) -> Any:
        # Only delivered controlled-apply results count; the incident filter
        # is skipped when no incident id is known.
        return (
            str(receipt.get("send_status") or "") == "sent"
            and str(receipt.get("action") or "") == "controlled_apply_result"
            and (
                not incident_id
                or str(receipt.get("incident_id") or "") == incident_id
            )
        )

    def _repair_for_apply(receipt: Any) -> Any:
        # The apply op id has to appear in the recorded executed steps text.
        return apply_op_id and apply_op_id in str(
            receipt.get("executed_steps_text") or receipt.get("executed_steps") or ""
        )

    verifier = next(filter(_verifies_apply, verifier_receipts), None)
    km = next(filter(_km_for_apply, km_receipts), None)
    telegram = next(filter(_telegram_for_apply, telegram_receipts), None)
    auto_repair = next(filter(_repair_for_apply, repair_receipts), None)

    candidate_present = bool(candidate_row or candidate_op_id)
    check_present = bool(check_row or check_mode_op_id)
    apply_present = apply_row is not None
    auto_repair_present = auto_repair is not None
    verifier_present = verifier is not None
    km_present = km is not None
    telegram_present = telegram is not None

    if candidate_present:
        candidate_status = _stage_status(candidate_row, fallback_status="inferred_from_check_mode")
    else:
        candidate_status = "missing"
    if check_present:
        check_status = _stage_status(check_row, fallback_status="inferred_from_apply_parent")
    else:
        check_status = "missing"

    stages = [
        _loop_stage(
            stage_id="candidate",
            receipt_source="automation_operation_log:ansible_candidate_matched",
            present=candidate_present,
            status=candidate_status,
            ref_id=candidate_op_id or None,
            writes_runtime_state=False,
            next_action_if_missing="candidate_backfill_worker_enqueue_allowlisted_playbook",
        ),
        _loop_stage(
            stage_id="check_mode",
            receipt_source="automation_operation_log:ansible_check_mode_executed",
            present=check_present,
            status=check_status,
            ref_id=check_mode_op_id or None,
            writes_runtime_state=False,
            next_action_if_missing="ansible_check_mode_worker_claims_candidate",
        ),
        _loop_stage(
            stage_id="controlled_apply",
            receipt_source="automation_operation_log:ansible_apply_executed",
            present=apply_present,
            status=_stage_status(apply_row),
            ref_id=apply_op_id or None,
            writes_runtime_state=True,
            next_action_if_missing="controlled_apply_worker_waits_for_check_mode_success",
        ),
        _loop_stage(
            stage_id="auto_repair_execution_receipt",
            receipt_source="auto_repair_executions:ansible_controlled_apply",
            present=auto_repair_present,
            status=str((auto_repair or {}).get("result_status") or "missing"),
            ref_id=str((auto_repair or {}).get("id") or "") or None,
            writes_runtime_state=True,
            next_action_if_missing="receipt_backfill_records_auto_repair_execution",
        ),
        _loop_stage(
            stage_id="post_apply_verifier",
            receipt_source="incident_evidence.post_execution_state",
            present=verifier_present,
            status=str((verifier or {}).get("verification_result") or "missing"),
            ref_id=str((verifier or {}).get("id") or "") or None,
            writes_runtime_state=True,
            next_action_if_missing="post_apply_verifier_writes_incident_evidence",
        ),
        _loop_stage(
            stage_id="km_playbook_writeback",
            receipt_source="knowledge_entries:ansible_apply_receipt",
            present=km_present,
            status=str((km or {}).get("status") or "missing"),
            ref_id=str((km or {}).get("id") or "") or None,
            writes_runtime_state=True,
            next_action_if_missing="hermes_writes_km_playbook_trust_candidate",
        ),
        _loop_stage(
            stage_id="telegram_receipt",
            receipt_source="awooop_outbound_message:controlled_apply_result",
            present=telegram_present,
            status=str((telegram or {}).get("send_status") or "missing"),
            ref_id=str((telegram or {}).get("message_id") or "") or None,
            writes_runtime_state=True,
            next_action_if_missing="live_apply_gateway_sends_controlled_apply_result_receipt",
        ),
    ]
    missing_stage_ids = [
        str(stage["stage_id"])
        for stage in stages
        if stage["present"] is not True
    ]
    # Closed needs an apply id, its auto-repair receipt, and a closed latest flow.
    closed = bool(
        apply_op_id
        and auto_repair_present
        and latest_flow_closure.get("closed") is True
    )

    classification = str(latest_failure_classification.get("classification") or "")
    if not (candidate_present or check_present or apply_present):
        execution_state = "waiting_for_candidate"
        next_executor_action = "candidate_backfill_worker_waits_for_matching_incident"
    elif not apply_present:
        execution_state = "executor_in_progress_or_waiting"
        next_executor_action = "continue_candidate_to_check_mode_to_apply"
    elif closed:
        if classification == "latest_controlled_apply_closed_success":
            execution_state = "closed_success"
            next_executor_action = "keep_receipt_chain_closed"
        else:
            execution_state = "closed_failed_apply_repair_ready"
            next_executor_action = str(
                controlled_retry_package.get("next_ai_action")
                or "run_no_write_check_mode_replay"
            )
    elif "telegram_receipt" in missing_stage_ids:
        execution_state = "open_waiting_for_live_gateway_receipt"
        next_executor_action = "do_not_fake_send_backfill_wait_for_live_apply_gateway"
    else:
        execution_state = "open_missing_internal_receipts"
        next_executor_action = "backfill_missing_auto_repair_verifier_km_receipts"

    return {
        "schema_version": "ai_agent_autonomous_execution_loop_ledger_v1",
        "project_id": project_id,
        "operation_id": apply_op_id or check_mode_op_id or candidate_op_id or None,
        "root_candidate_op_id": candidate_op_id or None,
        "check_mode_op_id": check_mode_op_id or None,
        "apply_op_id": apply_op_id or None,
        "incident_id": incident_id or None,
        "catalog_id": catalog_id or None,
        "playbook_path": playbook_path or None,
        "execution_state": execution_state,
        "closed": closed,
        "missing_stage_ids": missing_stage_ids,
        "next_executor_action": next_executor_action,
        "stages": stages,
        "safety_contract": {
            "writes_on_read": False,
            "backfill_may_write_auto_repair_verifier_km": True,
            "backfill_may_send_telegram": False,
            "live_apply_may_send_telegram_gateway_receipt": True,
            "reads_raw_sessions": False,
            "reads_secret_values": False,
        },
    }
|
||
|
||
|
||
def _latest_flow_closure(
    *,
    operation_latest_rows: Iterable[Mapping[str, Any] | Any],
    verifier_latest_rows: Iterable[Mapping[str, Any] | Any],
    km_latest_rows: Iterable[Mapping[str, Any] | Any],
    telegram_latest_rows: Iterable[Mapping[str, Any] | Any],
) -> dict[str, Any]:
    """Report whether the latest controlled apply has all follow-up receipts.

    The first ``ansible_apply_executed`` row in ``operation_latest_rows`` is
    treated as the latest apply. The flow is ``closed`` when a post-apply
    verifier, a KM writeback, and a sent Telegram receipt are all found for it.

    Returns:
        A dict with ``apply_op_id``, ``incident_id``, the three ``has_*``
        flags, ``closed``, and the list of ``missing`` receipt names. When no
        apply row exists, every flag is ``False`` and ``missing`` also lists
        ``ansible_apply_executed``.
    """
    operation_rows = [_row_mapping(row) for row in operation_latest_rows]
    verifier_rows = [_row_mapping(row) for row in verifier_latest_rows]
    km_rows = [_row_mapping(row) for row in km_latest_rows]
    telegram_rows = [_row_mapping(row) for row in telegram_latest_rows]
    latest_apply = next(
        (
            row
            for row in operation_rows
            if str(row.get("operation_type") or "") == "ansible_apply_executed"
        ),
        None,
    )
    if latest_apply is None:
        return {
            "apply_op_id": None,
            "incident_id": None,
            "has_post_apply_verifier": False,
            "has_km_writeback": False,
            "has_telegram_receipt": False,
            "closed": False,
            "missing": [
                "ansible_apply_executed",
                "post_apply_verifier",
                "km_writeback",
                "telegram_receipt",
            ],
        }

    apply_op_id = str(latest_apply.get("op_id") or "")
    incident_id = str(latest_apply.get("incident_id") or "")
    km_path_type = f"ansible_apply_receipt:{apply_op_id[:8]}" if apply_op_id else ""
    # An apply row without an op id must not match verifier rows that also
    # lack one ("" == ""); the execution loop ledger applies the same guard.
    has_verifier = bool(apply_op_id) and any(
        str(row.get("apply_op_id") or "") == apply_op_id
        for row in verifier_rows
    )
    # Likewise, an empty path type must not match KM rows without a path type.
    has_km = any(
        (
            km_path_type
            and str(row.get("path_type") or "") == km_path_type
        )
        or (
            incident_id
            and str(row.get("related_incident_id") or "") == incident_id
        )
        for row in km_rows
    )
    # The incident filter is skipped when the apply row carries no incident
    # id, so any sent controlled_apply_result then counts.
    has_telegram = any(
        str(row.get("send_status") or "") == "sent"
        and str(row.get("action") or "") == "controlled_apply_result"
        and (
            not incident_id
            or str(row.get("incident_id") or "") == incident_id
        )
        for row in telegram_rows
    )
    missing = [
        name
        for name, present in (
            ("post_apply_verifier", has_verifier),
            ("km_writeback", has_km),
            ("telegram_receipt", has_telegram),
        )
        if not present
    ]
    return {
        "apply_op_id": apply_op_id or None,
        "incident_id": incident_id or None,
        "has_post_apply_verifier": has_verifier,
        "has_km_writeback": has_km,
        "has_telegram_receipt": has_telegram,
        "closed": not missing,
        "missing": missing,
    }
|
||
|
||
|
||
def _latest_failure_classification(
    *,
    operation_latest_rows: Iterable[Mapping[str, Any] | Any],
    verifier_latest_rows: Iterable[Mapping[str, Any] | Any],
    latest_flow_closure: Mapping[str, Any],
) -> dict[str, Any]:
    """Classify the newest controlled apply outcome without exposing command output.

    The first ``ansible_apply_executed`` row is classified from its return
    code, the verifier result recorded for its op id, and whether the latest
    flow is closed. Only identifiers and status fields are copied into the
    payload.
    """

    operations = [_row_mapping(entry) for entry in operation_latest_rows]
    verifier_receipts = [_row_mapping(entry) for entry in verifier_latest_rows]

    apply_row = None
    for entry in operations:
        if str(entry.get("operation_type") or "") == "ansible_apply_executed":
            apply_row = entry
            break

    if apply_row is None:
        # Nothing applied yet: emit the neutral placeholder classification.
        return {
            "schema_version": "ai_agent_executor_failure_classification_v1",
            "classification": "no_controlled_apply_observed",
            "action": "wait_for_controlled_apply_receipt",
            "target_selector": {},
            "evidence": {
                "latest_flow_closed": False,
                "output_tail_in_readback": False,
                "unredacted_output_required": False,
            },
        }

    apply_op_id = str(apply_row.get("op_id") or "")
    incident_id = str(apply_row.get("incident_id") or "")
    returncode = _int_value(apply_row.get("returncode"))

    # Verifier evidence only counts when it references this apply op id.
    verifier: Any = {}
    if apply_op_id:
        for receipt in verifier_receipts:
            if str(receipt.get("apply_op_id") or "") == apply_op_id:
                verifier = receipt
                break
    verification_result = str(verifier.get("verification_result") or "").lower()
    latest_flow_closed = latest_flow_closure.get("closed") is True

    if returncode == 0:
        if latest_flow_closed and verification_result in ("success", ""):
            classification = "latest_controlled_apply_closed_success"
            action = "keep_receipt_chain_closed"
        else:
            # NOTE(review): a zero return code with a non-success verifier
            # result on a closed flow also lands here, i.e. it is reported as
            # a receipt gap — confirm that is intended.
            classification = "controlled_apply_success_receipt_gap"
            action = "backfill_missing_verifier_km_or_telegram_receipt"
    elif latest_flow_closed:
        classification = "closed_failed_apply_requires_ai_repair"
        action = "queue_check_mode_replay_and_playbook_repair_candidate"
    else:
        classification = "failed_apply_receipt_gap_requires_backfill_then_repair"
        action = "backfill_missing_receipts_then_queue_repair_candidate"

    target_selector = {
        "incident_id": incident_id or None,
        "apply_op_id": apply_op_id or None,
        "parent_op_id": apply_row.get("parent_op_id"),
        "catalog_id": apply_row.get("catalog_id"),
        "playbook_path": apply_row.get("playbook_path"),
        "execution_mode": apply_row.get("execution_mode"),
    }
    evidence = {
        "operation_status": apply_row.get("status"),
        "returncode": apply_row.get("returncode"),
        "verification_result": verification_result or None,
        "latest_flow_closed": latest_flow_closed,
        "has_post_apply_verifier": latest_flow_closure.get("has_post_apply_verifier") is True,
        "has_km_writeback": latest_flow_closure.get("has_km_writeback") is True,
        "has_telegram_receipt": latest_flow_closure.get("has_telegram_receipt") is True,
        "output_tail_in_readback": False,
        "unredacted_output_required": False,
    }
    return {
        "schema_version": "ai_agent_executor_failure_classification_v1",
        "classification": classification,
        "action": action,
        "target_selector": target_selector,
        "evidence": evidence,
        "safe_next_steps": [
            "run_no_write_check_mode_replay",
            "extract_sanitized_failed_task_summary",
            "write_km_playbook_repair_candidate",
            "retry_controlled_apply_only_after_check_mode_passes",
        ],
    }
|
||
|
||
|
||
def _controlled_retry_package(classification: Mapping[str, Any]) -> dict[str, Any]:
    """Build the next no-write repair package from the public failure classification.

    Args:
        classification: Failure-classification payload. Only its
            ``classification`` label and its ``target_selector`` mapping
            are read.

    Returns:
        An ``ai_agent_controlled_retry_package_v1`` dict. When the label is
        one of the two repair-required classifications, the package is marked
        ready for a no-write check-mode replay and every ``*_required`` gate
        is ``True``; otherwise the package reports that no retry is required.
        ``controlled_apply_retry_allowed_now`` is always ``False``.
    """
    target_selector = classification.get("target_selector")
    if not isinstance(target_selector, Mapping):
        # A missing or malformed selector degrades to an empty one so the
        # lookups below yield None instead of raising.
        target_selector = {}
    apply_op_id = str(target_selector.get("apply_op_id") or "")
    repair_required = classification.get("classification") in {
        "closed_failed_apply_requires_ai_repair",
        "failed_apply_receipt_gap_requires_backfill_then_repair",
    }
    return {
        "schema_version": "ai_agent_controlled_retry_package_v1",
        # The package id is built from the first 8 characters of the apply
        # op id and is only issued when a repair is actually required.
        "package_id": (
            f"ansible_retry:{apply_op_id[:8]}"
            if repair_required and apply_op_id
            else None
        ),
        "status": (
            "ready_for_no_write_check_mode_replay"
            if repair_required
            else "not_required_for_latest_apply"
        ),
        "target_selector": dict(target_selector),
        "source_of_truth": {
            "catalog_id": target_selector.get("catalog_id"),
            "playbook_path": target_selector.get("playbook_path"),
            "source_diff_required_before_retry": True,
            "failed_task_summary_required": True,
        },
        "preflight": {
            "no_write_check_mode_replay_required": repair_required,
            "reuse_parent_check_mode_op_id": target_selector.get("parent_op_id"),
            "unredacted_output_required": False,
            "secret_value_collection_allowed": False,
        },
        "apply_gate": {
            "controlled_apply_retry_allowed_now": False,
            "opens_legacy_runner": False,
            "requires_check_mode_success_before_apply": repair_required,
        },
        "rollback": {
            "rollback_candidate_required": repair_required,
            "destructive_rollback_allowed": False,
            "rollback_plan_source": "playbook_repair_candidate_after_failed_task_summary",
        },
        "post_apply": {
            "post_apply_verifier_required": repair_required,
            "km_playbook_trust_writeback_required": repair_required,
            "telegram_receipt_required": repair_required,
        },
        "next_ai_action": (
            "run_no_write_check_mode_replay"
            if repair_required
            else "keep_latest_apply_receipts"
        ),
    }


def classify_deploy_control_plane_observation(
    *,
    run_status: str,
    is_latest_deploy_intent: bool,
    active_task_container_count: int,
    production_marker_hit: bool,
    latest_flow_closed: bool,
    runner_capacity_ok: bool,
    runner_forbidden_label_count: int,
) -> dict[str, Any]:
    """Classify CD/run noise into an internal PlayBook decision.

    Args:
        run_status: Raw run status; it is stripped and lower-cased, and a
            falsy value becomes ``"unknown"``.
        is_latest_deploy_intent: Whether the run belongs to the latest
            deploy intent. When ``False`` the run is classified as superseded
            before anything else is considered.
        active_task_container_count: Number of active task containers; any
            value above zero counts as an active task.
        production_marker_hit: Whether the production marker was observed.
        latest_flow_closed: Whether the latest flow is closed. Combined with
            ``production_marker_hit`` to form the "production truth" signal.
        runner_capacity_ok: Whether runner capacity is acceptable.
        runner_forbidden_label_count: Count of forbidden runner labels; the
            lane is considered safe only when capacity is OK and this is 0.

    Returns:
        An ``ai_agent_deploy_control_plane_decision_v1`` dict holding the
        classification, the action, the normalized inputs (counts clamped to
        be non-negative), the internal write-back flags and the safety
        boundary flags.
    """
    normalized_status = str(run_status or "unknown").strip().lower()
    has_active_task = active_task_container_count > 0
    runner_lane_safe = runner_capacity_ok and runner_forbidden_label_count == 0
    production_truth_ok = production_marker_hit and latest_flow_closed

    # Branch order is significant: the first matching rule wins.
    if not is_latest_deploy_intent:
        classification = "superseded_run_skip"
        action = "skip_cd_work_and_attach_to_superseded_intent"
    elif production_truth_ok and normalized_status == "success":
        classification = "deploy_succeeded_marker_hit"
        action = "close_deploy_intent_and_write_receipts"
    elif normalized_status == "running" and has_active_task and runner_lane_safe:
        classification = "running_with_controlled_task"
        action = "continue_observing_without_restarting_runner"
    elif normalized_status == "running" and not has_active_task and production_truth_ok:
        classification = "running_no_container_stale_ui"
        action = "treat_gitea_spinner_as_stale_and_keep_production_truth"
    elif normalized_status == "failure" and production_truth_ok:
        classification = "failed_run_superseded_by_marker_hit"
        action = "record_non_blocking_failure_and_keep_current_marker"
    elif normalized_status == "failure":
        classification = "real_failure_requires_playbook_repair"
        action = "open_cd_repair_playbook_with_target_selector_and_verifier"
    elif not runner_lane_safe:
        classification = "runner_lane_guardrail_violation"
        action = "fail_closed_runner_lane_and_open_repair_playbook"
    else:
        classification = "waiting_for_controlled_observation"
        action = "wait_for_mcp_observation_or_deploy_intent_update"

    # The same three classifications drive both the Telegram receipt flag and
    # the runtime-state write flag; compute the membership once so the two
    # flags cannot drift apart.
    receipt_and_write_needed = classification in {
        "deploy_succeeded_marker_hit",
        "real_failure_requires_playbook_repair",
        "runner_lane_guardrail_violation",
    }

    return {
        "schema_version": "ai_agent_deploy_control_plane_decision_v1",
        "classification": classification,
        "action": action,
        "inputs": {
            "run_status": normalized_status,
            "is_latest_deploy_intent": is_latest_deploy_intent,
            "active_task_container_count": max(0, active_task_container_count),
            "production_marker_hit": production_marker_hit,
            "latest_flow_closed": latest_flow_closed,
            "runner_capacity_ok": runner_capacity_ok,
            "runner_forbidden_label_count": max(0, runner_forbidden_label_count),
        },
        "internal_writeback": {
            "mcp_event_type": "deploy_run_observation",
            "rag_context_required": True,
            "km_writeback_required": True,
            "playbook_route_required": True,
            "log_projection_required": True,
            "telegram_receipt_required": receipt_and_write_needed,
        },
        "safety_boundary": {
            "reads_raw_sessions": False,
            "reads_secret_values": False,
            "opens_legacy_runner": False,
            "uses_force_push": False,
            "writes_runtime_state": receipt_and_write_needed,
        },
    }


def _control_plane_integration() -> dict[str, Any]:
    """Describe the internal MCP / RAG / KM / PlayBook / log control loop.

    Returns:
        An ``ai_agent_autonomous_runtime_internal_loop_v1`` dict listing the
        declared MCP sensors, RAG context queries, PlayBook decision classes,
        the KM write-back and log projection contracts, three worked
        classifier examples, and rollup counts derived from those lists.
    """
    # Worked examples: a successful deploy, a "running" run with no active
    # container while production truth holds, and a failure without
    # production truth.
    classifier_examples = [
        classify_deploy_control_plane_observation(
            run_status="success",
            is_latest_deploy_intent=True,
            active_task_container_count=0,
            production_marker_hit=True,
            latest_flow_closed=True,
            runner_capacity_ok=True,
            runner_forbidden_label_count=0,
        ),
        classify_deploy_control_plane_observation(
            run_status="running",
            is_latest_deploy_intent=True,
            active_task_container_count=0,
            production_marker_hit=True,
            latest_flow_closed=True,
            runner_capacity_ok=True,
            runner_forbidden_label_count=0,
        ),
        classify_deploy_control_plane_observation(
            run_status="failure",
            is_latest_deploy_intent=True,
            active_task_container_count=0,
            production_marker_hit=False,
            latest_flow_closed=False,
            runner_capacity_ok=True,
            runner_forbidden_label_count=0,
        ),
    ]
    mcp_sensors = [
        {
            "sensor_id": "gitea_actions_run_observer",
            "normalized_event": "RunObservation",
            "raw_secret_access_allowed": False,
        },
        {
            "sensor_id": "controlled_runner_lane_observer",
            "normalized_event": "RunnerLaneState",
            "raw_runner_token_access_allowed": False,
        },
        {
            "sensor_id": "production_marker_observer",
            "normalized_event": "ProductionTruthSnapshot",
            "raw_session_access_allowed": False,
        },
        {
            "sensor_id": "browser_smoke_observer",
            "normalized_event": "FrontendTruthSnapshot",
            "raw_conversation_access_allowed": False,
        },
    ]
    rag_context_queries = [
        "runner_pressure_buildkit_stockplatform_collision",
        "controlled_cd_lane_capacity_label_guardrails",
        "autonomous_runtime_marker_receipt_contract",
    ]
    playbook_decision_classes = [
        "deploy_succeeded_marker_hit",
        "running_with_controlled_task",
        "running_no_container_stale_ui",
        "superseded_run_skip",
        "failed_run_superseded_by_marker_hit",
        "real_failure_requires_playbook_repair",
        "runner_lane_guardrail_violation",
    ]
    return {
        "schema_version": "ai_agent_autonomous_runtime_internal_loop_v1",
        "status": "mcp_rag_km_playbook_log_control_loop_declared",
        "purpose": (
            "把 Gitea run、runner lane、production marker、browser smoke 與 executor receipt "
            "先收斂成內部事件,再由 PlayBook decision 推進或跳過。"
        ),
        "mcp_sensors": mcp_sensors,
        "rag_context_queries": rag_context_queries,
        "playbook_decision_classes": playbook_decision_classes,
        "km_writeback_contract": {
            "knowledge_entry_path_type": "deploy_control_plane_decision:<deploy_intent_id>",
            "required_refs": [
                "deploy_intent_id",
                "target_sha",
                "gitea_run_id",
                "production_marker",
                "latest_flow_closure",
                "runner_lane_state",
            ],
            "stores_raw_logs": False,
            "stores_secret_values": False,
        },
        "log_projection_contract": {
            "timeline_event_type": "ai_agent_deploy_control_plane_decision",
            "logbook_projection": "summary_only_after_verifier",
            "raw_html_or_long_log_allowed": False,
        },
        "classifier_examples": classifier_examples,
        # Counts are derived from the lists above so they cannot drift from
        # the declared content.
        "rollups": {
            "mcp_sensor_count": len(mcp_sensors),
            "rag_context_query_count": len(rag_context_queries),
            "playbook_decision_class_count": len(playbook_decision_classes),
            "classifier_example_count": len(classifier_examples),
        },
    }


def build_runtime_receipt_readback_from_rows(
    *,
    project_id: str = _DEFAULT_PROJECT_ID,
    lookback_hours: int = _DEFAULT_LOOKBACK_HOURS,
    db_read_status: str = "ok",
    operation_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    operation_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    auto_repair_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    auto_repair_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    verifier_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    verifier_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    km_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    km_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    telegram_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    telegram_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    mcp_gateway_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    legacy_mcp_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    service_log_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    executor_log_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    timeline_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    playbook_trust_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    error_type: str | None = None,
) -> dict[str, Any]:
    """Build the live executor receipt readback from already-fetched rows.

    Args:
        project_id: Project identifier echoed into the readback and passed to
            the execution-loop ledger builder.
        lookback_hours: Lookback window; a falsy value falls back to the
            module default and the reported value is at least 1.
        db_read_status: Status string echoed into the readback and passed to
            the work-item progress builder.
        *_count_rows: Aggregated count rows for each receipt family, fed to
            the module's count-summary helpers.
        *_latest_rows: Latest rows for each receipt family; they are
            materialized into lists because they are consumed more than once.
        error_type: When truthy, an ``error`` entry with this type and a
            fixed message is added to the readback.

    Returns:
        The readback dict (schema ``_LIVE_READBACK_SCHEMA_VERSION``) with
        per-family counts, key-filtered latest rows, and the derived closure,
        failure classification, retry package, ledgers, taxonomy, wiring,
        learning loop and work-item progress sections.
    """
    # Materialize the "latest" iterables once; several builders read them.
    operation_latest = list(operation_latest_rows)
    auto_repair_latest = list(auto_repair_latest_rows)
    verifier_latest = list(verifier_latest_rows)
    km_latest = list(km_latest_rows)
    telegram_latest = list(telegram_latest_rows)

    operation_summary = _operation_counts(operation_count_rows)
    auto_repair_summary = _status_counts(
        auto_repair_count_rows,
        status_key="result_status",
    )
    verifier_summary = _status_counts(
        verifier_count_rows,
        status_key="verification_result",
    )
    km_summary = _status_counts(km_count_rows, status_key="status")
    telegram_summary = _status_counts(telegram_count_rows, status_key="send_status")
    mcp_gateway_summary = _status_counts(mcp_gateway_count_rows, status_key="status")
    legacy_mcp_summary = _status_counts(legacy_mcp_count_rows, status_key="status")
    service_log_summary = _status_counts(service_log_count_rows, status_key="status")
    executor_log_summary = _status_counts(executor_log_count_rows, status_key="status")
    timeline_summary = _status_counts(timeline_count_rows, status_key="status")
    playbook_trust_summary = _status_counts(playbook_trust_count_rows, status_key="status")

    # The trace ledger and the log taxonomy take the same eleven summaries.
    all_summaries = {
        "operation_summary": operation_summary,
        "auto_repair_summary": auto_repair_summary,
        "verifier_summary": verifier_summary,
        "km_summary": km_summary,
        "telegram_summary": telegram_summary,
        "mcp_gateway_summary": mcp_gateway_summary,
        "legacy_mcp_summary": legacy_mcp_summary,
        "service_log_summary": service_log_summary,
        "executor_log_summary": executor_log_summary,
        "timeline_summary": timeline_summary,
        "playbook_trust_summary": playbook_trust_summary,
    }

    latest_closure = _latest_flow_closure(
        operation_latest_rows=operation_latest,
        verifier_latest_rows=verifier_latest,
        km_latest_rows=km_latest,
        telegram_latest_rows=telegram_latest,
    )
    latest_failure = _latest_failure_classification(
        operation_latest_rows=operation_latest,
        verifier_latest_rows=verifier_latest,
        latest_flow_closure=latest_closure,
    )
    retry_package = _controlled_retry_package(latest_failure)
    loop_ledger = _autonomous_execution_loop_ledger(
        project_id=project_id,
        operation_latest_rows=operation_latest,
        verifier_latest_rows=verifier_latest,
        km_latest_rows=km_latest,
        telegram_latest_rows=telegram_latest,
        auto_repair_latest_rows=auto_repair_latest,
        latest_flow_closure=latest_closure,
        latest_failure_classification=latest_failure,
        controlled_retry_package=retry_package,
    )
    trace_ledger = _build_trace_ledger(
        **all_summaries,
        latest_flow_closure=latest_closure,
        loop_ledger=loop_ledger,
    )
    log_integration_taxonomy = _build_log_integration_taxonomy(**all_summaries)
    agent_decision_wiring = _build_agent_decision_wiring(
        operation_summary=operation_summary,
        verifier_summary=verifier_summary,
        km_summary=km_summary,
        mcp_gateway_summary=mcp_gateway_summary,
        legacy_mcp_summary=legacy_mcp_summary,
        service_log_summary=service_log_summary,
        timeline_summary=timeline_summary,
        playbook_trust_summary=playbook_trust_summary,
        log_integration_taxonomy=log_integration_taxonomy,
        loop_ledger=loop_ledger,
        latest_flow_closure=latest_closure,
    )
    learning_loop = _build_learning_loop_readback(
        operation_summary=operation_summary,
        verifier_summary=verifier_summary,
        km_summary=km_summary,
        playbook_trust_summary=playbook_trust_summary,
        log_integration_taxonomy=log_integration_taxonomy,
        agent_decision_wiring=agent_decision_wiring,
        latest_flow_closure=latest_closure,
        latest_failure_classification=latest_failure,
        controlled_retry_package=retry_package,
        loop_ledger=loop_ledger,
    )
    work_item_progress = _build_work_item_progress(
        trace_ledger=trace_ledger,
        log_integration_taxonomy=log_integration_taxonomy,
        agent_decision_wiring=agent_decision_wiring,
        learning_loop=learning_loop,
        db_read_status=db_read_status,
    )
    apply_summary = operation_summary.get("ansible_apply_executed") or {}

    readback = {
        "schema_version": _LIVE_READBACK_SCHEMA_VERSION,
        "project_id": project_id,
        "lookback_hours": max(1, int(lookback_hours or _DEFAULT_LOOKBACK_HOURS)),
        "db_read_status": db_read_status,
        "writes_on_read": False,
        "ansible_operations": {
            "counts": operation_summary,
            # Latest rows are filtered down to an explicit key allowlist.
            "latest": _sanitize_latest_rows(
                operation_latest,
                allowed_keys=(
                    "op_id",
                    "parent_op_id",
                    "operation_type",
                    "status",
                    "actor",
                    "incident_id",
                    "catalog_id",
                    "playbook_path",
                    "execution_mode",
                    "source_candidate_op_id",
                    "check_mode_op_id",
                    "risk_level",
                    "controlled_apply_allowed",
                    "returncode",
                    "duration_ms",
                    "created_at",
                ),
            ),
        },
        "auto_repair_execution_receipt": {
            **auto_repair_summary,
            "latest": _sanitize_latest_rows(
                auto_repair_latest,
                allowed_keys=(
                    "id",
                    "incident_id",
                    "catalog_id",
                    "playbook_name",
                    "result_status",
                    "triggered_by",
                    "risk_level",
                    "execution_time_ms",
                    "created_at",
                ),
            ),
        },
        "ansible_apply_executed": {
            "total": _int_value(apply_summary.get("total")),
            "recent": _int_value(apply_summary.get("recent")),
            "by_status": apply_summary.get("by_status") or {},
        },
        "post_apply_verifier": {
            **verifier_summary,
            "latest": _sanitize_latest_rows(
                verifier_latest,
                allowed_keys=(
                    "id",
                    "incident_id",
                    "matched_playbook_id",
                    "verification_result",
                    "apply_op_id",
                    "catalog_id",
                    "playbook_path",
                    "returncode",
                    "collected_at",
                ),
            ),
        },
        "km_writeback": {
            **km_summary,
            "latest": _sanitize_latest_rows(
                km_latest,
                allowed_keys=(
                    "id",
                    "title",
                    "related_incident_id",
                    "related_playbook_id",
                    "path_type",
                    "status",
                    "created_by",
                    "created_at",
                ),
            ),
        },
        "telegram_receipt": {
            **telegram_summary,
            "latest": _sanitize_latest_rows(
                telegram_latest,
                allowed_keys=(
                    "message_id",
                    "run_id",
                    "message_type",
                    "send_status",
                    "provider_message_id",
                    "incident_id",
                    "action",
                    "queued_at",
                    "sent_at",
                ),
            ),
        },
        "mcp_context": {
            "gateway": mcp_gateway_summary,
            "legacy": legacy_mcp_summary,
            "total": _trace_total(mcp_gateway_summary) + _trace_total(legacy_mcp_summary),
            "recent": _trace_recent(mcp_gateway_summary) + _trace_recent(legacy_mcp_summary),
        },
        "service_log_evidence": service_log_summary,
        "executor_log_projection": executor_log_summary,
        "timeline_projection": timeline_summary,
        "playbook_trust": playbook_trust_summary,
        "latest_flow_closure": latest_closure,
        "latest_failure_classification": latest_failure,
        "controlled_retry_package": retry_package,
        "autonomous_execution_loop_ledger": loop_ledger,
        "trace_ledger": trace_ledger,
        "log_integration_taxonomy": log_integration_taxonomy,
        "agent_decision_wiring": agent_decision_wiring,
        "learning_loop": learning_loop,
        "work_item_progress": work_item_progress,
    }
    if error_type:
        # Only the error type is surfaced; the message is a fixed string.
        readback["error"] = {
            "type": error_type,
            "message": "runtime receipt DB read failed; see API logs",
        }
    return readback


def _attach_runtime_receipt_readback(
    payload: dict[str, Any],
    readback: dict[str, Any],
) -> dict[str, Any]:
    """Attach a receipt readback to ``payload`` and mirror its counts into rollups.

    Args:
        payload: Control-plane payload; mutated in place. ``readback`` is
            stored under ``runtime_receipt_readback`` and the ``rollups``
            mapping (created when absent) gains the ``live_*`` counters.
        readback: Receipt readback dict whose sections are summarized.

    Returns:
        The same ``payload`` object, for call chaining.
    """

    def section(name: str) -> Mapping[str, Any]:
        # Treat a missing key and an explicit None value alike, so every
        # section lookup is None-safe.
        return readback.get(name) or {}

    def total(name: str) -> int:
        return _int_value(section(name).get("total"))

    def rollup(name: str, key: str) -> int:
        return _int_value((section(name).get("rollups") or {}).get(key))

    def flag(condition: bool) -> int:
        return 1 if condition else 0

    payload["runtime_receipt_readback"] = readback
    rollups = payload.setdefault("rollups", {})
    rollups.update({
        "live_ansible_apply_executed_count": total("ansible_apply_executed"),
        "live_auto_repair_execution_receipt_count": total("auto_repair_execution_receipt"),
        "live_post_apply_verifier_count": total("post_apply_verifier"),
        "live_km_writeback_count": total("km_writeback"),
        "live_telegram_receipt_count": total("telegram_receipt"),
        "live_executor_latest_flow_closed_count": flag(
            section("latest_flow_closure").get("closed") is True
        ),
        "live_autonomous_execution_loop_closed_count": flag(
            section("autonomous_execution_loop_ledger").get("closed") is True
        ),
        "live_executor_latest_apply_repair_required_count": flag(
            section("latest_failure_classification").get("classification")
            in {
                "closed_failed_apply_requires_ai_repair",
                "failed_apply_receipt_gap_requires_backfill_then_repair",
            }
        ),
        "live_executor_retry_package_ready_count": flag(
            section("controlled_retry_package").get("status")
            == "ready_for_no_write_check_mode_replay"
        ),
        "live_mcp_context_count": total("mcp_context"),
        "live_service_log_evidence_count": total("service_log_evidence"),
        "live_executor_log_projection_count": total("executor_log_projection"),
        "live_timeline_projection_count": total("timeline_projection"),
        "live_playbook_trust_signal_count": total("playbook_trust"),
        "live_trace_recorded_stage_count": _int_value(
            section("trace_ledger").get("recorded_stage_count")
        ),
        "live_trace_required_missing_count": len(
            section("trace_ledger").get("missing_required_stage_ids") or []
        ),
        "live_log_source_family_count": rollup(
            "log_integration_taxonomy", "source_family_count"
        ),
        "live_log_active_source_family_count": rollup(
            "log_integration_taxonomy", "active_source_family_count"
        ),
        "live_log_label_dimension_count": rollup(
            "log_integration_taxonomy", "label_dimension_count"
        ),
        "live_log_classified_event_total": rollup(
            "log_integration_taxonomy", "classified_event_total"
        ),
        "live_log_recent_classified_event_total": rollup(
            "log_integration_taxonomy", "recent_classified_event_total"
        ),
        "live_agent_decision_wiring_stage_count": rollup(
            "agent_decision_wiring", "stage_count"
        ),
        "live_agent_decision_wiring_required_present_count": rollup(
            "agent_decision_wiring", "required_stage_present_count"
        ),
        "live_agent_decision_wiring_required_missing_count": rollup(
            "agent_decision_wiring", "required_stage_missing_count"
        ),
        "live_agent_decision_wiring_complete_count": flag(
            section("agent_decision_wiring").get("status") == "completed"
        ),
        "live_learning_loop_stage_count": rollup("learning_loop", "stage_count"),
        "live_learning_loop_required_present_count": rollup(
            "learning_loop", "required_stage_present_count"
        ),
        "live_learning_loop_required_missing_count": rollup(
            "learning_loop", "required_stage_missing_count"
        ),
        "live_learning_loop_complete_count": flag(
            section("learning_loop").get("status") == "completed"
        ),
        "live_learning_loop_similar_case_source_count": rollup(
            "learning_loop", "similar_case_source_total"
        ),
        "live_work_item_count": rollup("work_item_progress", "work_item_count"),
        "live_work_item_completed_count": rollup("work_item_progress", "completed_count"),
        "live_work_item_in_progress_count": rollup("work_item_progress", "in_progress_count"),
        "live_work_item_pending_count": rollup("work_item_progress", "pending_count"),
        "live_work_item_blocked_count": rollup("work_item_progress", "blocked_count"),
    })
    return payload


def build_ai_agent_autonomous_runtime_control() -> dict[str, Any]:
    """Build the current AI Agent autonomy control-plane readback.

    Returns:
        The control-plane payload (schema ``_SCHEMA_VERSION``) covering
        program status, current policy, runtime switches read from
        ``settings``, agent roles, report delivery cadences, the controlled
        executor flow, control-plane integration, legacy policy overrides,
        hard blockers, the visibility contract and rollups. A receipt
        readback built without rows (``db_read_status="not_queried"``) is
        attached, and the payload is passed to ``_validate_payload`` before
        being returned.
    """
    allowed_risks = _allowed_risk_levels()
    report_cadences = [
        {
            "cadence": "daily",
            "display_name": "日報",
            "schedule": f"每日 {DAILY_REPORT_HOUR_TAIPEI:02d}:00 台北時間",
            "worker": "report_generation_service.run_daily_report_loop",
            "telegram_gateway_delivery_enabled": True,
            "direct_bot_api_allowed": False,
            "receipt_source": "daily_report_sent log + Telegram Gateway result",
        },
        {
            "cadence": "weekly",
            "display_name": "週報",
            # Weekday 4 gets the friendly label; any other configured weekday
            # is shown as its raw number.
            "schedule": (
                f"每週五 {WEEKLY_REPORT_HOUR_TAIPEI:02d}:00 台北時間"
                if WEEKLY_REPORT_WEEKDAY_TAIPEI == 4
                else f"每週 weekday={WEEKLY_REPORT_WEEKDAY_TAIPEI} {WEEKLY_REPORT_HOUR_TAIPEI:02d}:00 台北時間"
            ),
            "worker": "report_generation_service.run_weekly_report_loop",
            "telegram_gateway_delivery_enabled": True,
            "direct_bot_api_allowed": False,
            "receipt_source": "weekly_report_sent log + Telegram Gateway result",
        },
        {
            "cadence": "monthly",
            "display_name": "月報",
            "schedule": f"每月 {MONTHLY_REPORT_DAY_TAIPEI} 日 {MONTHLY_REPORT_HOUR_TAIPEI:02d}:00 台北時間",
            "worker": "report_generation_service.run_monthly_report_loop",
            "telegram_gateway_delivery_enabled": True,
            "direct_bot_api_allowed": False,
            "receipt_source": "monthly_report_sent log + Telegram Gateway result",
        },
    ]
    executor_receipts = [
        {
            "operation_type": "ansible_candidate_matched",
            "owner_agent": "Hermes",
            "purpose": "把修復候選寫入 executor 可認領佇列",
            "writes_runtime_state": False,
        },
        {
            "operation_type": "ansible_check_mode_executed",
            "owner_agent": "AwoooP Ansible check-mode worker",
            "purpose": "執行 ansible-playbook --check --diff 並留下乾跑收據",
            "writes_runtime_state": False,
        },
        {
            "operation_type": "ansible_apply_executed",
            "owner_agent": "AwoooP controlled apply worker",
            "purpose": "check-mode 通過後,對 allowlisted low / medium / high PlayBook 受控 apply",
            "writes_runtime_state": True,
        },
        {
            "operation_type": "incident_evidence.post_execution_state",
            "owner_agent": "post_apply_verifier",
            "purpose": "apply 後寫入 verifier 結果與 post-execution evidence",
            "writes_runtime_state": True,
        },
        {
            "operation_type": "knowledge_entries",
            "owner_agent": "Hermes",
            "purpose": "把已驗證執行沉澱成 KM / PlayBook trust 候選",
            "writes_runtime_state": True,
        },
    ]
    hard_blockers = [
        "secret_token_private_key_cookie_session_auth_header_cleartext",
        "drop_truncate_restore_prune_destructive_database_operation",
        "reboot_node_drain_irreversible_firewall_or_host_lockout",
        "credentialed_exploit_or_external_active_scan",
        "new_paid_provider_cost_ceiling_or_provider_switch_without_replay_shadow_canary",
        "force_push_delete_repo_refs_or_visibility_change",
        "critical_or_break_glass_route_without_explicit_break_glass_contract",
    ]
    legacy_overrides = [
        {
            "legacy_area": "report_status_board_no_live_send",
            "current_effect": "overridden",
            "new_behavior": "日報 / 週報 / 月報透過 Telegram Gateway 排程派送",
        },
        {
            "legacy_area": "report_live_delivery_owner_review_required",
            "current_effect": "overridden",
            "new_behavior": "報告派送走低/中/高風險自動化政策;critical 才 break-glass",
        },
        {
            "legacy_area": "high_risk_owner_review_queue",
            "current_effect": "overridden_for_high_non_critical",
            "new_behavior": "high 風險允許 controlled apply;critical / hard blocker 仍不自動",
        },
        {
            "legacy_area": "telegram_no_send_preview_only",
            "current_effect": "overridden",
            "new_behavior": "用 Telegram Gateway 實送報告與 actionable receipt;不直接暴露 Bot API",
        },
    ]
    control_plane_integration = _control_plane_integration()
    integration_rollups = control_plane_integration["rollups"]
    payload = {
        "schema_version": _SCHEMA_VERSION,
        "generated_at": datetime.now(UTC).isoformat(),
        "program_status": {
            "current_task_id": "P2-416-D1N",
            "status": "current_directive_control_plane_active",
            "runtime_authority": _RUNTIME_AUTHORITY,
            "deploy_readback_marker": _DEPLOY_READBACK_MARKER,
            "deploy_attempt_note": _DEPLOY_ATTEMPT_NOTE,
            "legacy_no_send_no_live_rules_overridden": True,
            "implementation_completion_percent": 88,
            "status_note": (
                "目前有效規則:low / medium / high 風險由 AI Agent 在 allowlist、"
                "Ansible check-mode、verifier、rollback、KM 與 Telegram receipt 下受控自動處理。"
            ),
        },
        "current_policy": {
            "low_risk_controlled_apply_allowed": "low" in allowed_risks,
            "medium_risk_controlled_apply_allowed": "medium" in allowed_risks,
            "high_risk_controlled_apply_allowed": "high" in allowed_risks,
            "critical_break_glass_required": True,
            "owner_review_required_for_low_medium_high": False,
            "direct_bot_api_allowed": False,
            "telegram_gateway_required": True,
            "post_apply_verifier_required": True,
            "km_learning_writeback_required": True,
        },
        "runtime_switches": {
            "ansible_check_mode_worker_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER),
            "ansible_controlled_apply_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CONTROLLED_APPLY),
            "ansible_controlled_apply_allowed_risk_levels": allowed_risks,
            "ansible_check_mode_interval_seconds": settings.AWOOOP_ANSIBLE_CHECK_MODE_INTERVAL_SECONDS,
            "ansible_check_mode_batch_limit": settings.AWOOOP_ANSIBLE_CHECK_MODE_BATCH_LIMIT,
            "ansible_check_mode_timeout_seconds": settings.AWOOOP_ANSIBLE_CHECK_MODE_TIMEOUT_SECONDS,
            "ansible_controlled_apply_timeout_seconds": settings.AWOOOP_ANSIBLE_CONTROLLED_APPLY_TIMEOUT_SECONDS,
        },
        "agent_roles": [
            {
                "agent_id": "openclaw",
                "role": "仲裁 / hard blocker / replay-shadow-canary gate",
                "current_job": "只阻擋真正 critical 與 hard blocker,不再用身份保護舊架構",
            },
            {
                "agent_id": "hermes",
                "role": "報告 / Telegram digest / KM 與 PlayBook trust writeback",
                "current_job": "日週月報、收據摘要與 verifier 後學習沉澱",
            },
            {
                "agent_id": "nemotron",
                "role": "市場技術雷達 / no-write replay / challenger scorecard",
                "current_job": "用市場與回放數據挑戰 OpenClaw / provider / Agent 組合",
            },
            {
                "agent_id": "awooop_ansible_worker",
                "role": "executor",
                "current_job": "candidate → check-mode → controlled apply → verifier → KM",
            },
            {
                "agent_id": "telegram_ops",
                "role": "Telegram Gateway receipt",
                "current_job": "群組報告、actionable receipt、失敗告警;不展示敏感值或未脫敏資料",
            },
        ],
        "report_delivery": {
            "status": "telegram_gateway_delivery_enabled",
            "cadences": report_cadences,
        },
        "controlled_executor": {
            "status": "check_mode_then_apply_enabled"
            if settings.ENABLE_AWOOOP_ANSIBLE_CONTROLLED_APPLY
            else "check_mode_only_by_config",
            "operation_receipts": executor_receipts,
            "required_flow": [
                "allowlisted_candidate",
                "ansible_check_mode_success",
                "controlled_apply",
                "post_apply_verifier",
                "auto_repair_execution_receipt",
                "km_learning_writeback",
                "telegram_receipt_or_alert",
            ],
        },
        "control_plane_integration": control_plane_integration,
        "legacy_policy_overrides": legacy_overrides,
        "hard_blockers": hard_blockers,
        "visibility_contract": {
            "frontend_displays_runtime_truth": True,
            "work_window_transcript_display_allowed": False,
            "prompt_body_display_allowed": False,
            "internal_reasoning_display_allowed": False,
            "sensitive_value_display_allowed": False,
            "telegram_unredacted_payload_display_allowed": False,
            "lan_topology_redaction_required": True,
        },
        "rollups": {
            "automated_risk_tier_count": sum(1 for risk in ("low", "medium", "high") if risk in allowed_risks),
            "hard_blocker_count": len(hard_blockers),
            "report_cadence_enabled_count": len(report_cadences),
            "telegram_gateway_delivery_enabled_count": sum(
                1 for item in report_cadences if item["telegram_gateway_delivery_enabled"]
            ),
            # Derived from the cadence entries (all False today, so 0) rather
            # than hard-coded, so the count follows the declared flags.
            "direct_bot_api_allowed_count": sum(
                1 for item in report_cadences if item["direct_bot_api_allowed"]
            ),
            "controlled_executor_operation_receipt_count": len(executor_receipts),
            "runtime_write_receipt_type_count": sum(
                1 for item in executor_receipts if item["writes_runtime_state"]
            ),
            "legacy_policy_overridden_count": len(legacy_overrides),
            "mcp_sensor_count": integration_rollups["mcp_sensor_count"],
            "rag_context_query_count": integration_rollups["rag_context_query_count"],
            "playbook_decision_class_count": integration_rollups["playbook_decision_class_count"],
            "deploy_control_classifier_example_count": integration_rollups["classifier_example_count"],
        },
    }
    # Attach a readback built from no rows; db_read_status is reported as
    # "not_queried" for this call.
    _attach_runtime_receipt_readback(
        payload,
        build_runtime_receipt_readback_from_rows(
            project_id=_DEFAULT_PROJECT_ID,
            db_read_status="not_queried",
        ),
    )
    _validate_payload(payload)
    return payload


async def load_ai_agent_autonomous_runtime_receipt_readback(
    *,
    project_id: str = _DEFAULT_PROJECT_ID,
    lookback_hours: int = _DEFAULT_LOOKBACK_HOURS,
    limit: int = 20,
) -> dict[str, Any]:
    """Read live executor receipts without sending messages or mutating runtime state.

    Runs the receipt count/latest queries and the auxiliary trace-count
    queries inside one DB context and passes the rows to
    ``build_runtime_receipt_readback_from_rows``.

    Args:
        project_id: Passed to ``get_db_context`` and bound as ``:project_id``.
        lookback_hours: Bound as ``:lookback_hours``. Falsy values fall back to
            ``_DEFAULT_LOOKBACK_HOURS``; the result is clamped to at least 1.
        limit: Bound as ``:limit``. Falsy values fall back to 20; the result is
            clamped to at least 1.

    Returns:
        The readback built with ``db_read_status="ok"`` when the DB context and
        all primary queries succeed; otherwise a readback built with
        ``db_read_status="unavailable"`` and the exception type name.
        Auxiliary queries are best-effort and contribute ``[]`` on failure.
    """
    params = {
        "project_id": project_id,
        "lookback_hours": max(1, int(lookback_hours or _DEFAULT_LOOKBACK_HOURS)),
        "limit": max(1, int(limit or 20)),
    }
    try:
        async with get_db_context(project_id) as db:
            await db.execute(text("SET LOCAL statement_timeout = '5000ms'"))

            async def _rows(sql: str) -> list[Mapping[str, Any]]:
                """Run one bound query and return its rows as mappings."""
                return (await db.execute(text(sql), params)).mappings().all()

            async def _rows_in_savepoint(sql: str) -> list[Mapping[str, Any]]:
                """Run one query inside a SAVEPOINT.

                PostgreSQL aborts the whole transaction after a failed
                statement; rolling back to a savepoint keeps the enclosing
                transaction usable for the fallback and for later queries.
                """
                async with db.begin_nested():
                    return await _rows(sql)

            async def _safe_aux_rows(
                query_name: str,
                sql: str,
                fallback_sql: str | None = None,
            ) -> list[Mapping[str, Any]]:
                """Best-effort read: try ``sql``, then ``fallback_sql``, else ``[]``."""
                try:
                    return await _rows_in_savepoint(sql)
                except Exception as exc:  # pragma: no cover - depends on live schema drift
                    logger.warning(
                        "ai_agent_autonomous_runtime_trace_aux_read_failed",
                        project_id=project_id,
                        query_name=query_name,
                        error_type=type(exc).__name__,
                    )
                    if fallback_sql:
                        try:
                            return await _rows_in_savepoint(fallback_sql)
                        except Exception as fallback_exc:  # pragma: no cover - live schema drift
                            logger.warning(
                                "ai_agent_autonomous_runtime_trace_aux_fallback_failed",
                                project_id=project_id,
                                query_name=query_name,
                                error_type=type(fallback_exc).__name__,
                            )
                    return []

            # Primary receipt queries: any failure here propagates to the outer
            # handler and marks the whole readback as unavailable.
            operation_counts = await _rows(_RUNTIME_OPERATION_COUNTS_SQL)
            operation_latest = await _rows(_RUNTIME_OPERATION_LATEST_SQL)
            auto_repair_counts = await _rows(_RUNTIME_AUTO_REPAIR_COUNTS_SQL)
            auto_repair_latest = await _rows(_RUNTIME_AUTO_REPAIR_LATEST_SQL)
            verifier_counts = await _rows(_RUNTIME_VERIFIER_COUNTS_SQL)
            verifier_latest = await _rows(_RUNTIME_VERIFIER_LATEST_SQL)
            km_counts = await _rows(_RUNTIME_KM_COUNTS_SQL)
            km_latest = await _rows(_RUNTIME_KM_LATEST_SQL)
            telegram_counts = await _rows(_RUNTIME_TELEGRAM_COUNTS_SQL)
            telegram_latest = await _rows(_RUNTIME_TELEGRAM_LATEST_SQL)

            # Auxiliary trace counts: tolerated to be missing.
            mcp_gateway_counts = await _safe_aux_rows(
                "mcp_gateway_counts",
                _RUNTIME_MCP_GATEWAY_COUNTS_SQL,
            )
            legacy_mcp_counts = await _safe_aux_rows(
                "legacy_mcp_counts",
                _RUNTIME_LEGACY_MCP_COUNTS_SQL,
            )
            service_log_counts = await _safe_aux_rows(
                "service_log_counts",
                _RUNTIME_SERVICE_LOG_COUNTS_SQL,
            )
            executor_log_counts = await _safe_aux_rows(
                "executor_log_counts",
                _RUNTIME_EXECUTOR_LOG_COUNTS_SQL,
            )
            timeline_counts = await _safe_aux_rows(
                "timeline_counts",
                _RUNTIME_TIMELINE_COUNTS_SQL,
                _RUNTIME_TIMELINE_COUNTS_FALLBACK_SQL,
            )
            playbook_trust_counts = await _safe_aux_rows(
                "playbook_trust_counts",
                _RUNTIME_PLAYBOOK_TRUST_COUNTS_SQL,
                _RUNTIME_PLAYBOOK_TRUST_COUNTS_FALLBACK_SQL,
            )
    except Exception as exc:
        logger.warning(
            "ai_agent_autonomous_runtime_receipt_readback_failed",
            project_id=project_id,
            error_type=type(exc).__name__,
        )
        return build_runtime_receipt_readback_from_rows(
            project_id=project_id,
            lookback_hours=params["lookback_hours"],
            db_read_status="unavailable",
            error_type=type(exc).__name__,
        )

    return build_runtime_receipt_readback_from_rows(
        project_id=project_id,
        lookback_hours=params["lookback_hours"],
        db_read_status="ok",
        operation_count_rows=operation_counts,
        operation_latest_rows=operation_latest,
        auto_repair_count_rows=auto_repair_counts,
        auto_repair_latest_rows=auto_repair_latest,
        verifier_count_rows=verifier_counts,
        verifier_latest_rows=verifier_latest,
        km_count_rows=km_counts,
        km_latest_rows=km_latest,
        telegram_count_rows=telegram_counts,
        telegram_latest_rows=telegram_latest,
        mcp_gateway_count_rows=mcp_gateway_counts,
        legacy_mcp_count_rows=legacy_mcp_counts,
        service_log_count_rows=service_log_counts,
        executor_log_count_rows=executor_log_counts,
        timeline_count_rows=timeline_counts,
        playbook_trust_count_rows=playbook_trust_counts,
    )
|
||
|
||
|
||
async def build_ai_agent_autonomous_runtime_control_with_live_readback(
    *,
    project_id: str = _DEFAULT_PROJECT_ID,
    lookback_hours: int = _DEFAULT_LOOKBACK_HOURS,
) -> dict[str, Any]:
    """Return the control-plane payload with a live DB receipt readback attached.

    Builds the base payload with ``build_ai_agent_autonomous_runtime_control()``,
    loads the readback for ``project_id`` / ``lookback_hours``, attaches it,
    and re-validates the payload before returning it.
    """
    control_payload = build_ai_agent_autonomous_runtime_control()
    _attach_runtime_receipt_readback(
        control_payload,
        await load_ai_agent_autonomous_runtime_receipt_readback(
            project_id=project_id,
            lookback_hours=lookback_hours,
        ),
    )
    _validate_payload(control_payload)
    return control_payload
|
||
|
||
|
||
# Per (operation_type, status) row counts for the Ansible executor operation
# types in automation_operation_log: all-time total plus the rows created
# within the :lookback_hours window.
_RUNTIME_OPERATION_COUNTS_SQL = """
SELECT
    operation_type,
    status,
    count(*) AS total,
    count(*) FILTER (
        WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM automation_operation_log
WHERE operation_type IN (
    'ansible_candidate_matched',
    'ansible_check_mode_executed',
    'ansible_apply_executed',
    'ansible_learning_writeback_recorded',
    'ansible_rollback_executed',
    'ansible_execution_skipped'
)
GROUP BY operation_type, status
ORDER BY operation_type, status
"""
|
||
|
||
|
||
# Most recent executor operation rows (newest first, capped by :limit), with
# selected keys pulled out of the `input`, `output` and `dry_run_result`
# JSON columns as text.
_RUNTIME_OPERATION_LATEST_SQL = """
SELECT
    op_id::text AS op_id,
    parent_op_id::text AS parent_op_id,
    operation_type,
    status,
    actor,
    coalesce(incident_id::text, input ->> 'incident_id') AS incident_id,
    input ->> 'catalog_id' AS catalog_id,
    coalesce(input ->> 'apply_playbook_path', input ->> 'playbook_path') AS playbook_path,
    input ->> 'execution_mode' AS execution_mode,
    input ->> 'source_candidate_op_id' AS source_candidate_op_id,
    input ->> 'check_mode_op_id' AS check_mode_op_id,
    input ->> 'risk_level' AS risk_level,
    input ->> 'controlled_apply_allowed' AS controlled_apply_allowed,
    coalesce(output ->> 'returncode', dry_run_result ->> 'returncode') AS returncode,
    duration_ms,
    created_at
FROM automation_operation_log
WHERE operation_type IN (
    'ansible_candidate_matched',
    'ansible_check_mode_executed',
    'ansible_apply_executed',
    'ansible_learning_writeback_recorded',
    'ansible_rollback_executed',
    'ansible_execution_skipped'
)
ORDER BY created_at DESC
LIMIT :limit
"""
|
||
|
||
|
||
# Success/failed counts of auto_repair_executions rows triggered by
# 'ansible_controlled_apply': all-time total plus the :lookback_hours window.
_RUNTIME_AUTO_REPAIR_COUNTS_SQL = """
SELECT
    CASE WHEN success THEN 'success' ELSE 'failed' END AS result_status,
    count(*) AS total,
    count(*) FILTER (
        WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM auto_repair_executions
WHERE triggered_by = 'ansible_controlled_apply'
GROUP BY CASE WHEN success THEN 'success' ELSE 'failed' END
ORDER BY result_status
"""
|
||
|
||
|
||
# Most recent auto_repair_executions rows triggered by
# 'ansible_controlled_apply' (newest first, capped by :limit).
_RUNTIME_AUTO_REPAIR_LATEST_SQL = """
SELECT
    id,
    incident_id,
    playbook_id AS catalog_id,
    playbook_name,
    CASE WHEN success THEN 'success' ELSE 'failed' END AS result_status,
    executed_steps::text AS executed_steps_text,
    triggered_by,
    risk_level,
    execution_time_ms,
    created_at
FROM auto_repair_executions
WHERE triggered_by = 'ansible_controlled_apply'
ORDER BY created_at DESC
LIMIT :limit
"""
|
||
|
||
|
||
# Counts of incident_evidence rows that carry an apply_op_id in
# post_execution_state, grouped by verification_result (NULL -> 'missing'):
# all-time total plus the rows collected within the :lookback_hours window.
_RUNTIME_VERIFIER_COUNTS_SQL = """
SELECT
    coalesce(verification_result, 'missing') AS verification_result,
    count(*) AS total,
    count(*) FILTER (
        WHERE collected_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM incident_evidence
WHERE post_execution_state ->> 'apply_op_id' IS NOT NULL
GROUP BY coalesce(verification_result, 'missing')
ORDER BY verification_result
"""
|
||
|
||
|
||
# Most recent incident_evidence rows that carry an apply_op_id in
# post_execution_state (newest collected_at first, capped by :limit).
_RUNTIME_VERIFIER_LATEST_SQL = """
SELECT
    id,
    incident_id,
    matched_playbook_id,
    coalesce(verification_result, 'missing') AS verification_result,
    post_execution_state ->> 'apply_op_id' AS apply_op_id,
    post_execution_state ->> 'catalog_id' AS catalog_id,
    post_execution_state ->> 'playbook_path' AS playbook_path,
    post_execution_state ->> 'returncode' AS returncode,
    collected_at
FROM incident_evidence
WHERE post_execution_state ->> 'apply_op_id' IS NOT NULL
ORDER BY collected_at DESC
LIMIT :limit
"""
|
||
|
||
|
||
# Per-status counts of this project's knowledge_entries rows whose path_type
# starts with 'ansible_apply_receipt:' or whose tags text contains
# 'ansible_controlled_apply': all-time total plus the :lookback_hours window.
_RUNTIME_KM_COUNTS_SQL = """
SELECT
    status,
    count(*) AS total,
    count(*) FILTER (
        WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM knowledge_entries
WHERE project_id = :project_id
  AND (
    path_type LIKE 'ansible_apply_receipt:%'
    OR tags::text LIKE '%ansible_controlled_apply%'
  )
GROUP BY status
ORDER BY status
"""
|
||
|
||
|
||
# Most recent knowledge_entries rows for this project that match the Ansible
# apply-receipt path_type prefix or the 'ansible_controlled_apply' tag text
# (newest first, capped by :limit).
_RUNTIME_KM_LATEST_SQL = """
SELECT
    id,
    title,
    related_incident_id,
    related_playbook_id,
    path_type,
    status,
    created_by,
    created_at
FROM knowledge_entries
WHERE project_id = :project_id
  AND (
    path_type LIKE 'ansible_apply_receipt:%'
    OR tags::text LIKE '%ansible_controlled_apply%'
  )
ORDER BY created_at DESC
LIMIT :limit
"""
|
||
|
||
|
||
# Per-send_status counts of this project's Telegram outbound messages whose
# source_envelope callback_reply.action is 'controlled_apply_result':
# all-time total plus the rows queued within the :lookback_hours window.
_RUNTIME_TELEGRAM_COUNTS_SQL = """
SELECT
    send_status,
    count(*) AS total,
    count(*) FILTER (
        WHERE queued_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM awooop_outbound_message
WHERE project_id = :project_id
  AND channel_type = 'telegram'
  AND source_envelope #>> '{callback_reply,action}' = 'controlled_apply_result'
GROUP BY send_status
ORDER BY send_status
"""
|
||
|
||
|
||
# Most recent Telegram outbound messages for this project whose
# source_envelope callback_reply.action is 'controlled_apply_result'
# (newest queued_at first, capped by :limit).
_RUNTIME_TELEGRAM_LATEST_SQL = """
SELECT
    message_id::text AS message_id,
    run_id::text AS run_id,
    message_type,
    send_status,
    provider_message_id,
    source_envelope #>> '{callback_reply,incident_id}' AS incident_id,
    source_envelope #>> '{callback_reply,action}' AS action,
    queued_at,
    sent_at
FROM awooop_outbound_message
WHERE project_id = :project_id
  AND channel_type = 'telegram'
  AND source_envelope #>> '{callback_reply,action}' = 'controlled_apply_result'
ORDER BY queued_at DESC
LIMIT :limit
"""
|
||
|
||
|
||
# Per-result_status counts (NULL -> 'unknown') of this project's
# awooop_mcp_gateway_audit rows: all-time total plus the :lookback_hours window.
_RUNTIME_MCP_GATEWAY_COUNTS_SQL = """
SELECT
    coalesce(result_status, 'unknown') AS status,
    count(*) AS total,
    count(*) FILTER (
        WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM awooop_mcp_gateway_audit
WHERE project_id = :project_id
GROUP BY coalesce(result_status, 'unknown')
ORDER BY status
"""
|
||
|
||
|
||
# Counts of mcp_audit_log rows bucketed by the `success` flag
# (TRUE -> 'success', FALSE -> 'failed', NULL -> 'unknown'): all-time total
# plus the :lookback_hours window. No project filter is applied here.
_RUNTIME_LEGACY_MCP_COUNTS_SQL = """
SELECT
    CASE
        WHEN success IS TRUE THEN 'success'
        WHEN success IS FALSE THEN 'failed'
        ELSE 'unknown'
    END AS status,
    count(*) AS total,
    count(*) FILTER (
        WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM mcp_audit_log
GROUP BY
    CASE
        WHEN success IS TRUE THEN 'success'
        WHEN success IS FALSE THEN 'failed'
        ELSE 'unknown'
    END
ORDER BY status
"""
|
||
|
||
|
||
# Single-row count (labelled 'sanitized_recent_logs') of incident_evidence
# rows that have any of recent_logs, evidence_summary, mcp_health or
# anomaly_context set: all-time total plus the :lookback_hours window.
_RUNTIME_SERVICE_LOG_COUNTS_SQL = """
SELECT
    'sanitized_recent_logs' AS status,
    count(*) AS total,
    count(*) FILTER (
        WHERE collected_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM incident_evidence
WHERE recent_logs IS NOT NULL
   OR evidence_summary IS NOT NULL
   OR mcp_health IS NOT NULL
   OR anomaly_context IS NOT NULL
"""
|
||
|
||
|
||
# Per-status counts (NULL -> 'unknown') of executor operation rows that carry
# at least one of output, error, stderr_feed_back or dry_run_result.
# The operation-type list here omits 'ansible_learning_writeback_recorded'.
_RUNTIME_EXECUTOR_LOG_COUNTS_SQL = """
SELECT
    coalesce(status, 'unknown') AS status,
    count(*) AS total,
    count(*) FILTER (
        WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM automation_operation_log
WHERE operation_type IN (
    'ansible_candidate_matched',
    'ansible_check_mode_executed',
    'ansible_apply_executed',
    'ansible_rollback_executed',
    'ansible_execution_skipped'
)
  AND (
    output IS NOT NULL
    OR error IS NOT NULL
    OR stderr_feed_back IS NOT NULL
    OR dry_run_result IS NOT NULL
  )
GROUP BY coalesce(status, 'unknown')
ORDER BY status
"""
|
||
|
||
|
||
# Per-status counts (NULL -> 'unknown') of timeline_events rows that have an
# event_type, actor or actor_role: all-time total plus the :lookback_hours
# window.
_RUNTIME_TIMELINE_COUNTS_SQL = """
SELECT
    coalesce(status, 'unknown') AS status,
    count(*) AS total,
    count(*) FILTER (
        WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM timeline_events
WHERE event_type IS NOT NULL
   OR actor IS NOT NULL
   OR actor_role IS NOT NULL
GROUP BY coalesce(status, 'unknown')
ORDER BY status
"""
|
||
|
||
|
||
# Column-free fallback for the timeline counts: one 'timeline_event' row with
# the table's total row count and a constant 0 for `recent`.
_RUNTIME_TIMELINE_COUNTS_FALLBACK_SQL = """
SELECT
    'timeline_event' AS status,
    count(*) AS total,
    0 AS recent
FROM timeline_events
"""
|
||
|
||
|
||
# Counts of playbooks rows bucketed by trust state. The CASE is evaluated in
# order: review_required, then trust_score >= 0.8, then trust_score < 0.3,
# then any recorded success/failure, else 'seeded_not_used'. `recent` counts
# rows updated within the :lookback_hours window.
_RUNTIME_PLAYBOOK_TRUST_COUNTS_SQL = """
SELECT
    CASE
        WHEN review_required IS TRUE THEN 'review_required'
        WHEN trust_score >= 0.8 THEN 'high_trust'
        WHEN trust_score < 0.3 THEN 'low_trust'
        WHEN success_count > 0 OR failure_count > 0 THEN 'learning_active'
        ELSE 'seeded_not_used'
    END AS status,
    count(*) AS total,
    count(*) FILTER (
        WHERE updated_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
    ) AS recent
FROM playbooks
GROUP BY
    CASE
        WHEN review_required IS TRUE THEN 'review_required'
        WHEN trust_score >= 0.8 THEN 'high_trust'
        WHEN trust_score < 0.3 THEN 'low_trust'
        WHEN success_count > 0 OR failure_count > 0 THEN 'learning_active'
        ELSE 'seeded_not_used'
    END
ORDER BY status
"""
|
||
|
||
|
||
# Column-free fallback for the playbook trust counts: one 'cataloged' row with
# the table's total row count and a constant 0 for `recent`.
_RUNTIME_PLAYBOOK_TRUST_COUNTS_FALLBACK_SQL = """
SELECT
    'cataloged' AS status,
    count(*) AS total,
    0 AS recent
FROM playbooks
"""
|
||
|
||
|
||
def _validate_payload(payload: dict[str, Any]) -> None:
|
||
if payload.get("schema_version") != _SCHEMA_VERSION:
|
||
raise ValueError(f"schema_version must be {_SCHEMA_VERSION}")
|
||
status = payload.get("program_status") or {}
|
||
if status.get("runtime_authority") != _RUNTIME_AUTHORITY:
|
||
raise ValueError(f"runtime_authority must be {_RUNTIME_AUTHORITY}")
|
||
if status.get("deploy_readback_marker") != _DEPLOY_READBACK_MARKER:
|
||
raise ValueError(f"deploy_readback_marker must be {_DEPLOY_READBACK_MARKER}")
|
||
if status.get("deploy_attempt_note") != _DEPLOY_ATTEMPT_NOTE:
|
||
raise ValueError(f"deploy_attempt_note must be {_DEPLOY_ATTEMPT_NOTE}")
|
||
policy = payload.get("current_policy") or {}
|
||
for key in (
|
||
"low_risk_controlled_apply_allowed",
|
||
"medium_risk_controlled_apply_allowed",
|
||
"high_risk_controlled_apply_allowed",
|
||
"telegram_gateway_required",
|
||
"post_apply_verifier_required",
|
||
"km_learning_writeback_required",
|
||
):
|
||
if policy.get(key) is not True:
|
||
raise ValueError(f"current_policy.{key} must be true")
|
||
if policy.get("owner_review_required_for_low_medium_high") is not False:
|
||
raise ValueError("owner_review_required_for_low_medium_high must be false")
|
||
if policy.get("direct_bot_api_allowed") is not False:
|
||
raise ValueError("direct_bot_api_allowed must be false")
|
||
visibility = payload.get("visibility_contract") or {}
|
||
for key in (
|
||
"work_window_transcript_display_allowed",
|
||
"prompt_body_display_allowed",
|
||
"internal_reasoning_display_allowed",
|
||
"sensitive_value_display_allowed",
|
||
"telegram_unredacted_payload_display_allowed",
|
||
):
|
||
if visibility.get(key) is not False:
|
||
raise ValueError(f"visibility_contract.{key} must remain false")
|