feat(ai): add autonomous runtime loop ledger
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 2m44s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 2m44s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
This commit is contained in:
@@ -151,6 +151,298 @@ def _status_counts(
|
||||
}
|
||||
|
||||
|
||||
def _first_operation(
|
||||
rows: Iterable[Mapping[str, Any]],
|
||||
operation_type: str,
|
||||
) -> dict[str, Any] | None:
|
||||
for row in rows:
|
||||
if str(row.get("operation_type") or "") == operation_type:
|
||||
return dict(row)
|
||||
return None
|
||||
|
||||
|
||||
def _operation_by_id(
|
||||
rows: Iterable[Mapping[str, Any]],
|
||||
op_id: Any,
|
||||
) -> dict[str, Any] | None:
|
||||
needle = str(op_id or "")
|
||||
if not needle:
|
||||
return None
|
||||
for row in rows:
|
||||
if str(row.get("op_id") or "") == needle:
|
||||
return dict(row)
|
||||
return None
|
||||
|
||||
|
||||
def _stage_status(row: Mapping[str, Any] | None, *, fallback_status: str | None = None) -> str:
|
||||
if row is None:
|
||||
return fallback_status or "missing"
|
||||
return str(row.get("status") or row.get("result_status") or fallback_status or "present")
|
||||
|
||||
|
||||
def _loop_stage(
|
||||
*,
|
||||
stage_id: str,
|
||||
receipt_source: str,
|
||||
present: bool,
|
||||
status: str,
|
||||
ref_id: str | None,
|
||||
writes_runtime_state: bool,
|
||||
next_action_if_missing: str,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"stage_id": stage_id,
|
||||
"receipt_source": receipt_source,
|
||||
"present": present,
|
||||
"status": status,
|
||||
"ref_id": ref_id,
|
||||
"writes_runtime_state": writes_runtime_state,
|
||||
"next_action_if_missing": None if present else next_action_if_missing,
|
||||
}
|
||||
|
||||
|
||||
def _autonomous_execution_loop_ledger(
    *,
    project_id: str,
    operation_latest_rows: Iterable[Mapping[str, Any] | Any],
    verifier_latest_rows: Iterable[Mapping[str, Any] | Any],
    km_latest_rows: Iterable[Mapping[str, Any] | Any],
    telegram_latest_rows: Iterable[Mapping[str, Any] | Any],
    auto_repair_latest_rows: Iterable[Mapping[str, Any] | Any],
    latest_flow_closure: Mapping[str, Any],
    latest_failure_classification: Mapping[str, Any],
    controlled_retry_package: Mapping[str, Any],
) -> dict[str, Any]:
    """Assemble the operation-id ledger describing how far the runtime loop got.

    The chain is anchored on the first ``ansible_apply_executed`` operation
    row. Its check-mode row is looked up by id (``check_mode_op_id`` or
    ``parent_op_id``), never by position, and the candidate row is looked up
    the same way from the check-mode/apply row. Only when no apply row exists
    does the first ``ansible_check_mode_executed`` row become the anchor, and
    only when neither exists does the first ``ansible_candidate_matched`` row.

    Receipt rows (verifier, KM, Telegram, auto-repair) are then matched against
    the resolved apply op id / incident id, one ledger stage is emitted per
    step, and an ``execution_state`` plus ``next_executor_action`` is chosen.

    Returns a dict with ``schema_version``
    ``ai_agent_autonomous_execution_loop_ledger_v1``.
    """

    def first_where(candidates, predicate):
        # First row satisfying the predicate, or None.
        return next(filter(predicate, candidates), None)

    operation_rows = list(map(_row_mapping, operation_latest_rows))
    verifier_rows = list(map(_row_mapping, verifier_latest_rows))
    km_rows = list(map(_row_mapping, km_latest_rows))
    telegram_rows = list(map(_row_mapping, telegram_latest_rows))
    auto_repair_rows = list(map(_row_mapping, auto_repair_latest_rows))

    # --- Resolve the apply -> check-mode -> candidate chain -----------------
    latest_apply = _first_operation(operation_rows, "ansible_apply_executed")
    if latest_apply is None:
        latest_check = _first_operation(operation_rows, "ansible_check_mode_executed")
    else:
        # Follow the apply row's own reference; an id with no matching row
        # leaves latest_check as None rather than picking another row.
        latest_check = _operation_by_id(
            operation_rows,
            latest_apply.get("check_mode_op_id") or latest_apply.get("parent_op_id"),
        )

    source_candidate_op_id = (
        None
        if latest_check is None
        else latest_check.get("parent_op_id") or latest_check.get("source_candidate_op_id")
    )
    if latest_apply is not None and not source_candidate_op_id:
        source_candidate_op_id = latest_apply.get("source_candidate_op_id")

    latest_candidate = _operation_by_id(operation_rows, source_candidate_op_id)
    nothing_resolved = (
        latest_candidate is None and latest_apply is None and latest_check is None
    )
    if nothing_resolved:
        latest_candidate = _first_operation(operation_rows, "ansible_candidate_matched")

    apply_fields = latest_apply or {}
    check_fields = latest_check or {}
    candidate_fields = latest_candidate or {}
    anchor = latest_apply or latest_check or latest_candidate or {}

    apply_op_id = str(apply_fields.get("op_id") or "")
    check_mode_op_id = str(
        check_fields.get("op_id")
        or apply_fields.get("check_mode_op_id")
        or apply_fields.get("parent_op_id")
        or ""
    )
    candidate_op_id = str(candidate_fields.get("op_id") or source_candidate_op_id or "")
    incident_id = str(anchor.get("incident_id") or "")
    catalog_id = str(anchor.get("catalog_id") or "")
    playbook_path = str(anchor.get("playbook_path") or "")

    # --- Match receipt rows against the resolved ids ------------------------
    km_path_type = f"ansible_apply_receipt:{apply_op_id[:8]}" if apply_op_id else ""

    def verifier_matches(row):
        return apply_op_id and str(row.get("apply_op_id") or "") == apply_op_id

    def km_matches(row):
        if km_path_type and str(row.get("path_type") or "") == km_path_type:
            return True
        return incident_id and str(row.get("related_incident_id") or "") == incident_id

    def telegram_matches(row):
        if str(row.get("send_status") or "") != "sent":
            return False
        if str(row.get("action") or "") != "controlled_apply_result":
            return False
        # With no incident id on the anchor, any sent result receipt matches.
        return not incident_id or str(row.get("incident_id") or "") == incident_id

    def auto_repair_matches(row):
        if not apply_op_id:
            return False
        steps = str(row.get("executed_steps_text") or row.get("executed_steps") or "")
        return apply_op_id in steps

    verifier = first_where(verifier_rows, verifier_matches)
    km = first_where(km_rows, km_matches)
    telegram = first_where(telegram_rows, telegram_matches)
    auto_repair = first_where(auto_repair_rows, auto_repair_matches)

    candidate_present = bool(latest_candidate or candidate_op_id)
    check_present = bool(latest_check or check_mode_op_id)
    apply_present = latest_apply is not None
    auto_repair_present = auto_repair is not None

    # --- Build the ordered stage list ---------------------------------------
    def receipt_stage(stage_id, receipt_source, row, status_key, ref_key, next_action):
        # Shared shape for the receipt stages that follow the apply step.
        fields = row or {}
        return _loop_stage(
            stage_id=stage_id,
            receipt_source=receipt_source,
            present=row is not None,
            status=str(fields.get(status_key) or "missing"),
            ref_id=str(fields.get(ref_key) or "") or None,
            writes_runtime_state=True,
            next_action_if_missing=next_action,
        )

    if candidate_present:
        candidate_status = _stage_status(
            latest_candidate, fallback_status="inferred_from_check_mode"
        )
    else:
        candidate_status = "missing"
    if check_present:
        check_status = _stage_status(
            latest_check, fallback_status="inferred_from_apply_parent"
        )
    else:
        check_status = "missing"

    stages = [
        _loop_stage(
            stage_id="candidate",
            receipt_source="automation_operation_log:ansible_candidate_matched",
            present=candidate_present,
            status=candidate_status,
            ref_id=candidate_op_id or None,
            writes_runtime_state=False,
            next_action_if_missing="candidate_backfill_worker_enqueue_allowlisted_playbook",
        ),
        _loop_stage(
            stage_id="check_mode",
            receipt_source="automation_operation_log:ansible_check_mode_executed",
            present=check_present,
            status=check_status,
            ref_id=check_mode_op_id or None,
            writes_runtime_state=False,
            next_action_if_missing="ansible_check_mode_worker_claims_candidate",
        ),
        _loop_stage(
            stage_id="controlled_apply",
            receipt_source="automation_operation_log:ansible_apply_executed",
            present=apply_present,
            status=_stage_status(latest_apply),
            ref_id=apply_op_id or None,
            writes_runtime_state=True,
            next_action_if_missing="controlled_apply_worker_waits_for_check_mode_success",
        ),
        receipt_stage(
            "auto_repair_execution_receipt",
            "auto_repair_executions:ansible_controlled_apply",
            auto_repair,
            "result_status",
            "id",
            "receipt_backfill_records_auto_repair_execution",
        ),
        receipt_stage(
            "post_apply_verifier",
            "incident_evidence.post_execution_state",
            verifier,
            "verification_result",
            "id",
            "post_apply_verifier_writes_incident_evidence",
        ),
        receipt_stage(
            "km_playbook_writeback",
            "knowledge_entries:ansible_apply_receipt",
            km,
            "status",
            "id",
            "hermes_writes_km_playbook_trust_candidate",
        ),
        receipt_stage(
            "telegram_receipt",
            "awooop_outbound_message:controlled_apply_result",
            telegram,
            "send_status",
            "message_id",
            "live_apply_gateway_sends_controlled_apply_result_receipt",
        ),
    ]
    missing_stage_ids = [
        str(stage["stage_id"]) for stage in stages if stage["present"] is not True
    ]

    # Closure needs an apply id, its auto-repair receipt, and the upstream
    # flow-closure verdict; the other receipts only affect the open states.
    closed = bool(
        apply_op_id
        and auto_repair_present
        and latest_flow_closure.get("closed") is True
    )
    classification = str(latest_failure_classification.get("classification") or "")

    # --- Pick the execution state and next action ---------------------------
    if not apply_present:
        if candidate_present or check_present:
            execution_state = "executor_in_progress_or_waiting"
            next_executor_action = "continue_candidate_to_check_mode_to_apply"
        else:
            execution_state = "waiting_for_candidate"
            next_executor_action = "candidate_backfill_worker_waits_for_matching_incident"
    elif closed:
        if classification == "latest_controlled_apply_closed_success":
            execution_state = "closed_success"
            next_executor_action = "keep_receipt_chain_closed"
        else:
            execution_state = "closed_failed_apply_repair_ready"
            next_executor_action = str(
                controlled_retry_package.get("next_ai_action")
                or "run_no_write_check_mode_replay"
            )
    elif "telegram_receipt" in missing_stage_ids:
        execution_state = "open_waiting_for_live_gateway_receipt"
        next_executor_action = "do_not_fake_send_backfill_wait_for_live_apply_gateway"
    else:
        execution_state = "open_missing_internal_receipts"
        next_executor_action = "backfill_missing_auto_repair_verifier_km_receipts"

    safety_contract = {
        "writes_on_read": False,
        "backfill_may_write_auto_repair_verifier_km": True,
        "backfill_may_send_telegram": False,
        "live_apply_may_send_telegram_gateway_receipt": True,
        "reads_raw_sessions": False,
        "reads_secret_values": False,
    }
    return {
        "schema_version": "ai_agent_autonomous_execution_loop_ledger_v1",
        "project_id": project_id,
        "operation_id": apply_op_id or check_mode_op_id or candidate_op_id or None,
        "root_candidate_op_id": candidate_op_id or None,
        "check_mode_op_id": check_mode_op_id or None,
        "apply_op_id": apply_op_id or None,
        "incident_id": incident_id or None,
        "catalog_id": catalog_id or None,
        "playbook_path": playbook_path or None,
        "execution_state": execution_state,
        "closed": closed,
        "missing_stage_ids": missing_stage_ids,
        "next_executor_action": next_executor_action,
        "stages": stages,
        "safety_contract": safety_contract,
    }
|
||||
|
||||
|
||||
def _latest_flow_closure(
|
||||
*,
|
||||
operation_latest_rows: Iterable[Mapping[str, Any] | Any],
|
||||
@@ -568,6 +860,8 @@ def build_runtime_receipt_readback_from_rows(
|
||||
db_read_status: str = "ok",
|
||||
operation_count_rows: Iterable[Mapping[str, Any] | Any] = (),
|
||||
operation_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
|
||||
auto_repair_count_rows: Iterable[Mapping[str, Any] | Any] = (),
|
||||
auto_repair_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
|
||||
verifier_count_rows: Iterable[Mapping[str, Any] | Any] = (),
|
||||
verifier_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
|
||||
km_count_rows: Iterable[Mapping[str, Any] | Any] = (),
|
||||
@@ -579,10 +873,15 @@ def build_runtime_receipt_readback_from_rows(
|
||||
"""Build the live executor receipt readback from already-fetched rows."""
|
||||
|
||||
operation_latest = list(operation_latest_rows)
|
||||
auto_repair_latest = list(auto_repair_latest_rows)
|
||||
verifier_latest = list(verifier_latest_rows)
|
||||
km_latest = list(km_latest_rows)
|
||||
telegram_latest = list(telegram_latest_rows)
|
||||
operation_summary = _operation_counts(operation_count_rows)
|
||||
auto_repair_summary = _status_counts(
|
||||
auto_repair_count_rows,
|
||||
status_key="result_status",
|
||||
)
|
||||
verifier_summary = _status_counts(
|
||||
verifier_count_rows,
|
||||
status_key="verification_result",
|
||||
@@ -601,6 +900,17 @@ def build_runtime_receipt_readback_from_rows(
|
||||
latest_flow_closure=latest_closure,
|
||||
)
|
||||
retry_package = _controlled_retry_package(latest_failure)
|
||||
loop_ledger = _autonomous_execution_loop_ledger(
|
||||
project_id=project_id,
|
||||
operation_latest_rows=operation_latest,
|
||||
verifier_latest_rows=verifier_latest,
|
||||
km_latest_rows=km_latest,
|
||||
telegram_latest_rows=telegram_latest,
|
||||
auto_repair_latest_rows=auto_repair_latest,
|
||||
latest_flow_closure=latest_closure,
|
||||
latest_failure_classification=latest_failure,
|
||||
controlled_retry_package=retry_package,
|
||||
)
|
||||
apply_summary = operation_summary.get("ansible_apply_executed") or {}
|
||||
readback = {
|
||||
"schema_version": _LIVE_READBACK_SCHEMA_VERSION,
|
||||
@@ -622,12 +932,33 @@ def build_runtime_receipt_readback_from_rows(
|
||||
"catalog_id",
|
||||
"playbook_path",
|
||||
"execution_mode",
|
||||
"source_candidate_op_id",
|
||||
"check_mode_op_id",
|
||||
"risk_level",
|
||||
"controlled_apply_allowed",
|
||||
"returncode",
|
||||
"duration_ms",
|
||||
"created_at",
|
||||
),
|
||||
),
|
||||
},
|
||||
"auto_repair_execution_receipt": {
|
||||
**auto_repair_summary,
|
||||
"latest": _sanitize_latest_rows(
|
||||
auto_repair_latest,
|
||||
allowed_keys=(
|
||||
"id",
|
||||
"incident_id",
|
||||
"catalog_id",
|
||||
"playbook_name",
|
||||
"result_status",
|
||||
"triggered_by",
|
||||
"risk_level",
|
||||
"execution_time_ms",
|
||||
"created_at",
|
||||
),
|
||||
),
|
||||
},
|
||||
"ansible_apply_executed": {
|
||||
"total": _int_value(apply_summary.get("total")),
|
||||
"recent": _int_value(apply_summary.get("recent")),
|
||||
@@ -686,6 +1017,7 @@ def build_runtime_receipt_readback_from_rows(
|
||||
"latest_flow_closure": latest_closure,
|
||||
"latest_failure_classification": latest_failure,
|
||||
"controlled_retry_package": retry_package,
|
||||
"autonomous_execution_loop_ledger": loop_ledger,
|
||||
}
|
||||
if error_type:
|
||||
readback["error"] = {
|
||||
@@ -705,6 +1037,9 @@ def _attach_runtime_receipt_readback(
|
||||
"live_ansible_apply_executed_count": _int_value(
|
||||
readback.get("ansible_apply_executed", {}).get("total")
|
||||
),
|
||||
"live_auto_repair_execution_receipt_count": _int_value(
|
||||
readback.get("auto_repair_execution_receipt", {}).get("total")
|
||||
),
|
||||
"live_post_apply_verifier_count": _int_value(
|
||||
readback.get("post_apply_verifier", {}).get("total")
|
||||
),
|
||||
@@ -719,6 +1054,11 @@ def _attach_runtime_receipt_readback(
|
||||
if (readback.get("latest_flow_closure") or {}).get("closed") is True
|
||||
else 0
|
||||
),
|
||||
"live_autonomous_execution_loop_closed_count": (
|
||||
1
|
||||
if (readback.get("autonomous_execution_loop_ledger") or {}).get("closed") is True
|
||||
else 0
|
||||
),
|
||||
"live_executor_latest_apply_repair_required_count": (
|
||||
1
|
||||
if (
|
||||
@@ -987,6 +1327,12 @@ async def load_ai_agent_autonomous_runtime_receipt_readback(
|
||||
operation_latest = (
|
||||
await db.execute(text(_RUNTIME_OPERATION_LATEST_SQL), params)
|
||||
).mappings().all()
|
||||
auto_repair_counts = (
|
||||
await db.execute(text(_RUNTIME_AUTO_REPAIR_COUNTS_SQL), params)
|
||||
).mappings().all()
|
||||
auto_repair_latest = (
|
||||
await db.execute(text(_RUNTIME_AUTO_REPAIR_LATEST_SQL), params)
|
||||
).mappings().all()
|
||||
verifier_counts = (
|
||||
await db.execute(text(_RUNTIME_VERIFIER_COUNTS_SQL), params)
|
||||
).mappings().all()
|
||||
@@ -1024,6 +1370,8 @@ async def load_ai_agent_autonomous_runtime_receipt_readback(
|
||||
db_read_status="ok",
|
||||
operation_count_rows=operation_counts,
|
||||
operation_latest_rows=operation_latest,
|
||||
auto_repair_count_rows=auto_repair_counts,
|
||||
auto_repair_latest_rows=auto_repair_latest,
|
||||
verifier_count_rows=verifier_counts,
|
||||
verifier_latest_rows=verifier_latest,
|
||||
km_count_rows=km_counts,
|
||||
@@ -1082,6 +1430,10 @@ _RUNTIME_OPERATION_LATEST_SQL = """
|
||||
input ->> 'catalog_id' AS catalog_id,
|
||||
coalesce(input ->> 'apply_playbook_path', input ->> 'playbook_path') AS playbook_path,
|
||||
input ->> 'execution_mode' AS execution_mode,
|
||||
input ->> 'source_candidate_op_id' AS source_candidate_op_id,
|
||||
input ->> 'check_mode_op_id' AS check_mode_op_id,
|
||||
input ->> 'risk_level' AS risk_level,
|
||||
input ->> 'controlled_apply_allowed' AS controlled_apply_allowed,
|
||||
coalesce(output ->> 'returncode', dry_run_result ->> 'returncode') AS returncode,
|
||||
duration_ms,
|
||||
created_at
|
||||
@@ -1098,6 +1450,39 @@ _RUNTIME_OPERATION_LATEST_SQL = """
|
||||
"""
|
||||
|
||||
|
||||
# Counts rows of auto_repair_executions whose triggered_by is
# 'ansible_controlled_apply', grouped into 'success' / 'failed' from the
# boolean `success` column. `total` is the all-time count per group; `recent`
# counts only rows created within the last :lookback_hours hours.
# NOTE(review): the bare '|' / '||||' lines inside this literal look like
# diff-viewer extraction residue rather than SQL — confirm against the
# repository copy before executing this text.
_RUNTIME_AUTO_REPAIR_COUNTS_SQL = """
|
||||
SELECT
|
||||
CASE WHEN success THEN 'success' ELSE 'failed' END AS result_status,
|
||||
count(*) AS total,
|
||||
count(*) FILTER (
|
||||
WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
|
||||
) AS recent
|
||||
FROM auto_repair_executions
|
||||
WHERE triggered_by = 'ansible_controlled_apply'
|
||||
GROUP BY CASE WHEN success THEN 'success' ELSE 'failed' END
|
||||
ORDER BY result_status
|
||||
"""
|
||||
|
||||
|
||||
# Selects the newest auto_repair_executions rows (created_at DESC, capped by
# :limit) whose triggered_by is 'ansible_controlled_apply'. playbook_id is
# exposed as catalog_id, the boolean `success` as a 'success' / 'failed'
# result_status, and executed_steps is cast to text as executed_steps_text.
# NOTE(review): the bare '|' / '||||' lines inside this literal look like
# diff-viewer extraction residue rather than SQL — confirm against the
# repository copy before executing this text.
_RUNTIME_AUTO_REPAIR_LATEST_SQL = """
|
||||
SELECT
|
||||
id,
|
||||
incident_id,
|
||||
playbook_id AS catalog_id,
|
||||
playbook_name,
|
||||
CASE WHEN success THEN 'success' ELSE 'failed' END AS result_status,
|
||||
executed_steps::text AS executed_steps_text,
|
||||
triggered_by,
|
||||
risk_level,
|
||||
execution_time_ms,
|
||||
created_at
|
||||
FROM auto_repair_executions
|
||||
WHERE triggered_by = 'ansible_controlled_apply'
|
||||
ORDER BY created_at DESC
|
||||
LIMIT :limit
|
||||
"""
|
||||
|
||||
|
||||
_RUNTIME_VERIFIER_COUNTS_SQL = """
|
||||
SELECT
|
||||
coalesce(verification_result, 'missing') AS verification_result,
|
||||
|
||||
@@ -146,6 +146,29 @@ def test_runtime_receipt_readback_summarizes_live_executor_closure_rows():
|
||||
},
|
||||
],
|
||||
operation_latest_rows=[
|
||||
{
|
||||
"op_id": "candidate-op",
|
||||
"parent_op_id": None,
|
||||
"operation_type": "ansible_candidate_matched",
|
||||
"status": "dry_run",
|
||||
"actor": "decision_manager",
|
||||
"incident_id": incident_id,
|
||||
"catalog_id": "ansible:188-momo-backup-user",
|
||||
"playbook_path": "infra/ansible/playbooks/188-momo-backup-user.yml",
|
||||
"execution_mode": "check_mode",
|
||||
},
|
||||
{
|
||||
"op_id": "check-mode-op",
|
||||
"parent_op_id": "candidate-op",
|
||||
"operation_type": "ansible_check_mode_executed",
|
||||
"status": "success",
|
||||
"actor": "ansible_check_mode_worker",
|
||||
"incident_id": incident_id,
|
||||
"catalog_id": "ansible:188-momo-backup-user",
|
||||
"playbook_path": "infra/ansible/playbooks/188-momo-backup-user.yml",
|
||||
"execution_mode": "check_mode",
|
||||
"returncode": "0",
|
||||
},
|
||||
{
|
||||
"op_id": apply_op_id,
|
||||
"parent_op_id": "check-mode-op",
|
||||
@@ -156,10 +179,28 @@ def test_runtime_receipt_readback_summarizes_live_executor_closure_rows():
|
||||
"catalog_id": "ansible:188-momo-backup-user",
|
||||
"playbook_path": "infra/ansible/playbooks/188-momo-backup-user.yml",
|
||||
"execution_mode": "controlled_apply",
|
||||
"source_candidate_op_id": "candidate-op",
|
||||
"check_mode_op_id": "check-mode-op",
|
||||
"returncode": "0",
|
||||
"duration_ms": 7727,
|
||||
},
|
||||
],
|
||||
auto_repair_count_rows=[
|
||||
{"result_status": "success", "total": 1, "recent": 1},
|
||||
],
|
||||
auto_repair_latest_rows=[
|
||||
{
|
||||
"id": "auto-repair-1",
|
||||
"incident_id": incident_id,
|
||||
"catalog_id": "ansible:188-momo-backup-user",
|
||||
"playbook_name": "infra/ansible/playbooks/188-momo-backup-user.yml",
|
||||
"result_status": "success",
|
||||
"executed_steps_text": f'["apply:{apply_op_id}"]',
|
||||
"triggered_by": "ansible_controlled_apply",
|
||||
"risk_level": "low",
|
||||
"execution_time_ms": 7727,
|
||||
},
|
||||
],
|
||||
verifier_count_rows=[
|
||||
{"verification_result": "success", "total": 1, "recent": 1},
|
||||
],
|
||||
@@ -207,6 +248,7 @@ def test_runtime_receipt_readback_summarizes_live_executor_closure_rows():
|
||||
assert readback["db_read_status"] == "ok"
|
||||
assert readback["writes_on_read"] is False
|
||||
assert readback["ansible_apply_executed"]["total"] == 1
|
||||
assert readback["auto_repair_execution_receipt"]["by_status"]["success"] == 1
|
||||
assert readback["post_apply_verifier"]["by_status"]["success"] == 1
|
||||
assert readback["km_writeback"]["by_status"]["review"] == 1
|
||||
assert readback["telegram_receipt"]["by_status"]["sent"] == 1
|
||||
@@ -223,6 +265,28 @@ def test_runtime_receipt_readback_summarizes_live_executor_closure_rows():
|
||||
"latest_controlled_apply_closed_success"
|
||||
)
|
||||
assert readback["controlled_retry_package"]["status"] == "not_required_for_latest_apply"
|
||||
ledger = readback["autonomous_execution_loop_ledger"]
|
||||
assert ledger["schema_version"] == "ai_agent_autonomous_execution_loop_ledger_v1"
|
||||
assert ledger["operation_id"] == apply_op_id
|
||||
assert ledger["root_candidate_op_id"] == "candidate-op"
|
||||
assert ledger["check_mode_op_id"] == "check-mode-op"
|
||||
assert ledger["apply_op_id"] == apply_op_id
|
||||
assert ledger["execution_state"] == "closed_success"
|
||||
assert ledger["closed"] is True
|
||||
assert ledger["missing_stage_ids"] == []
|
||||
assert ledger["next_executor_action"] == "keep_receipt_chain_closed"
|
||||
assert [stage["stage_id"] for stage in ledger["stages"]] == [
|
||||
"candidate",
|
||||
"check_mode",
|
||||
"controlled_apply",
|
||||
"auto_repair_execution_receipt",
|
||||
"post_apply_verifier",
|
||||
"km_playbook_writeback",
|
||||
"telegram_receipt",
|
||||
]
|
||||
assert {stage["present"] for stage in ledger["stages"]} == {True}
|
||||
assert ledger["safety_contract"]["backfill_may_send_telegram"] is False
|
||||
assert ledger["safety_contract"]["live_apply_may_send_telegram_gateway_receipt"] is True
|
||||
|
||||
|
||||
def test_runtime_receipt_readback_classifies_closed_failed_apply_as_ai_repair():
|
||||
@@ -241,6 +305,18 @@ def test_runtime_receipt_readback_classifies_closed_failed_apply_as_ai_repair():
|
||||
},
|
||||
],
|
||||
operation_latest_rows=[
|
||||
{
|
||||
"op_id": "8b555f41-e81f-4d8e-956b-fb20d358db63",
|
||||
"parent_op_id": "candidate-op-2",
|
||||
"operation_type": "ansible_check_mode_executed",
|
||||
"status": "success",
|
||||
"actor": "ansible_check_mode_worker",
|
||||
"incident_id": incident_id,
|
||||
"catalog_id": "ansible:188-ai-web",
|
||||
"playbook_path": "infra/ansible/playbooks/188-ai-web-readonly.yml",
|
||||
"execution_mode": "check_mode",
|
||||
"returncode": "0",
|
||||
},
|
||||
{
|
||||
"op_id": apply_op_id,
|
||||
"parent_op_id": "8b555f41-e81f-4d8e-956b-fb20d358db63",
|
||||
@@ -251,10 +327,28 @@ def test_runtime_receipt_readback_classifies_closed_failed_apply_as_ai_repair():
|
||||
"catalog_id": "ansible:188-ai-web",
|
||||
"playbook_path": "infra/ansible/playbooks/188-ai-web.yml",
|
||||
"execution_mode": "controlled_apply",
|
||||
"source_candidate_op_id": "candidate-op-2",
|
||||
"check_mode_op_id": "8b555f41-e81f-4d8e-956b-fb20d358db63",
|
||||
"returncode": "2",
|
||||
"duration_ms": 4797,
|
||||
},
|
||||
],
|
||||
auto_repair_count_rows=[
|
||||
{"result_status": "failed", "total": 1, "recent": 1},
|
||||
],
|
||||
auto_repair_latest_rows=[
|
||||
{
|
||||
"id": "auto-repair-2",
|
||||
"incident_id": incident_id,
|
||||
"catalog_id": "ansible:188-ai-web",
|
||||
"playbook_name": "infra/ansible/playbooks/188-ai-web.yml",
|
||||
"result_status": "failed",
|
||||
"executed_steps_text": f'["apply:{apply_op_id}"]',
|
||||
"triggered_by": "ansible_controlled_apply",
|
||||
"risk_level": "medium",
|
||||
"execution_time_ms": 4797,
|
||||
},
|
||||
],
|
||||
verifier_count_rows=[
|
||||
{"verification_result": "failed", "total": 1, "recent": 1},
|
||||
],
|
||||
@@ -344,3 +438,54 @@ def test_runtime_receipt_readback_classifies_closed_failed_apply_as_ai_repair():
|
||||
"telegram_receipt_required": True,
|
||||
}
|
||||
assert retry["next_ai_action"] == "run_no_write_check_mode_replay"
|
||||
ledger = readback["autonomous_execution_loop_ledger"]
|
||||
assert ledger["closed"] is True
|
||||
assert ledger["execution_state"] == "closed_failed_apply_repair_ready"
|
||||
assert ledger["next_executor_action"] == "run_no_write_check_mode_replay"
|
||||
assert ledger["missing_stage_ids"] == []
|
||||
assert ledger["stages"][3]["stage_id"] == "auto_repair_execution_receipt"
|
||||
assert ledger["stages"][3]["status"] == "failed"
|
||||
|
||||
|
||||
def test_runtime_execution_loop_ledger_does_not_mix_unrelated_check_mode_rows():
    """The ledger must not adopt a check-mode row from a different chain.

    The apply row points at ``expected-check-mode-op``, which is absent from
    the rows; the only check-mode row present belongs to another incident.
    """
    apply_op_id = "db3f12ce-08fc-4289-8c93-338305d5850c"
    unrelated_check_mode_row = {
        "op_id": "unrelated-check-mode-op",
        "parent_op_id": "unrelated-candidate",
        "operation_type": "ansible_check_mode_executed",
        "status": "success",
        "actor": "ansible_check_mode_worker",
        "incident_id": "INC-OTHER",
        "catalog_id": "ansible:110-devops",
        "playbook_path": "infra/ansible/playbooks/110-devops.yml",
        "execution_mode": "check_mode",
        "returncode": "0",
    }
    apply_row = {
        "op_id": apply_op_id,
        "parent_op_id": "expected-check-mode-op",
        "operation_type": "ansible_apply_executed",
        "status": "success",
        "actor": "ansible_controlled_apply_worker",
        "incident_id": "INC-20260629-LEDGER",
        "catalog_id": "ansible:188-momo-backup-user",
        "playbook_path": "infra/ansible/playbooks/188-momo-backup-user.yml",
        "execution_mode": "controlled_apply",
        "returncode": "0",
    }

    readback = build_runtime_receipt_readback_from_rows(
        project_id="awoooi",
        db_read_status="ok",
        operation_latest_rows=[unrelated_check_mode_row, apply_row],
    )

    ledger = readback["autonomous_execution_loop_ledger"]
    assert ledger["operation_id"] == apply_op_id
    assert ledger["check_mode_op_id"] == "expected-check-mode-op"
    assert ledger["root_candidate_op_id"] is None
    assert ledger["incident_id"] == "INC-20260629-LEDGER"

    # The check-mode stage is inferred from the apply row's parent id only.
    check_mode_stage = ledger["stages"][1]
    assert check_mode_stage["stage_id"] == "check_mode"
    assert check_mode_stage["ref_id"] == "expected-check-mode-op"
    assert check_mode_stage["status"] == "inferred_from_apply_parent"
    assert "candidate" in ledger["missing_stage_ids"]
|
||||
|
||||
@@ -3710,7 +3710,10 @@
|
||||
"executor": "live ansible_apply_executed",
|
||||
"executorDetail": "post verifier {verifier} · KM {km} · Telegram {telegram}",
|
||||
"loop": "latest flow closed",
|
||||
"loopDetail": "MCP {mcp} · RAG {rag} · PlayBook {playbook}"
|
||||
"loopDetail": "MCP {mcp} · RAG {rag} · PlayBook {playbook}",
|
||||
"ledger": "operation-id loop ledger",
|
||||
"ledgerDetail": "{state} · stages {stages} · missing {missing}",
|
||||
"stageLedger": "candidate → check-mode → apply → verifier → KM → Telegram"
|
||||
},
|
||||
"badges": {
|
||||
"override": "舊 no-send / no-live 已覆寫",
|
||||
|
||||
@@ -3710,7 +3710,10 @@
|
||||
"executor": "live ansible_apply_executed",
|
||||
"executorDetail": "post verifier {verifier} · KM {km} · Telegram {telegram}",
|
||||
"loop": "latest flow closed",
|
||||
"loopDetail": "MCP {mcp} · RAG {rag} · PlayBook {playbook}"
|
||||
"loopDetail": "MCP {mcp} · RAG {rag} · PlayBook {playbook}",
|
||||
"ledger": "operation-id 閉環總帳",
|
||||
"ledgerDetail": "{state} · stages {stages} · missing {missing}",
|
||||
"stageLedger": "candidate → check-mode → apply → verifier → KM → Telegram"
|
||||
},
|
||||
"badges": {
|
||||
"override": "舊 no-send / no-live 已覆寫",
|
||||
|
||||
@@ -855,38 +855,69 @@ function AutonomousRuntimeControlReadbackGrid({
|
||||
t: ReturnType<typeof useTranslations>
|
||||
}) {
|
||||
const rollups = control.rollups
|
||||
const loopLedger = control.runtime_receipt_readback?.autonomous_execution_loop_ledger
|
||||
const loopStages = loopLedger?.stages ?? []
|
||||
const missingStageCount = loopLedger?.missing_stage_ids.length ?? 0
|
||||
|
||||
return (
|
||||
<div style={{ display: 'grid', gridTemplateColumns: 'repeat(3, minmax(0, 1fr))', gap: 10 }} className="automation-inventory-current-autonomy-readback-grid">
|
||||
<GateMatrixRow
|
||||
label={t('globalControl.currentAutonomy.readback.marker')}
|
||||
value={control.program_status.deploy_readback_marker}
|
||||
detail={t('globalControl.currentAutonomy.readback.markerDetail', {
|
||||
task: control.program_status.current_task_id,
|
||||
status: control.program_status.status,
|
||||
})}
|
||||
tone="ok"
|
||||
/>
|
||||
<GateMatrixRow
|
||||
label={t('globalControl.currentAutonomy.readback.executor')}
|
||||
value={String(rollups.live_ansible_apply_executed_count ?? 0)}
|
||||
detail={t('globalControl.currentAutonomy.readback.executorDetail', {
|
||||
verifier: rollups.live_post_apply_verifier_count ?? 0,
|
||||
km: rollups.live_km_writeback_count ?? 0,
|
||||
telegram: rollups.live_telegram_receipt_count ?? 0,
|
||||
})}
|
||||
tone={(rollups.live_ansible_apply_executed_count ?? 0) > 0 ? 'ok' : 'warn'}
|
||||
/>
|
||||
<GateMatrixRow
|
||||
label={t('globalControl.currentAutonomy.readback.loop')}
|
||||
value={String(rollups.live_executor_latest_flow_closed_count ?? 0)}
|
||||
detail={t('globalControl.currentAutonomy.readback.loopDetail', {
|
||||
mcp: rollups.mcp_sensor_count ?? 0,
|
||||
rag: rollups.rag_context_query_count ?? 0,
|
||||
playbook: rollups.playbook_decision_class_count ?? 0,
|
||||
})}
|
||||
tone={(rollups.live_executor_latest_flow_closed_count ?? 0) > 0 ? 'ok' : 'warn'}
|
||||
/>
|
||||
<div style={{ display: 'flex', flexDirection: 'column', gap: 10, minWidth: 0 }}>
|
||||
<div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(180px, 1fr))', gap: 10 }} className="automation-inventory-current-autonomy-readback-grid">
|
||||
<GateMatrixRow
|
||||
label={t('globalControl.currentAutonomy.readback.marker')}
|
||||
value={control.program_status.deploy_readback_marker}
|
||||
detail={t('globalControl.currentAutonomy.readback.markerDetail', {
|
||||
task: control.program_status.current_task_id,
|
||||
status: control.program_status.status,
|
||||
})}
|
||||
tone="ok"
|
||||
/>
|
||||
<GateMatrixRow
|
||||
label={t('globalControl.currentAutonomy.readback.executor')}
|
||||
value={String(rollups.live_ansible_apply_executed_count ?? 0)}
|
||||
detail={t('globalControl.currentAutonomy.readback.executorDetail', {
|
||||
verifier: rollups.live_post_apply_verifier_count ?? 0,
|
||||
km: rollups.live_km_writeback_count ?? 0,
|
||||
telegram: rollups.live_telegram_receipt_count ?? 0,
|
||||
})}
|
||||
tone={(rollups.live_ansible_apply_executed_count ?? 0) > 0 ? 'ok' : 'warn'}
|
||||
/>
|
||||
<GateMatrixRow
|
||||
label={t('globalControl.currentAutonomy.readback.loop')}
|
||||
value={String(rollups.live_executor_latest_flow_closed_count ?? 0)}
|
||||
detail={t('globalControl.currentAutonomy.readback.loopDetail', {
|
||||
mcp: rollups.mcp_sensor_count ?? 0,
|
||||
rag: rollups.rag_context_query_count ?? 0,
|
||||
playbook: rollups.playbook_decision_class_count ?? 0,
|
||||
})}
|
||||
tone={(rollups.live_executor_latest_flow_closed_count ?? 0) > 0 ? 'ok' : 'warn'}
|
||||
/>
|
||||
<GateMatrixRow
|
||||
label={t('globalControl.currentAutonomy.readback.ledger')}
|
||||
value={String(rollups.live_autonomous_execution_loop_closed_count ?? 0)}
|
||||
detail={t('globalControl.currentAutonomy.readback.ledgerDetail', {
|
||||
state: loopLedger?.execution_state ?? 'none',
|
||||
stages: loopStages.length,
|
||||
missing: missingStageCount,
|
||||
})}
|
||||
tone={(rollups.live_autonomous_execution_loop_closed_count ?? 0) > 0 ? 'ok' : missingStageCount > 0 ? 'warn' : 'neutral'}
|
||||
/>
|
||||
</div>
|
||||
{loopLedger ? (
|
||||
<div style={{ padding: 12, border: '0.5px solid #d8e8df', borderRadius: 7, background: '#fbfffd', display: 'flex', flexDirection: 'column', gap: 8, minWidth: 0 }}>
|
||||
<SmallLabel>{t('globalControl.currentAutonomy.readback.stageLedger')}</SmallLabel>
|
||||
<div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(180px, 1fr))', gap: 8 }} className="automation-inventory-current-autonomy-stage-ledger">
|
||||
{loopStages.map(stage => (
|
||||
<GateMatrixRow
|
||||
key={stage.stage_id}
|
||||
label={stage.stage_id}
|
||||
value={stage.status}
|
||||
detail={stage.present ? stage.receipt_source : `${stage.receipt_source} · ${stage.next_action_if_missing ?? '--'}`}
|
||||
tone={stage.present ? 'ok' : stage.writes_runtime_state ? 'warn' : 'neutral'}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
) : null}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -2197,7 +2197,9 @@ export interface AiAgentAutonomousRuntimeControlSnapshot {
|
||||
runtime_write_receipt_type_count: number
|
||||
legacy_policy_overridden_count: number
|
||||
live_ansible_apply_executed_count?: number
|
||||
live_auto_repair_execution_receipt_count?: number
|
||||
live_executor_latest_flow_closed_count?: number
|
||||
live_autonomous_execution_loop_closed_count?: number
|
||||
live_km_writeback_count?: number
|
||||
live_post_apply_verifier_count?: number
|
||||
live_telegram_receipt_count?: number
|
||||
@@ -2206,6 +2208,43 @@ export interface AiAgentAutonomousRuntimeControlSnapshot {
|
||||
playbook_decision_class_count?: number
|
||||
deploy_control_classifier_example_count?: number
|
||||
}
|
||||
runtime_receipt_readback?: {
|
||||
schema_version: 'ai_agent_autonomous_runtime_receipt_readback_v1'
|
||||
db_read_status: string
|
||||
writes_on_read: boolean
|
||||
autonomous_execution_loop_ledger?: {
|
||||
schema_version: 'ai_agent_autonomous_execution_loop_ledger_v1'
|
||||
project_id: string
|
||||
operation_id: string | null
|
||||
root_candidate_op_id: string | null
|
||||
check_mode_op_id: string | null
|
||||
apply_op_id: string | null
|
||||
incident_id: string | null
|
||||
catalog_id: string | null
|
||||
playbook_path: string | null
|
||||
execution_state: string
|
||||
closed: boolean
|
||||
missing_stage_ids: string[]
|
||||
next_executor_action: string
|
||||
stages: Array<{
|
||||
stage_id: string
|
||||
receipt_source: string
|
||||
present: boolean
|
||||
status: string
|
||||
ref_id: string | null
|
||||
writes_runtime_state: boolean
|
||||
next_action_if_missing: string | null
|
||||
}>
|
||||
safety_contract: {
|
||||
writes_on_read: boolean
|
||||
backfill_may_write_auto_repair_verifier_km: boolean
|
||||
backfill_may_send_telegram: boolean
|
||||
live_apply_may_send_telegram_gateway_receipt: boolean
|
||||
reads_raw_sessions: boolean
|
||||
reads_secret_values: boolean
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export interface AiAgentAutomationInventorySnapshot {
|
||||
|
||||
Reference in New Issue
Block a user