feat(ai): add autonomous runtime loop ledger

2026-06-29 13:53:04 +08:00
parent 023f46f286
commit 52490d25dd
6 changed files with 638 additions and 32 deletions
--- a/apps/api/src/services/ai_agent_autonomous_runtime_control.py
+++ b/apps/api/src/services/ai_agent_autonomous_runtime_control.py
@@ -151,6 +151,298 @@ def _status_counts(
    }


+def _first_operation(
+    rows: Iterable[Mapping[str, Any]],
+    operation_type: str,
+) -> dict[str, Any] | None:
+    """Return a dict copy of the first row whose operation_type matches, else None."""
+    matches = (row for row in rows if str(row.get("operation_type") or "") == operation_type)
+    hit = next(matches, None)
+    return dict(hit) if hit is not None else None
+
+
+def _operation_by_id(
+    rows: Iterable[Mapping[str, Any]],
+    op_id: Any,
+) -> dict[str, Any] | None:
+    """Look up a row by op_id and return a dict copy; None for a falsy or unknown id."""
+    wanted = str(op_id or "")
+    found = None
+    if wanted:
+        # A falsy op_id is never searched for, so rows lacking an op_id cannot match it.
+        found = next((row for row in rows if str(row.get("op_id") or "") == wanted), None)
+    return None if found is None else dict(found)
+
+
+def _stage_status(row: Mapping[str, Any] | None, *, fallback_status: str | None = None) -> str:
+    """Pick a stage label: row status, then result_status, then the fallback, then a default."""
+    picked = (row.get("status") or row.get("result_status")) if row is not None else None
+    return str(picked or fallback_status or ("missing" if row is None else "present"))
+
+
+def _loop_stage(
+    *,
+    stage_id: str,
+    receipt_source: str,
+    present: bool,
+    status: str,
+    ref_id: str | None,
+    writes_runtime_state: bool,
+    next_action_if_missing: str,
+) -> dict[str, Any]:  # one stage entry of the execution-loop ledger
+    return dict(
+        stage_id=stage_id,
+        receipt_source=receipt_source,
+        present=present,
+        status=status,
+        ref_id=ref_id,
+        writes_runtime_state=writes_runtime_state,
+        next_action_if_missing=next_action_if_missing if not present else None,  # hidden once present
+    )
+
+
+def _autonomous_execution_loop_ledger(
+    *,
+    project_id: str,
+    operation_latest_rows: Iterable[Mapping[str, Any] | Any],
+    verifier_latest_rows: Iterable[Mapping[str, Any] | Any],
+    km_latest_rows: Iterable[Mapping[str, Any] | Any],
+    telegram_latest_rows: Iterable[Mapping[str, Any] | Any],
+    auto_repair_latest_rows: Iterable[Mapping[str, Any] | Any],
+    latest_flow_closure: Mapping[str, Any],
+    latest_failure_classification: Mapping[str, Any],
+    controlled_retry_package: Mapping[str, Any],
+) -> dict[str, Any]:
+    """Link one apply/check-mode/candidate chain to its receipts and report whether the loop closed."""
+
+    operation_rows = [_row_mapping(row) for row in operation_latest_rows]
+    verifier_rows = [_row_mapping(row) for row in verifier_latest_rows]
+    km_rows = [_row_mapping(row) for row in km_latest_rows]
+    telegram_rows = [_row_mapping(row) for row in telegram_latest_rows]
+    auto_repair_rows = [_row_mapping(row) for row in auto_repair_latest_rows]
+
+    latest_apply = _first_operation(operation_rows, "ansible_apply_executed")  # first listed; presumably newest-first - TODO confirm
+    latest_check = None
+    latest_candidate = None
+    if latest_apply is not None:  # resolve the check row through the apply's own link, not by position
+        latest_check = _operation_by_id(
+            operation_rows,
+            latest_apply.get("check_mode_op_id") or latest_apply.get("parent_op_id"),
+        )
+    else:  # no apply row: use the first listed check-mode row
+        latest_check = _first_operation(operation_rows, "ansible_check_mode_executed")
+
+    source_candidate_op_id = None
+    if latest_check is not None:
+        source_candidate_op_id = latest_check.get("parent_op_id") or latest_check.get("source_candidate_op_id")
+    if latest_apply is not None and not source_candidate_op_id:
+        source_candidate_op_id = latest_apply.get("source_candidate_op_id")
+    latest_candidate = _operation_by_id(operation_rows, source_candidate_op_id)
+    if latest_candidate is None and latest_apply is None and latest_check is None:  # nothing anchors the chain yet
+        latest_candidate = _first_operation(operation_rows, "ansible_candidate_matched")
+
+    anchor = latest_apply or latest_check or latest_candidate or {}  # furthest stage found supplies incident/catalog/playbook
+    apply_op_id = str((latest_apply or {}).get("op_id") or "")
+    check_mode_op_id = str(
+        (latest_check or {}).get("op_id")
+        or (latest_apply or {}).get("check_mode_op_id")
+        or (latest_apply or {}).get("parent_op_id")
+        or ""
+    )
+    candidate_op_id = str(
+        (latest_candidate or {}).get("op_id")
+        or source_candidate_op_id
+        or ""
+    )
+    incident_id = str(anchor.get("incident_id") or "")
+    catalog_id = str(anchor.get("catalog_id") or "")
+    playbook_path = str(anchor.get("playbook_path") or "")
+
+    verifier = next(
+        (
+            row
+            for row in verifier_rows
+            if apply_op_id and str(row.get("apply_op_id") or "") == apply_op_id
+        ),
+        None,
+    )
+    km_path_type = f"ansible_apply_receipt:{apply_op_id[:8]}" if apply_op_id else ""  # keyed by the first 8 chars of the apply op id
+    km = next(
+        (
+            row
+            for row in km_rows
+            if (
+                km_path_type
+                and str(row.get("path_type") or "") == km_path_type
+            )
+            or (  # otherwise accept a KM row tied to the same incident
+                incident_id
+                and str(row.get("related_incident_id") or "") == incident_id
+            )
+        ),
+        None,
+    )
+    telegram = next(
+        (
+            row
+            for row in telegram_rows
+            if str(row.get("send_status") or "") == "sent"
+            and str(row.get("action") or "") == "controlled_apply_result"
+            and (
+                not incident_id  # NOTE(review): with no incident id any sent receipt matches - confirm intended
+                or str(row.get("incident_id") or "") == incident_id
+            )
+        ),
+        None,
+    )
+    auto_repair = next(
+        (
+            row
+            for row in auto_repair_rows
+            if apply_op_id
+            and apply_op_id
+            in str(row.get("executed_steps_text") or row.get("executed_steps") or "")  # substring match on the steps text
+        ),
+        None,
+    )
+
+    candidate_present = bool(latest_candidate or candidate_op_id)  # an id recovered from a child row counts as present
+    check_present = bool(latest_check or check_mode_op_id)
+    apply_present = latest_apply is not None
+    auto_repair_present = auto_repair is not None
+    verifier_present = verifier is not None
+    km_present = km is not None
+    telegram_present = telegram is not None
+
+    stages = [
+        _loop_stage(
+            stage_id="candidate",
+            receipt_source="automation_operation_log:ansible_candidate_matched",
+            present=candidate_present,
+            status=_stage_status(latest_candidate, fallback_status="inferred_from_check_mode")
+            if candidate_present
+            else "missing",
+            ref_id=candidate_op_id or None,
+            writes_runtime_state=False,
+            next_action_if_missing="candidate_backfill_worker_enqueue_allowlisted_playbook",
+        ),
+        _loop_stage(
+            stage_id="check_mode",
+            receipt_source="automation_operation_log:ansible_check_mode_executed",
+            present=check_present,
+            status=_stage_status(latest_check, fallback_status="inferred_from_apply_parent")
+            if check_present
+            else "missing",
+            ref_id=check_mode_op_id or None,
+            writes_runtime_state=False,
+            next_action_if_missing="ansible_check_mode_worker_claims_candidate",
+        ),
+        _loop_stage(
+            stage_id="controlled_apply",
+            receipt_source="automation_operation_log:ansible_apply_executed",
+            present=apply_present,
+            status=_stage_status(latest_apply),
+            ref_id=apply_op_id or None,
+            writes_runtime_state=True,
+            next_action_if_missing="controlled_apply_worker_waits_for_check_mode_success",
+        ),
+        _loop_stage(
+            stage_id="auto_repair_execution_receipt",
+            receipt_source="auto_repair_executions:ansible_controlled_apply",
+            present=auto_repair_present,
+            status=str((auto_repair or {}).get("result_status") or "missing"),
+            ref_id=str((auto_repair or {}).get("id") or "") or None,
+            writes_runtime_state=True,
+            next_action_if_missing="receipt_backfill_records_auto_repair_execution",
+        ),
+        _loop_stage(
+            stage_id="post_apply_verifier",
+            receipt_source="incident_evidence.post_execution_state",
+            present=verifier_present,
+            status=str((verifier or {}).get("verification_result") or "missing"),
+            ref_id=str((verifier or {}).get("id") or "") or None,
+            writes_runtime_state=True,
+            next_action_if_missing="post_apply_verifier_writes_incident_evidence",
+        ),
+        _loop_stage(
+            stage_id="km_playbook_writeback",
+            receipt_source="knowledge_entries:ansible_apply_receipt",
+            present=km_present,
+            status=str((km or {}).get("status") or "missing"),
+            ref_id=str((km or {}).get("id") or "") or None,
+            writes_runtime_state=True,
+            next_action_if_missing="hermes_writes_km_playbook_trust_candidate",
+        ),
+        _loop_stage(
+            stage_id="telegram_receipt",
+            receipt_source="awooop_outbound_message:controlled_apply_result",
+            present=telegram_present,
+            status=str((telegram or {}).get("send_status") or "missing"),
+            ref_id=str((telegram or {}).get("message_id") or "") or None,
+            writes_runtime_state=True,
+            next_action_if_missing="live_apply_gateway_sends_controlled_apply_result_receipt",
+        ),
+    ]
+    missing_stage_ids = [
+        str(stage["stage_id"])
+        for stage in stages
+        if stage["present"] is not True
+    ]
+    closed = bool(  # verifier/KM/Telegram presence is not checked here; latest_flow_closure is relied on instead
+        apply_op_id
+        and auto_repair_present
+        and latest_flow_closure.get("closed") is True
+    )
+
+    classification = str(latest_failure_classification.get("classification") or "")
+    if not candidate_present and not check_present and not apply_present:
+        execution_state = "waiting_for_candidate"
+        next_executor_action = "candidate_backfill_worker_waits_for_matching_incident"
+    elif not apply_present:
+        execution_state = "executor_in_progress_or_waiting"
+        next_executor_action = "continue_candidate_to_check_mode_to_apply"
+    elif closed and classification == "latest_controlled_apply_closed_success":
+        execution_state = "closed_success"
+        next_executor_action = "keep_receipt_chain_closed"
+    elif closed:
+        execution_state = "closed_failed_apply_repair_ready"
+        next_executor_action = str(
+            controlled_retry_package.get("next_ai_action")
+            or "run_no_write_check_mode_replay"
+        )
+    elif "telegram_receipt" in missing_stage_ids:  # a missing Telegram receipt outranks other gaps
+        execution_state = "open_waiting_for_live_gateway_receipt"
+        next_executor_action = "do_not_fake_send_backfill_wait_for_live_apply_gateway"
+    else:
+        execution_state = "open_missing_internal_receipts"
+        next_executor_action = "backfill_missing_auto_repair_verifier_km_receipts"
+
+    return {
+        "schema_version": "ai_agent_autonomous_execution_loop_ledger_v1",
+        "project_id": project_id,
+        "operation_id": apply_op_id or check_mode_op_id or candidate_op_id or None,
+        "root_candidate_op_id": candidate_op_id or None,
+        "check_mode_op_id": check_mode_op_id or None,
+        "apply_op_id": apply_op_id or None,
+        "incident_id": incident_id or None,
+        "catalog_id": catalog_id or None,
+        "playbook_path": playbook_path or None,
+        "execution_state": execution_state,
+        "closed": closed,
+        "missing_stage_ids": missing_stage_ids,
+        "next_executor_action": next_executor_action,
+        "stages": stages,
+        "safety_contract": {
+            "writes_on_read": False,
+            "backfill_may_write_auto_repair_verifier_km": True,
+            "backfill_may_send_telegram": False,
+            "live_apply_may_send_telegram_gateway_receipt": True,
+            "reads_raw_sessions": False,
+            "reads_secret_values": False,
+        },
+    }
+
+
 def _latest_flow_closure(
    *,
    operation_latest_rows: Iterable[Mapping[str, Any] | Any],
@@ -568,6 +860,8 @@ def build_runtime_receipt_readback_from_rows(
    db_read_status: str = "ok",
    operation_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    operation_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
+    auto_repair_count_rows: Iterable[Mapping[str, Any] | Any] = (),
+    auto_repair_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    verifier_count_rows: Iterable[Mapping[str, Any] | Any] = (),
    verifier_latest_rows: Iterable[Mapping[str, Any] | Any] = (),
    km_count_rows: Iterable[Mapping[str, Any] | Any] = (),
@@ -579,10 +873,15 @@ def build_runtime_receipt_readback_from_rows(
    """Build the live executor receipt readback from already-fetched rows."""

    operation_latest = list(operation_latest_rows)
+    auto_repair_latest = list(auto_repair_latest_rows)
    verifier_latest = list(verifier_latest_rows)
    km_latest = list(km_latest_rows)
    telegram_latest = list(telegram_latest_rows)
    operation_summary = _operation_counts(operation_count_rows)
+    auto_repair_summary = _status_counts(
+        auto_repair_count_rows,
+        status_key="result_status",
+    )
    verifier_summary = _status_counts(
        verifier_count_rows,
        status_key="verification_result",
@@ -601,6 +900,17 @@ def build_runtime_receipt_readback_from_rows(
        latest_flow_closure=latest_closure,
    )
    retry_package = _controlled_retry_package(latest_failure)
+    loop_ledger = _autonomous_execution_loop_ledger(
+        project_id=project_id,
+        operation_latest_rows=operation_latest,
+        verifier_latest_rows=verifier_latest,
+        km_latest_rows=km_latest,
+        telegram_latest_rows=telegram_latest,
+        auto_repair_latest_rows=auto_repair_latest,
+        latest_flow_closure=latest_closure,
+        latest_failure_classification=latest_failure,
+        controlled_retry_package=retry_package,
+    )
    apply_summary = operation_summary.get("ansible_apply_executed") or {}
    readback = {
        "schema_version": _LIVE_READBACK_SCHEMA_VERSION,
@@ -622,12 +932,33 @@ def build_runtime_receipt_readback_from_rows(
                    "catalog_id",
                    "playbook_path",
                    "execution_mode",
+                    "source_candidate_op_id",
+                    "check_mode_op_id",
+                    "risk_level",
+                    "controlled_apply_allowed",
                    "returncode",
                    "duration_ms",
                    "created_at",
                ),
            ),
        },
+        "auto_repair_execution_receipt": {
+            **auto_repair_summary,
+            "latest": _sanitize_latest_rows(
+                auto_repair_latest,
+                allowed_keys=(
+                    "id",
+                    "incident_id",
+                    "catalog_id",
+                    "playbook_name",
+                    "result_status",
+                    "triggered_by",
+                    "risk_level",
+                    "execution_time_ms",
+                    "created_at",
+                ),
+            ),
+        },
        "ansible_apply_executed": {
            "total": _int_value(apply_summary.get("total")),
            "recent": _int_value(apply_summary.get("recent")),
@@ -686,6 +1017,7 @@ def build_runtime_receipt_readback_from_rows(
        "latest_flow_closure": latest_closure,
        "latest_failure_classification": latest_failure,
        "controlled_retry_package": retry_package,
+        "autonomous_execution_loop_ledger": loop_ledger,
    }
    if error_type:
        readback["error"] = {
@@ -705,6 +1037,9 @@ def _attach_runtime_receipt_readback(
        "live_ansible_apply_executed_count": _int_value(
            readback.get("ansible_apply_executed", {}).get("total")
        ),
+        "live_auto_repair_execution_receipt_count": _int_value(
+            readback.get("auto_repair_execution_receipt", {}).get("total")
+        ),
        "live_post_apply_verifier_count": _int_value(
            readback.get("post_apply_verifier", {}).get("total")
        ),
@@ -719,6 +1054,11 @@ def _attach_runtime_receipt_readback(
            if (readback.get("latest_flow_closure") or {}).get("closed") is True
            else 0
        ),
+        "live_autonomous_execution_loop_closed_count": (
+            1
+            if (readback.get("autonomous_execution_loop_ledger") or {}).get("closed") is True
+            else 0
+        ),
        "live_executor_latest_apply_repair_required_count": (
            1
            if (
@@ -987,6 +1327,12 @@ async def load_ai_agent_autonomous_runtime_receipt_readback(
            operation_latest = (
                await db.execute(text(_RUNTIME_OPERATION_LATEST_SQL), params)
            ).mappings().all()
+            auto_repair_counts = (
+                await db.execute(text(_RUNTIME_AUTO_REPAIR_COUNTS_SQL), params)
+            ).mappings().all()
+            auto_repair_latest = (
+                await db.execute(text(_RUNTIME_AUTO_REPAIR_LATEST_SQL), params)
+            ).mappings().all()
            verifier_counts = (
                await db.execute(text(_RUNTIME_VERIFIER_COUNTS_SQL), params)
            ).mappings().all()
@@ -1024,6 +1370,8 @@ async def load_ai_agent_autonomous_runtime_receipt_readback(
        db_read_status="ok",
        operation_count_rows=operation_counts,
        operation_latest_rows=operation_latest,
+        auto_repair_count_rows=auto_repair_counts,
+        auto_repair_latest_rows=auto_repair_latest,
        verifier_count_rows=verifier_counts,
        verifier_latest_rows=verifier_latest,
        km_count_rows=km_counts,
@@ -1082,6 +1430,10 @@ _RUNTIME_OPERATION_LATEST_SQL = """
        input ->> 'catalog_id' AS catalog_id,
        coalesce(input ->> 'apply_playbook_path', input ->> 'playbook_path') AS playbook_path,
        input ->> 'execution_mode' AS execution_mode,
+        input ->> 'source_candidate_op_id' AS source_candidate_op_id,
+        input ->> 'check_mode_op_id' AS check_mode_op_id,
+        input ->> 'risk_level' AS risk_level,
+        input ->> 'controlled_apply_allowed' AS controlled_apply_allowed,
        coalesce(output ->> 'returncode', dry_run_result ->> 'returncode') AS returncode,
        duration_ms,
        created_at
@@ -1098,6 +1450,39 @@ _RUNTIME_OPERATION_LATEST_SQL = """
 """


+_RUNTIME_AUTO_REPAIR_COUNTS_SQL = """
+    SELECT
+        CASE WHEN success THEN 'success' ELSE 'failed' END AS result_status,
+        count(*) AS total,
+        count(*) FILTER (
+            WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour')
+        ) AS recent
+    FROM auto_repair_executions
+    WHERE triggered_by = 'ansible_controlled_apply'
+    GROUP BY CASE WHEN success THEN 'success' ELSE 'failed' END
+    ORDER BY result_status
+"""  # success/failed totals for ansible_controlled_apply rows; :lookback_hours bounds "recent"
+
+
+_RUNTIME_AUTO_REPAIR_LATEST_SQL = """
+    SELECT
+        id,
+        incident_id,
+        playbook_id AS catalog_id,
+        playbook_name,
+        CASE WHEN success THEN 'success' ELSE 'failed' END AS result_status,
+        executed_steps::text AS executed_steps_text,
+        triggered_by,
+        risk_level,
+        execution_time_ms,
+        created_at
+    FROM auto_repair_executions
+    WHERE triggered_by = 'ansible_controlled_apply'
+    ORDER BY created_at DESC
+    LIMIT :limit
+"""  # newest-first ansible_controlled_apply rows, capped by :limit; executed_steps is cast to text
+
+
 _RUNTIME_VERIFIER_COUNTS_SQL = """
    SELECT
        coalesce(verification_result, 'missing') AS verification_result,
--- a/apps/api/tests/test_ai_agent_autonomous_runtime_control.py
+++ b/apps/api/tests/test_ai_agent_autonomous_runtime_control.py
@@ -146,6 +146,29 @@ def test_runtime_receipt_readback_summarizes_live_executor_closure_rows():
            },
        ],
        operation_latest_rows=[
+            {
+                "op_id": "candidate-op",
+                "parent_op_id": None,
+                "operation_type": "ansible_candidate_matched",
+                "status": "dry_run",
+                "actor": "decision_manager",
+                "incident_id": incident_id,
+                "catalog_id": "ansible:188-momo-backup-user",
+                "playbook_path": "infra/ansible/playbooks/188-momo-backup-user.yml",
+                "execution_mode": "check_mode",
+            },
+            {
+                "op_id": "check-mode-op",
+                "parent_op_id": "candidate-op",
+                "operation_type": "ansible_check_mode_executed",
+                "status": "success",
+                "actor": "ansible_check_mode_worker",
+                "incident_id": incident_id,
+                "catalog_id": "ansible:188-momo-backup-user",
+                "playbook_path": "infra/ansible/playbooks/188-momo-backup-user.yml",
+                "execution_mode": "check_mode",
+                "returncode": "0",
+            },
            {
                "op_id": apply_op_id,
                "parent_op_id": "check-mode-op",
@@ -156,10 +179,28 @@ def test_runtime_receipt_readback_summarizes_live_executor_closure_rows():
                "catalog_id": "ansible:188-momo-backup-user",
                "playbook_path": "infra/ansible/playbooks/188-momo-backup-user.yml",
                "execution_mode": "controlled_apply",
+                "source_candidate_op_id": "candidate-op",
+                "check_mode_op_id": "check-mode-op",
                "returncode": "0",
                "duration_ms": 7727,
            },
        ],
+        auto_repair_count_rows=[
+            {"result_status": "success", "total": 1, "recent": 1},
+        ],
+        auto_repair_latest_rows=[
+            {
+                "id": "auto-repair-1",
+                "incident_id": incident_id,
+                "catalog_id": "ansible:188-momo-backup-user",
+                "playbook_name": "infra/ansible/playbooks/188-momo-backup-user.yml",
+                "result_status": "success",
+                "executed_steps_text": f'["apply:{apply_op_id}"]',
+                "triggered_by": "ansible_controlled_apply",
+                "risk_level": "low",
+                "execution_time_ms": 7727,
+            },
+        ],
        verifier_count_rows=[
            {"verification_result": "success", "total": 1, "recent": 1},
        ],
@@ -207,6 +248,7 @@ def test_runtime_receipt_readback_summarizes_live_executor_closure_rows():
    assert readback["db_read_status"] == "ok"
    assert readback["writes_on_read"] is False
    assert readback["ansible_apply_executed"]["total"] == 1
+    assert readback["auto_repair_execution_receipt"]["by_status"]["success"] == 1
    assert readback["post_apply_verifier"]["by_status"]["success"] == 1
    assert readback["km_writeback"]["by_status"]["review"] == 1
    assert readback["telegram_receipt"]["by_status"]["sent"] == 1
@@ -223,6 +265,28 @@ def test_runtime_receipt_readback_summarizes_live_executor_closure_rows():
        "latest_controlled_apply_closed_success"
    )
    assert readback["controlled_retry_package"]["status"] == "not_required_for_latest_apply"
+    ledger = readback["autonomous_execution_loop_ledger"]
+    assert ledger["schema_version"] == "ai_agent_autonomous_execution_loop_ledger_v1"
+    assert ledger["operation_id"] == apply_op_id
+    assert ledger["root_candidate_op_id"] == "candidate-op"
+    assert ledger["check_mode_op_id"] == "check-mode-op"
+    assert ledger["apply_op_id"] == apply_op_id
+    assert ledger["execution_state"] == "closed_success"
+    assert ledger["closed"] is True
+    assert ledger["missing_stage_ids"] == []
+    assert ledger["next_executor_action"] == "keep_receipt_chain_closed"
+    assert [stage["stage_id"] for stage in ledger["stages"]] == [
+        "candidate",
+        "check_mode",
+        "controlled_apply",
+        "auto_repair_execution_receipt",
+        "post_apply_verifier",
+        "km_playbook_writeback",
+        "telegram_receipt",
+    ]
+    assert {stage["present"] for stage in ledger["stages"]} == {True}
+    assert ledger["safety_contract"]["backfill_may_send_telegram"] is False
+    assert ledger["safety_contract"]["live_apply_may_send_telegram_gateway_receipt"] is True


 def test_runtime_receipt_readback_classifies_closed_failed_apply_as_ai_repair():
@@ -241,6 +305,18 @@ def test_runtime_receipt_readback_classifies_closed_failed_apply_as_ai_repair():
            },
        ],
        operation_latest_rows=[
+            {
+                "op_id": "8b555f41-e81f-4d8e-956b-fb20d358db63",
+                "parent_op_id": "candidate-op-2",
+                "operation_type": "ansible_check_mode_executed",
+                "status": "success",
+                "actor": "ansible_check_mode_worker",
+                "incident_id": incident_id,
+                "catalog_id": "ansible:188-ai-web",
+                "playbook_path": "infra/ansible/playbooks/188-ai-web-readonly.yml",
+                "execution_mode": "check_mode",
+                "returncode": "0",
+            },
            {
                "op_id": apply_op_id,
                "parent_op_id": "8b555f41-e81f-4d8e-956b-fb20d358db63",
@@ -251,10 +327,28 @@ def test_runtime_receipt_readback_classifies_closed_failed_apply_as_ai_repair():
                "catalog_id": "ansible:188-ai-web",
                "playbook_path": "infra/ansible/playbooks/188-ai-web.yml",
                "execution_mode": "controlled_apply",
+                "source_candidate_op_id": "candidate-op-2",
+                "check_mode_op_id": "8b555f41-e81f-4d8e-956b-fb20d358db63",
                "returncode": "2",
                "duration_ms": 4797,
            },
        ],
+        auto_repair_count_rows=[
+            {"result_status": "failed", "total": 1, "recent": 1},
+        ],
+        auto_repair_latest_rows=[
+            {
+                "id": "auto-repair-2",
+                "incident_id": incident_id,
+                "catalog_id": "ansible:188-ai-web",
+                "playbook_name": "infra/ansible/playbooks/188-ai-web.yml",
+                "result_status": "failed",
+                "executed_steps_text": f'["apply:{apply_op_id}"]',
+                "triggered_by": "ansible_controlled_apply",
+                "risk_level": "medium",
+                "execution_time_ms": 4797,
+            },
+        ],
        verifier_count_rows=[
            {"verification_result": "failed", "total": 1, "recent": 1},
        ],
@@ -344,3 +438,54 @@ def test_runtime_receipt_readback_classifies_closed_failed_apply_as_ai_repair():
        "telegram_receipt_required": True,
    }
    assert retry["next_ai_action"] == "run_no_write_check_mode_replay"
+    ledger = readback["autonomous_execution_loop_ledger"]
+    assert ledger["closed"] is True
+    assert ledger["execution_state"] == "closed_failed_apply_repair_ready"
+    assert ledger["next_executor_action"] == "run_no_write_check_mode_replay"
+    assert ledger["missing_stage_ids"] == []
+    assert ledger["stages"][3]["stage_id"] == "auto_repair_execution_receipt"
+    assert ledger["stages"][3]["status"] == "failed"
+
+
+def test_runtime_execution_loop_ledger_does_not_mix_unrelated_check_mode_rows():
+    apply_op_id = "db3f12ce-08fc-4289-8c93-338305d5850c"
+    readback = build_runtime_receipt_readback_from_rows(
+        project_id="awoooi",
+        db_read_status="ok",
+        operation_latest_rows=[
+            {
+                "op_id": "unrelated-check-mode-op",  # must not be linked to the apply row below
+                "parent_op_id": "unrelated-candidate",
+                "operation_type": "ansible_check_mode_executed",
+                "status": "success",
+                "actor": "ansible_check_mode_worker",
+                "incident_id": "INC-OTHER",
+                "catalog_id": "ansible:110-devops",
+                "playbook_path": "infra/ansible/playbooks/110-devops.yml",
+                "execution_mode": "check_mode",
+                "returncode": "0",
+            },
+            {
+                "op_id": apply_op_id,
+                "parent_op_id": "expected-check-mode-op",  # no row with this op_id is supplied
+                "operation_type": "ansible_apply_executed",
+                "status": "success",
+                "actor": "ansible_controlled_apply_worker",
+                "incident_id": "INC-20260629-LEDGER",
+                "catalog_id": "ansible:188-momo-backup-user",
+                "playbook_path": "infra/ansible/playbooks/188-momo-backup-user.yml",
+                "execution_mode": "controlled_apply",
+                "returncode": "0",
+            },
+        ],
+    )
+
+    ledger = readback["autonomous_execution_loop_ledger"]
+    assert ledger["operation_id"] == apply_op_id
+    assert ledger["check_mode_op_id"] == "expected-check-mode-op"
+    assert ledger["root_candidate_op_id"] is None  # no linked check row, so no candidate id is derivable
+    assert ledger["incident_id"] == "INC-20260629-LEDGER"
+    assert ledger["stages"][1]["stage_id"] == "check_mode"
+    assert ledger["stages"][1]["ref_id"] == "expected-check-mode-op"
+    assert ledger["stages"][1]["status"] == "inferred_from_apply_parent"  # fallback used when the check row is absent
+    assert "candidate" in ledger["missing_stage_ids"]
--- a/apps/web/messages/en.json
+++ b/apps/web/messages/en.json
@@ -3710,7 +3710,10 @@
            "executor": "live ansible_apply_executed",
            "executorDetail": "post verifier {verifier} · KM {km} · Telegram {telegram}",
            "loop": "latest flow closed",
-            "loopDetail": "MCP {mcp} · RAG {rag} · PlayBook {playbook}"
+            "loopDetail": "MCP {mcp} · RAG {rag} · PlayBook {playbook}",
+            "ledger": "operation-id loop ledger",
+            "ledgerDetail": "{state} · stages {stages} · missing {missing}",
+            "stageLedger": "candidate → check-mode → apply → verifier → KM → Telegram"
          },
          "badges": {
            "override": "舊 no-send / no-live 已覆寫",
--- a/apps/web/messages/zh-TW.json
+++ b/apps/web/messages/zh-TW.json
@@ -3710,7 +3710,10 @@
            "executor": "live ansible_apply_executed",
            "executorDetail": "post verifier {verifier} · KM {km} · Telegram {telegram}",
            "loop": "latest flow closed",
-            "loopDetail": "MCP {mcp} · RAG {rag} · PlayBook {playbook}"
+            "loopDetail": "MCP {mcp} · RAG {rag} · PlayBook {playbook}",
+            "ledger": "operation-id 閉環總帳",
+            "ledgerDetail": "{state} · stages {stages} · missing {missing}",
+            "stageLedger": "candidate → check-mode → apply → verifier → KM → Telegram"
          },
          "badges": {
            "override": "舊 no-send / no-live 已覆寫",
--- a/apps/web/src/app/[locale]/governance/tabs/automation-inventory-tab.tsx
+++ b/apps/web/src/app/[locale]/governance/tabs/automation-inventory-tab.tsx
@@ -855,38 +855,69 @@ function AutonomousRuntimeControlReadbackGrid({
  t: ReturnType<typeof useTranslations>
 }) {
  const rollups = control.rollups
+  const loopLedger = control.runtime_receipt_readback?.autonomous_execution_loop_ledger
+  const loopStages = loopLedger?.stages ?? []
+  const missingStageCount = loopLedger?.missing_stage_ids.length ?? 0

  return (
-    <div style={{ display: 'grid', gridTemplateColumns: 'repeat(3, minmax(0, 1fr))', gap: 10 }} className="automation-inventory-current-autonomy-readback-grid">
-      <GateMatrixRow
-        label={t('globalControl.currentAutonomy.readback.marker')}
-        value={control.program_status.deploy_readback_marker}
-        detail={t('globalControl.currentAutonomy.readback.markerDetail', {
-          task: control.program_status.current_task_id,
-          status: control.program_status.status,
-        })}
-        tone="ok"
-      />
-      <GateMatrixRow
-        label={t('globalControl.currentAutonomy.readback.executor')}
-        value={String(rollups.live_ansible_apply_executed_count ?? 0)}
-        detail={t('globalControl.currentAutonomy.readback.executorDetail', {
-          verifier: rollups.live_post_apply_verifier_count ?? 0,
-          km: rollups.live_km_writeback_count ?? 0,
-          telegram: rollups.live_telegram_receipt_count ?? 0,
-        })}
-        tone={(rollups.live_ansible_apply_executed_count ?? 0) > 0 ? 'ok' : 'warn'}
-      />
-      <GateMatrixRow
-        label={t('globalControl.currentAutonomy.readback.loop')}
-        value={String(rollups.live_executor_latest_flow_closed_count ?? 0)}
-        detail={t('globalControl.currentAutonomy.readback.loopDetail', {
-          mcp: rollups.mcp_sensor_count ?? 0,
-          rag: rollups.rag_context_query_count ?? 0,
-          playbook: rollups.playbook_decision_class_count ?? 0,
-        })}
-        tone={(rollups.live_executor_latest_flow_closed_count ?? 0) > 0 ? 'ok' : 'warn'}
-      />
+    <div style={{ display: 'flex', flexDirection: 'column', gap: 10, minWidth: 0 }}>
+      <div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(180px, 1fr))', gap: 10 }} className="automation-inventory-current-autonomy-readback-grid">
+        <GateMatrixRow
+          label={t('globalControl.currentAutonomy.readback.marker')}
+          value={control.program_status.deploy_readback_marker}
+          detail={t('globalControl.currentAutonomy.readback.markerDetail', {
+            task: control.program_status.current_task_id,
+            status: control.program_status.status,
+          })}
+          tone="ok"
+        />
+        <GateMatrixRow
+          label={t('globalControl.currentAutonomy.readback.executor')}
+          value={String(rollups.live_ansible_apply_executed_count ?? 0)}
+          detail={t('globalControl.currentAutonomy.readback.executorDetail', {
+            verifier: rollups.live_post_apply_verifier_count ?? 0,
+            km: rollups.live_km_writeback_count ?? 0,
+            telegram: rollups.live_telegram_receipt_count ?? 0,
+          })}
+          tone={(rollups.live_ansible_apply_executed_count ?? 0) > 0 ? 'ok' : 'warn'}
+        />
+        <GateMatrixRow
+          label={t('globalControl.currentAutonomy.readback.loop')}
+          value={String(rollups.live_executor_latest_flow_closed_count ?? 0)}
+          detail={t('globalControl.currentAutonomy.readback.loopDetail', {
+            mcp: rollups.mcp_sensor_count ?? 0,
+            rag: rollups.rag_context_query_count ?? 0,
+            playbook: rollups.playbook_decision_class_count ?? 0,
+          })}
+          tone={(rollups.live_executor_latest_flow_closed_count ?? 0) > 0 ? 'ok' : 'warn'}
+        />
+        <GateMatrixRow
+          label={t('globalControl.currentAutonomy.readback.ledger')}
+          value={String(rollups.live_autonomous_execution_loop_closed_count ?? 0)}
+          detail={t('globalControl.currentAutonomy.readback.ledgerDetail', {
+            state: loopLedger?.execution_state ?? 'none',
+            stages: loopStages.length,
+            missing: missingStageCount,
+          })}
+          tone={(rollups.live_autonomous_execution_loop_closed_count ?? 0) > 0 ? 'ok' : missingStageCount > 0 ? 'warn' : 'neutral'}
+        />
+      </div>
+      {loopLedger ? (
+        <div style={{ padding: 12, border: '0.5px solid #d8e8df', borderRadius: 7, background: '#fbfffd', display: 'flex', flexDirection: 'column', gap: 8, minWidth: 0 }}>
+          <SmallLabel>{t('globalControl.currentAutonomy.readback.stageLedger')}</SmallLabel>
+          <div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(180px, 1fr))', gap: 8 }} className="automation-inventory-current-autonomy-stage-ledger">
+            {loopStages.map(stage => (
+              <GateMatrixRow
+                key={stage.stage_id}
+                label={stage.stage_id}
+                value={stage.status}
+                detail={stage.present ? stage.receipt_source : `${stage.receipt_source} · ${stage.next_action_if_missing ?? '--'}`}
+                tone={stage.present ? 'ok' : stage.writes_runtime_state ? 'warn' : 'neutral'}
+              />
+            ))}
+          </div>
+        </div>
+      ) : null}
    </div>
  )
 }
--- a/apps/web/src/lib/api-client.ts
+++ b/apps/web/src/lib/api-client.ts
@@ -2197,7 +2197,9 @@ export interface AiAgentAutonomousRuntimeControlSnapshot {
    runtime_write_receipt_type_count: number
    legacy_policy_overridden_count: number
    live_ansible_apply_executed_count?: number
+    live_auto_repair_execution_receipt_count?: number
    live_executor_latest_flow_closed_count?: number
+    live_autonomous_execution_loop_closed_count?: number
    live_km_writeback_count?: number
    live_post_apply_verifier_count?: number
    live_telegram_receipt_count?: number
@@ -2206,6 +2208,43 @@ export interface AiAgentAutonomousRuntimeControlSnapshot {
    playbook_decision_class_count?: number
    deploy_control_classifier_example_count?: number
  }
+  runtime_receipt_readback?: {
+    schema_version: 'ai_agent_autonomous_runtime_receipt_readback_v1'
+    db_read_status: string
+    writes_on_read: boolean
+    autonomous_execution_loop_ledger?: {
+      schema_version: 'ai_agent_autonomous_execution_loop_ledger_v1'
+      project_id: string
+      operation_id: string | null
+      root_candidate_op_id: string | null
+      check_mode_op_id: string | null
+      apply_op_id: string | null
+      incident_id: string | null
+      catalog_id: string | null
+      playbook_path: string | null
+      execution_state: string
+      closed: boolean
+      missing_stage_ids: string[]
+      next_executor_action: string
+      stages: Array<{
+        stage_id: string
+        receipt_source: string
+        present: boolean
+        status: string
+        ref_id: string | null
+        writes_runtime_state: boolean
+        next_action_if_missing: string | null
+      }>
+      safety_contract: {
+        writes_on_read: boolean
+        backfill_may_write_auto_repair_verifier_km: boolean
+        backfill_may_send_telegram: boolean
+        live_apply_may_send_telegram_gateway_receipt: boolean
+        reads_raw_sessions: boolean
+        reads_secret_values: boolean
+      }
+    }
+  }
 }

 export interface AiAgentAutomationInventorySnapshot {