feat(agents): classify runtime apply repair readback

This commit is contained in:
Your Name
2026-06-28 19:30:27 +08:00
parent d5c9d467c1
commit 688bfd7740
2 changed files with 299 additions and 0 deletions

View File

@@ -230,6 +230,155 @@ def _latest_flow_closure(
}
def _latest_failure_classification(
    *,
    operation_latest_rows: Iterable[Mapping[str, Any] | Any],
    verifier_latest_rows: Iterable[Mapping[str, Any] | Any],
    latest_flow_closure: Mapping[str, Any],
) -> dict[str, Any]:
    """Classify the outcome of the first ``ansible_apply_executed`` row.

    No command output is carried in the result: both output-related evidence
    flags are hard-coded to ``False``.

    Args:
        operation_latest_rows: Operation rows, each normalised through
            ``_row_mapping``. The first row whose ``operation_type`` is
            ``ansible_apply_executed`` is treated as the latest apply
            (presumably the caller supplies rows newest-first -- TODO confirm).
        verifier_latest_rows: Verifier rows, normalised the same way. The
            first row whose ``apply_op_id`` equals the apply row's ``op_id``
            is used; no verifier is matched when the apply has no ``op_id``.
        latest_flow_closure: Closure summary. ``closed`` and the ``has_*``
            flags only count when they are exactly ``True``.

    Returns:
        A dict with ``schema_version``, ``classification``, ``action``,
        ``target_selector`` and ``evidence``. When an apply row was found it
        also carries ``safe_next_steps``; when none was found the
        classification is ``no_controlled_apply_observed`` and
        ``target_selector`` is empty.
    """
    schema_version = "ai_agent_executor_failure_classification_v1"

    # Normalise both inputs up front, operations first.
    apply_candidates = [_row_mapping(item) for item in operation_latest_rows]
    verifier_candidates = [_row_mapping(item) for item in verifier_latest_rows]

    apply_row = None
    for candidate in apply_candidates:
        if str(candidate.get("operation_type") or "") == "ansible_apply_executed":
            apply_row = candidate
            break

    if apply_row is None:
        return {
            "schema_version": schema_version,
            "classification": "no_controlled_apply_observed",
            "action": "wait_for_controlled_apply_receipt",
            "target_selector": {},
            "evidence": {
                "latest_flow_closed": False,
                "output_tail_in_readback": False,
                "unredacted_output_required": False,
            },
        }

    apply_op_id = str(apply_row.get("op_id") or "")
    incident_id = str(apply_row.get("incident_id") or "")
    returncode = _int_value(apply_row.get("returncode"))

    # Without an op_id there is nothing to join on, so the verifier stays empty.
    matched_verifier: Mapping[str, Any] = {}
    if apply_op_id:
        for candidate in verifier_candidates:
            if str(candidate.get("apply_op_id") or "") == apply_op_id:
                matched_verifier = candidate
                break

    verification_result = str(matched_verifier.get("verification_result") or "").lower()
    flow_closed = latest_flow_closure.get("closed") is True

    if returncode == 0:
        # An absent verifier result ("") is accepted alongside "success".
        if verification_result in {"success", ""} and flow_closed:
            classification = "latest_controlled_apply_closed_success"
            action = "keep_receipt_chain_closed"
        else:
            # NOTE(review): a zero return code with a non-success verifier
            # result on a closed flow also lands here, although no receipt is
            # missing in that case -- confirm this is intended.
            classification = "controlled_apply_success_receipt_gap"
            action = "backfill_missing_verifier_km_or_telegram_receipt"
    elif flow_closed:
        classification = "closed_failed_apply_requires_ai_repair"
        action = "queue_check_mode_replay_and_playbook_repair_candidate"
    else:
        classification = "failed_apply_receipt_gap_requires_backfill_then_repair"
        action = "backfill_missing_receipts_then_queue_repair_candidate"

    target_selector = {
        "incident_id": incident_id or None,
        "apply_op_id": apply_op_id or None,
        "parent_op_id": apply_row.get("parent_op_id"),
        "catalog_id": apply_row.get("catalog_id"),
        "playbook_path": apply_row.get("playbook_path"),
        "execution_mode": apply_row.get("execution_mode"),
    }
    evidence = {
        "operation_status": apply_row.get("status"),
        # The raw row value is reported here, not the parsed integer.
        "returncode": apply_row.get("returncode"),
        "verification_result": verification_result or None,
        "latest_flow_closed": flow_closed,
        "has_post_apply_verifier": latest_flow_closure.get("has_post_apply_verifier") is True,
        "has_km_writeback": latest_flow_closure.get("has_km_writeback") is True,
        "has_telegram_receipt": latest_flow_closure.get("has_telegram_receipt") is True,
        "output_tail_in_readback": False,
        "unredacted_output_required": False,
    }
    return {
        "schema_version": schema_version,
        "classification": classification,
        "action": action,
        "target_selector": target_selector,
        "evidence": evidence,
        "safe_next_steps": [
            "run_no_write_check_mode_replay",
            "extract_sanitized_failed_task_summary",
            "write_km_playbook_repair_candidate",
            "retry_controlled_apply_only_after_check_mode_passes",
        ],
    }
def _controlled_retry_package(classification: Mapping[str, Any]) -> dict[str, Any]:
"""Build the next no-write repair package from the public failure classification."""
target_selector = classification.get("target_selector")
if not isinstance(target_selector, Mapping):
target_selector = {}
apply_op_id = str(target_selector.get("apply_op_id") or "")
repair_required = classification.get("classification") in {
"closed_failed_apply_requires_ai_repair",
"failed_apply_receipt_gap_requires_backfill_then_repair",
}
return {
"schema_version": "ai_agent_controlled_retry_package_v1",
"package_id": (
f"ansible_retry:{apply_op_id[:8]}"
if repair_required and apply_op_id
else None
),
"status": (
"ready_for_no_write_check_mode_replay"
if repair_required
else "not_required_for_latest_apply"
),
"target_selector": dict(target_selector),
"source_of_truth": {
"catalog_id": target_selector.get("catalog_id"),
"playbook_path": target_selector.get("playbook_path"),
"source_diff_required_before_retry": True,
"failed_task_summary_required": True,
},
"preflight": {
"no_write_check_mode_replay_required": repair_required,
"reuse_parent_check_mode_op_id": target_selector.get("parent_op_id"),
"unredacted_output_required": False,
"secret_value_collection_allowed": False,
},
"apply_gate": {
"controlled_apply_retry_allowed_now": False,
"opens_legacy_runner": False,
"requires_check_mode_success_before_apply": repair_required,
},
"rollback": {
"rollback_candidate_required": repair_required,
"destructive_rollback_allowed": False,
"rollback_plan_source": "playbook_repair_candidate_after_failed_task_summary",
},
"post_apply": {
"post_apply_verifier_required": repair_required,
"km_playbook_trust_writeback_required": repair_required,
"telegram_receipt_required": repair_required,
},
"next_ai_action": (
"run_no_write_check_mode_replay"
if repair_required
else "keep_latest_apply_receipts"
),
}
def classify_deploy_control_plane_observation(
*,
run_status: str,
@@ -446,6 +595,12 @@ def build_runtime_receipt_readback_from_rows(
km_latest_rows=km_latest,
telegram_latest_rows=telegram_latest,
)
latest_failure = _latest_failure_classification(
operation_latest_rows=operation_latest,
verifier_latest_rows=verifier_latest,
latest_flow_closure=latest_closure,
)
retry_package = _controlled_retry_package(latest_failure)
apply_summary = operation_summary.get("ansible_apply_executed") or {}
readback = {
"schema_version": _LIVE_READBACK_SCHEMA_VERSION,
@@ -529,6 +684,8 @@ def build_runtime_receipt_readback_from_rows(
),
},
"latest_flow_closure": latest_closure,
"latest_failure_classification": latest_failure,
"controlled_retry_package": retry_package,
}
if error_type:
readback["error"] = {
@@ -562,6 +719,23 @@ def _attach_runtime_receipt_readback(
if (readback.get("latest_flow_closure") or {}).get("closed") is True
else 0
),
"live_executor_latest_apply_repair_required_count": (
1
if (
(readback.get("latest_failure_classification") or {}).get("classification")
in {
"closed_failed_apply_requires_ai_repair",
"failed_apply_receipt_gap_requires_backfill_then_repair",
}
)
else 0
),
"live_executor_retry_package_ready_count": (
1
if (readback.get("controlled_retry_package") or {}).get("status")
== "ready_for_no_write_check_mode_replay"
else 0
),
})
return payload

View File

@@ -219,3 +219,128 @@ def test_runtime_receipt_readback_summarizes_live_executor_closure_rows():
"closed": True,
"missing": [],
}
assert readback["latest_failure_classification"]["classification"] == (
"latest_controlled_apply_closed_success"
)
assert readback["controlled_retry_package"]["status"] == "not_required_for_latest_apply"
def test_runtime_receipt_readback_classifies_closed_failed_apply_as_ai_repair():
    """A failed apply with verifier, KM and Telegram rows present is routed to AI repair."""
    apply_op_id = "94925d5e-6fdc-49c3-90e8-f0a0d57a6a58"
    parent_op_id = "8b555f41-e81f-4d8e-956b-fb20d358db63"
    incident_id = "INC-20260628-A40A9A"
    catalog_id = "ansible:188-ai-web"
    playbook_path = "infra/ansible/playbooks/188-ai-web.yml"

    # Fixture rows: one failed apply plus the receipts that reference it.
    failed_apply_row = {
        "op_id": apply_op_id,
        "parent_op_id": parent_op_id,
        "operation_type": "ansible_apply_executed",
        "status": "failed",
        "actor": "ansible_controlled_apply_worker",
        "incident_id": incident_id,
        "catalog_id": catalog_id,
        "playbook_path": playbook_path,
        "execution_mode": "controlled_apply",
        "returncode": "2",
        "duration_ms": 4797,
    }
    failed_verifier_row = {
        "id": "evidence-1",
        "incident_id": incident_id,
        "verification_result": "failed",
        "apply_op_id": apply_op_id,
        "catalog_id": catalog_id,
        "playbook_path": playbook_path,
        "returncode": "2",
    }
    km_row = {
        "id": "km-1",
        "title": "AI 自動修復沉澱INC-20260628-A40A9A",
        "related_incident_id": incident_id,
        "related_playbook_id": catalog_id,
        "path_type": "ansible_apply_receipt:94925d5e",
        "status": "REVIEW",
        "created_by": "ai_agent_ansible_worker",
    }
    telegram_row = {
        "message_id": "telegram-row-1",
        "run_id": "telegram-run-1",
        "message_type": "final",
        "send_status": "sent",
        "provider_message_id": "32016",
        "incident_id": incident_id,
        "action": "controlled_apply_result",
    }

    readback = build_runtime_receipt_readback_from_rows(
        project_id="awoooi",
        db_read_status="ok",
        operation_count_rows=[
            {
                "operation_type": "ansible_apply_executed",
                "status": "failed",
                "total": 1,
                "recent": 1,
            },
        ],
        operation_latest_rows=[failed_apply_row],
        verifier_count_rows=[
            {"verification_result": "failed", "total": 1, "recent": 1},
        ],
        verifier_latest_rows=[failed_verifier_row],
        km_count_rows=[
            {"status": "REVIEW", "total": 1, "recent": 1},
        ],
        km_latest_rows=[km_row],
        telegram_count_rows=[
            {"send_status": "sent", "total": 1, "recent": 1},
        ],
        telegram_latest_rows=[telegram_row],
    )

    # Failure classification: closed flow + failed apply => AI repair.
    failure = readback["latest_failure_classification"]
    assert failure["classification"] == "closed_failed_apply_requires_ai_repair"
    assert failure["action"] == "queue_check_mode_replay_and_playbook_repair_candidate"
    assert failure["target_selector"] == {
        "incident_id": incident_id,
        "apply_op_id": apply_op_id,
        "parent_op_id": parent_op_id,
        "catalog_id": catalog_id,
        "playbook_path": playbook_path,
        "execution_mode": "controlled_apply",
    }

    evidence = failure["evidence"]
    assert evidence["returncode"] == "2"
    assert evidence["verification_result"] == "failed"
    assert evidence["latest_flow_closed"] is True
    assert evidence["output_tail_in_readback"] is False
    assert evidence["unredacted_output_required"] is False

    assert failure["safe_next_steps"] == [
        "run_no_write_check_mode_replay",
        "extract_sanitized_failed_task_summary",
        "write_km_playbook_repair_candidate",
        "retry_controlled_apply_only_after_check_mode_passes",
    ]

    # Retry package: ready for a check-mode replay, never an immediate apply.
    package = readback["controlled_retry_package"]
    assert package["package_id"] == "ansible_retry:94925d5e"
    assert package["status"] == "ready_for_no_write_check_mode_replay"
    assert package["source_of_truth"] == {
        "catalog_id": catalog_id,
        "playbook_path": playbook_path,
        "source_diff_required_before_retry": True,
        "failed_task_summary_required": True,
    }

    preflight = package["preflight"]
    assert preflight["no_write_check_mode_replay_required"] is True
    assert preflight["reuse_parent_check_mode_op_id"] == parent_op_id

    apply_gate = package["apply_gate"]
    assert apply_gate["controlled_apply_retry_allowed_now"] is False
    assert apply_gate["requires_check_mode_success_before_apply"] is True

    assert package["rollback"]["destructive_rollback_allowed"] is False
    assert package["post_apply"] == {
        "post_apply_verifier_required": True,
        "km_playbook_trust_writeback_required": True,
        "telegram_receipt_required": True,
    }
    assert package["next_ai_action"] == "run_no_write_check_mode_replay"