fix(api): record ansible apply repair receipts
Some checks failed
Code Review / ai-code-review (push) Successful in 14s
CD Pipeline / tests (push) Successful in 1m39s
CD Pipeline / build-and-deploy (push) Successful in 6m9s
CD Pipeline / post-deploy-checks (push) Has been cancelled

This commit is contained in:
Your Name
2026-06-27 13:46:40 +08:00
parent ddaad724ad
commit 8761459f9d
2 changed files with 137 additions and 0 deletions

View File

@@ -483,6 +483,99 @@ def _build_apply_result_payload(result: AnsibleRunResult) -> tuple[str, dict[str
return status, output, dry_run_result, error
def _build_auto_repair_execution_receipt(
    claim: AnsibleCheckModeClaim,
    result: AnsibleRunResult,
    *,
    apply_op_id: str,
) -> dict[str, Any]:
    """Assemble the receipt fields describing one controlled Ansible apply.

    Args:
        claim: The check-mode claim the apply was run for; supplies the
            incident, catalog, playbook path, op ids and risk level.
        result: Outcome of the apply run; a return code of 0 counts as success.
        apply_op_id: Operation id of the apply run, recorded as the
            ``apply:<id>`` entry of ``executed_steps``.

    Returns:
        A dict with the keys ``incident_id``, ``playbook_id``,
        ``playbook_name``, ``success``, ``executed_steps``, ``error_message``,
        ``triggered_by``, ``similarity_score``, ``risk_level`` and
        ``execution_time_ms``.
    """
    exit_code = result.returncode
    succeeded = exit_code == 0

    # The catalog id is cut to 36 characters; a missing/empty id falls back
    # to the generic "ansible" marker.
    playbook_id = str(claim.catalog_id or "")[:36]
    if not playbook_id:
        playbook_id = "ansible"

    playbook_name = f"Ansible controlled apply: {claim.apply_playbook_path}"
    playbook_name = playbook_name[:200]

    # Ordered trail of the operations that led to this apply.
    steps = [
        f"candidate:{claim.source_candidate_op_id}",
        f"check_mode:{claim.op_id}",
        f"apply:{apply_op_id}",
        f"catalog:{claim.catalog_id}",
        f"returncode:{exit_code}",
    ]

    if succeeded:
        failure_detail = None
    else:
        # Prefer stderr, fall back to stdout; `_tail` is called with 2000 as
        # its second argument.
        failure_detail = _tail(result.stderr or result.stdout, 2000)

    return {
        "incident_id": claim.incident_id,
        "playbook_id": playbook_id,
        "playbook_name": playbook_name,
        "success": succeeded,
        "executed_steps": steps,
        "error_message": failure_detail,
        "triggered_by": "ansible_controlled_apply",
        "similarity_score": None,
        "risk_level": str(claim.risk_level or ""),
        "execution_time_ms": result.duration_ms,
    }
async def _record_auto_repair_execution_receipt(
    claim: AnsibleCheckModeClaim,
    result: AnsibleRunResult,
    *,
    apply_op_id: str,
    project_id: str,
) -> bool:
    """Insert the auto-repair receipt for one controlled apply, at most once.

    The receipt is built by ``_build_auto_repair_execution_receipt`` and
    inserted into ``auto_repair_executions`` unless a row already exists for
    the same incident, the same ``triggered_by`` value and the same
    ``apply:<apply_op_id>`` step.

    Args:
        claim: The check-mode claim the apply was run for.
        result: Outcome of the apply run.
        apply_op_id: Operation id of the apply run; used as the idempotency key.
        project_id: Passed to ``get_db_context`` to select the database context.

    Returns:
        ``True`` when a new row was inserted. ``False`` when an equivalent row
        already existed or when the insert raised; failures are logged as a
        warning and never propagated, so receipt recording stays best-effort.
    """
    receipt = _build_auto_repair_execution_receipt(
        claim,
        result,
        apply_op_id=apply_op_id,
    )
    # Exact step the receipt builder records for this apply. Matching it as a
    # whole JSON array element (instead of a `LIKE '%<id>%'` substring scan)
    # keeps ids that merely share a prefix (e.g. "op-1" vs "op-10"), or that
    # contain the LIKE wildcards "_" / "%", from being mistaken for duplicates.
    apply_step = f"apply:{apply_op_id}"
    try:
        async with get_db_context(project_id) as db:
            inserted = await db.execute(
                text("""
                    INSERT INTO auto_repair_executions (
                        incident_id,
                        playbook_id,
                        playbook_name,
                        success,
                        executed_steps,
                        error_message,
                        triggered_by,
                        similarity_score,
                        risk_level,
                        execution_time_ms
                    )
                    SELECT
                        :incident_id,
                        :playbook_id,
                        :playbook_name,
                        :success,
                        CAST(:executed_steps AS jsonb),
                        :error_message,
                        :triggered_by,
                        :similarity_score,
                        :risk_level,
                        :execution_time_ms
                    WHERE NOT EXISTS (
                        SELECT 1
                        FROM auto_repair_executions existing
                        WHERE existing.incident_id = :incident_id
                          AND existing.triggered_by = :triggered_by
                          AND CAST(existing.executed_steps AS jsonb)
                              @> CAST(:apply_step_json AS jsonb)
                    )
                    RETURNING id
                """),
                {
                    **receipt,
                    "executed_steps": json.dumps(
                        receipt["executed_steps"], ensure_ascii=False
                    ),
                    # One-element JSON array: `@>` is true only when the stored
                    # array contains this exact string element.
                    "apply_step_json": json.dumps([apply_step], ensure_ascii=False),
                },
            )
            # RETURNING yields an id only when the guarded INSERT added a row.
            return inserted.scalar() is not None
    except Exception as exc:
        # Deliberately broad: a receipt failure must not fail the apply itself.
        logger.warning(
            "ansible_auto_repair_execution_receipt_failed",
            incident_id=claim.incident_id,
            catalog_id=claim.catalog_id,
            apply_op_id=apply_op_id,
            error=str(exc),
        )
        return False
async def claim_pending_check_modes(
*,
project_id: str = "awoooi",
@@ -888,6 +981,12 @@ async def run_controlled_apply_for_claim(
"op_id": apply_op_id,
},
)
receipt_written = await _record_auto_repair_execution_receipt(
claim,
result,
apply_op_id=apply_op_id,
project_id=project_id,
)
logger.info(
"ansible_controlled_apply_completed",
@@ -898,6 +997,7 @@ async def run_controlled_apply_for_claim(
catalog_id=claim.catalog_id,
returncode=result.returncode,
timed_out=result.timed_out,
auto_repair_receipt_written=receipt_written,
)
return result

View File

@@ -12,7 +12,10 @@ from src.services.awooop_ansible_audit_service import (
record_ansible_decision_audit,
)
from src.services.awooop_ansible_check_mode_service import (
AnsibleCheckModeClaim,
AnsibleRunResult,
_automation_operation_log_incident_id,
_build_auto_repair_execution_receipt,
build_ansible_apply_command,
build_ansible_check_mode_claim_input,
build_ansible_check_mode_command,
@@ -1442,6 +1445,40 @@ def test_ansible_apply_command_uses_controlled_apply_without_check(tmp_path: Pat
assert str(known_hosts) in spec.command[-1]
def test_ansible_controlled_apply_builds_auto_repair_receipt() -> None:
    """A successful controlled apply yields a receipt carrying the claim's fields."""
    playbook = "infra/ansible/playbooks/110-devops.yml"
    check_claim = AnsibleCheckModeClaim(
        op_id="check-op-1",
        source_candidate_op_id="candidate-op-1",
        incident_id="INC-20260627-NODE110",
        catalog_id="ansible:110-devops",
        playbook_path=playbook,
        apply_playbook_path=playbook,
        inventory_hosts=("host_110",),
        risk_level="medium",
        input_payload={"controlled_apply_allowed": True},
    )
    run_result = AnsibleRunResult(returncode=0, stdout="ok", stderr="", duration_ms=1234)

    receipt = _build_auto_repair_execution_receipt(
        check_claim, run_result, apply_op_id="apply-op-1"
    )

    # Scalar fields copied or derived from the claim and the run result.
    expected_fields = {
        "incident_id": "INC-20260627-NODE110",
        "playbook_id": "ansible:110-devops",
        "triggered_by": "ansible_controlled_apply",
        "risk_level": "medium",
        "execution_time_ms": 1234,
    }
    for field_name, expected_value in expected_fields.items():
        assert receipt[field_name] == expected_value

    # Return code 0 must be reported as a real boolean success.
    assert receipt["success"] is True
    # The apply operation id is traceable through the executed steps.
    assert "apply:apply-op-1" in receipt["executed_steps"]
def test_ansible_claim_query_limits_recent_candidate_backlog() -> None:
source = inspect.getsource(claim_pending_check_modes)