Files
awoooi/docs/evaluations/agent_nemotron_external_runner_readiness_2026-06-01.json
Your Name cfb866d055
Some checks failed
Ansible Lint / lint (push) Successful in 35s
CD Pipeline / tests (push) Failing after 13s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Code Review / ai-code-review (push) Failing after 11s
feat(governance): add agent market automation surfaces
2026-06-04 21:50:55 +08:00

105 lines
4.6 KiB
JSON

{
"artifacts": {
"candidate_inputs": {
"label_leak_records": 0,
"local_path": "/tmp/nemotron-replay-prod-20260601165413-sanitized-candidate-inputs.jsonl",
"records": 50,
"schema": "docs/schemas/agent_replay_candidate_input_v1.schema.json",
"source_unsanitized_path": "/tmp/nemotron-replay-prod-20260601165413-candidate-inputs.jsonl"
},
"external_results_required_path": "/tmp/nemotron-replay-prod-20260601165413-external-results.jsonl",
"fixtures": {
"expected_action_marker_records": 17,
"local_path": "/tmp/nemotron-replay-prod-20260601165413-sanitized-fixtures.jsonl",
"operator_only": true,
"records": 50,
"schema": "docs/schemas/agent_replay_fixture_v1.schema.json",
"source_unsanitized_path": "/tmp/nemotron-replay-prod-20260601165413-fixtures.jsonl"
},
"preferred_post_external_run_command": "apps/api/.venv/bin/python scripts/agents/nemotron-finalize-replay.py --requests /tmp/nemotron-replay-prod-20260601165413-sanitized-nemotron-requests.jsonl --external-results /tmp/nemotron-replay-prod-20260601165413-external-results.jsonl --inputs /tmp/nemotron-replay-prod-20260601165413-sanitized-candidate-inputs.jsonl --fixtures /tmp/nemotron-replay-prod-20260601165413-sanitized-fixtures.jsonl --baseline /tmp/openclaw-incumbent.jsonl --output-prefix /tmp/nemotron-replay-prod-20260601165413 --target-stage shadow",
"request_pack": {
"label_leak_records": 0,
"local_path": "/tmp/nemotron-replay-prod-20260601165413-sanitized-nemotron-requests.jsonl",
"not_replacement_evidence_records": 50,
"records": 50,
"request_only_records": 50,
"schema": "docs/schemas/agent_nemotron_replay_request_v1.schema.json",
"sensitive_marker_records": 0,
"source_unsanitized_path": "/tmp/nemotron-replay-prod-20260601165413-nemotron-requests.local.jsonl"
},
"sanitize_report": "docs/evaluations/agent_nemotron_request_pack_sanitize_2026-06-01.json",
"sanitized_preflight_report": "docs/evaluations/agent_nemotron_external_runner_preflight_sanitized_2026-06-01.json"
},
"candidate_id": "nemo_nemotron_fabric",
"counts": {
"manifest": {
"candidate_inputs": 50,
"expected_action_marker_records": 17,
"fixtures": 50,
"requests": 50
},
"sanitize_report": {
"candidate_inputs": 50,
"expected_action_marker_records": null,
"fixtures": 50,
"requests": 50
},
"sanitized_preflight": {
"candidate_inputs": 50,
"expected_action_marker_records": 17,
"fixtures": 50,
"requests": 50
}
},
"decision": "ready_for_approval",
"failures": [],
"gates": {
"candidate_is_nemotron_fabric": true,
"counts_match_across_reports": true,
"external_calls_not_performed_by_codex": true,
"external_execution_still_requires_approval": true,
"external_output_contract_declared": true,
"manifest_schema_valid": true,
"manifest_status_sanitized_ready": true,
"manifest_uses_sanitized_tmp_artifacts": true,
"minimum_records_met": true,
"no_label_leaks": true,
"no_missing_extra_or_duplicate_records": true,
"no_sensitive_context_markers": true,
"post_external_finalizer_declared": true,
"raw_artifacts_not_committed": true,
"request_pack_is_request_only": true,
"request_pack_not_replacement_evidence": true,
"run_id_present": true,
"sanitize_failures_empty": true,
"sanitize_preflight_valid": true,
"sanitize_report_schema_valid": true,
"sanitize_report_valid": true,
"sanitize_sensitive_markers_removed": true,
"sanitized_preflight_candidate_valid": true,
"sanitized_preflight_failures_empty": true,
"sanitized_preflight_schema_valid": true,
"sanitized_preflight_valid": true
},
"minimum_records": 50,
"next_actions": [
"Obtain explicit commander approval before external execution.",
"Run the approved offline NeMo/NIM/Nemotron runner against the sanitized request pack only.",
"Write external results to /tmp/nemotron-replay-prod-20260601165413-external-results.jsonl.",
"Run the preferred post-external finalizer command."
],
"ready": true,
"run_id": "nemotron-replay-prod-20260601165413",
"safety": {
"approval_required_before_external_execution": true,
"candidate_input_label_leak_records": 0,
"external_calls_performed_by_codex": false,
"not_replacement_evidence_records": 50,
"raw_artifacts_committed": false,
"request_context_label_leak_records": 0,
"request_only_records": 50,
"sensitive_marker_records": 0
},
"schema_version": "agent_nemotron_external_runner_readiness_v1"
}