105 lines
4.6 KiB
JSON
105 lines
4.6 KiB
JSON
{
|
|
"artifacts": {
|
|
"candidate_inputs": {
|
|
"label_leak_records": 0,
|
|
"local_path": "/tmp/nemotron-replay-prod-20260601165413-sanitized-candidate-inputs.jsonl",
|
|
"records": 50,
|
|
"schema": "docs/schemas/agent_replay_candidate_input_v1.schema.json",
|
|
"source_unsanitized_path": "/tmp/nemotron-replay-prod-20260601165413-candidate-inputs.jsonl"
|
|
},
|
|
"external_results_required_path": "/tmp/nemotron-replay-prod-20260601165413-external-results.jsonl",
|
|
"fixtures": {
|
|
"expected_action_marker_records": 17,
|
|
"local_path": "/tmp/nemotron-replay-prod-20260601165413-sanitized-fixtures.jsonl",
|
|
"operator_only": true,
|
|
"records": 50,
|
|
"schema": "docs/schemas/agent_replay_fixture_v1.schema.json",
|
|
"source_unsanitized_path": "/tmp/nemotron-replay-prod-20260601165413-fixtures.jsonl"
|
|
},
|
|
"preferred_post_external_run_command": "apps/api/.venv/bin/python scripts/agents/nemotron-finalize-replay.py --requests /tmp/nemotron-replay-prod-20260601165413-sanitized-nemotron-requests.jsonl --external-results /tmp/nemotron-replay-prod-20260601165413-external-results.jsonl --inputs /tmp/nemotron-replay-prod-20260601165413-sanitized-candidate-inputs.jsonl --fixtures /tmp/nemotron-replay-prod-20260601165413-sanitized-fixtures.jsonl --baseline /tmp/openclaw-incumbent.jsonl --output-prefix /tmp/nemotron-replay-prod-20260601165413 --target-stage shadow",
|
|
"request_pack": {
|
|
"label_leak_records": 0,
|
|
"local_path": "/tmp/nemotron-replay-prod-20260601165413-sanitized-nemotron-requests.jsonl",
|
|
"not_replacement_evidence_records": 50,
|
|
"records": 50,
|
|
"request_only_records": 50,
|
|
"schema": "docs/schemas/agent_nemotron_replay_request_v1.schema.json",
|
|
"sensitive_marker_records": 0,
|
|
"source_unsanitized_path": "/tmp/nemotron-replay-prod-20260601165413-nemotron-requests.local.jsonl"
|
|
},
|
|
"sanitize_report": "docs/evaluations/agent_nemotron_request_pack_sanitize_2026-06-01.json",
|
|
"sanitized_preflight_report": "docs/evaluations/agent_nemotron_external_runner_preflight_sanitized_2026-06-01.json"
|
|
},
|
|
"candidate_id": "nemo_nemotron_fabric",
|
|
"counts": {
|
|
"manifest": {
|
|
"candidate_inputs": 50,
|
|
"expected_action_marker_records": 17,
|
|
"fixtures": 50,
|
|
"requests": 50
|
|
},
|
|
"sanitize_report": {
|
|
"candidate_inputs": 50,
|
|
"expected_action_marker_records": null,
|
|
"fixtures": 50,
|
|
"requests": 50
|
|
},
|
|
"sanitized_preflight": {
|
|
"candidate_inputs": 50,
|
|
"expected_action_marker_records": 17,
|
|
"fixtures": 50,
|
|
"requests": 50
|
|
}
|
|
},
|
|
"decision": "ready_for_approval",
|
|
"failures": [],
|
|
"gates": {
|
|
"candidate_is_nemotron_fabric": true,
|
|
"counts_match_across_reports": true,
|
|
"external_calls_not_performed_by_codex": true,
|
|
"external_execution_still_requires_approval": true,
|
|
"external_output_contract_declared": true,
|
|
"manifest_schema_valid": true,
|
|
"manifest_status_sanitized_ready": true,
|
|
"manifest_uses_sanitized_tmp_artifacts": true,
|
|
"minimum_records_met": true,
|
|
"no_label_leaks": true,
|
|
"no_missing_extra_or_duplicate_records": true,
|
|
"no_sensitive_context_markers": true,
|
|
"post_external_finalizer_declared": true,
|
|
"raw_artifacts_not_committed": true,
|
|
"request_pack_is_request_only": true,
|
|
"request_pack_not_replacement_evidence": true,
|
|
"run_id_present": true,
|
|
"sanitize_failures_empty": true,
|
|
"sanitize_preflight_valid": true,
|
|
"sanitize_report_schema_valid": true,
|
|
"sanitize_report_valid": true,
|
|
"sanitize_sensitive_markers_removed": true,
|
|
"sanitized_preflight_candidate_valid": true,
|
|
"sanitized_preflight_failures_empty": true,
|
|
"sanitized_preflight_schema_valid": true,
|
|
"sanitized_preflight_valid": true
|
|
},
|
|
"minimum_records": 50,
|
|
"next_actions": [
|
|
"Obtain explicit commander approval before external execution.",
|
|
"Run the approved offline NeMo/NIM/Nemotron runner against the sanitized request pack only.",
|
|
"Write external results to /tmp/nemotron-replay-prod-20260601165413-external-results.jsonl.",
|
|
"Run the preferred post-external finalizer command."
|
|
],
|
|
"ready": true,
|
|
"run_id": "nemotron-replay-prod-20260601165413",
|
|
"safety": {
|
|
"approval_required_before_external_execution": true,
|
|
"candidate_input_label_leak_records": 0,
|
|
"external_calls_performed_by_codex": false,
|
|
"not_replacement_evidence_records": 50,
|
|
"raw_artifacts_committed": false,
|
|
"request_context_label_leak_records": 0,
|
|
"request_only_records": 50,
|
|
"sensitive_marker_records": 0
|
|
},
|
|
"schema_version": "agent_nemotron_external_runner_readiness_v1"
|
|
}
|