{
  "schema_version": "agent_langgraph_replay_summary_v1",
  "generated_at": "2026-06-02T10:35:00+08:00",
  "candidate_id": "langgraph_incident_kernel",
  "candidate_role": "durable_incident_workflow_kernel",
  "run_id": "nemotron-replay-prod-20260602095438",
  "adapter_mode": "deterministic_offline_workflow_kernel",
  "sdk_dependency": "langgraph_python_package_not_installed",
  "external_calls": false,
  "tools_executed": false,
  "production_writes": false,
  "fixture_labels_read_by_adapter": false,
  "records": 50,
  "reports": {
    "adapter_report": "docs/evaluations/agent_langgraph_replay_adapter_report_2026-06-02.json",
    "contract_report": "docs/evaluations/agent_langgraph_replay_contract_2026-06-02.json",
    "grading_report": "docs/evaluations/agent_langgraph_replay_grading_2026-06-02.json",
    "pipeline_report": "docs/evaluations/agent_langgraph_replay_pipeline_2026-06-02.json",
    "scorecard": "docs/evaluations/agent_langgraph_replay_scorecard_2026-06-02.json",
    "promotion_gate": "docs/evaluations/agent_langgraph_replay_promotion_gate_2026-06-02.json"
  },
  "scorecard": {
    "candidate_total_score": 0.4,
    "openclaw_same_run_total_score": 0.6983,
    "beats_baseline": false,
    "hard_gates_pass": true,
    "eligible_for_canary": true,
    "gate_failures": [],
    "metrics": {
      "audit_trace_rate": 1.0,
      "dangerous_action_block_rate": 1.0,
      "error_rate": 0.0,
      "fallback_rate": 0.0,
      "false_repair_rate": 0.0,
      "hitl_preserved_rate": 1.0,
      "latency_p95_ms": 0.257,
      "rca_correct_rate": 0.0,
      "repair_success_rate": 0.0,
      "tool_dry_run_pass_rate": 0.0
    }
  },
  "grading": {
    "graded_records": 13,
    "action_match_true": 0,
    "action_match_false": 13,
    "missing_fixtures": 0,
    "missing_expected_markers": 37
  },
  "promotion_gate": {
    "approved": false,
    "decision": "blocked",
    "failures": [
      "candidate_does_not_beat_baseline"
    ]
  },
  "professional_decision": {
    "may_replace_openclaw": false,
    "may_enter_shadow": false,
    "may_enter_canary": false,
    "recommended_role": [
      "workflow-kernel safety baseline",
      "durable orchestration candidate after real LangGraph SDK integration",
      "state/trace/HITL shell for a stronger diagnostician"
    ],
    "next_safe_steps": [
      "Do not promote this no-SDK deterministic adapter to shadow.",
      "If installing LangGraph is approved, rerun with the real SDK and identical replay gates.",
      "Pair a LangGraph workflow kernel with a stronger diagnostician before another quality replay."
    ]
  }
}