Files
awoooi/docs/evaluations/agent_langgraph_replay_summary_2026-06-02.json
Your Name cfb866d055
Some checks failed
Ansible Lint / lint (push) Successful in 35s
CD Pipeline / tests (push) Failing after 13s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Code Review / ai-code-review (push) Failing after 11s
feat(governance): add agent market automation surfaces
2026-06-04 21:50:55 +08:00

72 lines
2.5 KiB
JSON

{
"schema_version": "agent_langgraph_replay_summary_v1",
"generated_at": "2026-06-02T10:35:00+08:00",
"candidate_id": "langgraph_incident_kernel",
"candidate_role": "durable_incident_workflow_kernel",
"run_id": "nemotron-replay-prod-20260602095438",
"adapter_mode": "deterministic_offline_workflow_kernel",
"sdk_dependency": "langgraph_python_package_not_installed",
"external_calls": false,
"tools_executed": false,
"production_writes": false,
"fixture_labels_read_by_adapter": false,
"records": 50,
"reports": {
"adapter_report": "docs/evaluations/agent_langgraph_replay_adapter_report_2026-06-02.json",
"contract_report": "docs/evaluations/agent_langgraph_replay_contract_2026-06-02.json",
"grading_report": "docs/evaluations/agent_langgraph_replay_grading_2026-06-02.json",
"pipeline_report": "docs/evaluations/agent_langgraph_replay_pipeline_2026-06-02.json",
"scorecard": "docs/evaluations/agent_langgraph_replay_scorecard_2026-06-02.json",
"promotion_gate": "docs/evaluations/agent_langgraph_replay_promotion_gate_2026-06-02.json"
},
"scorecard": {
"candidate_total_score": 0.4,
"openclaw_same_run_total_score": 0.6983,
"beats_baseline": false,
"hard_gates_pass": true,
"eligible_for_canary": true,
"gate_failures": [],
"metrics": {
"audit_trace_rate": 1.0,
"dangerous_action_block_rate": 1.0,
"error_rate": 0.0,
"fallback_rate": 0.0,
"false_repair_rate": 0.0,
"hitl_preserved_rate": 1.0,
"latency_p95_ms": 0.257,
"rca_correct_rate": 0.0,
"repair_success_rate": 0.0,
"tool_dry_run_pass_rate": 0.0
}
},
"grading": {
"graded_records": 13,
"action_match_true": 0,
"action_match_false": 13,
"missing_fixtures": 0,
"missing_expected_markers": 37
},
"promotion_gate": {
"approved": false,
"decision": "blocked",
"failures": [
"candidate_does_not_beat_baseline"
]
},
"professional_decision": {
"may_replace_openclaw": false,
"may_enter_shadow": false,
"may_enter_canary": false,
"recommended_role": [
"workflow-kernel safety baseline",
"durable orchestration candidate after real LangGraph SDK integration",
"state/trace/HITL shell for a stronger diagnostician"
],
"next_safe_steps": [
"Do not promote this no-SDK deterministic adapter to shadow.",
"If installing LangGraph is approved, rerun with the real SDK and identical replay gates.",
"Pair a LangGraph workflow kernel with a stronger diagnostician before another quality replay."
]
}
}