77 lines
2.3 KiB
TypeScript
77 lines
2.3 KiB
TypeScript
import { Sandbox } from "@e2b/code-interpreter";
|
|
import { JudgeOverallResult, JudgeErrorClassification } from "@agent-bounty/contracts";
|
|
|
|
/**
 * Minimal shape shared by a completed sandbox command result and the error
 * the E2B SDK throws when a command exits with a non-zero status.
 */
type SandboxCommandOutput = { exitCode: number; stdout: string; stderr: string };

/** Narrows an unknown (possibly thrown) value to one carrying an exit code plus captured output. */
function isSandboxCommandOutput(value: unknown): value is SandboxCommandOutput {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  return (
    typeof candidate.exitCode === "number" &&
    typeof candidate.stdout === "string" &&
    typeof candidate.stderr === "string"
  );
}

/**
 * Extracts the `testResults` array from Vitest's JSON reporter output.
 * Returns an empty array when stdout is empty, is not valid JSON, or has no
 * `testResults` array; parse failures are logged rather than thrown.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- kept as any[] so the function's return shape is unchanged for callers
function parseVitestTestResults(stdout: string): any[] {
  if (!stdout) {
    return [];
  }
  try {
    const report: unknown = JSON.parse(stdout);
    if (typeof report !== "object" || report === null || !("testResults" in report)) {
      return [];
    }
    const { testResults } = report as { testResults: unknown };
    return Array.isArray(testResults) ? testResults : [];
  } catch (e: unknown) {
    console.error("Failed to parse vitest JSON output", e);
    return [];
  }
}

/**
 * Runs a submission's deliverables against a Vitest spec inside a fresh sandbox.
 *
 * Steps: create the sandbox, initialise an npm project, install vitest/react,
 * write every deliverable and the spec file, run vitest with the JSON reporter,
 * then always attempt to kill the sandbox.
 *
 * @param submissionId - Identifier of the submission; used only for log context.
 * @param deliverables - Map of file path to file content, written into the sandbox as-is.
 * @param testFileContent - Source of the spec, written to `test.spec.tsx`.
 * @returns A verdict object. PASS when vitest exits with 0; FAIL/TEST_FAIL when it
 *   exits non-zero; FAIL/ENVIRONMENT_ERROR when anything else goes wrong (sandbox
 *   creation, setup commands, file writes, timeouts). This function does not
 *   throw for those failures; they are reported in the returned object.
 */
export async function runSubmissionInSandbox(
  submissionId: string,
  deliverables: Record<string, string>,
  testFileContent: string
) {
  let sandbox: Sandbox | undefined;

  try {
    sandbox = await Sandbox.create();

    // Setup: initialise a simple project. A non-zero exit here rejects and is
    // reported as an environment error by the outer catch.
    await sandbox.commands.run("npm init -y");
    await sandbox.commands.run("npm install vitest react react-dom @types/react @types/react-dom");

    // Write deliverables.
    // NOTE(review): paths come straight from the submission. Nothing here stops a
    // deliverable from supplying e.g. a vitest config or package.json that could
    // influence the run — consider an allow-list before trusting verdicts.
    // For MVP we assume flat files or that e2b handles basic nested paths.
    for (const [filepath, content] of Object.entries(deliverables)) {
      await sandbox.files.write(filepath, content);
    }

    // The spec is written after the deliverables so a deliverable with the same
    // name cannot replace it.
    await sandbox.files.write("test.spec.tsx", testFileContent);

    // Run tests. The SDK rejects when a foreground command exits non-zero, which
    // is exactly what vitest does on failing tests; recover the exit code and
    // output from that rejection so it is judged as a test failure instead of
    // an environment error. Anything else (timeout, transport error) is rethrown.
    let result: SandboxCommandOutput;
    try {
      result = await sandbox.commands.run("npx vitest run test.spec.tsx --reporter json", {
        timeoutMs: 120000 // 2 minutes
      });
    } catch (runError: unknown) {
      if (!isSandboxCommandOutput(runError)) {
        throw runError;
      }
      result = {
        exitCode: runError.exitCode,
        stdout: runError.stdout,
        stderr: runError.stderr
      };
    }

    const passed = result.exitCode === 0;
    const overall: JudgeOverallResult = passed ? JudgeOverallResult.PASS : JudgeOverallResult.FAIL;
    const errorClass: JudgeErrorClassification | null = passed
      ? null
      : JudgeErrorClassification.TEST_FAIL;

    return {
      overall_result: overall,
      error_classification: errorClass,
      tests: parseVitestTestResults(result.stdout),
      artifacts: {
        logs: result.stdout + "\n" + result.stderr
      },
      // Resource usage is not measured; zeros are placeholders.
      resource_usage: {
        cpu_ms: 0,
        mem_peak_mb: 0,
        io_bytes: 0
      }
    };
  } catch (error: unknown) {
    console.error(`Sandbox evaluation failed for submission ${submissionId}`, error);
    return {
      overall_result: JudgeOverallResult.FAIL,
      error_classification: JudgeErrorClassification.ENVIRONMENT_ERROR,
      tests: [],
      artifacts: { logs: error instanceof Error ? error.message : String(error) },
      resource_usage: { cpu_ms: 0, mem_peak_mb: 0, io_bytes: 0 }
    };
  } finally {
    if (sandbox) {
      try {
        await sandbox.kill();
      } catch (killError: unknown) {
        // A throw from `finally` would replace the verdict computed above, so
        // cleanup failures are logged instead of propagated.
        console.error(`Failed to kill sandbox for submission ${submissionId}`, killError);
      }
    }
  }
}
|