// agent-bounty-protocol/apps/web/src/lib/sandbox.ts

import { Sandbox } from "@e2b/code-interpreter";
import { JudgeOverallResult, JudgeErrorClassification } from "@agent-bounty/contracts";

/** Exit status and captured output of a finished sandbox command. */
interface FinishedCommand {
  exitCode: number;
  stdout: string;
  stderr: string;
}

/**
 * Extracts exit status and output from a value that carries them, or returns
 * `null` when the value does not look like a finished command.
 *
 * Used to recover the outcome of a command whose non-zero exit was surfaced as
 * a thrown error rather than as a returned result.
 */
function asFinishedCommand(value: unknown): FinishedCommand | null {
  if (typeof value !== "object" || value === null) {
    return null;
  }
  const candidate = value as Record<string, unknown>;
  if (typeof candidate.exitCode !== "number") {
    return null;
  }
  return {
    exitCode: candidate.exitCode,
    stdout: typeof candidate.stdout === "string" ? candidate.stdout : "",
    stderr: typeof candidate.stderr === "string" ? candidate.stderr : ""
  };
}

/**
 * Runs a submission's deliverables against a test file inside a fresh sandbox
 * and reports the verdict.
 *
 * The sandbox is initialised as an npm project with vitest and React installed,
 * every deliverable is written to its given path, the test file is written as
 * `test.spec.tsx`, and vitest is run once with the JSON reporter.
 *
 * This function does not reject for evaluation failures: any failure while
 * evaluating is converted into a `FAIL` / `ENVIRONMENT_ERROR` result, and a
 * failure to tear the sandbox down is only logged.
 *
 * @param submissionId - Identifier of the submission; currently unused by this function.
 * @param deliverables - Map of file path to file content to write into the sandbox.
 * @param testFileContent - Source of the vitest spec to run against the deliverables.
 * @returns A result object with `overall_result`, `error_classification`
 *   (`null` on pass, `TEST_FAIL` when vitest exits non-zero, `ENVIRONMENT_ERROR`
 *   when the sandbox itself failed), the parsed `tests`, combined `logs`, and
 *   placeholder `resource_usage` (always zeros).
 */
export async function runSubmissionInSandbox(
  submissionId: string,
  deliverables: Record<string, string>,
  testFileContent: string
) {
  let sandbox: Sandbox | undefined;
  try {
    sandbox = await Sandbox.create();

    // Setup: initialise a simple project. A failure here is an environment
    // problem and is deliberately left to the outer catch.
    await sandbox.commands.run("npm init -y");
    await sandbox.commands.run("npm install vitest react react-dom @types/react @types/react-dom");

    // Write deliverables.
    // NOTE(review): paths come straight from the submission and are not
    // validated; a deliverable could target project files such as package.json.
    // Confirm whether an allow-list / path normalisation is needed upstream.
    for (const [filepath, content] of Object.entries(deliverables)) {
      await sandbox.files.write(filepath, content);
    }

    // Written after the deliverables so a deliverable with the same name
    // cannot replace the spec.
    await sandbox.files.write("test.spec.tsx", testFileContent);

    // Run tests. A non-zero exit (i.e. failing tests) can be surfaced by the
    // SDK as a thrown error that still carries exitCode/stdout/stderr; recover
    // it here so it is classified as TEST_FAIL rather than ENVIRONMENT_ERROR.
    let run: FinishedCommand;
    try {
      run = await sandbox.commands.run("npx vitest run test.spec.tsx --reporter json", {
        timeoutMs: 120000 // 2 minutes
      });
    } catch (runError: unknown) {
      const finished = asFinishedCommand(runError);
      if (finished === null) {
        // Timeouts, transport errors, etc. are genuine environment failures.
        throw runError;
      }
      run = finished;
    }

    const passed = run.exitCode === 0;
    const overall: JudgeOverallResult = passed ? JudgeOverallResult.PASS : JudgeOverallResult.FAIL;
    const errorClass: JudgeErrorClassification | null = passed ? null : JudgeErrorClassification.TEST_FAIL;

    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- reporter payload is passed through unvalidated
    let parsedTests: any[] = [];
    if (run.stdout) {
      try {
        const report: unknown = JSON.parse(run.stdout);
        if (typeof report === "object" && report !== null) {
          const results = (report as { testResults?: unknown }).testResults;
          if (Array.isArray(results)) {
            parsedTests = results;
          }
        }
      } catch (e) {
        // Best effort: the verdict is based on the exit code, not on the report.
        console.error("Failed to parse vitest JSON output", e);
      }
    }

    return {
      overall_result: overall,
      error_classification: errorClass,
      tests: parsedTests,
      artifacts: {
        logs: run.stdout + "\n" + run.stderr
      },
      resource_usage: {
        cpu_ms: 0,
        mem_peak_mb: 0,
        io_bytes: 0
      }
    };
  } catch (error: unknown) {
    console.error("Sandbox evaluation failed", error);
    return {
      overall_result: JudgeOverallResult.FAIL,
      error_classification: JudgeErrorClassification.ENVIRONMENT_ERROR,
      tests: [],
      artifacts: { logs: error instanceof Error ? error.message : String(error) },
      resource_usage: { cpu_ms: 0, mem_peak_mb: 0, io_bytes: 0 }
    };
  } finally {
    if (sandbox) {
      try {
        await sandbox.kill();
      } catch (killError) {
        // A throw from `finally` would replace the result computed above.
        console.error("Failed to kill sandbox", killError);
      }
    }
  }
}