Add local Docker image support to the polyglot benchmark.

Review notes for this revision of the patch (text before the first
"diff --git" line is ignored by git apply / patch):

- build_local_docker.sh: install unzip, which the Gradle step needs; quote
  the cleanup trap; use "set -euo pipefail"; drop the unused REPO_ROOT;
  end the file with a newline.
- run_infer.sh: a POLYGLOT_DOCKER_IMAGE value supplied by the caller now
  takes precedence over docker_image.env, matching README "Option 2";
  a failed local build aborts the run.
- The "index" lines of the two script diffs were dropped because their
  blob hashes no longer match the edited content.
- NOTE(review): this patch was recovered from a copy whose newlines and
  indentation had been collapsed. Whitespace inside context lines (for
  example the indentation of fenced blocks in README list items) is a
  best-effort reconstruction; verify against the target tree before applying.

diff --git a/evaluation/benchmarks/polyglot_benchmark/README.md b/evaluation/benchmarks/polyglot_benchmark/README.md
index 46f79dfeb9c5..9fa8bfb1dfb3 100644
--- a/evaluation/benchmarks/polyglot_benchmark/README.md
+++ b/evaluation/benchmarks/polyglot_benchmark/README.md
@@ -53,6 +53,37 @@ export POLYGLOT_BENCHMARK_PATH="/path/to/polyglot-benchmark" # Path to the poly
 export USE_UNIT_TESTS="true" # Whether to run unit tests (default: true)
 export NO_DOCKER="true" # Skip Docker container creation and use local runtime (default: false)
 export POLYGLOT_DOCKER_IMAGE="image:tag" # Custom Docker image to use (default: ghcr.io/opendevin/eval-polyglot:v1.0.0)
+export BUILD_LOCAL_DOCKER="true" # Build a local Docker image if one doesn't exist (default: false)
+```
+
+### Docker Support
+
+The benchmark uses Docker to create isolated environments for running code in different programming languages. There are two ways to use Docker with this benchmark:
+
+#### Option 1: Build a Local Docker Image
+
+You can build a local Docker image that contains all the necessary tools for the benchmark:
+
+```bash
+# Build the Docker image
+./evaluation/benchmarks/polyglot_benchmark/scripts/build_local_docker.sh
+
+# Run the benchmark with the local image
+./evaluation/benchmarks/polyglot_benchmark/scripts/run_infer.sh eval_gpt4_1106_preview HEAD CodeActAgent 1 1
+```
+
+Alternatively, you can set the `BUILD_LOCAL_DOCKER` environment variable:
+
+```bash
+BUILD_LOCAL_DOCKER=true ./evaluation/benchmarks/polyglot_benchmark/scripts/run_infer.sh eval_gpt4_1106_preview HEAD CodeActAgent 1 1
+```
+
+#### Option 2: Use a Pre-built Docker Image
+
+You can specify a custom Docker image to use:
+
+```bash
+POLYGLOT_DOCKER_IMAGE="your-custom-image:tag" ./evaluation/benchmarks/polyglot_benchmark/scripts/run_infer.sh eval_gpt4_1106_preview HEAD CodeActAgent 1 1
 ```
 
 ### Troubleshooting
@@ -67,18 +98,20 @@ Command 'docker buildx build ...' returned non-zero exit status 1
 
 You can try the following solutions:
 
-1. Run with `NO_DOCKER=true` to use the local runtime instead:
+1. Build a local Docker image as described above.
+
+2. Run with `NO_DOCKER=true` to use the local runtime instead:
    ```bash
    NO_DOCKER=true ./evaluation/benchmarks/polyglot_benchmark/scripts/run_infer.sh eval_gpt4_1106_preview HEAD CodeActAgent 1 1
    ```
 
-2. Make sure Docker is installed and running:
+3. Make sure Docker is installed and running:
    ```bash
    docker --version
    docker ps
    ```
 
-3. Check if you have permission to use Docker:
+4. Check if you have permission to use Docker:
    ```bash
    sudo usermod -aG docker $USER
    # Then log out and log back in
diff --git a/evaluation/benchmarks/polyglot_benchmark/scripts/build_local_docker.sh b/evaluation/benchmarks/polyglot_benchmark/scripts/build_local_docker.sh
new file mode 100755
--- /dev/null
+++ b/evaluation/benchmarks/polyglot_benchmark/scripts/build_local_docker.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+#
+# Build the local Docker image used by the polyglot benchmark.
+#
+# Generates a Dockerfile in a temporary build context, builds it as
+# polyglot-benchmark:local, and writes docker_image.env next to the scripts/
+# directory so the image name can be loaded with `source`.
+
+set -euo pipefail
+
+# Get the directory of this script
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+BENCHMARK_DIR="$( cd "${SCRIPT_DIR}/.." && pwd )"
+
+# Create a temporary directory for the Docker build
+BUILD_DIR=$(mktemp -d)
+# Single quotes defer expansion until the trap fires; the inner quotes keep a
+# temp path containing spaces intact.
+trap 'rm -rf -- "$BUILD_DIR"' EXIT
+
+echo "Creating Docker build context in $BUILD_DIR"
+
+# Create a simple Dockerfile that includes all the necessary tools
+cat > "$BUILD_DIR/Dockerfile" << 'EOF'
+FROM ubuntu:22.04
+
+# Avoid prompts from apt
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install common dependencies (unzip is required by the Gradle step below)
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    python3 \
+    python3-pip \
+    python3-dev \
+    python3-venv \
+    wget \
+    unzip \
+    software-properties-common \
+    apt-transport-https \
+    ca-certificates \
+    gnupg \
+    lsb-release \
+    libboost-all-dev \
+    cmake \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python packages
+RUN pip3 install --no-cache-dir pytest pytest-timeout
+
+# Install Node.js and npm
+RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
+    && apt-get install -y nodejs \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Rust
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+# Install Go
+# NOTE(review): the Go tarball and JAVA_HOME below are amd64-specific.
+RUN wget https://go.dev/dl/go1.20.5.linux-amd64.tar.gz \
+    && tar -C /usr/local -xzf go1.20.5.linux-amd64.tar.gz \
+    && rm go1.20.5.linux-amd64.tar.gz
+ENV PATH="/usr/local/go/bin:${PATH}"
+
+# Install Java
+RUN apt-get update && apt-get install -y openjdk-17-jdk \
+    && rm -rf /var/lib/apt/lists/*
+ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
+
+# Install Gradle
+RUN wget https://services.gradle.org/distributions/gradle-7.6-bin.zip \
+    && mkdir /opt/gradle \
+    && unzip -d /opt/gradle gradle-7.6-bin.zip \
+    && rm gradle-7.6-bin.zip
+ENV PATH="/opt/gradle/gradle-7.6/bin:${PATH}"
+
+# Create workspace directory
+RUN mkdir -p /workspace
+WORKDIR /workspace
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+ENV PYTHONIOENCODING=UTF-8
+
+CMD ["/bin/bash"]
+EOF
+
+# Build the Docker image
+IMAGE_NAME="polyglot-benchmark:local"
+echo "Building Docker image: $IMAGE_NAME"
+docker build -t "$IMAGE_NAME" "$BUILD_DIR"
+
+# Record the image name so it can be loaded with `source`; %q shell-quotes it
+printf 'export POLYGLOT_DOCKER_IMAGE=%q\n' "$IMAGE_NAME" > "$BENCHMARK_DIR/docker_image.env"
+
+echo "Docker image built successfully: $IMAGE_NAME"
+echo "To use this image, run:"
+echo "source $BENCHMARK_DIR/docker_image.env"
+echo "Then run the benchmark as usual."
diff --git a/evaluation/benchmarks/polyglot_benchmark/scripts/run_infer.sh b/evaluation/benchmarks/polyglot_benchmark/scripts/run_infer.sh
--- a/evaluation/benchmarks/polyglot_benchmark/scripts/run_infer.sh
+++ b/evaluation/benchmarks/polyglot_benchmark/scripts/run_infer.sh
@@ -14,7 +14,36 @@ EVAL_LANGUAGES=${7:-""}
 # Set environment variables
 export USE_UNIT_TESTS=${USE_UNIT_TESTS:-"true"}
 export NO_DOCKER=${NO_DOCKER:-"false"}
-export POLYGLOT_DOCKER_IMAGE=${POLYGLOT_DOCKER_IMAGE:-"ghcr.io/opendevin/eval-polyglot:v1.0.0"}
+
+# Resolve the Docker image to use. Precedence:
+#   1. POLYGLOT_DOCKER_IMAGE already set by the caller
+#   2. the image recorded in docker_image.env by build_local_docker.sh
+#      (built first when BUILD_LOCAL_DOCKER=true and no env file exists)
+#   3. the default pre-built image
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+BENCHMARK_DIR="$( cd "${SCRIPT_DIR}/.." && pwd )"
+DOCKER_ENV_FILE="${BENCHMARK_DIR}/docker_image.env"
+
+if [ -z "${POLYGLOT_DOCKER_IMAGE:-}" ]; then
+  if [ ! -f "$DOCKER_ENV_FILE" ] && [ "${BUILD_LOCAL_DOCKER:-false}" = "true" ]; then
+    echo "Building local Docker image..."
+    # Stop here on failure instead of continuing without an image
+    if ! "${SCRIPT_DIR}/build_local_docker.sh"; then
+      echo "Failed to build local Docker image" >&2
+      exit 1
+    fi
+  fi
+
+  if [ -f "$DOCKER_ENV_FILE" ]; then
+    echo "Loading Docker image configuration from $DOCKER_ENV_FILE"
+    source "$DOCKER_ENV_FILE"
+  fi
+fi
+
+# Fall back to the default image when nothing above selected one
+export POLYGLOT_DOCKER_IMAGE=${POLYGLOT_DOCKER_IMAGE:-"ghcr.io/opendevin/eval-polyglot:v1.0.0"}
+
+echo "Using Docker image: $POLYGLOT_DOCKER_IMAGE"
 
 # Try to find the polyglot-benchmark repository
 if [ -z "$POLYGLOT_BENCHMARK_PATH" ]; then