Skip to content

Commit

Permalink
Add swe-bench env.
Browse files Browse the repository at this point in the history
  • Loading branch information
john-b-yang committed Oct 19, 2023
1 parent 3472c77 commit 4e9b173
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 8 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@

All notable changes to this project will be documented in this file.

## [1.0.2] - 10/27/2023

* The InterCode [webpage](https://intercode-benchmark.github.io/) has been redesigned in a leaderboard style 🏆.
* If you evaluate on InterCode and would like to put your results on the leaderboard, please create an issue or email John directly 📧.
* We wrote a standalone [report](https://john-b-yang.github.io/static/misc/preprint_InterCode_CTF.pdf) describing the operational **InterCode-CTF** 🚩 environment, a dataset of **100** task instances, and our initial experiments.
* 🚨 New Environment! The recently released [SWE-bench](https://swe-bench.github.io/) benchmark introduces *software engineering* as a task. To support agent-based approaches, we have released the **IC-SWE-bench** environment, which presents the SWE-bench task in an interactive setting!

✍🏻 John

## [1.0.1] - 8/15/2023

I am pleased to announce that, since its initial release, InterCode has been extended to support a number of new languages and datasets. They are summarized as follows:
Expand Down
11 changes: 11 additions & 0 deletions docker/swe.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Image built by setup.sh as `intercode-swe` for the IC-SWE-bench environment.
FROM ubuntu:latest

# Install bash and git, then drop the apt cache and package lists in the
# same layer so they are not stored in the image.
RUN apt-get update \
    && apt-get install -y bash git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Global git identity used inside the container.
# NOTE(review): the e-mail value below looks like a scrape-time redaction
# placeholder rather than a real address — confirm the intended value.
RUN git config --global user.email "[email protected]" \
    && git config --global user.name "intercode"

WORKDIR /
3 changes: 2 additions & 1 deletion intercode/envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
from intercode.envs.bash.bash_env import BashEnv
from intercode.envs.sql.sql_env import SqlEnv
from intercode.envs.ctf.ctf_env import CTFEnv
from intercode.envs.python.python_env import PythonEnv
from intercode.envs.python.python_env import PythonEnv
from intercode.envs.swe.swe_env import SWEEnv
1 change: 1 addition & 0 deletions intercode/envs/bash/bash_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"intercode-bash": "/bin/sh",
"intercode-nl2bash": "/bin/bash",
"intercode-ctf": "/bin/bash",
"intercode-swe": "/bin/bash",
}

class BashEnv(IntercodeEnv):
Expand Down
3 changes: 2 additions & 1 deletion intercode/envs/ic_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ def reset(self, index: int = None) -> Tuple[str, Dict]:
self.logger.info("-------------\nNew task episode initialized")
self.query_idx = np.random.randint(0, len(self.data_loader)) if index is None else index
self.record = self.data_loader.get(self.query_idx)
self.query, self.gold = self.record["query"], self.record["gold"]
self.query = self.record["query"]
self.gold = self.record["gold"] if "gold" in self.record else "N/A"
self.logger.info(f"Query: {self.query}")
self.logger.info(f"Gold: {self.gold}")
self.observation = self.query
Expand Down
48 changes: 48 additions & 0 deletions intercode/envs/swe/swe_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import os

from intercode.envs import (
BashEnv, IntercodeEnv, AGENT_OBS, REWARD
)
from typing import Dict, Tuple

class SWEEnv(BashEnv):
    """Gym environment for SWE-bench.

    Each task record is expected to carry a 'repo' field ("owner/name") and
    a 'base_commit' field; `reset_container` uses them to clone the
    repository into the container and check out the starting commit.
    `self.container`, `self.logger`, `self.record` and `self.clean_cmd` are
    provided by the base classes (not defined in this class).
    """
    name = "ic_swe"

    def __init__(self, image_name: str, **kwargs):
        """Create the environment for the docker image `image_name`.

        Args:
            image_name: name of the docker image to run.
            **kwargs: forwarded unchanged to `IntercodeEnv.__init__`.
        """
        # Deliberately calls IntercodeEnv.__init__ directly, bypassing
        # BashEnv.__init__ (kept exactly as the original did).
        IntercodeEnv.__init__(self, image_name, **kwargs)
        # Credential embedded in the clone URL; falls back to the literal
        # "git" when GITHUB_TOKEN is not set.
        self.token = os.environ.get("GITHUB_TOKEN", "git")

    def reset_container(self) -> None:
        """Put the container into a clean state for the current record.

        Clones the record's repository if it is not present yet, discards
        every local modification, and checks out the record's base commit.
        """
        self.workdir = "/"
        listing = self.container.exec_run(self.clean_cmd('ls')).output.decode("utf-8")
        # Compare against whole directory entries. A substring test on the
        # raw `ls` text would treat "a__b" as present when only a similarly
        # named directory such as "a__b-old" exists.
        folders = {entry.strip() for entry in listing.splitlines()}

        # Clone repository if not already cloned
        repo_name = self.record['repo'].replace("/", "__")
        if repo_name not in folders:
            self.logger.info(f"{repo_name} not found in container, cloning...")
            clone_cmd = f"git clone https://{self.token}@github.com/swe-bench/{repo_name}.git"
            clone_result = self.container.exec_run(self.clean_cmd(clone_cmd))
            if clone_result.exit_code != 0:
                # Only the exit code is logged: the command (and possibly
                # its output) contains the token.
                self.logger.error(
                    f"Cloning {repo_name} failed with exit code {clone_result.exit_code}")

        # TODO(?): Add logic for installing conda environment

        # Clean repository of any modifications + Checkout base commit.
        # The index is reset BEFORE the working tree is restored: restoring
        # first would leave staged edits to tracked files in place, because
        # `git restore .` copies from the index and the later reset would
        # merely unstage them.
        self.workdir = f"/{repo_name}/"
        cleanup_cmds = (
            "git status",
            "git reset HEAD .",
            "git restore .",
            "git clean -fdx",
            f"git -c advice.detachedHead=false checkout {self.record['base_commit']}",
        )
        for cmd in cleanup_cmds:
            self.container.exec_run(self.clean_cmd(cmd), workdir=self.workdir)

        # TODO(?): Add logic to install repository at base commit

    def get_reward(self) -> Tuple[float, Dict]:
        """Return the episode reward and an info dict.

        Evaluation is not implemented yet, so this always returns a reward
        of 0 and an empty dict.
        """
        # TODO(?): Add evaluation (Apply test patch + run testing script + parse logs & get results)
        return 0, {}

    def close(self):
        """Stop the agent container, logging before and after."""
        self.logger.info("Beginning environment shutdown...")
        self.container.stop()
        self.logger.info("Agent container stopped")
8 changes: 4 additions & 4 deletions intercode/utils/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,18 @@ def _load_data(self):
else:
raise ValueError(f"Unsupported file type: {file_ext}")

if "gold" not in data.columns or "query" not in data.columns:
raise ValueError("Data must have columns/fields 'gold' and 'query'")
if "query" not in data.columns:
raise ValueError("Data must have 'query' column/field")
return data

def _validate_file_path(self, file_path: str):
"""Check if the file extension is one of tsv, csv, json, or pickle"""
if not os.path.exists(file_path):
raise OSError("Invalid file path")
raise OSError(f"Invalid file path: {file_path}")
valid_extensions = [".tsv", ".csv", ".pickle", ".pkl", ".json"]
_, ext = os.path.splitext(file_path)
if ext not in valid_extensions:
raise ValueError("File type is not supported")
raise ValueError(f"File type is not supported: {file_path}")

# Length of the loader: delegates to len() of the loaded data in self.data.
def __len__(self):
return len(self.data)
5 changes: 3 additions & 2 deletions run_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import readline

from intercode.envs import (
BashEnv, PythonEnv, SqlEnv, CTFEnv
BashEnv, PythonEnv, SqlEnv, CTFEnv, SWEEnv
)
from experiments.policies import HumanPolicy
from typing import Dict, List
Expand All @@ -22,7 +22,8 @@ def preprocess_sql(record: Dict) -> List:
"bash": {"env": BashEnv, "image_name": "intercode-nl2bash", "data_path": "./data/bash/nl2bash/nl2bash_fs_1.json"},
"python": {"env": PythonEnv, "image_name": "intercode-python", "data_path": "./data/python/mbpp/ic_mbpp.json"},
"sql": {"env": SqlEnv, "image_name": "docker-env-sql", "data_path": "./data/sql/bird/ic_bird.json", "preprocess": preprocess_sql},
"ctf": {"env": CTFEnv, "image_name": "intercode-ctf", "data_path": "./data/ctf/ic_ctf.json", "preprocess": preprocess_ctf}
"ctf": {"env": CTFEnv, "image_name": "intercode-ctf", "data_path": "./data/ctf/ic_ctf.json", "preprocess": preprocess_ctf},
"swe": {"env": SWEEnv, "image_name": "intercode-swe", "data_path": "./data/swe-bench/ic_swe_bench.json"}
}


Expand Down
3 changes: 3 additions & 0 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ docker build -t intercode-python -f docker/python.Dockerfile .

echo "Setting up docker images for ctf..."
docker build -t intercode-ctf -f docker/ctf.Dockerfile .

echo "Setting up docker images for swe-bench..."
docker build -t intercode-swe -f docker/swe.Dockerfile .

0 comments on commit 4e9b173

Please sign in to comment.