diff --git a/evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_folder_to_swebench_submission.sh b/evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_folder_to_swebench_submission.sh index 8bbaa6ddcea5..044f9972f4eb 100755 --- a/evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_folder_to_swebench_submission.sh +++ b/evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_folder_to_swebench_submission.sh @@ -5,7 +5,7 @@ NEW_FOLDER_PATH=${FOLDER_PATH}.swebench_submission mkdir -p $NEW_FOLDER_PATH # Build all_preds.jsonl -poetry run python evaluation/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl +poetry run python evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl mv $FOLDER_PATH/output.swebench.jsonl $NEW_FOLDER_PATH/all_preds.jsonl # Build trajs/ diff --git a/evaluation/benchmarks/swe_bench/scripts/eval_infer.sh b/evaluation/benchmarks/swe_bench/scripts/eval_infer.sh index 8e263e10ca1c..13ef271671a5 100755 --- a/evaluation/benchmarks/swe_bench/scripts/eval_infer.sh +++ b/evaluation/benchmarks/swe_bench/scripts/eval_infer.sh @@ -58,7 +58,7 @@ else # ==== Convert OH format to SWE-bench format ==== echo "Merged output file with fine-grained report will be saved to $FILE_DIR" - poetry run python3 evaluation/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH + poetry run python3 evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH # replace .jsonl with .swebench.jsonl in filename SWEBENCH_FORMAT_JSONL=${PROCESS_FILEPATH/.jsonl/.swebench.jsonl} echo "SWEBENCH_FORMAT_JSONL: $SWEBENCH_FORMAT_JSONL" @@ -125,7 +125,7 @@ if [ -z "$INSTANCE_ID" ]; then mv $REPORT_PATH $RESULT_OUTPUT_DIR/report.json fi - poetry run python evaluation/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH + poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH else echo "Running SWE-bench evaluation on the instance_id: $INSTANCE_ID" diff --git a/evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh b/evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh index dead194ef256..68280978368e 100755 --- a/evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh +++ b/evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh @@ -28,7 +28,7 @@ fi echo "... Evaluating on $INPUT_FILE ..." -COMMAND="poetry run python evaluation/swe_bench/eval_infer.py \ +COMMAND="poetry run python evaluation/benchmarks/swe_bench/eval_infer.py \ --eval-num-workers $NUM_WORKERS \ --input-file $INPUT_FILE \ --dataset $DATASET \ @@ -43,4 +43,4 @@ fi eval $COMMAND # update the output with evaluation results -poetry run python evaluation/swe_bench/scripts/eval/update_output_with_eval.py $INPUT_FILE +poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $INPUT_FILE diff --git a/evaluation/benchmarks/swe_bench/scripts/run_infer.sh b/evaluation/benchmarks/swe_bench/scripts/run_infer.sh index 520003635a4e..a27bd7cdbb14 100755 --- a/evaluation/benchmarks/swe_bench/scripts/run_infer.sh +++ b/evaluation/benchmarks/swe_bench/scripts/run_infer.sh @@ -84,7 +84,7 @@ fi function run_eval() { local eval_note=$1 - COMMAND="poetry run python evaluation/swe_bench/run_infer.py \ + COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \ --agent-cls $AGENT \ --llm-config $MODEL_CONFIG \ --max-iterations $MAX_ITER \ diff --git a/evaluation/benchmarks/swe_bench/scripts/setup/prepare_swe_utils.sh b/evaluation/benchmarks/swe_bench/scripts/setup/prepare_swe_utils.sh index bc1f4c03b7f4..7091b6f586b7 100755 --- a/evaluation/benchmarks/swe_bench/scripts/setup/prepare_swe_utils.sh +++ b/evaluation/benchmarks/swe_bench/scripts/setup/prepare_swe_utils.sh @@ -1,7 +1,7 @@ #!/bin/bash set -e -EVAL_WORKSPACE="evaluation/swe_bench/eval_workspace" +EVAL_WORKSPACE="evaluation/benchmarks/swe_bench/eval_workspace" mkdir -p $EVAL_WORKSPACE # 1. Prepare REPO