Fix pr #5223: Fix issue #5222: [Refactor]: Refactor the evaluation directory
All-Hands-AI · Nov 23, 2024 · b07b554 · b07b554
1 parent a759dd8
commit b07b554
Show file tree

Hide file tree

Showing 5 changed files with 7 additions and 7 deletions.
diff --git a/evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_folder_to_swebench_submission.sh b/evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_folder_to_swebench_submission.sh
@@ -5,7 +5,7 @@ NEW_FOLDER_PATH=${FOLDER_PATH}.swebench_submission
 mkdir -p $NEW_FOLDER_PATH
 
 # Build all_preds.jsonl
-poetry run python evaluation/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl
+poetry run python evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl
 mv $FOLDER_PATH/output.swebench.jsonl $NEW_FOLDER_PATH/all_preds.jsonl
 
 # Build trajs/

diff --git a/evaluation/benchmarks/swe_bench/scripts/eval_infer.sh b/evaluation/benchmarks/swe_bench/scripts/eval_infer.sh
@@ -58,7 +58,7 @@ else
 
     # ==== Convert OH format to SWE-bench format ====
     echo "Merged output file with fine-grained report will be saved to $FILE_DIR"
-    poetry run python3 evaluation/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH
+    poetry run python3 evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH
     # replace .jsonl with .swebench.jsonl in filename
     SWEBENCH_FORMAT_JSONL=${PROCESS_FILEPATH/.jsonl/.swebench.jsonl}
     echo "SWEBENCH_FORMAT_JSONL: $SWEBENCH_FORMAT_JSONL"
@@ -125,7 +125,7 @@ if [ -z "$INSTANCE_ID" ]; then
         mv $REPORT_PATH $RESULT_OUTPUT_DIR/report.json
     fi
 
-    poetry run python evaluation/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH
+    poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH
 
 else
     echo "Running SWE-bench evaluation on the instance_id: $INSTANCE_ID"

diff --git a/evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh b/evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh
@@ -28,7 +28,7 @@ fi
 
 echo "... Evaluating on $INPUT_FILE ..."
 
-COMMAND="poetry run python evaluation/swe_bench/eval_infer.py \
+COMMAND="poetry run python evaluation/benchmarks/swe_bench/eval_infer.py \
   --eval-num-workers $NUM_WORKERS \
   --input-file $INPUT_FILE \
   --dataset $DATASET \
@@ -43,4 +43,4 @@ fi
 eval $COMMAND
 
 # update the output with evaluation results
-poetry run python evaluation/swe_bench/scripts/eval/update_output_with_eval.py $INPUT_FILE
+poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $INPUT_FILE
diff --git a/evaluation/benchmarks/swe_bench/scripts/run_infer.sh b/evaluation/benchmarks/swe_bench/scripts/run_infer.sh
@@ -84,7 +84,7 @@ fi
 
 function run_eval() {
   local eval_note=$1
-  COMMAND="poetry run python evaluation/swe_bench/run_infer.py \
+  COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \
     --agent-cls $AGENT \
     --llm-config $MODEL_CONFIG \
     --max-iterations $MAX_ITER \

diff --git a/evaluation/benchmarks/swe_bench/scripts/setup/prepare_swe_utils.sh b/evaluation/benchmarks/swe_bench/scripts/setup/prepare_swe_utils.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 set -e
-EVAL_WORKSPACE="evaluation/swe_bench/eval_workspace"
+EVAL_WORKSPACE="evaluation/benchmarks/swe_bench/eval_workspace"
 mkdir -p $EVAL_WORKSPACE
 
 # 1. Prepare REPO