Skip to content

Commit

Permalink
Fix pr #5223: Fix issue #5222: [Refactor]: Refactor the evaluation directory
Browse files Browse the repository at this point in the history
  • Loading branch information
openhands-agent committed Nov 23, 2024
1 parent a759dd8 commit b07b554
Show file tree
Hide file tree
Showing 5 changed files with 7 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ NEW_FOLDER_PATH=${FOLDER_PATH}.swebench_submission
mkdir -p $NEW_FOLDER_PATH

# Build all_preds.jsonl
poetry run python evaluation/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl
poetry run python evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl
mv $FOLDER_PATH/output.swebench.jsonl $NEW_FOLDER_PATH/all_preds.jsonl

# Build trajs/
Expand Down
4 changes: 2 additions & 2 deletions evaluation/benchmarks/swe_bench/scripts/eval_infer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ else

# ==== Convert OH format to SWE-bench format ====
echo "Merged output file with fine-grained report will be saved to $FILE_DIR"
poetry run python3 evaluation/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH
poetry run python3 evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH
# replace .jsonl with .swebench.jsonl in filename
SWEBENCH_FORMAT_JSONL=${PROCESS_FILEPATH/.jsonl/.swebench.jsonl}
echo "SWEBENCH_FORMAT_JSONL: $SWEBENCH_FORMAT_JSONL"
Expand Down Expand Up @@ -125,7 +125,7 @@ if [ -z "$INSTANCE_ID" ]; then
mv $REPORT_PATH $RESULT_OUTPUT_DIR/report.json
fi

poetry run python evaluation/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH
poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH

else
echo "Running SWE-bench evaluation on the instance_id: $INSTANCE_ID"
Expand Down
4 changes: 2 additions & 2 deletions evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ fi

echo "... Evaluating on $INPUT_FILE ..."

COMMAND="poetry run python evaluation/swe_bench/eval_infer.py \
COMMAND="poetry run python evaluation/benchmarks/swe_bench/eval_infer.py \
--eval-num-workers $NUM_WORKERS \
--input-file $INPUT_FILE \
--dataset $DATASET \
Expand All @@ -43,4 +43,4 @@ fi
eval $COMMAND

# update the output with evaluation results
poetry run python evaluation/swe_bench/scripts/eval/update_output_with_eval.py $INPUT_FILE
poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $INPUT_FILE
2 changes: 1 addition & 1 deletion evaluation/benchmarks/swe_bench/scripts/run_infer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ fi

function run_eval() {
local eval_note=$1
COMMAND="poetry run python evaluation/swe_bench/run_infer.py \
COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \
--agent-cls $AGENT \
--llm-config $MODEL_CONFIG \
--max-iterations $MAX_ITER \
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

set -e
EVAL_WORKSPACE="evaluation/swe_bench/eval_workspace"
EVAL_WORKSPACE="evaluation/benchmarks/swe_bench/eval_workspace"
mkdir -p $EVAL_WORKSPACE

# 1. Prepare REPO
Expand Down

0 comments on commit b07b554

Please sign in to comment.