From 8a4ca1e48c329f895682967aca70b824922570cc Mon Sep 17 00:00:00 2001 From: AlexCuadron Date: Wed, 26 Feb 2025 08:15:04 +0000 Subject: [PATCH] Improve output directory detection in evaluation scripts with better path matching and debugging output --- .../aider_bench/scripts/run_infer.sh | 9 ++++++-- .../polyglot_benchmark/scripts/run_infer.sh | 21 ++++++++++++++++--- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/evaluation/benchmarks/aider_bench/scripts/run_infer.sh b/evaluation/benchmarks/aider_bench/scripts/run_infer.sh index 3526381de5ab..737b004121c7 100755 --- a/evaluation/benchmarks/aider_bench/scripts/run_infer.sh +++ b/evaluation/benchmarks/aider_bench/scripts/run_infer.sh @@ -61,11 +61,11 @@ fi eval $COMMAND # Get the output directory - first try the default location -OUTPUT_DIR=$(find evaluation/evaluation_outputs/AiderBench/$AGENT -type d -name "*$EVAL_NOTE*" 2>/dev/null | sort -r | head -n 1) +OUTPUT_DIR=$(find evaluation/evaluation_outputs -path "*/AiderBench/$AGENT/*" -type d -name "*$EVAL_NOTE*" 2>/dev/null | sort -r | head -n 1) # If not found, try to find it anywhere under evaluation_outputs if [ -z "$OUTPUT_DIR" ]; then - OUTPUT_DIR=$(find . -path "*/evaluation_outputs/*" -type d -name "*$EVAL_NOTE*" 2>/dev/null | sort -r | head -n 1) + OUTPUT_DIR=$(find . -path "*/evaluation_outputs/*" -path "*/AiderBench/$AGENT/*" -type d -name "*$EVAL_NOTE*" 2>/dev/null | sort -r | head -n 1) fi # If still not found, try to find any output.jsonl file @@ -78,6 +78,11 @@ else OUTPUT_FILE="$OUTPUT_DIR/output.jsonl" fi +# Print the output directory and file for debugging +echo "" +echo "Output directory: $OUTPUT_DIR" +echo "Output file: $OUTPUT_FILE" + # Run evaluation if requested if [ "$RUN_EVALUATION" = "eval" ]; then echo "" diff --git a/evaluation/benchmarks/polyglot_benchmark/scripts/run_infer.sh b/evaluation/benchmarks/polyglot_benchmark/scripts/run_infer.sh index 112028eb7079..34bd41287dcf 100755 --- a/evaluation/benchmarks/polyglot_benchmark/scripts/run_infer.sh +++ b/evaluation/benchmarks/polyglot_benchmark/scripts/run_infer.sh @@ -236,7 +236,11 @@ if [ "$ONE_PER_LANGUAGE" = true ]; then # Evaluate each language's results for LANG in "${LANGUAGES[@]}"; do # Try to find the output directory for this language - LANG_OUTPUT_DIR=$(find . -path "*/evaluation_outputs/*" -type d -name "*one_per_language_${LANG}*" 2>/dev/null | sort -r | head -n 1) + LANG_OUTPUT_DIR=$(find evaluation/evaluation_outputs -type d -name "*one_per_language_${LANG}*" 2>/dev/null | sort -r | head -n 1) + + if [ -z "$LANG_OUTPUT_DIR" ]; then + LANG_OUTPUT_DIR=$(find . -path "*/evaluation_outputs/*" -type d -name "*one_per_language_${LANG}*" 2>/dev/null | sort -r | head -n 1) + fi if [ -z "$LANG_OUTPUT_DIR" ]; then LANG_OUTPUT_DIR="evaluation/evaluation_outputs/one_per_language_${LANG}" @@ -244,6 +248,12 @@ if [ "$ONE_PER_LANGUAGE" = true ]; then LANG_OUTPUT_FILE="${LANG_OUTPUT_DIR}/output.jsonl" + # Print the language output directory and file for debugging + echo "" + echo "Language: $LANG" + echo "Output directory: $LANG_OUTPUT_DIR" + echo "Output file: $LANG_OUTPUT_FILE" + if [ -f "$LANG_OUTPUT_FILE" ]; then echo "" echo "===== Evaluating $LANG results =====" @@ -273,11 +283,11 @@ else echo "" # Get the output directory - first try the default location - OUTPUT_DIR=$(find evaluation/evaluation_outputs/PolyglotBenchmark/$AGENT -type d -name "*tools_bash+finish+str_replace*" 2>/dev/null | sort -r | head -n 1) + OUTPUT_DIR=$(find evaluation/evaluation_outputs -path "*/PolyglotBenchmark/$AGENT/*" -type d -name "*tools_bash+finish+str_replace*" 2>/dev/null | sort -r | head -n 1) # If not found, try to find it anywhere under evaluation_outputs if [ -z "$OUTPUT_DIR" ]; then - OUTPUT_DIR=$(find . -path "*/evaluation_outputs/*" -type d -name "*tools_bash+finish+str_replace*" 2>/dev/null | sort -r | head -n 1) + OUTPUT_DIR=$(find . -path "*/evaluation_outputs/*" -path "*/PolyglotBenchmark/$AGENT/*" -type d -name "*tools_bash+finish+str_replace*" 2>/dev/null | sort -r | head -n 1) fi # If still not found, try to find any output.jsonl file @@ -290,6 +300,11 @@ else OUTPUT_FILE="$OUTPUT_DIR/output.jsonl" fi + # Print the output directory and file for debugging + echo "" + echo "Output directory: $OUTPUT_DIR" + echo "Output file: $OUTPUT_FILE" + if [ -f "$OUTPUT_FILE" ]; then echo "Evaluating results in: $OUTPUT_FILE" poetry run python evaluation/benchmarks/polyglot_benchmark/scripts/summarize_results.py "$OUTPUT_FILE"