Skip to content

Commit

Permalink
Improve output directory detection in evaluation scripts with better …
Browse files Browse the repository at this point in the history
…path matching and debugging output
  • Loading branch information
AlexCuadron committed Feb 26, 2025
1 parent fa9a0f8 commit 8a4ca1e
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 5 deletions.
9 changes: 7 additions & 2 deletions evaluation/benchmarks/aider_bench/scripts/run_infer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,11 @@ fi
# Execute the command string held in $COMMAND. It is assembled outside this
# excerpt, so its contents cannot be confirmed from here.
eval $COMMAND

# Get the output directory - first try the default location
# NOTE(review): the two consecutive OUTPUT_DIR assignments below look like the
# removed/added pair of a diff whose +/- markers were lost - TODO confirm.
# Read as plain shell, both run and the second value overwrites the first.
# Each search lists directories whose name contains $EVAL_NOTE, discards find's
# error output, reverse-sorts the resulting paths and keeps only the first line.
OUTPUT_DIR=$(find evaluation/evaluation_outputs/AiderBench/$AGENT -type d -name "*$EVAL_NOTE*" 2>/dev/null | sort -r | head -n 1)
OUTPUT_DIR=$(find evaluation/evaluation_outputs -path "*/AiderBench/$AGENT/*" -type d -name "*$EVAL_NOTE*" 2>/dev/null | sort -r | head -n 1)

# If not found, try to find it anywhere under evaluation_outputs
if [ -z "$OUTPUT_DIR" ]; then
# Fallback search rooted at the current directory. The first assignment only
# requires an evaluation_outputs path component; the second additionally
# requires the path to contain /AiderBench/$AGENT/ and is the value that is kept.
OUTPUT_DIR=$(find . -path "*/evaluation_outputs/*" -type d -name "*$EVAL_NOTE*" 2>/dev/null | sort -r | head -n 1)
OUTPUT_DIR=$(find . -path "*/evaluation_outputs/*" -path "*/AiderBench/$AGENT/*" -type d -name "*$EVAL_NOTE*" 2>/dev/null | sort -r | head -n 1)
fi

# If still not found, try to find any output.jsonl file
Expand All @@ -78,6 +78,11 @@ else
OUTPUT_FILE="$OUTPUT_DIR/output.jsonl"
fi

# Print the output directory and file for debugging
# Both values are echoed as-is after a blank separator line; these lines do not
# check that either variable is non-empty or that the paths exist.
echo ""
echo "Output directory: $OUTPUT_DIR"
echo "Output file: $OUTPUT_FILE"

# Run evaluation if requested
if [ "$RUN_EVALUATION" = "eval" ]; then
echo ""
Expand Down
21 changes: 18 additions & 3 deletions evaluation/benchmarks/polyglot_benchmark/scripts/run_infer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -236,14 +236,24 @@ if [ "$ONE_PER_LANGUAGE" = true ]; then
# Evaluate each language's results
for LANG in "${LANGUAGES[@]}"; do
# Try to find the output directory for this language
# NOTE(review): the two consecutive LANG_OUTPUT_DIR assignments below look like
# the removed/added pair of a diff whose +/- markers were lost - TODO confirm.
# Read as plain shell, the second assignment (search rooted at
# evaluation/evaluation_outputs) overwrites the first.
# Each search matches directories whose name contains one_per_language_${LANG},
# reverse-sorts the resulting paths and keeps only the first line.
LANG_OUTPUT_DIR=$(find . -path "*/evaluation_outputs/*" -type d -name "*one_per_language_${LANG}*" 2>/dev/null | sort -r | head -n 1)
LANG_OUTPUT_DIR=$(find evaluation/evaluation_outputs -type d -name "*one_per_language_${LANG}*" 2>/dev/null | sort -r | head -n 1)

if [ -z "$LANG_OUTPUT_DIR" ]; then
# Nothing found under evaluation/evaluation_outputs: widen the search to any
# evaluation_outputs directory below the current directory.
LANG_OUTPUT_DIR=$(find . -path "*/evaluation_outputs/*" -type d -name "*one_per_language_${LANG}*" 2>/dev/null | sort -r | head -n 1)
fi

if [ -z "$LANG_OUTPUT_DIR" ]; then
# Still nothing: fall back to a fixed path. This assignment does not check that
# the directory exists.
LANG_OUTPUT_DIR="evaluation/evaluation_outputs/one_per_language_${LANG}"
fi

# The results file is taken to be output.jsonl directly inside the chosen directory.
LANG_OUTPUT_FILE="${LANG_OUTPUT_DIR}/output.jsonl"

# Print the language output directory and file for debugging
echo ""
echo "Language: $LANG"
echo "Output directory: $LANG_OUTPUT_DIR"
echo "Output file: $LANG_OUTPUT_FILE"

if [ -f "$LANG_OUTPUT_FILE" ]; then
echo ""
echo "===== Evaluating $LANG results ====="
Expand Down Expand Up @@ -273,11 +283,11 @@ else
echo ""

# Get the output directory - first try the default location
# NOTE(review): the two consecutive OUTPUT_DIR assignments below look like the
# removed/added pair of a diff whose +/- markers were lost - TODO confirm.
# Read as plain shell, both run and the second value overwrites the first.
# Each search matches directories whose name contains the literal text
# tools_bash+finish+str_replace, reverse-sorts the resulting paths and keeps
# only the first line.
OUTPUT_DIR=$(find evaluation/evaluation_outputs/PolyglotBenchmark/$AGENT -type d -name "*tools_bash+finish+str_replace*" 2>/dev/null | sort -r | head -n 1)
OUTPUT_DIR=$(find evaluation/evaluation_outputs -path "*/PolyglotBenchmark/$AGENT/*" -type d -name "*tools_bash+finish+str_replace*" 2>/dev/null | sort -r | head -n 1)

# If not found, try to find it anywhere under evaluation_outputs
if [ -z "$OUTPUT_DIR" ]; then
# Fallback search rooted at the current directory. The first assignment only
# requires an evaluation_outputs path component; the second additionally
# requires the path to contain /PolyglotBenchmark/$AGENT/ and is the value kept.
OUTPUT_DIR=$(find . -path "*/evaluation_outputs/*" -type d -name "*tools_bash+finish+str_replace*" 2>/dev/null | sort -r | head -n 1)
OUTPUT_DIR=$(find . -path "*/evaluation_outputs/*" -path "*/PolyglotBenchmark/$AGENT/*" -type d -name "*tools_bash+finish+str_replace*" 2>/dev/null | sort -r | head -n 1)
fi

# If still not found, try to find any output.jsonl file
Expand All @@ -290,6 +300,11 @@ else
OUTPUT_FILE="$OUTPUT_DIR/output.jsonl"
fi

# Print the output directory and file for debugging
# Both values are echoed as-is after a blank separator line; these lines do not
# check that either variable is non-empty or that the paths exist.
echo ""
echo "Output directory: $OUTPUT_DIR"
echo "Output file: $OUTPUT_FILE"

if [ -f "$OUTPUT_FILE" ]; then
echo "Evaluating results in: $OUTPUT_FILE"
poetry run python evaluation/benchmarks/polyglot_benchmark/scripts/summarize_results.py "$OUTPUT_FILE"
Expand Down

0 comments on commit 8a4ca1e

Please sign in to comment.