Skip to content

Commit 53fad73

Browse files
committed
Update output naming scheme
1 parent 4c13bdf commit 53fad73

File tree

5 files changed

+14
-8
lines changed

5 files changed

+14
-8
lines changed

Dockerfile

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ ENV includeComments=true
3131
ENV excludeStopwords=true
3232
ENV useTfidf=true
3333
ENV numberOfTfidfKeywords="50"
34+
ENV variant="default"
3435

3536
# Training variables
3637
ENV train=true

docker-compose.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ services:
1414

1515
environment:
1616
dataset: "codesearchnet"
17+
variant: "comments"
1718
# Preprocessing variables
1819
preprocess: true
1920
includeComments: true
@@ -22,9 +23,8 @@ services:
2223
numberOfTfidfKeywords: "50"
2324
# Training variables
2425
train: true
26+
# There has to be an existing model for the following to work
2527
continueTrainingFromCheckpoint: false
26-
# Evaluation variables
27-
# add more flags here
2828

2929
deploy:
3030
resources:

entrypoint.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
set -e
55

66
if [ "$preprocess" = true ];
7-
then bash preprocess.sh --dataset="$dataset" --include_comments="$includeComments" --exclude_stopwords="$excludeStopwords" --include_tfidf="$useTfidf" --number_keywords="$numberOfTfidfKeywords"
7+
then bash preprocess.sh --dataset="$dataset" --include_comments="$includeComments" --exclude_stopwords="$excludeStopwords" --include_tfidf="$useTfidf" --number_keywords="$numberOfTfidfKeywords" --variant="$variant"
88
else echo "Not preprocessing."
99
fi
1010

1111
if [ "$train" = true ];
12-
then bash train.sh --dataset="$dataset" --continue_training_from_checkpoint="$continueTrainingFromCheckpoint"
12+
then bash train.sh --dataset="$dataset" --continue_training_from_checkpoint="$continueTrainingFromCheckpoint" --variant="$variant"
1313
else echo "Not training a new model."
1414
fi
1515

preprocess.sh

+5-2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ set -e
2727

2828
# Default preprocessing values
2929
DATASET_NAME=default
30+
VARIANT=default
3031
INCLUDE_COMMENTS=true
3132
EXCLUDE_STOPWORDS=false
3233
USE_TFIDF=false
@@ -49,13 +50,15 @@ while getopts ab:c:-: OPT; do
4950
exclude_stopwords ) EXCLUDE_STOPWORDS="$OPTARG" ;;
5051
include_tfidf ) USE_TFIDF="$OPTARG" ;;
5152
number_keywords ) NUMBER_OF_TFIDF_KEYWORDS="$OPTARG" ;;
53+
variant ) VARIANT="$OPTARG" ;;
5254
??* ) die "Illegal option --$OPT" ;; # bad long option
5355
? ) exit 2 ;; # bad short option (error reported via getopts)
5456
esac
5557
done
5658
shift $((OPTIND-1)) # remove parsed options and args from $@ list
5759

5860
echo "Dataset: $DATASET_NAME"
61+
echo "Variant: $VARIANT"
5962
echo "Including comments: $INCLUDE_COMMENTS"
6063
echo "Excluding stopwords: $EXCLUDE_STOPWORDS"
6164
echo "Using TFIDF: $USE_TFIDF"
@@ -76,9 +79,9 @@ NUM_THREADS=64
7679
PYTHON=python3
7780
###########################################################
7881

79-
OUTPUT_DIR=${INPUT_DIR}/${DATASET_NAME}/preprocessed
82+
OUTPUT_DIR=${INPUT_DIR}/${DATASET_NAME}/preprocessed/exp_${VARIANT}
8083

81-
mkdir -p ${INPUT_DIR}/${DATASET_NAME}/preprocessed
84+
mkdir -p ${INPUT_DIR}/${DATASET_NAME}/preprocessed/exp_${VARIANT}
8285

8386
TRAIN_DATA_FILE=${OUTPUT_DIR}/${DATASET_NAME}.train.raw.txt
8487
VAL_DATA_FILE=${OUTPUT_DIR}/${DATASET_NAME}.val.raw.txt

train.sh

+4-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# on the final (held-out) test set, change 'val' to 'test'.
88

99
dataset_name=default
10+
variant=default
1011
continue_training_from_checkpoint=true
1112

1213
# This code block is used to get long two-dash arguments from the command line.
@@ -23,6 +24,7 @@ while getopts ab:c:-: OPT; do
2324
case "$OPT" in
2425
dataset ) dataset_name="$OPTARG" ;;
2526
continue_training_from_checkpoint ) continue_training_from_checkpoint="$OPTARG" ;;
27+
variant ) variant="$OPTARG" ;;
2628
??* ) die "Illegal option --$OPT" ;; # bad long option
2729
? ) exit 2 ;; # bad short option (error reported via getopts)
2830
esac
@@ -32,8 +34,8 @@ shift $((OPTIND-1)) # remove parsed options and args from $@ list
3234
echo "Dataset: $dataset_name"
3335
echo "Training from a previous checkpoint: $continue_training_from_checkpoint"
3436

35-
type=trained_${dataset_name}
36-
data_dir=datasets/${dataset_name}/preprocessed
37+
type=exp_${dataset_name}_${variant}
38+
data_dir=datasets/${dataset_name}/preprocessed/exp_${variant}
3739
data=${data_dir}/${dataset_name}
3840
test_data=${data_dir}/${dataset_name}.val.c2s
3941
model_dir=models/${type}

0 commit comments

Comments
 (0)