File tree 5 files changed +14
-8
lines changed
5 files changed +14
-8
lines changed Original file line number Diff line number Diff line change @@ -31,6 +31,7 @@ ENV includeComments=true
31
31
ENV excludeStopwords=true
32
32
ENV useTfidf=true
33
33
ENV numberOfTfidfKeywords="50"
34
+ ENV variant="default"
34
35
35
36
# Training variables
36
37
ENV train=true
Original file line number Diff line number Diff line change @@ -14,6 +14,7 @@ services:
14
14
15
15
environment :
16
16
dataset : " codesearchnet"
17
+ variant : " comments"
17
18
# Preprocessing variables
18
19
preprocess : true
19
20
includeComments : true
@@ -22,9 +23,8 @@ services:
22
23
numberOfTfidfKeywords : " 50"
23
24
# Training variables
24
25
train : true
26
+ # There has to be an existing model for the following to work
25
27
continueTrainingFromCheckpoint : false
26
- # Evaluation variables
27
- # add more flags here
28
28
29
29
deploy :
30
30
resources :
Original file line number Diff line number Diff line change 4
4
set -e
5
5
6
6
if [ " $preprocess " = true ];
7
- then bash preprocess.sh --dataset=" $dataset " --include_comments=" $includeComments " --exclude_stopwords=" $excludeStopwords " --include_tfidf=" $useTfidf " --number_keywords=" $numberOfTfidfKeywords "
7
+ then bash preprocess.sh --dataset=" $dataset " --include_comments=" $includeComments " --exclude_stopwords=" $excludeStopwords " --include_tfidf=" $useTfidf " --number_keywords=" $numberOfTfidfKeywords " --variant=" $variant "
8
8
else echo " Not preprocessing."
9
9
fi
10
10
11
11
if [ " $train " = true ];
12
- then bash train.sh --dataset=" $dataset " --continue_training_from_checkpoint=" $continueTrainingFromCheckpoint "
12
+ then bash train.sh --dataset=" $dataset " --continue_training_from_checkpoint=" $continueTrainingFromCheckpoint " --variant=" $variant "
13
13
else echo " Not training a new model."
14
14
fi
15
15
Original file line number Diff line number Diff line change 27
27
28
28
# Default preprocessing values
29
29
DATASET_NAME=default
30
+ VARIANT=default
30
31
INCLUDE_COMMENTS=true
31
32
EXCLUDE_STOPWORDS=false
32
33
USE_TFIDF=false
@@ -49,13 +50,15 @@ while getopts ab:c:-: OPT; do
49
50
exclude_stopwords ) EXCLUDE_STOPWORDS=" $OPTARG " ;;
50
51
include_tfidf ) USE_TFIDF=" $OPTARG " ;;
51
52
number_keywords ) NUMBER_OF_TFIDF_KEYWORDS=" $OPTARG " ;;
53
+ variant ) VARIANT=" $OPTARG " ;;
52
54
??* ) die " Illegal option --$OPT " ;; # bad long option
53
55
? ) exit 2 ;; # bad short option (error reported via getopts)
54
56
esac
55
57
done
56
58
shift $(( OPTIND- 1 )) # remove parsed options and args from $@ list
57
59
58
60
echo " Dataset: $DATASET_NAME "
61
+ echo " Variant: $VARIANT "
59
62
echo " Including comments: $INCLUDE_COMMENTS "
60
63
echo " Excluding stopwords: $EXCLUDE_STOPWORDS "
61
64
echo " Using TFIDF: $USE_TFIDF "
@@ -76,9 +79,9 @@ NUM_THREADS=64
76
79
PYTHON=python3
77
80
# ##########################################################
78
81
79
- OUTPUT_DIR=${INPUT_DIR} /${DATASET_NAME} /preprocessed
82
+ OUTPUT_DIR=${INPUT_DIR} /${DATASET_NAME} /preprocessed/exp_ ${VARIANT}
80
83
81
- mkdir -p ${INPUT_DIR} /${DATASET_NAME} /preprocessed
84
+ mkdir -p ${INPUT_DIR} /${DATASET_NAME} /preprocessed/exp_ ${VARIANT}
82
85
83
86
TRAIN_DATA_FILE=${OUTPUT_DIR} /${DATASET_NAME} .train.raw.txt
84
87
VAL_DATA_FILE=${OUTPUT_DIR} /${DATASET_NAME} .val.raw.txt
Original file line number Diff line number Diff line change 7
7
# on the final (held-out) test set, change 'val' to 'test'.
8
8
9
9
dataset_name=default
10
+ variant=default
10
11
continue_training_from_checkpoint=true
11
12
12
13
# This code block is used to get long two-dash arguments from the command line.
@@ -23,6 +24,7 @@ while getopts ab:c:-: OPT; do
23
24
case " $OPT " in
24
25
dataset ) dataset_name=" $OPTARG " ;;
25
26
continue_training_from_checkpoint ) continue_training_from_checkpoint=" $OPTARG " ;;
27
+ variant ) variant=" $OPTARG " ;;
26
28
??* ) die " Illegal option --$OPT " ;; # bad long option
27
29
? ) exit 2 ;; # bad short option (error reported via getopts)
28
30
esac
@@ -32,8 +34,8 @@ shift $((OPTIND-1)) # remove parsed options and args from $@ list
32
34
echo " Dataset: $dataset_name "
33
35
echo " Training from a previous checkpoint: $continue_training_from_checkpoint "
34
36
35
- type=trained_ ${dataset_name}
36
- data_dir=datasets/${dataset_name} /preprocessed
37
+ type=exp_ ${dataset_name} _ ${variant}
38
+ data_dir=datasets/${dataset_name} /preprocessed/exp_ ${variant}
37
39
data=${data_dir} /${dataset_name}
38
40
test_data=${data_dir} /${dataset_name} .val.c2s
39
41
model_dir=models/${type}
You can’t perform that action at this time.
0 commit comments