Skip to content

Commit

Permalink
add scripts for java
Browse files Browse the repository at this point in the history
  • Loading branch information
Dylan Bourgeois committed May 23, 2019
1 parent a60f752 commit ca866ca
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 0 deletions.
34 changes: 34 additions & 0 deletions run_classifier-methodname-java.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env bash
#
# Launch classifier.py for the `methodname` task on the Java dataset, with
# training and prediction enabled and evaluation disabled.
#
# Usage: bash run_classifier-methodname-java.sh
#   Run from the directory that contains classifier.py; every data path below
#   is relative to that directory.
set -u  # abort on any use of an unset variable instead of expanding it to ""

MAGRET_DIR="java-method"  # root of the dataset; outputs are written below it
PREFIX="java"             # prefix of the token files and of --adj_prefix
# shellcheck disable=SC2034 — only used by the disabled --init_checkpoint flag
PRETRAIN_DIR="java"
PREDIR="methodname1"      # sub-directory of MAGRET_DIR holding the task files

# Expose only GPU index 4 to the Python process.
export CUDA_VISIBLE_DEVICES=4

# Flags live in an array so that every expansion is quoted and optional flags
# can be commented out in place without a line continuation running into a
# comment.
classifier_args=(
  --do_train=True
  --do_eval=False
  --do_predict=True
  --max_nb_preds=1000
  --task_name=methodname
  "--label_vocab=${MAGRET_DIR}/java-vocab-labels-thresh.txt"
  "--vocab_file=${MAGRET_DIR}/${PREDIR}/java-vocab.txt"
  "--train_file=${MAGRET_DIR}/${PREDIR}/${PREFIX}_tk.txt"
  "--train_labels=${MAGRET_DIR}/${PREDIR}/java-train-labels.txt"
  "--train_adj=${MAGRET_DIR}/${PREDIR}"
  "--eval_file=${MAGRET_DIR}/${PREDIR}/${PREFIX}_tk_val.txt"
  "--eval_labels=${MAGRET_DIR}/${PREDIR}/java-val-labels.txt"
  "--eval_adj=${MAGRET_DIR}/${PREDIR}"
  "--data_dir=${MAGRET_DIR}"
  "--output_dir=${MAGRET_DIR}/cls_output-methodname-nopt5"
  --max_seq_length=128
  --train_batch_size=32
  --learning_rate=1e-5
  --num_train_epochs=1000
  --save_checkpoints_steps=10000
  "--bert_config_file=${MAGRET_DIR}/shallow_config.json"
  --sparse_adj=True
  "--adj_prefix=${PREFIX}"
  --clean_data=True
  # Disabled flags, kept for reference; uncomment a line to enable it.
  # "--init_checkpoint=${PRETRAIN_DIR}/pretraining_output-java-400k/model.ckpt-400000"
  # --shuffle=True
)

python classifier.py "${classifier_args[@]}"
20 changes: 20 additions & 0 deletions run_java.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Launch run_pretraining.py on the TFRecord files under java-64, with training
# and evaluation enabled and prediction disabled. All paths are relative to
# the directory the script is started from.
MAGRET_DIR="java-64"

# Expose only GPU index 3 to the Python process.
export CUDA_VISIBLE_DEVICES=3

# Same flags, in the same order, collected into an array so each one is passed
# as exactly one argument.
pretrain_flags=(
  "--input_file=${MAGRET_DIR}/tf_examples.tfrecord"
  "--validation_file=${MAGRET_DIR}/tf_examples_val.tfrecord"
  "--output_dir=${MAGRET_DIR}/pretraining_output-java"
  --do_train=True
  --do_eval=True
  --do_predict=False
  --save_prediction=False
  --save_attention=False
  "--bert_config_file=${MAGRET_DIR}/shallow_config.json"
  --train_batch_size=32
  --max_seq_length=64
  --max_predictions_per_seq=1
  --num_train_steps=100000
  --save_checkpoints_steps=50000
  --num_warmup_steps=10000
  --learning_rate=5e-5
)

python run_pretraining.py "${pretrain_flags[@]}"
33 changes: 33 additions & 0 deletions run_prepare_java.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env bash
#
# Run prepare_pretraining_data.py twice: once on the training token file and
# once on the validation token file, writing one .tfrecord file per split
# under MAGRET_DIR.
#
# Usage: [SUFFIX=<text>] bash run_prepare_java.sh
#   Run from the directory that contains prepare_pretraining_data.py.
#
# Optional environment:
#   SUFFIX  text inserted just before ".tfrecord" in both output file names.
#           Defaults to empty, which yields tf_examples.tfrecord and
#           tf_examples_val.tfrecord (the names run_java.sh reads).
set -u  # abort on any use of an unset variable instead of expanding it to ""

MAGRET_DIR="java-64"
PREFIX="java"
VOCAB="java-vocab.txt"
# SUFFIX is optional; give it an explicit empty default so the output names
# are well defined when the caller does not set it.
SUFFIX="${SUFFIX:-}"

#######################################
# Run prepare_pretraining_data.py for one data split.
# Globals:   MAGRET_DIR, PREFIX, VOCAB, SUFFIX (all read)
# Arguments: $1 - split tag spliced into the file names: "" for the training
#                 split, "_val" for the validation split
#            $2 - value passed as --is_training ("True" or "False")
# Returns:   exit status of the python process
#######################################
prepare_split() {
  local split_tag=$1
  local is_training=$2
  local -a prep_args=(
    "--input_file=${MAGRET_DIR}/${PREFIX}_tk${split_tag}.txt"
    "--output_file=${MAGRET_DIR}/tf_examples${split_tag}${SUFFIX}.tfrecord"
    "--vocab_file=${MAGRET_DIR}/${VOCAB}"
    "--adj_file=${MAGRET_DIR}/adj/"
    --do_lower_case=True
    --max_seq_length=64
    --max_predictions_per_seq=1
    --masked_lm_prob=0.15
    --random_seed=1009
    --dupe_factor=50
    --sparse_adj=True
    "--adj_prefix=${PREFIX}"
    "--is_training=${is_training}"
  )

  python prepare_pretraining_data.py "${prep_args[@]}"
}

# Training split first; the validation split only runs if that succeeded, and
# a failure of either run becomes the script's exit status.
prepare_split "" True && prepare_split "_val" False

0 comments on commit ca866ca

Please sign in to comment.