dmis-lab · flowpoint · Jun 27, 2021
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,6 @@
+datasets
+**/__pycache__
+**/runs
+**/output
+cached*
+*.h5
diff --git a/README.md b/README.md
@@ -6,7 +6,7 @@ This project is supported by the members of [DMIS-Lab](https://dmis.korea.ac.kr/
 ## Installation
 ```bash
 # Install huggingface transformers
-pip install transformers==3.0.0
+pip install transformers
 
 # Download all datasets including NER/RE/QA
 ./download.sh

diff --git a/embedding/run_embedding.py b/embedding/run_embedding.py
@@ -182,7 +182,7 @@ def _embedding_loop(
             del inputs['metadata']
 
             with torch.no_grad():
-                outputs = model(**inputs)
+                outputs = model(**inputs, return_dict=False)
             last_hidden_states = outputs[0].detach()
 
             # batch process (fast)

diff --git a/named-entity-recognition/run_ner.py b/named-entity-recognition/run_ner.py
@@ -246,7 +246,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
         trainer.save_model()
         # For convenience, we also re-save the tokenizer to the same directory,
         # so that you can share your model easily on huggingface.co/models =)
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             tokenizer.save_pretrained(training_args.output_dir)
 
     # Evaluation
@@ -257,7 +257,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
         result = trainer.evaluate()
 
         output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             with open(output_eval_file, "w") as writer:
                 logger.info("***** Eval results *****")
                 for key, value in result.items():
@@ -284,7 +284,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
 
         # Save predictions
         output_test_results_file = os.path.join(training_args.output_dir, "test_results.txt")
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             with open(output_test_results_file, "w") as writer:
                 logger.info("***** Test results *****")
                 for key, value in metrics.items():
@@ -293,7 +293,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
 
 
         output_test_predictions_file = os.path.join(training_args.output_dir, "test_predictions.txt")
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             with open(output_test_predictions_file, "w") as writer:
                 with open(os.path.join(data_args.data_dir, "test.txt"), "r") as f:
                     example_id = 0

diff --git a/question-answering/run_factoid.py b/question-answering/run_factoid.py
@@ -201,7 +201,7 @@ def train(args, train_dataset, model, tokenizer):
                         {"langs": (torch.ones(batch[0].shape, dtype=torch.int64) * args.lang_id).to(args.device)}
                     )
 
-            outputs = model(**inputs)
+            outputs = model(**inputs, return_dict=False)
             # model outputs are always tuple in transformers (see doc)
             loss = outputs[0]
 
@@ -316,7 +316,7 @@ def evaluate(args, model, tokenizer, prefix=""):
                         {"langs": (torch.ones(batch[0].shape, dtype=torch.int64) * args.lang_id).to(args.device)}
                     )
 
-            outputs = model(**inputs)
+            outputs = model(**inputs, return_dict=False)
 
         for i, feature_index in enumerate(feature_indices):
             eval_feature = features[feature_index.item()]
@@ -734,6 +734,7 @@ def main():
         args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
         do_lower_case=args.do_lower_case,
         cache_dir=args.cache_dir if args.cache_dir else None,
+        use_fast=False,
     )
     model = AutoModelForQuestionAnswering.from_pretrained(
         args.model_name_or_path,
@@ -782,7 +783,7 @@ def main():
 
         # Load a trained model and vocabulary that you have fine-tuned
         model = AutoModelForQuestionAnswering.from_pretrained(args.output_dir)  # , force_download=True)
-        tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
+        tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case, use_fast=False)
         model.to(args.device)
 
     # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory

diff --git a/question-answering/run_yesno.py b/question-answering/run_yesno.py
@@ -580,6 +580,7 @@ def main():
         args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
         do_lower_case=args.do_lower_case,
         cache_dir=args.cache_dir if args.cache_dir else None,
+        use_fast=False
     )
     model = AutoModelForYesno.from_pretrained(
         args.model_name_or_path,
@@ -648,4 +649,4 @@ def main():
             evaluate(args, model, tokenizer, prefix=global_step)
 
 if __name__ == "__main__":
-    main()
+    main()
diff --git a/relation-extraction/run_re.py b/relation-extraction/run_re.py
@@ -118,6 +118,7 @@ def main():
     tokenizer = AutoTokenizer.from_pretrained(
         model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
         cache_dir=model_args.cache_dir,
+        use_fast=False,
     )
 
     # Get datasets
@@ -186,7 +187,7 @@ def compute_metrics_fn(p: EvalPrediction):
         trainer.save_model()
         # For convenience, we also re-save the tokenizer to the same directory,
         # so that you can share your model easily on huggingface.co/models =)
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             tokenizer.save_pretrained(training_args.output_dir)
 
     # Evaluation
@@ -209,7 +210,7 @@ def compute_metrics_fn(p: EvalPrediction):
             output_eval_file = os.path.join(
                 training_args.output_dir, f"eval_results_{eval_dataset.args.task_name}.txt"
             )
-            if trainer.is_world_master():
+            if trainer.is_world_process_zero():
                 with open(output_eval_file, "w") as writer:
                     logger.info("***** Eval results {} *****".format(eval_dataset.args.task_name))
                     for key, value in eval_result.items():
@@ -237,7 +238,7 @@ def compute_metrics_fn(p: EvalPrediction):
                 f"test_results.txt"
                 #f"test_results_{test_dataset.args.task_name}.txt"
             )
-            if trainer.is_world_master():
+            if trainer.is_world_process_zero():
                 with open(output_test_file, "w") as writer:
                     logger.info("***** Test results {} *****".format(test_dataset.args.task_name))
                     writer.write("index\tprediction\n")