From b21997e181632060abfa6a036cb31daf81467959 Mon Sep 17 00:00:00 2001
From: flowpoint
Date: Sun, 27 Jun 2021 21:09:43 +0200
Subject: [PATCH] update to transformers==4.8.1

---
 .gitignore                          | 6 ++++++
 README.md                           | 2 +-
 embedding/run_embedding.py          | 2 +-
 named-entity-recognition/run_ner.py | 8 ++++----
 question-answering/run_factoid.py   | 7 ++++---
 question-answering/run_yesno.py     | 3 ++-
 relation-extraction/run_re.py       | 7 ++++---
 7 files changed, 22 insertions(+), 13 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..aa74986
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+datasets
+**/__pycache__
+**/runs
+**/output
+cached*
+*.h5
diff --git a/README.md b/README.md
index e93e9be..e4b531a 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ This project is supported by the members of [DMIS-Lab](https://dmis.korea.ac.kr/
 ## Installation
 ```bash
 # Install huggingface transformers
-pip install transformers==3.0.0
+pip install transformers
 
 # Download all datasets including NER/RE/QA
 ./download.sh
diff --git a/embedding/run_embedding.py b/embedding/run_embedding.py
index 7ad27b1..4701d0d 100644
--- a/embedding/run_embedding.py
+++ b/embedding/run_embedding.py
@@ -182,7 +182,7 @@ def _embedding_loop(
                 del inputs['metadata']
 
         with torch.no_grad():
-            outputs = model(**inputs)
+            outputs = model(**inputs, return_dict=False)
             last_hidden_states = outputs[0].detach()
 
             # batch process (fast)
diff --git a/named-entity-recognition/run_ner.py b/named-entity-recognition/run_ner.py
index c0a4232..9d8ebfd 100644
--- a/named-entity-recognition/run_ner.py
+++ b/named-entity-recognition/run_ner.py
@@ -246,7 +246,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
         trainer.save_model()
         # For convenience, we also re-save the tokenizer to the same directory,
         # so that you can share your model easily on huggingface.co/models =)
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             tokenizer.save_pretrained(training_args.output_dir)
 
     # Evaluation
@@ -257,7 +257,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
         result = trainer.evaluate()
 
         output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             with open(output_eval_file, "w") as writer:
                 logger.info("***** Eval results *****")
                 for key, value in result.items():
@@ -284,7 +284,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
 
         # Save predictions
         output_test_results_file = os.path.join(training_args.output_dir, "test_results.txt")
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             with open(output_test_results_file, "w") as writer:
                 logger.info("***** Test results *****")
                 for key, value in metrics.items():
@@ -293,7 +293,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
 
         output_test_predictions_file = os.path.join(training_args.output_dir, "test_predictions.txt")
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             with open(output_test_predictions_file, "w") as writer:
                 with open(os.path.join(data_args.data_dir, "test.txt"), "r") as f:
                     example_id = 0
diff --git a/question-answering/run_factoid.py b/question-answering/run_factoid.py
index 6d9f4d1..e112f29 100644
--- a/question-answering/run_factoid.py
+++ b/question-answering/run_factoid.py
@@ -201,7 +201,7 @@ def train(args, train_dataset, model, tokenizer):
                     {"langs": (torch.ones(batch[0].shape, dtype=torch.int64) * args.lang_id).to(args.device)}
                 )
 
-            outputs = model(**inputs)
+            outputs = model(**inputs, return_dict=False)
             # model outputs are always tuple in transformers (see doc)
             loss = outputs[0]
 
@@ -316,7 +316,7 @@ def evaluate(args, model, tokenizer, prefix=""):
                     {"langs": (torch.ones(batch[0].shape, dtype=torch.int64) * args.lang_id).to(args.device)}
                 )
 
-            outputs = model(**inputs)
+            outputs = model(**inputs, return_dict=False)
 
             for i, feature_index in enumerate(feature_indices):
                 eval_feature = features[feature_index.item()]
@@ -734,6 +734,7 @@ def main():
         args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
         do_lower_case=args.do_lower_case,
         cache_dir=args.cache_dir if args.cache_dir else None,
+        use_fast=False,
     )
     model = AutoModelForQuestionAnswering.from_pretrained(
         args.model_name_or_path,
@@ -782,7 +783,7 @@ def main():
 
         # Load a trained model and vocabulary that you have fine-tuned
         model = AutoModelForQuestionAnswering.from_pretrained(args.output_dir)  # , force_download=True)
-        tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
+        tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case, use_fast=False)
         model.to(args.device)
 
     # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
diff --git a/question-answering/run_yesno.py b/question-answering/run_yesno.py
index 2c90ba5..07d881a 100644
--- a/question-answering/run_yesno.py
+++ b/question-answering/run_yesno.py
@@ -580,6 +580,7 @@ def main():
         args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
         do_lower_case=args.do_lower_case,
         cache_dir=args.cache_dir if args.cache_dir else None,
+        use_fast=False
     )
     model = AutoModelForYesno.from_pretrained(
         args.model_name_or_path,
@@ -648,4 +649,4 @@ def main():
         evaluate(args, model, tokenizer, prefix=global_step)
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/relation-extraction/run_re.py b/relation-extraction/run_re.py
index 3c1bd3e..d3b1bbb 100644
--- a/relation-extraction/run_re.py
+++ b/relation-extraction/run_re.py
@@ -118,6 +118,7 @@ def main():
     tokenizer = AutoTokenizer.from_pretrained(
         model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
         cache_dir=model_args.cache_dir,
+        use_fast=False,
    )
 
     # Get datasets
@@ -186,7 +187,7 @@ def compute_metrics_fn(p: EvalPrediction):
         trainer.save_model()
         # For convenience, we also re-save the tokenizer to the same directory,
         # so that you can share your model easily on huggingface.co/models =)
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             tokenizer.save_pretrained(training_args.output_dir)
 
     # Evaluation
@@ -209,7 +210,7 @@ def compute_metrics_fn(p: EvalPrediction):
             output_eval_file = os.path.join(
                 training_args.output_dir, f"eval_results_{eval_dataset.args.task_name}.txt"
             )
-            if trainer.is_world_master():
+            if trainer.is_world_process_zero():
                 with open(output_eval_file, "w") as writer:
                     logger.info("***** Eval results {} *****".format(eval_dataset.args.task_name))
                     for key, value in eval_result.items():
@@ -237,7 +238,7 @@ def compute_metrics_fn(p: EvalPrediction):
                 f"test_results.txt"
                 #f"test_results_{test_dataset.args.task_name}.txt"
             )
-            if trainer.is_world_master():
+            if trainer.is_world_process_zero():
                 with open(output_test_file, "w") as writer:
                     logger.info("***** Test results {} *****".format(test_dataset.args.task_name))
                     writer.write("index\tprediction\n")
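Reviewer notes (not part of the patch). The three changes that recur across the files above, return_dict=False, is_world_process_zero(), and use_fast=False, each correspond to a breaking change between transformers 3.0 and 4.x. The sketches below illustrate them against transformers==4.8.1; bert-base-cased is used purely as a stand-in checkpoint, not the BioBERT model these scripts actually load.

1. return_dict=False: since transformers v4.0, models return ModelOutput dataclasses by default instead of plain tuples. ModelOutput still supports outputs[0]-style integer indexing, but code that iterates over or unpacks the output as a tuple, as these run_squad.py-derived scripts do, breaks; passing return_dict=False restores the tuple form. A minimal sketch of both access styles:

    import torch
    from transformers import AutoModelForQuestionAnswering, AutoTokenizer

    # bert-base-cased is a stand-in; its QA head is randomly initialized,
    # which is fine for demonstrating the two output forms.
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False)
    model = AutoModelForQuestionAnswering.from_pretrained("bert-base-cased")
    inputs = tokenizer("Who wrote it?", "The report was written by the lab.",
                       return_tensors="pt")

    with torch.no_grad():
        # Tuple form, as the patched scripts unpack and iterate it:
        start_logits, end_logits = model(**inputs, return_dict=False)
        # Equivalent 4.x-native access on the default ModelOutput:
        outputs = model(**inputs)
        assert torch.equal(start_logits, outputs.start_logits)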
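2. is_world_process_zero(): Trainer.is_world_master() was deprecated during the 3.x series and is gone by 4.8.1; is_world_process_zero() is the direct replacement (True only on the main process of a distributed run, and always True in single-process training). If a script had to run under both old and new versions, a small shim would do; is_main_process below is a hypothetical helper, not an API of either version:

    def is_main_process(trainer) -> bool:
        """True on the process that should write files and logs (compat shim)."""
        # Newer transformers expose is_world_process_zero(); older releases
        # only had is_world_master(), so fall back when the new name is absent.
        if hasattr(trainer, "is_world_process_zero"):
            return trainer.is_world_process_zero()
        return trainer.is_world_master()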
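3. use_fast=False: in 4.x, AutoTokenizer.from_pretrained returns the Rust-backed "fast" tokenizer by default (use_fast=True), whereas transformers 3.0 returned the pure-Python one. The patch pins the slow tokenizer, presumably because these scripts rely on slow-tokenizer behavior (the QA scripts inherit the legacy SQuAD-style feature conversion from run_squad.py, which was written against the Python tokenizers). A quick check of which class each setting yields, again with bert-base-cased as a stand-in:

    from transformers import AutoTokenizer

    slow = AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False)
    fast = AutoTokenizer.from_pretrained("bert-base-cased")  # 4.x default

    print(type(slow).__name__)  # BertTokenizer
    print(type(fast).__name__)  # BertTokenizerFast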