From b21997e181632060abfa6a036cb31daf81467959 Mon Sep 17 00:00:00 2001
From: flowpoint
Date: Sun, 27 Jun 2021 21:09:43 +0200
Subject: [PATCH] update to transformers==4.8.1

---
 .gitignore                          | 6 ++++++
 README.md                           | 2 +-
 embedding/run_embedding.py          | 2 +-
 named-entity-recognition/run_ner.py | 8 ++++----
 question-answering/run_factoid.py   | 7 ++++---
 question-answering/run_yesno.py     | 3 ++-
 relation-extraction/run_re.py       | 7 ++++---
 7 files changed, 22 insertions(+), 13 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..aa74986
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+datasets
+**/__pycache__
+**/runs
+**/output
+cached*
+*.h5
diff --git a/README.md b/README.md
index e93e9be..e4b531a 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ This project is supported by the members of [DMIS-Lab](https://dmis.korea.ac.kr/
 ## Installation
 ```bash
 # Install huggingface transformers
-pip install transformers==3.0.0
+pip install transformers
 
 # Download all datasets including NER/RE/QA
 ./download.sh
diff --git a/embedding/run_embedding.py b/embedding/run_embedding.py
index 7ad27b1..4701d0d 100644
--- a/embedding/run_embedding.py
+++ b/embedding/run_embedding.py
@@ -182,7 +182,7 @@ def _embedding_loop(
                 del inputs['metadata']
 
         with torch.no_grad():
-            outputs = model(**inputs)
+            outputs = model(**inputs, return_dict=False)
             last_hidden_states = outputs[0].detach()
 
             # batch process (fast)
diff --git a/named-entity-recognition/run_ner.py b/named-entity-recognition/run_ner.py
index c0a4232..9d8ebfd 100644
--- a/named-entity-recognition/run_ner.py
+++ b/named-entity-recognition/run_ner.py
@@ -246,7 +246,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
         trainer.save_model()
         # For convenience, we also re-save the tokenizer to the same directory,
         # so that you can share your model easily on huggingface.co/models =)
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             tokenizer.save_pretrained(training_args.output_dir)
 
     # Evaluation
@@ -257,7 +257,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
         result = trainer.evaluate()
 
         output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             with open(output_eval_file, "w") as writer:
                 logger.info("***** Eval results *****")
                 for key, value in result.items():
@@ -284,7 +284,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
 
         # Save predictions
         output_test_results_file = os.path.join(training_args.output_dir, "test_results.txt")
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             with open(output_test_results_file, "w") as writer:
                 logger.info("***** Test results *****")
                 for key, value in metrics.items():
@@ -293,7 +293,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
 
         output_test_predictions_file = os.path.join(training_args.output_dir, "test_predictions.txt")
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             with open(output_test_predictions_file, "w") as writer:
                 with open(os.path.join(data_args.data_dir, "test.txt"), "r") as f:
                     example_id = 0
diff --git a/question-answering/run_factoid.py b/question-answering/run_factoid.py
index 6d9f4d1..e112f29 100644
--- a/question-answering/run_factoid.py
+++ b/question-answering/run_factoid.py
@@ -201,7 +201,7 @@ def train(args, train_dataset, model, tokenizer):
                     {"langs": (torch.ones(batch[0].shape, dtype=torch.int64) * args.lang_id).to(args.device)}
                 )
 
-            outputs = model(**inputs)
+            outputs = model(**inputs, return_dict=False)
             # model outputs are always tuple in transformers (see doc)
             loss = outputs[0]
 
@@ -316,7 +316,7 @@ def evaluate(args, model, tokenizer, prefix=""):
                     {"langs": (torch.ones(batch[0].shape, dtype=torch.int64) * args.lang_id).to(args.device)}
                 )
 
-            outputs = model(**inputs)
+            outputs = model(**inputs, return_dict=False)
 
             for i, feature_index in enumerate(feature_indices):
                 eval_feature = features[feature_index.item()]
@@ -734,6 +734,7 @@ def main():
         args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
         do_lower_case=args.do_lower_case,
         cache_dir=args.cache_dir if args.cache_dir else None,
+        use_fast=False,
     )
     model = AutoModelForQuestionAnswering.from_pretrained(
         args.model_name_or_path,
@@ -782,7 +783,7 @@ def main():
 
         # Load a trained model and vocabulary that you have fine-tuned
         model = AutoModelForQuestionAnswering.from_pretrained(args.output_dir)  # , force_download=True)
-        tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
+        tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case, use_fast=False)
         model.to(args.device)
 
     # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
diff --git a/question-answering/run_yesno.py b/question-answering/run_yesno.py
index 2c90ba5..07d881a 100644
--- a/question-answering/run_yesno.py
+++ b/question-answering/run_yesno.py
@@ -580,6 +580,7 @@ def main():
         args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
         do_lower_case=args.do_lower_case,
         cache_dir=args.cache_dir if args.cache_dir else None,
+        use_fast=False
     )
     model = AutoModelForYesno.from_pretrained(
         args.model_name_or_path,
@@ -648,4 +649,4 @@ def main():
         evaluate(args, model, tokenizer, prefix=global_step)
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/relation-extraction/run_re.py b/relation-extraction/run_re.py
index 3c1bd3e..d3b1bbb 100644
--- a/relation-extraction/run_re.py
+++ b/relation-extraction/run_re.py
@@ -118,6 +118,7 @@ def main():
     tokenizer = AutoTokenizer.from_pretrained(
         model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
         cache_dir=model_args.cache_dir,
+        use_fast=False,
    )
 
     # Get datasets
@@ -186,7 +187,7 @@ def compute_metrics_fn(p: EvalPrediction):
         trainer.save_model()
         # For convenience, we also re-save the tokenizer to the same directory,
         # so that you can share your model easily on huggingface.co/models =)
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             tokenizer.save_pretrained(training_args.output_dir)
 
     # Evaluation
@@ -209,7 +210,7 @@ def compute_metrics_fn(p: EvalPrediction):
             output_eval_file = os.path.join(
                 training_args.output_dir, f"eval_results_{eval_dataset.args.task_name}.txt"
             )
-            if trainer.is_world_master():
+            if trainer.is_world_process_zero():
                 with open(output_eval_file, "w") as writer:
                     logger.info("***** Eval results {} *****".format(eval_dataset.args.task_name))
                     for key, value in eval_result.items():
@@ -237,7 +238,7 @@ def compute_metrics_fn(p: EvalPrediction):
                 f"test_results.txt"
                 #f"test_results_{test_dataset.args.task_name}.txt"
             )
-            if trainer.is_world_master():
+            if trainer.is_world_process_zero():
                 with open(output_test_file, "w") as writer:
                     logger.info("***** Test results {} *****".format(test_dataset.args.task_name))
                     writer.write("index\tprediction\n")
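Reviewer notes (not part of the patch). The three changes that recur across the files above, return_dict=False, is_world_process_zero(), and use_fast=False, each correspond to a breaking change between transformers 3.0 and 4.x. The sketches below illustrate them against transformers==4.8.1; bert-base-cased is used purely as a stand-in checkpoint, not the BioBERT model these scripts actually load.

1. return_dict=False: since transformers v4.0, models return ModelOutput dataclasses by default instead of plain tuples. ModelOutput still supports outputs[0]-style integer indexing, but code that iterates over or unpacks the output as a tuple, as these run_squad.py-derived scripts do, breaks; passing return_dict=False restores the tuple form. A minimal sketch of both access styles:

    import torch
    from transformers import AutoModelForQuestionAnswering, AutoTokenizer

    # bert-base-cased is a stand-in; its QA head is randomly initialized,
    # which is fine for demonstrating the two output forms.
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False)
    model = AutoModelForQuestionAnswering.from_pretrained("bert-base-cased")
    inputs = tokenizer("Who wrote it?", "The report was written by the lab.",
                       return_tensors="pt")

    with torch.no_grad():
        # Tuple form, as the patched scripts unpack and iterate it:
        start_logits, end_logits = model(**inputs, return_dict=False)
        # Equivalent 4.x-native access on the default ModelOutput:
        outputs = model(**inputs)
        assert torch.equal(start_logits, outputs.start_logits)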
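2. is_world_process_zero(): Trainer.is_world_master() was deprecated during the 3.x series and is gone by 4.8.1; is_world_process_zero() is the direct replacement (True only on the main process of a distributed run, and always True in single-process training). If a script had to run under both old and new versions, a small shim would do; is_main_process below is a hypothetical helper, not an API of either version:

    def is_main_process(trainer) -> bool:
        """True on the process that should write files and logs (compat shim)."""
        # Newer transformers expose is_world_process_zero(); older releases
        # only had is_world_master(), so fall back when the new name is absent.
        if hasattr(trainer, "is_world_process_zero"):
            return trainer.is_world_process_zero()
        return trainer.is_world_master()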
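3. use_fast=False: in 4.x, AutoTokenizer.from_pretrained returns the Rust-backed "fast" tokenizer by default (use_fast=True), whereas transformers 3.0 returned the pure-Python one. The patch pins the slow tokenizer, presumably because these scripts rely on slow-tokenizer behavior (the QA scripts inherit the legacy SQuAD-style feature conversion from run_squad.py, which was written against the Python tokenizers). A quick check of which class each setting yields, again with bert-base-cased as a stand-in:

    from transformers import AutoTokenizer

    slow = AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False)
    fast = AutoTokenizer.from_pretrained("bert-base-cased")  # 4.x default

    print(type(slow).__name__)  # BertTokenizer
    print(type(fast).__name__)  # BertTokenizerFast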