Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update to transformers==4.8.1 #20

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
datasets
**/__pycache__
**/runs
**/output
cached*
*.h5
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ This project is supported by the members of [DMIS-Lab](https://dmis.korea.ac.kr/
## Installation
```bash
# Install huggingface transformers
pip install transformers==3.0.0
pip install transformers

# Download all datasets including NER/RE/QA
./download.sh
Expand Down
2 changes: 1 addition & 1 deletion embedding/run_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def _embedding_loop(
del inputs['metadata']

with torch.no_grad():
outputs = model(**inputs)
outputs = model(**inputs, return_dict=False)
last_hidden_states = outputs[0].detach()

# batch process (fast)
Expand Down
8 changes: 4 additions & 4 deletions named-entity-recognition/run_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
trainer.save_model()
# For convenience, we also re-save the tokenizer to the same directory,
# so that you can share your model easily on huggingface.co/models =)
if trainer.is_world_master():
if trainer.is_world_process_zero():
tokenizer.save_pretrained(training_args.output_dir)

# Evaluation
Expand All @@ -257,7 +257,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:
result = trainer.evaluate()

output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
if trainer.is_world_master():
if trainer.is_world_process_zero():
with open(output_eval_file, "w") as writer:
logger.info("***** Eval results *****")
for key, value in result.items():
Expand All @@ -284,7 +284,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:

# Save predictions
output_test_results_file = os.path.join(training_args.output_dir, "test_results.txt")
if trainer.is_world_master():
if trainer.is_world_process_zero():
with open(output_test_results_file, "w") as writer:
logger.info("***** Test results *****")
for key, value in metrics.items():
Expand All @@ -293,7 +293,7 @@ def compute_metrics(p: EvalPrediction) -> Dict:


output_test_predictions_file = os.path.join(training_args.output_dir, "test_predictions.txt")
if trainer.is_world_master():
if trainer.is_world_process_zero():
with open(output_test_predictions_file, "w") as writer:
with open(os.path.join(data_args.data_dir, "test.txt"), "r") as f:
example_id = 0
Expand Down
7 changes: 4 additions & 3 deletions question-answering/run_factoid.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def train(args, train_dataset, model, tokenizer):
{"langs": (torch.ones(batch[0].shape, dtype=torch.int64) * args.lang_id).to(args.device)}
)

outputs = model(**inputs)
outputs = model(**inputs, return_dict=False)
# with return_dict=False the model returns a plain tuple (see doc); the loss is its first element
loss = outputs[0]

Expand Down Expand Up @@ -316,7 +316,7 @@ def evaluate(args, model, tokenizer, prefix=""):
{"langs": (torch.ones(batch[0].shape, dtype=torch.int64) * args.lang_id).to(args.device)}
)

outputs = model(**inputs)
outputs = model(**inputs, return_dict=False)

for i, feature_index in enumerate(feature_indices):
eval_feature = features[feature_index.item()]
Expand Down Expand Up @@ -734,6 +734,7 @@ def main():
args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
do_lower_case=args.do_lower_case,
cache_dir=args.cache_dir if args.cache_dir else None,
use_fast=False,
)
model = AutoModelForQuestionAnswering.from_pretrained(
args.model_name_or_path,
Expand Down Expand Up @@ -782,7 +783,7 @@ def main():

# Load a trained model and vocabulary that you have fine-tuned
model = AutoModelForQuestionAnswering.from_pretrained(args.output_dir) # , force_download=True)
tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case, use_fast=False)
model.to(args.device)

# Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
Expand Down
3 changes: 2 additions & 1 deletion question-answering/run_yesno.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,7 @@ def main():
args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
do_lower_case=args.do_lower_case,
cache_dir=args.cache_dir if args.cache_dir else None,
use_fast=False
)
model = AutoModelForYesno.from_pretrained(
args.model_name_or_path,
Expand Down Expand Up @@ -648,4 +649,4 @@ def main():
evaluate(args, model, tokenizer, prefix=global_step)

if __name__ == "__main__":
main()
main()
7 changes: 4 additions & 3 deletions relation-extraction/run_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
use_fast=False,
)

# Get datasets
Expand Down Expand Up @@ -186,7 +187,7 @@ def compute_metrics_fn(p: EvalPrediction):
trainer.save_model()
# For convenience, we also re-save the tokenizer to the same directory,
# so that you can share your model easily on huggingface.co/models =)
if trainer.is_world_master():
if trainer.is_world_process_zero():
tokenizer.save_pretrained(training_args.output_dir)

# Evaluation
Expand All @@ -209,7 +210,7 @@ def compute_metrics_fn(p: EvalPrediction):
output_eval_file = os.path.join(
training_args.output_dir, f"eval_results_{eval_dataset.args.task_name}.txt"
)
if trainer.is_world_master():
if trainer.is_world_process_zero():
with open(output_eval_file, "w") as writer:
logger.info("***** Eval results {} *****".format(eval_dataset.args.task_name))
for key, value in eval_result.items():
Expand Down Expand Up @@ -237,7 +238,7 @@ def compute_metrics_fn(p: EvalPrediction):
f"test_results.txt"
#f"test_results_{test_dataset.args.task_name}.txt"
)
if trainer.is_world_master():
if trainer.is_world_process_zero():
with open(output_test_file, "w") as writer:
logger.info("***** Test results {} *****".format(test_dataset.args.task_name))
writer.write("index\tprediction\n")
Expand Down