From af607447774bdb9795ac2f9fe9a913427559edc9 Mon Sep 17 00:00:00 2001 From: Vasily Date: Thu, 16 Dec 2021 22:12:04 +0300 Subject: [PATCH] Feat/glue superglue update (#1508) * Add wnli config * Update copa config * Fix path * Fix record path * Exclude train from evaluation * Exclude train from evaluation * add ranker * update ranker * feat: deeppavlov version update Co-authored-by: Fedor Ignatov Co-authored-by: slowwavesleep <44175589+slowwavesleep@users.noreply.github.com> --- deeppavlov/_meta.py | 2 +- .../classifiers/glue/glue_mnli_roberta.json | 1 - .../glue/glue_rte_roberta_mnli.json | 1 - .../classifiers/glue/glue_wnli_roberta.json | 147 +++++++++++ .../superglue/superglue_copa_roberta.json | 236 +++++++++++------- .../superglue/superglue_record_roberta.json | 2 +- .../regressors/translation_ranker.json | 105 ++++++++ 7 files changed, 397 insertions(+), 97 deletions(-) create mode 100644 deeppavlov/configs/classifiers/glue/glue_wnli_roberta.json create mode 100644 deeppavlov/configs/regressors/translation_ranker.json diff --git a/deeppavlov/_meta.py b/deeppavlov/_meta.py index d3486ec284..2ee55f7d22 100644 --- a/deeppavlov/_meta.py +++ b/deeppavlov/_meta.py @@ -1,4 +1,4 @@ -__version__ = '0.17.1' +__version__ = '0.17.2' __author__ = 'Neural Networks and Deep Learning lab, MIPT' __description__ = 'An open source library for building end-to-end dialog systems and training chatbots.' __keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot'] diff --git a/deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json b/deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json index 7ff348e303..16b20476c0 100644 --- a/deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json +++ b/deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json @@ -121,7 +121,6 @@ "log_every_n_batches": 250, "show_examples": false, "evaluation_targets": [ - "train", "valid" ], "class_name": "torch_trainer", diff --git a/deeppavlov/configs/classifiers/glue/glue_rte_roberta_mnli.json b/deeppavlov/configs/classifiers/glue/glue_rte_roberta_mnli.json index feb3f17ae5..6001c5cce7 100644 --- a/deeppavlov/configs/classifiers/glue/glue_rte_roberta_mnli.json +++ b/deeppavlov/configs/classifiers/glue/glue_rte_roberta_mnli.json @@ -121,7 +121,6 @@ "log_every_n_epochs": 1, "show_examples": false, "evaluation_targets": [ - "train", "valid" ], "class_name": "torch_trainer", diff --git a/deeppavlov/configs/classifiers/glue/glue_wnli_roberta.json b/deeppavlov/configs/classifiers/glue/glue_wnli_roberta.json new file mode 100644 index 0000000000..34b300c4b8 --- /dev/null +++ b/deeppavlov/configs/classifiers/glue/glue_wnli_roberta.json @@ -0,0 +1,147 @@ +{ + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "BASE_MODEL": "roberta-large", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/classifiers/glue_wnli/{BASE_MODEL}" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/0.16/classifiers/glue_wnli_roberta.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + }, + "dataset_reader": { + "class_name": "huggingface_dataset_reader", + "path": "glue", + "name": "wnli", + "train": "train", + "valid": "validation" + }, + "dataset_iterator": { + "class_name": "huggingface_dataset_iterator", + "features": [ + "sentence1", + "sentence2" + ], + "label": "label", + "seed": 42 + }, + "chainer": { + "in": [ + "sentence1", + "sentence2" + ], + "in_y": [ + "y" + ], + "pipe": [ + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{BASE_MODEL}", + "do_lower_case": false, + "max_seq_length": 192, + "truncation": "longest_first", + "padding": "longest", + "in": [ + "sentence1", + "sentence2" + ], + "out": [ + "bert_features" + ] + }, + { + "id": "classes_vocab", + "class_name": "simple_vocab", + "fit_on": [ + "y" + ], + "save_path": "{MODEL_PATH}/classes.dict", + "load_path": "{MODEL_PATH}/classes.dict", + "in": [ + "y" + ], + "out": [ + "y_ids" + ] + }, + { + "in": [ + "y_ids" + ], + "out": [ + "y_onehot" + ], + "class_name": "one_hotter", + "depth": "#classes_vocab.len", + "single_vector": true + }, + { + "class_name": "torch_transformers_classifier", + "n_classes": "#classes_vocab.len", + "return_probas": true, + "pretrained_bert": "{BASE_MODEL}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 1e-05 + }, + "learning_rate_drop_patience": 3, + "learning_rate_drop_div": 2.0, + "in": [ + "bert_features" + ], + "in_y": [ + "y_ids" + ], + "out": [ + "y_pred_probas" + ] + }, + { + "in": [ + "y_pred_probas" + ], + "out": [ + "y_pred_ids" + ], + "class_name": "proba2labels", + "max_proba": true + }, + { + "in": [ + "y_pred_ids" + ], + "out": [ + "y_pred_labels" + ], + "ref": "classes_vocab" + } + ], + "out": [ + "y_pred_labels" + ] + }, + "train": { + "batch_size": 24, + "metrics": [ + "accuracy" + ], + "epochs": 1, + "val_every_n_batches": 250, + "log_every_n_batches": 250, + "show_examples": false, + "evaluation_targets": [ + "train", + "valid" + ], + "class_name": "torch_trainer", + "tensorboard_log_dir": "{MODEL_PATH}/", + "pytest_max_batches": 2 + } +} diff --git a/deeppavlov/configs/classifiers/superglue/superglue_copa_roberta.json b/deeppavlov/configs/classifiers/superglue/superglue_copa_roberta.json index 1a9fda443d..101f474412 100644 --- a/deeppavlov/configs/classifiers/superglue/superglue_copa_roberta.json +++ b/deeppavlov/configs/classifiers/superglue/superglue_copa_roberta.json @@ -1,97 +1,147 @@ { - "dataset_reader": { - "class_name": "huggingface_dataset_reader", - "path": "super_glue", - "name": "copa", - "train": "train", - "valid": "validation", - "test": "test" - }, - "dataset_iterator": { - "class_name": "huggingface_dataset_iterator", - "features": ["contexts", "choices"], - "label": "label", - "seed": 42 - }, - "chainer": { - "in": ["contexts_list", "choices_list"], - "in_y": ["y"], - "pipe": [ - { - "class_name": "torch_transformers_multiplechoice_preprocessor", - "vocab_file": "{BASE_MODEL}", - "do_lower_case": false, - "max_seq_length": 64, - "in": ["contexts_list", "choices_list"], - "out": ["bert_features"] - }, - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": ["y"], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": ["y"], - "out": ["y_ids"] - }, - { - "in": ["y_ids"], - "out": ["y_onehot"], - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "class_name": "torch_transformers_multiplechoice", - "n_classes": "#classes_vocab.len", - "return_probas": true, - "pretrained_bert": "{BASE_MODEL}", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "optimizer": "AdamW", - "optimizer_parameters": { - "lr": 2e-05 + "dataset_reader": { + "class_name": "huggingface_dataset_reader", + "path": "super_glue", + "name": "copa", + "train": "train", + "valid": "validation", + "test": "test" + }, + "dataset_iterator": { + "class_name": "huggingface_dataset_iterator", + "features": [ + "contexts", + "choices" + ], + "label": "label", + "seed": 42 + }, + "chainer": { + "in": [ + "contexts_list", + "choices_list" + ], + "in_y": [ + "y" + ], + "pipe": [ + { + "class_name": "torch_transformers_multiplechoice_preprocessor", + "vocab_file": "{BASE_MODEL}", + "do_lower_case": false, + "max_seq_length": 64, + "in": [ + "contexts_list", + "choices_list" + ], + "out": [ + "bert_features" + ] + }, + { + "id": "classes_vocab", + "class_name": "simple_vocab", + "fit_on": [ + "y" + ], + "save_path": "{MODEL_PATH}/classes.dict", + "load_path": "{MODEL_PATH}/classes.dict", + "in": [ + "y" + ], + "out": [ + "y_ids" + ] + }, + { + "in": [ + "y_ids" + ], + "out": [ + "y_onehot" + ], + "class_name": "one_hotter", + "depth": "#classes_vocab.len", + "single_vector": true + }, + { + "class_name": "torch_transformers_multiplechoice", + "n_classes": "#classes_vocab.len", + "return_probas": true, + "pretrained_bert": "{BASE_MODEL}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 2e-05 + }, + "learning_rate_drop_patience": 3, + "learning_rate_drop_div": 2.0, + "in": [ + "bert_features" + ], + "in_y": [ + "y_ids" + ], + "out": [ + "y_pred_probas" + ] + }, + { + "in": [ + "y_pred_probas" + ], + "out": [ + "y_pred_ids" + ], + "class_name": "proba2labels", + "max_proba": true + }, + { + "in": [ + "y_pred_ids" + ], + "out": [ + "y_pred_labels" + ], + "ref": "classes_vocab" + } + ], + "out": [ + "y_pred_labels" + ] + }, + "train": { + "batch_size": 16, + "metrics": [ + "accuracy" + ], + "validation_patience": 10, + "val_every_n_epochs": 1, + "log_every_n_epochs": 1, + "show_examples": false, + "evaluation_targets": [ + "train", + "valid" + ], + "class_name": "torch_trainer", + "tensorboard_log_dir": "{MODEL_PATH}/", + "pytest_max_batches": 2, + "pytest_batch_size": 2 + }, + "metadata": { + "variables": { + "BASE_MODEL": "roberta-large", + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/classifiers/superglue_copa_{BASE_MODEL}" }, - "learning_rate_drop_patience": 3, - "learning_rate_drop_div": 2.0, - "in": ["bert_features"], - "in_y": ["y_ids"], - "out": ["y_pred_probas"] - }, - { - "in": ["y_pred_probas"], - "out": ["y_pred_ids"], - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": ["y_pred_ids"], - "out": ["y_pred_labels"], - "ref": "classes_vocab" - } - ], - "out": ["y_pred_labels"] - }, - "train": { - "batch_size": 16, - "metrics": ["accuracy"], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "evaluation_targets": ["train", "valid"], - "class_name": "torch_trainer", - "tensorboard_log_dir": "{MODEL_PATH}/", - "pytest_max_batches": 2, - "pytest_batch_size": 2 - }, - "metadata": { - "variables": { - "BASE_MODEL": "roberta-large", - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/superglue_copa_{BASE_MODEL}" + "download": [ + { + "url": "http://files.deeppavlov.ai/0.17/classifiers/superglue/superglue_copa_roberta.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] } - } } diff --git a/deeppavlov/configs/classifiers/superglue/superglue_record_roberta.json b/deeppavlov/configs/classifiers/superglue/superglue_record_roberta.json index e537a098f3..c21bcf193e 100644 --- a/deeppavlov/configs/classifiers/superglue/superglue_record_roberta.json +++ b/deeppavlov/configs/classifiers/superglue/superglue_record_roberta.json @@ -13,7 +13,7 @@ "download": [ { "url": "http://files.deeppavlov.ai/0.17/classifiers/superglue/superglue_record_roberta.tar.gz", - "subdir": "{MODEL_PATH}" + "subdir": "{MODELS_PATH}" } ] }, diff --git a/deeppavlov/configs/regressors/translation_ranker.json b/deeppavlov/configs/regressors/translation_ranker.json new file mode 100644 index 0000000000..161a6ad2c5 --- /dev/null +++ b/deeppavlov/configs/regressors/translation_ranker.json @@ -0,0 +1,105 @@ +{ + "metadata": + { + "variables": { + "BASE_MODEL": "cointegrated/LaBSE-en-ru", + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/classifiers/ranker_labse", + "SEED": 42 + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/v1/tmp/translation_ranker.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + }, + "dataset_iterator": { + "class_name": "huggingface_dataset_iterator", + "features": [ + "source", + "hypothesis" + ], + "label": "agg_score", + "seed": "{SEED}", + "use_label_name": false + }, + "chainer": { + "in": [ + "source", + "hypothesis" + ], + "in_y": [ + "score" + ], + "pipe": [ + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{BASE_MODEL}", + "do_lower_case": false, + "max_seq_length": 256, + "in": [ + "source", + "hypothesis" + ], + "out": [ + "bert_features" + ] + }, + { + "class_name": "torch_transformers_classifier", + "n_classes": 1, + "return_probas": false, + "pretrained_bert": "{BASE_MODEL}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 2e-06, + "weight_decay": 0.1 + }, + "learning_rate_drop_patience": 3, + "learning_rate_drop_div": 2.0, + "in": [ + "bert_features" + ], + "in_y": [ + "score" + ], + "out": [ + "pred_score" + ] + } + ], + "out": [ + "pred_score" + ] + }, + "train": { + "batch_size": 32, + "metrics": [ + { + "name": "mean_squared_error", + "inputs": [ + "score", + "pred_score" + ] + } + ], + "validation_patience": 10, + "val_every_n_epochs": 1, + "log_every_n_epochs": 1, + "show_examples": false, + "class_name": "torch_trainer", + "evaluation_targets": [ + "train", + "valid" + ], + "metric_optimization": "minimize", + "tensorboard_log_dir": "{MODEL_PATH}/", + "pytest_max_batches": 2, + "pytest_batch_size": 2 + } +}