From a5cae8ae6a4cddab9930477095e300ce7bf3b06b Mon Sep 17 00:00:00 2001
From: Mary Trofimova <mary.vikhreva@gmail.com>
Date: Thu, 1 Aug 2019 16:49:59 +0300
Subject: [PATCH 1/7] docs: change hierarchy (#955)

---
 docs/features/models/index.rst | 19 -----------------
 docs/features/skills/index.rst | 16 ---------------
 docs/index.rst                 | 37 +++++++++++++++++++++++++++++++---
 3 files changed, 34 insertions(+), 38 deletions(-)
 delete mode 100644 docs/features/models/index.rst
 delete mode 100644 docs/features/skills/index.rst
diff --git a/docs/features/models/index.rst b/docs/features/models/index.rst
deleted file mode 100644
index 1e3893dc12..0000000000
--- a/docs/features/models/index.rst
+++ /dev/null
@@ -1,19 +0,0 @@
-Models
-======
-
-.. toctree::
-   :glob:
-   :maxdepth: 1
-
-   BERT-based models <bert>
-   Context Question Answering <squad>
-   Classification <classifiers>
-   Morphological Tagger <morphotagger>
-   Named Entity Recognition <ner>
-   Neural Ranking <neural_ranking>
-   Slot filling <slot_filling>
-   Spelling Correction <spelling_correction>
-   TF-IDF Ranking <tfidf_ranking>
-   Popularity Ranking <popularity_ranking>
-   Knowledge Base Question answering <kbqa>
-
diff --git a/docs/features/skills/index.rst b/docs/features/skills/index.rst
deleted file mode 100644
index 9c8aa8ae62..0000000000
--- a/docs/features/skills/index.rst
+++ /dev/null
@@ -1,16 +0,0 @@
-Skills
-======
-
-.. toctree::
-   :glob:
-   :maxdepth: 1
-
-   Goal-Oriented Dialogue Bot <go_bot>
-   Open-Domain Question Answering <odqa>
-   Pattern Matching <pattern_matching>
-   Sequence-To-Sequence Dialogue Bot <seq2seq_go_bot>
-   Frequently Asked Questions Answering <faq>
-   eCommerce Bot <ecommerce>
-   AIML <aiml_skill>
-   DSL <dsl_skill>
-
diff --git a/docs/index.rst b/docs/index.rst
index 8fc6ba862b..f3da1da9f2 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -9,6 +9,7 @@ Welcome to DeepPavlov's documentation!
    Installation <intro/installation>
    General concepts <intro/overview>
    Configuration file <intro/configuration>
+   Models/Skills overview <features/overview>
 
 
 .. toctree::
@@ -16,13 +17,43 @@ Welcome to DeepPavlov's documentation!
    :maxdepth: 2
    :caption: Features
 
-   Overview <features/overview>
-   Models <features/models/index>
-   Skills <features/skills/index>
    Pre-trained embeddings <features/pretrained_vectors>
    AutoML <features/hypersearch>
 
 
+.. toctree::
+   :glob:
+   :maxdepth: 1
+   :caption: Models
+   
+   BERT-based models <features/models/bert>
+   Context Question Answering <features/models/squad>
+   Classification <features/models/classifiers>
+   Morphological Tagger <features/models/morphotagger>
+   Named Entity Recognition <features/models/ner>
+   Neural Ranking <features/models/neural_ranking>
+   Slot filling <features/models/slot_filling>
+   Spelling Correction <features/models/spelling_correction>
+   TF-IDF Ranking <features/models/tfidf_ranking>
+   Popularity Ranking <features/models/popularity_ranking>
+   Knowledge Base Question answering <features/models/kbqa>
+
+
+.. toctree::
+   :glob:
+   :maxdepth: 1
+   :caption: Skills
+   
+   Goal-Oriented Dialogue Bot <features/skills/go_bot>
+   Open-Domain Question Answering <features/skills/odqa>
+   Pattern Matching <features/skills/pattern_matching>
+   Sequence-To-Sequence Dialogue Bot <features/skills/seq2seq_go_bot>
+   Frequently Asked Questions Answering <features/skills/faq>
+   eCommerce Bot <features/skills/ecommerce>
+   AIML <features/skills/aiml_skill>
+   DSL <features/skills/dsl_skill>
+
+
 .. toctree::
    :glob:
    :maxdepth: 3

From b9ee06015609d6654c04ef422ccd473a01a7c3a5 Mon Sep 17 00:00:00 2001
From: Aleksei Lymar <yoptar@gmail.com>
Date: Mon, 5 Aug 2019 10:19:19 +0300
Subject: [PATCH 2/7] docs: remove duplicate breaking changes block for 0.5.0

resolves #958
---
 README.md | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/README.md b/README.md
index e7f3e9a6ca..9675db48dd 100644
--- a/README.md
+++ b/README.md
@@ -211,11 +211,6 @@ and others in the Integrations section for more info.
 - models depending on `tensorflow` require `CUDA 10.0` to run on GPU instead of `CUDA 9.0`
 - scikit-learn models have to be redownloaded or retrained
 
-**Breaking changes in version 0.5.0**
-- dependencies have to be reinstalled for most pipeline configurations
-- models depending on `tensorflow` require `CUDA 10.0` to run on GPU instead of `CUDA 9.0`
-- scikit-learn models have to be redownloaded or retrained
-
 **Breaking changes in version 0.4.0!**
 - default target variable name for [neural evolution](https://docs.deeppavlov.ai/en/0.4.0/intro/hypersearch.html#parameters-evolution-for-deeppavlov-models)
 was changed from `MODELS_PATH` to `MODEL_PATH`.

From 500e96089680cf4f2b7c88ed3b0d7b36e0e1fa80 Mon Sep 17 00:00:00 2001
From: Sergei Grechanik <grechanik.sergey@huawei.com>
Date: Tue, 13 Aug 2019 14:34:42 +0300
Subject: [PATCH 3/7] feat: try and resume download after interruptions (#963)

* fix: simple_download: make it safer to interrupt downloads

* fix suffix addition

Co-Authored-By: Aleksei Lymar <yoptar@gmail.com>

* fix the case when the server doesn't provide the content length

* feat: do not ask to remove a temp file on download errors
---
 deeppavlov/core/data/utils.py | 49 +++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 17 deletions(-)

diff --git a/deeppavlov/core/data/utils.py b/deeppavlov/core/data/utils.py
index 10e5ef3592..0c7abda692 100644
--- a/deeppavlov/core/data/utils.py
+++ b/deeppavlov/core/data/utils.py
@@ -55,29 +55,44 @@ def simple_download(url: str, destination: [Path, str]):
 
     destination = Path(destination)
     destination.parent.mkdir(parents=True, exist_ok=True)
+    temporary = destination.with_suffix(destination.suffix + '.part')
 
     headers = {'dp-token': get_download_token()}
     r = requests.get(url, stream=True, headers=headers)
     total_length = int(r.headers.get('content-length', 0))
 
     log.info('Downloading from {} to {}'.format(url, destination))
-    with destination.open('wb') as f, tqdm(total=total_length, unit='B', unit_scale=True) as pbar:
+
+    if temporary.exists() and temporary.stat().st_size > total_length:
+        temporary.write_bytes(b'')  # clearing temporary file when total_length is inconsistent
+
+    with temporary.open('ab') as f:
         done = False
-        downloaded = 0
-        while not done:
-            for chunk in r.iter_content(chunk_size=CHUNK):
-                if chunk:  # filter out keep-alive new chunks
-                    downloaded += len(chunk)
-                    pbar.update(len(chunk))
-                    f.write(chunk)
-            if downloaded < total_length:
-                log.warn(f'Download stopped abruptly, trying to resume from {downloaded} to reach {total_length}')
-                headers['Range'] = f'bytes={downloaded}-'
-                r = requests.get(url, headers=headers, stream=True)
-                if total_length - downloaded != int(r.headers['content-length']):
-                    raise RuntimeError('It looks like the server does not support resuming downloads')
-            else:
-                done = True
+        downloaded = f.tell()
+        if downloaded != 0:
+            log.warn(f'Found a partial download {temporary}')
+        with tqdm(initial=downloaded, total=total_length, unit='B', unit_scale=True) as pbar:
+            while not done:
+                if downloaded != 0:
+                    log.warn(f'Download stopped abruptly, trying to resume from {downloaded} '
+                             f'to reach {total_length}')
+                    headers['Range'] = f'bytes={downloaded}-'
+                    r = requests.get(url, headers=headers, stream=True)
+                    if 'content-length' not in r.headers or \
+                            total_length - downloaded != int(r.headers['content-length']):
+                        raise RuntimeError(f'It looks like the server does not support resuming '
+                                           f'downloads.')
+                for chunk in r.iter_content(chunk_size=CHUNK):
+                    if chunk:  # filter out keep-alive new chunks
+                        downloaded += len(chunk)
+                        pbar.update(len(chunk))
+                        f.write(chunk)
+                if downloaded >= total_length:
+                    # Note that total_length is 0 if the server didn't return the content length,
+                    # in this case we perform just one iteration and assume that we are done.
+                    done = True
+
+    temporary.rename(destination)
 
 
 def download(dest_file_path: [List[Union[str, Path]]], source_url: str, force_download=True):
@@ -477,4 +492,4 @@ def set_query_parameter(url, param_name, param_value):
     query_params[param_name] = [param_value]
     new_query_string = urlencode(query_params, doseq=True)
 
-    return urlunsplit((scheme, netloc, path, new_query_string, fragment))
\ No newline at end of file
+    return urlunsplit((scheme, netloc, path, new_query_string, fragment))

From f38bb796ddbd5ead9a1633ea4b139767a1e036c0 Mon Sep 17 00:00:00 2001
From: Dilyara Baymurzina <dilyara.rimovna@gmail.com>
Date: Tue, 13 Aug 2019 14:36:38 +0300
Subject: [PATCH 4/7] feat: add new config for convers vs info classification
 (#967)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: new config for convers vs info classification on conversational bert on yahoo

* fix: brackets
---
 .../yahoo_convers_vs_info_bert.json           | 168 ++++++++++++++++++
 docs/features/models/classifiers.rst          |   6 +-
 docs/features/overview.rst                    |   6 +-
 3 files changed, 176 insertions(+), 4 deletions(-)
 create mode 100644 deeppavlov/configs/classifiers/yahoo_convers_vs_info_bert.json

diff --git a/deeppavlov/configs/classifiers/yahoo_convers_vs_info_bert.json b/deeppavlov/configs/classifiers/yahoo_convers_vs_info_bert.json
new file mode 100644
index 0000000000..c0136a0c7c
--- /dev/null
+++ b/deeppavlov/configs/classifiers/yahoo_convers_vs_info_bert.json
@@ -0,0 +1,168 @@
+{
+  "chainer": {
+    "in": [
+      "x"
+    ],
+    "in_y": [
+      "y"
+    ],
+    "pipe": [
+      {
+        "class_name": "bert_preprocessor",
+        "vocab_file": "{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/vocab.txt",
+        "do_lower_case": false,
+        "max_seq_length": 64,
+        "in": [
+          "x"
+        ],
+        "out": [
+          "bert_features"
+        ]
+      },
+      {
+        "id": "classes_vocab",
+        "class_name": "simple_vocab",
+        "fit_on": [
+          "y"
+        ],
+        "save_path": "{MODEL_PATH}/classes.dict",
+        "load_path": "{MODEL_PATH}/classes.dict",
+        "in": [
+          "y"
+        ],
+        "out": [
+          "y_ids"
+        ]
+      },
+      {
+        "in": [
+          "y_ids"
+        ],
+        "out": [
+          "y_onehot"
+        ],
+        "class_name": "one_hotter",
+        "id": "my_one_hotter",
+        "depth": "#classes_vocab.len",
+        "single_vector": true
+      },
+      {
+        "class_name": "bert_classifier",
+        "n_classes": "#classes_vocab.len",
+        "return_probas": true,
+        "one_hot_labels": true,
+        "bert_config_file": "{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/bert_config.json",
+        "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/bert_model.ckpt",
+        "save_path": "{MODEL_PATH}/model",
+        "load_path": "{MODEL_PATH}/model",
+        "keep_prob": 0.5,
+        "learning_rate": 1e-05,
+        "learning_rate_drop_patience": 5,
+        "learning_rate_drop_div": 2.0,
+        "in": [
+          "bert_features"
+        ],
+        "in_y": [
+          "y_onehot"
+        ],
+        "out": [
+          "y_pred_probas"
+        ]
+      },
+      {
+        "in": [
+          "y_pred_probas"
+        ],
+        "out": [
+          "y_pred_ids"
+        ],
+        "class_name": "proba2labels",
+        "max_proba": true
+      },
+      {
+        "in": [
+          "y_pred_ids"
+        ],
+        "out": [
+          "y_pred_labels"
+        ],
+        "ref": "classes_vocab"
+      },
+      {
+        "ref": "my_one_hotter",
+        "in": [
+          "y_pred_ids"
+        ],
+        "out": [
+          "y_pred_onehot"
+        ]
+      }
+    ],
+    "out": [
+      "y_pred_labels"
+    ]
+  },
+  "train": {
+    "epochs": 100,
+    "batch_size": 64,
+    "metrics": [
+      {
+        "name": "roc_auc",
+        "inputs": [
+          "y_onehot",
+          "y_pred_probas"
+        ]
+      },
+      {
+        "name": "sets_accuracy",
+        "inputs": [
+          "y",
+          "y_pred_labels"
+        ]
+      },
+      {
+        "name": "f1_macro",
+        "inputs": [
+          "y",
+          "y_pred_labels"
+        ]
+      }
+    ],
+    "validation_patience": 20,
+    "val_every_n_epochs": 1,
+    "log_every_n_epochs": 1,
+    "show_examples": false,
+    "evaluation_targets": [
+      "train",
+      "valid"
+    ],
+    "tensorboard_log_dir": "{MODEL_PATH}/",
+    "class_name": "nn_trainer"
+  },
+  "metadata": {
+    "variables": {
+      "ROOT_PATH": "~/.deeppavlov",
+      "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
+      "MODELS_PATH": "{ROOT_PATH}/models",
+      "MODEL_PATH": "{MODELS_PATH}/classifiers/yahoo_convers_vs_info_v3"
+    },
+    "requirements": [
+      "{DEEPPAVLOV_PATH}/requirements/tf.txt",
+      "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
+    ],
+    "labels": {
+      "telegram_utils": "IntentModel",
+      "server_utils": "KerasIntentModel"
+    },
+    "download": [
+      {
+        "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/conversational_cased_L-12_H-768_A-12.tar.gz",
+        "subdir": "{DOWNLOADS_PATH}/bert_models"
+      },
+      {
+        "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/yahoo_convers_vs_info_v3.tar.gz",
+        "subdir": "{MODELS_PATH}/classifiers/"
+      }
+    ]
+  }
+}
diff --git a/docs/features/models/classifiers.rst b/docs/features/models/classifiers.rst
index c951f34f98..12a77e7ee9 100644
--- a/docs/features/models/classifiers.rst
+++ b/docs/features/models/classifiers.rst
@@ -299,7 +299,9 @@ Therefore, this model is available only for interaction.
 |                  |                    |      | :config:`English Conversational BERT <classifiers/insults_kaggle_conv_bert.json>`               |             | 0.9389 | 0.8941 |  1200 Mb  |
 +------------------+--------------------+      +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+
 | 5 topics         | `AG News`_         |      | :config:`Wiki emb <classifiers/topic_ag_news.json>`                                             | Accuracy    | 0.8922 | 0.9059 |  8.5 Gb   |
-+------------------+--------------------+------+-------------------------------------------------------------------------------------------------+             +--------+--------+-----------+
++------------------+--------------------+      +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+
+| Intent           |`Yahoo-L31`_        |      | :config:`Yahoo-L31 on conversational BERT <classifiers/yahoo_convers_vs_info_bert.json>`        | ROC-AUC     | 0.9436 |   --   |  1200 Mb  |
++------------------+--------------------+------+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+
 | Sentiment        |`Twitter mokoron`_  | Ru   | :config:`RuWiki+Lenta emb w/o preprocessing <classifiers/sentiment_twitter.json>`               |             | 0.9965 | 0.9961 |  6.2 Gb   |
 +                  +                    +      +-------------------------------------------------------------------------------------------------+             +--------+--------+-----------+
 |                  |                    |      | :config:`RuWiki+Lenta emb with preprocessing <classifiers/sentiment_twitter_preproc.json>`      |             | 0.7823 | 0.7759 |  6.2 Gb   |
@@ -312,7 +314,7 @@ Therefore, this model is available only for interaction.
 +                  +                    +      +-------------------------------------------------------------------------------------------------+             +--------+--------+-----------+
 |                  |                    |      | :config:`Multi-language BERT <classifiers/rusentiment_bert.json>`                               |             | 0.6809 | 0.7193 |  1900 Mb  |
 +------------------+--------------------+      +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+
-| Intent           |`Yahoo-L31`_        |      | :config:`Yahoo-L31 on ELMo <classifiers/yahoo_convers_vs_info.json>` pre-trained on `Yahoo-L6`_ | ROC-AUC     | 0.9412 |   --   |  700 Mb   |
+| Intent           |Ru like `Yahoo-L31`_|      | :config:`Conversational vs Informational on ELMo <classifiers/yahoo_convers_vs_info.json>`      | ROC-AUC     | 0.9412 |   --   |  700 Mb   |
 +------------------+--------------------+------+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+
 
 .. _`DSTC 2`: http://camdial.org/~mh521/dstc/
diff --git a/docs/features/overview.rst b/docs/features/overview.rst
index 697dd5b1ac..a8917f4ed9 100644
--- a/docs/features/overview.rst
+++ b/docs/features/overview.rst
@@ -85,7 +85,9 @@ Several pre-trained models are available and presented in Table below.
 |                  |                    |      | :config:`English Conversational BERT <classifiers/insults_kaggle_conv_bert.json>`               |             | 0.9389 | 0.8941 |  1200 Mb  |
 +------------------+--------------------+      +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+
 | 5 topics         | `AG News`_         |      | :config:`Wiki emb <classifiers/topic_ag_news.json>`                                             | Accuracy    | 0.8922 | 0.9059 |  8.5 Gb   |
-+------------------+--------------------+------+-------------------------------------------------------------------------------------------------+             +--------+--------+-----------+
++------------------+--------------------+      +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+
+| Intent           |`Yahoo-L31`_        |      | :config:`Yahoo-L31 on conversational BERT <classifiers/yahoo_convers_vs_info_bert.json>`        | ROC-AUC     | 0.9436 |   --   |  1200 Mb  |
++------------------+--------------------+------+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+
 | Sentiment        |`Twitter mokoron`_  | Ru   | :config:`RuWiki+Lenta emb w/o preprocessing <classifiers/sentiment_twitter.json>`               |             | 0.9965 | 0.9961 |  6.2 Gb   |
 +                  +                    +      +-------------------------------------------------------------------------------------------------+             +--------+--------+-----------+
 |                  |                    |      | :config:`RuWiki+Lenta emb with preprocessing <classifiers/sentiment_twitter_preproc.json>`      |             | 0.7823 | 0.7759 |  6.2 Gb   |
@@ -98,7 +100,7 @@ Several pre-trained models are available and presented in Table below.
 +                  +                    +      +-------------------------------------------------------------------------------------------------+             +--------+--------+-----------+
 |                  |                    |      | :config:`Multi-language BERT <classifiers/rusentiment_bert.json>`                               |             | 0.6809 | 0.7193 |  1900 Mb  |
 +------------------+--------------------+      +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+
-| Intent           |`Yahoo-L31`_        |      | :config:`Yahoo-L31 on ELMo <classifiers/yahoo_convers_vs_info.json>` pre-trained on `Yahoo-L6`_ | ROC-AUC     | 0.9412 |   --   |  700 Mb   |
+| Intent           |Ru like `Yahoo-L31`_|      | :config:`Conversational vs Informational on ELMo <classifiers/yahoo_convers_vs_info.json>`      | ROC-AUC     | 0.9412 |   --   |  700 Mb   |
 +------------------+--------------------+------+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+
 
 .. [1] Coucke A. et al. Snips voice platform: an embedded spoken language understanding system for private-by-design voice interfaces //arXiv preprint arXiv:1805.10190. – 2018.

From d2929075614aa99db914d67ff81674b94b8e0828 Mon Sep 17 00:00:00 2001
From: yurakuratov <yurakuratov@gmail.com>
Date: Tue, 13 Aug 2019 16:05:14 +0300
Subject: [PATCH 5/7] feat: add multilingual squad config (#968)

* feat: add multilingual squad config

* fix: remove training section from mult squad config
---
 .../squad_bert_multilingual_freezed_emb.json  | 74 +++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 deeppavlov/configs/squad/squad_bert_multilingual_freezed_emb.json

diff --git a/deeppavlov/configs/squad/squad_bert_multilingual_freezed_emb.json b/deeppavlov/configs/squad/squad_bert_multilingual_freezed_emb.json
new file mode 100644
index 0000000000..29a52beaae
--- /dev/null
+++ b/deeppavlov/configs/squad/squad_bert_multilingual_freezed_emb.json
@@ -0,0 +1,74 @@
+{
+  "chainer": {
+    "in": ["context_raw", "question_raw"],
+    "in_y": ["ans_raw", "ans_raw_start"],
+    "pipe": [
+      {
+        "class_name": "bert_preprocessor",
+        "vocab_file": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/vocab.txt",
+        "do_lower_case": false,
+        "max_seq_length": 384,
+        "in": ["question_raw", "context_raw"],
+        "out": ["bert_features"]
+      },
+      {
+        "class_name": "squad_bert_mapping",
+        "do_lower_case": false,
+        "in": ["context_raw", "bert_features"],
+        "out": ["subtok2chars", "char2subtoks"]
+      },
+      {
+        "class_name": "squad_bert_ans_preprocessor",
+        "do_lower_case": false,
+        "in": ["ans_raw", "ans_raw_start","char2subtoks"],
+        "out": ["ans", "ans_start", "ans_end"]
+      },
+      {
+        "class_name": "squad_bert_model",
+        "bert_config_file": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/bert_config.json",
+        "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/bert_model.ckpt",
+        "save_path": "{MODELS_PATH}/squad_bert/model_multi_freezed",
+        "load_path": "{MODELS_PATH}/squad_bert/model_multi_freezed",
+        "keep_prob": 0.5,
+        "learning_rate": 2e-05,
+        "learning_rate_drop_patience": 2,
+        "learning_rate_drop_div": 2.0,
+        "in": ["bert_features"],
+        "in_y": ["ans_start", "ans_end"],
+        "out": ["ans_start_predicted", "ans_end_predicted", "logits"]
+      },
+      {
+        "class_name": "squad_bert_ans_postprocessor",
+        "in": ["ans_start_predicted", "ans_end_predicted", "context_raw", "bert_features", "subtok2chars"],
+        "out": ["ans_predicted", "ans_start_predicted", "ans_end_predicted"]
+      }
+    ],
+    "out": ["ans_predicted", "ans_start_predicted", "logits"]
+  },
+  "metadata": {
+    "variables": {
+      "ROOT_PATH": "~/.deeppavlov",
+      "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
+      "MODELS_PATH": "{ROOT_PATH}/models"
+    },
+    "requirements": [
+      "{DEEPPAVLOV_PATH}/requirements/tf.txt",
+      "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
+    ],
+    "labels": {
+      "telegram_utils": "SquadModel",
+      "server_utils": "SquadModel"
+    },
+    "download": [
+      {
+        "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12.zip",
+        "subdir": "{DOWNLOADS_PATH}/bert_models"
+      },
+      {
+        "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_bert_mult_freezed.tar.gz",
+        "subdir": "{MODELS_PATH}"
+      }
+      ]
+  }
+}
+

From 2d79c74815f73c3f703451933c9217d62c778c45 Mon Sep 17 00:00:00 2001
From: Aleksei Lymar <yoptar@gmail.com>
Date: Tue, 13 Aug 2019 16:22:47 +0300
Subject: [PATCH 6/7] chore: update version to 0.5.1

---
 deeppavlov/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deeppavlov/__init__.py b/deeppavlov/__init__.py
index b40b8f7e35..b5f7926eed 100644
--- a/deeppavlov/__init__.py
+++ b/deeppavlov/__init__.py
@@ -37,7 +37,7 @@ def evaluate_model(config: [str, Path, dict], download: bool = False, recursive:
 except ImportError:
     'Assuming that requirements are not yet installed'
 
-__version__ = '0.5.0'
+__version__ = '0.5.1'
 __author__ = 'Neural Networks and Deep Learning lab, MIPT'
 __description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
 __keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']

From 6eb02f73573ac82ec6362b1aad3a8cf57375da6b Mon Sep 17 00:00:00 2001
From: Pavel Gulyaev <zagerpaul@gmail.com>
Date: Tue, 13 Aug 2019 16:45:03 +0300
Subject: [PATCH 7/7] feat: add squad_zh_bert config (#970)

---
 deeppavlov/configs/squad/squad_zh_bert.json | 116 ++++++++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100644 deeppavlov/configs/squad/squad_zh_bert.json

diff --git a/deeppavlov/configs/squad/squad_zh_bert.json b/deeppavlov/configs/squad/squad_zh_bert.json
new file mode 100644
index 0000000000..9fc95aa8b5
--- /dev/null
+++ b/deeppavlov/configs/squad/squad_zh_bert.json
@@ -0,0 +1,116 @@
+{
+  "dataset_reader": {
+    "class_name": "squad_dataset_reader",
+    "dataset": "SQuAD",
+    "url": "http://files.deeppavlov.ai/datasets/DRCD.tar.gz",
+    "data_path": "{DOWNLOADS_PATH}/DRCD_train/"
+  },
+  "dataset_iterator": {
+    "class_name": "squad_iterator",
+    "seed": 1337,
+    "shuffle": true
+  },
+  "chainer": {
+    "in": ["context_raw", "question_raw"],
+    "in_y": ["ans_raw", "ans_raw_start"],
+    "pipe": [
+      {
+        "class_name": "bert_preprocessor",
+        "vocab_file": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/vocab.txt",
+        "do_lower_case": "{lowercase}",
+        "max_seq_length": 384,
+        "in": ["question_raw", "context_raw"],
+        "out": ["bert_features"]
+      },
+      {
+        "class_name": "squad_bert_mapping",
+        "do_lower_case": "{lowercase}",
+        "in": ["context_raw", "bert_features"],
+        "out": ["subtok2chars", "char2subtoks"]
+      },
+      {
+        "class_name": "squad_bert_ans_preprocessor",
+        "do_lower_case": "{lowercase}",
+        "in": ["ans_raw", "ans_raw_start","char2subtoks"],
+        "out": ["ans", "ans_start", "ans_end"]
+      },
+      {
+        "class_name": "squad_bert_model",
+        "bert_config_file": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/bert_config.json",
+        "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/bert_model.ckpt",
+        "save_path": "{MODELS_PATH}/squad_zh_bert/model_multi",
+        "load_path": "{MODELS_PATH}/squad_zh_bert/model_multi",
+        "keep_prob": 0.5,
+        "learning_rate": 2e-05,
+        "learning_rate_drop_patience": 3,
+        "learning_rate_drop_div": 2.0,
+        "in": ["bert_features"],
+        "in_y": ["ans_start", "ans_end"],
+        "out": ["ans_start_predicted", "ans_end_predicted", "logits", "score"]
+      },
+      {
+        "class_name": "squad_bert_ans_postprocessor",
+        "in": ["ans_start_predicted", "ans_end_predicted", "context_raw", "bert_features", "subtok2chars"],
+        "out": ["ans_predicted", "ans_start_predicted", "ans_end_predicted"]
+      }
+    ],
+    "out": ["ans_predicted", "ans_start_predicted", "logits"]
+  },
+  "train": {
+    "show_examples": false,
+    "test_best": false,
+    "validate_best": true,
+    "log_every_n_batches": 250,
+    "val_every_n_batches": 500,
+    "batch_size": 10,
+    "pytest_max_batches": 2,
+    "pytest_batch_size": 5,
+    "validation_patience": 10,
+    "metrics": [
+      {
+        "name": "squad_v2_f1",
+        "inputs": ["ans", "ans_predicted"]
+      },
+      {
+        "name": "squad_v2_em",
+        "inputs": ["ans", "ans_predicted"]
+      },
+      {
+        "name": "squad_v1_f1",
+        "inputs": ["ans", "ans_predicted"]
+      },
+      {
+        "name": "squad_v1_em",
+        "inputs": ["ans", "ans_predicted"]
+      }
+    ],
+    "tensorboard_log_dir": "{MODELS_PATH}/squad_zh_bert/logs"
+  },
+  "metadata": {
+    "variables": {
+      "lowercase": false,
+      "ROOT_PATH": "~/.deeppavlov",
+      "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
+      "MODELS_PATH": "{ROOT_PATH}/models"
+    },
+    "requirements": [
+      "{DEEPPAVLOV_PATH}/requirements/tf.txt",
+      "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt"
+    ],
+    "labels": {
+      "telegram_utils": "SquadModel",
+      "server_utils": "SquadModel"
+    },
+    "download": [
+      {
+        "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12.zip",
+        "subdir": "{DOWNLOADS_PATH}/bert_models"
+      },
+      {
+        "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_zh.tar.gz",
+        "subdir": "{MODELS_PATH}"
+      }
+      ]
+  }
+}
+