From 84650032ec89dcef8cbdf7e2180678a82d628e4b Mon Sep 17 00:00:00 2001 From: Aleksey Lymar Date: Thu, 12 Jul 2018 20:07:58 +0300 Subject: [PATCH 1/6] feat: add cache option for downloads to try and speed the tests up * feat: add cache option for downloads to try and speed the tests up * feat: try and resume broken downloads * fix: ignore the case when downloaded extra * refactor: destroy tqdm object properly in download function * feat: raise when unable to resume broken download * fix: correct arg name for download * feat: cache extracted archives alongside downloads * fix: fix copying cached extracted archives * feat: install spacy en only if it's not installed * feat: do not install spacy en in Jenkinsfile * fix: correctly copy extracted directories * fix: install tensorflow-gpu in jenkinsfile --- Jenkinsfile | 4 +- deeppavlov/core/data/utils.py | 177 ++++++++++++++++++++----------- tests/test_quick_start.py | 15 ++- utils/pip_wrapper/pip_wrapper.py | 6 +- 4 files changed, 136 insertions(+), 66 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 86ca20f9d7..0c1626c927 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -11,11 +11,9 @@ node('gpu') { sh """ virtualenv --python=python3 ".venv-$BUILD_NUMBER" . .venv-$BUILD_NUMBER/bin/activate - sed -ri 's/^\\s*tensorflow\\s*(=|<|>|;|\$)/tensorflow-gpu\\1/g' requirements.txt sed -i "s/stream=True/stream=False/g" deeppavlov/core/data/utils.py python setup.py develop - pip install http://lnsigo.mipt.ru/export/en_core_web_sm-2.0.0.tar.gz - python -m spacy link en_core_web_sm en --force + pip install -r requirements/tf-gpu.txt pip install -r requirements-dev.txt """ } diff --git a/deeppavlov/core/data/utils.py b/deeppavlov/core/data/utils.py index dfa370efad..66346bdcc7 100644 --- a/deeppavlov/core/data/utils.py +++ b/deeppavlov/core/data/utils.py @@ -13,8 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. """ - +import os +from hashlib import md5 from pathlib import Path +from urllib.parse import urlparse +from typing import List, Union import requests from tqdm import tqdm @@ -35,7 +38,36 @@ tqdm.monitor_interval = 0 -def download(dest_file_path, source_url, force_download=True): +def simple_download(url: str, destination: [Path, str]): + CHUNK = 32 * 1024 + + destination = Path(destination) + destination.parent.mkdir(parents=True, exist_ok=True) + + r = requests.get(url, stream=True) + total_length = int(r.headers.get('content-length', 0)) + + log.info('Downloading from {} to {}'.format(url, destination)) + with destination.open('wb') as f, tqdm(total=total_length, unit='B', unit_scale=True) as pbar: + done = False + downloaded = 0 + while not done: + for chunk in r.iter_content(chunk_size=CHUNK): + if chunk: # filter out keep-alive new chunks + downloaded += len(chunk) + pbar.update(len(chunk)) + f.write(chunk) + if downloaded < total_length: + log.warn(f'Download stopped abruptly, trying to resume from {downloaded} to reach {total_length}') + resume_header = {'Range': f'bytes={downloaded}-'} + r = requests.get(url, headers=resume_header, stream=True) + if total_length - downloaded != int(r.headers['content-length']): + raise RuntimeError('It looks like the server does not support resuming downloads') + else: + done = True + + +def download(dest_file_path: [List[Union[str, Path]]], source_url: str, force_download=True): """Download a file from URL to one or several target locations Args: @@ -44,43 +76,40 @@ def download(dest_file_path, source_url, force_download=True): force_download: download file if it already exists, or not """ - CHUNK = 16 * 1024 - - if isinstance(dest_file_path, str): - dest_file_path = [Path(dest_file_path).absolute()] - elif isinstance(dest_file_path, Path): - dest_file_path = [dest_file_path.absolute()] - elif isinstance(dest_file_path, list): - dest_file_path = [Path(path) for path in dest_file_path] - - first_dest_path = dest_file_path.pop() - if force_download or not first_dest_path.exists(): - first_dest_path.parent.mkdir(parents=True, exist_ok=True) - - r = requests.get(source_url, stream=True) - total_length = int(r.headers.get('content-length', 0)) - - with first_dest_path.open('wb') as f: - log.info('Downloading from {} to {}'.format(source_url, first_dest_path)) - - pbar = tqdm(total=total_length, unit='B', unit_scale=True) - for chunk in r.iter_content(chunk_size=CHUNK): - if chunk: # filter out keep-alive new chunks - pbar.update(len(chunk)) - f.write(chunk) - f.close() + if isinstance(dest_file_path, list): + dest_file_paths = [Path(path) for path in dest_file_path] else: - log.info('File already exists in {}'.format(first_dest_path)) + dest_file_paths = [Path(dest_file_path).absolute()] + + if not force_download: + to_check = list(dest_file_paths) + dest_file_paths = [] + for p in to_check: + if p.exists(): + log.info(f'File already exists in {p}') + else: + dest_file_paths.append(p) + + if dest_file_paths: + cache_dir = os.getenv('DP_CACHE_DIR') + cached_exists = False + if cache_dir: + first_dest_path = Path(cache_dir) / md5(source_url.encode('utf8')).hexdigest()[:15] + cached_exists = first_dest_path.exists() + else: + first_dest_path = dest_file_paths.pop() - while len(dest_file_path) > 0: - dest_path = dest_file_path.pop() + if not cached_exists: + first_dest_path.parent.mkdir(parents=True, exist_ok=True) - if force_download or not dest_path.exists(): + simple_download(source_url, first_dest_path) + else: + log.info(f'Found cached {source_url} in {first_dest_path}') + + for dest_path in dest_file_paths: dest_path.parent.mkdir(parents=True, exist_ok=True) shutil.copy(str(first_dest_path), str(dest_path)) - else: - log.info('File already exists in {}'.format(dest_path)) def untar(file_path, extract_folder=None): @@ -100,7 +129,7 @@ def untar(file_path, extract_folder=None): tar.close() -def ungzip(file_path, extract_folder=None): +def ungzip(file_path, extract_path: Path=None): """Simple .gz archive extractor Args: @@ -110,9 +139,7 @@ def ungzip(file_path, extract_folder=None): """ CHUNK = 16 * 1024 file_path = Path(file_path) - extract_path = file_path.with_suffix('') - if extract_folder is not None: - extract_path = Path(extract_folder) / extract_path.name + extract_path = extract_path or file_path.with_suffix('') with gzip.open(file_path, 'rb') as fin, extract_path.open('wb') as fout: while True: @@ -122,7 +149,7 @@ def ungzip(file_path, extract_folder=None): fout.write(block) -def download_decompress(url, download_path, extract_paths=None): +def download_decompress(url: str, download_path: [Path, str], extract_paths=None): """Download and extract .tar.gz or .gz file to one or several target locations. The archive is deleted if extraction was successful. @@ -132,36 +159,66 @@ def download_decompress(url, download_path, extract_paths=None): until the end of extraction extract_paths: path or list of paths where contents of archive will be extracted """ - file_name = url.split('/')[-1] + file_name = Path(urlparse(url).path).name download_path = Path(download_path) - arch_file_path = download_path / file_name - download(arch_file_path, url) if extract_paths is None: extract_paths = [download_path] - elif isinstance(extract_paths, str): - extract_paths = [Path(extract_paths)] elif isinstance(extract_paths, list): extract_paths = [Path(path) for path in extract_paths] + else: + extract_paths = [Path(extract_paths)] - if url.endswith(('.tar.gz', '.gz', '.zip')): - for extract_path in extract_paths: - log.info('Extracting {} archive into {}'.format(arch_file_path, extract_path)) - extract_path.mkdir(parents=True, exist_ok=True) - - if url.endswith('.tar.gz'): - untar(arch_file_path, extract_path) - elif url.endswith('.gz'): - ungzip(arch_file_path, extract_path) - elif url.endswith('.zip'): - zip_ref = zipfile.ZipFile(arch_file_path, 'r') - zip_ref.extractall(extract_path) - zip_ref.close() - - arch_file_path.unlink() + cache_dir = os.getenv('DP_CACHE_DIR') + extracted = False + if cache_dir: + cache_dir = Path(cache_dir) + url_hash = md5(url.encode('utf8')).hexdigest()[:15] + arch_file_path = cache_dir / url_hash + extracted_path = cache_dir / (url_hash + '_extracted') + extracted = extracted_path.exists() + if not extracted and not arch_file_path.exists(): + simple_download(url, arch_file_path) else: - log.error('File {} has unsupported format. ' - 'Not extracted, downloaded to {}'.format(file_name, arch_file_path)) + arch_file_path = download_path / file_name + simple_download(url, arch_file_path) + extracted_path = extract_paths.pop() + + if not extracted: + log.info('Extracting {} archive into {}'.format(arch_file_path, extracted_path)) + extracted_path.mkdir(parents=True, exist_ok=True) + + if file_name.endswith('.tar.gz'): + untar(arch_file_path, extracted_path) + elif file_name.endswith('.gz'): + ungzip(arch_file_path, extracted_path / Path(file_name).with_suffix('').name) + elif file_name.endswith('.zip'): + with zipfile.ZipFile(arch_file_path, 'r') as zip_ref: + zip_ref.extractall(extracted_path) + else: + raise RuntimeError(f'Trying to extract an unknown type of archive {file_name}') + + if not cache_dir: + arch_file_path.unlink() + + for extract_path in extract_paths: + for src in extracted_path.iterdir(): + dest = extract_path / src.name + if src.is_dir(): + copytree(src, dest) + else: + extract_path.mkdir(parents=True, exist_ok=True) + shutil.copy(str(src), str(dest)) + + +def copytree(src: Path, dest: Path): + dest.mkdir(parents=True, exist_ok=True) + for f in src.iterdir(): + f_dest = dest / f.name + if f.is_dir(): + copytree(f, f_dest) + else: + shutil.copy(str(f), str(f_dest)) def load_vocab(vocab_path): diff --git a/tests/test_quick_start.py b/tests/test_quick_start.py index 17852c802a..ee31f89003 100644 --- a/tests/test_quick_start.py +++ b/tests/test_quick_start.py @@ -1,8 +1,10 @@ import io import json +import os from pathlib import Path import shutil import sys +from tempfile import TemporaryDirectory import pytest import pexpect @@ -14,6 +16,7 @@ from utils.server_utils.server import get_server_params, SERVER_CONFIG_FILENAME +cache_dir = None tests_dir = Path(__file__, '..').resolve() test_configs_path = tests_dir / "deeppavlov" / "configs" src_dir = tests_dir.parent / "deeppavlov" / "configs" @@ -163,11 +166,18 @@ def setup_module(): for (conf_file, _, _), _ in conf_dict.items(): download_config(conf_file) + global cache_dir + cache_dir = TemporaryDirectory() + os.environ['DP_CACHE_DIR'] = cache_dir.name + def teardown_module(): shutil.rmtree(str(test_configs_path.parent), ignore_errors=True) shutil.rmtree(str(download_path), ignore_errors=True) + global cache_dir + cache_dir.cleanup() + @pytest.mark.parametrize("model,conf_file,model_dir,mode", TEST_GRID, scope='class') class TestQuickStart(object): @@ -197,7 +207,8 @@ def interact(conf_file, model_dir, qr_list=None): p.expect(">> ") if expected_response is not None: actual_response = p.readline().decode().strip() - assert expected_response == actual_response, f"Error in interacting with {model_dir} ({conf_file}): {query}" + assert expected_response == actual_response,\ + f"Error in interacting with {model_dir} ({conf_file}): {query}" p.expect("::") p.sendline("quit") @@ -302,7 +313,7 @@ def test_evolving(self, model, conf_file, model_dir, mode): shutil.rmtree(str(model_path), ignore_errors=True) logfile = io.BytesIO(b'') - _, exitstatus = pexpect.run(sys.executable + " -m deeppavlov.evolve " + str(c) + " --iterations 1 --p_size 1", + _, exitstatus = pexpect.run(sys.executable + f" -m deeppavlov.evolve {c} --iterations 1 --p_size 1", timeout=None, withexitstatus=True, logfile=logfile) if exitstatus != 0: diff --git a/utils/pip_wrapper/pip_wrapper.py b/utils/pip_wrapper/pip_wrapper.py index 8e3e013300..f272eb32a0 100644 --- a/utils/pip_wrapper/pip_wrapper.py +++ b/utils/pip_wrapper/pip_wrapper.py @@ -25,7 +25,11 @@ def install(*packages): *[re.sub(r'\s', '', package) for package in packages]], env=os.environ.copy()) if any(_spacy_re.match(package) for package in packages): - subprocess.check_call([sys.executable, '-m', 'spacy', 'download', 'en'], env=os.environ.copy()) + try: + import spacy + spacy.load('en') + except IOError: + subprocess.check_call([sys.executable, '-m', 'spacy', 'download', 'en'], env=os.environ.copy()) return result From 5a6330ced5f645d8d3edb18e3bbb8e992fd97336 Mon Sep 17 00:00:00 2001 From: Aleksey Lymar Date: Fri, 13 Jul 2018 19:07:06 +0300 Subject: [PATCH 2/6] fix: add local requirements into the pip wheel (#325) --- Jenkinsfile | 2 +- MANIFEST.in | 2 +- deeppavlov/configs/go_bot/gobot_dstc2.json | 6 +++--- deeppavlov/configs/go_bot/gobot_dstc2_all.json | 6 +++--- deeppavlov/configs/go_bot/gobot_dstc2_best.json | 6 +++--- deeppavlov/configs/go_bot/gobot_dstc2_minimal.json | 6 +++--- deeppavlov/configs/intents/intents_dstc2.json | 4 ++-- deeppavlov/configs/intents/intents_dstc2_big.json | 4 ++-- deeppavlov/configs/intents/intents_sample_csv.json | 4 ++-- deeppavlov/configs/intents/intents_sample_json.json | 4 ++-- deeppavlov/configs/intents/intents_snips.json | 4 ++-- .../configs/morpho_tagger/UD2.0/hu/morpho_hu_predict.json | 2 +- .../configs/morpho_tagger/UD2.0/hu/morpho_hu_train.json | 2 +- .../UD2.0/ru_syntagrus/morpho_ru_syntagrus_predict.json | 2 +- .../UD2.0/ru_syntagrus/morpho_ru_syntagrus_train.json | 2 +- deeppavlov/configs/ner/ner_conll2003.json | 4 ++-- deeppavlov/configs/ner/ner_conll2003_pos.json | 4 ++-- deeppavlov/configs/ner/ner_dstc2.json | 2 +- deeppavlov/configs/ner/ner_ontonotes.json | 4 ++-- deeppavlov/configs/ner/ner_rus.json | 4 ++-- deeppavlov/configs/ner/slotfill_dstc2.json | 2 +- deeppavlov/configs/ner/slotfill_dstc2_raw.json | 2 +- deeppavlov/configs/odqa/en_odqa_infer_wiki.json | 4 ++-- deeppavlov/configs/odqa/ru_odqa_infer_wiki.json | 2 +- deeppavlov/configs/ranking/en_ranker_tfidf_wiki.json | 2 +- deeppavlov/configs/ranking/ranking_insurance.json | 4 ++-- deeppavlov/configs/sentiment/insults_kaggle.json | 4 ++-- deeppavlov/configs/sentiment/sentiment_ag_news.json | 4 ++-- deeppavlov/configs/sentiment/sentiment_twitter.json | 4 ++-- deeppavlov/configs/seq2seq_go_bot/bot_kvret.json | 4 ++-- deeppavlov/configs/seq2seq_go_bot/bot_kvret_infer.json | 4 ++-- .../spelling_correction/brillmoore_kartaslov_ru.json | 2 +- .../brillmoore_kartaslov_ru_custom_vocab.json | 2 +- .../spelling_correction/brillmoore_kartaslov_ru_nolm.json | 2 +- .../spelling_correction/brillmoore_wikitypos_en.json | 2 +- .../spelling_correction/levenstein_corrector_ru.json | 2 +- deeppavlov/configs/squad/squad.json | 2 +- deeppavlov/configs/squad/squad_ru.json | 2 +- {requirements => dp_requirements}/fasttext.txt | 0 {requirements => dp_requirements}/gensim.txt | 0 {requirements => dp_requirements}/spacy.txt | 0 {requirements => dp_requirements}/spelling.txt | 0 {requirements => dp_requirements}/tf-gpu.txt | 0 {requirements => dp_requirements}/tf.txt | 0 setup.py | 2 +- tests/test_configs/intents/intents_snips_bigru.json | 4 ++-- tests/test_configs/intents/intents_snips_bilstm.json | 4 ++-- tests/test_configs/intents/intents_snips_bilstm_bilstm.json | 4 ++-- tests/test_configs/intents/intents_snips_bilstm_cnn.json | 4 ++-- .../intents/intents_snips_bilstm_self_add_attention.json | 4 ++-- .../intents/intents_snips_bilstm_self_mult_attention.json | 4 ++-- tests/test_configs/intents/intents_snips_cnn_bilstm.json | 4 ++-- tests/test_configs/odqa/en_odqa_infer_wiki_test.json | 4 ++-- tests/test_configs/ranking/en_ranker_tfidf_wiki_test.json | 2 +- 54 files changed, 80 insertions(+), 80 deletions(-) rename {requirements => dp_requirements}/fasttext.txt (100%) rename {requirements => dp_requirements}/gensim.txt (100%) rename {requirements => dp_requirements}/spacy.txt (100%) rename {requirements => dp_requirements}/spelling.txt (100%) rename {requirements => dp_requirements}/tf-gpu.txt (100%) rename {requirements => dp_requirements}/tf.txt (100%) diff --git a/Jenkinsfile b/Jenkinsfile index 0c1626c927..c801df52b4 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -13,7 +13,7 @@ node('gpu') { . .venv-$BUILD_NUMBER/bin/activate sed -i "s/stream=True/stream=False/g" deeppavlov/core/data/utils.py python setup.py develop - pip install -r requirements/tf-gpu.txt + pip install -r dp_requirements/tf-gpu.txt pip install -r requirements-dev.txt """ } diff --git a/MANIFEST.in b/MANIFEST.in index f21bad08ff..0e5bcd1eb3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,7 @@ include README.MD include LICENSE include requirements.txt -recursive-include requirements *.txt +recursive-include dp_requirements *.txt recursive-include deeppavlov *.json recursive-include deeppavlov *.md recursive-include utils *.json \ No newline at end of file diff --git a/deeppavlov/configs/go_bot/gobot_dstc2.json b/deeppavlov/configs/go_bot/gobot_dstc2.json index 43ebea8710..914c4022ff 100644 --- a/deeppavlov/configs/go_bot/gobot_dstc2.json +++ b/deeppavlov/configs/go_bot/gobot_dstc2.json @@ -84,9 +84,9 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt", - "../requirements/spacy.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt", + "../dp_requirements/spacy.txt" ], "labels": { "telegram_utils": "GoalOrientedBot", diff --git a/deeppavlov/configs/go_bot/gobot_dstc2_all.json b/deeppavlov/configs/go_bot/gobot_dstc2_all.json index 98078b1b20..09dd38ada6 100644 --- a/deeppavlov/configs/go_bot/gobot_dstc2_all.json +++ b/deeppavlov/configs/go_bot/gobot_dstc2_all.json @@ -89,9 +89,9 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt", - "../requirements/spacy.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt", + "../dp_requirements/spacy.txt" ], "labels": { "telegram_utils": "GoalOrientedBot", diff --git a/deeppavlov/configs/go_bot/gobot_dstc2_best.json b/deeppavlov/configs/go_bot/gobot_dstc2_best.json index c4f1218208..16b26a79a9 100644 --- a/deeppavlov/configs/go_bot/gobot_dstc2_best.json +++ b/deeppavlov/configs/go_bot/gobot_dstc2_best.json @@ -96,9 +96,9 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt", - "../requirements/spacy.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt", + "../dp_requirements/spacy.txt" ], "labels": { "telegram_utils": "GoalOrientedBot", diff --git a/deeppavlov/configs/go_bot/gobot_dstc2_minimal.json b/deeppavlov/configs/go_bot/gobot_dstc2_minimal.json index 14452a720c..83498b0dba 100644 --- a/deeppavlov/configs/go_bot/gobot_dstc2_minimal.json +++ b/deeppavlov/configs/go_bot/gobot_dstc2_minimal.json @@ -68,9 +68,9 @@ }, "train": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt", - "../requirements/spacy.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt", + "../dp_requirements/spacy.txt" ], "epochs": 200, "batch_size": 4, diff --git a/deeppavlov/configs/intents/intents_dstc2.json b/deeppavlov/configs/intents/intents_dstc2.json index ae8946b350..5e630a668d 100644 --- a/deeppavlov/configs/intents/intents_dstc2.json +++ b/deeppavlov/configs/intents/intents_dstc2.json @@ -108,8 +108,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel", diff --git a/deeppavlov/configs/intents/intents_dstc2_big.json b/deeppavlov/configs/intents/intents_dstc2_big.json index bdfe85c4dc..d71158840c 100644 --- a/deeppavlov/configs/intents/intents_dstc2_big.json +++ b/deeppavlov/configs/intents/intents_dstc2_big.json @@ -108,8 +108,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel" diff --git a/deeppavlov/configs/intents/intents_sample_csv.json b/deeppavlov/configs/intents/intents_sample_csv.json index e8042aaed5..cf9dd4dd28 100644 --- a/deeppavlov/configs/intents/intents_sample_csv.json +++ b/deeppavlov/configs/intents/intents_sample_csv.json @@ -114,8 +114,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel", diff --git a/deeppavlov/configs/intents/intents_sample_json.json b/deeppavlov/configs/intents/intents_sample_json.json index b31a98ce66..0043668c2b 100644 --- a/deeppavlov/configs/intents/intents_sample_json.json +++ b/deeppavlov/configs/intents/intents_sample_json.json @@ -109,8 +109,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel", diff --git a/deeppavlov/configs/intents/intents_snips.json b/deeppavlov/configs/intents/intents_snips.json index 9508d2ef52..cbaa80e28e 100644 --- a/deeppavlov/configs/intents/intents_snips.json +++ b/deeppavlov/configs/intents/intents_snips.json @@ -107,8 +107,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/hu/morpho_hu_predict.json b/deeppavlov/configs/morpho_tagger/UD2.0/hu/morpho_hu_predict.json index 75d6ca1d09..ae542b1d76 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/hu/morpho_hu_predict.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/hu/morpho_hu_predict.json @@ -65,7 +65,7 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt" + "../dp_requirements/tf.txt" ], "download": [ "http://lnsigo.mipt.ru/export/deeppavlov_data/morpho_tagger.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/hu/morpho_hu_train.json b/deeppavlov/configs/morpho_tagger/UD2.0/hu/morpho_hu_train.json index c9de1b64ca..0ee67af978 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/hu/morpho_hu_train.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/hu/morpho_hu_train.json @@ -67,7 +67,7 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt" + "../dp_requirements/tf.txt" ], "download": [ "http://lnsigo.mipt.ru/export/deeppavlov_data/morpho_tagger.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/ru_syntagrus/morpho_ru_syntagrus_predict.json b/deeppavlov/configs/morpho_tagger/UD2.0/ru_syntagrus/morpho_ru_syntagrus_predict.json index 36b575881e..cca028daea 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/ru_syntagrus/morpho_ru_syntagrus_predict.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/ru_syntagrus/morpho_ru_syntagrus_predict.json @@ -66,7 +66,7 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt" + "../dp_requirements/tf.txt" ], "download": [ "http://lnsigo.mipt.ru/export/deeppavlov_data/morpho_tagger.tar.gz", diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/ru_syntagrus/morpho_ru_syntagrus_train.json b/deeppavlov/configs/morpho_tagger/UD2.0/ru_syntagrus/morpho_ru_syntagrus_train.json index 5ee015a4b7..4888927726 100644 --- a/deeppavlov/configs/morpho_tagger/UD2.0/ru_syntagrus/morpho_ru_syntagrus_train.json +++ b/deeppavlov/configs/morpho_tagger/UD2.0/ru_syntagrus/morpho_ru_syntagrus_train.json @@ -67,7 +67,7 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt" + "../dp_requirements/tf.txt" ], "download": [ "http://lnsigo.mipt.ru/export/deeppavlov_data/morpho_tagger.tar.gz", diff --git a/deeppavlov/configs/ner/ner_conll2003.json b/deeppavlov/configs/ner/ner_conll2003.json index 93474a85d2..b55871b95e 100644 --- a/deeppavlov/configs/ner/ner_conll2003.json +++ b/deeppavlov/configs/ner/ner_conll2003.json @@ -145,8 +145,8 @@ }, "metadata": { "requirements": [ - "../requirements/gensim.txt", - "../requirements/tf-gpu.txt" + "../dp_requirements/gensim.txt", + "../dp_requirements/tf-gpu.txt" ], "labels": { "telegram_utils": "NERCoNLL2003Model", diff --git a/deeppavlov/configs/ner/ner_conll2003_pos.json b/deeppavlov/configs/ner/ner_conll2003_pos.json index 3bbd8f5c05..0b2da81f41 100644 --- a/deeppavlov/configs/ner/ner_conll2003_pos.json +++ b/deeppavlov/configs/ner/ner_conll2003_pos.json @@ -163,8 +163,8 @@ }, "metadata": { "requirements": [ - "../requirements/gensim.txt", - "../requirements/tf-gpu.txt" + "../dp_requirements/gensim.txt", + "../dp_requirements/tf-gpu.txt" ], "labels": { "telegram_utils": "NERCoNLL2003Model", diff --git a/deeppavlov/configs/ner/ner_dstc2.json b/deeppavlov/configs/ner/ner_dstc2.json index 9da047a5dc..00f76a62cd 100644 --- a/deeppavlov/configs/ner/ner_dstc2.json +++ b/deeppavlov/configs/ner/ner_dstc2.json @@ -92,7 +92,7 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt" + "../dp_requirements/tf.txt" ], "labels": { "telegram_utils": "NERModel", diff --git a/deeppavlov/configs/ner/ner_ontonotes.json b/deeppavlov/configs/ner/ner_ontonotes.json index 166b5719ae..7558671c0d 100644 --- a/deeppavlov/configs/ner/ner_ontonotes.json +++ b/deeppavlov/configs/ner/ner_ontonotes.json @@ -129,8 +129,8 @@ }, "metadata": { "requirements": [ - "../requirements/gensim.txt", - "../requirements/tf-gpu.txt" + "../dp_requirements/gensim.txt", + "../dp_requirements/tf-gpu.txt" ], "labels": { "telegram_utils": "NERCoNLL2003Model", diff --git a/deeppavlov/configs/ner/ner_rus.json b/deeppavlov/configs/ner/ner_rus.json index d1010bf405..e623d64f31 100644 --- a/deeppavlov/configs/ner/ner_rus.json +++ b/deeppavlov/configs/ner/ner_rus.json @@ -143,8 +143,8 @@ }, "metadata": { "requirements": [ - "../requirements/fasttext.txt", - "../requirements/tf-gpu.txt" + "../dp_requirements/fasttext.txt", + "../dp_requirements/tf-gpu.txt" ], "labels": { "telegram_utils": "NERCoNLL2003Model", diff --git a/deeppavlov/configs/ner/slotfill_dstc2.json b/deeppavlov/configs/ner/slotfill_dstc2.json index 3dc7ee4535..4721f0d8a1 100644 --- a/deeppavlov/configs/ner/slotfill_dstc2.json +++ b/deeppavlov/configs/ner/slotfill_dstc2.json @@ -38,7 +38,7 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt" + "../dp_requirements/tf.txt" ], "labels": { "telegram_utils": "NERModel", diff --git a/deeppavlov/configs/ner/slotfill_dstc2_raw.json b/deeppavlov/configs/ner/slotfill_dstc2_raw.json index 80089b9a02..58925d91e6 100644 --- a/deeppavlov/configs/ner/slotfill_dstc2_raw.json +++ b/deeppavlov/configs/ner/slotfill_dstc2_raw.json @@ -24,7 +24,7 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt" + "../dp_requirements/tf.txt" ], "labels": { "telegram_utils": "NERModel" diff --git a/deeppavlov/configs/odqa/en_odqa_infer_wiki.json b/deeppavlov/configs/odqa/en_odqa_infer_wiki.json index 4f65aaecbf..9ed6c6f9d2 100644 --- a/deeppavlov/configs/odqa/en_odqa_infer_wiki.json +++ b/deeppavlov/configs/odqa/en_odqa_infer_wiki.json @@ -47,8 +47,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf-gpu.txt", - "../requirements/spacy.txt" + "../dp_requirements/tf-gpu.txt", + "../dp_requirements/spacy.txt" ], "labels": { "server_utils": "ODQA" diff --git a/deeppavlov/configs/odqa/ru_odqa_infer_wiki.json b/deeppavlov/configs/odqa/ru_odqa_infer_wiki.json index 079ea2e19d..ebb41f2e67 100644 --- a/deeppavlov/configs/odqa/ru_odqa_infer_wiki.json +++ b/deeppavlov/configs/odqa/ru_odqa_infer_wiki.json @@ -47,7 +47,7 @@ }, "metadata": { "requirements": [ - "../requirements/tf-gpu.txt" + "../dp_requirements/tf-gpu.txt" ], "labels": { "server_utils": "ODQA" diff --git a/deeppavlov/configs/ranking/en_ranker_tfidf_wiki.json b/deeppavlov/configs/ranking/en_ranker_tfidf_wiki.json index cfc67b9ff9..2e175ffbee 100644 --- a/deeppavlov/configs/ranking/en_ranker_tfidf_wiki.json +++ b/deeppavlov/configs/ranking/en_ranker_tfidf_wiki.json @@ -52,7 +52,7 @@ }, "metadata": { "requirements": [ - "../requirements/spacy.txt" + "../dp_requirements/spacy.txt" ], "labels": { "server_utils": "Ranker" diff --git a/deeppavlov/configs/ranking/ranking_insurance.json b/deeppavlov/configs/ranking/ranking_insurance.json index 2b4c6e40b7..3cbeb37141 100644 --- a/deeppavlov/configs/ranking/ranking_insurance.json +++ b/deeppavlov/configs/ranking/ranking_insurance.json @@ -57,8 +57,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/gensim.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/gensim.txt" ], "labels": { "telegram_utils": "RankingModel", diff --git a/deeppavlov/configs/sentiment/insults_kaggle.json b/deeppavlov/configs/sentiment/insults_kaggle.json index 5d8a746bfb..eabfd00f90 100644 --- a/deeppavlov/configs/sentiment/insults_kaggle.json +++ b/deeppavlov/configs/sentiment/insults_kaggle.json @@ -108,8 +108,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel", diff --git a/deeppavlov/configs/sentiment/sentiment_ag_news.json b/deeppavlov/configs/sentiment/sentiment_ag_news.json index 5c18570463..b17ba4239a 100644 --- a/deeppavlov/configs/sentiment/sentiment_ag_news.json +++ b/deeppavlov/configs/sentiment/sentiment_ag_news.json @@ -107,8 +107,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel", diff --git a/deeppavlov/configs/sentiment/sentiment_twitter.json b/deeppavlov/configs/sentiment/sentiment_twitter.json index 995ed8e5b4..2fe0f71785 100644 --- a/deeppavlov/configs/sentiment/sentiment_twitter.json +++ b/deeppavlov/configs/sentiment/sentiment_twitter.json @@ -108,8 +108,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel", diff --git a/deeppavlov/configs/seq2seq_go_bot/bot_kvret.json b/deeppavlov/configs/seq2seq_go_bot/bot_kvret.json index 69e9d27e03..e45b383580 100644 --- a/deeppavlov/configs/seq2seq_go_bot/bot_kvret.json +++ b/deeppavlov/configs/seq2seq_go_bot/bot_kvret.json @@ -108,8 +108,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/spacy.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/spacy.txt" ], "labels": { "telegram_utils": "Seq2SeqGoalOrientedBot", diff --git a/deeppavlov/configs/seq2seq_go_bot/bot_kvret_infer.json b/deeppavlov/configs/seq2seq_go_bot/bot_kvret_infer.json index a03f33644a..2eaca52335 100644 --- a/deeppavlov/configs/seq2seq_go_bot/bot_kvret_infer.json +++ b/deeppavlov/configs/seq2seq_go_bot/bot_kvret_infer.json @@ -83,8 +83,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/spacy.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/spacy.txt" ], "labels": { "telegram_utils": "Seq2SeqGoalOrientedBot", diff --git a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru.json b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru.json index f92ba7d22a..a4649e7911 100644 --- a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru.json +++ b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru.json @@ -60,7 +60,7 @@ }, "metadata": { "requirements": [ - "../requirements/spelling.txt" + "../dp_requirements/spelling.txt" ], "labels": { "telegram_utils": "ErrorModel", diff --git a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_custom_vocab.json b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_custom_vocab.json index 884800987e..09bb7a56d9 100644 --- a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_custom_vocab.json +++ b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_custom_vocab.json @@ -62,7 +62,7 @@ }, "metadata": { "requirements": [ - "../requirements/spelling.txt" + "../dp_requirements/spelling.txt" ], "labels": { "telegram_utils": "ErrorModel", diff --git a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_nolm.json b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_nolm.json index 22b2c7b1c8..3f31fe68bd 100644 --- a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_nolm.json +++ b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_nolm.json @@ -59,7 +59,7 @@ }, "metadata": { "requirements": [ - "../requirements/spelling.txt" + "../dp_requirements/spelling.txt" ], "labels": { "telegram_utils": "ErrorModel", diff --git a/deeppavlov/configs/spelling_correction/brillmoore_wikitypos_en.json b/deeppavlov/configs/spelling_correction/brillmoore_wikitypos_en.json index 5b801ea844..cf77d7ed6a 100644 --- a/deeppavlov/configs/spelling_correction/brillmoore_wikitypos_en.json +++ b/deeppavlov/configs/spelling_correction/brillmoore_wikitypos_en.json @@ -59,7 +59,7 @@ }, "metadata": { "requirements": [ - "../requirements/spelling.txt" + "../dp_requirements/spelling.txt" ], "labels": { "telegram_utils": "ErrorModel", diff --git a/deeppavlov/configs/spelling_correction/levenstein_corrector_ru.json b/deeppavlov/configs/spelling_correction/levenstein_corrector_ru.json index 73b562cdcb..69b3bf7e1d 100644 --- a/deeppavlov/configs/spelling_correction/levenstein_corrector_ru.json +++ b/deeppavlov/configs/spelling_correction/levenstein_corrector_ru.json @@ -42,7 +42,7 @@ }, "metadata": { "requirements": [ - "../requirements/spelling.txt" + "../dp_requirements/spelling.txt" ], "labels": { "telegram_utils": "ErrorModel", diff --git a/deeppavlov/configs/squad/squad.json b/deeppavlov/configs/squad/squad.json index 96c2f78a44..e384445a52 100644 --- a/deeppavlov/configs/squad/squad.json +++ b/deeppavlov/configs/squad/squad.json @@ -105,7 +105,7 @@ }, "metadata": { "requirements": [ - "../requirements/tf-gpu.txt" + "../dp_requirements/tf-gpu.txt" ], "labels": { "telegram_utils": "SquadModel", diff --git a/deeppavlov/configs/squad/squad_ru.json b/deeppavlov/configs/squad/squad_ru.json index 4501ab9846..6faef08244 100644 --- a/deeppavlov/configs/squad/squad_ru.json +++ b/deeppavlov/configs/squad/squad_ru.json @@ -106,7 +106,7 @@ }, "metadata": { "requirements": [ - "../requirements/tf-gpu.txt" + "../dp_requirements/tf-gpu.txt" ], "labels": { "telegram_utils": "SquadModel", diff --git a/requirements/fasttext.txt b/dp_requirements/fasttext.txt similarity index 100% rename from requirements/fasttext.txt rename to dp_requirements/fasttext.txt diff --git a/requirements/gensim.txt b/dp_requirements/gensim.txt similarity index 100% rename from requirements/gensim.txt rename to dp_requirements/gensim.txt diff --git a/requirements/spacy.txt b/dp_requirements/spacy.txt similarity index 100% rename from requirements/spacy.txt rename to dp_requirements/spacy.txt diff --git a/requirements/spelling.txt b/dp_requirements/spelling.txt similarity index 100% rename from requirements/spelling.txt rename to dp_requirements/spelling.txt diff --git a/requirements/tf-gpu.txt b/dp_requirements/tf-gpu.txt similarity index 100% rename from requirements/tf-gpu.txt rename to dp_requirements/tf-gpu.txt diff --git a/requirements/tf.txt b/dp_requirements/tf.txt similarity index 100% rename from requirements/tf.txt rename to dp_requirements/tf.txt diff --git a/setup.py b/setup.py index d8190dbe4f..f548ba06dc 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ def readme(): setup( name='deeppavlov', - packages=find_packages(exclude=('tests',)), + packages=find_packages(exclude=('tests',)) + ['dp_requirements'], version=meta['__version__'], description='An open source library for building end-to-end dialog systems and training chatbots.', long_description=readme(), diff --git a/tests/test_configs/intents/intents_snips_bigru.json b/tests/test_configs/intents/intents_snips_bigru.json index 7c54f52722..c035c1aa4c 100644 --- a/tests/test_configs/intents/intents_snips_bigru.json +++ b/tests/test_configs/intents/intents_snips_bigru.json @@ -104,8 +104,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel" diff --git a/tests/test_configs/intents/intents_snips_bilstm.json b/tests/test_configs/intents/intents_snips_bilstm.json index 1698c293a4..63083a4b4b 100644 --- a/tests/test_configs/intents/intents_snips_bilstm.json +++ b/tests/test_configs/intents/intents_snips_bilstm.json @@ -104,8 +104,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel" diff --git a/tests/test_configs/intents/intents_snips_bilstm_bilstm.json b/tests/test_configs/intents/intents_snips_bilstm_bilstm.json index 182d3c0f3f..674921b833 100644 --- a/tests/test_configs/intents/intents_snips_bilstm_bilstm.json +++ b/tests/test_configs/intents/intents_snips_bilstm_bilstm.json @@ -105,8 +105,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel" diff --git a/tests/test_configs/intents/intents_snips_bilstm_cnn.json b/tests/test_configs/intents/intents_snips_bilstm_cnn.json index 47893e8138..6e3223185c 100644 --- a/tests/test_configs/intents/intents_snips_bilstm_cnn.json +++ b/tests/test_configs/intents/intents_snips_bilstm_cnn.json @@ -111,8 +111,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel" diff --git a/tests/test_configs/intents/intents_snips_bilstm_self_add_attention.json b/tests/test_configs/intents/intents_snips_bilstm_self_add_attention.json index 3e701373b7..d906c417e6 100644 --- a/tests/test_configs/intents/intents_snips_bilstm_self_add_attention.json +++ b/tests/test_configs/intents/intents_snips_bilstm_self_add_attention.json @@ -106,8 +106,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel" diff --git a/tests/test_configs/intents/intents_snips_bilstm_self_mult_attention.json b/tests/test_configs/intents/intents_snips_bilstm_self_mult_attention.json index b8b4ed00a0..c21fb08efb 100644 --- a/tests/test_configs/intents/intents_snips_bilstm_self_mult_attention.json +++ b/tests/test_configs/intents/intents_snips_bilstm_self_mult_attention.json @@ -106,8 +106,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel" diff --git a/tests/test_configs/intents/intents_snips_cnn_bilstm.json b/tests/test_configs/intents/intents_snips_cnn_bilstm.json index 6ab121b81e..3f84c30546 100644 --- a/tests/test_configs/intents/intents_snips_cnn_bilstm.json +++ b/tests/test_configs/intents/intents_snips_cnn_bilstm.json @@ -111,8 +111,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf.txt", - "../requirements/fasttext.txt" + "../dp_requirements/tf.txt", + "../dp_requirements/fasttext.txt" ], "labels": { "telegram_utils": "IntentModel" diff --git a/tests/test_configs/odqa/en_odqa_infer_wiki_test.json b/tests/test_configs/odqa/en_odqa_infer_wiki_test.json index cc086cb204..5d5f834f91 100644 --- a/tests/test_configs/odqa/en_odqa_infer_wiki_test.json +++ b/tests/test_configs/odqa/en_odqa_infer_wiki_test.json @@ -67,8 +67,8 @@ }, "metadata": { "requirements": [ - "../requirements/tf-gpu.txt", - "../requirements/spacy.txt" + "../dp_requirements/tf-gpu.txt", + "../dp_requirements/spacy.txt" ], "labels": { "server_utils": "ODQA" diff --git a/tests/test_configs/ranking/en_ranker_tfidf_wiki_test.json b/tests/test_configs/ranking/en_ranker_tfidf_wiki_test.json index 3a8d503eb7..944e328df9 100644 --- a/tests/test_configs/ranking/en_ranker_tfidf_wiki_test.json +++ b/tests/test_configs/ranking/en_ranker_tfidf_wiki_test.json @@ -52,7 +52,7 @@ }, "metadata": { "requirements": [ - "../requirements/spacy.txt" + "../dp_requirements/spacy.txt" ], "labels": { "server_utils": "Ranker" From 4044fd998d4f3ee70b57cff0f75c2a8effde9cb2 Mon Sep 17 00:00:00 2001 From: Aleksey Lymar Date: Fri, 13 Jul 2018 19:07:19 +0300 Subject: [PATCH 3/6] fix: use correct accuracy metric in ner_dstc2 and slotfill/README.md (#324) --- deeppavlov/configs/ner/ner_dstc2.json | 2 +- deeppavlov/metrics/accuracy.py | 1 + deeppavlov/models/slotfill/README.md | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/deeppavlov/configs/ner/ner_dstc2.json b/deeppavlov/configs/ner/ner_dstc2.json index 00f76a62cd..529874d56a 100644 --- a/deeppavlov/configs/ner/ner_dstc2.json +++ b/deeppavlov/configs/ner/ner_dstc2.json @@ -83,7 +83,7 @@ "epochs": 100, "batch_size": 64, - "metrics": ["ner_f1", "per_item_accuracy"], + "metrics": ["ner_f1", "per_token_accuracy"], "validation_patience": 5, "val_every_n_epochs": 5, diff --git a/deeppavlov/metrics/accuracy.py b/deeppavlov/metrics/accuracy.py index c81e6a65bb..c367ff4739 100644 --- a/deeppavlov/metrics/accuracy.py +++ b/deeppavlov/metrics/accuracy.py @@ -88,6 +88,7 @@ def per_item_accuracy(y_true, y_predicted): correct = sum([y1 == y2 for y1, y2 in zip(y_true, y_predicted)]) return correct / examples_len if examples_len else 0 + @register_metric('per_token_accuracy') def per_token_accuracy(y_true, y_predicted): y_true = list(itertools.chain(*y_true)) diff --git a/deeppavlov/models/slotfill/README.md b/deeppavlov/models/slotfill/README.md index c5ad7a6217..1c622e1362 100644 --- a/deeppavlov/models/slotfill/README.md +++ b/deeppavlov/models/slotfill/README.md @@ -274,7 +274,7 @@ And now all parts together: "train": { "epochs": 100, "batch_size": 64, - "metrics": ["ner_f1", "per_item_accuracy"], + "metrics": ["ner_f1", "per_token_accuracy"], "validation_patience": 5, "val_every_n_epochs": 5, "log_every_n_batches": 100, From d1669a6cf34c7a869e60c2b0a350255f1059eb33 Mon Sep 17 00:00:00 2001 From: Aleksey Lymar Date: Fri, 13 Jul 2018 19:07:42 +0300 Subject: [PATCH 4/6] feat: `show_examples` in train config shows results of the last batch for every report (#322) --- deeppavlov/core/commands/train.py | 46 +++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/deeppavlov/core/commands/train.py b/deeppavlov/core/commands/train.py index d699296496..f59090b38b 100644 --- a/deeppavlov/core/commands/train.py +++ b/deeppavlov/core/commands/train.py @@ -145,7 +145,8 @@ def train_evaluate_model_from_config(config: [str, Path, dict], to_train=True, t train_config = { 'metrics': ['accuracy'], 'validate_best': to_validate, - 'test_best': True + 'test_best': True, + 'show_examples': False } try: @@ -178,7 +179,8 @@ def train_evaluate_model_from_config(config: [str, Path, dict], to_train=True, t if train_config['validate_best']: report = { 'valid': _test_model(model, metrics_functions, iterator, - train_config.get('batch_size', -1), 'valid') + train_config.get('batch_size', -1), 'valid', + show_examples=train_config['show_examples']) } print(json.dumps(report, ensure_ascii=False)) @@ -186,7 +188,8 @@ def train_evaluate_model_from_config(config: [str, Path, dict], to_train=True, t if train_config['test_best']: report = { 'test': _test_model(model, metrics_functions, iterator, - train_config.get('batch_size', -1), 'test') + train_config.get('batch_size', -1), 'test', + show_examples=train_config['show_examples']) } print(json.dumps(report, ensure_ascii=False)) @@ -194,7 +197,7 @@ def train_evaluate_model_from_config(config: [str, Path, dict], to_train=True, t def _test_model(model: Component, metrics_functions: List[Tuple[str, Callable]], iterator: DataLearningIterator, batch_size=-1, data_type='valid', - start_time: float=None) -> Dict[str, Union[int, OrderedDict, str]]: + start_time: float=None, show_examples=False) -> Dict[str, Union[int, OrderedDict, str]]: if start_time is None: start_time = time.time() @@ -212,6 +215,17 @@ def _test_model(model: Component, metrics_functions: List[Tuple[str, Callable]], 'metrics': prettify_metrics(metrics), 'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5))) } + + if show_examples: + try: + report['examples'] = [{ + 'x': x_item, + 'y_predicted': y_predicted_item, + 'y_true': y_true_item + } for x_item, y_predicted_item, y_true_item in zip(x, y_predicted, y_true)] + except NameError: + log.warning(f'Could not log examples for {data_type}, assuming it\'s empty') + return report @@ -230,7 +244,6 @@ def _train_batches(model: NNModel, iterator: DataLearningIterator, train_config: 'log_every_n_batches': 0, 'log_every_n_epochs': 0, - # 'show_examples': False, 'validate_best': True, 'test_best': True, @@ -298,6 +311,16 @@ def improved(score, best): 'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5))) } + if train_config['show_examples']: + try: + report['examples'] = [{ + 'x': x_item, + 'y_predicted': y_predicted_item, + 'y_true': y_true_item + } for x_item, y_predicted_item, y_true_item in zip(x, y_predicted, y_true)] + except NameError: + log.warning('Could not log examples as y_predicted is not defined') + if losses: report['loss'] = sum(losses)/len(losses) losses = [] @@ -352,6 +375,17 @@ def improved(score, best): 'metrics': prettify_metrics(metrics), 'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5))) } + + if train_config['show_examples']: + try: + report['examples'] = [{ + 'x': x_item, + 'y_predicted': y_predicted_item, + 'y_true': y_true_item + } for x_item, y_predicted_item, y_true_item in zip(x, y_predicted, y_true)] + except NameError: + log.warning('Could not log examples') + if losses: report['loss'] = sum(losses)/len(losses) losses = [] @@ -375,7 +409,7 @@ def improved(score, best): if train_config['val_every_n_epochs'] > 0 and epochs % train_config['val_every_n_epochs'] == 0: report = _test_model(model, metrics_functions, iterator, - train_config['batch_size'], 'valid', start_time) + train_config['batch_size'], 'valid', start_time, train_config['show_examples']) report['epochs_done'] = epochs report['batches_seen'] = i report['train_examples_seen'] = examples From 65c5eff0d4a082c47392a4201127b2b1e41d79ce Mon Sep 17 00:00:00 2001 From: Aleksey Lymar Date: Fri, 13 Jul 2018 19:07:56 +0300 Subject: [PATCH 5/6] feat: send an autogenerated token as a header with download requests (#319) --- deeppavlov/core/data/utils.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/deeppavlov/core/data/utils.py b/deeppavlov/core/data/utils.py index 66346bdcc7..122c724af6 100644 --- a/deeppavlov/core/data/utils.py +++ b/deeppavlov/core/data/utils.py @@ -18,15 +18,18 @@ from pathlib import Path from urllib.parse import urlparse from typing import List, Union - import requests from tqdm import tqdm import tarfile import gzip -import numpy as np -import re import zipfile +import re import shutil +import secrets + +import requests +from tqdm import tqdm +import numpy as np from deeppavlov.core.common.log import get_logger @@ -38,13 +41,22 @@ tqdm.monitor_interval = 0 +def get_download_token(): + token_file = Path.home() / '.deeppavlov' + if not token_file.exists(): + token_file.write_text(secrets.token_urlsafe(32), encoding='utf8') + + return token_file.read_text(encoding='utf8').strip() + + def simple_download(url: str, destination: [Path, str]): CHUNK = 32 * 1024 destination = Path(destination) destination.parent.mkdir(parents=True, exist_ok=True) - r = requests.get(url, stream=True) + headers = {'dp-token': get_download_token()} + r = requests.get(url, stream=True, headers=headers) total_length = int(r.headers.get('content-length', 0)) log.info('Downloading from {} to {}'.format(url, destination)) @@ -59,8 +71,8 @@ def simple_download(url: str, destination: [Path, str]): f.write(chunk) if downloaded < total_length: log.warn(f'Download stopped abruptly, trying to resume from {downloaded} to reach {total_length}') - resume_header = {'Range': f'bytes={downloaded}-'} - r = requests.get(url, headers=resume_header, stream=True) + headers['Range'] = f'bytes={downloaded}-' + r = requests.get(url, headers=headers, stream=True) if total_length - downloaded != int(r.headers['content-length']): raise RuntimeError('It looks like the server does not support resuming downloads') else: From 3b57e3acac2c393647ec5a34ae495b1515a40980 Mon Sep 17 00:00:00 2001 From: Aleksey Lymar Date: Fri, 13 Jul 2018 19:09:10 +0300 Subject: [PATCH 6/6] chore: update deeppavlov version --- deeppavlov/package_meta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deeppavlov/package_meta.py b/deeppavlov/package_meta.py index bd582e2bbc..cc7a49296b 100644 --- a/deeppavlov/package_meta.py +++ b/deeppavlov/package_meta.py @@ -1,2 +1,2 @@ -__version__ = '0.0.6' +__version__ = '0.0.6.1' __author__ = 'Neural Networks and Deep Learning lab, MIPT'