From 836f8225de2d868b63d5479054b37c05c8e74fec Mon Sep 17 00:00:00 2001 From: Nils Reimers Date: Thu, 1 Apr 2021 08:32:45 +0200 Subject: [PATCH] Bump version to 1.0.4 --- .../ms_marco/multilingual/translate_queries.py | 11 ++++++++--- sentence_transformers/__init__.py | 2 +- setup.py | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/examples/training/ms_marco/multilingual/translate_queries.py b/examples/training/ms_marco/multilingual/translate_queries.py index 65466cb55..ef726b3b0 100644 --- a/examples/training/ms_marco/multilingual/translate_queries.py +++ b/examples/training/ms_marco/multilingual/translate_queries.py @@ -3,12 +3,16 @@ For machine translation, we use EasyNMT: https://github.com/UKPLab/EasyNMT You can install it via: pip install easynmt + +Usage: +python translate_queries [target_language] """ import os from sentence_transformers import LoggingHandler, util import logging import tarfile from easynmt import EasyNMT +import sys #### Just some code to print debug information to stdout logging.basicConfig(format='%(asctime)s - %(message)s', @@ -17,8 +21,10 @@ handlers=[LoggingHandler()]) #### /print debug information to stdout -target_lang = 'de' +target_lang = sys.argv[1] output_folder = 'multilingual-data' +data_folder = '../msmarco-data' + output_filename = os.path.join(output_folder, 'train_queries.en-{}.tsv'.format(target_lang)) os.makedirs(output_folder, exist_ok=True) @@ -32,7 +38,6 @@ translated_qids.add(splits[0]) ### Now we read the MS Marco dataset -data_folder = '../msmarco-data' os.makedirs(data_folder, exist_ok=True) # Read qrels file for relevant positives per query @@ -78,5 +83,5 @@ with open(output_filename, 'a' if os.path.exists(output_filename) else 'w', encoding='utf8') as fOut: for qid, query, translated_query in zip(qids, queries, translation_model.translate_stream(queries, source_lang='en', target_lang=target_lang, beam_size=2, perform_sentence_splitting=False, chunk_size=256, batch_size=64)): - 
fOut.write("{}\t{}\t{}\n".format(qid, query.replace("\t", " "), translated_query.replace("\t", " "))) + fOut.write("{}\t{}\n".format(qid, translated_query.replace("\t", " "))) fOut.flush() diff --git a/sentence_transformers/__init__.py b/sentence_transformers/__init__.py index fab41f554..5022284ae 100644 --- a/sentence_transformers/__init__.py +++ b/sentence_transformers/__init__.py @@ -1,4 +1,4 @@ -__version__ = "1.0.3" +__version__ = "1.0.4" __DOWNLOAD_SERVER__ = 'http://sbert.net/models/' from .datasets import SentencesDataset, ParallelSentencesDataset from .LoggingHandler import LoggingHandler diff --git a/setup.py b/setup.py index 1172eeb27..f3833765a 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="sentence-transformers", - version="1.0.3", + version="1.0.4", author="Nils Reimers", author_email="info@nils-reimers.de", description="Sentence Embeddings using BERT / RoBERTa / XLM-R",