Skip to content

Commit

Permalink
Bump version to 1.0.4
Browse files Browse the repository at this point in the history
  • Loading branch information
nreimers committed Apr 1, 2021
1 parent a17ec1e commit 836f822
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 5 deletions.
11 changes: 8 additions & 3 deletions examples/training/ms_marco/multilingual/translate_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@
For machine translation, we use EasyNMT: https://github.com/UKPLab/EasyNMT
You can install it via: pip install easynmt
Usage:
python translate_queries.py [target_language]
"""
import os
from sentence_transformers import LoggingHandler, util
import logging
import tarfile
from easynmt import EasyNMT
import sys

#### Just some code to print debug information to stdout
logging.basicConfig(format='%(asctime)s - %(message)s',
Expand All @@ -17,8 +21,10 @@
handlers=[LoggingHandler()])
#### /print debug information to stdout

target_lang = 'de'
target_lang = sys.argv[1]
output_folder = 'multilingual-data'
data_folder = '../msmarco-data'

output_filename = os.path.join(output_folder, 'train_queries.en-{}.tsv'.format(target_lang))
os.makedirs(output_folder, exist_ok=True)

Expand All @@ -32,7 +38,6 @@
translated_qids.add(splits[0])

### Now we read the MS Marco dataset
data_folder = '../msmarco-data'
os.makedirs(data_folder, exist_ok=True)

# Read qrels file for relevant positives per query
Expand Down Expand Up @@ -78,5 +83,5 @@

with open(output_filename, 'a' if os.path.exists(output_filename) else 'w', encoding='utf8') as fOut:
for qid, query, translated_query in zip(qids, queries, translation_model.translate_stream(queries, source_lang='en', target_lang=target_lang, beam_size=2, perform_sentence_splitting=False, chunk_size=256, batch_size=64)):
fOut.write("{}\t{}\t{}\n".format(qid, query.replace("\t", " "), translated_query.replace("\t", " ")))
fOut.write("{}\t{}\t{}\n".format(qid, translated_query.replace("\t", " ")))
fOut.flush()
2 changes: 1 addition & 1 deletion sentence_transformers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "1.0.3"
__version__ = "1.0.4"
__DOWNLOAD_SERVER__ = 'http://sbert.net/models/'
from .datasets import SentencesDataset, ParallelSentencesDataset
from .LoggingHandler import LoggingHandler
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setup(
name="sentence-transformers",
version="1.0.3",
version="1.0.4",
author="Nils Reimers",
author_email="[email protected]",
description="Sentence Embeddings using BERT / RoBERTa / XLM-R",
Expand Down

0 comments on commit 836f822

Please sign in to comment.