Skip to content

Commit

Permalink
data_download.py consistent with process_data.py (mlcommons#249)
Browse files Browse the repository at this point in the history
  • Loading branch information
vishalsubbiah authored and nvpaulius committed Apr 4, 2019
1 parent 7c2fb6f commit 25cdb0b
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion translation/tensorflow/transformer/data_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@

# Strings to include in the generated files.
_PREFIX = "wmt32k"
_ENCODE_TAG = "encoded"
_TRAIN_TAG = "train"
_EVAL_TAG = "dev" # Following WMT and Tensor2Tensor conventions, in which the
# evaluation datasets are tagged as "dev" for development.
Expand Down Expand Up @@ -307,7 +308,7 @@ def encode_and_save_files(
def shard_filename(path, tag, shard_num, total_shards):
"""Create filename for data shard."""
return os.path.join(
path, "%s-%s-%.5d-of-%.5d" % (_PREFIX, tag, shard_num, total_shards))
path, "%s-%s-%s-%.5d-of-%.5d" % (_PREFIX, _ENCODE_TAG, tag, shard_num, total_shards))


def shuffle_records(fname):
Expand Down

0 comments on commit 25cdb0b

Please sign in to comment.