
Commit 8cf5fa4

Author: Ryan Sepassi (committed)
Rm all uses of xrange and fix decode_length for slow_greedy_infer
PiperOrigin-RevId: 193717890
1 parent 347084c · commit 8cf5fa4

(Large commits have some content hidden by default; not all 42 changed files are shown below.)

42 files changed: +220 −202 lines
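
Every hunk below applies the same one-line change: import `range` from `six.moves` so that the name is lazy on both interpreters (Python 2's `xrange`, Python 3's built-in `range`). A minimal sketch of the pattern, assuming only `six` is installed; the `count_even` helper is hypothetical and not from the commit:

```
# six.moves.range is xrange on Python 2 and the built-in range on Python 3.
# Importing it under the name `range` shadows the built-in, which is why
# every touched file carries the pylint disable.
from six.moves import range  # pylint: disable=redefined-builtin

def count_even(limit):
  """Counts even integers below `limit`, iterating lazily on both Pythons."""
  total = 0
  for n in range(limit):
    if n % 2 == 0:
      total += 1
  return total

assert count_even(10) == 5  # 0, 2, 4, 6, 8
```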

Diff for: .gitignore (+6 −1)

@@ -1,6 +1,5 @@
 # Compiled python modules.
 *.pyc
-*.DS_Store
 
 # Byte-compiled
 __pycache__/
@@ -17,3 +16,9 @@ dist/
 # Sublime project files
 *.sublime-project
 *.sublime-workspace
+
+# Tests
+.pytest_cache/
+
+# Other
+*.DS_Store

Diff for: tensor2tensor/data_generators/README.md (+1 −1)

@@ -47,7 +47,7 @@ with an integer denoting the length of the input list.
 
 ```
 def length_generator(nbr_cases):
-  for _ in xrange(nbr_cases):
+  for _ in range(nbr_cases):
     length = np.random.randint(100) + 1
     yield {"inputs": [2] * length, "targets": [length]}
 ```
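
For reference, the README snippet above is runnable once `numpy` is imported; a self-contained sketch (the assertion loop is illustrative, not from the README):

```
import numpy as np

def length_generator(nbr_cases):
  """Yields cases whose target is the length of the all-2s input list."""
  for _ in range(nbr_cases):
    length = np.random.randint(100) + 1
    yield {"inputs": [2] * length, "targets": [length]}

for case in length_generator(3):
  assert case["targets"] == [len(case["inputs"])]
```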

Diff for: tensor2tensor/data_generators/algorithmic.py (+11 −11)

@@ -22,7 +22,7 @@
 
 import numpy as np
 
-from six.moves import xrange  # pylint: disable=redefined-builtin
+from six.moves import range  # pylint: disable=redefined-builtin
 
 from tensor2tensor.data_generators import generator_utils as utils
 from tensor2tensor.data_generators import problem
@@ -113,9 +113,9 @@ def generator(self, nbr_symbols, max_length, nbr_cases):
       A dictionary {"inputs": input-list, "targets": target-list} where
       input-list and target-list are the same.
     """
-    for _ in xrange(nbr_cases):
+    for _ in range(nbr_cases):
       l = np.random.randint(max_length) + 1
-      inputs = [np.random.randint(nbr_symbols) for _ in xrange(l)]
+      inputs = [np.random.randint(nbr_symbols) for _ in range(l)]
       yield {"inputs": inputs, "targets": inputs}
 
 
@@ -153,9 +153,9 @@ def generator(self, nbr_symbols, max_length, nbr_cases):
       target-list[i] = input-list[i] + shift.
     """
     shift = 10
-    for _ in xrange(nbr_cases):
+    for _ in range(nbr_cases):
       l = np.random.randint(max_length) + 1
-      inputs = [np.random.randint(nbr_symbols - shift) for _ in xrange(l)]
+      inputs = [np.random.randint(nbr_symbols - shift) for _ in range(l)]
       yield {"inputs": inputs, "targets": [i + shift for i in inputs]}
 
   @property
@@ -187,9 +187,9 @@ def generator(self, nbr_symbols, max_length, nbr_cases):
       A dictionary {"inputs": input-list, "targets": target-list} where
       target-list is input-list reversed.
     """
-    for _ in xrange(nbr_cases):
+    for _ in range(nbr_cases):
       l = np.random.randint(max_length) + 1
-      inputs = [np.random.randint(nbr_symbols) for _ in xrange(l)]
+      inputs = [np.random.randint(nbr_symbols) for _ in range(l)]
       yield {"inputs": inputs, "targets": list(reversed(inputs))}
 
 
@@ -265,7 +265,7 @@ def reverse_generator_nlplike(nbr_symbols,
   """
   std_dev = max_length / scale_std_dev
   distr_map = zipf_distribution(nbr_symbols, alpha)
-  for _ in xrange(nbr_cases):
+  for _ in range(nbr_cases):
     l = int(abs(np.random.normal(loc=max_length / 2, scale=std_dev)) + 1)
     inputs = zipf_random_sample(distr_map, l)
     yield {"inputs": inputs, "targets": list(reversed(inputs))}
@@ -321,7 +321,7 @@ def random_number_lower_endian(length, base):
   """Helper function: generate a random number as a lower-endian digits list."""
   if length == 1:  # Last digit can be 0 only if length is 1.
     return [np.random.randint(base)]
-  prefix = [np.random.randint(base) for _ in xrange(length - 1)]
+  prefix = [np.random.randint(base) for _ in range(length - 1)]
   return prefix + [np.random.randint(base - 1) + 1]  # Last digit is not 0.
@@ -354,7 +354,7 @@ def generator(self, base, max_length, nbr_cases):
     """
     if max_length < 3:
       raise ValueError("Maximum length must be at least 3.")
-    for _ in xrange(nbr_cases):
+    for _ in range(nbr_cases):
       l1 = np.random.randint(max_length // 2) + 1
       l2 = np.random.randint(max_length - l1 - 1) + 1
       n1 = random_number_lower_endian(l1, base)
@@ -405,7 +405,7 @@ def generator(self, base, max_length, nbr_cases):
     """
     if max_length < 3:
       raise ValueError("Maximum length must be at least 3.")
-    for _ in xrange(nbr_cases):
+    for _ in range(nbr_cases):
       l1 = np.random.randint(max_length // 2) + 1
       l2 = np.random.randint(max_length - l1 - 1) + 1
       n1 = random_number_lower_endian(l1, base)
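
The `random_number_lower_endian` helper touched above returns digits least-significant first. A companion sketch (illustrative only; `lower_endian_to_number` here is written from scratch, not lifted from this diff) showing how such a list maps back to an integer:

```
def lower_endian_to_number(digits, base):
  """Interprets a least-significant-first digit list as an integer,
  e.g. [3, 2, 1] in base 10 is 123."""
  number = 0
  for digit in reversed(digits):
    number = number * base + digit
  return number

assert lower_endian_to_number([3, 2, 1], 10) == 123
```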

Diff for: tensor2tensor/data_generators/algorithmic_math.py (+4 −4)

@@ -28,7 +28,7 @@
 # Dependency imports
 
 import six
-from six.moves import xrange  # pylint: disable=redefined-builtin
+from six.moves import range  # pylint: disable=redefined-builtin
 import sympy
 
 
@@ -421,7 +421,7 @@ def math_dataset_init(alphabet_size=26, digits=None, functions=None):
     raise ValueError("digits cannot must be between 1 and 10. Got %s." % digits)
   vlist = alphabet[:alphabet_size]
   if digits is not None:
-    dlist = [str(d) for d in xrange(digits)]
+    dlist = [str(d) for d in range(digits)]
   else:
     dlist = []
   if functions is None:
@@ -481,7 +481,7 @@ def algebra_inverse(alphabet_size=26, min_depth=0, max_depth=2,
                      "Got max_depth=%s, min_depth=%s" % (max_depth, min_depth))
 
   alg_cfg = math_dataset_init(alphabet_size)
-  for _ in xrange(nbr_cases):
+  for _ in range(nbr_cases):
     sample, target = generate_algebra_inverse_sample(
         alg_cfg.vlist,
         list(alg_cfg.ops.values()), alg_cfg.solve_ops, min_depth, max_depth)
@@ -522,7 +522,7 @@ def algebra_simplify(alphabet_size=26,
                      "Got max_depth=%s, min_depth=%s" % (max_depth, min_depth))
 
   alg_cfg = math_dataset_init(alphabet_size, digits=5)
-  for _ in xrange(nbr_cases):
+  for _ in range(nbr_cases):
     sample, target = generate_algebra_simplify_sample(
         alg_cfg.vlist, list(alg_cfg.ops.values()), min_depth, max_depth)
     yield {

Diff for: tensor2tensor/data_generators/algorithmic_test.py (+2 −2)

@@ -21,7 +21,7 @@
 
 # Dependency imports
 
-from six.moves import xrange  # pylint: disable=redefined-builtin
+from six.moves import range  # pylint: disable=redefined-builtin
 
 from tensor2tensor.data_generators import algorithmic
 
@@ -51,7 +51,7 @@ def testZipfDistribution(self):
     # more probable/frequent that the second in rank, three times more prob/freq
     # that the third in rank and so on.
     d = algorithmic.zipf_distribution(10, 1.0001)
-    for i in xrange(len(d[1:])-1):
+    for i in range(len(d[1:])-1):
      self.assertEqual("%.4f" % (abs(d[i+1]-d[i+2])*(i+2)), "%.4f" % d[1])
 
   def testReverseGeneratorNlpLike(self):

Diff for: tensor2tensor/data_generators/cifar.py (+2 −2)

@@ -100,10 +100,10 @@ def cifar_generator(cifar_version, tmp_dir, training, how_many, start_from=0):
     num_images = images.shape[0]
     images = images.reshape((num_images, 3, image_size, image_size))
     all_images.extend([
-        np.squeeze(images[j]).transpose((1, 2, 0)) for j in xrange(num_images)
+        np.squeeze(images[j]).transpose((1, 2, 0)) for j in range(num_images)
     ])
     labels = data[label_key]
-    all_labels.extend([labels[j] for j in xrange(num_images)])
+    all_labels.extend([labels[j] for j in range(num_images)])
   return image_utils.image_generator(
       all_images[start_from:start_from + how_many],
       all_labels[start_from:start_from + how_many])
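
The comprehension in this hunk converts each CIFAR image from channels-first (3, H, W) to channels-last (H, W, 3). A small numpy illustration of the same transpose on dummy data (not from the commit):

```
import numpy as np

image_size = 32
images = np.zeros((10, 3, image_size, image_size), dtype=np.uint8)

# transpose((1, 2, 0)) moves the channel axis last: (3, H, W) -> (H, W, 3).
# np.squeeze is a no-op here since no axis has size 1; it mirrors the diff.
converted = [np.squeeze(images[j]).transpose((1, 2, 0)) for j in range(10)]
assert converted[0].shape == (image_size, image_size, 3)
```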

Diff for: tensor2tensor/data_generators/dna_encoder.py (+2 −2)

@@ -26,7 +26,7 @@
 import itertools
 # Dependency imports
 
-from six.moves import xrange  # pylint: disable=redefined-builtin
+from six.moves import range  # pylint: disable=redefined-builtin
 from tensor2tensor.data_generators import text_encoder
 
 
@@ -77,7 +77,7 @@ def encode(self, s):
     assert (len(bases) % self._chunk_size) == 0
     num_chunks = len(bases) // self._chunk_size
     ids = []
-    for chunk_idx in xrange(num_chunks):
+    for chunk_idx in range(num_chunks):
       start_idx = chunk_idx * self._chunk_size
       end_idx = start_idx + self._chunk_size
       chunk = tuple(bases[start_idx:end_idx])
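
The `encode` loop above walks the base sequence in fixed-size chunks. A standalone sketch of the same indexing with the vocabulary lookup stripped out (`chunk_tuples` is a hypothetical name, not the library's API):

```
def chunk_tuples(bases, chunk_size):
  """Yields consecutive chunk tuples; assumes len(bases) is a multiple of
  chunk_size, as the encoder's assert above guarantees."""
  num_chunks = len(bases) // chunk_size
  for chunk_idx in range(num_chunks):
    start_idx = chunk_idx * chunk_size
    yield tuple(bases[start_idx:start_idx + chunk_size])

assert list(chunk_tuples("ACGTAC", 3)) == [("A", "C", "G"), ("T", "A", "C")]
```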

Diff for: tensor2tensor/data_generators/gene_expression.py (+4 −4)

@@ -44,7 +44,7 @@
 import h5py
 import numpy as np
 
-from six.moves import xrange  # pylint: disable=redefined-builtin
+from six.moves import range  # pylint: disable=redefined-builtin
 
 from tensor2tensor.data_generators import dna_encoder
 from tensor2tensor.data_generators import generator_utils
@@ -130,7 +130,7 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1):
     # Start and wait for processes in batches
     num_batches = int(
         math.ceil(float(len(processes)) / MAX_CONCURRENT_PROCESSES))
-    for i in xrange(num_batches):
+    for i in range(num_batches):
       start = i * MAX_CONCURRENT_PROCESSES
       end = start + MAX_CONCURRENT_PROCESSES
       current = processes[start:end]
@@ -211,7 +211,7 @@ def generate_shard_args(outfiles, num_examples):
   """Generate start and end indices per outfile."""
   num_shards = len(outfiles)
   num_examples_per_shard = num_examples // num_shards
-  start_idxs = [i * num_examples_per_shard for i in xrange(num_shards)]
+  start_idxs = [i * num_examples_per_shard for i in range(num_shards)]
   end_idxs = list(start_idxs)
   end_idxs.pop(0)
   end_idxs.append(num_examples)
@@ -249,7 +249,7 @@ def dataset_generator(filepath,
   if end_idx is None:
     end_idx = inp_data.len()
 
-  for i in xrange(start_idx, end_idx):
+  for i in range(start_idx, end_idx):
     if i % 100 == 0:
       print("Generating example %d for %s" % (i, dataset))
     inputs, mask, outputs = inp_data[i], mask_data[i], out_data[i]
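
`generate_shard_args` above splits `num_examples` evenly across shards, with the final shard absorbing the remainder. The same arithmetic in isolation (`shard_bounds` is a hypothetical helper, not part of the diff):

```
def shard_bounds(num_examples, num_shards):
  """Start/end index pairs per shard; mirrors the pop/append dance above."""
  per_shard = num_examples // num_shards
  starts = [i * per_shard for i in range(num_shards)]
  ends = starts[1:] + [num_examples]
  return list(zip(starts, ends))

# 10 examples over 3 shards: the last shard takes the extra example.
assert shard_bounds(10, 3) == [(0, 3), (3, 6), (6, 10)]
```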

Diff for: tensor2tensor/data_generators/generator_utils.py (+3 −3)

@@ -29,7 +29,7 @@
 
 import requests
 import six
-from six.moves import xrange  # pylint: disable=redefined-builtin
+from six.moves import range  # pylint: disable=redefined-builtin
 import six.moves.urllib_request as urllib  # Imports urllib on Python2, urllib.request on Python3
 
 from tensor2tensor.data_generators import text_encoder
@@ -119,7 +119,7 @@ def sharded_name(base_name, shard, total_shards):
 
 def shard_filepath(fname, num_shards):
   return [
-      sharded_name(fname, shard, num_shards) for shard in xrange(num_shards)
+      sharded_name(fname, shard, num_shards) for shard in range(num_shards)
   ]
 
 
@@ -592,7 +592,7 @@ def pack_examples(examples,
     if chop_long_sequences and len(x) > packed_length:
       assert not has_inputs
       num_fragments = len(x) // packed_length
-      for i in xrange(num_fragments):
+      for i in range(num_fragments):
        yield packer(
            x[packed_length * i:packed_length * (i + 1)], spacing).to_dict()
      x = x[packed_length * num_fragments:]
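
The `pack_examples` hunk chops an over-long sequence into `packed_length`-sized pieces before packing. The slicing on its own, as a sketch (`chop_to_fragments` is a hypothetical name; the tail handling mirrors the line after the loop above):

```
def chop_to_fragments(x, packed_length):
  """Yields the full fragments of x; the caller keeps the remainder
  x[packed_length * num_fragments:], as the code above does."""
  num_fragments = len(x) // packed_length
  for i in range(num_fragments):
    yield x[packed_length * i:packed_length * (i + 1)]

seq = list(range(10))
assert list(chop_to_fragments(seq, 4)) == [[0, 1, 2, 3], [4, 5, 6, 7]]
```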

Diff for: tensor2tensor/data_generators/lm1b.py (+2 −2)

@@ -24,7 +24,7 @@
 
 # Dependency imports
 
-from six.moves import xrange  # pylint: disable=redefined-builtin
+from six.moves import range  # pylint: disable=redefined-builtin
 
 from tensor2tensor.data_generators import generator_utils
 from tensor2tensor.data_generators import problem
@@ -79,7 +79,7 @@ def _train_data_filenames(tmp_dir):
       os.path.join(tmp_dir,
                    "1-billion-word-language-modeling-benchmark-r13output",
                    "training-monolingual.tokenized.shuffled",
-                   "news.en-%05d-of-00100" % i) for i in xrange(1, 100)
+                   "news.en-%05d-of-00100" % i) for i in range(1, 100)
   ]
 
 

Diff for: tensor2tensor/data_generators/ocr.py (+1 −1)

@@ -69,7 +69,7 @@ def generator(self, data_dir, tmp_dir, is_training):
     num_examples = 2
     ocr_dir = os.path.join(tmp_dir, "ocr/")
     tf.logging.info("Looking for OCR data in %s." % ocr_dir)
-    for i in xrange(num_examples):
+    for i in range(num_examples):
       image_filepath = os.path.join(ocr_dir, "%d.png" % i)
       text_filepath = os.path.join(ocr_dir, "%d.txt" % i)
       with tf.gfile.Open(text_filepath, "rb") as f:

Diff for: tensor2tensor/data_generators/text_encoder.py (+7 −7)

@@ -34,7 +34,7 @@
 
 import numpy as np
 import six
-from six.moves import xrange  # pylint: disable=redefined-builtin
+from six.moves import range  # pylint: disable=redefined-builtin
 from tensor2tensor.data_generators import tokenizer
 
 import tensorflow as tf
@@ -385,7 +385,7 @@ def store_to_file(self, filename):
       filename: Full path of the file to store the vocab to.
     """
     with tf.gfile.Open(filename, "w") as f:
-      for i in xrange(len(self._id_to_token)):
+      for i in range(len(self._id_to_token)):
        f.write(self._id_to_token[i] + "\n")
@@ -599,7 +599,7 @@ def _escaped_token_to_subtoken_strings(self, escaped_token):
    start = 0
    token_len = len(escaped_token)
    while start < token_len:
-      for end in xrange(
+      for end in range(
          min(token_len, start + self._max_subtoken_len), start, -1):
        subtoken = escaped_token[start:end]
        if subtoken in self._subtoken_string_to_id:
@@ -785,7 +785,7 @@ def build_from_token_counts(self,
    # with high enough counts for our new vocabulary.
    if min_count < 1:
      min_count = 1
-    for i in xrange(num_iterations):
+    for i in range(num_iterations):
      tf.logging.info("Iteration {0}".format(i))
 
      # Collect all substrings of the encoded token that break along current
@@ -800,7 +800,7 @@ def build_from_token_counts(self,
        if max_subtoken_length is not None:
          last_position = min(last_position, start + max_subtoken_length)
 
-        for end in xrange(start + 1, last_position):
+        for end in range(start + 1, last_position):
          new_subtoken = escaped_token[start:end]
          subtoken_counts[new_subtoken] += count
        start += len(subtoken)
@@ -817,7 +817,7 @@ def build_from_token_counts(self,
      # Consider the candidates longest to shortest, so that if we accept
      # a longer subtoken string, we can decrement the counts of its prefixes.
      new_subtoken_strings = []
-      for lsub in xrange(len(len_to_subtoken_strings) - 1, 0, -1):
+      for lsub in range(len(len_to_subtoken_strings) - 1, 0, -1):
        subtoken_strings = len_to_subtoken_strings[lsub]
        for subtoken_string in subtoken_strings:
          count = subtoken_counts[subtoken_string]
@@ -826,7 +826,7 @@ def build_from_token_counts(self,
          # explicitly, regardless of count.
          if subtoken_string not in self._alphabet:
            new_subtoken_strings.append((count, subtoken_string))
-            for l in xrange(1, lsub):
+            for l in range(1, lsub):
              subtoken_counts[subtoken_string[:l]] -= count
 
      # Include the alphabet explicitly to guarantee all strings are encodable.
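
The `_escaped_token_to_subtoken_strings` hunk is a greedy longest-match segmenter: the reversed `range` tries the longest candidate substring first and backs off one character at a time. A self-contained sketch of that strategy (hypothetical names; assumes every single character is in the vocabulary so the scan always advances):

```
def greedy_segment(token, vocab, max_len):
  """Greedy longest-match split of token into pieces found in vocab."""
  pieces = []
  start = 0
  while start < len(token):
    for end in range(min(len(token), start + max_len), start, -1):
      piece = token[start:end]
      if piece in vocab:  # Longest match wins; otherwise back off by one.
        pieces.append(piece)
        start = end
        break
  return pieces

vocab = {"un", "likely", "u", "n", "l", "i", "k", "e", "y"}
assert greedy_segment("unlikely", vocab, 6) == ["un", "likely"]
```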

Diff for: tensor2tensor/data_generators/text_encoder_test.py (+2 −2)

@@ -30,7 +30,7 @@
 # Dependency imports
 import mock
 import six
-from six.moves import xrange  # pylint: disable=redefined-builtin
+from six.moves import range  # pylint: disable=redefined-builtin
 
 from tensor2tensor.data_generators import text_encoder
 import tensorflow as tf
@@ -193,7 +193,7 @@ def test_long_tokens(self):
    long_tokens = []
    for _ in range(num_tokens):
      long_token = "".join([random.choice(string.ascii_uppercase)
-                            for _ in xrange(token_length)])
+                            for _ in range(token_length)])
      long_tokens.append(long_token)
 
    corpus = " ".join(long_tokens)

Diff for: tensor2tensor/data_generators/tokenizer.py (+3 −3)

@@ -51,7 +51,7 @@
 # Dependency imports
 
 import six
-from six.moves import xrange  # pylint: disable=redefined-builtin
+from six.moves import range  # pylint: disable=redefined-builtin
 import tensorflow as tf
 
 # Conversion between Unicode and UTF-8, if required (on Python2)
@@ -60,7 +60,7 @@
 
 # This set contains all letter and number characters.
 _ALPHANUMERIC_CHAR_SET = set(
-    six.unichr(i) for i in xrange(sys.maxunicode)
+    six.unichr(i) for i in range(sys.maxunicode)
    if (unicodedata.category(six.unichr(i)).startswith("L") or
        unicodedata.category(six.unichr(i)).startswith("N")))
 
@@ -79,7 +79,7 @@ def encode(text):
  token_start = 0
  # Classify each character in the input string
  is_alnum = [c in _ALPHANUMERIC_CHAR_SET for c in text]
-  for pos in xrange(1, len(text)):
+  for pos in range(1, len(text)):
    if is_alnum[pos] != is_alnum[pos - 1]:
      token = text[token_start:pos]
      if token != u" " or token_start == 0:
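
The tokenizer hunk splits text wherever the character class flips between alphanumeric and non-alphanumeric, swallowing single inter-word spaces. A simplified sketch using `str.isalnum` in place of the full Unicode `_ALPHANUMERIC_CHAR_SET` (`boundary_tokens` is a hypothetical helper, not the library's `encode`):

```
def boundary_tokens(text):
  """Splits on alnum/non-alnum boundaries; drops lone separating spaces."""
  if not text:
    return []
  tokens = []
  token_start = 0
  is_alnum = [c.isalnum() for c in text]
  for pos in range(1, len(text)):
    if is_alnum[pos] != is_alnum[pos - 1]:
      token = text[token_start:pos]
      if token != u" " or token_start == 0:
        tokens.append(token)
      token_start = pos
  tokens.append(text[token_start:])  # Final token, boundary or not.
  return tokens

assert boundary_tokens("Dude - that's so cool.") == [
    "Dude", " - ", "that", "'", "s", "so", "cool", "."]
```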
