Skip to content

Commit

Permalink
Make get_char_vocab.py compatible with both Python 2 and Python 3.
Browse files: browse the repository at this point in the history
  • Loading branch information
kentonl committed Aug 7, 2019
1 parent 10c6fb6 commit 9d1ee19
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions get_char_vocab.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,6 +4,7 @@

import sys
import json
import io

def get_char_vocab(input_filenames, output_filename):
vocab = set()
Expand All @@ -14,9 +15,10 @@ def get_char_vocab(input_filenames, output_filename):
for word in sentence:
vocab.update(word)
vocab = sorted(list(vocab))
with open(output_filename, "w") as f:
with io.open(output_filename, mode="w", encoding="utf8") as f:
for char in vocab:
f.write(u"{}\n".format(char).encode("utf8"))
f.write(char)
f.write(u"\n")
print("Wrote {} characters to {}".format(len(vocab), output_filename))

def get_char_vocab_language(language):
Expand Down

0 comments on commit 9d1ee19

Please sign in to comment.