diff --git a/chapter_natural-language-processing-pretraining/similarity-analogy_origin.md b/chapter_natural-language-processing-pretraining/similarity-analogy_origin.md
index 93a6cd1f7..91449867e 100644
--- a/chapter_natural-language-processing-pretraining/similarity-analogy_origin.md
+++ b/chapter_natural-language-processing-pretraining/similarity-analogy_origin.md
@@ -81,7 +81,7 @@ class TokenEmbedding:
         data_dir = d2l.download_extract(embedding_name)
         # GloVe website: https://nlp.stanford.edu/projects/glove/
         # fastText website: https://fasttext.cc/
-        with open(os.path.join(data_dir, 'vec.txt'), 'r') as f:
+        with open(os.path.join(data_dir, 'vec.txt'), 'r', encoding='utf-8') as f:
             for line in f:
                 elems = line.rstrip().split(' ')
                 token, elems = elems[0], [float(elem) for elem in elems[1:]]
diff --git a/d2l/mxnet.py b/d2l/mxnet.py
index 25fc6eaf3..80e6af6d8 100644
--- a/d2l/mxnet.py
+++ b/d2l/mxnet.py
@@ -2071,7 +2071,7 @@ def _load_embedding(self, embedding_name):
         data_dir = d2l.download_extract(embedding_name)
         # GloVe website: https://nlp.stanford.edu/projects/glove/
         # fastText website: https://fasttext.cc/
-        with open(os.path.join(data_dir, 'vec.txt'), 'r') as f:
+        with open(os.path.join(data_dir, 'vec.txt'), 'r', encoding='utf-8') as f:
             for line in f:
                 elems = line.rstrip().split(' ')
                 token, elems = elems[0], [float(elem) for elem in elems[1:]]
diff --git a/d2l/paddle.py b/d2l/paddle.py
index 6c5813aeb..fc03a9a8a 100644
--- a/d2l/paddle.py
+++ b/d2l/paddle.py
@@ -2201,7 +2201,7 @@ def _load_embedding(self, embedding_name):
         data_dir = d2l.download_extract(embedding_name)
         # GloVe website: https://nlp.stanford.edu/projects/glove/
         # fastText website: https://fasttext.cc/
-        with open(os.path.join(data_dir, 'vec.txt'), 'r') as f:
+        with open(os.path.join(data_dir, 'vec.txt'), 'r', encoding='utf-8') as f:
             for line in f:
                 elems = line.rstrip().split(' ')
                 token, elems = elems[0], [float(elem) for elem in elems[1:]]
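
For context, the change above pins the file encoding when reading the pretrained vectors: without an explicit encoding, Python's open falls back to the platform's locale default (for example cp1252 on many Windows setups), which can raise UnicodeDecodeError on non-ASCII tokens in vec.txt. Below is a minimal sketch of the affected loading loop under that assumption; the function name load_vec_file is hypothetical, and the header-skipping check mirrors the surrounding d2l code that is not shown in the hunks above.

import os

def load_vec_file(data_dir):
    # Hypothetical stand-in for TokenEmbedding._load_embedding's file read.
    idx_to_token, idx_to_vec = ['<unk>'], []
    # Pin the encoding so the read does not depend on the locale default,
    # which may not be UTF-8 and then fails on multilingual tokens.
    with open(os.path.join(data_dir, 'vec.txt'), 'r', encoding='utf-8') as f:
        for line in f:
            elems = line.rstrip().split(' ')
            token, elems = elems[0], [float(elem) for elem in elems[1:]]
            # Skip header rows (e.g. fastText's first line holds only counts).
            if len(elems) > 1:
                idx_to_token.append(token)
                idx_to_vec.append(elems)
    return idx_to_token, idx_to_vec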