Skip to content

Commit

Permalink
Issue error if UTF-16 encoded *.pbrt file is provided
Browse files Browse the repository at this point in the history
Issue mmp#136.
  • Loading branch information
mmp committed Jun 1, 2021
1 parent 1e83bb6 commit 56bab28
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 0 deletions.
10 changes: 10 additions & 0 deletions src/pbrt/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ Tokenizer::Tokenizer(std::string str,
pos = contents.data();
end = pos + contents.size();
tokenizerMemory += contents.size();
CheckUTF(str.data(), str.size());
}

#if defined(PBRT_HAVE_MMAP) || defined(PBRT_IS_WINDOWS)
Expand All @@ -233,6 +234,7 @@ Tokenizer::Tokenizer(void *ptr, size_t len, std::string filename,
loc = FileLoc(*new std::string(filename));
pos = (const char *)ptr;
end = pos + len;
CheckUTF(ptr, len);
}
#endif

Expand All @@ -248,6 +250,14 @@ Tokenizer::~Tokenizer() {
#endif
}

// Reports an error through errorCallback when the buffer starts with a
// UTF-16 byte order mark (BOM), in either byte order. Buffers shorter than
// two bytes are never flagged, and nothing beyond the first two bytes is read.
//
//   ptr: start of the raw file contents.
//   len: number of bytes available at ptr.
//
// NOTE(review): len is an int, while the visible call sites pass size_t
// values; a length that does not fit in an int is narrowed on the way in and
// could end up below 2, silently skipping the check -- confirm whether the
// declaration should take size_t instead.
// NOTE(review): the UTF-32 little-endian BOM (ff fe 00 00) also begins with
// ff fe, so such files are reported here as UTF-16.
void Tokenizer::CheckUTF(const void *ptr, int len) const {
    // A BOM needs at least two bytes; anything shorter cannot carry one.
    if (len < 2)
        return;

    // https://en.wikipedia.org/wiki/Byte_order_mark
    const unsigned char *bytes = static_cast<const unsigned char *>(ptr);
    const bool bigEndianBOM = (bytes[0] == 0xfe && bytes[1] == 0xff);
    const bool littleEndianBOM = (bytes[0] == 0xff && bytes[1] == 0xfe);
    if (!bigEndianBOM && !littleEndianBOM)
        return;

    errorCallback("File is encoded with UTF-16, which is not currently "
                  "supported by pbrt (https://github.com/mmp/pbrt-v4/issues/136).", &loc);
}

pstd::optional<Token> Tokenizer::Next() {
while (true) {
const char *tokenStart = pos;
Expand Down
2 changes: 2 additions & 0 deletions src/pbrt/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ class Tokenizer {
FileLoc loc;

private:
// Inspects the first two bytes of the len-byte buffer at ptr and reports an
// error via errorCallback if they form a UTF-16 byte order mark.
void CheckUTF(const void *ptr, int len) const;

// Tokenizer Private Methods
int getChar() {
if (pos == end)
Expand Down

0 comments on commit 56bab28

Please sign in to comment.