Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize parsing #19

Merged
merged 5 commits into from
Dec 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.21)

project(mat-json VERSION 3.1.4)
project(mat-json VERSION 3.1.5)

set(SOURCE_FILES
src/external/dragonbox.cpp
Expand Down
2 changes: 1 addition & 1 deletion include/matjson.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ namespace matjson {
Value(T*) = delete;

Value(Value const&);
Value(Value&&);
Value(Value&&) noexcept;
~Value();

Value& operator=(Value);
Expand Down
6 changes: 3 additions & 3 deletions src/impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ class matjson::ValueImpl {

public:
template <class T>
ValueImpl(Type type, T value) : m_type(type), m_value(value) {}
ValueImpl(Type type, T&& value) : m_type(type), m_value(std::forward<T>(value)) {}

template <class T>
ValueImpl(Type type, std::string key, T value) :
m_type(type), m_key(std::move(key)), m_value(value) {}
ValueImpl(Type type, std::string key, T&& value) :
m_type(type), m_key(std::move(key)), m_value(std::forward<T>(value)) {}

ValueImpl(ValueImpl const&) = default;

Expand Down
139 changes: 98 additions & 41 deletions src/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,27 @@ bool isWhitespace(char c) {
return c == ' ' || c == '\n' || c == '\r' || c == '\t';
}

template <class S = std::istream&>
struct StringStream {
std::istream& stream;
int line = 1, column = 1;
S stream;
int line = 1, column = 1, offset = 0;

static constexpr bool isStream = std::is_same_v<S, std::istream&>;

auto error(std::string_view msg) const noexcept {
return Err(ParseError(std::string(msg), stream.tellg(), line, column));
return Err(ParseError(std::string(msg), offset, line, column));
}

Result<char, ParseError> take() noexcept {
char ch;
if (!stream.get(ch)) return this->error("eof");
if constexpr (isStream) {
if (!stream.get(ch)) return this->error("eof");
} else {
if (stream.empty()) return this->error("eof");
ch = stream[0];
stream = stream.substr(1);
}
++offset;
if (ch == '\n') {
++line;
column = 1;
Expand All @@ -38,40 +48,76 @@ struct StringStream {

Result<std::string, ParseError> take(size_t n) {
// this is only used for constants, so it's fine to not count lines
std::string buffer;
buffer.resize(n);
if (!stream.read(buffer.data(), n)) return this->error("eof");
column += n;
return Ok(buffer);
if constexpr (isStream) {
std::string buffer;
buffer.resize(n);
if (!stream.read(buffer.data(), n)) return this->error("eof");
column += n;
offset += n;
return Ok(std::move(buffer));
} else {
if (stream.size() < n) return this->error("eof");
std::string buffer = std::string(stream.substr(0, n));
stream = stream.substr(n);
column += n;
offset += n;
return Ok(std::move(buffer));
}
}

Result<char, ParseError> peek() noexcept {
auto ch = stream.peek();
if (ch == EOF) return this->error("eof");
return Ok(ch);
if constexpr (isStream) {
auto ch = stream.peek();
if (ch == EOF) return this->error("eof");
return Ok(ch);
} else {
if (stream.empty()) return this->error("eof");
return Ok(stream[0]);
}
}

// takes until the next char is not whitespace
void skipWhitespace() noexcept {
while (stream.good() && isWhitespace(stream.peek())) {
char ch = stream.get();
if (ch == '\n') {
++line;
column = 1;
if constexpr (isStream) {
while (stream.good() && isWhitespace(stream.peek())) {
char ch = stream.get();
++offset;
if (ch == '\n') {
++line;
column = 1;
}
else {
++column;
}
}
else {
++column;
} else {
while (!stream.empty() && isWhitespace(stream[0])) {
char ch = stream[0];
stream = stream.substr(1);
++offset;
if (ch == '\n') {
++line;
column = 1;
}
else {
++column;
}
}
}
}

explicit operator bool() const noexcept {
(void)stream.peek();
return stream.good();
if constexpr (isStream) {
(void)stream.peek();
return stream.good();
} else {
return !stream.empty();
}
}
};

Result<ValuePtr, ParseError> parseConstant(StringStream& stream) {
template <class S>
Result<ValuePtr, ParseError> parseConstant(StringStream<S>& stream) {
GEODE_UNWRAP_INTO(auto first, stream.peek());
switch (first) {
case 't': {
Expand Down Expand Up @@ -120,7 +166,8 @@ void encodeUTF8(std::string& str, int32_t code_point) {
}
}

Result<std::string, ParseError> parseString(StringStream& stream) noexcept {
template <class S>
Result<std::string, ParseError> parseString(StringStream<S>& stream) noexcept {
// when this function is called we already know the first character is a quote
GEODE_UNWRAP(stream.take());
std::string str;
Expand Down Expand Up @@ -193,10 +240,11 @@ Result<std::string, ParseError> parseString(StringStream& stream) noexcept {
}
// eat the "
GEODE_UNWRAP(stream.take());
return Ok(str);
return Ok(std::move(str));
}

Result<ValuePtr, ParseError> parseNumber(StringStream& stream) noexcept {
template <class S>
Result<ValuePtr, ParseError> parseNumber(StringStream<S>& stream) noexcept {
std::string buffer;
bool isFloating = false;
bool isNegative = false;
Expand Down Expand Up @@ -274,21 +322,23 @@ Result<ValuePtr, ParseError> parseNumber(StringStream& stream) noexcept {
// FIXME: std::stod is locale specific, might break on some machines
return Ok(std::make_unique<ValueImpl>(Type::Number, std::stod(buffer)));
#else
return fromCharsHelper.operator()<double>();
return fromCharsHelper.template operator()<double>();
#endif
}
else if (isNegative) {
return fromCharsHelper.operator()<intmax_t>();
return fromCharsHelper.template operator()<intmax_t>();
}
else {
return fromCharsHelper.operator()<uintmax_t>();
return fromCharsHelper.template operator()<uintmax_t>();
}
}

// parses a json element with optional whitespace around it
Result<ValuePtr, ParseError> parseElement(StringStream& stream) noexcept;
template <class S>
Result<ValuePtr, ParseError> parseElement(StringStream<S>& stream) noexcept;

Result<ValuePtr, ParseError> parseObject(StringStream& stream) noexcept {
template <class S>
Result<ValuePtr, ParseError> parseObject(StringStream<S>& stream) noexcept {
GEODE_UNWRAP(stream.take());
stream.skipWhitespace();
std::vector<Value> object;
Expand All @@ -312,7 +362,7 @@ Result<ValuePtr, ParseError> parseObject(StringStream& stream) noexcept {

GEODE_UNWRAP_INTO(auto value, parseElement(stream));
value->setKey(key);
object.push_back(ValueImpl::asValue(std::move(value)));
object.emplace_back(std::move(ValueImpl::asValue(std::move(value))));

GEODE_UNWRAP_INTO(char c, stream.peek());
if (c == ',') {
Expand All @@ -328,18 +378,19 @@ Result<ValuePtr, ParseError> parseObject(StringStream& stream) noexcept {
}
// eat the }
GEODE_UNWRAP(stream.take());
return Ok(std::make_unique<ValueImpl>(Type::Object, object));
return Ok(std::make_unique<ValueImpl>(Type::Object, std::move(object)));
}

Result<ValuePtr, ParseError> parseArray(StringStream& stream) noexcept {
template <class S>
Result<ValuePtr, ParseError> parseArray(StringStream<S>& stream) noexcept {
GEODE_UNWRAP(stream.take());
stream.skipWhitespace();
std::vector<Value> array;
GEODE_UNWRAP_INTO(char p, stream.peek());
if (p != ']') {
while (true) {
GEODE_UNWRAP_INTO(auto element, parseElement(stream));
array.push_back(ValueImpl::asValue(std::move(element)));
array.emplace_back(std::move(ValueImpl::asValue(std::move(element))));

GEODE_UNWRAP_INTO(char c, stream.peek());
if (c == ',') {
Expand All @@ -355,10 +406,11 @@ Result<ValuePtr, ParseError> parseArray(StringStream& stream) noexcept {
}
// eat the ]
GEODE_UNWRAP(stream.take());
return Ok(std::make_unique<ValueImpl>(Type::Array, array));
return Ok(std::make_unique<ValueImpl>(Type::Array, std::move(array)));
}

Result<ValuePtr, ParseError> parseValue(StringStream& stream) noexcept {
template <class S>
Result<ValuePtr, ParseError> parseValue(StringStream<S>& stream) noexcept {
GEODE_UNWRAP_INTO(char p, stream.peek());
switch (p) {
case 't':
Expand Down Expand Up @@ -386,14 +438,16 @@ Result<ValuePtr, ParseError> parseValue(StringStream& stream) noexcept {
}
}

Result<ValuePtr, ParseError> parseElement(StringStream& stream) noexcept {
template <class S>
Result<ValuePtr, ParseError> parseElement(StringStream<S>& stream) noexcept {
stream.skipWhitespace();
GEODE_UNWRAP_INTO(auto value, parseValue(stream));
stream.skipWhitespace();
return Ok(std::move(value));
}

Result<ValuePtr, ParseError> parseRoot(StringStream& stream) noexcept {
template <class S>
Result<ValuePtr, ParseError> parseRoot(StringStream<S>& stream) noexcept {
GEODE_UNWRAP_INTO(auto value, parseElement(stream));
// if there's anything left in the stream that is not whitespace,
// it should be considered an error
Expand All @@ -404,14 +458,17 @@ Result<ValuePtr, ParseError> parseRoot(StringStream& stream) noexcept {
}

Result<Value, ParseError> Value::parse(std::istream& sourceStream) {
StringStream stream{sourceStream};
StringStream<std::istream&> stream{sourceStream};

return parseRoot(stream).map([](auto impl) {
return ValueImpl::asValue(std::move(impl));
});
}

Result<Value, ParseError> Value::parse(std::string_view source) {
std::istringstream strStream{std::string(source)};
return Value::parse(strStream);
StringStream<std::string_view> stream{source};

return parseRoot(stream).map([](auto impl) {
return ValueImpl::asValue(std::move(impl));
});
}
2 changes: 1 addition & 1 deletion src/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Value::Value(Value const& other) {
m_impl = std::make_unique<ValueImpl>(*other.m_impl.get());
}

Value::Value(Value&& other) {
Value::Value(Value&& other) noexcept {
if (other.m_impl == getDummyNullValue()->m_impl) {
m_impl = std::make_unique<ValueImpl>(Type::Null, std::monostate{});
return;
Expand Down
22 changes: 22 additions & 0 deletions test/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -382,11 +382,27 @@ TEST_CASE("ParseError line numbers") {
auto err = matjson::parse("{").unwrapErr();
REQUIRE(err.line == 1);
REQUIRE(err.column == 2);
REQUIRE(err.offset == 1);

err = matjson::parse("{\n\"hello").unwrapErr();

REQUIRE(err.line == 2);
REQUIRE(err.column == 7);
REQUIRE(err.offset == 8);
}

// Checks the position data (line, column, offset) reported in the ParseError
// when the malformed input is read from a std::istringstream rather than
// passed as a string. The inputs and expected values are the same as in the
// "ParseError line numbers" test case above.
TEST_CASE("ParseError line numbers from stream") {
// A lone "{" with nothing after it: the error is expected at
// line 1, column 2, offset 1 (one character was read).
std::istringstream stream("{");
auto err = matjson::parse(stream).unwrapErr();
REQUIRE(err.line == 1);
REQUIRE(err.column == 2);
REQUIRE(err.offset == 1);

// "{" followed by a newline and a string that is never closed. The error is
// expected on line 2; all 8 characters of the input count towards the
// offset, while the column (7) only counts the 6 characters after the newline.
stream = std::istringstream("{\n\"hello");
err = matjson::parse(stream).unwrapErr();
REQUIRE(err.line == 2);
REQUIRE(err.column == 7);
REQUIRE(err.offset == 8);
}

TEST_CASE("parseAs") {
Expand All @@ -401,6 +417,8 @@ TEST_CASE("Parse from stream") {
std::istringstream stream(R"({"name": "Hello!","value": 123})");

auto res = matjson::parse(stream).unwrap();
// parsing should consume the whole stream
REQUIRE(stream.eof());

REQUIRE(res == CoolStruct{.name = "Hello!", .value = 123});

Expand All @@ -412,6 +430,10 @@ TEST_CASE("Parse from stream") {
stream = std::istringstream("[1, 2, 3");
REQUIRE(matjson::parse(stream).isErr());

stream = std::istringstream("[1, 2!, 3");
REQUIRE(matjson::parse(stream).isErr());
REQUIRE(!stream.eof());

stream = std::istringstream("");
REQUIRE(matjson::parse(stream).isErr());

Expand Down
Loading