From 1ba90bc505ac7ff0074a30658ae4b52408a069c1 Mon Sep 17 00:00:00 2001 From: ToruNiina Date: Fri, 18 Oct 2024 00:35:13 +0900 Subject: [PATCH 1/6] refactor: remove set_location --- include/toml11/fwd/location_fwd.hpp | 1 - include/toml11/impl/location_impl.hpp | 24 ------------------------ include/toml11/parser.hpp | 4 ++-- 3 files changed, 2 insertions(+), 27 deletions(-) diff --git a/include/toml11/fwd/location_fwd.hpp b/include/toml11/fwd/location_fwd.hpp index f6dbfdbc..650381fb 100644 --- a/include/toml11/fwd/location_fwd.hpp +++ b/include/toml11/fwd/location_fwd.hpp @@ -54,7 +54,6 @@ class location { return this->location_; } - void set_location(const std::size_t loc) noexcept; std::size_t line_number() const noexcept { diff --git a/include/toml11/impl/location_impl.hpp b/include/toml11/impl/location_impl.hpp index 19c418ab..7be3d180 100644 --- a/include/toml11/impl/location_impl.hpp +++ b/include/toml11/impl/location_impl.hpp @@ -66,30 +66,6 @@ TOML11_INLINE location::char_type location::peek() } } -TOML11_INLINE void location::set_location(const std::size_t loc) noexcept -{ - if(this->location_ == loc) - { - return ; - } - - if(loc == 0) - { - this->line_number_ = 1; - } - else if(this->location_ < loc) - { - const auto d = loc - this->location_; - this->advance_line_number(d); - } - else - { - const auto d = this->location_ - loc; - this->retrace_line_number(d); - } - this->location_ = loc; -} - TOML11_INLINE std::string location::get_line() const { assert(this->is_ok()); diff --git a/include/toml11/parser.hpp b/include/toml11/parser.hpp index 6d147311..39156500 100644 --- a/include/toml11/parser.hpp +++ b/include/toml11/parser.hpp @@ -3459,7 +3459,7 @@ parse_impl(std::vector cs, std::string fname, const spec& s // skip BOM if found if(loc.source()->size() >= 3) { - auto first = loc.get_location(); + auto first = loc; const auto c0 = loc.current(); loc.advance(); const auto c1 = loc.current(); loc.advance(); @@ -3468,7 +3468,7 @@ parse_impl(std::vector cs, std::string fname, const spec& s const auto bom_found = (c0 == 0xEF) && (c1 == 0xBB) && (c2 == 0xBF); if( ! bom_found) { - loc.set_location(first); + loc = first; } } From 5fbb86d98934d628bfb7f388266ad75225f1435d Mon Sep 17 00:00:00 2001 From: ToruNiina Date: Fri, 18 Oct 2024 02:08:22 +0900 Subject: [PATCH 2/6] refactor: rename adv_line_num -> adv_impl --- include/toml11/fwd/location_fwd.hpp | 4 ++-- include/toml11/impl/location_impl.hpp | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/toml11/fwd/location_fwd.hpp b/include/toml11/fwd/location_fwd.hpp index 650381fb..401d857e 100644 --- a/include/toml11/fwd/location_fwd.hpp +++ b/include/toml11/fwd/location_fwd.hpp @@ -67,8 +67,8 @@ class location private: - void advance_line_number(const std::size_t n); - void retrace_line_number(const std::size_t n); + void advance_impl(const std::size_t n); + void retrace_impl(const std::size_t n); private: diff --git a/include/toml11/impl/location_impl.hpp b/include/toml11/impl/location_impl.hpp index 7be3d180..73e234ff 100644 --- a/include/toml11/impl/location_impl.hpp +++ b/include/toml11/impl/location_impl.hpp @@ -15,12 +15,12 @@ TOML11_INLINE void location::advance(std::size_t n) noexcept assert(this->is_ok()); if(this->location_ + n < this->source_->size()) { - this->advance_line_number(n); + this->advance_impl(n); this->location_ += n; } else { - this->advance_line_number(this->source_->size() - this->location_); + this->advance_impl(this->source_->size() - this->location_); this->location_ = this->source_->size(); } } @@ -34,7 +34,7 @@ TOML11_INLINE void location::retrace(std::size_t n) noexcept } else { - this->retrace_line_number(n); + this->retrace_impl(n); this->location_ -= n; } } @@ -89,7 +89,7 @@ TOML11_INLINE std::size_t location::column_number() const noexcept } -TOML11_INLINE void location::advance_line_number(const std::size_t n) +TOML11_INLINE void location::advance_impl(const std::size_t n) { assert(this->is_ok()); assert(this->location_ + n <= this->source_->size()); @@ -102,7 +102,7 @@ TOML11_INLINE void location::advance_line_number(const std::size_t n) return; } -TOML11_INLINE void location::retrace_line_number(const std::size_t n) +TOML11_INLINE void location::retrace_impl(const std::size_t n) { assert(this->is_ok()); assert(n <= this->location_); // loc - n >= 0 From f06ad06ad7f22eb053026969b061bb0273c28531 Mon Sep 17 00:00:00 2001 From: ToruNiina Date: Fri, 18 Oct 2024 02:21:38 +0900 Subject: [PATCH 3/6] refactor: rm argument(same as default) of retrace --- include/toml11/impl/location_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/toml11/impl/location_impl.hpp b/include/toml11/impl/location_impl.hpp index 73e234ff..a7751a1d 100644 --- a/include/toml11/impl/location_impl.hpp +++ b/include/toml11/impl/location_impl.hpp @@ -142,7 +142,7 @@ TOML11_INLINE bool operator!=(const location& lhs, const location& rhs) TOML11_INLINE location prev(const location& loc) { location p(loc); - p.retrace(1); + p.retrace(); return p; } TOML11_INLINE location next(const location& loc) From befe37924105bf6f144d246167f51f2e4597b923 Mon Sep 17 00:00:00 2001 From: ToruNiina Date: Fri, 18 Oct 2024 02:24:11 +0900 Subject: [PATCH 4/6] refactor: restrict retrace dist == 1 to simplify the implementation --- include/toml11/fwd/location_fwd.hpp | 4 ++-- include/toml11/impl/location_impl.hpp | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/toml11/fwd/location_fwd.hpp b/include/toml11/fwd/location_fwd.hpp index 401d857e..7fc7ecaf 100644 --- a/include/toml11/fwd/location_fwd.hpp +++ b/include/toml11/fwd/location_fwd.hpp @@ -41,7 +41,7 @@ class location ~location() = default; void advance(std::size_t n = 1) noexcept; - void retrace(std::size_t n = 1) noexcept; + void retrace() noexcept; bool is_ok() const noexcept { return static_cast(this->source_); } @@ -68,7 +68,7 @@ class location private: void advance_impl(const std::size_t n); - void retrace_impl(const std::size_t n); + void retrace_impl(); private: diff --git a/include/toml11/impl/location_impl.hpp b/include/toml11/impl/location_impl.hpp index a7751a1d..a909ee48 100644 --- a/include/toml11/impl/location_impl.hpp +++ b/include/toml11/impl/location_impl.hpp @@ -24,18 +24,18 @@ TOML11_INLINE void location::advance(std::size_t n) noexcept this->location_ = this->source_->size(); } } -TOML11_INLINE void location::retrace(std::size_t n) noexcept +TOML11_INLINE void location::retrace(/*restricted to n=1*/) noexcept { assert(this->is_ok()); - if(this->location_ < n) + if(this->location_ == 0) { this->location_ = 0; this->line_number_ = 1; } else { - this->retrace_impl(n); - this->location_ -= n; + this->retrace_impl(); + this->location_ -= 1; } } @@ -102,14 +102,14 @@ TOML11_INLINE void location::advance_impl(const std::size_t n) return; } -TOML11_INLINE void location::retrace_impl(const std::size_t n) +TOML11_INLINE void location::retrace_impl(/*n == 1*/) { assert(this->is_ok()); - assert(n <= this->location_); // loc - n >= 0 + assert(this->location_ != 0); const auto iter = this->source_->cbegin(); const auto dline_num = static_cast(std::count( - std::next(iter, static_cast(this->location_ - n)), + std::next(iter, static_cast(this->location_ - 1)), std::next(iter, static_cast(this->location_)), char_type('\n'))); From 42a2628924eb6538689f75e85d13393e53f29ced Mon Sep 17 00:00:00 2001 From: ToruNiina Date: Sat, 19 Oct 2024 00:04:08 +0900 Subject: [PATCH 5/6] feat: save column_number in location instead of calculating it every time --- include/toml11/fwd/location_fwd.hpp | 9 +++-- include/toml11/impl/location_impl.hpp | 49 +++++++++++++++------------ 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/include/toml11/fwd/location_fwd.hpp b/include/toml11/fwd/location_fwd.hpp index 7fc7ecaf..395b96cb 100644 --- a/include/toml11/fwd/location_fwd.hpp +++ b/include/toml11/fwd/location_fwd.hpp @@ -31,7 +31,7 @@ class location location(source_ptr src, std::string src_name) : source_(std::move(src)), source_name_(std::move(src_name)), - location_(0), line_number_(1) + location_(0), line_number_(1), column_number_(1) {} location(const location&) = default; @@ -59,8 +59,11 @@ class location { return this->line_number_; } + std::size_t column_number() const noexcept + { + return this->column_number_; + } std::string get_line() const; - std::size_t column_number() const noexcept; source_ptr const& source() const noexcept {return this->source_;} std::string const& source_name() const noexcept {return this->source_name_;} @@ -69,6 +72,7 @@ class location void advance_impl(const std::size_t n); void retrace_impl(); + std::size_t calc_column_number() const noexcept; private: @@ -80,6 +84,7 @@ class location std::string source_name_; std::size_t location_; // std::vector<>::difference_type is signed std::size_t line_number_; + std::size_t column_number_; }; bool operator==(const location& lhs, const location& rhs) noexcept; diff --git a/include/toml11/impl/location_impl.hpp b/include/toml11/impl/location_impl.hpp index a909ee48..b996ab68 100644 --- a/include/toml11/impl/location_impl.hpp +++ b/include/toml11/impl/location_impl.hpp @@ -16,12 +16,12 @@ TOML11_INLINE void location::advance(std::size_t n) noexcept if(this->location_ + n < this->source_->size()) { this->advance_impl(n); - this->location_ += n; } else { this->advance_impl(this->source_->size() - this->location_); - this->location_ = this->source_->size(); + + assert(this->location_ == this->source_->size()); } } TOML11_INLINE void location::retrace(/*restricted to n=1*/) noexcept @@ -31,11 +31,11 @@ TOML11_INLINE void location::retrace(/*restricted to n=1*/) noexcept { this->location_ = 0; this->line_number_ = 1; + this->column_number_ = 1; } else { this->retrace_impl(); - this->location_ -= 1; } } @@ -77,7 +77,8 @@ TOML11_INLINE std::string location::get_line() const return make_string(std::next(prev.base()), next); } -TOML11_INLINE std::size_t location::column_number() const noexcept + +TOML11_INLINE std::size_t location::calc_column_number() const noexcept { assert(this->is_ok()); const auto iter = std::next(this->source_->cbegin(), static_cast(this->location_)); @@ -88,18 +89,29 @@ TOML11_INLINE std::size_t location::column_number() const noexcept return static_cast(std::distance(prev.base(), iter) + 1); // 1-origin } - TOML11_INLINE void location::advance_impl(const std::size_t n) { assert(this->is_ok()); assert(this->location_ + n <= this->source_->size()); - const auto iter = this->source_->cbegin(); - this->line_number_ += static_cast(std::count( - std::next(iter, static_cast(this->location_)), - std::next(iter, static_cast(this->location_ + n)), - char_type('\n'))); + auto iter = this->source_->cbegin(); + std::advance(iter, static_cast(this->location_)); + for(std::size_t i=0; iline_number_ += 1; + this->column_number_ = 1; + } + else + { + this->column_number_ += 1; + } + iter++; + } + this->location_ += n; return; } TOML11_INLINE void location::retrace_impl(/*n == 1*/) @@ -107,19 +119,14 @@ TOML11_INLINE void location::retrace_impl(/*n == 1*/) assert(this->is_ok()); assert(this->location_ != 0); - const auto iter = this->source_->cbegin(); - const auto dline_num = static_cast(std::count( - std::next(iter, static_cast(this->location_ - 1)), - std::next(iter, static_cast(this->location_)), - char_type('\n'))); + this->location_ -= 1; - if(this->line_number_ <= dline_num) - { - this->line_number_ = 1; - } - else + auto iter = this->source_->cbegin(); + std::advance(iter, static_cast(this->location_)); + if(*iter == '\n') { - this->line_number_ -= dline_num; + this->line_number_ -= 1; + this->column_number_ = this->calc_column_number(); } return; } From 869fdbdf8f0827c8efe82f0e613fb2033985808c Mon Sep 17 00:00:00 2001 From: ToruNiina Date: Mon, 21 Oct 2024 03:03:13 +0900 Subject: [PATCH 6/6] feat: reduce memory consumption with long line source_location stores the whole line. In case of short range in a long line like: ``` array = [1, 2, 3, ... , 100, 101, ..., 10000] ^^^- the region ``` It save the whole line as a `std::stirng`. It consumes a lot of memory and slows down everything. We can omit most of the part of the line because we only need the region, `100` here. --- include/toml11/fwd/region_fwd.hpp | 7 +- include/toml11/fwd/source_location_fwd.hpp | 37 ++++++++- include/toml11/impl/region_impl.hpp | 86 ++++++++++++++++---- include/toml11/impl/source_location_impl.hpp | 32 +++++--- 4 files changed, 135 insertions(+), 27 deletions(-) diff --git a/include/toml11/fwd/region_fwd.hpp b/include/toml11/fwd/region_fwd.hpp index 26f1a9bc..04ce56f0 100644 --- a/include/toml11/fwd/region_fwd.hpp +++ b/include/toml11/fwd/region_fwd.hpp @@ -82,11 +82,16 @@ class region const_iterator cend() const noexcept; std::string as_string() const; - std::vector as_lines() const; + std::vector> as_lines() const; source_ptr const& source() const noexcept {return this->source_;} std::string const& source_name() const noexcept {return this->source_name_;} + private: + + std::pair + take_line(const_iterator begin, const_iterator end) const; + private: source_ptr source_; diff --git a/include/toml11/fwd/source_location_fwd.hpp b/include/toml11/fwd/source_location_fwd.hpp index 77c7579d..70226899 100644 --- a/include/toml11/fwd/source_location_fwd.hpp +++ b/include/toml11/fwd/source_location_fwd.hpp @@ -10,7 +10,34 @@ namespace toml { +// // A struct to contain location in a toml file. +// +// To reduce memory consumption, it omits unrelated parts of long lines. like: +// +// 1. one long line, short region +// ``` +// | +// 1 | ... "foo", "bar", baz, "qux", "foobar", ... +// | ^-- unknown value +// ``` +// 2. long region +// ``` +// | +// 1 | array = [ "foo", ... "bar" ] +// | ^^^^^^^^^^^^^^^^^^^^- in this array +// ``` +// 3. many lines +// | +// 1 | array = [ "foo", +// | ^^^^^^^^ +// | ... +// | ^^^ +// | +// 10 | , "bar"] +// | ^^^^^^^^- in this array +// ``` +// struct source_location { public: @@ -39,13 +66,19 @@ struct source_location std::vector const& lines() const noexcept {return line_str_;} + // for internal use + std::size_t first_column_offset() const noexcept {return this->first_offset_;} + std::size_t last_column_offset() const noexcept {return this->last_offset_;} + private: bool is_ok_; std::size_t first_line_; - std::size_t first_column_; + std::size_t first_column_; // column num in the actual file + std::size_t first_offset_; // column num in the shown line std::size_t last_line_; - std::size_t last_column_; + std::size_t last_column_; // column num in the actual file + std::size_t last_offset_; // column num in the shown line std::size_t length_; std::string file_name_; std::vector line_str_; diff --git a/include/toml11/impl/region_impl.hpp b/include/toml11/impl/region_impl.hpp index 08147e25..28242c8b 100644 --- a/include/toml11/impl/region_impl.hpp +++ b/include/toml11/impl/region_impl.hpp @@ -121,27 +121,65 @@ TOML11_INLINE std::string region::as_string() const } } -TOML11_INLINE std::vector region::as_lines() const +TOML11_INLINE std::pair +region::take_line(const_iterator begin, const_iterator end) const +{ + // To omit long line, we cap region by before/after 30 chars + const auto dist_before = std::distance(source_->cbegin(), begin); + const auto dist_after = std::distance(end, source_->cend()); + + const const_iterator capped_begin = (dist_before <= 30) ? source_->cbegin() : std::prev(begin, 30); + const const_iterator capped_end = (dist_after <= 30) ? source_->cend() : std::next(end, 30); + + const auto lf = char_type('\n'); + const auto lf_before = std::find(cxx::make_reverse_iterator(begin), + cxx::make_reverse_iterator(capped_begin), lf); + const auto lf_after = std::find(end, capped_end, lf); + + auto offset = static_cast(std::distance(lf_before.base(), begin)); + + std::string retval = make_string(lf_before.base(), lf_after); + + if(lf_before.base() != source_->cbegin() && *lf_before != lf) + { + retval = "... " + retval; + offset += 4; + } + + if(lf_after != source_->cend() && *lf_after != lf) + { + retval = retval + " ..."; + } + + return std::make_pair(retval, offset); +} + +TOML11_INLINE std::vector> region::as_lines() const { assert(this->is_ok()); if(this->length_ == 0) { - return std::vector{""}; + return std::vector>{ + std::make_pair("", std::size_t(0)) + }; } // Consider the following toml file // ``` // array = [ + // 1, 2, 3, // ] # comment // ``` // and the region represnets // ``` // [ + // 1, 2, 3, // ] // ``` // but we want to show the following. // ``` // array = [ + // 1, 2, 3, // ] # comment // ``` // So we need to find LFs before `begin` and after `end`. @@ -162,25 +200,45 @@ TOML11_INLINE std::vector region::as_lines() const const auto begin = std::next(this->source_->cbegin(), begin_idx); const auto end = std::next(this->source_->cbegin(), end_idx); - const auto line_begin = std::find(cxx::make_reverse_iterator(begin), this->source_->crend(), char_type('\n')).base(); - const auto line_end = std::find(end, this->source_->cend(), char_type('\n')); + assert(this->first_line_number() <= this->last_line_number()); - const auto reg_lines = make_string(line_begin, line_end); - - if(reg_lines == "") // the region is an empty line that only contains LF + if(this->first_line_number() == this->last_line_number()) { - return std::vector{""}; + return std::vector>{ + this->take_line(begin, end) + }; } - std::istringstream iss(reg_lines); + // we have multiple lines. `begin` and `end` points different lines. + // that means that there is at least one `LF` between `begin` and `end`. + + const auto after_begin = std::distance(begin, this->source_->cend()); + const auto before_end = std::distance(this->source_->cbegin(), end); + + const_iterator capped_file_end = this->source_->cend(); + const_iterator capped_file_begin = this->source_->cbegin(); + if(60 < after_begin) {capped_file_end = std::next(begin, 50);} + if(60 < before_end) {capped_file_begin = std::prev(end, 50);} - std::vector lines; - std::string line; - while(std::getline(iss, line)) + const auto lf = char_type('\n'); + const auto first_line_end = std::find(begin, capped_file_end, lf); + const auto last_line_begin = std::find(capped_file_begin, end, lf); + + const auto first_line = this->take_line(begin, first_line_end); + const auto last_line = this->take_line(last_line_begin, end); + + if(this->first_line_number() + 1 == this->last_line_number()) + { + return std::vector>{ + first_line, last_line + }; + } + else { - lines.push_back(line); + return std::vector>{ + first_line, std::make_pair("...", 0), last_line + }; } - return lines; } } // namespace detail diff --git a/include/toml11/impl/source_location_impl.hpp b/include/toml11/impl/source_location_impl.hpp index 7a2fc8c3..3ecf2ad4 100644 --- a/include/toml11/impl/source_location_impl.hpp +++ b/include/toml11/impl/source_location_impl.hpp @@ -20,8 +20,10 @@ TOML11_INLINE source_location::source_location(const detail::region& r) : is_ok_(false), first_line_(1), first_column_(1), + first_offset_(1), last_line_(1), last_column_(1), + last_offset_(1), length_(0), file_name_("unknown file") { @@ -34,7 +36,17 @@ TOML11_INLINE source_location::source_location(const detail::region& r) this->last_line_ = r.last_line_number(); this->last_column_ = r.last_column_number(); this->length_ = r.length(); - this->line_str_ = r.as_lines(); + + const auto lines = r.as_lines(); + assert( ! lines.empty()); + + for(const auto& l : lines) + { + this->line_str_.push_back(l.first); + } + + this->first_offset_ = lines.at( 0).second + 1; // to 1-origin + this->last_offset_ = lines.at(lines.size()-1).second + 1; } } @@ -145,36 +157,36 @@ TOML11_INLINE std::string format_location_impl(const std::size_t lnw, { // when column points LF, it exceeds the size of the first line. std::size_t underline_limit = 1; - if(loc.first_line().size() < loc.first_column_number()) + if(loc.first_line().size() < loc.first_column_offset()) { underline_limit = 1; } else { - underline_limit = loc.first_line().size() - loc.first_column_number() + 1; + underline_limit = loc.first_line().size() - loc.first_column_offset() + 1; } const auto underline_len = (std::min)(underline_limit, loc.length()); format_line(oss, lnw, loc.first_line_number(), loc.first_line()); - format_underline(oss, lnw, loc.first_column_number(), underline_len, msg); + format_underline(oss, lnw, loc.first_column_offset(), underline_len, msg); } else if(loc.lines().size() == 2) { const auto first_underline_len = - loc.first_line().size() - loc.first_column_number() + 1; + loc.first_line().size() - loc.first_column_offset() + 1; format_line(oss, lnw, loc.first_line_number(), loc.first_line()); - format_underline(oss, lnw, loc.first_column_number(), + format_underline(oss, lnw, loc.first_column_offset(), first_underline_len, ""); format_line(oss, lnw, loc.last_line_number(), loc.last_line()); - format_underline(oss, lnw, 1, loc.last_column_number(), msg); + format_underline(oss, lnw, 1, loc.last_column_offset(), msg); } else if(loc.lines().size() > 2) { const auto first_underline_len = - loc.first_line().size() - loc.first_column_number() + 1; + loc.first_line().size() - loc.first_column_offset() + 1; format_line(oss, lnw, loc.first_line_number(), loc.first_line()); - format_underline(oss, lnw, loc.first_column_number(), + format_underline(oss, lnw, loc.first_column_offset(), first_underline_len, "and"); if(loc.lines().size() == 3) @@ -188,7 +200,7 @@ TOML11_INLINE std::string format_location_impl(const std::size_t lnw, format_empty_line(oss, lnw); } format_line(oss, lnw, loc.last_line_number(), loc.last_line()); - format_underline(oss, lnw, 1, loc.last_column_number(), msg); + format_underline(oss, lnw, 1, loc.last_column_offset(), msg); } // if loc is empty, do nothing. return oss.str();