Skip to content

Commit

Permalink
Introduce UnsafeConstIterator for bytes instances.
Browse files Browse the repository at this point in the history
An unsafe iterator offers fast but unchecked access to the data. We
also rename `bytes::Iterator` to `bytes::SafeConstIterator` so that
for bytes we now follow the same two-tiered iterator structure as for
streams. We then switch some library code over to now use unsafe
iterators, gaining a noticeable speed-up in some cases.
  • Loading branch information
rsmmr committed Oct 29, 2024
1 parent e179079 commit 88ea73f
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 33 deletions.
2 changes: 1 addition & 1 deletion hilti/runtime/include/type-info.h
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ class Bool : public detail::AtomicType<bool> {};
class Bytes : public detail::AtomicType<hilti::rt::Bytes> {};

/** Type information for type ``iterator<bytes>`. */
class BytesIterator : public detail::AtomicType<hilti::rt::bytes::Iterator> {};
class BytesIterator : public detail::AtomicType<hilti::rt::bytes::SafeIterator> {};

namespace enum_ {

Expand Down
125 changes: 110 additions & 15 deletions hilti/runtime/include/types/bytes.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,29 @@ HILTI_RT_ENUM(Charset, Undef, UTF8, ASCII);
/** For bytes decoding, how to handle decoding errors. */
using DecodeErrorStrategy = string::DecodeErrorStrategy;

class Iterator {
/**
* Safe bytes iterator traversing the content of an instance.
*
* Unlike the STL-style iterators, this iterator protects against the bytes
* instance being no longer available by throwing an `InvalidIterator`
* exception if it's still dereferenced. It will also catch attempts to
* dereference iterators that remain outside of the current valid range of the
* underlying bytes instance, throwing an `IndexError` exception in that case.
* However, operations that only move the iterator will succeed even for
* out-of-range positions. That includes advancing an iterator beyond the end
* of the content.
*/
class SafeIterator {
using B = std::string;
using difference_type = B::const_iterator::difference_type;

std::weak_ptr<const B*> _control;
typename integer::safe<std::uint64_t> _index = 0;

public:
Iterator() = default;
SafeIterator() = default;

Iterator(typename B::size_type index, std::weak_ptr<const B*> control)
SafeIterator(typename B::size_type index, std::weak_ptr<const B*> control)
: _control(std::move(control)), _index(index) {}

uint8_t operator*() const {
Expand Down Expand Up @@ -87,7 +99,7 @@ class Iterator {

template<typename T>
auto operator+(const T& n) const {
return Iterator{_index + n, _control};
return SafeIterator{_index + n, _control};
}

explicit operator bool() const { return static_cast<bool>(_control.lock()); }
Expand All @@ -103,39 +115,39 @@ class Iterator {
return result;
}

friend auto operator==(const Iterator& a, const Iterator& b) {
friend auto operator==(const SafeIterator& a, const SafeIterator& b) {
if ( a._control.lock() != b._control.lock() )
throw InvalidArgument("cannot compare iterators into different bytes");
return a._index == b._index;
}

friend bool operator!=(const Iterator& a, const Iterator& b) { return ! (a == b); }
friend bool operator!=(const SafeIterator& a, const SafeIterator& b) { return ! (a == b); }

friend auto operator<(const Iterator& a, const Iterator& b) {
friend auto operator<(const SafeIterator& a, const SafeIterator& b) {
if ( a._control.lock() != b._control.lock() )
throw InvalidArgument("cannot compare iterators into different bytes");
return a._index < b._index;
}

friend auto operator<=(const Iterator& a, const Iterator& b) {
friend auto operator<=(const SafeIterator& a, const SafeIterator& b) {
if ( a._control.lock() != b._control.lock() )
throw InvalidArgument("cannot compare iterators into different bytes");
return a._index <= b._index;
}

friend auto operator>(const Iterator& a, const Iterator& b) {
friend auto operator>(const SafeIterator& a, const SafeIterator& b) {
if ( a._control.lock() != b._control.lock() )
throw InvalidArgument("cannot compare iterators into different bytes");
return a._index > b._index;
}

friend auto operator>=(const Iterator& a, const Iterator& b) {
friend auto operator>=(const SafeIterator& a, const SafeIterator& b) {
if ( a._control.lock() != b._control.lock() )
throw InvalidArgument("cannot compare iterators into different bytes");
return a._index >= b._index;
}

friend difference_type operator-(const Iterator& a, const Iterator& b) {
friend difference_type operator-(const SafeIterator& a, const SafeIterator& b) {
if ( a._control.lock() != b._control.lock() )
throw InvalidArgument("cannot perform arithmetic with iterators into different bytes");
return a._index - b._index;
Expand All @@ -144,13 +156,83 @@ class Iterator {
friend class ::hilti::rt::Bytes;
};

inline std::string to_string(const Iterator& /* i */, rt::detail::adl::tag /*unused*/) { return "<bytes iterator>"; }
inline std::string to_string(const SafeIterator& /* i */, rt::detail::adl::tag /*unused*/) {
return "<bytes iterator>";
}

inline std::ostream& operator<<(std::ostream& out, const SafeIterator& /* x */) {
out << "<bytes iterator>";
return out;
}

namespace detail {
/**
* Unsafe bytes iterator for internal usage. Unlike *SafeConstIterator*, this
* iterator version is not safe against the underlying bytes instances
* disappearing or potentially even just changing; it will not catch that and
* likely causes crashes on access. It also does not perform any
* bounds-checking. When using this, one hence needs to ensure that the bytes
* instance will remain valid & unchanged for long as the iterator remains
* alive. In return, this iterator is more efficient than the
* `SafeConstIterator`.
*/
class UnsafeConstIterator {
using I = std::string::const_iterator;

I _i;

public:
UnsafeConstIterator() = default;
UnsafeConstIterator(I i) : _i(i) {}

uint8_t operator*() const { return static_cast<uint8_t>(*_i); }

template<typename T>
auto& operator+=(const T& n) {
return *this += n;
}

auto& operator+=(uint64_t n) {
_i += static_cast<I::difference_type>(n);
return *this;
}

template<typename T>
auto operator+(const T& n) const {
return *this + n;
}

auto& operator++() {
++_i;
return *this;
}

auto operator++(int) {
auto result = *this;
++_i;
return result;
}

friend auto operator==(const UnsafeConstIterator& a, const UnsafeConstIterator& b) { return a._i == b._i; }
friend bool operator!=(const UnsafeConstIterator& a, const UnsafeConstIterator& b) { return ! (a == b); }
friend auto operator<(const UnsafeConstIterator& a, const UnsafeConstIterator& b) { return a._i < b._i; }
friend auto operator<=(const UnsafeConstIterator& a, const UnsafeConstIterator& b) { return a._i <= b._i; }
friend auto operator>(const UnsafeConstIterator& a, const UnsafeConstIterator& b) { return a._i > b._i; }
friend auto operator>=(const UnsafeConstIterator& a, const UnsafeConstIterator& b) { return a._i >= b._i; }
friend auto operator-(const UnsafeConstIterator& a, const UnsafeConstIterator& b) { return a._i - b._i; }
};

inline std::string to_string(const UnsafeConstIterator& /* i */, rt::detail::adl::tag /*unused*/) {
return "<bytes iterator>";
}

inline std::ostream& operator<<(std::ostream& out, const Iterator& /* x */) {
inline std::ostream& operator<<(std::ostream& out, const UnsafeConstIterator& /* x */) {
out << "<bytes iterator>";
return out;
}

} // namespace detail

} // namespace bytes

/** HILTI's `Bytes` is a `std::string`-like type for wrapping raw bytes with
Expand All @@ -162,7 +244,8 @@ inline std::ostream& operator<<(std::ostream& out, const Iterator& /* x */) {
class Bytes : protected std::string {
public:
using Base = std::string;
using const_iterator = bytes::Iterator;
using const_iterator = bytes::SafeIterator;
using unsafe_const_iterator = bytes::detail::UnsafeConstIterator;
using Base::const_reference;
using Base::reference;
using Offset = uint64_t;
Expand Down Expand Up @@ -238,12 +321,24 @@ class Bytes : protected std::string {
/** Same as `begin()`, just for compatibility with std types. */
const_iterator cbegin() const { return const_iterator(0U, getControl()); }

/**
* Returns an unchecked (but fast) iterator representing the first byte of
* the instance.
*/
auto unsafeBegin() const { return unsafe_const_iterator(str().begin()); }

/** Returns an iterator representing the end of the instance. */
const_iterator end() const { return const_iterator(size(), getControl()); }

/** Same as `end()`, just for compatibility with std types. */
const_iterator cend() const { return const_iterator(size(), getControl()); }

/**
* Returns an unchecked (but fast) iterator representing the end of the
* instance.
*/
auto unsafeEnd() const { return unsafe_const_iterator(str().end()); }

/** Returns an iterator referring to the given offset. */
const_iterator at(Offset o) const { return begin() + o; }

Expand Down Expand Up @@ -557,7 +652,7 @@ class Bytes : protected std::string {
}

private:
friend bytes::Iterator;
friend bytes::SafeIterator;

const C& getControl() const {
if ( ! _control )
Expand Down
15 changes: 15 additions & 0 deletions hilti/runtime/src/tests/bytes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,21 @@ TEST_CASE("iteration") {
}
}

TEST_CASE("unsafe iteration") {
const auto b = "123"_b;
auto i = b.unsafeBegin();
CHECK_EQ(*i, '1');
CHECK_EQ(*(++i), '2');
CHECK_EQ(*(++i), '3');
CHECK_EQ(++i, b.unsafeEnd());

// Check yield type, like above.
for ( auto i = b.unsafeBegin(); i != b.unsafeEnd(); ++i ) {
(void)i;
static_assert(std::is_same_v<decltype(*i), uint8_t>);
}
}

TEST_CASE("split") {
SUBCASE("separator") {
CHECK_EQ("12 45"_b.split(" "), Vector({"12"_b, "45"_b}));
Expand Down
4 changes: 2 additions & 2 deletions hilti/runtime/src/types/bytes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ std::tuple<bool, Bytes::const_iterator> Bytes::find(const Bytes& v, const const_
if ( v.isEmpty() )
return std::make_tuple(true, n ? n : b);

auto bv = v.begin();
auto bv = v.unsafeBegin();
auto first = *bv;

for ( auto i = const_iterator(n ? n : b); true; ++i ) {
Expand All @@ -40,7 +40,7 @@ std::tuple<bool, Bytes::const_iterator> Bytes::find(const Bytes& v, const const_
if ( *x++ != *y++ )
break;

if ( y == v.end() )
if ( y == v.unsafeEnd() )
return std::make_tuple(true, i);
}
}
Expand Down
20 changes: 10 additions & 10 deletions hilti/runtime/src/types/stream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ std::tuple<bool, UnsafeConstIterator> View::find(const View& v, UnsafeConstItera
if ( v.isEmpty() )
return std::make_tuple(true, n);

auto first = *v.begin();
auto first = *v.unsafeBegin();

for ( auto i = n; true; ++i ) {
if ( i == unsafeEnd() )
Expand Down Expand Up @@ -333,7 +333,7 @@ std::tuple<bool, UnsafeConstIterator> View::_findForward(const Bytes& v, UnsafeC
if ( v.isEmpty() )
return std::make_tuple(true, n);

auto first = *v.begin();
auto first = *v.unsafeBegin();

for ( auto i = n; true; ++i ) {
if ( i == unsafeEnd() )
Expand All @@ -343,7 +343,7 @@ std::tuple<bool, UnsafeConstIterator> View::_findForward(const Bytes& v, UnsafeC
continue;

auto x = i;
auto y = v.begin();
auto y = v.unsafeBegin();

for ( ;; ) {
if ( x == unsafeEnd() )
Expand All @@ -352,7 +352,7 @@ std::tuple<bool, UnsafeConstIterator> View::_findForward(const Bytes& v, UnsafeC
if ( *x++ != *y++ )
break;

if ( y == v.end() )
if ( y == v.unsafeEnd() )
return std::make_tuple(true, i);
}
}
Expand Down Expand Up @@ -384,20 +384,20 @@ std::tuple<bool, UnsafeConstIterator> View::_findBackward(const Bytes& needle, U

i -= (needle.size() - 1).Ref(); // this is safe now, get us 1st position where initial character may match

auto first = *needle.begin();
auto first = *needle.unsafeBegin();

// The following is not the most efficient way to search backwards, but
// it'll do for now.
for ( auto j = i; true; --j ) {
if ( *j == first ) {
auto x = j;
auto y = needle.begin();
auto y = needle.unsafeBegin();

for ( ;; ) {
if ( *x++ != *y++ )
break;

if ( y == needle.end() )
if ( y == needle.unsafeEnd() )
return std::make_tuple(true, j);
}
}
Expand All @@ -413,8 +413,8 @@ bool View::startsWith(const Bytes& b) const {
_ensureValid();
auto s1 = unsafeBegin();
auto e1 = unsafeEnd();
auto s2 = b.begin();
auto e2 = b.end();
auto s2 = b.unsafeBegin();
auto e2 = b.unsafeEnd();

// If the iterator has no data in it, we cannot dereference it.
if ( isEmpty() )
Expand Down Expand Up @@ -586,7 +586,7 @@ bool stream::View::operator==(const Bytes& other) const {
return false;

auto i = unsafeBegin();
auto j = other.begin();
auto j = other.unsafeBegin();

while ( i != unsafeEnd() ) {
if ( ! i.chunk() ) // out-of-bounds, cannot match
Expand Down
4 changes: 3 additions & 1 deletion hilti/toolchain/src/compiler/codegen/types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,9 @@ struct VisitorStorage : hilti::visitor::PreOrder {

void operator()(type::Interval* n) final { result = CxxTypes{.base_type = "::hilti::rt::Interval"}; }

void operator()(type::bytes::Iterator* n) final { result = CxxTypes{.base_type = "::hilti::rt::bytes::Iterator"}; }
void operator()(type::bytes::Iterator* n) final {
result = CxxTypes{.base_type = "::hilti::rt::bytes::SafeIterator"};
}

void operator()(type::stream::Iterator* n) final {
result = CxxTypes{.base_type = "::hilti::rt::stream::SafeConstIterator"};
Expand Down
8 changes: 4 additions & 4 deletions spicy/runtime/src/tests/unit-context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,20 @@ TEST_CASE("copy context") {

TEST_CASE("create and set") {
auto b = hilti::rt::reference::make_strong<hilti::rt::Bytes>("x"_b);
auto c = detail::createContext(b, &hilti::rt::type_info::bytes);
auto c = spicy::rt::detail::createContext(b, &hilti::rt::type_info::bytes);

hilti::rt::StrongReference<hilti::rt::Bytes> __context;

// Set __context
detail::setContext(__context, c, &hilti::rt::type_info::bytes);
spicy::rt::detail::setContext(__context, c, &hilti::rt::type_info::bytes);
CHECK_EQ(*__context, "x"_b);

// Unset __context
detail::setContext(__context, {}, &hilti::rt::type_info::bytes);
spicy::rt::detail::setContext(__context, {}, &hilti::rt::type_info::bytes);
CHECK(! __context);

// Catch type mismatch
CHECK_THROWS_AS(detail::setContext(__context, c, &hilti::rt::type_info::string), ContextMismatch);
CHECK_THROWS_AS(spicy::rt::detail::setContext(__context, c, &hilti::rt::type_info::string), ContextMismatch);
}

TEST_CASE("to_string") {
Expand Down

0 comments on commit 88ea73f

Please sign in to comment.