Skip to content

Commit

Permalink
Add string search utility
Browse files Browse the repository at this point in the history
  • Loading branch information
Andersama committed Nov 15, 2020
1 parent d0f3778 commit 9a74099
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 4 deletions.
1 change: 1 addition & 0 deletions include/ctre/atoms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ struct any { };

// actual AST of regexp
// Every node below is an empty tag type: it holds no data members, so all
// information lives in the node's name and its template arguments.
// literal made of the characters Str...
template <auto... Str> struct string { };
// literal made of the characters Str... that evaluation searches for via
// evaluate_search_string rather than matching at the current position
template <auto... Str> struct string_search { };
// NOTE(review): presumably a choice between the alternatives Opts... - confirm
template <typename... Opts> struct select { };
// NOTE(review): presumably Content... matched one after another - confirm
template <typename... Content> struct sequence { };
struct empty { };
Expand Down
116 changes: 112 additions & 4 deletions include/ctre/evaluation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,17 @@ template <typename CharT, typename Iterator, typename EndIterator> constexpr CTR
}

// Matches the literal String... against the input beginning at `current`.
//
// Returns {position, match}.
//  * Random-access path: on success `position` is one past the last matched
//    character; on failure it is the unchanged `current`.
//  * Generic path: `position` is wherever compare_character left the local
//    iterator (NOTE(review): presumably it advances `current` by reference -
//    confirm against compare_character).
template <auto... String, size_t... Idx, typename Iterator, typename EndIterator> constexpr CTRE_FORCE_INLINE string_match_result<Iterator> evaluate_match_string(Iterator current, [[maybe_unused]] const EndIterator end, std::index_sequence<Idx...>) noexcept {
	// The fast path measures the remaining input with std::distance, which needs
	// both arguments to have the same type, so a distinct end/sentinel type has
	// to take the generic path.
	if constexpr (std::is_same_v<Iterator, EndIterator> && !std::is_same_v<Iterator, utf8_iterator> && is_random_accessible(typename std::iterator_traits<Iterator>::iterator_category{})) {
		using difference_type = typename std::iterator_traits<Iterator>::difference_type;
		constexpr size_t length = sizeof...(String);

		// One bounds check up front; && short-circuits so nothing is read when
		// the input is too short.
		const auto remaining = std::distance(current, end);
		const bool enough = (remaining >= 0) && (static_cast<size_t>(remaining) >= length);

		// Non-short-circuit & keeps the comparisons branch-free; the trailing
		// `& true` keeps the fold well-formed for an empty String pack.
		const bool same = enough && ((String == *(current + Idx)) & ... & true);

		if (same) {
			return {current + static_cast<difference_type>(length), true};
		}
		return {current, false};
	} else {
		bool same = (compare_character(String, current, end) && ... && true);
		return { current, same };
	}
}

template <typename R, typename Iterator, typename EndIterator, auto... String, typename... Tail>
Expand All @@ -132,6 +139,107 @@ constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, c
return evaluate(begin, result.position, end, consumed_something(f, sizeof...(String) > 0), captures, ctll::list<Tail...>());
}

// Reports whether the suffix of `word` that starts at index `pos` is also a
// prefix of `word`.
//
// word    - pointer to the first element of the pattern
// wordlen - number of elements in the pattern
// pos     - index at which the suffix starts; a value at or beyond wordlen
//           denotes an empty suffix, which is trivially a prefix
template<typename Ty>
constexpr bool is_prefix(Ty* word, size_t wordlen, ptrdiff_t pos) {
	// Do the subtraction in the signed domain and index with the same width as
	// the bound, instead of mixing size_t, ptrdiff_t and int.
	const ptrdiff_t suffixlen = static_cast<ptrdiff_t>(wordlen) - pos;
	for (ptrdiff_t i = 0; i < suffixlen; ++i) {
		if (word[i] != word[pos + i]) {
			return false;
		}
	}
	return true;
}

// Counts how many elements, walking backwards from word[pos], agree with the
// elements walking backwards from the end of `word`. The count never exceeds
// `pos`, so word[0] is compared but not counted once the walk reaches it.
template<typename Ty>
constexpr size_t suffix_length(Ty* word, size_t wordlen, ptrdiff_t pos) {
	const size_t limit = static_cast<size_t>(pos);
	size_t matched = 0;
	// Compare first, then test the limit: same order as a combined `a && b` loop condition.
	while (word[pos - matched] == word[wordlen - 1 - matched]) {
		if (!(matched < limit)) {
			break;
		}
		++matched;
	}
	return matched;
}
// Builds the Boyer-Moore good-suffix ("delta 2") shift table for the pattern
// String....
//
// Ty is the element type the pattern characters are stored as. The returned
// std::array<ptrdiff_t, sizeof...(String)> holds, at index j, the amount to add
// to the text index when the right-to-left comparison fails at pattern index j.
// An empty pattern yields an empty table.
//
// MSVC workaround: array operator[] fails in constexpr evaluation there, so
// elements are accessed through data() pointers instead.
template<typename Ty, auto... String>
constexpr auto make_delta_2(string<String...>) {
	constexpr size_t patlen = sizeof...(String);
	// Value-initialised: a constexpr function may not declare an uninitialised
	// variable before C++20.
	std::array<ptrdiff_t, patlen> table{};

	if constexpr (patlen == 0) {
		// Nothing to fill, and patlen - 1 below would wrap around.
		return table;
	} else {
		std::array<Ty, patlen> chars{ String... };
		size_t last_prefix_index = patlen - 1;

		// First pass, from the last pattern index down to 0: shift based on the
		// nearest position whose suffix is also a prefix of the pattern.
		for (size_t p = patlen; p-- > 0;) {
			if (is_prefix(chars.data(), patlen, static_cast<ptrdiff_t>(p + 1))) {
				last_prefix_index = p + 1;
			}
			table.data()[p] = static_cast<ptrdiff_t>(last_prefix_index + (patlen - 1 - p));
		}

		// Second pass: where a run ending at p repeats the pattern's tail and
		// the elements just before the two runs differ, record that shift.
		for (size_t p = 0; p + 1 < patlen; ++p) {
			const size_t slen = suffix_length(chars.data(), patlen, static_cast<ptrdiff_t>(p));
			if (chars.data()[p - slen] != chars.data()[patlen - 1 - slen]) {
				table.data()[patlen - 1 - slen] = static_cast<ptrdiff_t>(patlen - 1 - p + slen);
			}
		}

		return table;
	}
}

// Searches [current, end) for the first occurrence of the literal String....
//
// Returns {position, match}.
//  * Found by the Boyer-Moore path: `position` is one past the last character
//    of the occurrence.
//  * Found by the fallback path: the result of evaluate_match_string at the
//    occurrence is returned unchanged.
//  * Not found: match is false; position is `current` when the input is shorter
//    than the pattern, otherwise the end of the input.
//  * Empty pattern: matches immediately at `current`.
template <typename Iterator, typename EndIterator, auto... String>
constexpr CTRE_FORCE_INLINE string_match_result<Iterator> evaluate_search_string(Iterator current, const EndIterator end, string<String...>) {
	using value_type = typename ::std::iterator_traits<Iterator>::value_type;
	constexpr size_t length = sizeof...(String);

	// std::distance needs both arguments to have the same type, so a distinct
	// end/sentinel type falls through to the plain scan below.
	if constexpr (length > 2 && std::is_same_v<Iterator, EndIterator> && !std::is_same_v<Iterator, utf8_iterator> && is_random_accessible(typename std::iterator_traits<Iterator>::iterator_category{})) {
		using difference_type = typename ::std::iterator_traits<Iterator>::difference_type;
		constexpr std::array<value_type, length> chars{ String... };
		constexpr std::array<ptrdiff_t, length> delta_2 = make_delta_2<value_type>(string<String...>());

		const auto remaining = std::distance(current, end);
		if (remaining < 0 || static_cast<size_t>(remaining) < length) { //quick exit no way to match
			return { current, false };
		}
		const size_t str_size = static_cast<size_t>(remaining);

		// i indexes the text character aligned with the last pattern character.
		size_t i = length - 1;
		while (i < str_size) {
			size_t j = length - 1;
			const size_t m = i + 1; // one past the end of the current alignment
			for (; *(current + static_cast<difference_type>(i)) == *(chars.data() + j); --i, --j) { //match string in reverse
				if (j == 0) {
					return { current + static_cast<difference_type>(m), true };
				}
			}
			// A text character that appears nowhere in the pattern lets the whole
			// pattern slide past it; otherwise use the good-suffix shift.
			const size_t shift = enumeration<String...>::match_char(*(current + static_cast<difference_type>(i))) ? static_cast<size_t>(*(delta_2.data() + j)) : length;
			i += shift;
		}

		return { current + static_cast<difference_type>(str_size), false };
	} else if constexpr (length > 0) {
		//fallback to plain string matching
		// `if constexpr` so this branch is discarded for an empty pattern, where
		// chars would have no element 0.
		constexpr std::array<value_type, length> chars{ String... };
		constexpr value_type first_char = chars.data()[0];
		while (current != end) {
			// Skip ahead to the next candidate start.
			while (current != end && *current != first_char) {
				++current;
			}
			if (current == end) {
				// No candidate left; stop here so the iterator is never advanced past end.
				break;
			}
			auto result = evaluate_match_string<String...>(current, end, std::make_index_sequence<length>());
			if (result.match) {
				return result;
			}
			++current;
		}
		return { current, false };
	} else {
		return { current, true };
	}
}

// matching string_search in patterns: locate the literal String... somewhere in
// [current, end) and continue evaluating Tail... from the position reported by
// the search. Returns not_matched when the literal does not occur.
template <typename R, typename Iterator, typename EndIterator, auto... String, typename... Tail>
constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, const EndIterator end, [[maybe_unused]] const flags& f, R captures, ctll::list<string_search<String...>, Tail...>) noexcept {
	const auto result = evaluate_search_string(current, end, string<String...>());

	// The result's flag is named `match`, as read in evaluate_search_string.
	if (!result.match) {
		return not_matched;
	}

	// result.position is already past the occurrence, so it is passed on as is.
	// std::advance returns void and cannot be used as an argument here.
	return evaluate(begin, result.position, end, consumed_something(f, sizeof...(String) > 0), captures, ctll::list<Tail...>());
}

// matching select in patterns
template <typename R, typename Iterator, typename EndIterator, typename HeadOptions, typename... TailOptions, typename... Tail>
constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, const EndIterator end, const flags & f, R captures, ctll::list<select<HeadOptions, TailOptions...>, Tail...>) noexcept {
Expand Down

0 comments on commit 9a74099

Please sign in to comment.