From 9a74099b6835a5823828b3eeafee6dc39adec4c4 Mon Sep 17 00:00:00 2001
From: Andersama
Date: Fri, 13 Nov 2020 06:48:12 -0800
Subject: [PATCH] Add string search utility

---
 include/ctre/atoms.hpp      |   1 +
 include/ctre/evaluation.hpp | 126 ++++++++++++++++++++++++++++++++++--
 2 files changed, 123 insertions(+), 4 deletions(-)

diff --git a/include/ctre/atoms.hpp b/include/ctre/atoms.hpp
index 94a105bf..6a398c1f 100644
--- a/include/ctre/atoms.hpp
+++ b/include/ctre/atoms.hpp
@@ -19,6 +19,7 @@ struct any { };
 
 // actual AST of regexp
 template <auto... Str> struct string { };
+template <auto... Str> struct string_search { };
 template <typename... Opts> struct select { };
 template <typename... Content> struct sequence { };
 struct empty { };
diff --git a/include/ctre/evaluation.hpp b/include/ctre/evaluation.hpp
index 47b0d4b2..be39c0c3 100644
--- a/include/ctre/evaluation.hpp
+++ b/include/ctre/evaluation.hpp
@@ -115,10 +115,19 @@ template <typename CharT, typename Iterator, typename EndIterator> constexpr CTR
 }
 
 template <auto... String, size_t... Idx, typename Iterator, typename EndIterator> constexpr CTRE_FORCE_INLINE string_match_result<Iterator> evaluate_match_string(Iterator current, [[maybe_unused]] const EndIterator end, std::index_sequence<Idx...>) noexcept {
-
-	bool same = (compare_character(String, current, end) && ... && true);
-
-	return {current, same};
+	if constexpr (sizeof...(String) > 0 && !std::is_same_v<Iterator, utf8_iterator> && is_random_accessible(typename std::iterator_traits<Iterator>::iterator_category{})) {
+		// random access: a single length check up front, then compare every position by index
+		// (the sizeof... guard keeps the unary `&` fold away from an empty pack, where it is ill-formed)
+		bool same = (static_cast<size_t>(::std::distance(current, end)) >= sizeof...(String)) && ((String == *(current + Idx)) & ...);
+		if (same) {
+			return {current+=sizeof...(String), same};
+		} else {
+			return {current, same};
+		}
+	} else {
+		bool same = (compare_character(String, current, end) && ... && true);
+		return { current, same };
+	}
 }
 
 template <typename R, typename Iterator, typename EndIterator, auto... String, typename... Tail> 
@@ -132,6 +141,115 @@ constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, c
 	return evaluate(begin, result.position, end, consumed_something(f, sizeof...(String) > 0), captures, ctll::list<Tail...>());
 }
 
+// true when the suffix of word starting at pos is also a prefix of word (an empty suffix counts)
+template <typename Ty>
+constexpr bool is_prefix(Ty* word, size_t wordlen, ptrdiff_t pos) {
+	ptrdiff_t suffixlen = static_cast<ptrdiff_t>(wordlen) - pos;
+	for (ptrdiff_t i = 0; i < suffixlen; i++) {
+		if (word[i] != word[pos + i]) {
+			return false;
+		}
+	}
+	return true;
+}
+
+// length of the longest suffix of word that also ends at word[pos], capped at pos
+template <typename Ty>
+constexpr size_t suffix_length(Ty* word, size_t wordlen, ptrdiff_t pos) {
+	size_t i = 0;
+	// increment suffix length i to the first mismatch or beginning of the word
+	// (bound is tested first so the comparison never runs once i has reached pos)
+	for (; (i < static_cast<size_t>(pos)) && (word[pos - i] == word[wordlen - 1 - i]); i++);
+	return i;
+}
+// builds the Boyer-Moore good-suffix shift table for the pattern String...
+//MSVC workaround, array operator[] blows up in face if constexpr, use pointers instead
+template <typename Ty, auto... String>
+constexpr auto make_delta_2(string<String...>) {
+	std::array<Ty, sizeof...(String)> chars{ String... };
+	std::array<ptrdiff_t, sizeof...(String)> table{}; // value-initialised: a C++17 constexpr function may not leave it uninitialised
+	constexpr size_t patlen = sizeof...(String);
+	size_t p = 0;
+	size_t last_prefix_index = patlen - 1;
+
+	// first pass: shifts that line a prefix of the pattern up with the matched suffix
+	for (p = patlen - 1; p < patlen; p--) { // p wraps past zero, which ends the loop
+		if (is_prefix(chars.data(), patlen, p + 1)) {
+			last_prefix_index = p + 1;
+		}
+		table.data()[p] = last_prefix_index + (patlen - 1 - p);
+	}
+
+	// second pass: shifts to an earlier occurrence of the matched suffix inside the pattern
+	for (p = 0; p < patlen - 1; p++) {
+		size_t slen = suffix_length(chars.data(), patlen, p);
+		if (chars.data()[p - slen] != chars.data()[patlen - 1 - slen]) {
+			table.data()[patlen - 1 - slen] = patlen - 1 - p + slen;
+		}
+	}
+
+	return table;
+}
+
+// searches [current, end) for the first occurrence of String...; on success position is one past the match
+template <typename Iterator, typename EndIterator, auto... String>
+constexpr CTRE_FORCE_INLINE string_match_result<Iterator> evaluate_search_string(Iterator current, const EndIterator end, string<String...>) {
+	if constexpr (sizeof...(String) > 2 && !std::is_same_v<Iterator, utf8_iterator> && is_random_accessible(typename std::iterator_traits<Iterator>::iterator_category{})) {
+		constexpr std::array<typename ::std::iterator_traits<Iterator>::value_type, sizeof...(String)> chars{ String... };
+		constexpr std::array<ptrdiff_t, sizeof...(String)> delta_2 = make_delta_2<typename ::std::iterator_traits<Iterator>::value_type>(string<String...>());
+
+		size_t str_size = static_cast<size_t>(std::distance(current, end));
+		if (str_size < sizeof...(String)) { //quick exit no way to match
+			return { current, false };
+		}
+
+		size_t i = sizeof...(String) - 1; //index over to the starting location
+		for (; i < str_size;) {
+			size_t j = sizeof...(String) - 1;
+			size_t m = i + 1;
+			for (; *(current + i) == *(chars.data() + j); --i, --j) { //match string in reverse
+				if (j == 0) {
+					return { current + m, true };
+				}
+			}
+			size_t shift = enumeration<String...>::match_char(*(current + i)) ? static_cast<size_t>(*(delta_2.data() + j)) : sizeof...(String);
+			i += shift;
+		}
+
+		return { current + str_size, false };
+	} else if constexpr (sizeof...(String) > 0) {
+		//fallback to plain string matching
+		constexpr std::array<typename ::std::iterator_traits<Iterator>::value_type, sizeof...(String)> chars{ String... };
+		constexpr typename ::std::iterator_traits<Iterator>::value_type first_char = chars.data()[0];
+		while (current != end) {
+			// only try a full match where the first character agrees; never step past end
+			if (*current == first_char) {
+				auto result = evaluate_match_string<String...>(current, end, std::make_index_sequence<sizeof...(String)>());
+				if (result.match) {
+					return result;
+				}
+			}
+			++current;
+		}
+		return { current, false };
+	} else {
+		return { current, true };
+	}
+}
+
+// matching a string anywhere in the remaining input
+template <typename R, typename Iterator, typename EndIterator, auto... String, typename... Tail>
+constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, const EndIterator end, [[maybe_unused]] const flags& f, R captures, ctll::list<string_search<String...>, Tail...>) noexcept {
+	auto result = evaluate_search_string(current, end, string<String...>());
+
+	if (!result.match) {
+		return not_matched;
+	}
+
+	// result.position already sits one past the match, so it is handed on as-is
+	return evaluate(begin, result.position, end, consumed_something(f, sizeof...(String) > 0), captures, ctll::list<Tail...>());
+}
+
 // matching select in patterns
 template <typename R, typename Iterator, typename EndIterator, typename HeadOptions, typename... TailOptions, typename... Tail> 
 constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, const EndIterator end, const flags & f, R captures, ctll::list<select<HeadOptions, TailOptions...>, Tail...>) noexcept {