Skip to content

Commit

Permalink
Pattern Analysis
Browse files Browse the repository at this point in the history
Adds functionality to analyze the minimum and maximum # of
characters a regex may match.
  • Loading branch information
Andersama committed Dec 29, 2019
1 parent ac7cb6f commit 2b62f7e
Show file tree
Hide file tree
Showing 3 changed files with 232 additions and 4 deletions.
224 changes: 224 additions & 0 deletions include/ctre/evaluation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,230 @@ constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, c
// property matching


// pattern analysis - computes the minimum and maximum number of characters a string can contain and still be matched by a regex
// size_t(-1) is treated as INF (unbounded), size_t(-2) as finite but possibly too large to store; all other values are exact counts
// Saturating addition of two match-length limits.
// size_t(-1) marks "unbounded" and is absorbing: if either operand is unbounded, so is the result.
// size_t(-2) marks "finite, but too large to store": a finite sum that would wrap around, or that
// would land exactly on the unbounded marker, is clamped to it.
constexpr CTRE_FORCE_INLINE size_t saturate_limit(const size_t& lhs, const size_t& rhs) {
    constexpr size_t unbounded = ~size_t{ 0 };
    constexpr size_t too_large = unbounded - 1;
    if (lhs == unbounded || rhs == unbounded) {
        return unbounded;
    }
    // room left above lhs: reaching it would produce the unbounded marker, exceeding it would wrap
    const size_t headroom = unbounded - lhs;
    return rhs >= headroom ? too_large : lhs + rhs;
}

// Saturating multiplication of two match-length limits.
// Uses the same markers as saturate_limit: size_t(-1) is "unbounded", size_t(-2) is "finite, but too large to store".
// The unbounded check comes first, so unbounded times zero stays unbounded; otherwise any zero operand gives zero.
constexpr CTRE_FORCE_INLINE size_t mult_saturate_limit(const size_t& lhs, const size_t& rhs) {
    constexpr size_t unbounded = ~size_t{ 0 };
    constexpr size_t too_large = unbounded - 1;
    if (lhs == unbounded || rhs == unbounded) {
        return unbounded;
    }
    if (lhs == 0 || rhs == 0) {
        return 0;
    }
    // the product would wrap around
    if (lhs > unbounded / rhs) {
        return too_large;
    }
    const size_t product = lhs * rhs;
    // an exact product equal to the unbounded marker is still finite, so clamp it
    return product == unbounded ? too_large : product;
}
//a custom std::pair to overload some handy operations that we'll perform w/ a fold
// A (minimum, maximum) pair of match lengths: `first` is the minimum, `second` the maximum.
// Derives from std::pair and adds the operators that the analysis folds rely on.
struct analysis_results : std::pair<size_t, size_t> {
    // True when the minimum is non-zero.
    // constexpr member functions are implicitly inline, so no extra `inline` is spelled next to
    // CTRE_FORCE_INLINE (same specifier order as saturate_limit / mult_saturate_limit).
    constexpr CTRE_FORCE_INLINE operator bool() const noexcept {
        return first != 0;
    }
    // Concatenation: minimums and maximums add up, using saturating arithmetic.
    constexpr CTRE_FORCE_INLINE auto operator+(analysis_results other) const noexcept {
        return analysis_results{std::make_pair(
            saturate_limit(first, other.first),
            saturate_limit(second, other.second)
        )};
    }
    // Alternation: smallest minimum and largest maximum of both sides.
    // Note: an overloaded operator|| does not short-circuit; both operands are always evaluated.
    constexpr CTRE_FORCE_INLINE auto operator||(analysis_results other) const noexcept {
        return analysis_results{std::make_pair(
            std::min(first, other.first),
            std::max(second, other.second)
        )};
    }
};

// Forward declarations of trampoline_analysis, so the _analyze overloads below can name it before any definition appears.
// NOTE(review): no definition of this single-argument overload is visible in this section - confirm it is defined or still needed.
template <typename Pattern>
static constexpr auto trampoline_analysis(Pattern) noexcept;

// NOTE(review): single-argument list overload; likewise no definition or use is visible in this section - confirm.
template <typename... Patterns>
static constexpr auto trampoline_analysis(ctll::list<Patterns...>) noexcept;

// Generic (pattern, captures) form. The definition further down takes ctll::list<Patterns...> as its first parameter,
// which is a separate overload; NOTE(review): confirm that this generic form is defined somewhere before it gets selected.
template<typename T, typename R>
static constexpr auto trampoline_analysis(T, R captures) noexcept;

//processing for each type

//repeat
// Limits for repeat<A, B, Content...>: the content is analysed once, then its minimum is scaled by A
// and its maximum by B (saturating). An empty Content pack contributes (0, 0).
// NOTE(review): B is used as a plain multiplier here; if an open-ended repeat is encoded as B == 0
// elsewhere in the library, the maximum would come out as 0 - confirm against the repeat<> convention.
template<size_t A, size_t B, typename R, typename... Content>
static constexpr auto _analyze(repeat<A,B,Content...>, R captures) noexcept {
    analysis_results ret{ std::make_pair(0ULL, 0ULL) };
    // explicit comparison: a constexpr-if condition must convert to bool without narrowing,
    // and a pack size of 2 or more is not representable as bool
    if constexpr (sizeof...(Content) != 0) {
        ret = trampoline_analysis(ctll::list<Content...>(), captures);
        ret.first = mult_saturate_limit(ret.first, A);
        ret.second = mult_saturate_limit(ret.second, B);
    }
    return ret;
}

//note: all * ? + operations are specialized variations of repeat {A,B}
//lazy_repeat
// lazy_repeat is analysed exactly like the greedy repeat with the same bounds and content.
template<size_t A, size_t B, typename R, typename... Content>
static constexpr auto _analyze(lazy_repeat<A, B, Content...>, R captures) noexcept {
    using greedy_form = repeat<A, B, Content...>;
    return _analyze(greedy_form(), captures);
}

//possessive_repeat
// possessive_repeat is analysed exactly like the greedy repeat with the same bounds and content.
template<size_t A, size_t B, typename R, typename... Content>
static constexpr auto _analyze(possessive_repeat<A, B, Content...>, R captures) noexcept {
    using greedy_form = repeat<A, B, Content...>;
    return _analyze(greedy_form(), captures);
}

//star
// star: analysed as repeat<0, unbounded, Content...>, where the all-ones size_t value is the "unbounded" marker.
template<typename R, typename... Content>
static constexpr auto _analyze(star<Content...>, R captures) noexcept {
    // the marker is spelled in size_t so the template argument never needs a narrowing
    // conversion from unsigned long long on targets where size_t is narrower
    return _analyze(repeat<size_t{ 0 }, ~size_t{ 0 }, Content...>(), captures);
}

//lazy_star
// lazy_star: analysed as repeat<0, unbounded, Content...>, where the all-ones size_t value is the "unbounded" marker.
template<typename R, typename... Content>
static constexpr auto _analyze(lazy_star<Content...>, R captures) noexcept {
    // the marker is spelled in size_t so the template argument never needs a narrowing
    // conversion from unsigned long long on targets where size_t is narrower
    return _analyze(repeat<size_t{ 0 }, ~size_t{ 0 }, Content...>(), captures);
}

//possessive_star
// possessive_star: analysed as repeat<0, unbounded, Content...>, where the all-ones size_t value is the "unbounded" marker.
template<typename R, typename... Content>
static constexpr auto _analyze(possessive_star<Content...>, R captures) noexcept {
    // the marker is spelled in size_t so the template argument never needs a narrowing
    // conversion from unsigned long long on targets where size_t is narrower
    return _analyze(repeat<size_t{ 0 }, ~size_t{ 0 }, Content...>(), captures);
}

//optional
// optional: analysed as a repeat of the content with bounds 0 and 1.
template<typename R, typename... Content>
static constexpr auto _analyze(optional<Content...>, R captures) noexcept {
    using zero_or_one = repeat<0ULL, 1ULL, Content...>;
    return _analyze(zero_or_one(), captures);
}

//lazy_optional
// lazy_optional: analysed as a repeat of the content with bounds 0 and 1.
template<typename R, typename... Content>
static constexpr auto _analyze(lazy_optional<Content...>, R captures) noexcept {
    using zero_or_one = repeat<0ULL, 1ULL, Content...>;
    return _analyze(zero_or_one(), captures);
}

//back_reference
// back_reference<Id>: contributes the limits of the expression stored for capture Id.
// The capture is looked up in `captures`; if its stored expression is empty, the contribution is (0, 0).
template<size_t Id, typename R>
static constexpr auto _analyze(back_reference<Id>, R captures) noexcept {
    const auto ref = captures.template get<Id>();
    analysis_results ret{ std::make_pair(0ULL, 0ULL) };
    // explicit comparison: a constexpr-if condition must convert to bool without narrowing
    // (presumably size() yields an element count - confirm)
    if constexpr (size(ref.get_expression()) != 0) {
        ret = trampoline_analysis(ref.get_expression(), captures);
    }
    return ret;
}

//back_reference_with_name
// back_reference_with_name<Name>: contributes the limits of the expression stored for the capture called Name.
// The capture is looked up in `captures`; if its stored expression is empty, the contribution is (0, 0).
template<typename Name, typename R>
static constexpr auto _analyze(back_reference_with_name<Name>, R captures) noexcept {
    const auto ref = captures.template get<Name>();
    analysis_results ret{ std::make_pair(0ULL, 0ULL) };
    // explicit comparison: a constexpr-if condition must convert to bool without narrowing
    // (presumably size() yields an element count - confirm)
    if constexpr (size(ref.get_expression()) != 0) {
        ret = trampoline_analysis(ref.get_expression(), captures);
    }
    return ret;
}

//select, this is specialized, we need to take the minimum of all minimums and maximum of all maximums
template<typename R, typename... Content>
static constexpr auto _analyze(select<Content...>, R captures) noexcept {
analysis_results ret = trampoline_select_analysis(ctll::list<Content...>(), captures);
return ret;
}

//character, any character contributes exactly one to both counts
// character<C>: a single literal character, so both the minimum and the maximum are exactly 1.
template<auto C, typename R>
static constexpr auto _analyze(character<C>, R captures) noexcept {
    return analysis_results{ std::make_pair(1ULL, 1ULL) };
}

//strings, any string contributes the # of characters it contains (if we have an empty string that'll be 0)
// string<Str...>: contributes its character count to both limits (0 for an empty string).
template<auto... Str, typename R>
static constexpr auto _analyze(string<Str...>, R captures) noexcept {
    constexpr size_t length = sizeof...(Str);
    return analysis_results{ std::make_pair(length, length) };
}

//we'll process anything that has contents as a regex
//ctll::list
// ctll::list: a nested list is analysed as the sum of its elements' limits.
template<typename R, typename... Content>
static constexpr auto _analyze(ctll::list<Content...>,R captures) noexcept {
    using elements = ctll::list<Content...>;
    const analysis_results limits = trampoline_analysis(elements(), captures);
    return limits;
}

//sequence
// sequence: its content is re-wrapped in a ctll::list and analysed as the sum of the elements' limits.
template<typename R, typename... Content>
static constexpr auto _analyze(sequence<Content...>, R captures) noexcept {
    using elements = ctll::list<Content...>;
    const analysis_results limits = trampoline_analysis(elements(), captures);
    return limits;
}

//capture
// capture<Id, Content...>: the capture id plays no role here; the content is analysed as a list.
template<size_t Id, typename R, typename... Content>
static constexpr auto _analyze(capture<Id, Content...>, R captures) noexcept {
    using elements = ctll::list<Content...>;
    const analysis_results limits = trampoline_analysis(elements(), captures);
    return limits;
}

//capture_with_name
// capture_with_name<Id, Name, Content...>: id and name play no role here; the content is analysed as a list.
template<size_t Id, typename Name, typename R, typename... Content>
static constexpr auto _analyze(capture_with_name<Id, Name, Content...>, R captures) noexcept {
    using elements = ctll::list<Content...>;
    const analysis_results limits = trampoline_analysis(elements(), captures);
    return limits;
}

//everything else, anything we haven't matched already isn't supported and will contribute 0
template<typename T, typename R>
static constexpr auto _analyze(T, R captures) noexcept {
analysis_results ret{ std::make_pair(0ULL, 0ULL) };
return ret;
}
//note: ctll::list wraps patterns just like sequences, we'll treat anything that looks like a regex w/ ctll::list
// Sums the limits of every element of a ctll::list (elements in a list are matched one after another).
// `captures` is passed through to each _analyze call.
// Returns (0, 0) for an empty list.
template <typename... Patterns, typename R>
static constexpr auto trampoline_analysis(ctll::list<Patterns...>, R captures) noexcept {
    // binary fold seeded with (0, 0): a unary fold over + is ill-formed for an empty pack,
    // and the seed is the identity of the saturating addition, so non-empty results are unchanged
    return (analysis_results{ std::make_pair(size_t{ 0 }, size_t{ 0 }) } + ... + _analyze(Patterns(), captures));
}

// Combines the alternatives of a selection: the result is the smallest minimum and the largest
// maximum over all alternatives (order is irrelevant). `captures` is passed through to each analysis.
// Returns (0, 0) when there are no alternatives.
template <typename... Patterns, typename R>
static constexpr auto trampoline_select_analysis(ctll::list<Patterns...>, R captures) noexcept {
    if constexpr (sizeof...(Patterns) == 0) {
        // an empty || fold would produce a plain bool instead of analysis_results
        return analysis_results{ std::make_pair(size_t{ 0 }, size_t{ 0 }) };
    } else {
        // each alternative goes through _analyze, which has an overload for every node type
        // (ctll::list included), instead of the generic trampoline_analysis(T, R) declaration,
        // which has no visible definition in this section
        return analysis_results{ ((_analyze(Patterns(), captures)) || ...) };
    }
}

// Entry point for a pattern that is already a ctll::list: returns the (minimum, maximum) limits of the list.
// The captures object type is derived from regex_results over a string_view iterator and the captures
// found in the pattern; a value-initialized instance of it is handed to the analysis.
template <typename... Patterns>
static constexpr auto pattern_analysis(ctll::list<Patterns...>) noexcept {
    // the parameter is unnamed, so there is no `pattern` identifier to refer to; the capture scan is
    // seeded with the list type itself and an empty output list, the two-list form find_captures recurses with
    using return_type = decltype(regex_results(std::declval<std::basic_string_view<char>::iterator>(), find_captures(ctll::list<Patterns...>(), ctll::list<>())));
    return trampoline_analysis(ctll::list<Patterns...>(), return_type{});
}

// Entry point for a single pattern node (defaults to `empty`): returns its (minimum, maximum) limits.
// The pattern is wrapped in a ctll::list for the analysis; the captures object type is derived from
// regex_results over a string_view iterator and the captures found in the pattern.
template <typename Pattern = empty>
static constexpr auto pattern_analysis(Pattern pattern = {}) noexcept {
    using iterator_type = std::basic_string_view<char>::iterator;
    using captures_type = decltype(regex_results(std::declval<iterator_type>(), find_captures(pattern)));
    return trampoline_analysis(ctll::list<Pattern>(), captures_type{});
}


}

#endif
4 changes: 2 additions & 2 deletions include/ctre/find_captures.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,12 @@ template <typename... Content, typename... Tail, typename Output> constexpr auto


// find_captures step for a numbered capture at the head of the list: recurses over the capture's content
// followed by the remaining tail, and appends a captured_content entry for this capture to the output list.
// NOTE(review): this is a rendered diff (2 additions & 2 deletions in this file) - the first return is the removed line,
// the second is its replacement, which additionally records the captured sub-pattern as ctll::list<Content...>.
template <size_t Id, typename... Content, typename... Tail, typename... Output> constexpr auto find_captures(ctll::list<capture<Id,Content...>, Tail...>, ctll::list<Output...>) noexcept {
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id>>());
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id, void, ctll::list<Content...>>>());
}


// find_captures step for a named capture at the head of the list: recurses over the capture's content
// followed by the remaining tail, and appends a captured_content entry (id and name) to the output list.
// NOTE(review): this is a rendered diff (2 additions & 2 deletions in this file) - the first return is the removed line,
// the second is its replacement, which additionally records the captured sub-pattern as ctll::list<Content...>.
template <size_t Id, typename Name, typename... Content, typename... Tail, typename... Output> constexpr auto find_captures(ctll::list<capture_with_name<Id,Name,Content...>, Tail...>, ctll::list<Output...>) noexcept {
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id, Name>>());
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id, Name, ctll::list<Content...>>>());
}


Expand Down
8 changes: 6 additions & 2 deletions include/ctre/return_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ struct not_matched_tag_t { };

static constexpr inline auto not_matched = not_matched_tag_t{};

template <size_t Id, typename Name = void> struct captured_content {
template <size_t Id, typename Name = void, typename Content = void> struct captured_content {
template <typename Iterator> class storage {
Iterator _begin{};
Iterator _end{};

bool _matched{false};
public:
using char_type = typename std::iterator_traits<Iterator>::value_type;

using content_type = Content;
using name = Name;

constexpr CTRE_FORCE_INLINE storage() noexcept {}
Expand Down Expand Up @@ -86,6 +86,10 @@ template <size_t Id, typename Name = void> struct captured_content {
constexpr CTRE_FORCE_INLINE static size_t get_id() noexcept {
return Id;
}

constexpr CTRE_FORCE_INLINE static content_type get_expression() noexcept {
return {};
}
};
};

Expand Down

0 comments on commit 2b62f7e

Please sign in to comment.