Skip to content

Commit

Permalink
apply formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
maxbachmann committed Apr 6, 2024
1 parent e204e54 commit d86973f
Show file tree
Hide file tree
Showing 11 changed files with 68 additions and 79 deletions.
13 changes: 10 additions & 3 deletions extras/rapidfuzz_amalgamated.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
// SPDX-License-Identifier: MIT
// RapidFuzz v1.0.2
// Generated: 2024-03-04 01:05:33.165575
// Generated: 2024-04-06 15:39:26.940916
// ----------------------------------------------------------
// This file is an amalgamation of multiple different files.
// You probably shouldn't edit it directly.
Expand Down Expand Up @@ -6408,7 +6408,10 @@ double jaro_winkler_similarity(const Range<InputIt1>& P, const Range<InputIt2>&
}

double Sim = jaro_similarity(P, T, jaro_score_cutoff);
if (Sim > 0.7) Sim += static_cast<double>(prefix) * prefix_weight * (1.0 - Sim);
if (Sim > 0.7) {
Sim += static_cast<double>(prefix) * prefix_weight * (1.0 - Sim);
Sim = std::min(Sim, 1.0);
}

return (Sim >= score_cutoff) ? Sim : 0;
}
Expand Down Expand Up @@ -6437,7 +6440,10 @@ double jaro_winkler_similarity(const BlockPatternMatchVector& PM, const Range<In
}

double Sim = jaro_similarity(PM, P, T, jaro_score_cutoff);
if (Sim > 0.7) Sim += static_cast<double>(prefix) * prefix_weight * (1.0 - Sim);
if (Sim > 0.7) {
Sim += static_cast<double>(prefix) * prefix_weight * (1.0 - Sim);
Sim = std::min(Sim, 1.0);
}

return (Sim >= score_cutoff) ? Sim : 0;
}
Expand Down Expand Up @@ -6593,6 +6599,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase<MultiJaroWinkler<Ma
if (static_cast<uint64_t>(s2[prefix]) != prefixes[i][prefix]) break;

scores[i] += static_cast<double>(prefix) * prefix_weight * (1.0 - scores[i]);
scores[i] = std::min(scores[i], 1.0);
}

if (scores[i] < score_cutoff) scores[i] = 0.0;
Expand Down
2 changes: 1 addition & 1 deletion rapidfuzz/details/GrowingHashmap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
#pragma once

#include <array>
#include <stdint.h>
#include <stddef.h>
#include <stdint.h>

namespace rapidfuzz::detail {

Expand Down
8 changes: 3 additions & 5 deletions rapidfuzz/details/Range.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
#include <limits>
#include <ostream>
#include <stdexcept>
#include <stdint.h>
#include <sys/types.h>
#include <vector>
#include <stdint.h>

namespace rapidfuzz::detail {

Expand Down Expand Up @@ -76,8 +76,7 @@ class Range {
}

constexpr Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size)
{
}
{}

template <typename T>
constexpr Range(T& x) : _first(to_begin(x)), _last(to_end(x))
Expand Down Expand Up @@ -156,8 +155,7 @@ class Range {

Range res = *this;
res.remove_prefix(pos);
if(count < res.size())
res.remove_suffix(res.size() - count);
if (count < res.size()) res.remove_suffix(res.size() - count);

return res;
}
Expand Down
10 changes: 5 additions & 5 deletions rapidfuzz/distance/Indel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,21 @@ namespace rapidfuzz {

template <typename InputIt1, typename InputIt2>
size_t indel_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
size_t score_cutoff = std::numeric_limits<size_t>::max())
size_t score_cutoff = std::numeric_limits<size_t>::max())
{
return detail::Indel::distance(first1, last1, first2, last2, score_cutoff, score_cutoff);
}

template <typename Sentence1, typename Sentence2>
size_t indel_distance(const Sentence1& s1, const Sentence2& s2,
size_t score_cutoff = std::numeric_limits<size_t>::max())
size_t score_cutoff = std::numeric_limits<size_t>::max())
{
return detail::Indel::distance(s1, s2, score_cutoff, score_cutoff);
}

template <typename InputIt1, typename InputIt2>
size_t indel_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
size_t score_cutoff = 0.0)
size_t score_cutoff = 0.0)
{
return detail::Indel::similarity(first1, last1, first2, last2, score_cutoff, score_cutoff);
}
Expand Down Expand Up @@ -146,8 +146,8 @@ struct MultiIndel
#endif

template <typename CharT1>
struct CachedIndel : public detail::CachedDistanceBase<CachedIndel<CharT1>, size_t, 0,
std::numeric_limits<int64_t>::max()> {
struct CachedIndel
: public detail::CachedDistanceBase<CachedIndel<CharT1>, size_t, 0, std::numeric_limits<int64_t>::max()> {
template <typename Sentence1>
explicit CachedIndel(const Sentence1& s1_) : CachedIndel(detail::to_begin(s1_), detail::to_end(s1_))
{}
Expand Down
4 changes: 2 additions & 2 deletions rapidfuzz/distance/Indel_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace rapidfuzz::detail {

template <typename InputIt1, typename InputIt2>
size_t indel_distance(const BlockPatternMatchVector& block, const Range<InputIt1>& s1,
const Range<InputIt2>& s2, size_t score_cutoff)
const Range<InputIt2>& s2, size_t score_cutoff)
{
size_t maximum = s1.size() + s2.size();
size_t lcs_cutoff = (maximum / 2 >= score_cutoff) ? maximum / 2 - score_cutoff : 0;
Expand Down Expand Up @@ -54,7 +54,7 @@ class Indel : public DistanceBase<Indel, size_t, 0, std::numeric_limits<int64_t>

template <typename InputIt1, typename InputIt2>
static size_t _distance(const Range<InputIt1>& s1, const Range<InputIt2>& s2, size_t score_cutoff,
size_t score_hint)
size_t score_hint)
{
size_t maximum = Indel::maximum(s1, s2);
size_t lcs_cutoff = (maximum / 2 >= score_cutoff) ? maximum / 2 - score_cutoff : 0;
Expand Down
5 changes: 3 additions & 2 deletions rapidfuzz/distance/Jaro_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -673,8 +673,9 @@ jaro_similarity_simd_long_s2(Range<double*> scores, const detail::BlockPatternMa

VecType PatternFlagMask = blsi(P_flag_cur);

uint64_t PM_j = block.get(
cur_vec + cur_block, s2[countr_zero(T_flag_cur) + T_word_index * sizeof(VecType) * 8]);
uint64_t PM_j =
block.get(cur_vec + cur_block,
s2[countr_zero(T_flag_cur) + T_word_index * sizeof(VecType) * 8]);
Transpositions += !(PM_j & (static_cast<uint64_t>(PatternFlagMask) << offset));

T_flag_cur = blsr(T_flag_cur);
Expand Down
11 changes: 5 additions & 6 deletions rapidfuzz/distance/LCSseq.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,21 @@ namespace rapidfuzz {

template <typename InputIt1, typename InputIt2>
size_t lcs_seq_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
size_t score_cutoff = std::numeric_limits<size_t>::max())
size_t score_cutoff = std::numeric_limits<size_t>::max())
{
return detail::LCSseq::distance(first1, last1, first2, last2, score_cutoff, score_cutoff);
}

template <typename Sentence1, typename Sentence2>
size_t lcs_seq_distance(const Sentence1& s1, const Sentence2& s2,
size_t score_cutoff = std::numeric_limits<size_t>::max())
size_t score_cutoff = std::numeric_limits<size_t>::max())
{
return detail::LCSseq::distance(s1, s2, score_cutoff, score_cutoff);
}

template <typename InputIt1, typename InputIt2>
size_t lcs_seq_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
size_t score_cutoff = 0)
size_t score_cutoff = 0)
{
return detail::LCSseq::similarity(first1, last1, first2, last2, score_cutoff, score_cutoff);
}
Expand Down Expand Up @@ -206,8 +206,7 @@ struct CachedLCSseq
{}

private:
friend detail::CachedSimilarityBase<CachedLCSseq<CharT1>, size_t, 0,
std::numeric_limits<int64_t>::max()>;
friend detail::CachedSimilarityBase<CachedLCSseq<CharT1>, size_t, 0, std::numeric_limits<int64_t>::max()>;
friend detail::CachedNormalizedMetricBase<CachedLCSseq<CharT1>>;

template <typename InputIt2>
Expand All @@ -218,7 +217,7 @@ struct CachedLCSseq

template <typename InputIt2>
size_t _similarity(const detail::Range<InputIt2>& s2, size_t score_cutoff,
[[maybe_unused]] size_t score_hint) const
[[maybe_unused]] size_t score_hint) const
{
return detail::lcs_seq_similarity(PM, detail::Range(s1), s2, score_cutoff);
}
Expand Down
4 changes: 2 additions & 2 deletions rapidfuzz/distance/LCSseq_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ size_t longest_common_subsequence(const Range<InputIt1>& s1, const Range<InputIt

template <typename InputIt1, typename InputIt2>
size_t lcs_seq_similarity(const BlockPatternMatchVector& block, Range<InputIt1> s1, Range<InputIt2> s2,
size_t score_cutoff)
size_t score_cutoff)
{
auto len1 = s1.size();
auto len2 = s2.size();
Expand Down Expand Up @@ -520,7 +520,7 @@ class LCSseq : public SimilarityBase<LCSseq, size_t, 0, std::numeric_limits<int6

template <typename InputIt1, typename InputIt2>
static size_t _similarity(const Range<InputIt1>& s1, const Range<InputIt2>& s2, size_t score_cutoff,
[[maybe_unused]] size_t score_hint)
[[maybe_unused]] size_t score_hint)
{
return lcs_seq_similarity(s1, s2, score_cutoff);
}
Expand Down
40 changes: 17 additions & 23 deletions rapidfuzz/distance/Levenshtein.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,34 +139,34 @@ namespace rapidfuzz {
*/
template <typename InputIt1, typename InputIt2>
size_t levenshtein_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
LevenshteinWeightTable weights = {1, 1, 1},
size_t score_cutoff = std::numeric_limits<size_t>::max(),
size_t score_hint = std::numeric_limits<size_t>::max())
LevenshteinWeightTable weights = {1, 1, 1},
size_t score_cutoff = std::numeric_limits<size_t>::max(),
size_t score_hint = std::numeric_limits<size_t>::max())
{
return detail::Levenshtein::distance(first1, last1, first2, last2, weights, score_cutoff, score_hint);
}

template <typename Sentence1, typename Sentence2>
size_t levenshtein_distance(const Sentence1& s1, const Sentence2& s2,
LevenshteinWeightTable weights = {1, 1, 1},
size_t score_cutoff = std::numeric_limits<size_t>::max(),
size_t score_hint = std::numeric_limits<size_t>::max())
LevenshteinWeightTable weights = {1, 1, 1},
size_t score_cutoff = std::numeric_limits<size_t>::max(),
size_t score_hint = std::numeric_limits<size_t>::max())
{
return detail::Levenshtein::distance(s1, s2, weights, score_cutoff, score_hint);
}

template <typename InputIt1, typename InputIt2>
size_t levenshtein_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
LevenshteinWeightTable weights = {1, 1, 1}, size_t score_cutoff = 0,
size_t score_hint = 0)
LevenshteinWeightTable weights = {1, 1, 1}, size_t score_cutoff = 0,
size_t score_hint = 0)
{
return detail::Levenshtein::similarity(first1, last1, first2, last2, weights, score_cutoff, score_hint);
}

template <typename Sentence1, typename Sentence2>
size_t levenshtein_similarity(const Sentence1& s1, const Sentence2& s2,
LevenshteinWeightTable weights = {1, 1, 1}, size_t score_cutoff = 0,
size_t score_hint = 0)
LevenshteinWeightTable weights = {1, 1, 1}, size_t score_cutoff = 0,
size_t score_hint = 0)
{
return detail::Levenshtein::similarity(s1, s2, weights, score_cutoff, score_hint);
}
Expand Down Expand Up @@ -389,17 +389,13 @@ struct MultiLevenshtein : public detail::MultiDistanceBase<MultiLevenshtein<MaxL

detail::Range scores_(scores, scores + score_count);
if constexpr (MaxLen == 8)
detail::levenshtein_hyrroe2003_simd<uint8_t>(scores_, PM, str_lens, s2,
score_cutoff);
detail::levenshtein_hyrroe2003_simd<uint8_t>(scores_, PM, str_lens, s2, score_cutoff);
else if constexpr (MaxLen == 16)
detail::levenshtein_hyrroe2003_simd<uint16_t>(scores_, PM, str_lens, s2,
score_cutoff);
detail::levenshtein_hyrroe2003_simd<uint16_t>(scores_, PM, str_lens, s2, score_cutoff);
else if constexpr (MaxLen == 32)
detail::levenshtein_hyrroe2003_simd<uint32_t>(scores_, PM, str_lens, s2,
score_cutoff);
detail::levenshtein_hyrroe2003_simd<uint32_t>(scores_, PM, str_lens, s2, score_cutoff);
else if constexpr (MaxLen == 64)
detail::levenshtein_hyrroe2003_simd<uint64_t>(scores_, PM, str_lens, s2,
score_cutoff);
detail::levenshtein_hyrroe2003_simd<uint64_t>(scores_, PM, str_lens, s2, score_cutoff);
}

template <typename InputIt2>
Expand Down Expand Up @@ -458,9 +454,8 @@ struct CachedLevenshtein : public detail::CachedDistanceBase<CachedLevenshtein<C
// max can make use of the common divisor of the three weights
size_t new_score_cutoff = detail::ceil_div(score_cutoff, weights.insert_cost);
size_t new_score_hint = detail::ceil_div(score_hint, weights.insert_cost);
size_t dist = detail::uniform_levenshtein_distance(
PM, detail::Range(s1), s2, new_score_cutoff,
new_score_hint);
size_t dist = detail::uniform_levenshtein_distance(PM, detail::Range(s1), s2,
new_score_cutoff, new_score_hint);
dist *= weights.insert_cost;

return (dist <= score_cutoff) ? dist : score_cutoff + 1;
Expand All @@ -478,8 +473,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase<CachedLevenshtein<C
}
}

return detail::generalized_levenshtein_distance(
detail::Range(s1), s2, weights, score_cutoff);
return detail::generalized_levenshtein_distance(detail::Range(s1), s2, weights, score_cutoff);
}

std::vector<CharT1> s1;
Expand Down
Loading

0 comments on commit d86973f

Please sign in to comment.