Skip to content

Commit

Permalink
Improved Like in-memory evaluator
Browse files Browse the repository at this point in the history
  • Loading branch information
fiseni committed Oct 3, 2024
1 parent 1044b3e commit 9418435
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 21 deletions.
1 change: 1 addition & 0 deletions exclusion.dic
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ asdfa
aaab
aaaab
aaaaab
axza
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ public static IQueryable<T> Like<T>(this IQueryable<T> source, IEnumerable<LikeE

foreach (var likeExpression in likeExpressions)
{
if (string.IsNullOrEmpty(likeExpression.Pattern))
continue;

var propertySelector = ParameterReplacerVisitor.Replace(
likeExpression.KeySelector,
likeExpression.KeySelector.Parameters[0],
Expand All @@ -34,14 +31,14 @@ public static IQueryable<T> Like<T>(this IQueryable<T> source, IEnumerable<LikeE

var patternAsExpression = ((Expression<Func<string>>)(() => likeExpression.Pattern)).Body;

var EFLikeExpression = Expression.Call(
var efLikeExpression = Expression.Call(
null,
_likeMethodInfo,
_functions,
propertySelector.Body,
patternAsExpression);

expr = expr is null ? (Expression)EFLikeExpression : Expression.OrElse(expr, EFLikeExpression);
expr = expr is null ? efLikeExpression : Expression.OrElse(expr, efLikeExpression);
}

return expr is null
Expand Down
36 changes: 20 additions & 16 deletions src/QuerySpecification/Evaluators/LikeExtension.cs
Original file line number Diff line number Diff line change
@@ -1,36 +1,40 @@
using System.Diagnostics.CodeAnalysis;
using System.Collections.Concurrent;
using System.Diagnostics.CodeAnalysis;
using System.Text.RegularExpressions;

namespace Pozitron.QuerySpecification;

internal static class LikeExtension
{
public static bool Like(this string input, string pattern)
{
try
{
return SqlLike(input, pattern);
}
catch (Exception ex)
{
throw new InvalidLikePatternException(pattern, ex);
}
}
private static readonly ConcurrentDictionary<string, Regex> _regexCache = new();

private static bool SqlLike(this string input, string pattern)
/// <summary>
/// Translates a SQL LIKE pattern into an anchored, case-insensitive, compiled <see cref="Regex"/>.
/// </summary>
/// <param name="pattern">The LIKE pattern; '%' and '_' are wildcards, '[' and '^' are passed through for character classes.</param>
/// <returns>A regex that must match the whole input string.</returns>
/// <exception cref="ArgumentException">The translated pattern is not a valid regular expression.</exception>
private static Regex BuildRegex(string pattern)
{
    // Escape special regex characters, excluding those handled separately
    var regexPattern = Regex
        .Escape(pattern)
        .Replace("%", ".*") // Translate SQL LIKE wildcard '%' to regex '.*'
        .Replace("_", ".") // Translate SQL LIKE wildcard '_' to regex '.'
        .Replace(@"\[", "[") // Unescape '[' as it's used for character classes/ranges
        .Replace(@"\^", "^"); // Unescape '^' as it can be used for negation in character classes

    // Ensure the pattern matches the entire string.
    // '\z' is used rather than '$' because '$' also succeeds just before a trailing '\n',
    // which would let "abc\n" match the pattern "abc".
    regexPattern = "^" + regexPattern + @"\z";

    // Singleline makes '.' match '\n' as well, so '%' and '_' span line breaks
    // the same way the SQL LIKE wildcards do.
    return new Regex(
        regexPattern,
        RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);
}

return regex.IsMatch(input);
/// <summary>
/// Checks whether <paramref name="input"/> matches the SQL LIKE <paramref name="pattern"/>.
/// </summary>
/// <param name="input">The text to test.</param>
/// <param name="pattern">The LIKE pattern to test against.</param>
/// <returns><c>true</c> when the regex built for the pattern matches the input; otherwise <c>false</c>.</returns>
/// <exception cref="InvalidLikePatternException">
/// Any exception raised while obtaining the regex or matching is wrapped in this type.
/// </exception>
public static bool Like(this string input, string pattern)
{
    try
    {
        // Each distinct pattern is translated once; subsequent calls reuse the cached regex.
        return _regexCache
            .GetOrAdd(pattern, BuildRegex)
            .IsMatch(input);
    }
    catch (Exception cause)
    {
        throw new InvalidLikePatternException(pattern, cause);
    }
}

#pragma warning disable IDE0051 // Remove unused private members
Expand Down
3 changes: 3 additions & 0 deletions src/QuerySpecification/Evaluators/LikeMemoryEvaluator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ private LikeMemoryEvaluator() { }

public IEnumerable<T> Evaluate<T>(IEnumerable<T> source, Specification<T> specification)
{
// There are benchmarks in QuerySpecification.Benchmarks project.
// It turns out that this is the simplest and also more efficient way.

foreach (var likeGroup in specification.LikeExpressions.GroupBy(x => x.Group))
{
source = source.Where(x => likeGroup.Any(c => c.KeySelectorFunc(x)?.Like(c.Pattern) ?? false));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
using System.Reflection;

namespace QuerySpecification.Benchmarks;

// Benchmarks comparing two ways of applying grouped Like expressions to an in-memory collection.
// Both options filter the same source with the same specification and materialize the result,
// so they differ only in how the per-group predicates are composed.
[MemoryDiagnoser]
public class LikeInMemoryBenchmark
{
// Minimal entity for the benchmark; LastName is nullable and some rows leave it null.
public record Customer(int Id, string FirstName, string? LastName);
// Specification with one Like expression in group 1 and two Like expressions in group 2.
private class CustomerSpec : Specification<Customer>
{
public CustomerSpec()
{
Query
.Like(x => x.FirstName, "%xx%", 1)
.Like(x => x.LastName, "%xy%", 2)
.Like(x => x.LastName, "%xz%", 2);
}
}

private CustomerSpec _specification = default!;
private List<Customer> _source = default!;

// Builds the specification and the source list once before the benchmarks run:
// five hand-written rows followed by 1000 generated rows (ids 6 through 1005).
[GlobalSetup]
public void Setup()
{
_specification = new CustomerSpec();
_source =
[
new(1, "axxa", "axya"),
new(2, "aaaa", "aaaa"),
new(3, "axxa", "axza"),
new(4, "aaaa", null),
new(5, "axxa", null),
.. Enumerable.Range(6, 1000).Select(x => new Customer(x, "axxa", "axya"))
];
}

// Baseline: chains one Where per Like group; an item passes a group when any
// expression in that group matches, and a null key counts as no match.
[Benchmark(Baseline = true)]
public List<Customer> EvaluateOption1()
{
var source = _source.AsEnumerable();

foreach (var likeGroup in _specification.LikeExpressions.GroupBy(x => x.Group))
{
source = source.Where(x => likeGroup.Any(c => c.KeySelectorFunc(x)?.Like(c.Pattern) ?? false));
}

return source.ToList();
}

// Alternative: materializes one predicate delegate per group up front, then applies
// a single Where that requires every group predicate to pass.
[Benchmark]
public List<Customer> EvaluateOption2()
{
var source = _source.AsEnumerable();

// Precompute the predicates for each group
var groupPredicates = _specification
.LikeExpressions
.GroupBy(x => x.Group)
.Select(group => new Func<Customer, bool>(x => group.Any(c => c.KeySelectorFunc(x)?.Like(c.Pattern) ?? false)))
.ToList();

// Apply all predicates to filter the source
var result = source.Where(x => groupPredicates.All(predicate => predicate(x)));

return result.ToList();
}
}

/// <summary>
/// Benchmark-side bridge to the <c>Like</c> string extension declared on the library's
/// <c>Pozitron.QuerySpecification.LikeExtension</c> type, which is looked up by name via reflection.
/// </summary>
public static class LikeExtensions
{
    // I don't want to expose the internal types to Benchmark project.
    // The method is located via reflection once and bound to a typed delegate, so every call
    // is a plain delegate invocation. MethodInfo.Invoke would allocate an argument array and
    // box the bool result per call, which distorts the [MemoryDiagnoser] figures, and would
    // wrap any failure in TargetInvocationException.
    private static readonly Func<string, string, bool> _like = typeof(LikeMemoryEvaluator).Assembly
        .GetType("Pozitron.QuerySpecification.LikeExtension")!
        .GetMethod("Like", BindingFlags.Public | BindingFlags.Static)!
        .CreateDelegate<Func<string, string, bool>>();

    /// <summary>
    /// Forwards to the library's <c>Like</c> implementation.
    /// </summary>
    /// <param name="input">The text to test.</param>
    /// <param name="pattern">The LIKE pattern to test against.</param>
    /// <returns>The result returned by the library method.</returns>
    public static bool Like(this string input, string pattern) => _like(input, pattern);
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,33 @@ public void Filters_GivenLikeInDifferentGroup()
AssertForEvaluate(spec, input, expected);
}

[Fact]
public void Filters_GivenLikeComplexGrouping()
{
    // One FirstName pattern in group 1, two alternative LastName patterns in group 2.
    var specification = new Specification<Customer>();
    specification.Query
        .Like(x => x.FirstName, "%xx%", 1)
        .Like(x => x.LastName, "%xy%", 2)
        .Like(x => x.LastName, "%xz%", 2);

    List<Customer> customers =
    [
        new(1, "axxa", "axya"),
        new(2, "aaaa", "aaaa"),
        new(3, "axxa", "axza"),
        new(4, "aaaa", null),
        new(5, "axxa", null)
    ];

    // Only rows whose FirstName contains "xx" and whose LastName contains "xy" or "xz" remain.
    List<Customer> matching =
    [
        new(1, "axxa", "axya"),
        new(3, "axxa", "axza"),
    ];

    AssertForEvaluate(specification, customers, matching);
}

[Fact]
public void DoesNotFilter_GivenNoLike()
{
Expand Down

0 comments on commit 9418435

Please sign in to comment.