From eccb06175b20a9b8e7da25a9e5db30e275df2a53 Mon Sep 17 00:00:00 2001 From: Brad Lhotsky Date: Tue, 31 Oct 2023 15:29:40 -0700 Subject: [PATCH] Add ::QueryString::Text to replace ::AutoEscape * Replace `::QueryString::AutoEscape` with `::QueryString::Text` * Move `=field:value` for `term` queries to `::QueryString::Text` * Add `*field:value` for `wildcard` queries * Add `~field:value` for `fuzzy` queries * Add `/field:value` for `regexp` queries * Add `+field:value` for `match_phrase` queries * Automatically promote queries against `text` fields to `match` queries unless otherwise specified * Add tests for the behavior --- CopyIndexes.mkdn | 74 ++++++++- Maintenance.mkdn | 2 +- README.mkdn | 2 +- Searching.mkdn | 74 ++++++++- examples/es-parse-query-string.pl | 11 +- .../ElasticSearch/Utilities/QueryString.pm | 9 +- .../Utilities/QueryString/AutoEscape.pm | 62 -------- .../Utilities/QueryString/Plugin.pm | 14 +- .../Utilities/QueryString/Text.pm | 150 ++++++++++++++++++ t/01-querystring.t | 104 +++++++++++- 10 files changed, 416 insertions(+), 86 deletions(-) delete mode 100644 lib/App/ElasticSearch/Utilities/QueryString/AutoEscape.pm create mode 100644 lib/App/ElasticSearch/Utilities/QueryString/Text.pm diff --git a/CopyIndexes.mkdn b/CopyIndexes.mkdn index 77bfae4..a203d72 100644 --- a/CopyIndexes.mkdn +++ b/CopyIndexes.mkdn @@ -4,7 +4,7 @@ es-copy-index.pl - Copy an index from one cluster to another # VERSION -version 8.7 +version 8.8 # SYNOPSIS @@ -177,7 +177,17 @@ The **incident-rt1234-2013.01.11** index will now hold all the data from both of The search string is pre-analyzed before being sent to ElasticSearch. The following plugins work to manipulate the query string and provide richer, more complete syntax for CLI applications. 
-## App::ElasticSearch::Utilities::QueryString::AutoEscape +## App::ElasticSearch::Utilities::QueryString::Barewords + +The following barewords are transformed: + + or => OR + and => AND + not => NOT + +## App::ElasticSearch::Utilities::QueryString::Text + +### Terms Query via '=' Provide an '=' prefix to a query string parameter to promote that parameter to a `term` filter. @@ -195,15 +205,63 @@ Is translated into: { term => { user_agent => "Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1" } } -Which provides an exact match to the term in the query. +### Wildcard Query via '\*' -## App::ElasticSearch::Utilities::QueryString::Barewords +Provide an '\*' prefix to a query string parameter to promote that parameter to a `wildcard` filter. -The following barewords are transformed: +This uses the wildcard match for text fields to make matching more intuitive. - or => OR - and => AND - not => NOT +E.g.: + + *user_agent:"Mozilla*" + +Is translated into: + + { wildcard => { user_agent => "Mozilla*" } } + +### Regexp Query via '/' + +Provide a '/' prefix to a query string parameter to promote that parameter to a `regexp` filter. + +If you want to use regexp matching for finding data, you can use: + + /message:'\\bden(ial|ied|y)' + +Is translated into: + + { regexp => { message => "\\bden(ial|ied|y)" } } + +### Fuzzy Matching via '~' + +Provide a '~' prefix to a query string parameter to promote that parameter to a `fuzzy` filter. + + ~message:deny + +Is translated into: + + { fuzzy => { message => "deny" } } + +### Phrase Matching via '+' + +Provide a '+' prefix to a query string parameter to promote that parameter to a `match_phrase` filter.
+ + +message:"login denied" + +Is translated into: + + { match_phrase => { message => "login denied" } } + +### Automatic Match Queries for Text Fields + +If the field meta data is provided and the field is a `text` type, the query +will automatically be mapped to a `match` query. + + # message field is text + message:"foo" + +Is translated into: + + { match => { message => "foo" } } ## App::ElasticSearch::Utilities::QueryString::IP diff --git a/Maintenance.mkdn b/Maintenance.mkdn index c8ba7e2..6a0016e 100644 --- a/Maintenance.mkdn +++ b/Maintenance.mkdn @@ -4,7 +4,7 @@ es-daily-index-maintenance.pl - Run to prune old indexes and optimize existing # VERSION -version 8.7 +version 8.8 # SYNOPSIS diff --git a/README.mkdn b/README.mkdn index 5fd588d..e290f0f 100644 --- a/README.mkdn +++ b/README.mkdn @@ -4,7 +4,7 @@ App::ElasticSearch::Utilities - Utilities for Monitoring ElasticSearch # VERSION -version 8.7 +version 8.8 # SYNOPSIS diff --git a/Searching.mkdn b/Searching.mkdn index e13b0b8..bad4342 100644 --- a/Searching.mkdn +++ b/Searching.mkdn @@ -4,7 +4,7 @@ es-search.pl - Provides a CLI for quick searches of data in ElasticSearch daily # VERSION -version 8.7 +version 8.8 # SYNOPSIS @@ -386,7 +386,17 @@ es-search.pl - Search a logging cluster for information The search string is pre-analyzed before being sent to ElasticSearch. The following plugins work to manipulate the query string and provide richer, more complete syntax for CLI applications. -## App::ElasticSearch::Utilities::QueryString::AutoEscape +## App::ElasticSearch::Utilities::QueryString::Barewords + +The following barewords are transformed: + + or => OR + and => AND + not => NOT + +## App::ElasticSearch::Utilities::QueryString::Text + +### Terms Query via '=' Provide an '=' prefix to a query string parameter to promote that parameter to a `term` filter. 
@@ -404,15 +414,63 @@ Is translated into: { term => { user_agent => "Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1" } } -Which provides an exact match to the term in the query. +### Wildcard Query via '\*' -## App::ElasticSearch::Utilities::QueryString::Barewords +Provide an '\*' prefix to a query string parameter to promote that parameter to a `wildcard` filter. -The following barewords are transformed: +This uses the wildcard match for text fields to make matching more intuitive. - or => OR - and => AND - not => NOT +E.g.: + + *user_agent:"Mozilla*" + +Is translated into: + + { wildcard => { user_agent => "Mozilla*" } } + +### Regexp Query via '/' + +Provide a '/' prefix to a query string parameter to promote that parameter to a `regexp` filter. + +If you want to use regexp matching for finding data, you can use: + + /message:'\\bden(ial|ied|y)' + +Is translated into: + + { regexp => { message => "\\bden(ial|ied|y)" } } + +### Fuzzy Matching via '~' + +Provide a '~' prefix to a query string parameter to promote that parameter to a `fuzzy` filter. + + ~message:deny + +Is translated into: + + { fuzzy => { message => "deny" } } + +### Phrase Matching via '+' + +Provide a '+' prefix to a query string parameter to promote that parameter to a `match_phrase` filter. + + +message:"login denied" + +Is translated into: + + { match_phrase => { message => "login denied" } } + +### Automatic Match Queries for Text Fields + +If the field meta data is provided and the field is a `text` type, the query +will automatically be mapped to a `match` query.
+ + # message field is text + message:"foo" + +Is translated into: + + { match => { message => "foo" } } ## App::ElasticSearch::Utilities::QueryString::IP diff --git a/examples/es-parse-query-string.pl b/examples/es-parse-query-string.pl index 7639067..db88523 100644 --- a/examples/es-parse-query-string.pl +++ b/examples/es-parse-query-string.pl @@ -11,10 +11,19 @@ my %OPT; GetOptions(\%OPT, qw( - or + or + field=s% )); my $json = JSON->new->ascii->canonical(1)->pretty; +my %fields = (); +if( $OPT{field} ) { + foreach my $f ( keys %{ $OPT{field} } ) { + $fields{$f} = { type => $OPT{field}->{$f} }; + } + output({color=>'yellow'}, "Fields: " . $json->encode(\%fields)); +} my $qs = App::ElasticSearch::Utilities::QueryString->new( + fields_meta => \%fields, default_join => $OPT{or} ? 'OR' : 'AND', ); my $query = $qs->expand_query_string(@ARGV); diff --git a/lib/App/ElasticSearch/Utilities/QueryString.pm b/lib/App/ElasticSearch/Utilities/QueryString.pm index 2bdaf2e..48da657 100644 --- a/lib/App/ElasticSearch/Utilities/QueryString.pm +++ b/lib/App/ElasticSearch/Utilities/QueryString.pm @@ -191,7 +191,10 @@ sub _build_plugins { ); my @plugins; foreach my $p ( sort { $a->priority <=> $b->priority || $a->name cmp $b->name } - $finder->plugins( options => defined $globals ? $globals : {} ) + $finder->plugins( + fields_meta => $self->fields_meta, + options => defined $globals ? $globals : {}, + ) ) { debug(sprintf "Loaded %s with priority:%d", $p->name, $p->priority); push @plugins, $p; @@ -246,10 +249,10 @@ words to prevent syntax errors. The search string is pre-analyzed before being sent to ElasticSearch. The following plugins work to manipulate the query string and provide richer, more complete syntax for CLI applications.
-=from_other App::ElasticSearch::Utilities::QueryString::AutoEscape / SYNOPSIS - =from_other App::ElasticSearch::Utilities::QueryString::BareWords / SYNOPSIS +=from_other App::ElasticSearch::Utilities::QueryString::Text / SYNOPSIS + =from_other App::ElasticSearch::Utilities::QueryString::IP / SYNOPSIS =from_other App::ElasticSearch::Utilities::QueryString::Ranges / SYNOPSIS diff --git a/lib/App/ElasticSearch/Utilities/QueryString/AutoEscape.pm b/lib/App/ElasticSearch/Utilities/QueryString/AutoEscape.pm deleted file mode 100644 index b08ea08..0000000 --- a/lib/App/ElasticSearch/Utilities/QueryString/AutoEscape.pm +++ /dev/null @@ -1,62 +0,0 @@ -package App::ElasticSearch::Utilities::QueryString::AutoEscape; -# ABSTRACT: Provides a prefix of '=' to use the term filter - -use v5.16; -use warnings; - -# VERSION - -use CLI::Helpers qw(:output); -use Const::Fast; -use namespace::autoclean; - -use Moo; -with 'App::ElasticSearch::Utilities::QueryString::Plugin'; - -sub _build_priority { 5; } - -=for Pod::Coverage handle_token - -=cut - -sub handle_token { - my ($self,$token) = @_; - - debug(sprintf "%s - evaluating token '%s'", $self->name, $token); - if( $token =~ /^=(.*)$/ ) { - my ($f,$v) = split /:/, $1, 2; - return { condition => { term => { $f => $v } }}; - } - - return; -} - -# Return True; -1; - -__END__ - -=head1 SYNOPSIS - -=head2 App::ElasticSearch::Utilities::QueryString::AutoEscape - -Provide an '=' prefix to a query string parameter to promote that parameter to a C filter. - -This allows for exact matches of a field without worrying about escaping Lucene special character filters. - -E.g.: - - user_agent:"Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1" - -Is evaluated into a weird query that doesn't do what you want. 
However: - - =user_agent:"Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1" - -Is translated into: - - { term => { user_agent => "Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1" } } - -Which provides an exact match to the term in the query. - - -=cut diff --git a/lib/App/ElasticSearch/Utilities/QueryString/Plugin.pm b/lib/App/ElasticSearch/Utilities/QueryString/Plugin.pm index 132fb9e..0ece675 100644 --- a/lib/App/ElasticSearch/Utilities/QueryString/Plugin.pm +++ b/lib/App/ElasticSearch/Utilities/QueryString/Plugin.pm @@ -9,7 +9,7 @@ use warnings; use Hash::Merge::Simple qw(clone_merge); use Moo::Role; use Ref::Util qw(is_arrayref is_hashref); -use Types::Standard qw( Str Int ); +use Types::Standard qw( HashRef Str Int ); =attr name @@ -41,6 +41,18 @@ has priority => ( ); sub _build_priority { 50; } +=attr fields_meta + +A hash reference with the field data from L. 
+ +=cut + +has fields_meta => ( + is => 'rw', + isa => HashRef, + default => sub { {} }, +); + =head1 INTERFACE =head2 handle_token() diff --git a/lib/App/ElasticSearch/Utilities/QueryString/Text.pm b/lib/App/ElasticSearch/Utilities/QueryString/Text.pm new file mode 100644 index 0000000..ddefd40 --- /dev/null +++ b/lib/App/ElasticSearch/Utilities/QueryString/Text.pm @@ -0,0 +1,150 @@ +package App::ElasticSearch::Utilities::QueryString::Text; +# ABSTRACT: Provides a better interface for text and keyword queries + +use v5.16; +use warnings; + +# VERSION + +use CLI::Helpers qw(:output); +use Const::Fast; +use namespace::autoclean; + +use Moo; +with 'App::ElasticSearch::Utilities::QueryString::Plugin'; + +sub _build_priority { 5; } + +=for Pod::Coverage handle_token + +=cut + +sub handle_token { + my ($self,$token) = @_; + + my $meta = $self->fields_meta; + + debug(sprintf "%s - evaluating token '%s'", $self->name, $token); + if ( $token =~ /[^:]+:/ ) { + my ($f,$v) = split /:/, $token, 2; + + my $matcher = ''; + + # Strip only a documented operator prefix, and only when a field name follows, so tokens like '*:*' or '-field:value' pass through untouched + my $has_op = $f =~ s{^(?<op>[*=/~+])(?=.)}{}; + if( $has_op ) { + $matcher = $+{op} eq '*' ? 'wildcard' + : $+{op} eq '=' ? 'term' + : $+{op} eq '/' ? 'regexp' + : $+{op} eq '~' ? 'fuzzy' + : $+{op} eq '+' ? 'match_phrase' + : ''; + } + + # Check metadata for text type + if ( exists $meta->{$f} + && exists $meta->{$f}{type} + && $meta->{$f}{type} eq 'text' + ) { + # We can't use term filters on text fields + $matcher = 'match' if !$matcher or $matcher eq 'term'; + } + + if( $matcher ) { + return { condition => { $matcher => { $f => $v } } }; + } + } + + return; +} + +# Return True; +1; + +__END__ + +=head1 SYNOPSIS + +=head2 App::ElasticSearch::Utilities::QueryString::Text + +Provides field prefixes to manipulate the text search capabilities. + +=head3 Terms Query via '=' + +Provide an '=' prefix to a query string parameter to promote that parameter to a C<term> filter.
+ +This allows for exact matches of a field without worrying about escaping Lucene special character filters. + +E.g.: + + user_agent:"Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1" + +Is evaluated into a weird query that doesn't do what you want. However: + + =user_agent:"Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1" + +Is translated into: + + { term => { user_agent => "Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1" } } + +=head3 Wildcard Query via '*' + +Provide an '*' prefix to a query string parameter to promote that parameter to a C<wildcard> filter. + +This uses the wildcard match for text fields to make matching more intuitive. + +E.g.: + + *user_agent:"Mozilla*" + +Is translated into: + + { wildcard => { user_agent => "Mozilla*" } } + +=head3 Regexp Query via '/' + +Provide a '/' prefix to a query string parameter to promote that parameter to a C<regexp> filter. + +If you want to use regexp matching for finding data, you can use: + + /message:'\\bden(ial|ied|y)' + +Is translated into: + + { regexp => { message => "\\bden(ial|ied|y)" } } + +=head3 Fuzzy Matching via '~' + +Provide a '~' prefix to a query string parameter to promote that parameter to a C<fuzzy> filter. + + ~message:deny + +Is translated into: + + { fuzzy => { message => "deny" } } + +=head3 Phrase Matching via '+' + +Provide a '+' prefix to a query string parameter to promote that parameter to a C<match_phrase> filter. + + +message:"login denied" + +Is translated into: + + { match_phrase => { message => "login denied" } } + +=head3 Automatic Match Queries for Text Fields + +If the field meta data is provided and the field is a C<text> type, the query +will automatically be mapped to a C<match> query.
+ + # message field is text + message:"foo" + +Is translated into: + + { match => { message => "foo" } } + + +=cut + diff --git a/t/01-querystring.t b/t/01-querystring.t index 733d15d..38d7638 100644 --- a/t/01-querystring.t +++ b/t/01-querystring.t @@ -146,9 +146,111 @@ my %tests = ( } }, ], + '07-term-filter' => [ + [qw(=foo:bar)], + { + 'bool' => { + 'must' => [ + { + 'term' => { + 'foo' => 'bar', + } + } + ] + } + }, + ], + '08-fuzzy-filter' => [ + [qw(~foo:bar)], + { + 'bool' => { + 'must' => [ + { + 'fuzzy' => { + 'foo' => 'bar', + } + } + ] + } + }, + ], + '08-wildcard-filter' => [ + [qw(*foo:bar*)], + { + 'bool' => { + 'must' => [ + { + 'wildcard' => { + 'foo' => 'bar*', + } + } + ] + } + }, + ], + '09-regexp-filter' => [ + [qw(/foo:bar.*)], + { + 'bool' => { + 'must' => [ + { + 'regexp' => { + 'foo' => 'bar.*', + } + } + ] + } + }, + ], + '10-phrase-filter' => [ + ['+foo:bar baz'], + { + 'bool' => { + 'must' => [ + { + 'match_phrase' => { + 'foo' => 'bar baz', + } + } + ] + } + }, + ], + '11-match-promotion' => [ + ['a_text_field:foo'], + { + 'bool' => { + 'must' => [ + { + 'match' => { + 'a_text_field' => 'foo', + } + } + ] + } + }, + ], + '11-match-not-terms' => [ + ['=a_text_field:foo'], + { + 'bool' => { + 'must' => [ + { + 'match' => { + 'a_text_field' => 'foo', + } + } + ] + } + }, + ], ); -my $qs = App::ElasticSearch::Utilities::QueryString->new(); +my $qs = App::ElasticSearch::Utilities::QueryString->new( + fields_meta => { + a_text_field => { type => "text" }, + }, +); foreach my $t (sort keys %tests) { my $q = $qs->expand_query_string( @{ $tests{$t}->[0] } );