diff --git a/lib/filterx/CMakeLists.txt b/lib/filterx/CMakeLists.txt index 00c530bbe..32ec9c249 100644 --- a/lib/filterx/CMakeLists.txt +++ b/lib/filterx/CMakeLists.txt @@ -46,6 +46,7 @@ set(FILTERX_HEADERS filterx/func-len.h filterx/func-vars.h filterx/func-unset-empties.h + filterx/func-str.h filterx/func-str-transform.h filterx/func-flatten.h filterx/func-sdata.h @@ -104,6 +105,7 @@ set(FILTERX_SOURCES filterx/func-len.c filterx/func-vars.c filterx/func-unset-empties.c + filterx/func-str.c filterx/func-str-transform.c filterx/func-flatten.c filterx/func-sdata.c diff --git a/lib/filterx/Makefile.am b/lib/filterx/Makefile.am index 5ec5d301e..b5f92e210 100644 --- a/lib/filterx/Makefile.am +++ b/lib/filterx/Makefile.am @@ -49,6 +49,7 @@ filterxinclude_HEADERS = \ lib/filterx/func-vars.h \ lib/filterx/func-unset-empties.h \ lib/filterx/func-str-transform.h \ + lib/filterx/func-str.h \ lib/filterx/func-flatten.h \ lib/filterx/func-sdata.h \ lib/filterx/filterx-private.h \ @@ -107,6 +108,7 @@ filterx_sources = \ lib/filterx/func-vars.c \ lib/filterx/func-unset-empties.c \ lib/filterx/func-str-transform.c \ + lib/filterx/func-str.c \ lib/filterx/func-flatten.c \ lib/filterx/func-sdata.c \ lib/filterx/filterx-private.c \ diff --git a/lib/filterx/filterx-globals.c b/lib/filterx/filterx-globals.c index 133198ea3..3379bf5e2 100644 --- a/lib/filterx/filterx-globals.c +++ b/lib/filterx/filterx-globals.c @@ -34,6 +34,7 @@ #include "filterx/func-len.h" #include "filterx/func-vars.h" #include "filterx/func-unset-empties.h" +#include "filterx/func-str.h" #include "filterx/func-str-transform.h" #include "filterx/func-flatten.h" #include "filterx/func-sdata.h" @@ -134,6 +135,9 @@ _ctors_init(void) g_assert(filterx_builtin_function_ctor_register("flatten", filterx_function_flatten_new)); g_assert(filterx_builtin_function_ctor_register("is_sdata_from_enterprise", filterx_function_is_sdata_from_enterprise_new)); + g_assert(filterx_builtin_function_ctor_register("startswith", filterx_function_startswith_new)); + g_assert(filterx_builtin_function_ctor_register("endswith", filterx_function_endswith_new)); + g_assert(filterx_builtin_function_ctor_register("includes", filterx_function_includes_new)); } static void diff --git a/lib/filterx/func-str.c b/lib/filterx/func-str.c new file mode 100644 index 000000000..7840cb3ca --- /dev/null +++ b/lib/filterx/func-str.c @@ -0,0 +1,444 @@ +/* + * Copyright (c) 2024 Axoflow + * Copyright (c) 2024 Szilard Parrag + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * As an additional exemption you are allowed to compile & link against the + * OpenSSL libraries as published by the OpenSSL project. See the file + * COPYING for details. + * + */ + +#include "filterx/func-str.h" +#include "filterx/object-primitive.h" +#include "filterx/expr-literal.h" +#include "filterx/expr-literal-generator.h" +#include "filterx/filterx-eval.h" +#include "filterx/object-extractor.h" +#include "filterx/object-string.h" +#include "filterx/object-message-value.h" +#include "filterx/object-list-interface.h" +#include "scratch-buffers.h" + +#define FILTERX_FUNC_STARTSWITH_USAGE "Usage: startswith(string, prefix, ignorecase=true)" \ +"or startswith(string, [prefix_1, prefix_2, ..], ignorecase=true)" + +#define FILTERX_FUNC_ENDSWITH_USAGE "Usage: endswith(string, suffix, ignorecase=true)" \ +"or endswith(string, [suffix_1, suffix_2, ..], ignorecase=true)" + +#define FILTERX_FUNC_INCLUDES_USAGE "Usage: includes(string, substring, ignorecase=true)" \ +"or includes(string, [substring_1, substring_2, ..], ignorecase=true)" + +typedef struct _FilterXStringWithCache +{ + FilterXExpr *expr; + gchar *str_value; + gsize str_len; +} FilterXStringWithCache; + +typedef gboolean (*FilterXExprAffixProcessFunc)(const gchar *haystack, gsize haystack_len, const gchar *needle, + gsize needle_len); +typedef struct _FilterXExprAffix +{ + FilterXFunction super; + gboolean ignore_case; + FilterXExpr *haystack; + struct + { + FilterXExpr *expr; + GPtrArray *cached_strings; + } needle; + FilterXExprAffixProcessFunc process; +} FilterXExprAffix; + +static GString * +_format_str_obj(FilterXObject *obj) +{ + GString *str = scratch_buffers_alloc(); + + const gchar *obj_str; + gsize str_len; + if(!filterx_object_extract_string_ref(obj, &obj_str, &str_len)) + return NULL; + g_string_assign(str, obj_str); + + return str; +} + +static gboolean +_do_casefold(GString *str) +{ + if(str->len > G_MAXSSIZE) + return FALSE; + + gchar *casefolded_str = g_utf8_casefold(str->str, (gssize) str->len); + g_string_assign(str, casefolded_str); + + g_free(casefolded_str); + return TRUE; +} + +static GString * +_expr_format(FilterXExpr *expr, gboolean ignore_case) +{ + FilterXObject *obj = filterx_expr_eval_typed(expr); + if (!obj) + return NULL; + + GString *result = _format_str_obj(obj); + if (!result) + filterx_eval_push_error("failed to extract string value, repr() failed", expr, obj); + filterx_object_unref(obj); + + if (ignore_case && !_do_casefold(result)) + return NULL; + + return result; +} + +static GString * +_obj_format(FilterXObject *obj, gboolean ignore_case) +{ + GString *result = _format_str_obj(obj); + if (!result) + filterx_eval_push_error("failed to extract string value, repr() failed", NULL, obj); + + if (ignore_case && !_do_casefold(result)) + return NULL; + + return result; +} + +static gboolean +_string_with_cache_fill_cache(FilterXStringWithCache *self, gboolean ignore_case) +{ + if(!filterx_expr_is_literal(self->expr)) + return TRUE; + + ScratchBuffersMarker marker; + scratch_buffers_mark(&marker); + + GString *res = _expr_format(self->expr, ignore_case); + if (!res) + return FALSE; + + self->str_value = g_strdup(res->str); + self->str_len = res->len; + + scratch_buffers_reclaim_marked(marker); + return TRUE; +} + +static FilterXStringWithCache * +_string_with_cache_new(FilterXExpr *expr, gboolean ignore_case) +{ + FilterXStringWithCache *self = g_new0(FilterXStringWithCache, 1); + self->expr = filterx_expr_ref(expr); + + if (!_string_with_cache_fill_cache(self, ignore_case)) + return NULL; + + return self; +} + +static void +_string_with_cache_free(FilterXStringWithCache *self) +{ + filterx_expr_unref(self->expr); + g_free(self->str_value); + g_free(self); +} + +static gboolean +_string_with_cache_get_string_value(FilterXStringWithCache *self, gboolean ignore_case, const gchar **dst, + gsize *len) +{ + if(self->str_value) + { + *dst = self->str_value; + *len = self->str_len; + return TRUE; + } + + GString *res = _expr_format(self->expr, ignore_case); + if (!res) + return FALSE; + + *dst = res->str; + *len = res->len; + return TRUE; +} + +static gboolean +_cache_needle(guint64 index, FilterXExpr *value, gpointer user_data) +{ + gboolean *ignore_case = ((gpointer *)user_data)[0]; + GPtrArray *cached_strings = ((gpointer *) user_data)[1]; + + FilterXStringWithCache *obj_with_cache = _string_with_cache_new(value, *ignore_case); + if(!obj_with_cache) + return FALSE; + + g_ptr_array_add(cached_strings, obj_with_cache); + return TRUE; +} + + +static gboolean +_expr_affix_init_needle(FilterXExprAffix *self) +{ + if (filterx_expr_is_literal(self->needle.expr)) + { + FilterXStringWithCache *obj_with_cache = _string_with_cache_new(self->needle.expr, self->ignore_case); + if (!obj_with_cache) + return FALSE; + g_ptr_array_add(self->needle.cached_strings, obj_with_cache); + } + if (filterx_expr_is_literal_list_generator(self->needle.expr)) + { + gpointer user_data[] = {&self->ignore_case, self->needle.cached_strings}; + if(!filterx_literal_list_generator_foreach(self->needle.expr, _cache_needle, user_data)) + return FALSE; + } + return TRUE; +} + +static void +_expr_affix_free(FilterXExpr *s) +{ + FilterXExprAffix *self = (FilterXExprAffix *) s; + filterx_expr_unref(self->haystack); + filterx_expr_unref(self->needle.expr); + g_ptr_array_free(self->needle.cached_strings, TRUE); + + filterx_function_free_method(&self->super); +} + +static GPtrArray * +_expr_affix_eval_needle(FilterXExprAffix *self) +{ + FilterXObject *needle_obj = filterx_expr_eval(self->needle.expr); + if (!needle_obj) + return NULL; + GPtrArray *needles = NULL; + + if (filterx_object_is_type(needle_obj, &FILTERX_TYPE_NAME(string)) + || filterx_object_is_type(needle_obj, &FILTERX_TYPE_NAME(message_value))) + { + GString *str_value = _obj_format(needle_obj, self->ignore_case); + filterx_object_unref(needle_obj); + if (!str_value) + return NULL; + needles = g_ptr_array_sized_new(1); + g_ptr_array_add(needles, str_value); + return needles; + } + if (filterx_object_is_type(needle_obj, &FILTERX_TYPE_NAME(list))) + { + guint64 len; + filterx_object_len(needle_obj, &len); + + if (len == 0) + { + filterx_object_unref(needle_obj); + return NULL; + } + needles = g_ptr_array_sized_new(len); + + for (guint64 i = 0; i < len; i++) + { + FilterXObject *elem = filterx_list_get_subscript(needle_obj, i); + + GString *str_value = _obj_format(elem, self->ignore_case); + filterx_object_unref(elem); + if (!str_value) + goto error; + g_ptr_array_add(needles, str_value); + } + filterx_object_unref(needle_obj); + return needles; + } + +error: + if(needles) + g_ptr_array_free(needles, TRUE); + return NULL; +} + +static FilterXObject * +_expr_affix_eval(FilterXExpr *s) +{ + FilterXExprAffix *self = (FilterXExprAffix *)s; + FilterXObject *result = NULL; + + ScratchBuffersMarker marker; + scratch_buffers_mark(&marker); + GString *haystack_str = _expr_format(self->haystack, self->ignore_case); + if (!haystack_str) + goto exit; + + gboolean matches = FALSE; + const gchar *needle_str; + gsize needle_len; + + if(self->needle.cached_strings->len != 0) + { + for(guint64 i = 0; i < self->needle.cached_strings->len && !matches; i++) + { + FilterXStringWithCache *current_needle = g_ptr_array_index(self->needle.cached_strings, i); + if (!_string_with_cache_get_string_value(current_needle, self->ignore_case, &needle_str, &needle_len)) + goto exit; + matches = self->process(haystack_str->str, haystack_str->len, needle_str, needle_len); + } + result = filterx_boolean_new(matches); + goto exit; + } + + GPtrArray *needle_list = _expr_affix_eval_needle(self); + if (!needle_list) + goto exit; + + if(needle_list->len != 0) + { + for(guint64 i = 0; i < needle_list->len && !matches; i++) + { + GString *current_needle = g_ptr_array_index(needle_list, i); + matches = self->process(haystack_str->str, haystack_str->len, current_needle->str, current_needle->len); + } + } + g_ptr_array_free(needle_list, TRUE); + result = filterx_boolean_new(matches); + +exit: + scratch_buffers_reclaim_marked(marker); + return result; + +} + +static gboolean +_extract_args(FilterXExprAffix *self, FilterXFunctionArgs *args, GError **error, const gchar *function_usage) +{ + + if (filterx_function_args_len(args) != 2) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "invalid number of arguments. %s", function_usage); + return FALSE; + } + + gboolean exists, eval_error; + gboolean value = filterx_function_args_get_named_literal_boolean(args, "ignorecase", &exists, &eval_error); + + if (eval_error) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "ignorecase argument must be boolean literal. %s", function_usage); + return FALSE; + } + if (!exists) + self->ignore_case = FALSE; + else + self->ignore_case = value; + + self->haystack = filterx_function_args_get_expr(args, 0); + g_assert(self->haystack); + + self->needle.expr = filterx_function_args_get_expr(args, 1); + g_assert(self->needle.expr); + + return TRUE; +} + +static FilterXExpr * +_function_affix_new(FilterXFunctionArgs *args, + const gchar *affix_name, + FilterXExprAffixProcessFunc process_func, + const gchar *usage, + GError **error) +{ + FilterXExprAffix *self = g_new0(FilterXExprAffix, 1); + + filterx_function_init_instance(&self->super, affix_name); + self->super.super.eval = _expr_affix_eval; + self->super.super.free_fn = _expr_affix_free; + + self->needle.cached_strings = g_ptr_array_new_with_free_func((GDestroyNotify) _string_with_cache_free); + self->process = process_func; + + + if (!_extract_args(self, args, error, usage)) + goto error; + + if(!_expr_affix_init_needle(self)) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "needle caching failed."); + goto error; + } + + filterx_function_args_free(args); + return &self->super.super; + +error: + filterx_function_args_free(args); + filterx_expr_unref(&self->super.super); + return NULL; +} + +static gboolean +_function_startswith_process(const gchar *haystack, gsize haystack_len, const gchar *needle, gsize needle_len) +{ + if (needle_len > haystack_len) + return FALSE; + return memcmp(haystack, needle, needle_len) == 0; +} + +FilterXExpr * +filterx_function_startswith_new(FilterXFunctionArgs *args, GError **error) +{ + return _function_affix_new(args, "startswith", _function_startswith_process, + FILTERX_FUNC_STARTSWITH_USAGE, error); +} + +static gboolean +_function_endswith_process(const gchar *haystack, gsize haystack_len, const gchar *needle, gsize needle_len) +{ + if (needle_len > haystack_len) + return FALSE; + return memcmp(haystack + haystack_len - needle_len, needle, needle_len) == 0; +} + +FilterXExpr * +filterx_function_endswith_new(FilterXFunctionArgs *args, GError **error) +{ + return _function_affix_new(args, "endswith", _function_endswith_process, FILTERX_FUNC_ENDSWITH_USAGE, + error); +} + +static gboolean +_function_includes_process(const gchar *haystack, gsize haystack_len, const gchar *needle, gsize needle_len) +{ + if (needle_len > haystack_len) + return FALSE; + return g_strstr_len(haystack, haystack_len, needle) != NULL; +} + +FilterXExpr * +filterx_function_includes_new(FilterXFunctionArgs *args, GError **error) +{ + return _function_affix_new(args, "includes", _function_includes_process, FILTERX_FUNC_INCLUDES_USAGE, + error); +} diff --git a/lib/filterx/func-str.h b/lib/filterx/func-str.h new file mode 100644 index 000000000..81856ed35 --- /dev/null +++ b/lib/filterx/func-str.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2024 Axoflow + * Copyright (c) 2024 Szilard Parrag + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * As an additional exemption you are allowed to compile & link against the + * OpenSSL libraries as published by the OpenSSL project. See the file + * COPYING for details. + * + */ + +#ifndef FILTERX_FUNC_STR_H_INCLUDED +#define FILTERX_FUNC_STR_H_INCLUDED + +#include "filterx/expr-function.h" + +FilterXExpr *filterx_function_startswith_new(FilterXFunctionArgs *args, GError **error); +FilterXExpr *filterx_function_endswith_new(FilterXFunctionArgs *args, GError **error); +FilterXExpr *filterx_function_includes_new(FilterXFunctionArgs *args, GError **error); + +#endif diff --git a/tests/light/functional_tests/filterx/test_filterx.py b/tests/light/functional_tests/filterx/test_filterx.py index 9022e0cf2..e9ef1b5b9 100644 --- a/tests/light/functional_tests/filterx/test_filterx.py +++ b/tests/light/functional_tests/filterx/test_filterx.py @@ -2304,3 +2304,37 @@ def test_parse_windows_eventlog_xml(config, syslog_ng): }, }, } + + +def test_startswith_endswith_includes(config, syslog_ng): + (file_true, file_false) = create_config( + config, r""" + result = json(); + if (startswith($MSG, ["dummy_prefix", "foo"])) + { + result.startswith_foo = true; + }; + bar_var = "bar"; + if (includes($MSG, bar_var, ignorecase=true)) + { + result.contains_bar = true; + }; + baz_var = "baz"; + baz_list = ["dummy_suffix", baz_var]; + if (endswith($MSG, baz_list, ignorecase=true)) + { + result.endswith_baz = true; + }; + log_msg_value = ${values.str}; + if (includes(log_msg_value, log_msg_value)) + { + result.works_with_message_value = true; + }; + $MSG = json(); + $MSG = result; + """, msg="fooBARbAz", + ) + syslog_ng.start(config) + + assert "processed" not in file_false.get_stats() + assert file_true.read_log() == '{"startswith_foo":true,"contains_bar":true,"endswith_baz":true,"works_with_message_value":true}\n'