From 34eb0c2bccee14c496ef66fa050743ed6debb71f Mon Sep 17 00:00:00 2001 From: shifter Date: Wed, 15 May 2024 12:10:29 +0200 Subject: [PATCH 1/6] csvparser: add filterx-func-parse-csv Signed-off-by: shifter --- modules/csvparser/CMakeLists.txt | 2 + modules/csvparser/Makefile.am | 4 +- modules/csvparser/csvparser-plugin.c | 6 + modules/csvparser/filterx-func-parse-csv.c | 412 +++++++++++++++++++++ modules/csvparser/filterx-func-parse-csv.h | 34 ++ 5 files changed, 457 insertions(+), 1 deletion(-) create mode 100644 modules/csvparser/filterx-func-parse-csv.c create mode 100644 modules/csvparser/filterx-func-parse-csv.h diff --git a/modules/csvparser/CMakeLists.txt b/modules/csvparser/CMakeLists.txt index aab4d545f9..e9b000d24b 100644 --- a/modules/csvparser/CMakeLists.txt +++ b/modules/csvparser/CMakeLists.txt @@ -4,6 +4,8 @@ set(CSVPARSER_SOURCES csvparser-parser.c csvparser-parser.h csvparser-plugin.c + filterx-func-parse-csv.h + filterx-func-parse-csv.c ) add_module( diff --git a/modules/csvparser/Makefile.am b/modules/csvparser/Makefile.am index a6a43fdd1e..62d8e9c10d 100644 --- a/modules/csvparser/Makefile.am +++ b/modules/csvparser/Makefile.am @@ -5,7 +5,9 @@ modules_csvparser_libcsvparser_la_SOURCES = \ modules/csvparser/csvparser-grammar.y \ modules/csvparser/csvparser-parser.c \ modules/csvparser/csvparser-parser.h \ - modules/csvparser/csvparser-plugin.c + modules/csvparser/csvparser-plugin.c \ + modules/csvparser/filterx-func-parse-csv.h \ + modules/csvparser/filterx-func-parse-csv.c modules_csvparser_libcsvparser_la_CPPFLAGS = \ $(AM_CPPFLAGS) \ diff --git a/modules/csvparser/csvparser-plugin.c b/modules/csvparser/csvparser-plugin.c index f6271b25b8..5cbd7aee01 100644 --- a/modules/csvparser/csvparser-plugin.c +++ b/modules/csvparser/csvparser-plugin.c @@ -25,6 +25,7 @@ #include "csvparser.h" #include "plugin.h" #include "plugin-types.h" +#include "filterx-func-parse-csv.h" extern CfgParser csvparser_parser; @@ -35,6 +36,11 @@ static Plugin 
csvparser_plugins[] = .name = "csv-parser", .parser = &csvparser_parser, }, + { + .type = LL_CONTEXT_FILTERX_FUNC, + .name = "parse_csv", + .construct = filterx_function_construct_parse_csv, + }, }; gboolean diff --git a/modules/csvparser/filterx-func-parse-csv.c b/modules/csvparser/filterx-func-parse-csv.c new file mode 100644 index 0000000000..645de88c49 --- /dev/null +++ b/modules/csvparser/filterx-func-parse-csv.c @@ -0,0 +1,412 @@ +/* + * Copyright (c) 2024 shifter + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * As an additional exemption you are allowed to compile & link against the + * OpenSSL libraries as published by the OpenSSL project. See the file + * COPYING for details. 
+ * + */ + +#include "filterx-func-parse-csv.h" +#include "filterx/object-string.h" +#include "filterx/object-primitive.h" +#include "filterx/expr-literal.h" +#include "filterx/filterx-eval.h" +#include "filterx/filterx-globals.h" +#include "filterx/object-json.h" +#include "filterx/object-message-value.h" +#include "filterx/object-null.h" +#include "filterx/filterx-object.h" +#include "filterx/object-dict-interface.h" +#include "filterx/object-list-interface.h" + +#include "scanner/csv-scanner/csv-scanner.h" +#include "parser/parser-expr.h" +#include "scratch-buffers.h" + + +typedef struct FilterXFunctionParseCSV_ +{ + FilterXFunction super; + FilterXExpr *msg; + CSVScannerOptions options; + FilterXExpr *columns; +} FilterXFunctionParseCSV; + +#define STRINGIFY(lit) #lit +#define NUM_MANDATORY_ARGS 1 + +typedef enum FxFnParseCSVOpt_ +{ + FxFnParseCSVOptColumns = NUM_MANDATORY_ARGS, + FxFnParseCSVOptDelimiters, + FxFnParseCSVOptDialect, + FxFnParseCSVOptGreedy, + FxFnParseCSVOptStripWhiteSpace, + + FxFnParseCSVOptFirst = FxFnParseCSVOptDelimiters, // workaround, since columns need to parse on a different way yet (orig val: FxFnParseCSVOptColumns) + FxFnParseCSVOptLast = FxFnParseCSVOptStripWhiteSpace +} FxFnParseCSVOpt; + +struct ArgumentDescriptor +{ + const gchar *name; + FilterXType *acceptable_type; +}; + +static const struct ArgumentDescriptor args_descs[FxFnParseCSVOptLast - FxFnParseCSVOptFirst + 1] = +{ + // { + // .name = "columns", + // .acceptable_type = &FILTERX_TYPE_NAME(json_array), + // }, + { + .name = "delimiters", + .acceptable_type = &FILTERX_TYPE_NAME(string), + }, + { + .name = "dialect", + .acceptable_type = &FILTERX_TYPE_NAME(string), + }, + { + .name = "greedy", + .acceptable_type = &FILTERX_TYPE_NAME(boolean), + }, + { + .name = "strip_whitespaces", + .acceptable_type = &FILTERX_TYPE_NAME(boolean), + }, +}; + +#define NUM_CVS_SCANNER_DIALECTS 4 + +static const gchar *parse_csv_dialect_enum_names[NUM_CVS_SCANNER_DIALECTS] = +{ + 
STRINGIFY(CSV_SCANNER_ESCAPE_NONE), + STRINGIFY(CSV_SCANNER_ESCAPE_BACKSLASH), + STRINGIFY(CSV_SCANNER_ESCAPE_BACKSLASH_WITH_SEQUENCES), + STRINGIFY(CSV_SCANNER_ESCAPE_DOUBLE_CHAR), +}; + +static gboolean +_parse_columns(FilterXFunctionParseCSV *self, GList **col_names) +{ + gboolean result = FALSE; + if (!self->columns) + return TRUE; + FilterXObject *cols_obj = filterx_expr_eval(self->columns); + if (!cols_obj) + return FALSE; + + if (filterx_object_is_type(cols_obj, &FILTERX_TYPE_NAME(null))) + { + result = TRUE; + goto exit; + } + + if (!filterx_object_is_type(cols_obj, &FILTERX_TYPE_NAME(json_array))) + goto exit; + + guint64 size; + if (!filterx_object_len(cols_obj, &size)) + return FALSE; + + for (guint64 i = 0; i < size; i++) + { + FilterXObject *col = filterx_list_get_subscript(cols_obj, i); + if (filterx_object_is_type(col, &FILTERX_TYPE_NAME(string))) + { + const gchar *col_name = filterx_string_get_value(col, NULL); + *col_names = g_list_append(*col_names, g_strdup(col_name)); + } + filterx_object_unref(col); + } + + result = TRUE; +exit: + filterx_object_unref(cols_obj); + return result; +} + +static FilterXObject * +_eval(FilterXExpr *s) +{ + FilterXFunctionParseCSV *self = (FilterXFunctionParseCSV *) s; + + FilterXObject *obj = filterx_expr_eval(self->msg); + if (!obj) + return NULL; + + gboolean ok = FALSE; + + gsize len; + const gchar *input; + if (filterx_object_is_type(obj, &FILTERX_TYPE_NAME(string))) + input = filterx_string_get_value(obj, &len); + else if (filterx_object_is_type(obj, &FILTERX_TYPE_NAME(message_value))) + input = filterx_message_value_get_value(obj, &len); + else + goto exit; + + FilterXObject *result = NULL; + GList *cols = NULL; + if (!_parse_columns(self, &cols)) + goto exit; + + if (cols) + { + csv_scanner_options_set_expected_columns(&self->options, g_list_length(cols)); + result = filterx_json_object_new_empty(); + } + else + result = filterx_json_array_new_empty(); + + CSVScanner scanner; + csv_scanner_init(&scanner, 
&self->options, input); + + GList *col = cols; + while (csv_scanner_scan_next(&scanner)) + { + if (cols) + { + if (!col) + break; + FilterXObject *key = filterx_string_new(col->data, -1); + FilterXObject *val = filterx_string_new(csv_scanner_get_current_value(&scanner), + csv_scanner_get_current_value_len(&scanner)); + + ok = filterx_object_set_subscript(result, key, &val); + + filterx_object_unref(key); + filterx_object_unref(val); + + if (!ok) + goto exit; + col = g_list_next(col); + } + else + { + const gchar *current_value = csv_scanner_get_current_value(&scanner); + gint current_value_len = csv_scanner_get_current_value_len(&scanner); + FilterXObject *val = filterx_string_new(current_value, current_value_len); + + ok = filterx_list_append(result, &val); + + filterx_object_unref(val); + } + } + +exit: + if (!ok) + { + filterx_object_unref(result); + } + g_list_free_full(cols, (GDestroyNotify)g_free); + filterx_object_unref(obj); + csv_scanner_deinit(&scanner); + return ok?result:NULL; +} + +static void +_free(FilterXExpr *s) +{ + FilterXFunctionParseCSV *self = (FilterXFunctionParseCSV *) s; + filterx_expr_unref(self->msg); + filterx_expr_unref(self->columns); + csv_scanner_options_clean(&self->options); + filterx_function_free_method(&self->super); +} + +static FilterXExpr * +_extract_parse_csv_msg_expr(GList *argument_expressions, GError **error) +{ + FilterXExpr *msg_expr = filterx_expr_ref(((FilterXExpr *) argument_expressions->data)); + if (!msg_expr) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "argument must be set: message. " FILTERX_FUNC_PARSE_CSV_USAGE); + return NULL; + } + + return msg_expr; +} + +static FilterXExpr * +_extract_parse_csv_columns_expr(GList *argument_expressions, GError **error) +{ + gsize arguments_len = argument_expressions ? 
g_list_length(argument_expressions) : 0; + if (arguments_len - NUM_MANDATORY_ARGS >= FxFnParseCSVOptColumns) + { + return filterx_expr_ref((FilterXExpr *) g_list_nth_data(argument_expressions, FxFnParseCSVOptColumns)); + } + return NULL; +} + +static gboolean +_extract_parse_csv_opts(FilterXFunctionParseCSV *self, GList *argument_expressions, GError **error) +{ + gsize arguments_len = argument_expressions ? g_list_length(argument_expressions) : 0; + if (arguments_len < NUM_MANDATORY_ARGS) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "invalid number of arguments. " FILTERX_FUNC_PARSE_CSV_USAGE); + return FALSE; + } + + guint32 opt_flags = self->options.flags; + + FilterXObject *arg_obj = NULL; + int opt_id = FxFnParseCSVOptFirst; + for (GList *elem = g_list_nth(argument_expressions, opt_id); elem; elem = elem->next) + { + if (opt_id > FxFnParseCSVOptLast) + break; + + FilterXExpr *argument_expr = (FilterXExpr *)elem->data; + if (!argument_expr || !filterx_expr_is_literal(argument_expr)) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "'%s' argument must be string literal. " FILTERX_FUNC_PARSE_CSV_USAGE, args_descs[opt_id - FxFnParseCSVOptFirst].name); + return FALSE; + } + + arg_obj = filterx_expr_eval(argument_expr); + if (!arg_obj) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "unable to parse argument '%s'. 
" FILTERX_FUNC_PARSE_CSV_USAGE, args_descs[opt_id - FxFnParseCSVOptFirst].name); + return FALSE; + } + + // optional args must be nullable + if (filterx_object_is_type(arg_obj, &FILTERX_TYPE_NAME(null))) + goto next; + + + const gchar *opt_str = NULL; + gboolean opt_bool = FALSE; + if (!filterx_object_is_type(arg_obj, args_descs[opt_id - FxFnParseCSVOptFirst].acceptable_type)) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "'%s' argument must be string literal " FILTERX_FUNC_PARSE_CSV_USAGE, args_descs[opt_id - FxFnParseCSVOptFirst].name); + goto error; + } + if (filterx_object_is_type(arg_obj, &FILTERX_TYPE_NAME(string))) + { + opt_str = filterx_string_get_value(arg_obj, NULL); + if (!opt_str) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "'%s' argument must be string literal " FILTERX_FUNC_PARSE_CSV_USAGE, args_descs[opt_id - FxFnParseCSVOptFirst].name); + goto error; + } + } + else if (filterx_object_is_type(arg_obj, &FILTERX_TYPE_NAME(boolean))) + opt_bool = filterx_object_truthy(arg_obj); + + + switch (opt_id) + { + case FxFnParseCSVOptColumns: + // this should not happened + // the framework is not yet able to handle lists/dicts on parse phase + // do nothing, parse columns in eval temporary + break; + case FxFnParseCSVOptDelimiters: + csv_scanner_options_set_delimiters(&self->options, opt_str); + break; + case FxFnParseCSVOptDialect: + { + CSVScannerDialect dialect = -1; + for (int e = 0; e < NUM_CVS_SCANNER_DIALECTS; e++) + { + if (strcmp(parse_csv_dialect_enum_names[e], opt_str) == 0) + { + dialect = e; + break; + } + } + if (dialect != -1) + csv_scanner_options_set_dialect(&self->options, dialect); + break; + } + case FxFnParseCSVOptGreedy: + if (opt_bool) + opt_flags |= CSV_SCANNER_GREEDY; + else + opt_flags &= ~CSV_SCANNER_GREEDY; + break; + case FxFnParseCSVOptStripWhiteSpace: + if (opt_bool) + opt_flags |= CSV_SCANNER_STRIP_WHITESPACE; + else + opt_flags &= 
~CSV_SCANNER_STRIP_WHITESPACE; + break; + default: + g_assert_not_reached(); + break; + } +next: + filterx_object_unref(arg_obj); + opt_id++; + } + + csv_scanner_options_set_flags(&self->options, opt_flags); + + return TRUE; +error: + filterx_object_unref(arg_obj); + return FALSE; +} + +FilterXExpr * +filterx_function_parse_csv_new(const gchar *function_name, GList *argument_expressions, GError **error) + +{ + FilterXFunctionParseCSV *self = g_new0(FilterXFunctionParseCSV, 1); + filterx_function_init_instance(&self->super, function_name); + self->super.super.eval = _eval; + self->super.super.free_fn = _free; + csv_scanner_options_set_delimiters(&self->options, " "); + csv_scanner_options_set_quote_pairs(&self->options, "\"\"''"); + csv_scanner_options_set_flags(&self->options, CSV_SCANNER_STRIP_WHITESPACE); + csv_scanner_options_set_dialect(&self->options, CSV_SCANNER_ESCAPE_NONE); + + // there is no filterx context in construct phase, unable to parse json variable + // and json literals are not yet supported + if (!_extract_parse_csv_opts(self, argument_expressions, error)) + goto error; + + self->columns = _extract_parse_csv_columns_expr(argument_expressions, error); + + self->msg = _extract_parse_csv_msg_expr(argument_expressions, error); + if (!self->msg) + goto error; + + g_list_free_full(argument_expressions, (GDestroyNotify) filterx_expr_unref); + return &self->super.super; + +error: + g_list_free_full(argument_expressions, (GDestroyNotify) filterx_expr_unref); + filterx_expr_unref(&self->super.super); + return NULL; +} + +gpointer +filterx_function_construct_parse_csv(Plugin *self) +{ + return (gpointer) filterx_function_parse_csv_new; +} diff --git a/modules/csvparser/filterx-func-parse-csv.h b/modules/csvparser/filterx-func-parse-csv.h new file mode 100644 index 0000000000..e8ea665e11 --- /dev/null +++ b/modules/csvparser/filterx-func-parse-csv.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2024 shifter + * + * This program is free software; you can 
redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * As an additional exemption you are allowed to compile & link against the + * OpenSSL libraries as published by the OpenSSL project. See the file + * COPYING for details. + * + */ + +#ifndef FILTERX_FUNC_PARSE_CSV_H_INCLUDED +#define FILTERX_FUNC_PARSE_CSV_H_INCLUDED + +#include "plugin.h" +#include "filterx/expr-function.h" + +#define FILTERX_FUNC_PARSE_CSV_USAGE "Usage: parse_csv(message string [, [json_array cols], [string delimiters], [string dialect], [boolean greedy], [boolean strip_whitespaces]])" + +FilterXExpr *filterx_function_parse_csv_new(const gchar *function_name, GList *argument_expressions, GError **error); +gpointer filterx_function_construct_parse_csv(Plugin *self); + +#endif From ee6e6828fe6b5a098733baccfee23ad9438b7ab0 Mon Sep 17 00:00:00 2001 From: shifter Date: Wed, 15 May 2024 12:11:17 +0200 Subject: [PATCH 2/6] csvparser: add filterx-func-parse-csv unit tests Signed-off-by: shifter --- modules/csvparser/tests/CMakeLists.txt | 1 + modules/csvparser/tests/Makefile.am | 15 +- .../tests/test_filterx_func_parse_csv.c | 436 ++++++++++++++++++ 3 files changed, 448 insertions(+), 4 deletions(-) create mode 100644 modules/csvparser/tests/test_filterx_func_parse_csv.c diff --git a/modules/csvparser/tests/CMakeLists.txt b/modules/csvparser/tests/CMakeLists.txt index c64aab715c..109fd35348 100644 --- 
a/modules/csvparser/tests/CMakeLists.txt +++ b/modules/csvparser/tests/CMakeLists.txt @@ -2,3 +2,4 @@ add_unit_test(CRITERION TARGET test_csvparser DEPENDS csvparser) add_unit_test(LIBTEST CRITERION TARGET test_csvparser_from_config DEPENDS csvparser) add_unit_test(CRITERION TARGET test_csvparser_perf DEPENDS csvparser) add_unit_test(CRITERION TARGET test_csvparser_statistics DEPENDS csvparser) +add_unit_test(LIBTEST CRITERION TARGET test_filterx_func_parse_csv DEPENDS csvparser) diff --git a/modules/csvparser/tests/Makefile.am b/modules/csvparser/tests/Makefile.am index 4ea2f75e65..cd91785d07 100644 --- a/modules/csvparser/tests/Makefile.am +++ b/modules/csvparser/tests/Makefile.am @@ -1,15 +1,16 @@ modules_csvparser_tests_TESTS = \ modules/csvparser/tests/test_csvparser \ modules/csvparser/tests/test_csvparser_from_config \ - modules/csvparser/tests/test_csvparser_perf + modules/csvparser/tests/test_csvparser_perf \ + modules/csvparser/tests/test_filterx_func_parse_csv check_PROGRAMS += \ - ${modules_csvparser_tests_TESTS} + ${modules_csvparser_tests_TESTS} EXTRA_DIST += modules/csvparser/tests/CMakeLists.txt \ modules/basicfuncs/list-funcs.c \ modules/basicfuncs/tf-template.c \ - modules/basicfuncs/tests/CMakeLists.txt + modules/basicfuncs/tests/CMakeLists.txt modules_csvparser_tests_test_csvparser_CFLAGS = \ $(TEST_CFLAGS) -I$(top_srcdir)/modules/csvparser @@ -22,7 +23,7 @@ modules_csvparser_tests_test_csvparser_from_config_CFLAGS = \ modules_csvparser_tests_test_csvparser_from_config_LDADD = \ $(TEST_LDADD) \ $(PREOPEN_SYSLOGFORMAT) $(PREOPEN_BASICFUNCS) \ - -dlpreopen $(top_builddir)/modules/csvparser/libcsvparser.la + -dlpreopen $(top_builddir)/modules/csvparser/libcsvparser.la modules_csvparser_tests_test_csvparser_perf_CFLAGS = \ $(TEST_CFLAGS) -I$(top_srcdir)/modules/csvparser @@ -37,3 +38,9 @@ modules_csvparser_tests_test_csvparser_statistics_CFLAGS = \ modules_csvparser_tests_test_csvparser_statistics_LDADD = \ $(TEST_LDADD) \ -dlpreopen 
$(top_builddir)/modules/csvparser/libcsvparser.la + +modules_csvparser_tests_test_filterx_func_parse_csv_CFLAGS = \ + $(TEST_CFLAGS) -I$(top_srcdir)/modules/csvparser +modules_csvparser_tests_test_filterx_func_parse_csv_LDADD = \ + $(TEST_LDADD) \ + -dlpreopen $(top_builddir)/modules/csvparser/libcsvparser.la diff --git a/modules/csvparser/tests/test_filterx_func_parse_csv.c b/modules/csvparser/tests/test_filterx_func_parse_csv.c new file mode 100644 index 0000000000..d37b94b1af --- /dev/null +++ b/modules/csvparser/tests/test_filterx_func_parse_csv.c @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2024 shifter + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * As an additional exemption you are allowed to compile & link against the + * OpenSSL libraries as published by the OpenSSL project. See the file + * COPYING for details. + */ + +#include +#include "libtest/filterx-lib.h" + +#include "apphook.h" +#include "scratch-buffers.h" + +#include "filterx/object-string.h" +#include "filterx/object-null.h" +#include "filterx/expr-literal.h" +#include "filterx/object-json.h" +#include "filterx-func-parse-csv.h" +#include "filterx/object-list-interface.h" +#include "scanner/csv-scanner/csv-scanner.h" +#include "filterx/object-primitive.h" + +static FilterXObject * +_generate_column_list(const gchar *column_name, ...) 
+{ + FilterXObject *result = filterx_json_array_new_empty(); + + va_list args; + va_start(args, column_name); + + const gchar *next_column = column_name; + while (next_column != NULL) + { + FilterXObject *col_name = filterx_string_new(next_column, -1); + cr_assert(filterx_list_append(result, &col_name)); + filterx_object_unref(col_name); + next_column = va_arg(args, const gchar *); + } + + va_end(args); + va_start(args, column_name); + + return result; +} + +Test(filterx_func_parse_csv, test_helper_generate_column_list_empty) +{ + FilterXObject *col_names = _generate_column_list(NULL); + cr_assert_not_null(col_names); + + GString *repr = scratch_buffers_alloc(); + LogMessageValueType lmvt; + + cr_assert(filterx_object_marshal(col_names, repr, &lmvt)); + cr_assert_str_eq(repr->str, ""); + filterx_object_unref(col_names); +} + +Test(filterx_func_parse_csv, test_helper_generate_column_list) +{ + FilterXObject *col_names = _generate_column_list("1st", NULL); + cr_assert_not_null(col_names); + + GString *repr = scratch_buffers_alloc(); + LogMessageValueType lmvt; + + cr_assert(filterx_object_marshal(col_names, repr, &lmvt)); + cr_assert_str_eq(repr->str, "1st"); + filterx_object_unref(col_names); +} + +Test(filterx_func_parse_csv, test_helper_generate_column_list_multiple_elts) +{ + FilterXObject *col_names = _generate_column_list("1st", "2nd", "3rd", NULL); + cr_assert_not_null(col_names); + + GString *repr = scratch_buffers_alloc(); + LogMessageValueType lmvt; + + cr_assert(filterx_object_marshal(col_names, repr, &lmvt)); + cr_assert_str_eq(repr->str, "1st,2nd,3rd"); + filterx_object_unref(col_names); +} + +Test(filterx_func_parse_csv, test_empty_args_error) +{ + GError *err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", NULL, &err); + + cr_assert_null(func); + cr_assert_not_null(err); + cr_assert(strstr(err->message, FILTERX_FUNC_PARSE_CSV_USAGE) != NULL); + g_error_free(err); +} + + +Test(filterx_func_parse_csv, 
test_skipped_opts_causes_default_behaviour) +{ + GList *args = NULL; + args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1))); + + GError *err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + + cr_assert_null(err); + + FilterXObject *obj = filterx_expr_eval(func); + + cr_assert_not_null(obj); + cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array))); + + GString *repr = scratch_buffers_alloc(); + + LogMessageValueType lmvt; + cr_assert(filterx_object_marshal(obj, repr, &lmvt)); + + cr_assert_str_eq(repr->str, "foo,bar,baz,tik,tak,toe"); + filterx_expr_unref(func); + filterx_object_unref(obj); + g_error_free(err); +} + +Test(filterx_func_parse_csv, test_columns_optional_argument_is_nullable) +{ + GList *args = NULL; + args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1))); + args = g_list_append(args, filterx_literal_new(filterx_null_new())); + + GError *err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + + cr_assert_null(err); + + FilterXObject *obj = filterx_expr_eval(func); + + cr_assert_not_null(obj); + cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array))); + + GString *repr = scratch_buffers_alloc(); + + LogMessageValueType lmvt; + cr_assert(filterx_object_marshal(obj, repr, &lmvt)); + + cr_assert_str_eq(repr->str, "foo,bar,baz,tik,tak,toe"); + filterx_expr_unref(func); + filterx_object_unref(obj); + g_error_free(err); +} + +Test(filterx_func_parse_csv, test_set_optional_first_argument_column_names) +{ + GList *args = NULL; + args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar baz", -1))); + FilterXObject *col_names = _generate_column_list("1st", "2nd", "3rd", NULL); + args = g_list_append(args, filterx_literal_new(col_names)); + + GError *err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + + cr_assert_null(err); + 
+ FilterXObject *obj = filterx_expr_eval(func); + + cr_assert_not_null(obj); + cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_object))); + + GString *repr = scratch_buffers_alloc(); + + LogMessageValueType lmvt; + cr_assert(filterx_object_marshal(obj, repr, &lmvt)); + + cr_assert_str_eq(repr->str, "{\"1st\":\"foo\",\"2nd\":\"bar\",\"3rd\":\"baz\"}"); + filterx_expr_unref(func); + filterx_object_unref(obj); + g_error_free(err); +} + +Test(filterx_func_parse_csv, test_column_names_sets_expected_column_size_additional_columns_dropped) +{ + GList *args = NULL; + args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar baz more columns we did not expect", -1))); + FilterXObject *col_names = _generate_column_list("1st", "2nd", "3rd", NULL); // sets expected column size 3 + args = g_list_append(args, filterx_literal_new(col_names)); + + GError *err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + + cr_assert_null(err); + + FilterXObject *obj = filterx_expr_eval(func); + + cr_assert_not_null(obj); + cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_object))); + + GString *repr = scratch_buffers_alloc(); + + LogMessageValueType lmvt; + cr_assert(filterx_object_marshal(obj, repr, &lmvt)); + + cr_assert_str_eq(repr->str, "{\"1st\":\"foo\",\"2nd\":\"bar\",\"3rd\":\"baz\"}"); + filterx_expr_unref(func); + filterx_object_unref(obj); + g_error_free(err); +} + +Test(filterx_func_parse_csv, test_optional_argument_delimiters) +{ + GList *args = NULL; + args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar+baz;tik|tak:toe", -1))); + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // columns + args = g_list_append(args, filterx_literal_new(filterx_string_new(" +;", -1))); + + GError *err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + + cr_assert_null(err); + + FilterXObject *obj = filterx_expr_eval(func); + + 
cr_assert_not_null(obj); + cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array))); + + GString *repr = scratch_buffers_alloc(); + + LogMessageValueType lmvt; + cr_assert(filterx_object_marshal(obj, repr, &lmvt)); + + cr_assert_str_eq(repr->str, "foo,bar,baz,tik|tak:toe"); + filterx_expr_unref(func); + filterx_object_unref(obj); + g_error_free(err); +} + +Test(filterx_func_parse_csv, test_optional_argument_delimiters_is_nullable) +{ + GList *args = NULL; + args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar+baz;tik|tak:toe", -1))); + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // columns + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // delimiter + + GError *err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + + cr_assert_null(err); + + FilterXObject *obj = filterx_expr_eval(func); + + cr_assert_not_null(obj); + cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array))); + + GString *repr = scratch_buffers_alloc(); + + LogMessageValueType lmvt; + cr_assert(filterx_object_marshal(obj, repr, &lmvt)); + + cr_assert_str_eq(repr->str, "foo,bar+baz;tik|tak:toe"); + filterx_expr_unref(func); + filterx_object_unref(obj); + g_error_free(err); +} + +Test(filterx_func_parse_csv, test_optional_argument_dialect) +{ + GList *args = NULL; + args = g_list_append(args, filterx_literal_new(filterx_string_new("\"PTHREAD \\\"support initialized\"", -1))); + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // columns + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // delimiter + args = g_list_append(args, filterx_literal_new(filterx_string_new("CSV_SCANNER_ESCAPE_BACKSLASH", -1))); + + GError *err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + cr_assert_null(err); + + FilterXObject *obj = filterx_expr_eval(func); + + cr_assert_not_null(obj); + 
cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array))); + + GString *repr = scratch_buffers_alloc(); + + LogMessageValueType lmvt; + cr_assert(filterx_object_marshal(obj, repr, &lmvt)); + + cr_assert_str_eq(repr->str, "'PTHREAD \"support initialized'"); + filterx_expr_unref(func); + filterx_object_unref(obj); + g_error_free(err); +} + +Test(filterx_func_parse_csv, test_optional_argument_dialect_is_nullable) +{ + GList *args = NULL; + args = g_list_append(args, filterx_literal_new(filterx_string_new("\"PTHREAD \\\"support initialized\"", -1))); + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // columns + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // delimiter + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // dialect + + GError *err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + cr_assert_null(err); + + FilterXObject *obj = filterx_expr_eval(func); + + cr_assert_not_null(obj); + cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array))); + + GString *repr = scratch_buffers_alloc(); + + LogMessageValueType lmvt; + cr_assert(filterx_object_marshal(obj, repr, &lmvt)); + + cr_assert_str_eq(repr->str, "\"PTHREAD \\\\support\",'initialized\"'"); + filterx_expr_unref(func); + filterx_object_unref(obj); + g_error_free(err); +} + +Test(filterx_func_parse_csv, test_optional_argument_flag_greedy) +{ + GList *args = NULL; + args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1))); + args = g_list_append(args, filterx_literal_new(_generate_column_list("1st", "2nd", "3rd", "rest", NULL))); // columns + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // delimiter + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // dialect + args = g_list_append(args, filterx_literal_new(filterx_boolean_new(TRUE))); // greedy + + GError *err = NULL; + FilterXExpr *func = 
filterx_function_parse_csv_new("test", args, &err); + cr_assert_null(err); + + FilterXObject *obj = filterx_expr_eval(func); + + cr_assert_not_null(obj); + cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_object))); + + GString *repr = scratch_buffers_alloc(); + + LogMessageValueType lmvt; + cr_assert(filterx_object_marshal(obj, repr, &lmvt)); + + cr_assert_str_eq(repr->str, "{\"1st\":\"foo\",\"2nd\":\"bar\",\"3rd\":\"baz\",\"rest\":\"tik tak toe\"}"); + filterx_expr_unref(func); + filterx_object_unref(obj); + g_error_free(err); +} + +Test(filterx_func_parse_csv, test_optional_argument_flag_non_greedy) +{ + GList *args = NULL; + args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1))); + args = g_list_append(args, filterx_literal_new(_generate_column_list("1st", "2nd", "3rd", "rest", NULL))); // columns + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // delimiter + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // dialect + args = g_list_append(args, filterx_literal_new(filterx_boolean_new(FALSE))); // greedy + + GError *err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + cr_assert_null(err); + + FilterXObject *obj = filterx_expr_eval(func); + + cr_assert_not_null(obj); + cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_object))); + + GString *repr = scratch_buffers_alloc(); + + LogMessageValueType lmvt; + cr_assert(filterx_object_marshal(obj, repr, &lmvt)); + + cr_assert_str_eq(repr->str, "{\"1st\":\"foo\",\"2nd\":\"bar\",\"3rd\":\"baz\",\"rest\":\"tik\"}"); + filterx_expr_unref(func); + filterx_object_unref(obj); + g_error_free(err); +} + +Test(filterx_func_parse_csv, test_optional_argument_flag_strip_whitespace) +{ + GList *args = NULL; + args = g_list_append(args, filterx_literal_new(filterx_string_new(" foo , bar , baz , tik tak toe", -1))); + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // 
columns + args = g_list_append(args, filterx_literal_new(filterx_string_new(",", -1))); // delimiter + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // dialect + args = g_list_append(args, filterx_literal_new(filterx_null_new())); // greedy + args = g_list_append(args, filterx_literal_new(filterx_boolean_new(TRUE))); // strip_whitespace + + GError *err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + cr_assert_null(err); + + FilterXObject *obj = filterx_expr_eval(func); + + cr_assert_not_null(obj); + cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array))); + + GString *repr = scratch_buffers_alloc(); + + LogMessageValueType lmvt; + cr_assert(filterx_object_marshal(obj, repr, &lmvt)); + + cr_assert_str_eq(repr->str, "foo,bar,baz,\"tik tak toe\""); + filterx_expr_unref(func); + filterx_object_unref(obj); + g_error_free(err); +} + + +static void +setup(void) +{ + app_startup(); + init_libtest_filterx(); +} + +static void +teardown(void) +{ + scratch_buffers_explicit_gc(); + deinit_libtest_filterx(); + app_shutdown(); +} + +TestSuite(filterx_func_parse_csv, .init = setup, .fini = teardown); From 7ea3888c1dfa2b4490ed893206b050e7fd55128c Mon Sep 17 00:00:00 2001 From: shifter Date: Wed, 15 May 2024 16:06:30 +0200 Subject: [PATCH 3/6] csvparser: refactor filterx-func-parse-csv to use the newly introduced optional named args in filterx function Signed-off-by: shifter --- modules/csvparser/filterx-func-parse-csv.c | 284 ++++++++------------- modules/csvparser/filterx-func-parse-csv.h | 14 +- 2 files changed, 122 insertions(+), 176 deletions(-) diff --git a/modules/csvparser/filterx-func-parse-csv.c b/modules/csvparser/filterx-func-parse-csv.c index 645de88c49..7f7ec7a859 100644 --- a/modules/csvparser/filterx-func-parse-csv.c +++ b/modules/csvparser/filterx-func-parse-csv.c @@ -36,7 +36,8 @@ #include "scanner/csv-scanner/csv-scanner.h" #include "parser/parser-expr.h" #include "scratch-buffers.h" - 
+#include "str-utils.h" +#include "csvparser.h" typedef struct FilterXFunctionParseCSV_ { @@ -46,61 +47,6 @@ typedef struct FilterXFunctionParseCSV_ FilterXExpr *columns; } FilterXFunctionParseCSV; -#define STRINGIFY(lit) #lit -#define NUM_MANDATORY_ARGS 1 - -typedef enum FxFnParseCSVOpt_ -{ - FxFnParseCSVOptColumns = NUM_MANDATORY_ARGS, - FxFnParseCSVOptDelimiters, - FxFnParseCSVOptDialect, - FxFnParseCSVOptGreedy, - FxFnParseCSVOptStripWhiteSpace, - - FxFnParseCSVOptFirst = FxFnParseCSVOptDelimiters, // workaround, since columns need to parse on a different way yet (orig val: FxFnParseCSVOptColumns) - FxFnParseCSVOptLast = FxFnParseCSVOptStripWhiteSpace -} FxFnParseCSVOpt; - -struct ArgumentDescriptor -{ - const gchar *name; - FilterXType *acceptable_type; -}; - -static const struct ArgumentDescriptor args_descs[FxFnParseCSVOptLast - FxFnParseCSVOptFirst + 1] = -{ - // { - // .name = "columns", - // .acceptable_type = &FILTERX_TYPE_NAME(json_array), - // }, - { - .name = "delimiters", - .acceptable_type = &FILTERX_TYPE_NAME(string), - }, - { - .name = "dialect", - .acceptable_type = &FILTERX_TYPE_NAME(string), - }, - { - .name = "greedy", - .acceptable_type = &FILTERX_TYPE_NAME(boolean), - }, - { - .name = "strip_whitespaces", - .acceptable_type = &FILTERX_TYPE_NAME(boolean), - }, -}; - -#define NUM_CVS_SCANNER_DIALECTS 4 - -static const gchar *parse_csv_dialect_enum_names[NUM_CVS_SCANNER_DIALECTS] = -{ - STRINGIFY(CSV_SCANNER_ESCAPE_NONE), - STRINGIFY(CSV_SCANNER_ESCAPE_BACKSLASH), - STRINGIFY(CSV_SCANNER_ESCAPE_BACKSLASH_WITH_SEQUENCES), - STRINGIFY(CSV_SCANNER_ESCAPE_DOUBLE_CHAR), -}; - static gboolean _parse_columns(FilterXFunctionParseCSV *self, GList **col_names) { @@ -111,15 +57,13 @@ _parse_columns(FilterXFunctionParseCSV *self, GList **col_names) if (!cols_obj) return FALSE; - if (filterx_object_is_type(cols_obj, &FILTERX_TYPE_NAME(null))) + if (!filterx_object_is_type(cols_obj, &FILTERX_TYPE_NAME(json_array))) { - result = TRUE; + msg_error("columns 
argument must be a type of json array.", + evt_tag_str("current_type", cols_obj->type->name ) ); goto exit; } - if (!filterx_object_is_type(cols_obj, &FILTERX_TYPE_NAME(json_array))) - goto exit; - guint64 size; if (!filterx_object_len(cols_obj, &size)) return FALSE; @@ -156,13 +100,14 @@ _eval(FilterXExpr *s) const gchar *input; if (filterx_object_is_type(obj, &FILTERX_TYPE_NAME(string))) input = filterx_string_get_value(obj, &len); - else if (filterx_object_is_type(obj, &FILTERX_TYPE_NAME(message_value))) + else if (filterx_object_is_type(obj, &FILTERX_TYPE_NAME(message_value)) + && filterx_message_value_get_type(obj) == LM_VT_STRING) input = filterx_message_value_get_value(obj, &len); else goto exit; - FilterXObject *result = NULL; - GList *cols = NULL; + APPEND_ZERO(input, input, len); + if (!_parse_columns(self, &cols)) goto exit; @@ -231,13 +176,13 @@ _free(FilterXExpr *s) } static FilterXExpr * -_extract_parse_csv_msg_expr(GList *argument_expressions, GError **error) +_extract_msg_expr(FilterXFunctionArgs *args, GError **error) { - FilterXExpr *msg_expr = filterx_expr_ref(((FilterXExpr *) argument_expressions->data)); + FilterXExpr *msg_expr = filterx_function_args_get_expr(args, 0); if (!msg_expr) { g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, - "argument must be set: message. " FILTERX_FUNC_PARSE_CSV_USAGE); + "argument must be set: msg_str. " FILTERX_FUNC_PARSE_CSV_USAGE); return NULL; } @@ -245,135 +190,134 @@ _extract_parse_csv_msg_expr(GList *argument_expressions, GError **error) } static FilterXExpr * -_extract_parse_csv_columns_expr(GList *argument_expressions, GError **error) +_extract_columns_expr(FilterXFunctionArgs *args, GError **error) { - gsize arguments_len = argument_expressions ? 
g_list_length(argument_expressions) : 0; - if (arguments_len - NUM_MANDATORY_ARGS >= FxFnParseCSVOptColumns) - { - return filterx_expr_ref((FilterXExpr *) g_list_nth_data(argument_expressions, FxFnParseCSVOptColumns)); - } - return NULL; + return filterx_function_args_get_named_expr(args, FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS); } static gboolean -_extract_parse_csv_opts(FilterXFunctionParseCSV *self, GList *argument_expressions, GError **error) +_extract_opts(FilterXFunctionParseCSV *self, FilterXFunctionArgs *args, GError **error) { - gsize arguments_len = argument_expressions ? g_list_length(argument_expressions) : 0; - if (arguments_len < NUM_MANDATORY_ARGS) - { - g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, - "invalid number of arguments. " FILTERX_FUNC_PARSE_CSV_USAGE); - return FALSE; - } - guint32 opt_flags = self->options.flags; - FilterXObject *arg_obj = NULL; - int opt_id = FxFnParseCSVOptFirst; - for (GList *elem = g_list_nth(argument_expressions, opt_id); elem; elem = elem->next) - { - if (opt_id > FxFnParseCSVOptLast) - break; + const gchar *error_str = ""; + gboolean exists; + gsize len; + const gchar *value; + gboolean flag_err = FALSE; + gboolean flag_val = FALSE; - FilterXExpr *argument_expr = (FilterXExpr *)elem->data; - if (!argument_expr || !filterx_expr_is_literal(argument_expr)) + value = filterx_function_args_get_named_literal_string(args, FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS, &len, &exists); + if (exists) + { + if (!value) { - g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, - "'%s' argument must be string literal. 
" FILTERX_FUNC_PARSE_CSV_USAGE, args_descs[opt_id - FxFnParseCSVOptFirst].name); - return FALSE; + error_str = FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS " must be a string literal"; + goto error; } - - arg_obj = filterx_expr_eval(argument_expr); - if (!arg_obj) + if (len < 1) { - g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, - "unable to parse argument '%s'. " FILTERX_FUNC_PARSE_CSV_USAGE, args_descs[opt_id - FxFnParseCSVOptFirst].name); - return FALSE; + error_str = FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS " can not be empty"; + goto error; } + csv_scanner_options_set_delimiters(&self->options, value); + } - // optional args must be nullable - if (filterx_object_is_type(arg_obj, &FILTERX_TYPE_NAME(null))) - goto next; - - - const gchar *opt_str = NULL; - gboolean opt_bool = FALSE; - if (!filterx_object_is_type(arg_obj, args_descs[opt_id - FxFnParseCSVOptFirst].acceptable_type)) + value = filterx_function_args_get_named_literal_string(args, FILTERX_FUNC_PARSE_CSV_ARG_NAME_DIALECT, &len, &exists); + if (exists) + { + if (!value) { - g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, - "'%s' argument must be string literal " FILTERX_FUNC_PARSE_CSV_USAGE, args_descs[opt_id - FxFnParseCSVOptFirst].name); + error_str = FILTERX_FUNC_PARSE_CSV_ARG_NAME_DIALECT " must be a string literal"; goto error; } - if (filterx_object_is_type(arg_obj, &FILTERX_TYPE_NAME(string))) + if (len < 1) { - opt_str = filterx_string_get_value(arg_obj, NULL); - if (!opt_str) - { - g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, - "'%s' argument must be string literal " FILTERX_FUNC_PARSE_CSV_USAGE, args_descs[opt_id - FxFnParseCSVOptFirst].name); - goto error; - } + error_str = FILTERX_FUNC_PARSE_CSV_ARG_NAME_DIALECT " can not be empty"; + goto error; } - else if (filterx_object_is_type(arg_obj, &FILTERX_TYPE_NAME(boolean))) - opt_bool = filterx_object_truthy(arg_obj); + CSVScannerDialect dialect = 
csv_parser_lookup_dialect(value); + if (dialect == -1) + { + error_str = FILTERX_FUNC_PARSE_CSV_ARG_NAME_DIALECT " argument must be one of: [" \ + "escape-none, " \ + "escape-backslash, " \ + "escape-backslash-with-sequences, " \ + "escape-double-char]"; + goto error; + } + csv_scanner_options_set_dialect(&self->options, dialect); + } + flag_val = filterx_function_args_get_named_literal_boolean(args, FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY, &exists, + &flag_err); + if (exists) + { - switch (opt_id) + if (flag_err) { - case FxFnParseCSVOptColumns: - // this should not happened - // the framework is not yet able to handle lists/dicts on parse phase - // do nothing, parse columns in eval temporary - break; - case FxFnParseCSVOptDelimiters: - csv_scanner_options_set_delimiters(&self->options, opt_str); - break; - case FxFnParseCSVOptDialect: - { - CSVScannerDialect dialect = -1; - for (int e = 0; e < NUM_CVS_SCANNER_DIALECTS; e++) - { - if (strcmp(parse_csv_dialect_enum_names[e], opt_str) == 0) - { - dialect = e; - break; - } - } - if (dialect != -1) - csv_scanner_options_set_dialect(&self->options, dialect); - break; + error_str = FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY " argument evaluation error"; + goto error; } - case FxFnParseCSVOptGreedy: - if (opt_bool) - opt_flags |= CSV_SCANNER_GREEDY; - else - opt_flags &= ~CSV_SCANNER_GREEDY; - break; - case FxFnParseCSVOptStripWhiteSpace: - if (opt_bool) - opt_flags |= CSV_SCANNER_STRIP_WHITESPACE; - else - opt_flags &= ~CSV_SCANNER_STRIP_WHITESPACE; - break; - default: - g_assert_not_reached(); - break; + + if (flag_val) + opt_flags |= CSV_SCANNER_GREEDY; + else + opt_flags &= ~CSV_SCANNER_GREEDY; + } + + flag_val = filterx_function_args_get_named_literal_boolean(args, FILTERX_FUNC_PARSE_CSV_ARG_NAME_STRIP_WHITESPACES, + &exists, + &flag_err); + if (exists) + { + + if (flag_err) + { + error_str = FILTERX_FUNC_PARSE_CSV_ARG_NAME_STRIP_WHITESPACES " argument evaluation error"; + goto error; } -next: - 
filterx_object_unref(arg_obj); - opt_id++; + + if (flag_val) + opt_flags |= CSV_SCANNER_STRIP_WHITESPACE; + else + opt_flags &= ~CSV_SCANNER_STRIP_WHITESPACE; } csv_scanner_options_set_flags(&self->options, opt_flags); return TRUE; error: - filterx_object_unref(arg_obj); + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "%s. %s", error_str, FILTERX_FUNC_PARSE_CSV_USAGE); return FALSE; } +static gboolean +_extract_args(FilterXFunctionParseCSV *self, FilterXFunctionArgs *args, GError **error) +{ + gsize args_len = filterx_function_args_len(args); + if (args_len != 1) + { + g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL, + "invalid number of arguments. " FILTERX_FUNC_PARSE_CSV_USAGE); + return FALSE; + } + + self->msg = _extract_msg_expr(args, error); + if (!self->msg) + return FALSE; + + self->columns = _extract_columns_expr(args, error); + + if (!_extract_opts(self, args, error)) + return FALSE; + + return TRUE; +} + FilterXExpr * -filterx_function_parse_csv_new(const gchar *function_name, GList *argument_expressions, GError **error) +filterx_function_parse_csv_new(const gchar *function_name, FilterXFunctionArgs *args, GError **error) { FilterXFunctionParseCSV *self = g_new0(FilterXFunctionParseCSV, 1); @@ -385,22 +329,14 @@ filterx_function_parse_csv_new(const gchar *function_name, GList *argument_expre csv_scanner_options_set_flags(&self->options, CSV_SCANNER_STRIP_WHITESPACE); csv_scanner_options_set_dialect(&self->options, CSV_SCANNER_ESCAPE_NONE); - // there is no filterx context in construct phase, unable to parse json variable - // and json literals are not yet supported - if (!_extract_parse_csv_opts(self, argument_expressions, error)) - goto error; - - self->columns = _extract_parse_csv_columns_expr(argument_expressions, error); - - self->msg = _extract_parse_csv_msg_expr(argument_expressions, error); - if (!self->msg) + if (!_extract_args(self, args, error)) goto error; - 
g_list_free_full(argument_expressions, (GDestroyNotify) filterx_expr_unref); + filterx_function_args_free(args); return &self->super.super; error: - g_list_free_full(argument_expressions, (GDestroyNotify) filterx_expr_unref); + filterx_function_args_free(args); filterx_expr_unref(&self->super.super); return NULL; } diff --git a/modules/csvparser/filterx-func-parse-csv.h b/modules/csvparser/filterx-func-parse-csv.h index e8ea665e11..c21f93656d 100644 --- a/modules/csvparser/filterx-func-parse-csv.h +++ b/modules/csvparser/filterx-func-parse-csv.h @@ -26,9 +26,19 @@ #include "plugin.h" #include "filterx/expr-function.h" -#define FILTERX_FUNC_PARSE_CSV_USAGE "Usage: parse_csv(message string [, [json_array cols], [string delimiters], [string dialect], [boolean greedy], [boolean strip_whitespaces]])" +#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS "columns" +#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS "delimiters" +#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_DIALECT "dialect" +#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_STRIP_WHITESPACES "strip_whitespaces" +#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY "greedy" +#define FILTERX_FUNC_PARSE_CSV_USAGE "Usage: parse_csv(msg_str [" \ + FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS"=json_array, " \ + FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS"=string, " \ + FILTERX_FUNC_PARSE_CSV_ARG_NAME_DIALECT"=string, " \ + FILTERX_FUNC_PARSE_CSV_ARG_NAME_STRIP_WHITESPACES"=boolean, " \ + FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY"=boolean])" -FilterXExpr *filterx_function_parse_csv_new(const gchar *function_name, GList *argument_expressions, GError **error); +FilterXExpr *filterx_function_parse_csv_new(const gchar *function_name, FilterXFunctionArgs *args, GError **error); gpointer filterx_function_construct_parse_csv(Plugin *self); #endif From 4d3a2b1ca94c5bc538c674691e83b80661457c64 Mon Sep 17 00:00:00 2001 From: shifter Date: Wed, 15 May 2024 16:07:00 +0200 Subject: [PATCH 4/6] csvparser: filterx-func-parse-csv unit tests refactor 
Signed-off-by: shifter --- modules/csvparser/filterx-func-parse-csv.c | 4 +- .../tests/test_filterx_func_parse_csv.c | 198 ++++++++---------- 2 files changed, 88 insertions(+), 114 deletions(-) diff --git a/modules/csvparser/filterx-func-parse-csv.c b/modules/csvparser/filterx-func-parse-csv.c index 7f7ec7a859..e329760efb 100644 --- a/modules/csvparser/filterx-func-parse-csv.c +++ b/modules/csvparser/filterx-func-parse-csv.c @@ -94,7 +94,10 @@ _eval(FilterXExpr *s) if (!obj) return NULL; + CSVScanner scanner; gboolean ok = FALSE; + FilterXObject *result = NULL; + GList *cols = NULL; gsize len; const gchar *input; @@ -119,7 +122,6 @@ _eval(FilterXExpr *s) else result = filterx_json_array_new_empty(); - CSVScanner scanner; csv_scanner_init(&scanner, &self->options, input); GList *col = cols; diff --git a/modules/csvparser/tests/test_filterx_func_parse_csv.c b/modules/csvparser/tests/test_filterx_func_parse_csv.c index d37b94b1af..6f11245b4d 100644 --- a/modules/csvparser/tests/test_filterx_func_parse_csv.c +++ b/modules/csvparser/tests/test_filterx_func_parse_csv.c @@ -99,8 +99,9 @@ Test(filterx_func_parse_csv, test_helper_generate_column_list_multiple_elts) Test(filterx_func_parse_csv, test_empty_args_error) { GError *err = NULL; - FilterXExpr *func = filterx_function_parse_csv_new("test", NULL, &err); - + GError *args_err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(NULL, &args_err), &err); + cr_assert_null(args_err); cr_assert_null(func); cr_assert_not_null(err); cr_assert(strstr(err->message, FILTERX_FUNC_PARSE_CSV_USAGE) != NULL); @@ -111,38 +112,13 @@ Test(filterx_func_parse_csv, test_empty_args_error) Test(filterx_func_parse_csv, test_skipped_opts_causes_default_behaviour) { GList *args = NULL; - args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1))); - - GError *err = NULL; - FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); - - 
cr_assert_null(err); - - FilterXObject *obj = filterx_expr_eval(func); - - cr_assert_not_null(obj); - cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array))); - - GString *repr = scratch_buffers_alloc(); - - LogMessageValueType lmvt; - cr_assert(filterx_object_marshal(obj, repr, &lmvt)); - - cr_assert_str_eq(repr->str, "foo,bar,baz,tik,tak,toe"); - filterx_expr_unref(func); - filterx_object_unref(obj); - g_error_free(err); -} - -Test(filterx_func_parse_csv, test_columns_optional_argument_is_nullable) -{ - GList *args = NULL; - args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1))); - args = g_list_append(args, filterx_literal_new(filterx_null_new())); + args = g_list_append(args, filterx_function_arg_new(NULL, + filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1)))); GError *err = NULL; - FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); - + GError *args_err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err); + cr_assert_null(args_err); cr_assert_null(err); FilterXObject *obj = filterx_expr_eval(func); @@ -164,13 +140,15 @@ Test(filterx_func_parse_csv, test_columns_optional_argument_is_nullable) Test(filterx_func_parse_csv, test_set_optional_first_argument_column_names) { GList *args = NULL; - args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar baz", -1))); + args = g_list_append(args, filterx_function_arg_new(NULL, filterx_literal_new(filterx_string_new("foo bar baz", -1)))); FilterXObject *col_names = _generate_column_list("1st", "2nd", "3rd", NULL); - args = g_list_append(args, filterx_literal_new(col_names)); + args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS, + filterx_literal_new(col_names))); GError *err = NULL; - FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); - + GError *args_err = NULL; + FilterXExpr 
*func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err); + cr_assert_null(args_err); cr_assert_null(err); FilterXObject *obj = filterx_expr_eval(func); @@ -192,13 +170,16 @@ Test(filterx_func_parse_csv, test_set_optional_first_argument_column_names) Test(filterx_func_parse_csv, test_column_names_sets_expected_column_size_additional_columns_dropped) { GList *args = NULL; - args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar baz more columns we did not expect", -1))); + args = g_list_append(args, filterx_function_arg_new(NULL, + filterx_literal_new(filterx_string_new("foo bar baz more columns we did not expect", -1)))); FilterXObject *col_names = _generate_column_list("1st", "2nd", "3rd", NULL); // sets expected column size 3 - args = g_list_append(args, filterx_literal_new(col_names)); + args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS, + filterx_literal_new(col_names))); GError *err = NULL; - FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); - + GError *args_err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err); + cr_assert_null(args_err); cr_assert_null(err); FilterXObject *obj = filterx_expr_eval(func); @@ -220,13 +201,15 @@ Test(filterx_func_parse_csv, test_column_names_sets_expected_column_size_additio Test(filterx_func_parse_csv, test_optional_argument_delimiters) { GList *args = NULL; - args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar+baz;tik|tak:toe", -1))); - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // columns - args = g_list_append(args, filterx_literal_new(filterx_string_new(" +;", -1))); + args = g_list_append(args, filterx_function_arg_new(NULL, + filterx_literal_new(filterx_string_new("foo bar+baz;tik|tak:toe", -1)))); + args = g_list_append(args, 
filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS, + filterx_literal_new(filterx_string_new(" +;", -1)))); GError *err = NULL; - FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); - + GError *args_err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err); + cr_assert_null(args_err); cr_assert_null(err); FilterXObject *obj = filterx_expr_eval(func); @@ -245,44 +228,18 @@ Test(filterx_func_parse_csv, test_optional_argument_delimiters) g_error_free(err); } -Test(filterx_func_parse_csv, test_optional_argument_delimiters_is_nullable) -{ - GList *args = NULL; - args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar+baz;tik|tak:toe", -1))); - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // columns - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // delimiter - - GError *err = NULL; - FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); - - cr_assert_null(err); - - FilterXObject *obj = filterx_expr_eval(func); - - cr_assert_not_null(obj); - cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array))); - - GString *repr = scratch_buffers_alloc(); - - LogMessageValueType lmvt; - cr_assert(filterx_object_marshal(obj, repr, &lmvt)); - - cr_assert_str_eq(repr->str, "foo,bar+baz;tik|tak:toe"); - filterx_expr_unref(func); - filterx_object_unref(obj); - g_error_free(err); -} - Test(filterx_func_parse_csv, test_optional_argument_dialect) { GList *args = NULL; - args = g_list_append(args, filterx_literal_new(filterx_string_new("\"PTHREAD \\\"support initialized\"", -1))); - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // columns - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // delimiter - args = g_list_append(args, filterx_literal_new(filterx_string_new("CSV_SCANNER_ESCAPE_BACKSLASH", -1))); + args = g_list_append(args, 
filterx_function_arg_new(NULL, + filterx_literal_new(filterx_string_new("\"PTHREAD \\\"support initialized\"", -1)))); + args = g_list_append(args, filterx_function_arg_new("dialect", + filterx_literal_new(filterx_string_new("escape-backslash", -1)))); GError *err = NULL; - FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + GError *args_err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err); + cr_assert_null(args_err); cr_assert_null(err); FilterXObject *obj = filterx_expr_eval(func); @@ -301,45 +258,54 @@ Test(filterx_func_parse_csv, test_optional_argument_dialect) g_error_free(err); } -Test(filterx_func_parse_csv, test_optional_argument_dialect_is_nullable) +Test(filterx_func_parse_csv, test_optional_argument_flag_greedy) { GList *args = NULL; - args = g_list_append(args, filterx_literal_new(filterx_string_new("\"PTHREAD \\\"support initialized\"", -1))); - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // columns - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // delimiter - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // dialect + args = g_list_append(args, filterx_function_arg_new(NULL, + filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1)))); + args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS, + filterx_literal_new(_generate_column_list("1st", "2nd", + "3rd", "rest", NULL)))); // columns + args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY, + filterx_literal_new(filterx_boolean_new(TRUE)))); // greedy GError *err = NULL; - FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + GError *args_err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err); + cr_assert_null(args_err); cr_assert_null(err); FilterXObject *obj = 
filterx_expr_eval(func); cr_assert_not_null(obj); - cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array))); + cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_object))); GString *repr = scratch_buffers_alloc(); LogMessageValueType lmvt; cr_assert(filterx_object_marshal(obj, repr, &lmvt)); - cr_assert_str_eq(repr->str, "\"PTHREAD \\\\support\",'initialized\"'"); + cr_assert_str_eq(repr->str, "{\"1st\":\"foo\",\"2nd\":\"bar\",\"3rd\":\"baz\",\"rest\":\"tik tak toe\"}"); filterx_expr_unref(func); filterx_object_unref(obj); g_error_free(err); } -Test(filterx_func_parse_csv, test_optional_argument_flag_greedy) +Test(filterx_func_parse_csv, test_optional_argument_flag_non_greedy) { GList *args = NULL; - args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1))); - args = g_list_append(args, filterx_literal_new(_generate_column_list("1st", "2nd", "3rd", "rest", NULL))); // columns - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // delimiter - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // dialect - args = g_list_append(args, filterx_literal_new(filterx_boolean_new(TRUE))); // greedy + args = g_list_append(args, filterx_function_arg_new(NULL, + filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1)))); + args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS, + filterx_literal_new(_generate_column_list("1st", "2nd", + "3rd", "rest", NULL)))); // columns + args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY, + filterx_literal_new(filterx_boolean_new(FALSE)))); // greedy GError *err = NULL; - FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + GError *args_err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err); + cr_assert_null(args_err); cr_assert_null(err); FilterXObject *obj = 
filterx_expr_eval(func); @@ -352,53 +318,60 @@ Test(filterx_func_parse_csv, test_optional_argument_flag_greedy) LogMessageValueType lmvt; cr_assert(filterx_object_marshal(obj, repr, &lmvt)); - cr_assert_str_eq(repr->str, "{\"1st\":\"foo\",\"2nd\":\"bar\",\"3rd\":\"baz\",\"rest\":\"tik tak toe\"}"); + cr_assert_str_eq(repr->str, "{\"1st\":\"foo\",\"2nd\":\"bar\",\"3rd\":\"baz\",\"rest\":\"tik\"}"); filterx_expr_unref(func); filterx_object_unref(obj); g_error_free(err); } -Test(filterx_func_parse_csv, test_optional_argument_flag_non_greedy) +Test(filterx_func_parse_csv, test_optional_argument_flag_strip_whitespace) { GList *args = NULL; - args = g_list_append(args, filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1))); - args = g_list_append(args, filterx_literal_new(_generate_column_list("1st", "2nd", "3rd", "rest", NULL))); // columns - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // delimiter - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // dialect - args = g_list_append(args, filterx_literal_new(filterx_boolean_new(FALSE))); // greedy + args = g_list_append(args, filterx_function_arg_new(NULL, + filterx_literal_new(filterx_string_new(" foo , bar , baz , tik tak toe", -1)))); + args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS, + filterx_literal_new(filterx_string_new(",", + -1)))); // delimiter + args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_STRIP_WHITESPACES, + filterx_literal_new(filterx_boolean_new(TRUE)))); // strip_whitespace GError *err = NULL; - FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + GError *args_err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err); + cr_assert_null(args_err); cr_assert_null(err); FilterXObject *obj = filterx_expr_eval(func); cr_assert_not_null(obj); - cr_assert(filterx_object_is_type(obj, 
&FILTERX_TYPE_NAME(json_object))); + cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array))); GString *repr = scratch_buffers_alloc(); LogMessageValueType lmvt; cr_assert(filterx_object_marshal(obj, repr, &lmvt)); - cr_assert_str_eq(repr->str, "{\"1st\":\"foo\",\"2nd\":\"bar\",\"3rd\":\"baz\",\"rest\":\"tik\"}"); + cr_assert_str_eq(repr->str, "foo,bar,baz,\"tik tak toe\""); filterx_expr_unref(func); filterx_object_unref(obj); g_error_free(err); } -Test(filterx_func_parse_csv, test_optional_argument_flag_strip_whitespace) +Test(filterx_func_parse_csv, test_optional_argument_flag_not_to_strip_whitespace) { GList *args = NULL; - args = g_list_append(args, filterx_literal_new(filterx_string_new(" foo , bar , baz , tik tak toe", -1))); - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // columns - args = g_list_append(args, filterx_literal_new(filterx_string_new(",", -1))); // delimiter - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // dialect - args = g_list_append(args, filterx_literal_new(filterx_null_new())); // greedy - args = g_list_append(args, filterx_literal_new(filterx_boolean_new(TRUE))); // strip_whitespace + args = g_list_append(args, filterx_function_arg_new(NULL, + filterx_literal_new(filterx_string_new(" foo , bar , baz , tik tak toe", -1)))); + args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS, + filterx_literal_new(filterx_string_new(",", + -1)))); // delimiter + args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_STRIP_WHITESPACES, + filterx_literal_new(filterx_boolean_new(FALSE)))); // strip_whitespace GError *err = NULL; - FilterXExpr *func = filterx_function_parse_csv_new("test", args, &err); + GError *args_err = NULL; + FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err); + cr_assert_null(args_err); cr_assert_null(err); FilterXObject *obj = 
filterx_expr_eval(func); @@ -411,13 +384,12 @@ Test(filterx_func_parse_csv, test_optional_argument_flag_strip_whitespace) LogMessageValueType lmvt; cr_assert(filterx_object_marshal(obj, repr, &lmvt)); - cr_assert_str_eq(repr->str, "foo,bar,baz,\"tik tak toe\""); + cr_assert_str_eq(repr->str, "\" foo \",\" bar \",\" baz \",\" tik tak toe\""); filterx_expr_unref(func); filterx_object_unref(obj); g_error_free(err); } - static void setup(void) { From b957a3e8b5f0e55a5cf5d79b0f93412a8bee78da Mon Sep 17 00:00:00 2001 From: shifter Date: Wed, 15 May 2024 19:26:10 +0200 Subject: [PATCH 5/6] csvparser: filterx light tests Signed-off-by: shifter --- .../functional_tests/filterx/test_filterx.py | 101 ++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/tests/light/functional_tests/filterx/test_filterx.py b/tests/light/functional_tests/filterx/test_filterx.py index 4d88316f0a..e2d213e38a 100644 --- a/tests/light/functional_tests/filterx/test_filterx.py +++ b/tests/light/functional_tests/filterx/test_filterx.py @@ -1385,3 +1385,104 @@ def test_parse_kv_stray_words_value_name(config, syslog_ng): assert file_true.get_stats()["processed"] == 1 assert "processed" not in file_false.get_stats() assert file_true.read_log() == "{\"foo\":\"bar\",\"bar\":\"baz\",\"stray_words\":\"thisisstray\"}\n" + + +def test_parse_csv_default_arguments(config, syslog_ng): + (file_true, file_false) = create_config( + config, """ + custom_message = "foo bar baz"; + $MSG = parse_csv(custom_message); + """, + ) + syslog_ng.start(config) + + assert file_true.get_stats()["processed"] == 1 + assert "processed" not in file_false.get_stats() + assert file_true.read_log() == "foo,bar,baz\n" + + +def test_parse_csv_optional_arg_columns(config, syslog_ng): + (file_true, file_false) = create_config( + config, """ + custom_message = "foo bar baz"; + cols = json_array(["1st","2nd","3rd"]); + $MSG = parse_csv(custom_message, columns=cols); + """, + ) + syslog_ng.start(config) + + assert 
file_true.get_stats()["processed"] == 1 + assert "processed" not in file_false.get_stats() + assert file_true.read_log() == '{"1st":"foo","2nd":"bar","3rd":"baz"}\n' + + +def test_parse_csv_optional_arg_delimiters(config, syslog_ng): + (file_true, file_false) = create_config( + config, """ + custom_message = "foo bar,baz.tik;tak!toe"; + $MSG = parse_csv(custom_message, delimiters=" ,."); + """, + ) + syslog_ng.start(config) + + assert file_true.get_stats()["processed"] == 1 + assert "processed" not in file_false.get_stats() + assert file_true.read_log() == 'foo,bar,baz,tik;tak!toe\n' + + +def test_parse_csv_optional_arg_non_greedy(config, syslog_ng): + (file_true, file_false) = create_config( + config, """ + custom_message = "foo bar baz tik tak toe"; + cols = json_array(["1st","2nd","3rd"]); + $MSG = parse_csv(custom_message, columns=cols, greedy=false); + """, + ) + syslog_ng.start(config) + + assert file_true.get_stats()["processed"] == 1 + assert "processed" not in file_false.get_stats() + assert file_true.read_log() == '{"1st":"foo","2nd":"bar","3rd":"baz"}\n' + + +def test_parse_csv_optional_arg_greedy(config, syslog_ng): + (file_true, file_false) = create_config( + config, """ + custom_message = "foo bar baz tik tak toe"; + cols = json_array(["1st","2nd","3rd","rest"]); + $MSG = parse_csv(custom_message, columns=cols, greedy=true); + """, + ) + syslog_ng.start(config) + + assert file_true.get_stats()["processed"] == 1 + assert "processed" not in file_false.get_stats() + assert file_true.read_log() == '{"1st":"foo","2nd":"bar","3rd":"baz","rest":"tik tak toe"}\n' + + +def test_parse_csv_optional_arg_strip_whitespace(config, syslog_ng): + (file_true, file_false) = create_config( + config, """ + custom_message = " foo , bar , baz, tik, tak, toe "; + $MSG = parse_csv(custom_message, delimiters=",", strip_whitespace=true); + """, + ) + syslog_ng.start(config) + + assert file_true.get_stats()["processed"] == 1 + assert "processed" not in file_false.get_stats() + 
assert file_true.read_log() == 'foo,bar,baz,tik,tak,toe\n' + + +def test_parse_csv_dialect(config, syslog_ng): + (file_true, file_false) = create_config( + config, r""" + custom_message = "\"PTHREAD \\\"support initialized\""; + $MSG = format_json(parse_csv(custom_message, dialect="escape-backslash")); # ["PTHREAD \"support initialized"] + """, + ) + syslog_ng.start(config) + + assert file_true.get_stats()["processed"] == 1 + assert "processed" not in file_false.get_stats() + assert file_true.read_log() == '["PTHREAD \\"support initialized"]\n' From 7f7ab48b19160f4b76503e1c7ebe1b4abd06bcc6 Mon Sep 17 00:00:00 2001 From: shifter Date: Wed, 15 May 2024 19:59:00 +0200 Subject: [PATCH 6/6] csvparser: filterx policy/copyright/license update Signed-off-by: shifter --- tests/copyright/policy | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/copyright/policy b/tests/copyright/policy index 3730b3e4a0..6d2d2ef828 100644 --- a/tests/copyright/policy +++ b/tests/copyright/policy @@ -277,6 +277,8 @@ docker/python-modules/webhook/scl/webhook.conf docker/python-modules/webhook/source.py packaging/package-indexer/remote_storage_synchronizer/s3_bucket_synchronizer.py packaging/package-indexer/cdn/cloudflare_cdn.py +modules/csvparser/filterx-func-parse-csv\.[ch] +modules/csvparser/tests/test_filterx_func_parse_csv.c ########################################################################### # These files are GPLd with Balabit origin.