diff --git a/modules/csvparser/CMakeLists.txt b/modules/csvparser/CMakeLists.txt
index aab4d545f9..e9b000d24b 100644
--- a/modules/csvparser/CMakeLists.txt
+++ b/modules/csvparser/CMakeLists.txt
@@ -4,6 +4,8 @@ set(CSVPARSER_SOURCES
     csvparser-parser.c
     csvparser-parser.h
     csvparser-plugin.c
+    filterx-func-parse-csv.h
+    filterx-func-parse-csv.c
 )
 
 add_module(
diff --git a/modules/csvparser/Makefile.am b/modules/csvparser/Makefile.am
index a6a43fdd1e..62d8e9c10d 100644
--- a/modules/csvparser/Makefile.am
+++ b/modules/csvparser/Makefile.am
@@ -5,7 +5,9 @@ modules_csvparser_libcsvparser_la_SOURCES = \
 	modules/csvparser/csvparser-grammar.y \
 	modules/csvparser/csvparser-parser.c \
 	modules/csvparser/csvparser-parser.h \
-	modules/csvparser/csvparser-plugin.c
+	modules/csvparser/csvparser-plugin.c \
+	modules/csvparser/filterx-func-parse-csv.h \
+	modules/csvparser/filterx-func-parse-csv.c
 
 modules_csvparser_libcsvparser_la_CPPFLAGS = \
 	$(AM_CPPFLAGS) \
diff --git a/modules/csvparser/csvparser-plugin.c b/modules/csvparser/csvparser-plugin.c
index f6271b25b8..5cbd7aee01 100644
--- a/modules/csvparser/csvparser-plugin.c
+++ b/modules/csvparser/csvparser-plugin.c
@@ -25,6 +25,7 @@
 #include "csvparser.h"
 #include "plugin.h"
 #include "plugin-types.h"
+#include "filterx-func-parse-csv.h"
 
 extern CfgParser csvparser_parser;
 
@@ -35,6 +36,11 @@ static Plugin csvparser_plugins[] =
     .name = "csv-parser",
     .parser = &csvparser_parser,
   },
+  {
+    .type = LL_CONTEXT_FILTERX_FUNC,
+    .name = "parse_csv",
+    .construct = filterx_function_construct_parse_csv,
+  },
 };
 
 gboolean
diff --git a/modules/csvparser/filterx-func-parse-csv.c b/modules/csvparser/filterx-func-parse-csv.c
new file mode 100644
index 0000000000..aa4e377828
--- /dev/null
+++ b/modules/csvparser/filterx-func-parse-csv.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 2024 shifter
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * As an additional exemption you are allowed to compile & link against the
+ * OpenSSL libraries as published by the OpenSSL project. See the file
+ * COPYING for details.
+ *
+ */
+
+#include "filterx-func-parse-csv.h"
+#include "filterx/object-string.h"
+#include "filterx/object-primitive.h"
+#include "filterx/expr-literal.h"
+#include "filterx/filterx-eval.h"
+#include "filterx/filterx-globals.h"
+#include "filterx/object-json.h"
+#include "filterx/object-message-value.h"
+#include "filterx/object-null.h"
+#include "filterx/filterx-object.h"
+#include "filterx/object-dict-interface.h"
+#include "filterx/object-list-interface.h"
+
+#include "scanner/csv-scanner/csv-scanner.h"
+#include "parser/parser-expr.h"
+#include "scratch-buffers.h"
+
+
+/* Per call-site state of parse_csv(). */
+typedef struct FilterXFunctionParseCSV_
+{
+  FilterXFunction super;
+  FilterXExpr *msg;           /* first positional argument: the text to split */
+  CSVScannerOptions options;
+  FilterXExpr *columns;       /* optional "columns" named argument */
+} FilterXFunctionParseCSV;
+
+#define STRINGIFY(lit) #lit
+#define NUM_CSV_SCANNER_DIALECTS 4
+
+/*
+ * Accepted values of the "dialect" argument. The array index is used as the
+ * CSVScannerDialect value -- NOTE(review): this assumes the order matches the
+ * enum declared in csv-scanner.h; confirm.
+ */
+static const gchar *parse_csv_dialect_enum_names[NUM_CSV_SCANNER_DIALECTS] =
+{
+  STRINGIFY(CSV_SCANNER_ESCAPE_NONE),
+  STRINGIFY(CSV_SCANNER_ESCAPE_BACKSLASH),
+  STRINGIFY(CSV_SCANNER_ESCAPE_BACKSLASH_WITH_SEQUENCES),
+  STRINGIFY(CSV_SCANNER_ESCAPE_DOUBLE_CHAR),
+};
+
+/*
+ * Evaluate the optional "columns" expression and append a copy of every
+ * string element to *col_names; the caller frees the list and its strings.
+ *
+ * Returns TRUE when no columns expression was given, when it evaluates to
+ * null, or when it evaluates to a json_array. Returns FALSE otherwise.
+ * Elements of the array that are not strings are skipped.
+ */
+static gboolean
+_parse_columns(FilterXFunctionParseCSV *self, GList **col_names)
+{
+  gboolean result = FALSE;
+  if (!self->columns)
+    return TRUE;
+  FilterXObject *cols_obj = filterx_expr_eval(self->columns);
+  if (!cols_obj)
+    return FALSE;
+
+  if (filterx_object_is_type(cols_obj, &FILTERX_TYPE_NAME(null)))
+    {
+      result = TRUE;
+      goto exit;
+    }
+
+  if (!filterx_object_is_type(cols_obj, &FILTERX_TYPE_NAME(json_array)))
+    goto exit;
+
+  guint64 size;
+  if (!filterx_object_len(cols_obj, &size))
+    goto exit; /* cols_obj must still be released on this path */
+
+  for (guint64 i = 0; i < size; i++)
+    {
+      FilterXObject *col = filterx_list_get_subscript(cols_obj, i);
+      if (filterx_object_is_type(col, &FILTERX_TYPE_NAME(string)))
+        {
+          const gchar *col_name = filterx_string_get_value(col, NULL);
+          *col_names = g_list_append(*col_names, g_strdup(col_name));
+        }
+      filterx_object_unref(col);
+    }
+
+  result = TRUE;
+exit:
+  filterx_object_unref(cols_obj);
+  return result;
+}
+
+/*
+ * Split the evaluated message into fields with a CSVScanner.
+ *
+ * Without column names the result is a json_array of the fields. With column
+ * names it is a json_object mapping each name to the field at the same
+ * position; scanning stops once the names run out. Returns NULL if the
+ * message is neither a string nor a message_value, if the columns cannot be
+ * evaluated, if storing a field fails, or if no field was scanned at all.
+ */
+static FilterXObject *
+_eval(FilterXExpr *s)
+{
+  FilterXFunctionParseCSV *self = (FilterXFunctionParseCSV *) s;
+
+  FilterXObject *obj = filterx_expr_eval(self->msg);
+  if (!obj)
+    return NULL;
+
+  CSVScanner scanner;
+  gboolean ok = FALSE;
+  FilterXObject *result = NULL;
+  GList *cols = NULL;
+
+  /* NOTE(review): len is not passed on to the scanner, so input is presumably
+   * expected to be NUL terminated -- confirm for message_value objects. */
+  gsize len;
+  const gchar *input;
+  if (filterx_object_is_type(obj, &FILTERX_TYPE_NAME(string)))
+    input = filterx_string_get_value(obj, &len);
+  else if (filterx_object_is_type(obj, &FILTERX_TYPE_NAME(message_value)))
+    input = filterx_message_value_get_value(obj, &len);
+  else
+    goto exit;
+
+  if (!_parse_columns(self, &cols))
+    goto exit;
+
+  /* Set on every evaluation (0 when there are no names), otherwise the count
+   * of an earlier evaluation would stay behind in self->options.
+   * NOTE(review): this still writes shared state while evaluating; confirm
+   * that one expression is never evaluated concurrently. */
+  csv_scanner_options_set_expected_columns(&self->options, g_list_length(cols));
+  if (cols)
+    result = filterx_json_object_new_empty();
+  else
+    result = filterx_json_array_new_empty();
+
+  csv_scanner_init(&scanner, &self->options, input);
+
+  GList *col = cols;
+  while (csv_scanner_scan_next(&scanner))
+    {
+      if (cols)
+        {
+          if (!col)
+            break;
+          FilterXObject *key = filterx_string_new(col->data, -1);
+          FilterXObject *val = filterx_string_new(csv_scanner_get_current_value(&scanner),
+                                                  csv_scanner_get_current_value_len(&scanner));
+
+          ok = filterx_object_set_subscript(result, key, &val);
+
+          filterx_object_unref(key);
+          filterx_object_unref(val);
+
+          if (!ok)
+            break;
+          col = g_list_next(col);
+        }
+      else
+        {
+          const gchar *current_value = csv_scanner_get_current_value(&scanner);
+          gint current_value_len = csv_scanner_get_current_value_len(&scanner);
+          FilterXObject *val = filterx_string_new(current_value, current_value_len);
+
+          ok = filterx_list_append(result, &val);
+
+          filterx_object_unref(val);
+
+          if (!ok)
+            break;
+        }
+    }
+  /* not done after exit: the early jumps there happen before csv_scanner_init() */
+  csv_scanner_deinit(&scanner);
+
+exit:
+  if (!ok)
+    {
+      filterx_object_unref(result);
+    }
+  g_list_free_full(cols, (GDestroyNotify)g_free);
+  filterx_object_unref(obj);
+  return ok ? result : NULL;
+}
+
+/* Release the argument expressions and scanner options, then run the FilterXFunction free method. */
+static void
+_free(FilterXExpr *s)
+{
+  FilterXFunctionParseCSV *self = (FilterXFunctionParseCSV *) s;
+  filterx_expr_unref(self->msg);
+  filterx_expr_unref(self->columns);
+  csv_scanner_options_clean(&self->options);
+  filterx_function_free_method(&self->super);
+}
+
+/* Fetch the mandatory first positional argument; sets error when it is missing. */
+static FilterXExpr *
+_extract_msg_expr(FilterXFunctionArgs *args, GError **error)
+{
+  FilterXExpr *msg_expr = filterx_function_args_get_expr(args, 0);
+  if (!msg_expr)
+    {
+      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
+                  "argument must be set: message. " FILTERX_FUNC_PARSE_CSV_USAGE);
+      return NULL;
+    }
+
+  return msg_expr;
+}
+
+/* Fetch the optional "columns" named argument; error is not used here. */
+static FilterXExpr *
+_extract_columns_expr(FilterXFunctionArgs *args, GError **error)
+{
+  return filterx_function_args_get_named_expr(args, FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS);
+}
+
+/*
+ * Apply the optional named arguments (delimiters, dialect, greedy,
+ * strip_whitespaces) to self->options. Returns FALSE and sets error when an
+ * argument is present but cannot be used.
+ */
+static gboolean
+_extract_opts(FilterXFunctionParseCSV *self, FilterXFunctionArgs *args, GError **error)
+{
+  guint32 opt_flags = self->options.flags;
+
+  const gchar *error_str = "";
+  gboolean exists;
+  gsize len = 0;
+  const gchar *value;
+  FilterXObject *obj;
+
+  value = filterx_function_args_get_named_literal_string(args, FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS, &len, &exists);
+  if (exists)
+    {
+      if (!value)
+        {
+          error_str = FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS " must be a string literal";
+          goto error;
+        }
+      if (len < 1)
+        {
+          error_str = FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS " can not be empty";
+          goto error;
+        }
+      csv_scanner_options_set_delimiters(&self->options, value);
+    }
+
+  value = filterx_function_args_get_named_literal_string(args, FILTERX_FUNC_PARSE_CSV_ARG_NAME_DIALECT, &len, &exists);
+  if (exists)
+    {
+      if (!value)
+        {
+          error_str = FILTERX_FUNC_PARSE_CSV_ARG_NAME_DIALECT " must be a string literal";
+          goto error;
+        }
+      if (len < 1)
+        {
+          error_str = FILTERX_FUNC_PARSE_CSV_ARG_NAME_DIALECT " can not be empty";
+          goto error;
+        }
+      gint dialect = -1;
+      for (int e = 0; e < NUM_CSV_SCANNER_DIALECTS; e++)
+        {
+          if (strcmp(parse_csv_dialect_enum_names[e], value) == 0)
+            {
+              dialect = e;
+              break;
+            }
+        }
+      if (dialect < 0)
+        {
+          error_str = FILTERX_FUNC_PARSE_CSV_ARG_NAME_DIALECT " argument must be one of: [" \
+                      STRINGIFY(CSV_SCANNER_ESCAPE_NONE) ", " \
+                      STRINGIFY(CSV_SCANNER_ESCAPE_BACKSLASH) ", " \
+                      STRINGIFY(CSV_SCANNER_ESCAPE_BACKSLASH_WITH_SEQUENCES) ", " \
+                      STRINGIFY(CSV_SCANNER_ESCAPE_DOUBLE_CHAR) "]";
+          goto error;
+        }
+      csv_scanner_options_set_dialect(&self->options, (CSVScannerDialect) dialect);
+    }
+
+  obj = filterx_function_args_get_named_object(args, FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY, &exists);
+  if (exists)
+    {
+      if (!obj)
+        {
+          error_str = FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY " argument evaluation error";
+          goto error;
+        }
+      if (filterx_object_truthy(obj))
+        opt_flags |= CSV_SCANNER_GREEDY;
+      else
+        opt_flags &= ~CSV_SCANNER_GREEDY;
+      filterx_object_unref(obj);
+    }
+
+  obj = filterx_function_args_get_named_object(args, FILTERX_FUNC_PARSE_CSV_ARG_NAME_STIP_WHITESPACES, &exists);
+  if (exists)
+    {
+      if (!obj)
+        {
+          error_str = FILTERX_FUNC_PARSE_CSV_ARG_NAME_STIP_WHITESPACES " argument evaluation error";
+          goto error;
+        }
+      if (filterx_object_truthy(obj))
+        opt_flags |= CSV_SCANNER_STRIP_WHITESPACE;
+      else
+        opt_flags &= ~CSV_SCANNER_STRIP_WHITESPACE;
+      filterx_object_unref(obj);
+    }
+
+  csv_scanner_options_set_flags(&self->options, opt_flags);
+
+  return TRUE;
+error:
+  g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
+              "%s. %s", error_str, FILTERX_FUNC_PARSE_CSV_USAGE);
+  return FALSE;
+}
+
+/* Check the positional argument count, then collect every argument. */
+static gboolean
+_extract_args(FilterXFunctionParseCSV *self, FilterXFunctionArgs *args, GError **error)
+{
+  gsize args_len = args ? filterx_function_args_len(args) : 0;
+  if (args_len != 1)
+    {
+      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
+                  "invalid number of arguments. " FILTERX_FUNC_PARSE_CSV_USAGE);
+      return FALSE;
+    }
+
+  self->msg = _extract_msg_expr(args, error);
+  if (!self->msg)
+    return FALSE;
+
+  self->columns = _extract_columns_expr(args, error);
+
+  if (!_extract_opts(self, args, error))
+    return FALSE;
+
+  return TRUE;
+}
+
+/*
+ * Create a parse_csv() expression with the defaults set below, then apply the
+ * caller's arguments. args is freed here on success and on failure alike.
+ * Returns NULL with error set when the arguments are rejected.
+ */
+FilterXExpr *
+filterx_function_parse_csv_new(const gchar *function_name, FilterXFunctionArgs *args, GError **error)
+{
+  FilterXFunctionParseCSV *self = g_new0(FilterXFunctionParseCSV, 1);
+  filterx_function_init_instance(&self->super, function_name);
+  self->super.super.eval = _eval;
+  self->super.super.free_fn = _free;
+  csv_scanner_options_set_delimiters(&self->options, " ");
+  csv_scanner_options_set_quote_pairs(&self->options, "\"\"''");
+  csv_scanner_options_set_flags(&self->options, CSV_SCANNER_STRIP_WHITESPACE);
+  csv_scanner_options_set_dialect(&self->options, CSV_SCANNER_ESCAPE_NONE);
+
+  if (!_extract_args(self, args, error))
+    goto error;
+
+  filterx_function_args_free(args);
+  return &self->super.super;
+
+error:
+  if (args)
+    filterx_function_args_free(args);
+  filterx_expr_unref(&self->super.super);
+  return NULL;
+}
+
+/* Plugin construct hook: returns the parse_csv() constructor. */
+gpointer
+filterx_function_construct_parse_csv(Plugin *self)
+{
+  return (gpointer) filterx_function_parse_csv_new;
+}
diff --git a/modules/csvparser/filterx-func-parse-csv.h b/modules/csvparser/filterx-func-parse-csv.h
new file mode 100644
index 0000000000..2a763a806f
--- /dev/null
+++ b/modules/csvparser/filterx-func-parse-csv.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2024 shifter
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * As an additional exemption you are allowed to compile & link against the
+ * OpenSSL libraries as published by the OpenSSL project. See the file
+ * COPYING for details.
+ *
+ */
+
+#ifndef FILTERX_FUNC_PARSE_CSV_H_INCLUDED
+#define FILTERX_FUNC_PARSE_CSV_H_INCLUDED
+
+#include "plugin.h"
+#include "filterx/expr-function.h"
+
+#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS "columns"
+#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS "delimiters"
+#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_DIALECT "dialect"
+#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_STIP_WHITESPACES "strip_whitespaces"
+#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY "greedy"
+#define FILTERX_FUNC_PARSE_CSV_USAGE "Usage: parse_csv(msg_str [" \
+  FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS"=json_array, " \
+  FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS"=string, " \
+  FILTERX_FUNC_PARSE_CSV_ARG_NAME_DIALECT"=string, " \
+  FILTERX_FUNC_PARSE_CSV_ARG_NAME_STIP_WHITESPACES"=boolean, " \
+  FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY"=boolean])"
+
+FilterXExpr *filterx_function_parse_csv_new(const gchar *function_name, FilterXFunctionArgs *args, GError **error);
+gpointer filterx_function_construct_parse_csv(Plugin *self);
+
+#endif
diff --git a/modules/csvparser/tests/CMakeLists.txt b/modules/csvparser/tests/CMakeLists.txt
index c64aab715c..109fd35348 100644
--- a/modules/csvparser/tests/CMakeLists.txt
+++ b/modules/csvparser/tests/CMakeLists.txt
@@ -2,3 +2,4 @@ add_unit_test(CRITERION TARGET test_csvparser DEPENDS csvparser)
 add_unit_test(LIBTEST CRITERION TARGET test_csvparser_from_config DEPENDS csvparser)
 add_unit_test(CRITERION TARGET test_csvparser_perf DEPENDS csvparser)
 add_unit_test(CRITERION TARGET test_csvparser_statistics DEPENDS csvparser)
+add_unit_test(LIBTEST CRITERION TARGET test_filterx_func_parse_csv DEPENDS csvparser)
diff --git a/modules/csvparser/tests/Makefile.am b/modules/csvparser/tests/Makefile.am
index 4ea2f75e65..cd91785d07 100644
--- a/modules/csvparser/tests/Makefile.am
+++ b/modules/csvparser/tests/Makefile.am
@@ -1,15 +1,16 @@
 modules_csvparser_tests_TESTS = \
 	modules/csvparser/tests/test_csvparser \
 	modules/csvparser/tests/test_csvparser_from_config \
-	modules/csvparser/tests/test_csvparser_perf
+	modules/csvparser/tests/test_csvparser_perf \
+	modules/csvparser/tests/test_filterx_func_parse_csv
 
 check_PROGRAMS += \
-	${modules_csvparser_tests_TESTS}
+	${modules_csvparser_tests_TESTS}
 
 EXTRA_DIST += modules/csvparser/tests/CMakeLists.txt \
 	modules/basicfuncs/list-funcs.c \
 	modules/basicfuncs/tf-template.c \
-	modules/basicfuncs/tests/CMakeLists.txt
+	modules/basicfuncs/tests/CMakeLists.txt
 
 modules_csvparser_tests_test_csvparser_CFLAGS = \
 	$(TEST_CFLAGS) -I$(top_srcdir)/modules/csvparser
@@ -22,7 +23,7 @@ modules_csvparser_tests_test_csvparser_from_config_CFLAGS = \
 modules_csvparser_tests_test_csvparser_from_config_LDADD = \
 	$(TEST_LDADD) \
 	$(PREOPEN_SYSLOGFORMAT) $(PREOPEN_BASICFUNCS) \
-	-dlpreopen $(top_builddir)/modules/csvparser/libcsvparser.la
+	-dlpreopen $(top_builddir)/modules/csvparser/libcsvparser.la
 
 modules_csvparser_tests_test_csvparser_perf_CFLAGS = \
 	$(TEST_CFLAGS) -I$(top_srcdir)/modules/csvparser
@@ -37,3 +38,9 @@ modules_csvparser_tests_test_csvparser_statistics_CFLAGS = \
 modules_csvparser_tests_test_csvparser_statistics_LDADD = \
 	$(TEST_LDADD) \
 	-dlpreopen $(top_builddir)/modules/csvparser/libcsvparser.la
+
+modules_csvparser_tests_test_filterx_func_parse_csv_CFLAGS = \
+	$(TEST_CFLAGS) -I$(top_srcdir)/modules/csvparser
+modules_csvparser_tests_test_filterx_func_parse_csv_LDADD = \
+	$(TEST_LDADD) \
+	-dlpreopen $(top_builddir)/modules/csvparser/libcsvparser.la
diff --git a/modules/csvparser/tests/test_filterx_func_parse_csv.c b/modules/csvparser/tests/test_filterx_func_parse_csv.c
new file mode
100644
index 0000000000..50bc2e84ed
--- /dev/null
+++ b/modules/csvparser/tests/test_filterx_func_parse_csv.c
@@ -0,0 +1,408 @@
+/*
+ * Copyright (c) 2024 shifter
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * As an additional exemption you are allowed to compile & link against the
+ * OpenSSL libraries as published by the OpenSSL project. See the file
+ * COPYING for details.
+ */
+
+#include <criterion/criterion.h>
+#include "libtest/filterx-lib.h"
+
+#include "apphook.h"
+#include "scratch-buffers.h"
+
+#include "filterx/object-string.h"
+#include "filterx/object-null.h"
+#include "filterx/expr-literal.h"
+#include "filterx/object-json.h"
+#include "filterx-func-parse-csv.h"
+#include "filterx/object-list-interface.h"
+#include "scanner/csv-scanner/csv-scanner.h"
+#include "filterx/object-primitive.h"
+
+/*
+ * Build a json_array holding one string per name; the variadic list must end
+ * with NULL. Returns the newly created array.
+ */
+static FilterXObject *
+_generate_column_list(const gchar *column_name, ...)
+{
+  FilterXObject *result = filterx_json_array_new_empty();
+
+  va_list args;
+  va_start(args, column_name);
+
+  const gchar *next_column = column_name;
+  while (next_column != NULL)
+    {
+      FilterXObject *col_name = filterx_string_new(next_column, -1);
+      cr_assert(filterx_list_append(result, &col_name));
+      filterx_object_unref(col_name);
+      next_column = va_arg(args, const gchar *);
+    }
+
+  va_end(args);
+
+  return result;
+}
+
+Test(filterx_func_parse_csv, test_helper_generate_column_list_empty)
+{
+  FilterXObject *col_names = _generate_column_list(NULL);
+  cr_assert_not_null(col_names);
+
+  GString *repr = scratch_buffers_alloc();
+  LogMessageValueType lmvt;
+
+  cr_assert(filterx_object_marshal(col_names, repr, &lmvt));
+  cr_assert_str_eq(repr->str, "");
+  filterx_object_unref(col_names);
+}
+
+Test(filterx_func_parse_csv, test_helper_generate_column_list)
+{
+  FilterXObject *col_names = _generate_column_list("1st", NULL);
+  cr_assert_not_null(col_names);
+
+  GString *repr = scratch_buffers_alloc();
+  LogMessageValueType lmvt;
+
+  cr_assert(filterx_object_marshal(col_names, repr, &lmvt));
+  cr_assert_str_eq(repr->str, "1st");
+  filterx_object_unref(col_names);
+}
+
+Test(filterx_func_parse_csv, test_helper_generate_column_list_multiple_elts)
+{
+  FilterXObject *col_names = _generate_column_list("1st", "2nd", "3rd", NULL);
+  cr_assert_not_null(col_names);
+
+  GString *repr = scratch_buffers_alloc();
+  LogMessageValueType lmvt;
+
+  cr_assert(filterx_object_marshal(col_names, repr, &lmvt));
+  cr_assert_str_eq(repr->str, "1st,2nd,3rd");
+  filterx_object_unref(col_names);
+}
+
+Test(filterx_func_parse_csv, test_empty_args_error)
+{
+  GError *err = NULL;
+  FilterXExpr *func = filterx_function_parse_csv_new("test", NULL, &err);
+
+  cr_assert_null(func);
+  cr_assert_not_null(err);
+  cr_assert(strstr(err->message, FILTERX_FUNC_PARSE_CSV_USAGE) != NULL);
+  g_error_free(err);
+}
+
+
+Test(filterx_func_parse_csv, test_skipped_opts_causes_default_behaviour)
+{
+  GList *args = NULL;
+  args = g_list_append(args, filterx_function_arg_new(NULL,
+                                                      filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1))));
+
+  GError *err = NULL;
+  GError *args_err = NULL;
+  FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err);
+  cr_assert_null(args_err);
+  cr_assert_null(err);
+
+  FilterXObject *obj = filterx_expr_eval(func);
+
+  cr_assert_not_null(obj);
+  cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array)));
+
+  GString *repr = scratch_buffers_alloc();
+
+  LogMessageValueType lmvt;
+  cr_assert(filterx_object_marshal(obj, repr, &lmvt));
+
+  cr_assert_str_eq(repr->str, "foo,bar,baz,tik,tak,toe");
+  filterx_expr_unref(func);
+  filterx_object_unref(obj);
+  g_clear_error(&err);
+}
+
+Test(filterx_func_parse_csv, test_columns_optional_argument_is_nullable)
+{
+  GList *args = NULL;
+  args = g_list_append(args, filterx_function_arg_new(NULL,
+                                                      filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1))));
+  args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS,
+                                                      filterx_literal_new(filterx_null_new())));
+
+  GError *err = NULL;
+  GError *args_err = NULL;
+  FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err);
+  cr_assert_null(args_err);
+  cr_assert_null(err);
+
+  FilterXObject *obj = filterx_expr_eval(func);
+
+  cr_assert_not_null(obj);
+  cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array)));
+
+  GString *repr = scratch_buffers_alloc();
+
+  LogMessageValueType lmvt;
+  cr_assert(filterx_object_marshal(obj, repr, &lmvt));
+
+  cr_assert_str_eq(repr->str, "foo,bar,baz,tik,tak,toe");
+  filterx_expr_unref(func);
+  filterx_object_unref(obj);
+  g_clear_error(&err);
+}
+
+Test(filterx_func_parse_csv, test_set_optional_first_argument_column_names)
+{
+  GList *args = NULL;
+  args = g_list_append(args, filterx_function_arg_new(NULL, filterx_literal_new(filterx_string_new("foo bar baz", -1))));
+  FilterXObject *col_names = _generate_column_list("1st", "2nd", "3rd", NULL);
+  args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS,
+                                                      filterx_literal_new(col_names)));
+
+  GError *err = NULL;
+  GError *args_err = NULL;
+  FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err);
+  cr_assert_null(args_err);
+  cr_assert_null(err);
+
+  FilterXObject *obj = filterx_expr_eval(func);
+
+  cr_assert_not_null(obj);
+  cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_object)));
+
+  GString *repr = scratch_buffers_alloc();
+
+  LogMessageValueType lmvt;
+  cr_assert(filterx_object_marshal(obj, repr, &lmvt));
+
+  cr_assert_str_eq(repr->str, "{\"1st\":\"foo\",\"2nd\":\"bar\",\"3rd\":\"baz\"}");
+  filterx_expr_unref(func);
+  filterx_object_unref(obj);
+  g_clear_error(&err);
+}
+
+Test(filterx_func_parse_csv, test_column_names_sets_expected_column_size_additional_columns_dropped)
+{
+  GList *args = NULL;
+  args = g_list_append(args, filterx_function_arg_new(NULL,
+                                                      filterx_literal_new(filterx_string_new("foo bar baz more columns we did not expect", -1))));
+  FilterXObject *col_names = _generate_column_list("1st", "2nd", "3rd", NULL); // sets expected column size 3
+  args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS,
+                                                      filterx_literal_new(col_names)));
+
+  GError *err = NULL;
+  GError *args_err = NULL;
+  FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err);
+  cr_assert_null(args_err);
+  cr_assert_null(err);
+
+  FilterXObject *obj = filterx_expr_eval(func);
+
+  cr_assert_not_null(obj);
+  cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_object)));
+
+  GString *repr = scratch_buffers_alloc();
+
+  LogMessageValueType lmvt;
+  cr_assert(filterx_object_marshal(obj, repr, &lmvt));
+
+  cr_assert_str_eq(repr->str, "{\"1st\":\"foo\",\"2nd\":\"bar\",\"3rd\":\"baz\"}");
+  filterx_expr_unref(func);
+  filterx_object_unref(obj);
+  g_clear_error(&err);
+}
+
+Test(filterx_func_parse_csv, test_optional_argument_delimiters)
+{
+  GList *args = NULL;
+  args = g_list_append(args, filterx_function_arg_new(NULL,
+                                                      filterx_literal_new(filterx_string_new("foo bar+baz;tik|tak:toe", -1))));
+  args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS,
+                                                      filterx_literal_new(filterx_string_new(" +;", -1))));
+
+  GError *err = NULL;
+  GError *args_err = NULL;
+  FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err);
+  cr_assert_null(args_err);
+  cr_assert_null(err);
+
+  FilterXObject *obj = filterx_expr_eval(func);
+
+  cr_assert_not_null(obj);
+  cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array)));
+
+  GString *repr = scratch_buffers_alloc();
+
+  LogMessageValueType lmvt;
+  cr_assert(filterx_object_marshal(obj, repr, &lmvt));
+
+  cr_assert_str_eq(repr->str, "foo,bar,baz,tik|tak:toe");
+  filterx_expr_unref(func);
+  filterx_object_unref(obj);
+  g_clear_error(&err);
+}
+
+Test(filterx_func_parse_csv, test_optional_argument_dialect)
+{
+  GList *args = NULL;
+  args = g_list_append(args, filterx_function_arg_new(NULL,
+                                                      filterx_literal_new(filterx_string_new("\"PTHREAD \\\"support initialized\"", -1))));
+  args = g_list_append(args, filterx_function_arg_new("dialect",
+                                                      filterx_literal_new(filterx_string_new("CSV_SCANNER_ESCAPE_BACKSLASH", -1))));
+
+  GError *err = NULL;
+  GError *args_err = NULL;
+  FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err);
+  cr_assert_null(args_err);
+  cr_assert_null(err);
+
+  FilterXObject *obj = filterx_expr_eval(func);
+
+  cr_assert_not_null(obj);
+  cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array)));
+
+  GString *repr = scratch_buffers_alloc();
+
+  LogMessageValueType lmvt;
+  cr_assert(filterx_object_marshal(obj, repr, &lmvt));
+
+  cr_assert_str_eq(repr->str, "'PTHREAD \"support initialized'");
+  filterx_expr_unref(func);
+  filterx_object_unref(obj);
+  g_clear_error(&err);
+}
+
+Test(filterx_func_parse_csv, test_optional_argument_flag_greedy)
+{
+  GList *args = NULL;
+  args = g_list_append(args, filterx_function_arg_new(NULL,
+                                                      filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1))));
+  args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS,
+                                                      filterx_literal_new(_generate_column_list("1st", "2nd",
+                                                          "3rd", "rest", NULL)))); // columns
+  args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY,
+                                                      filterx_literal_new(filterx_boolean_new(TRUE)))); // greedy
+
+  GError *err = NULL;
+  GError *args_err = NULL;
+  FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err);
+  cr_assert_null(args_err);
+  cr_assert_null(err);
+
+  FilterXObject *obj = filterx_expr_eval(func);
+
+  cr_assert_not_null(obj);
+  cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_object)));
+
+  GString *repr = scratch_buffers_alloc();
+
+  LogMessageValueType lmvt;
+  cr_assert(filterx_object_marshal(obj, repr, &lmvt));
+
+  cr_assert_str_eq(repr->str, "{\"1st\":\"foo\",\"2nd\":\"bar\",\"3rd\":\"baz\",\"rest\":\"tik tak toe\"}");
+  filterx_expr_unref(func);
+  filterx_object_unref(obj);
+  g_clear_error(&err);
+}
+
+Test(filterx_func_parse_csv, test_optional_argument_flag_non_greedy)
+{
+  GList *args = NULL;
+  args = g_list_append(args, filterx_function_arg_new(NULL,
+                                                      filterx_literal_new(filterx_string_new("foo bar baz tik tak toe", -1))));
+  args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS,
+                                                      filterx_literal_new(_generate_column_list("1st", "2nd",
+                                                          "3rd", "rest", NULL)))); // columns
+  args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY,
+                                                      filterx_literal_new(filterx_boolean_new(FALSE)))); // greedy
+
+  GError *err = NULL;
+  GError *args_err = NULL;
+  FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err);
+  cr_assert_null(args_err);
+  cr_assert_null(err);
+
+  FilterXObject *obj = filterx_expr_eval(func);
+
+  cr_assert_not_null(obj);
+  cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_object)));
+
+  GString *repr = scratch_buffers_alloc();
+
+  LogMessageValueType lmvt;
+  cr_assert(filterx_object_marshal(obj, repr, &lmvt));
+
+  cr_assert_str_eq(repr->str, "{\"1st\":\"foo\",\"2nd\":\"bar\",\"3rd\":\"baz\",\"rest\":\"tik\"}");
+  filterx_expr_unref(func);
+  filterx_object_unref(obj);
+  g_clear_error(&err);
+}
+
+Test(filterx_func_parse_csv, test_optional_argument_flag_strip_whitespace)
+{
+  GList *args = NULL;
+  args = g_list_append(args, filterx_function_arg_new(NULL,
+                                                      filterx_literal_new(filterx_string_new(" foo , bar , baz , tik tak toe", -1))));
+  args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITERS,
+                                                      filterx_literal_new(filterx_string_new(",",
+                                                          -1)))); // delimiter
+  args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_STIP_WHITESPACES,
+                                                      filterx_literal_new(filterx_boolean_new(TRUE)))); // strip_whitespace
+
+  GError *err = NULL;
+  GError *args_err = NULL;
+  FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err);
+  cr_assert_null(args_err);
+  cr_assert_null(err);
+
+  FilterXObject *obj = filterx_expr_eval(func);
+
+  cr_assert_not_null(obj);
+  cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array)));
+
+  GString *repr = scratch_buffers_alloc();
+
+  LogMessageValueType lmvt;
+  cr_assert(filterx_object_marshal(obj, repr, &lmvt));
+
+  cr_assert_str_eq(repr->str, "foo,bar,baz,\"tik tak toe\"");
+  filterx_expr_unref(func);
+  filterx_object_unref(obj);
+  g_clear_error(&err);
+}
+
+
+static void
+setup(void)
+{
+  app_startup();
+  init_libtest_filterx();
+}
+
+static void
+teardown(void)
+{
+  scratch_buffers_explicit_gc();
+  deinit_libtest_filterx();
+  app_shutdown();
+}
+
+TestSuite(filterx_func_parse_csv, .init = setup, .fini = teardown);
diff --git a/tests/copyright/policy b/tests/copyright/policy
index f97b53aef8..a5664ad0a7 100644
--- a/tests/copyright/policy
+++ b/tests/copyright/policy
@@ -273,6 +273,8 @@ modules/kvformat/tests/test_filterx_func_parse_kv.c
 modules/kvformat/tests/test_filterx_func_format_kv.c
 docker/python-modules/webhook/scl/webhook.conf
 docker/python-modules/webhook/source.py
+modules/csvparser/filterx-func-parse-csv\.[ch]
+modules/csvparser/tests/test_filterx_func_parse_csv.c
 
 ###########################################################################
 # These files are GPLd with Balabit origin.
diff --git a/tests/light/functional_tests/filterx/test_filterx.py b/tests/light/functional_tests/filterx/test_filterx.py
index 4d88316f0a..9d094a5788 100644
--- a/tests/light/functional_tests/filterx/test_filterx.py
+++ b/tests/light/functional_tests/filterx/test_filterx.py
@@ -1385,3 +1385,90 @@ def test_parse_kv_stray_words_value_name(config, syslog_ng):
     assert file_true.get_stats()["processed"] == 1
     assert "processed" not in file_false.get_stats()
     assert file_true.read_log() == "{\"foo\":\"bar\",\"bar\":\"baz\",\"stray_words\":\"thisisstray\"}\n"
+
+
+def test_parse_csv_default_arguments(config, syslog_ng):
+    (file_true, file_false) = create_config(
+        config, """
+    custom_message = "foo bar baz";
+    $MSG = parse_csv(custom_message);
+    """,
+    )
+    syslog_ng.start(config)
+
+    assert file_true.get_stats()["processed"] == 1
+    assert "processed" not in file_false.get_stats()
+    assert file_true.read_log() == "foo,bar,baz\n"
+
+
+def test_parse_csv_optional_arg_columns(config, syslog_ng):
+    (file_true, file_false) = create_config(
+        config, """
+    custom_message = "foo bar baz";
+    cols = json_array(["1st","2nd","3rd"]);
+    $MSG = parse_csv(custom_message, columns=cols);
+    """,
+    )
+    syslog_ng.start(config)
+
+    assert file_true.get_stats()["processed"] == 1
+    assert "processed" not in file_false.get_stats()
+    assert file_true.read_log() == '{"1st":"foo","2nd":"bar","3rd":"baz"}\n'
+
+
+def test_parse_csv_optional_arg_delimiters(config, syslog_ng):
+    (file_true, file_false) = create_config(
+        config, """
+    custom_message = "foo bar,baz.tik;tak!toe";
+    $MSG = parse_csv(custom_message, delimiters=" ,.");
+    """,
+    )
+    syslog_ng.start(config)
+
+    assert file_true.get_stats()["processed"] == 1
+    assert "processed" not in file_false.get_stats()
+    assert file_true.read_log() == 'foo,bar,baz,tik;tak!toe\n'
+
+
+def test_parse_csv_optional_arg_non_greedy(config, syslog_ng):
+    (file_true, file_false) = create_config(
+        config, """
+    custom_message = "foo bar baz tik tak toe";
+    cols = json_array(["1st","2nd","3rd"]);
+    $MSG = parse_csv(custom_message, columns=cols, greedy=false);
+    """,
+    )
+    syslog_ng.start(config)
+
+    assert file_true.get_stats()["processed"] == 1
+    assert "processed" not in file_false.get_stats()
+    assert file_true.read_log() == '{"1st":"foo","2nd":"bar","3rd":"baz"}\n'
+
+
+def test_parse_csv_optional_arg_greedy(config, syslog_ng):
+    (file_true, file_false) = create_config(
+        config, """
+    custom_message = "foo bar baz tik tak toe";
+    cols = json_array(["1st","2nd","3rd","rest"]);
+    $MSG = parse_csv(custom_message, columns=cols, greedy=true);
+    """,
+    )
+    syslog_ng.start(config)
+
+    assert file_true.get_stats()["processed"] == 1
+    assert "processed" not in file_false.get_stats()
+    assert file_true.read_log() == '{"1st":"foo","2nd":"bar","3rd":"baz","rest":"tik tak toe"}\n'
+
+
+def test_parse_csv_optional_arg_strip_whitespace(config, syslog_ng):
+    (file_true, file_false) = create_config(
+        config, """
+    custom_message = " foo , bar , baz, tik, tak, toe ";
+    $MSG = parse_csv(custom_message, delimiters=",", strip_whitespaces=true);
+    """,
+    )
+    syslog_ng.start(config)
+
+    assert file_true.get_stats()["processed"] == 1
+    assert "processed" not in file_false.get_stats()
+    assert file_true.read_log() == 'foo,bar,baz,tik,tak,toe\n'