From eab31e3a3fc6752a9f6d1731ddba11067c2fcad2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20V=C3=A1rady?= Date: Mon, 12 Aug 2024 15:53:08 +0200 Subject: [PATCH 1/5] csvparser-filterx: refactor _init_scanner() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: László Várady --- modules/csvparser/filterx-func-parse-csv.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/csvparser/filterx-func-parse-csv.c b/modules/csvparser/filterx-func-parse-csv.c index c24d977df9..a5bc4b3a76 100644 --- a/modules/csvparser/filterx-func-parse-csv.c +++ b/modules/csvparser/filterx-func-parse-csv.c @@ -89,12 +89,12 @@ _parse_list_argument(FilterXFunctionParseCSV *self, FilterXExpr *list_expr, GLis } static inline void -_init_scanner(FilterXFunctionParseCSV *self, GList *string_delimiters, GList *cols, const gchar *input, +_init_scanner(FilterXFunctionParseCSV *self, GList *string_delimiters, gint num_of_cols, const gchar *input, CSVScanner *scanner, CSVScannerOptions *local_opts) { CSVScannerOptions *opts = &self->options; - if (string_delimiters || cols) + if (string_delimiters || num_of_cols) { csv_scanner_options_copy(local_opts, &self->options); opts = local_opts; @@ -103,8 +103,8 @@ _init_scanner(FilterXFunctionParseCSV *self, GList *string_delimiters, GList *co if (string_delimiters) csv_scanner_options_set_string_delimiters(local_opts, string_delimiters); - if (cols) - csv_scanner_options_set_expected_columns(local_opts, g_list_length(cols)); + if (num_of_cols) + csv_scanner_options_set_expected_columns(local_opts, num_of_cols); csv_scanner_init(scanner, opts, input); } @@ -144,7 +144,7 @@ _eval(FilterXExpr *s) CSVScanner scanner; CSVScannerOptions local_opts = {0}; - _init_scanner(self, string_delimiters, cols, input, &scanner, &local_opts); + _init_scanner(self, string_delimiters, g_list_length(cols), input, &scanner, &local_opts); GList *col = cols; while (csv_scanner_scan_next(&scanner)) From 4b8938708dc81bd91fc99f4e1a1bac9c1a5da4a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20V=C3=A1rady?= Date: Mon, 12 Aug 2024 16:24:08 +0200 Subject: [PATCH 2/5] csvparser-filterx: do not traverse column list 3 times MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - constructing list - g_list_length() - traversing list This also fixes a possible bug where a name-value pair passed to filterx_string_new() may not have been null-terminated. Signed-off-by: László Várady --- modules/csvparser/filterx-func-parse-csv.c | 54 ++++++++++++++++++---- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/modules/csvparser/filterx-func-parse-csv.c b/modules/csvparser/filterx-func-parse-csv.c index a5bc4b3a76..380a5fce54 100644 --- a/modules/csvparser/filterx-func-parse-csv.c +++ b/modules/csvparser/filterx-func-parse-csv.c @@ -109,6 +109,34 @@ _init_scanner(FilterXFunctionParseCSV *self, GList *string_delimiters, gint num_ csv_scanner_init(scanner, opts, input); } +static inline gboolean +_maybe_init_columns(FilterXFunctionParseCSV *self, FilterXObject **columns, guint64 *num_of_columns) +{ + if (!self->columns) + { + *columns = NULL; + *num_of_columns = 0; + return TRUE; + } + + *columns = filterx_expr_eval(self->columns); + if (!*columns) + return FALSE; + + if (!filterx_object_is_type(*columns, &FILTERX_TYPE_NAME(json_array))) + { + msg_error("list object argument must be a type of json array.", + evt_tag_str("current_type", (*columns)->type->name), + evt_tag_str("argument_name", FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS)); + return FALSE; + } + + if (!filterx_object_len(*columns, num_of_columns)) + return FALSE; + + return TRUE; +} + static FilterXObject * _eval(FilterXExpr *s) { @@ -120,8 +148,9 @@ _eval(FilterXExpr *s) gboolean ok = FALSE; FilterXObject *result = NULL; - GList *cols = NULL; GList *string_delimiters = NULL; + guint64 num_of_columns = 0; + FilterXObject *cols = NULL; gsize len; const gchar *input; @@ -134,7 +163,7 @@ _eval(FilterXExpr *s) FILTERX_FUNC_PARSE_CSV_ARG_NAME_STRING_DELIMITERS)) goto exit; - if (!_parse_list_argument(self, self->columns, &cols, FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS)) + if(!_maybe_init_columns(self, &cols, &num_of_columns)) goto exit; if (cols) @@ -144,16 +173,22 @@ _eval(FilterXExpr *s) CSVScanner scanner; CSVScannerOptions local_opts = {0}; - _init_scanner(self, string_delimiters, g_list_length(cols), input, &scanner, &local_opts); + _init_scanner(self, string_delimiters, num_of_columns, input, &scanner, &local_opts); - GList *col = cols; + guint64 i = 0; while (csv_scanner_scan_next(&scanner)) { if (cols) { - if (!col) + if (i >= num_of_columns) break; - FilterXObject *key = filterx_string_new(col->data, -1); + + FilterXObject *col = filterx_list_get_subscript(cols, i); + const gchar *col_name; + gsize col_name_len; + filterx_object_extract_string(col, &col_name, &col_name_len); + + FilterXObject *key = filterx_string_new(col_name, col_name_len); FilterXObject *val = filterx_string_new(csv_scanner_get_current_value(&scanner), csv_scanner_get_current_value_len(&scanner)); @@ -161,10 +196,11 @@ _eval(FilterXExpr *s) filterx_object_unref(key); filterx_object_unref(val); + filterx_object_unref(col); if (!ok) goto exit; - col = g_list_next(col); + i++; } else { @@ -184,10 +220,10 @@ _eval(FilterXExpr *s) { filterx_object_unref(result); } - g_list_free_full(cols, (GDestroyNotify)g_free); + filterx_object_unref(cols); filterx_object_unref(obj); csv_scanner_deinit(&scanner); - return ok?result:NULL; + return ok ? result : NULL; } static void From be1ac66942fb39135e0dbe43cfd9ecf6ab495f05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20V=C3=A1rady?= Date: Mon, 12 Aug 2024 17:35:12 +0200 Subject: [PATCH 3/5] csvparser-filterx: extract _fill_object_col() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: László Várady --- modules/csvparser/filterx-func-parse-csv.c | 40 +++++++++++++--------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/modules/csvparser/filterx-func-parse-csv.c b/modules/csvparser/filterx-func-parse-csv.c index 380a5fce54..45ea55a67b 100644 --- a/modules/csvparser/filterx-func-parse-csv.c +++ b/modules/csvparser/filterx-func-parse-csv.c @@ -137,6 +137,27 @@ _maybe_init_columns(FilterXFunctionParseCSV *self, FilterXObject **columns, guin return TRUE; } +static inline gboolean +_fill_object_col(FilterXObject *cols, gint64 index, CSVScanner *scanner, FilterXObject *result) +{ + FilterXObject *col = filterx_list_get_subscript(cols, index); + const gchar *col_name; + gsize col_name_len; + filterx_object_extract_string(col, &col_name, &col_name_len); + + FilterXObject *key = filterx_string_new(col_name, col_name_len); + FilterXObject *val = filterx_string_new(csv_scanner_get_current_value(scanner), + csv_scanner_get_current_value_len(scanner)); + + gboolean ok = filterx_object_set_subscript(result, key, &val); + + filterx_object_unref(key); + filterx_object_unref(val); + filterx_object_unref(col); + + return ok; +} + static FilterXObject * _eval(FilterXExpr *s) { @@ -183,23 +204,10 @@ _eval(FilterXExpr *s) if (i >= num_of_columns) break; - FilterXObject *col = filterx_list_get_subscript(cols, i); - const gchar *col_name; - gsize col_name_len; - filterx_object_extract_string(col, &col_name, &col_name_len); - - FilterXObject *key = filterx_string_new(col_name, col_name_len); - FilterXObject *val = filterx_string_new(csv_scanner_get_current_value(&scanner), - csv_scanner_get_current_value_len(&scanner)); - - ok = filterx_object_set_subscript(result, key, &val); - - filterx_object_unref(key); - filterx_object_unref(val); - filterx_object_unref(col); - - if (!ok) + ok = _fill_object_col(cols, i, &scanner, result); + if(!ok) goto exit; + i++; } else From 40826723269cec667969906d8619d7cf67f74112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20V=C3=A1rady?= Date: Tue, 13 Aug 2024 14:38:18 +0200 Subject: [PATCH 4/5] csvparser-filterx: use column names directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: László Várady --- modules/csvparser/filterx-func-parse-csv.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/modules/csvparser/filterx-func-parse-csv.c b/modules/csvparser/filterx-func-parse-csv.c index 45ea55a67b..e5d2a8eb2f 100644 --- a/modules/csvparser/filterx-func-parse-csv.c +++ b/modules/csvparser/filterx-func-parse-csv.c @@ -141,17 +141,11 @@ static inline gboolean _fill_object_col(FilterXObject *cols, gint64 index, CSVScanner *scanner, FilterXObject *result) { FilterXObject *col = filterx_list_get_subscript(cols, index); - const gchar *col_name; - gsize col_name_len; - filterx_object_extract_string(col, &col_name, &col_name_len); - - FilterXObject *key = filterx_string_new(col_name, col_name_len); FilterXObject *val = filterx_string_new(csv_scanner_get_current_value(scanner), csv_scanner_get_current_value_len(scanner)); - gboolean ok = filterx_object_set_subscript(result, key, &val); + gboolean ok = filterx_object_set_subscript(result, col, &val); - filterx_object_unref(key); filterx_object_unref(val); filterx_object_unref(col); From 51831dd54dc391be9c031dd90197d80db5f46a77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20V=C3=A1rady?= Date: Tue, 13 Aug 2024 14:49:17 +0200 Subject: [PATCH 5/5] csvparser-filterx: extract _fill_array_element() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: László Várady --- modules/csvparser/filterx-func-parse-csv.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/modules/csvparser/filterx-func-parse-csv.c b/modules/csvparser/filterx-func-parse-csv.c index e5d2a8eb2f..45991e6e1d 100644 --- a/modules/csvparser/filterx-func-parse-csv.c +++ b/modules/csvparser/filterx-func-parse-csv.c @@ -152,6 +152,20 @@ _fill_object_col(FilterXObject *cols, gint64 index, CSVScanner *scanner, FilterX return ok; } +static inline gboolean +_fill_array_element(CSVScanner *scanner, FilterXObject *result) +{ + const gchar *current_value = csv_scanner_get_current_value(scanner); + gint current_value_len = csv_scanner_get_current_value_len(scanner); + FilterXObject *val = filterx_string_new(current_value, current_value_len); + + gboolean ok = filterx_list_append(result, &val); + + filterx_object_unref(val); + + return ok; +} + static FilterXObject * _eval(FilterXExpr *s) { @@ -206,13 +220,7 @@ _eval(FilterXExpr *s) } else { - const gchar *current_value = csv_scanner_get_current_value(&scanner); - gint current_value_len = csv_scanner_get_current_value_len(&scanner); - FilterXObject *val = filterx_string_new(current_value, current_value_len); - - ok = filterx_list_append(result, &val); - - filterx_object_unref(val); + ok = _fill_array_element(&scanner, result); } }