Skip to content

Commit

Permalink
Merge pull request #194 from bshifter/filterx-parse-csv-string-delimi…
Browse files Browse the repository at this point in the history
…ters

Filterx parse csv string delimiters
  • Loading branch information
alltilla authored Jul 5, 2024
2 parents f18641e + ba2c712 commit 51e8672
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 34 deletions.
49 changes: 34 additions & 15 deletions modules/csvparser/filterx-func-parse-csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,43 +45,45 @@ typedef struct FilterXFunctionParseCSV_
FilterXExpr *msg;
CSVScannerOptions options;
FilterXExpr *columns;
FilterXExpr *string_delimiters;
} FilterXFunctionParseCSV;

static gboolean
_parse_columns(FilterXFunctionParseCSV *self, GList **col_names)
_parse_list_argument(FilterXFunctionParseCSV *self, FilterXExpr *list_expr, GList **list, const gchar *arg_name)
{
gboolean result = FALSE;
if (!self->columns)
if (!list_expr)
return TRUE;
FilterXObject *cols_obj = filterx_expr_eval(self->columns);
if (!cols_obj)
FilterXObject *list_obj = filterx_expr_eval(list_expr);
if (!list_obj)
return FALSE;

if (!filterx_object_is_type(cols_obj, &FILTERX_TYPE_NAME(json_array)))
if (!filterx_object_is_type(list_obj, &FILTERX_TYPE_NAME(json_array)))
{
msg_error("columns argument must be a type of json array.",
evt_tag_str("current_type", cols_obj->type->name ) );
msg_error("list object argument must be a type of json array.",
evt_tag_str("current_type", list_obj->type->name ),
evt_tag_str("argument_name", arg_name));
goto exit;
}

guint64 size;
if (!filterx_object_len(cols_obj, &size))
if (!filterx_object_len(list_obj, &size))
return FALSE;

for (guint64 i = 0; i < size; i++)
{
FilterXObject *col = filterx_list_get_subscript(cols_obj, i);
if (filterx_object_is_type(col, &FILTERX_TYPE_NAME(string)))
FilterXObject *elt = filterx_list_get_subscript(list_obj, i);
if (filterx_object_is_type(elt, &FILTERX_TYPE_NAME(string)))
{
const gchar *col_name = filterx_string_get_value(col, NULL);
*col_names = g_list_append(*col_names, g_strdup(col_name));
const gchar *val = filterx_string_get_value(elt, NULL);
*list = g_list_append(*list, g_strdup(val));
}
filterx_object_unref(col);
filterx_object_unref(elt);
}

result = TRUE;
exit:
filterx_object_unref(cols_obj);
filterx_object_unref(list_obj);
return result;
}

Expand All @@ -98,6 +100,7 @@ _eval(FilterXExpr *s)
gboolean ok = FALSE;
FilterXObject *result = NULL;
GList *cols = NULL;
GList *string_delimiters = NULL;

gsize len;
const gchar *input;
Expand All @@ -111,7 +114,14 @@ _eval(FilterXExpr *s)

APPEND_ZERO(input, input, len);

if (!_parse_columns(self, &cols))
if (!_parse_list_argument(self, self->string_delimiters, &string_delimiters,
FILTERX_FUNC_PARSE_CSV_ARG_NAME_STRING_DELIMITERS))
goto exit;

if (string_delimiters)
csv_scanner_options_set_string_delimiters(&self->options, string_delimiters);

if (!_parse_list_argument(self, self->columns, &cols, FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS))
goto exit;

if (cols)
Expand Down Expand Up @@ -173,6 +183,7 @@ _free(FilterXExpr *s)
FilterXFunctionParseCSV *self = (FilterXFunctionParseCSV *) s;
filterx_expr_unref(self->msg);
filterx_expr_unref(self->columns);
filterx_expr_unref(self->string_delimiters);
csv_scanner_options_clean(&self->options);
filterx_function_free_method(&self->super);
}
Expand All @@ -197,6 +208,12 @@ _extract_columns_expr(FilterXFunctionArgs *args, GError **error)
return filterx_function_args_get_named_expr(args, FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS);
}

/*
 * Look up the optional "string_delimiters" named argument.
 *
 * Returns whatever filterx_function_args_get_named_expr() yields for that
 * name. The error parameter is accepted for signature parity with the other
 * extractors but is not written by this function.
 */
static FilterXExpr *
_extract_stringdelimiters_expr(FilterXFunctionArgs *args, GError **error)
{
  FilterXExpr *delimiters_expr =
    filterx_function_args_get_named_expr(args, FILTERX_FUNC_PARSE_CSV_ARG_NAME_STRING_DELIMITERS);

  return delimiters_expr;
}

static gboolean
_extract_opts(FilterXFunctionParseCSV *self, FilterXFunctionArgs *args, GError **error)
{
Expand Down Expand Up @@ -312,6 +329,8 @@ _extract_args(FilterXFunctionParseCSV *self, FilterXFunctionArgs *args, GError *

self->columns = _extract_columns_expr(args, error);

self->string_delimiters = _extract_stringdelimiters_expr(args, error);

if (!_extract_opts(self, args, error))
return FALSE;

Expand Down
1 change: 1 addition & 0 deletions modules/csvparser/filterx-func-parse-csv.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS "columns"
#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITER "delimiter"
#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_STRING_DELIMITERS "string_delimiters"
#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_DIALECT "dialect"
#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_STRIP_WHITESPACES "strip_whitespaces"
#define FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY "greedy"
Expand Down
103 changes: 84 additions & 19 deletions modules/csvparser/tests/test_filterx_func_parse_csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,31 +35,31 @@
#include "filterx/object-primitive.h"

static FilterXObject *
_generate_column_list(const gchar *column_name, ...)
_generate_string_list(const gchar *elts, ...)
{
FilterXObject *result = filterx_json_array_new_empty();

va_list args;
va_start(args, column_name);
va_start(args, elts);

const gchar *next_column = column_name;
while (next_column != NULL)
const gchar *elt = elts;
while (elt != NULL)
{
FilterXObject *col_name = filterx_string_new(next_column, -1);
cr_assert(filterx_list_append(result, &col_name));
filterx_object_unref(col_name);
next_column = va_arg(args, const gchar *);
FilterXObject *str = filterx_string_new(elt, -1);
cr_assert(filterx_list_append(result, &str));
filterx_object_unref(str);
elt = va_arg(args, const gchar *);
}

va_end(args);
va_start(args, column_name);
va_start(args, elts);

return result;
}

Test(filterx_func_parse_csv, test_helper_generate_column_list_empty)
Test(filterx_func_parse_csv, test_helper_generate_string_list_empty)
{
FilterXObject *col_names = _generate_column_list(NULL);
FilterXObject *col_names = _generate_string_list(NULL);
cr_assert_not_null(col_names);

GString *repr = scratch_buffers_alloc();
Expand All @@ -70,9 +70,9 @@ Test(filterx_func_parse_csv, test_helper_generate_column_list_empty)
filterx_object_unref(col_names);
}

Test(filterx_func_parse_csv, test_helper_generate_column_list)
Test(filterx_func_parse_csv, test_helper_generate_string_list)
{
FilterXObject *col_names = _generate_column_list("1st", NULL);
FilterXObject *col_names = _generate_string_list("1st", NULL);
cr_assert_not_null(col_names);

GString *repr = scratch_buffers_alloc();
Expand All @@ -83,9 +83,9 @@ Test(filterx_func_parse_csv, test_helper_generate_column_list)
filterx_object_unref(col_names);
}

Test(filterx_func_parse_csv, test_helper_generate_column_list_multiple_elts)
Test(filterx_func_parse_csv, test_helper_generate_string_list_multiple_elts)
{
FilterXObject *col_names = _generate_column_list("1st", "2nd", "3rd", NULL);
FilterXObject *col_names = _generate_string_list("1st", "2nd", "3rd", NULL);
cr_assert_not_null(col_names);

GString *repr = scratch_buffers_alloc();
Expand Down Expand Up @@ -141,7 +141,7 @@ Test(filterx_func_parse_csv, test_set_optional_first_argument_column_names)
{
GList *args = NULL;
args = g_list_append(args, filterx_function_arg_new(NULL, filterx_literal_new(filterx_string_new("foo,bar,baz", -1))));
FilterXObject *col_names = _generate_column_list("1st", "2nd", "3rd", NULL);
FilterXObject *col_names = _generate_string_list("1st", "2nd", "3rd", NULL);
args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS,
filterx_literal_new(col_names)));

Expand Down Expand Up @@ -172,7 +172,7 @@ Test(filterx_func_parse_csv, test_column_names_sets_expected_column_size_additio
GList *args = NULL;
args = g_list_append(args, filterx_function_arg_new(NULL,
filterx_literal_new(filterx_string_new("foo,bar,baz,more,columns,we,did,not,expect", -1))));
FilterXObject *col_names = _generate_column_list("1st", "2nd", "3rd", NULL); // sets expected column size 3
FilterXObject *col_names = _generate_string_list("1st", "2nd", "3rd", NULL); // sets expected column size 3
args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS,
filterx_literal_new(col_names)));

Expand Down Expand Up @@ -264,7 +264,7 @@ Test(filterx_func_parse_csv, test_optional_argument_flag_greedy)
args = g_list_append(args, filterx_function_arg_new(NULL,
filterx_literal_new(filterx_string_new("foo,bar,baz,tik,tak,toe", -1))));
args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS,
filterx_literal_new(_generate_column_list("1st", "2nd",
filterx_literal_new(_generate_string_list("1st", "2nd",
"3rd", "rest", NULL)))); // columns
args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY,
filterx_literal_new(filterx_boolean_new(TRUE)))); // greedy
Expand Down Expand Up @@ -297,7 +297,7 @@ Test(filterx_func_parse_csv, test_optional_argument_flag_non_greedy)
args = g_list_append(args, filterx_function_arg_new(NULL,
filterx_literal_new(filterx_string_new("foo,bar,baz,tik,tak,toe", -1))));
args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_COLUMNS,
filterx_literal_new(_generate_column_list("1st", "2nd",
filterx_literal_new(_generate_string_list("1st", "2nd",
"3rd", "rest", NULL)))); // columns
args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_GREEDY,
filterx_literal_new(filterx_boolean_new(FALSE)))); // greedy
Expand Down Expand Up @@ -390,6 +390,71 @@ Test(filterx_func_parse_csv, test_optional_argument_flag_not_to_strip_whitespace
g_error_free(err);
}

/*
 * parse_csv() with only the string_delimiters argument: the input is split on
 * the multi-character delimiters "foo", "bar" and "baz", and the marshalled
 * result must list the pieces between them.
 */
Test(filterx_func_parse_csv, test_optional_argument_string_delimiters)
{
  GList *args = NULL;
  args = g_list_append(args, filterx_function_arg_new(NULL,
                                                      filterx_literal_new(filterx_string_new("testingfoostringbardelimitersbazthisfooway", -1))));
  FilterXObject *string_delimiters = _generate_string_list("foo", "bar", "baz", NULL);
  args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_STRING_DELIMITERS,
                                                      filterx_literal_new(string_delimiters)));

  GError *err = NULL;
  GError *args_err = NULL;
  FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err);
  cr_assert_null(args_err);
  cr_assert_null(err);

  FilterXObject *obj = filterx_expr_eval(func);

  cr_assert_not_null(obj);
  cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array)));

  GString *repr = scratch_buffers_alloc();

  LogMessageValueType lmvt;
  cr_assert(filterx_object_marshal(obj, repr, &lmvt));

  cr_assert_str_eq(repr->str, "testing,string,delimiters,this,way");
  filterx_expr_unref(func);
  filterx_object_unref(obj);
  /* err is known to be NULL here (asserted above); g_error_free(NULL) would
   * raise a GLib critical, whereas g_clear_error() is a no-op on NULL. */
  g_clear_error(&err);
}

/*
 * parse_csv() with both string_delimiters ("foo", "bar", "baz") and the
 * delimiter argument ".,;": the expected output shows the input being split
 * on every one of them.
 */
Test(filterx_func_parse_csv, test_optional_argument_string_delimiters_and_delimiters)
{
  GList *args = NULL;
  args = g_list_append(args, filterx_function_arg_new(NULL,
                                                      filterx_literal_new(filterx_string_new("testing;delimiterfoochaos,withbarthis.longbazstring", -1))));
  FilterXObject *string_delimiters = _generate_string_list("foo", "bar", "baz", NULL);
  args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_STRING_DELIMITERS,
                                                      filterx_literal_new(string_delimiters)));
  args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_PARSE_CSV_ARG_NAME_DELIMITER,
                                                      filterx_literal_new(filterx_string_new(".,;", -1))));

  GError *err = NULL;
  GError *args_err = NULL;
  FilterXExpr *func = filterx_function_parse_csv_new("test", filterx_function_args_new(args, &args_err), &err);
  cr_assert_null(args_err);
  cr_assert_null(err);

  FilterXObject *obj = filterx_expr_eval(func);

  cr_assert_not_null(obj);
  cr_assert(filterx_object_is_type(obj, &FILTERX_TYPE_NAME(json_array)));

  GString *repr = scratch_buffers_alloc();

  LogMessageValueType lmvt;
  cr_assert(filterx_object_marshal(obj, repr, &lmvt));

  cr_assert_str_eq(repr->str, "testing,delimiter,chaos,with,this,long,string");
  filterx_expr_unref(func);
  filterx_object_unref(obj);
  /* err is known to be NULL here (asserted above); g_error_free(NULL) would
   * raise a GLib critical, whereas g_clear_error() is a no-op on NULL. */
  g_clear_error(&err);
}


static void
setup(void)
{
Expand Down

0 comments on commit 51e8672

Please sign in to comment.