Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filterx format csv #132

Merged
merged 3 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions modules/csvparser/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ set(CSVPARSER_SOURCES
csvparser-plugin.c
filterx-func-parse-csv.h
filterx-func-parse-csv.c
filterx-func-format-csv.h
filterx-func-format-csv.c
)

add_module(
Expand Down
4 changes: 3 additions & 1 deletion modules/csvparser/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ modules_csvparser_libcsvparser_la_SOURCES = \
modules/csvparser/csvparser-parser.h \
modules/csvparser/csvparser-plugin.c \
modules/csvparser/filterx-func-parse-csv.h \
modules/csvparser/filterx-func-parse-csv.c
modules/csvparser/filterx-func-parse-csv.c \
modules/csvparser/filterx-func-format-csv.h \
modules/csvparser/filterx-func-format-csv.c

modules_csvparser_libcsvparser_la_CPPFLAGS = \
$(AM_CPPFLAGS) \
Expand Down
6 changes: 6 additions & 0 deletions modules/csvparser/csvparser-plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "plugin.h"
#include "plugin-types.h"
#include "filterx-func-parse-csv.h"
#include "filterx-func-format-csv.h"

extern CfgParser csvparser_parser;

Expand All @@ -41,6 +42,11 @@ static Plugin csvparser_plugins[] =
.name = "parse_csv",
.construct = filterx_function_construct_parse_csv,
},
{
.type = LL_CONTEXT_FILTERX_FUNC,
.name = "format_csv",
.construct = filterx_function_construct_format_csv,
},
};

gboolean
Expand Down
256 changes: 256 additions & 0 deletions modules/csvparser/filterx-func-format-csv.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
/*
* Copyright (c) 2024 shifter
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* As an additional exemption you are allowed to compile & link against the
* OpenSSL libraries as published by the OpenSSL project. See the file
* COPYING for details.
*
*/

#include "filterx-func-format-csv.h"
#include "filterx/expr-literal.h"
#include "filterx/object-string.h"
#include "filterx/object-null.h"
#include "filterx/object-dict-interface.h"
#include "filterx/object-list-interface.h"
#include "filterx/filterx-eval.h"

#include "scratch-buffers.h"
#include "utf8utils.h"

/* Usage text appended to the error messages raised by format_csv(). */
#define FILTERX_FUNC_FORMAT_CSV_USAGE "Usage: format_csv($input(list or dict), delimiter=\",\", columns=$columns(list))"

/* Per-call-site state of the format_csv() FilterX function. */
typedef struct FilterXFunctionFormatCSV_
{
/* base FilterX function; must stay first, the code casts FilterXExpr * to this struct */
FilterXFunction super;
/* expression producing the list or dict to be formatted (first positional argument) */
FilterXExpr *input;
/* single-character field separator, set from the "delimiter" named argument */
gchar delimiter;
/* optional expression producing the list of dict keys to emit; NULL when not given */
FilterXExpr *columns;
} FilterXFunctionFormatCSV;

/*
 * Append one CSV field to the output buffer.
 *
 * Called directly for list input (key is NULL) and as the iteration callback
 * of filterx_dict_iter() for dict input.
 *
 * user_data is a two-element gpointer array:
 *   [0] the FilterXFunctionFormatCSV instance,
 *   [1] the GString the formatted line is accumulated in.
 *
 * Returns FALSE when value is NULL or its repr cannot be appended, TRUE
 * otherwise (including when a dict/list value is skipped).
 */
static gboolean
_append_to_buffer(FilterXObject *key, FilterXObject *value, gpointer user_data)
{
  if (!value)
    return FALSE;

  FilterXFunctionFormatCSV *self = ((gpointer *) user_data)[0];
  GString *buffer = ((gpointer *) user_data)[1];

  if (filterx_object_is_type(value, &FILTERX_TYPE_NAME(dict)) ||
      filterx_object_is_type(value, &FILTERX_TYPE_NAME(list)))
    {
      msg_debug("FilterX: format_csv(): skipping object, type not supported",
                evt_tag_str("type", value->type->name));
      return TRUE;
    }

  /*
   * self->delimiter is a single gchar, not a NUL-terminated string, so it
   * must be appended as a character.  Passing its address to
   * g_string_append() would read past the member until a zero byte is found.
   */
  if (buffer->len)
    g_string_append_c(buffer, self->delimiter);

  gsize len_before_value = buffer->len;
  if (!filterx_object_repr_append(value, buffer))
    return FALSE;

  /* TODO: make the characters here configurable. */
  gsize value_len = buffer->len - len_before_value;
  if (memchr(buffer->str + len_before_value, self->delimiter, value_len) == NULL)
    return TRUE;

  /*
   * The value contains the delimiter: cut it back out of the buffer and
   * re-append it quoted, with embedded double quotes escaped.
   *
   * Review note: this is kind of the "dialect" parameter already supported
   * on the parse_csv() side; the dialect would determine what kind of
   * quotation is produced on the output side.
   */
  ScratchBuffersMarker marker;
  GString *value_buffer = scratch_buffers_alloc_and_mark(&marker);

  g_string_assign(value_buffer, buffer->str + len_before_value);
  g_string_truncate(buffer, len_before_value);
  g_string_append_c(buffer, '"');
  append_unsafe_utf8_as_escaped_binary(buffer, value_buffer->str, value_buffer->len, "\"");
  g_string_append_c(buffer, '"');

  scratch_buffers_reclaim_marked(marker);

  return TRUE;
}

/*
 * Format a list input: every element becomes one field, in list order.
 *
 * Stops at the first element that cannot be appended and returns FALSE;
 * returns FALSE as well when the length of the list cannot be determined.
 */
static gboolean
_handle_list_input(FilterXFunctionFormatCSV *self, FilterXObject *csv_data, GString *formatted)
{
  guint64 num_elements;
  gpointer append_args[] = { self, formatted };

  if (!filterx_object_len(csv_data, &num_elements))
    return FALSE;

  for (guint64 idx = 0; idx < num_elements; idx++)
    {
      FilterXObject *item = filterx_list_get_subscript(csv_data, idx);
      gboolean appended = _append_to_buffer(NULL, item, append_args);
      filterx_object_unref(item);

      if (!appended)
        return FALSE;
    }

  return TRUE;
}

/*
 * Format the dict values selected by the "columns" argument, in column order.
 *
 * Fails (returns FALSE) when columns does not evaluate to a list, when a
 * column entry cannot be fetched, or when a column is missing from csv_data
 * (the lookup yields NULL, which _append_to_buffer() rejects).
 */
static gboolean
_handle_dict_input_with_columns(FilterXFunctionFormatCSV *self, FilterXObject *csv_data, GString *formatted)
{
  gpointer user_data[] = { self, formatted };
  guint64 size;

  FilterXObject *cols = filterx_expr_eval(self->columns);
  if (!cols || !filterx_object_is_type(cols, &FILTERX_TYPE_NAME(list)) || !filterx_object_len(cols, &size))
    {
      filterx_object_unref(cols);
      filterx_eval_push_error("Columns must represented as list. " FILTERX_FUNC_FORMAT_CSV_USAGE, &self->super.super, NULL);
      return FALSE;
    }

  gboolean success = TRUE;
  for (guint64 i = 0; i < size && success; i++)
    {
      FilterXObject *col = filterx_list_get_subscript(cols, i);
      /* never use a NULL column object as a subscript key; treat it as a failed lookup */
      FilterXObject *elt = col ? filterx_object_get_subscript(csv_data, col) : NULL;
      success = _append_to_buffer(col, elt, user_data);
      filterx_object_unref(col);
      filterx_object_unref(elt);
    }

  filterx_object_unref(cols);
  return success;
}

/*
 * Format a dict input.
 *
 * Without a "columns" argument every value is emitted in the dict's own
 * iteration order; otherwise only the listed columns are emitted.
 */
static gboolean
_handle_dict_input(FilterXFunctionFormatCSV *self, FilterXObject *csv_data, GString *formatted)
{
  if (!self->columns)
    {
      gpointer user_data[] = { self, formatted };
      return filterx_dict_iter(csv_data, _append_to_buffer, user_data);
    }

  return _handle_dict_input_with_columns(self, csv_data, formatted);
}

/*
 * Evaluate format_csv(): evaluate the input expression, dispatch on its type
 * (list or dict) and return the formatted line as a new FilterX string.
 *
 * Returns NULL when the input cannot be evaluated, has an unsupported type
 * (an error is pushed in that case) or formatting fails.
 */
static FilterXObject *
_eval(FilterXExpr *s)
{
  FilterXFunctionFormatCSV *self = (FilterXFunctionFormatCSV *) s;

  FilterXObject *input = filterx_expr_eval_typed(self->input);
  if (!input)
    return NULL;

  GString *output = scratch_buffers_alloc();
  gboolean ok;

  if (filterx_object_is_type(input, &FILTERX_TYPE_NAME(list)))
    {
      ok = _handle_list_input(self, input, output);
    }
  else if (filterx_object_is_type(input, &FILTERX_TYPE_NAME(dict)))
    {
      ok = _handle_dict_input(self, input, output);
    }
  else
    {
      filterx_eval_push_error("input must be a dict or list. " FILTERX_FUNC_FORMAT_CSV_USAGE, s, input);
      ok = FALSE;
    }

  filterx_object_unref(input);

  if (!ok)
    return NULL;

  return filterx_string_new(output->str, output->len);
}

/* Drop the references held on the argument expressions, then free the base function. */
static void
_free(FilterXExpr *s)
{
  FilterXFunctionFormatCSV *self = (FilterXFunctionFormatCSV *) s;
  FilterXFunction *base = &self->super;

  filterx_expr_unref(self->input);
  filterx_expr_unref(self->columns);

  filterx_function_free_method(base);
}

/*
 * Look up the "columns" named argument and return its expression as produced
 * by filterx_function_args_get_named_expr().  error is currently unused.
 */
static FilterXExpr *
_extract_columns_expr(FilterXFunctionArgs *args, GError **error)
{
  FilterXExpr *columns_expr =
    filterx_function_args_get_named_expr(args, FILTERX_FUNC_FORMAT_CSV_ARG_NAME_COLUMNS);

  return columns_expr;
}

/*
 * Read the optional "delimiter" named argument into self->delimiter.
 *
 * A missing argument is not an error: self->delimiter is left untouched.
 * When present it has to be a string literal of exactly one character,
 * otherwise error is set and FALSE is returned.
 */
static gboolean
_extract_delimiter_arg(FilterXFunctionFormatCSV *self, FilterXFunctionArgs *args, GError **error)
{
  gboolean is_given;
  gsize literal_len;
  const gchar *literal = filterx_function_args_get_named_literal_string(args,
                         FILTERX_FUNC_FORMAT_CSV_ARG_NAME_DELIMITER,
                         &literal_len, &is_given);

  /* argument omitted: keep whatever delimiter the caller already configured */
  if (!is_given)
    return TRUE;

  if (literal && literal_len == 1)
    {
      self->delimiter = literal[0];
      return TRUE;
    }

  if (!literal)
    {
      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
                  "delimiter must be a string literal. " FILTERX_FUNC_FORMAT_CSV_USAGE);
    }
  else
    {
      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
                  "delimiter must be a single character. " FILTERX_FUNC_FORMAT_CSV_USAGE);
    }

  return FALSE;
}

/*
 * Pull every argument of format_csv() out of args and store it on self:
 * exactly one positional argument (the input), plus the optional
 * "delimiter" and "columns" named arguments.
 *
 * Returns FALSE with error set when the arguments are invalid.
 */
static gboolean
_extract_arguments(FilterXFunctionFormatCSV *self, FilterXFunctionArgs *args, GError **error)
{
  if (filterx_function_args_len(args) != 1)
    {
      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
                  "invalid number of arguments. " FILTERX_FUNC_FORMAT_CSV_USAGE);
      return FALSE;
    }

  FilterXExpr *input_expr = filterx_function_args_get_expr(args, 0);
  self->input = input_expr;
  if (!input_expr)
    {
      g_set_error(error, FILTERX_FUNCTION_ERROR, FILTERX_FUNCTION_ERROR_CTOR_FAIL,
                  "input must be set. " FILTERX_FUNC_FORMAT_CSV_USAGE);
      return FALSE;
    }

  gboolean delimiter_ok = _extract_delimiter_arg(self, args, error);
  if (!delimiter_ok)
    return FALSE;

  self->columns = _extract_columns_expr(args, error);

  return TRUE;
}

/*
 * Construct a format_csv() function instance.
 *
 * function_name: name the function was invoked as.
 * args:          call arguments; always freed by this function.
 * error:         set when the arguments are invalid.
 *
 * Returns the new function, or NULL (with error set) on invalid arguments.
 */
FilterXFunction *
filterx_function_format_csv_new(const gchar *function_name, FilterXFunctionArgs *args, GError **error)
{
  FilterXFunctionFormatCSV *self = g_new0(FilterXFunctionFormatCSV, 1);
  filterx_function_init_instance(&self->super, function_name);

  self->super.super.eval = _eval;
  self->super.super.free_fn = _free;
  /* default separator is a comma, matching the delimiter="," shown in the usage text */
  self->delimiter = ',';

  gboolean args_ok = _extract_arguments(self, args, error);
  filterx_function_args_free(args);

  if (!args_ok)
    {
      /* dropping the last reference runs _free(), releasing anything already extracted */
      filterx_expr_unref(&self->super.super);
      return NULL;
    }

  return &self->super;
}

/* Plugin construct hook: hands back the format_csv() constructor as an opaque pointer. */
gpointer
filterx_function_construct_format_csv(Plugin *self)
{
  gpointer constructor = (gpointer) filterx_function_format_csv_new;

  return constructor;
}
36 changes: 36 additions & 0 deletions modules/csvparser/filterx-func-format-csv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Copyright (c) 2024 shifter
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* As an additional exemption you are allowed to compile & link against the
* OpenSSL libraries as published by the OpenSSL project. See the file
* COPYING for details.
*
*/

#ifndef FILTERX_FUNC_FORMAT_CSV_H_INCLUDED
#define FILTERX_FUNC_FORMAT_CSV_H_INCLUDED


#include "plugin.h"
#include "filterx/expr-function.h"

/* Names of the optional named arguments accepted by format_csv(). */
#define FILTERX_FUNC_FORMAT_CSV_ARG_NAME_COLUMNS "columns"
#define FILTERX_FUNC_FORMAT_CSV_ARG_NAME_DELIMITER "delimiter"

/*
 * Create a format_csv() FilterX function from its call arguments.
 * Returns NULL and sets error when the arguments are invalid; args is freed
 * by the constructor in either case.
 */
FilterXFunction *filterx_function_format_csv_new(const gchar *function_name, FilterXFunctionArgs *args, GError **error);
/* Plugin construct hook returning filterx_function_format_csv_new as a gpointer. */
gpointer filterx_function_construct_format_csv(Plugin *self);

#endif
1 change: 1 addition & 0 deletions modules/csvparser/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ add_unit_test(LIBTEST CRITERION TARGET test_csvparser_from_config DEPENDS csvpar
add_unit_test(CRITERION TARGET test_csvparser_perf DEPENDS csvparser)
add_unit_test(CRITERION TARGET test_csvparser_statistics DEPENDS csvparser)
add_unit_test(LIBTEST CRITERION TARGET test_filterx_func_parse_csv DEPENDS csvparser)
add_unit_test(LIBTEST CRITERION TARGET test_filterx_func_format_csv DEPENDS csvparser)
9 changes: 8 additions & 1 deletion modules/csvparser/tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ modules_csvparser_tests_TESTS = \
modules/csvparser/tests/test_csvparser \
modules/csvparser/tests/test_csvparser_from_config \
modules/csvparser/tests/test_csvparser_perf \
modules/csvparser/tests/test_filterx_func_parse_csv
modules/csvparser/tests/test_filterx_func_parse_csv \
modules/csvparser/tests/test_filterx_func_format_csv

check_PROGRAMS += \
${modules_csvparser_tests_TESTS}
Expand Down Expand Up @@ -44,3 +45,9 @@ modules_csvparser_tests_test_filterx_func_parse_csv_CFLAGS = \
modules_csvparser_tests_test_filterx_func_parse_csv_LDADD = \
$(TEST_LDADD) \
-dlpreopen $(top_builddir)/modules/csvparser/libcsvparser.la

modules_csvparser_tests_test_filterx_func_format_csv_CFLAGS = \
$(TEST_CFLAGS) -I$(top_srcdir)/modules/csvparser
modules_csvparser_tests_test_filterx_func_format_csv_LDADD = \
$(TEST_LDADD) \
-dlpreopen $(top_builddir)/modules/csvparser/libcsvparser.la
Loading
Loading