Skip to content

Hierarchical parser #21

Open
Open
@cescp

Description

@cescp

Hi,
When rendering a hierarchical structure, the renderer works fine and the column
names have the form:
level.N.sublevel
However, the parser does not take this structure into account. Instead, it just takes the column
names and puts the values into a flat structure.
I suggest the following code to recover the hierarchical structure:

class HierarchicalCSVParser(BaseParser):
    """
    Parses CSV serialized data into hierarchical structure.

    The parser assumes the first line contains the column names. Dotted
    column names (for example ``level.0.sublevel``) are expanded into nested
    containers: a level whose keys are all numeric becomes a list, any other
    level becomes a dictionary.
    """

    media_type = 'text/csv'

    def parse(self, stream, media_type=None, parser_context=None):
        """
        Read CSV from ``stream`` and return one nested structure per data row.

        ``parser_context`` may supply ``delimiter`` (default ``','``) and
        ``encoding`` (default ``settings.DEFAULT_CHARSET``).

        Returns the ``OrderedRows`` built from the header line, with one
        converted entry appended per subsequent row.

        Raises ``ParseError`` for any failure while reading or converting.
        """
        parser_context = parser_context or {}
        delimiter = parser_context.get('delimiter', ',')

        try:
            encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET)
            rows = unicode_csv_reader(
                universal_newlines(stream),
                delimiter=delimiter,
                charset=encoding,
            )
            # The first line is the header; every later line is a record.
            data = OrderedRows(next(rows))
            for row in rows:
                row_data = dict(zip(data.header, row))
                data.append(self._csv_convert(row_data))
            return data
        except Exception as exc:
            # Any failure (including an empty stream) is reported to the
            # caller as a parse error rather than leaking internals.
            raise ParseError('CSV parse error - %s' % str(exc))

    def _csv_convert(self, flat_data):
        """
        Expand one flat ``{dotted_key: value}`` mapping into nested containers.

        Example::

            {'name': 'x', 'tags.0': 'a', 'tags.1': 'b', 'owner.id': '7'}
            -> {'name': 'x', 'tags': ['a', 'b'], 'owner': {'id': '7'}}

        Entries with an empty value are dropped. A level is returned as a
        list only when *every* key at that level is numeric; list items are
        emitted in ascending index order. An empty mapping yields ``{}``.

        Raises ``ValueError`` when the same key is used both as a plain
        column and as a prefix of nested columns (e.g. ``a`` and ``a.b``),
        because the two cannot be represented together.
        """
        heads = []      # first-level keys, in first-seen order
        seen = set()
        leaves = {}     # first-level key -> scalar value
        branches = {}   # first-level key -> flat mapping of the remainder

        for key, value in flat_data.items():
            head, sep, rest = key.partition('.')
            if head not in seen:
                seen.add(head)
                heads.append(head)
            if not value:
                # Empty cells carry no information; skip them so that
                # absent optional fields do not produce empty containers.
                continue
            if sep:
                branches.setdefault(head, {})[rest] = value
            else:
                leaves[head] = value

        conflicts = sorted(set(leaves) & set(branches))
        if conflicts:
            raise ValueError(
                'column(s) used both as value and as prefix: %s'
                % ', '.join(conflicts)
            )

        # Decide the container type from all keys at this level, not from an
        # arbitrary one, so the result does not depend on iteration order.
        as_list = bool(heads) and all(head.isdigit() for head in heads)
        if as_list:
            # Numeric order, so that index "10" comes after index "2".
            heads.sort(key=int)
            result = []
        else:
            result = {}

        for head in heads:
            if head in leaves:
                item = leaves[head]
            elif head in branches:
                item = self._csv_convert(branches[head])
            else:
                # Every column under this key was empty for this row.
                continue
            if as_list:
                result.append(item)
            else:
                result[head] = item

        return result

Francesc

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions