Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hierarchical parser #21

Open
cescp opened this issue Nov 20, 2014 · 2 comments
Open

Hierarchical parser #21

cescp opened this issue Nov 20, 2014 · 2 comments

Comments

@cescp
Copy link

cescp commented Nov 20, 2014

Hi,
When rendering a hierarchical structure, the renderer works fine and the names
of the columns are:
level.N.sublevel
But the parser does not take this structure into account. Instead, it just takes the names
of the columns and puts the values into a flat structure.
I suggest the following code to recover the hierarchical structure:

class HierarchicalCSVParser(BaseParser):
    """
    Parses CSV serialized data into a hierarchical structure.

    The parser assumes the first line contains the column names. Dotted
    column names such as ``level.0.sublevel`` are expanded back into nested
    dictionaries and lists, one structure per CSV row.
    """

    media_type = 'text/csv'

    def parse(self, stream, media_type=None, parser_context=None):
        """
        Read CSV rows from ``stream`` and return them as nested structures.

        ``parser_context`` may carry a ``delimiter`` (default ``','``) and an
        ``encoding`` (default ``settings.DEFAULT_CHARSET``). The first row is
        used as the header; every following row is zipped with the header
        and expanded by ``_csv_convert``.

        Any exception raised while reading or converting is re-raised as a
        ``ParseError``.
        """
        parser_context = parser_context or {}
        delimiter = parser_context.get('delimiter', ',')

        try:
            encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET)
            rows = unicode_csv_reader(universal_newlines(stream), delimiter=delimiter, charset=encoding)
            data = OrderedRows(next(rows))
            for row in rows:
                row_data = dict(zip(data.header, row))
                data.append(self._csv_convert(row_data))
            return data
        except Exception as exc:
            raise ParseError('CSV parse error - %s' % str(exc))

    def _csv_convert(self, flat_data):
        """
        Expand one flat row (dotted column name -> value) into nested data.

        The first dotted segment of every key selects an entry of the
        container built at this level; the remainder of the key, if any, is
        converted recursively. The container is a list when *every* first
        segment is numeric (elements ordered by their numeric index) and a
        dictionary otherwise. Entries with an empty value are skipped, so a
        branch whose values are all empty is omitted entirely.

        Example: ``{"a.0.x": "1", "a.1.x": "2", "b": "3"}`` becomes
        ``{"a": [{"x": "1"}, {"x": "2"}], "b": "3"}``.

        Raises ``ValueError`` when a column is used both as a plain value
        and as the prefix of deeper columns (e.g. ``a`` and ``a.b``).
        """
        # Decide the container from all column names, not only the non-empty
        # ones, so the result type does not depend on which cells are filled.
        as_list = bool(flat_data) and all(
            key.partition(".")[0].isdigit() for key in flat_data
        )

        leaves = {}    # first segment -> scalar value (key had no dot)
        branches = {}  # first segment -> flat sub-dict with the prefix removed
        order = []     # first segments in the order they were first seen
        for key, value in flat_data.items():
            if not value:
                continue
            head, dot, rest = key.partition(".")
            if head not in leaves and head not in branches:
                order.append(head)
            if dot:
                branches.setdefault(head, {})[rest] = value
            else:
                leaves[head] = value

        conflicts = sorted(set(leaves) & set(branches))
        if conflicts:
            raise ValueError(
                'column(s) used both as a value and as a prefix: %s'
                % ', '.join(conflicts)
            )

        def resolve(head):
            # Recurse only where deeper levels exist; otherwise it is a leaf.
            if head in branches:
                return self._csv_convert(branches[head])
            return leaves[head]

        if as_list:
            # Sort numerically so "10" comes after "2".
            return [resolve(head) for head in sorted(order, key=int)]
        return {head: resolve(head) for head in order}

Francesc

@FlorianWendelborn
Copy link

Can confirm that this issue is still relevant in 2019.

@FlorianWendelborn
Copy link

Here’s another implementation

from rest_framework_csv.parsers import CSVParser


class HierarchicalCSVParser(CSVParser):
    """CSV parser that expands dotted column names into nested dictionaries."""

    def parse(self, stream, media_type=None, parser_context=None):
        """Parse with the base ``CSVParser``, then nest every resulting row."""
        flattened_data = super().parse(stream, media_type, parser_context)
        hierarchical_data = self.hierarchify_many(flattened_data)
        return hierarchical_data

    @staticmethod
    def hierarchify_many(flattened_list: list) -> list:
        """Return a new list with ``hierarchify_one`` applied to each item."""
        return [
            HierarchicalCSVParser.hierarchify_one(flattened_item)
            for flattened_item in flattened_list
        ]

    @staticmethod
    def hierarchify_one(flattened_dictionary: dict) -> dict:
        """
        Turn ``{"a.b.c": value}`` into ``{"a": {"b": {"c": value}}}``.

        Keys are split on ``"."``; every segment but the last names a nested
        dictionary and the last segment receives the value.

        Raises ``KeyError`` when a key is used both as a plain value and as
        the prefix of a deeper key (e.g. ``a`` and ``a.b``), whichever of the
        two appears first.
        """
        result = {}

        for flat_key, value in flattened_dictionary.items():
            *parents, leaf = flat_key.split(".")

            node = result
            for key in parents:
                child = node.setdefault(key, {})
                if not isinstance(child, dict):
                    raise KeyError(f"{flat_key} {key} is not a dictionary")
                node = child

            # Without this check a later plain key would silently replace an
            # already-built nested dictionary and drop its contents.
            if isinstance(node.get(leaf), dict):
                raise KeyError(f"{flat_key} {leaf} is already a dictionary")

            node[leaf] = value

        return result

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants