From 39210cc76c301bbd879b2c79685c37e0c9783290 Mon Sep 17 00:00:00 2001 From: Pierre Marcenac Date: Tue, 3 Dec 2024 11:46:35 +0100 Subject: [PATCH] Clean code by checking attribute. (#779) --- .../_src/operation_graph/operations/field.py | 29 +++++++------------ .../_src/structure_graph/base_node.py | 18 +++++++----- .../_src/structure_graph/nodes/field.py | 8 +++++ .../_src/structure_graph/nodes/record_set.py | 3 +- 4 files changed, 29 insertions(+), 29 deletions(-) diff --git a/python/mlcroissant/mlcroissant/_src/operation_graph/operations/field.py b/python/mlcroissant/mlcroissant/_src/operation_graph/operations/field.py index 5fd112dbb..b8fe5db04 100644 --- a/python/mlcroissant/mlcroissant/_src/operation_graph/operations/field.py +++ b/python/mlcroissant/mlcroissant/_src/operation_graph/operations/field.py @@ -220,9 +220,7 @@ def _get_result(row): f'Column "{column}" does not exist. Inspect the ancestors of the' f" field {field} to understand why. Possible fields: {df.columns}" ) - is_repeated = field.repeated or ( - field.parent and _is_repeated_field(field.parent) - ) + is_repeated = field.repeated or _is_repeated_field(field.parent) value = apply_transforms_fn( row[column], field=field, repeated=is_repeated ) @@ -242,23 +240,16 @@ def _get_result(row): result[field.id] = value else: # Repeated nested sub-fields render as a list of dictionaries. - if field.parent: - if _is_repeated_field(field.parent): - result = _populate_repeated_nested_subfield( - value=value, field=field, result=result - ) - # Non-repeated subfields render as a single dictionary. - else: - parent_id = ( - field.parent.id # pytype: disable=attribute-error - ) - if parent_id not in result: - result[parent_id] = {} - result[parent_id][field.id] = value - else: - raise ValueError( - f"The field {field.id} is a SubField but has no parent." + if _is_repeated_field(field.parent): + result = _populate_repeated_nested_subfield( + value=value, field=field, result=result ) + # Non-repeated subfields render as a single dictionary. + else: + parent_id = field.parent.id + if parent_id not in result: + result[parent_id] = {} + result[parent_id][field.id] = value return result chunk_size = 100 diff --git a/python/mlcroissant/mlcroissant/_src/structure_graph/base_node.py b/python/mlcroissant/mlcroissant/_src/structure_graph/base_node.py index 171fb944a..19b444d8c 100644 --- a/python/mlcroissant/mlcroissant/_src/structure_graph/base_node.py +++ b/python/mlcroissant/mlcroissant/_src/structure_graph/base_node.py @@ -1,5 +1,7 @@ """Base node module.""" +from __future__ import annotations + import dataclasses import inspect import re @@ -63,7 +65,7 @@ class Node: ctx: Context = dataclasses.field(default_factory=Context) id: str = dataclasses.field(default_factory=generate_uuid) name: str | None = None - parents: list["Node"] = dataclasses.field(default_factory=list) + parents: list[Node] = dataclasses.field(default_factory=list) jsonld: Any = None def __post_init__(self): @@ -205,14 +207,14 @@ def uuid(self) -> str: return self.id @property - def parent(self) -> "Node | None": + def parent(self) -> Node | None: """Direct parent of the node or None if no parent.""" if not self.parents: return None return self.parents[-1] @property - def predecessors(self) -> set["Node"]: + def predecessors(self) -> set[Node]: """Predecessors in the structure graph.""" try: predecessors = self.ctx.graph.predecessors(self) @@ -225,7 +227,7 @@ def predecessors(self) -> set["Node"]: ) from e @property - def recursive_predecessors(self) -> set["Node"]: + def recursive_predecessors(self) -> set[Node]: """Predecessors and predecessors of predecessors in the structure graph.""" predecessors = set() for predecessor in self.predecessors: @@ -234,7 +236,7 @@ def recursive_predecessors(self) -> set["Node"]: return predecessors @property - def predecessor(self) -> "Node | None": + def predecessor(self) -> Node | None: """First predecessor in the structure graph.""" if not self.ctx.graph.has_node(self): return None @@ -243,7 +245,7 @@ def predecessor(self) -> "Node | None": ) # pytype: disable=bad-return-type @property - def successors(self) -> tuple["Node", ...]: + def successors(self) -> tuple[Node, ...]: """Successors in the structure graph.""" if self not in self.ctx.graph: return () @@ -252,7 +254,7 @@ def successors(self) -> tuple["Node", ...]: return tuple(self.ctx.graph.successors(self)) # pytype: disable=bad-return-type @property - def recursive_successors(self) -> set["Node"]: + def recursive_successors(self) -> set[Node]: """Successors and successors of successors in the structure graph.""" successors = set() for successor in self.successors: @@ -261,7 +263,7 @@ def recursive_successors(self) -> set["Node"]: return successors @property - def successor(self) -> "Node | None": + def successor(self) -> Node | None: """Direct successor in the structure graph.""" if not self.ctx.graph.has_node(self): return None diff --git a/python/mlcroissant/mlcroissant/_src/structure_graph/nodes/field.py b/python/mlcroissant/mlcroissant/_src/structure_graph/nodes/field.py index 8e5115576..3066d909f 100644 --- a/python/mlcroissant/mlcroissant/_src/structure_graph/nodes/field.py +++ b/python/mlcroissant/mlcroissant/_src/structure_graph/nodes/field.py @@ -182,3 +182,11 @@ def data(self) -> str | None: if hasattr(self.parent, "data"): return getattr(self.parent, "data") return None + + @property + def parent(self) -> Node: + """Direct parent of the field.""" + parent = super().parent + if parent: + return parent + raise ValueError(f"field={self} does not have any parent.") diff --git a/python/mlcroissant/mlcroissant/_src/structure_graph/nodes/record_set.py b/python/mlcroissant/mlcroissant/_src/structure_graph/nodes/record_set.py index e1e316c5d..76dcd5910 100644 --- a/python/mlcroissant/mlcroissant/_src/structure_graph/nodes/record_set.py +++ b/python/mlcroissant/mlcroissant/_src/structure_graph/nodes/record_set.py @@ -186,6 +186,5 @@ def get_parent_uuid(ctx: Context, uuid: str) -> str | None: ) return None if isinstance(node, Field): - if node.parent: - return node.parent.uuid + return node.parent.uuid return node.uuid