Merge pull request #157 from wwkimball/development
Release 3.6.4
wwkimball authored Dec 6, 2021
2 parents 2b6816c + d0a4fd4 commit 64253e8
Showing 7 changed files with 298 additions and 18 deletions.
8 changes: 8 additions & 0 deletions CHANGES
@@ -1,3 +1,11 @@
3.6.4
Enhancements:
* Support ruamel.yaml up to version 0.17.17.

Bug Fixes:
* Refactored single-star wildcard segment (*) handling to enable filtering
matches when subsequent segments exist; this fixes Issue #154.

3.6.3
Bug Fixes:
* The eyaml-rotate-keys command-line tool failed to preserve block-style EYAML
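As a quick, hedged illustration of the Issue #154 fix (this example is editorial, not part of the commit): a * segment that is followed by further segments now only matches children which can satisfy those later segments. The sample data and expected result come from the new test case for /Locations/*/* below; the ConsolePrinter setup mirrors the project's quiet_logger test fixture.

from types import SimpleNamespace
from ruamel.yaml import YAML
from yamlpath import Processor
from yamlpath.wrappers import ConsolePrinter

yamldata = """---
Locations:
  United States:
    New York: ny
    Boston: bstn
  Canada: cnd
"""

# Quiet logger, comparable to the quiet_logger fixture used by the tests
log = ConsolePrinter(SimpleNamespace(quiet=True, verbose=False, debug=False))
processor = Processor(log, YAML().load(yamldata))

# The trailing * filters the first *: "Canada: cnd" has no children for the
# second segment to match, so it is excluded from the results.
print([nc.node for nc in processor.get_nodes("/Locations/*/*", mustexist=True)])
# Expected, per the new test case: ['ny', 'bstn']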
2 changes: 1 addition & 1 deletion setup.py
@@ -43,7 +43,7 @@
},
python_requires=">3.6.0",
install_requires=[
"ruamel.yaml>=0.15.96,!=0.17.0,!=0.17.1,!=0.17.2,!=0.17.5,<=0.17.10",
"ruamel.yaml>=0.15.96,!=0.17.0,!=0.17.1,!=0.17.2,!=0.17.5,<=0.17.17",
],
tests_require=[
"pytest",
34 changes: 33 additions & 1 deletion tests/test_processor.py
@@ -82,7 +82,11 @@ def test_get_none_data_nodes(self, quiet_logger):
("/array_of_hashes/**", [1, "one", 2, "two"], True, None),
("products_hash.*[dimensions.weight==4].(availability.start.date)+(availability.stop.date)", [[date(2020, 8, 1), date(2020, 9, 25)], [date(2020, 1, 1), date(2020, 1, 1)]], True, None),
("products_array[dimensions.weight==4].product", ["doohickey", "widget"], True, None),
("(products_hash.*.dimensions.weight)[max()][parent(2)].dimensions.weight", [10], True, None)
("(products_hash.*.dimensions.weight)[max()][parent(2)].dimensions.weight", [10], True, None),
("/Locations/*/*", ["ny", "bstn"], True, None),
("/AoH_Locations/*/*/*", ["nyc", "bo"], True, None),
("/Weird_AoH_Locations/*/*/*", ["nyc", "bstn"], True, None),
("/Set_Locations/*/*", ["New York", "Boston"], True, None),
])
def test_get_nodes(self, quiet_logger, yamlpath, results, mustexist, default):
yamldata = """---
@@ -222,7 +226,35 @@ def test_get_nodes(self, quiet_logger, yamlpath, results, mustexist, default):
height: 10
depth: 1
weight: 4
###############################################################################
# For wildcard matching (#154)
Locations:
United States:
New York: ny
Boston: bstn
Canada: cnd
AoH_Locations:
- United States: us
New York:
New York City: nyc
Massachussets:
Boston: bo
- Canada: ca
# Weird Array-of-Hashes
Weird_AoH_Locations:
- United States:
New York: nyc
Boston: bstn
- Canada: cnd
Set_Locations:
United States: !!set
? New York
? Boston
Canada:
"""
yaml = YAML()
processor = Processor(quiet_logger, yaml.load(yamldata))
2 changes: 1 addition & 1 deletion yamlpath/__init__.py
@@ -1,6 +1,6 @@
"""Core YAML Path classes."""
# Establish the version number common to all components
__version__ = "3.6.3"
__version__ = "3.6.4"

from yamlpath.yamlpath import YAMLPath
from yamlpath.processor import Processor
4 changes: 4 additions & 0 deletions yamlpath/enums/pathsegmenttypes.py
@@ -36,6 +36,9 @@ class PathSegmentTypes(Enum):
Traverses the document tree deeply. If there is a next segment, it
must match or no data is matched. When there is no next segment, every
leaf node matches.
`MATCH_ALL`
Matches every immediate child node.
"""

ANCHOR = auto()
@@ -45,3 +48,4 @@
SEARCH = auto()
TRAVERSE = auto()
KEYWORD_SEARCH = auto()
MATCH_ALL = auto()
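A brief, hedged sketch (editorial, not part of the commit) of how the new member surfaces to callers: per the _expand_splats change further below, a bare * segment is now parsed as MATCH_ALL with no attributes instead of being rewritten into a regex SEARCH. This assumes YAMLPath.escaped yields (segment_type, segment_attrs) pairs, as the Processor code below indexes it.

from yamlpath import YAMLPath
from yamlpath.enums import PathSegmentTypes

path = YAMLPath("/Locations/*/*")
match_all_count = sum(
    1 for (segment_type, _) in path.escaped
    if segment_type is PathSegmentTypes.MATCH_ALL)

# Each bare * should parse as MATCH_ALL with attributes of None (see
# coal_type/coal_value in _expand_splats), rather than the former SEARCH
# segment carrying a [.=~/.*/] regex term.
print(match_all_count)  # Expected: 2, one per bare * segment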
243 changes: 243 additions & 0 deletions yamlpath/processor.py
@@ -839,6 +839,11 @@ def _get_nodes_by_path_segment(
node_coords = self._get_nodes_by_index(
data, yaml_path, segment_index,
translated_path=translated_path, ancestry=ancestry)
elif segment_type == PathSegmentTypes.MATCH_ALL:
node_coords = self._get_nodes_by_match_all(
data, yaml_path, segment_index, parent=parent,
parentref=parentref, translated_path=translated_path,
ancestry=ancestry)
elif segment_type == PathSegmentTypes.ANCHOR:
node_coords = self._get_nodes_by_anchor(
data, yaml_path, segment_index,
@@ -1894,6 +1899,244 @@ def _get_nodes_by_traversal(
data=node_coord)
yield node_coord

def _get_nodes_by_match_all_unfiltered(
self, data: Any, yaml_path: YAMLPath, segment_index: int, **kwargs: Any
) -> Generator[Any, None, None]:
"""
Yield every immediate, non-leaf child node.
Parameters:
1. data (ruamel.yaml data) The parsed YAML data to process
2. yaml_path (yamlpath.Path) The YAML Path being processed
3. segment_index (int) Segment index of the YAML Path to process
Keyword Arguments:
* parent (ruamel.yaml node) The parent node from which this query
originates
* parentref (Any) The Index or Key of data within parent
* translated_path (YAMLPath) YAML Path indicating precisely which node
is being evaluated
* ancestry (List[AncestryEntry]) Stack of ancestors preceding the
present node under evaluation
Returns: (Generator[Any, None, None]) Each node coordinate as they are
matched.
"""
dbg_prefix="Processor::_get_nodes_by_match_all_unfiltered: "
parent: Any = kwargs.pop("parent", None)
parentref: Any = kwargs.pop("parentref", None)
translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath(""))
ancestry: List[AncestryEntry] = kwargs.pop("ancestry", [])
segments = yaml_path.escaped
pathseg: PathSegment = segments[segment_index]

self.logger.debug(
"Gathering ALL immediate children in the tree at parentref,"
f" {parentref}, in data:",
prefix=dbg_prefix, data=data)

if isinstance(data, (CommentedMap, dict)):
self.logger.debug(
"Iterating over all keys to find ANY matches in data:",
prefix=dbg_prefix, data=data)
for key, val in data.items():
next_translated_path = (
translated_path + YAMLPath.escape_path_section(
key, translated_path.seperator))
next_ancestry = ancestry + [(data, key)]
self.logger.debug(
f"Yielding dict value at key, {key} from data:",
prefix=dbg_prefix, data={'VAL': val, 'OF_DATA': data})
yield NodeCoords(val, data, key, next_translated_path,
next_ancestry, pathseg)
return

if isinstance(data, (CommentedSeq, list)):
for idx, ele in enumerate(data):
next_translated_path = translated_path + f"[{idx}]"
next_ancestry = ancestry + [(data, idx)]
self.logger.debug(
f"Yielding list element at index, {idx}:",
prefix=dbg_prefix, data=ele)
yield NodeCoords(ele, data, idx, next_translated_path,
next_ancestry, pathseg)
return

if isinstance(data, (CommentedSet, set)):
for ele in data:
next_translated_path = (
translated_path + YAMLPath.escape_path_section(
ele, translated_path.seperator))
self.logger.debug(
"Yielding set element:",
prefix=dbg_prefix, data=ele)
yield NodeCoords(
ele, parent, ele, next_translated_path, ancestry, pathseg)
return

self.logger.debug(
"NOT yielding Scalar node (* excludes scalars):",
prefix=dbg_prefix, data=data)
return

def _get_nodes_by_match_all_filtered(
self, data: Any, yaml_path: YAMLPath, segment_index: int, **kwargs: Any
) -> Generator[Any, None, None]:
"""
Yield immediate child nodes whose children match additional filters.
Parameters:
1. data (ruamel.yaml data) The parsed YAML data to process
2. yaml_path (yamlpath.Path) The YAML Path being processed
3. segment_index (int) Segment index of the YAML Path to process
Keyword Arguments:
* parent (ruamel.yaml node) The parent node from which this query
originates
* parentref (Any) The Index or Key of data within parent
* translated_path (YAMLPath) YAML Path indicating precisely which node
is being evaluated
* ancestry (List[AncestryEntry]) Stack of ancestors preceding the
present node under evaluation
Returns: (Generator[Any, None, None]) Each node coordinate as they are
matched.
"""
dbg_prefix="Processor::_get_nodes_by_match_all_filtered: "
parentref: Any = kwargs.pop("parentref", None)
translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath(""))
ancestry: List[AncestryEntry] = kwargs.pop("ancestry", [])
segments = yaml_path.escaped
pathseg: PathSegment = segments[segment_index]
next_segment_idx: int = segment_index + 1

self.logger.debug(
"FILTERING children in the tree at parentref,"
f" {parentref}, of data:",
prefix=dbg_prefix, data=data)

# There is a filter on this segment. Return nodes from the present
# data if-and-only-if any of their immediate children will match the
# filter. Do not return the child nodes; the caller will continue to
# process subsequent path segments to yield them.
if isinstance(data, dict):
self.logger.debug(
"Iterating over all keys to find ANY matches in data:",
prefix=dbg_prefix, data=data)
for key, val in data.items():
next_translated_path = (
translated_path + YAMLPath.escape_path_section(
key, translated_path.seperator))
next_ancestry = ancestry + [(data, key)]
for filtered_nc in self._get_nodes_by_path_segment(
val, yaml_path, next_segment_idx, parent=data,
parentref=key, translated_path=next_translated_path,
ancestry=next_ancestry
):
self.logger.debug(
"Ignoring yielded child node coordinate to yield its"
" successfully matched, filtered dict val parent for"
f" key, {key}:"
, prefix=dbg_prefix
, data={
'VAL': val
, 'OF_DATA': data
, 'IGNORING': filtered_nc
})
yield NodeCoords(
val, data, key, next_translated_path, next_ancestry,
pathseg
)
break # because we need only the matching parent
return

if isinstance(data, list):
for idx, ele in enumerate(data):
self.logger.debug(
f"Recursing into INDEX '{idx}' at ref '{parentref}' for"
" next-segment matches...", prefix=dbg_prefix)
next_translated_path = translated_path + f"[{idx}]"
next_ancestry = ancestry + [(data, idx)]
for filtered_nc in self._get_nodes_by_path_segment(
ele, yaml_path, next_segment_idx, parent=data,
parentref=idx, translated_path=next_translated_path,
ancestry=next_ancestry
):
self.logger.debug(
"Ignoring yielded child node coordinate to yield its"
" successfully matched, filtered list ele parent for"
f" idx, {idx}:"
, prefix=dbg_prefix
, data={
'ELE': ele
, 'OF_DATA': data
, 'IGNORING': filtered_nc
})
yield NodeCoords(
ele, data, idx, next_translated_path, next_ancestry,
pathseg
)
break # because we need only the matching parent
return

def _get_nodes_by_match_all(
self, data: Any, yaml_path: YAMLPath, segment_index: int, **kwargs: Any
) -> Generator[Any, None, None]:
"""
Yield every immediate child node.
Parameters:
1. data (ruamel.yaml data) The parsed YAML data to process
2. yaml_path (yamlpath.Path) The YAML Path being processed
3. segment_index (int) Segment index of the YAML Path to process
Keyword Arguments:
* parent (ruamel.yaml node) The parent node from which this query
originates
* parentref (Any) The Index or Key of data within parent
* translated_path (YAMLPath) YAML Path indicating precisely which node
is being evaluated
* ancestry (List[AncestryEntry]) Stack of ancestors preceding the
present node under evaluation
Returns: (Generator[Any, None, None]) Each node coordinate as they are
matched.
"""
dbg_prefix="Processor::_get_nodes_by_match_all: "
parent: Any = kwargs.pop("parent", None)
parentref: Any = kwargs.pop("parentref", None)
translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath(""))
ancestry: List[AncestryEntry] = kwargs.pop("ancestry", [])

segments = yaml_path.escaped
next_segment_idx: int = segment_index + 1
filter_results = next_segment_idx < len(segments)

self.logger.debug(
"Processing either FILTERED or UNFILTERED nodes from data:"
, prefix=dbg_prefix, data=data)

if filter_results:
# Of data, yield every node which has children matching next seg
all_coords = self._get_nodes_by_match_all_filtered(
data, yaml_path, segment_index,
parent=parent, parentref=parentref,
translated_path=translated_path, ancestry=ancestry
)
else:
# Of data, yield every node
all_coords = self._get_nodes_by_match_all_unfiltered(
data, yaml_path, segment_index,
parent=parent, parentref=parentref,
translated_path=translated_path, ancestry=ancestry
)

for all_coord in all_coords:
self.logger.debug(
"Yielding matched child node of source data:"
, prefix=dbg_prefix, data={'NODE': all_coord, 'DATA': data})
yield all_coord

def _get_required_nodes(
self, data: Any, yaml_path: YAMLPath, depth: int = 0, **kwargs: Any
) -> Generator[NodeCoords, None, None]:
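To make the filtered/unfiltered split above concrete, here is a hedged, editorial sketch (not part of the commit) using the Set_Locations data from the new tests: the first * carries a following segment, so it goes through _get_nodes_by_match_all_filtered and drops the null Canada entry; the trailing * goes through the unfiltered path, whose CommentedSet branch yields each set entry by name.

from types import SimpleNamespace
from ruamel.yaml import YAML
from yamlpath import Processor
from yamlpath.wrappers import ConsolePrinter

yamldata = """---
Set_Locations:
  United States: !!set
    ? New York
    ? Boston
  Canada:
"""

log = ConsolePrinter(SimpleNamespace(quiet=True, verbose=False, debug=False))
processor = Processor(log, YAML().load(yamldata))

# "Canada" is null, so it has no children satisfying the second * and is
# filtered out; the !!set under "United States" then yields its entries.
print([nc.node for nc in processor.get_nodes("/Set_Locations/*/*", mustexist=True)])
# Expected, per the new test case: ['New York', 'Boston']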
23 changes: 8 additions & 15 deletions yamlpath/yamlpath.py
@@ -798,10 +798,9 @@ def _expand_splats(
segment_len = len(segment_id)
if splat_count == 1:
if segment_len == 1:
# /*/ -> [.=~/.*/]
coal_type = PathSegmentTypes.SEARCH
coal_value = SearchTerms(
False, PathSearchMethods.REGEX, ".", ".*")
# /*/ -> MATCH_ALL
coal_type = PathSegmentTypes.MATCH_ALL
coal_value = None
elif splat_pos == 0:
# /*text/ -> [.$text]
coal_type = PathSegmentTypes.SEARCH
@@ -877,6 +876,10 @@ def _stringify_yamlpath_segments(
)
elif segment_type == PathSegmentTypes.INDEX:
ppath += "[{}]".format(segment_attrs)
elif segment_type == PathSegmentTypes.MATCH_ALL:
if add_sep:
ppath += pathsep
ppath += "*"
elif segment_type == PathSegmentTypes.ANCHOR:
if add_sep:
ppath += "[&{}]".format(segment_attrs)
@@ -886,17 +889,7 @@
ppath += str(segment_attrs)
elif (segment_type == PathSegmentTypes.SEARCH
and isinstance(segment_attrs, SearchTerms)):
terms: SearchTerms = segment_attrs
if (terms.method == PathSearchMethods.REGEX
and terms.attribute == "."
and terms.term == ".*"
and not terms.inverted
):
if add_sep:
ppath += pathsep
ppath += "*"
else:
ppath += str(segment_attrs)
ppath += str(segment_attrs)
elif segment_type == PathSegmentTypes.COLLECTOR:
ppath += str(segment_attrs)
elif segment_type == PathSegmentTypes.TRAVERSE:
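A small, hedged round-trip sketch (editorial, not part of the commit) tying the two changes in this file together: _expand_splats now emits a MATCH_ALL segment for a bare *, and _stringify_yamlpath_segments renders that segment back as "*" directly, replacing the old reverse-mapping of the [.=~/.*/] regex form.

from yamlpath import YAMLPath

path = YAMLPath("Locations.*.*")
print(str(path))
# Expected: Locations.*.* -- each bare * stringifies via the new MATCH_ALL
# branch instead of being reconstructed from a SEARCH regex term.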
