Merge pull request #157 from wwkimball/development
Release 3.6.4
wwkimball authored Dec 6, 2021
2 parents 2b6816c + d0a4fd4 commit 64253e8
Showing 7 changed files with 298 additions and 18 deletions.
8 changes: 8 additions & 0 deletions CHANGES
@@ -1,3 +1,11 @@
3.6.4
Enhancements:
* Support ruamel.yaml up to version 0.17.17.

Bug Fixes:
* Refactored single-star wildcard segment (*) handling to enable filtering
matches when subsequent segments exist; this fixes Issue #154.

3.6.3
Bug Fixes:
* The eyaml-rotate-keys command-line tool failed to preserve block-style EYAML
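As a quick, hedged illustration of the Issue #154 fix (this example is editorial, not part of the commit): a * segment that is followed by further segments now only matches children which can satisfy those later segments. The sample data and expected result come from the new test case for /Locations/*/* below; the ConsolePrinter setup mirrors the project's quiet_logger test fixture.

from types import SimpleNamespace
from ruamel.yaml import YAML
from yamlpath import Processor
from yamlpath.wrappers import ConsolePrinter

yamldata = """---
Locations:
  United States:
    New York: ny
    Boston: bstn
  Canada: cnd
"""

# Quiet logger, comparable to the quiet_logger fixture used by the tests
log = ConsolePrinter(SimpleNamespace(quiet=True, verbose=False, debug=False))
processor = Processor(log, YAML().load(yamldata))

# The trailing * filters the first *: "Canada: cnd" has no children for the
# second segment to match, so it is excluded from the results.
print([nc.node for nc in processor.get_nodes("/Locations/*/*", mustexist=True)])
# Expected, per the new test case: ['ny', 'bstn']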
2 changes: 1 addition & 1 deletion setup.py
@@ -43,7 +43,7 @@
},
python_requires=">3.6.0",
install_requires=[
"ruamel.yaml>=0.15.96,!=0.17.0,!=0.17.1,!=0.17.2,!=0.17.5,<=0.17.10",
"ruamel.yaml>=0.15.96,!=0.17.0,!=0.17.1,!=0.17.2,!=0.17.5,<=0.17.17",
],
tests_require=[
"pytest",
34 changes: 33 additions & 1 deletion tests/test_processor.py
@@ -82,7 +82,11 @@ def test_get_none_data_nodes(self, quiet_logger):
("/array_of_hashes/**", [1, "one", 2, "two"], True, None),
("products_hash.*[dimensions.weight==4].(availability.start.date)+(availability.stop.date)", [[date(2020, 8, 1), date(2020, 9, 25)], [date(2020, 1, 1), date(2020, 1, 1)]], True, None),
("products_array[dimensions.weight==4].product", ["doohickey", "widget"], True, None),
("(products_hash.*.dimensions.weight)[max()][parent(2)].dimensions.weight", [10], True, None)
("(products_hash.*.dimensions.weight)[max()][parent(2)].dimensions.weight", [10], True, None),
("/Locations/*/*", ["ny", "bstn"], True, None),
("/AoH_Locations/*/*/*", ["nyc", "bo"], True, None),
("/Weird_AoH_Locations/*/*/*", ["nyc", "bstn"], True, None),
("/Set_Locations/*/*", ["New York", "Boston"], True, None),
])
def test_get_nodes(self, quiet_logger, yamlpath, results, mustexist, default):
yamldata = """---
@@ -222,7 +226,35 @@ def test_get_nodes(self, quiet_logger, yamlpath, results, mustexist, default):
height: 10
depth: 1
weight: 4
###############################################################################
# For wildcard matching (#154)
Locations:
United States:
New York: ny
Boston: bstn
Canada: cnd
AoH_Locations:
- United States: us
New York:
New York City: nyc
Massachussets:
Boston: bo
- Canada: ca
# Weird Array-of-Hashes
Weird_AoH_Locations:
- United States:
New York: nyc
Boston: bstn
- Canada: cnd
Set_Locations:
United States: !!set
? New York
? Boston
Canada:
"""
yaml = YAML()
processor = Processor(quiet_logger, yaml.load(yamldata))
2 changes: 1 addition & 1 deletion yamlpath/__init__.py
@@ -1,6 +1,6 @@
"""Core YAML Path classes."""
# Establish the version number common to all components
__version__ = "3.6.3"
__version__ = "3.6.4"

from yamlpath.yamlpath import YAMLPath
from yamlpath.processor import Processor
4 changes: 4 additions & 0 deletions yamlpath/enums/pathsegmenttypes.py
@@ -36,6 +36,9 @@ class PathSegmentTypes(Enum):
Traverses the document tree deeply. If there is a next segment, it
must match or no data is matched. When there is no next segment, every
leaf node matches.
`MATCH_ALL`
Matches every immediate child node.
"""

ANCHOR = auto()
@@ -45,3 +48,4 @@
SEARCH = auto()
TRAVERSE = auto()
KEYWORD_SEARCH = auto()
MATCH_ALL = auto()
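A brief, hedged sketch (editorial, not part of the commit) of how the new member surfaces to callers: per the _expand_splats change further below, a bare * segment is now parsed as MATCH_ALL with no attributes instead of being rewritten into a regex SEARCH. This assumes YAMLPath.escaped yields (segment_type, segment_attrs) pairs, as the Processor code below indexes it.

from yamlpath import YAMLPath
from yamlpath.enums import PathSegmentTypes

path = YAMLPath("/Locations/*/*")
match_all_count = sum(
    1 for (segment_type, _) in path.escaped
    if segment_type is PathSegmentTypes.MATCH_ALL)

# Each bare * should parse as MATCH_ALL with attributes of None (see
# coal_type/coal_value in _expand_splats), rather than the former SEARCH
# segment carrying a [.=~/.*/] regex term.
print(match_all_count)  # Expected: 2, one per bare * segment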
243 changes: 243 additions & 0 deletions yamlpath/processor.py
@@ -839,6 +839,11 @@ def _get_nodes_by_path_segment(
node_coords = self._get_nodes_by_index(
data, yaml_path, segment_index,
translated_path=translated_path, ancestry=ancestry)
elif segment_type == PathSegmentTypes.MATCH_ALL:
node_coords = self._get_nodes_by_match_all(
data, yaml_path, segment_index, parent=parent,
parentref=parentref, translated_path=translated_path,
ancestry=ancestry)
elif segment_type == PathSegmentTypes.ANCHOR:
node_coords = self._get_nodes_by_anchor(
data, yaml_path, segment_index,
@@ -1894,6 +1899,244 @@ def _get_nodes_by_traversal(
data=node_coord)
yield node_coord

def _get_nodes_by_match_all_unfiltered(
self, data: Any, yaml_path: YAMLPath, segment_index: int, **kwargs: Any
) -> Generator[Any, None, None]:
"""
Yield every immediate, non-leaf child node.
Parameters:
1. data (ruamel.yaml data) The parsed YAML data to process
2. yaml_path (yamlpath.Path) The YAML Path being processed
3. segment_index (int) Segment index of the YAML Path to process
Keyword Arguments:
* parent (ruamel.yaml node) The parent node from which this query
originates
* parentref (Any) The Index or Key of data within parent
* translated_path (YAMLPath) YAML Path indicating precisely which node
is being evaluated
* ancestry (List[AncestryEntry]) Stack of ancestors preceding the
present node under evaluation
Returns: (Generator[Any, None, None]) Each node coordinate as they are
matched.
"""
dbg_prefix="Processor::_get_nodes_by_match_all_unfiltered: "
parent: Any = kwargs.pop("parent", None)
parentref: Any = kwargs.pop("parentref", None)
translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath(""))
ancestry: List[AncestryEntry] = kwargs.pop("ancestry", [])
segments = yaml_path.escaped
pathseg: PathSegment = segments[segment_index]

self.logger.debug(
"Gathering ALL immediate children in the tree at parentref,"
f" {parentref}, in data:",
prefix=dbg_prefix, data=data)

if isinstance(data, (CommentedMap, dict)):
self.logger.debug(
"Iterating over all keys to find ANY matches in data:",
prefix=dbg_prefix, data=data)
for key, val in data.items():
next_translated_path = (
translated_path + YAMLPath.escape_path_section(
key, translated_path.seperator))
next_ancestry = ancestry + [(data, key)]
self.logger.debug(
f"Yielding dict value at key, {key} from data:",
prefix=dbg_prefix, data={'VAL': val, 'OF_DATA': data})
yield NodeCoords(val, data, key, next_translated_path,
next_ancestry, pathseg)
return

if isinstance(data, (CommentedSeq, list)):
for idx, ele in enumerate(data):
next_translated_path = translated_path + f"[{idx}]"
next_ancestry = ancestry + [(data, idx)]
self.logger.debug(
f"Yielding list element at index, {idx}:",
prefix=dbg_prefix, data=ele)
yield NodeCoords(ele, data, idx, next_translated_path,
next_ancestry, pathseg)
return

if isinstance(data, (CommentedSet, set)):
for ele in data:
next_translated_path = (
translated_path + YAMLPath.escape_path_section(
ele, translated_path.seperator))
self.logger.debug(
"Yielding set element:",
prefix=dbg_prefix, data=ele)
yield NodeCoords(
ele, parent, ele, next_translated_path, ancestry, pathseg)
return

self.logger.debug(
"NOT yielding Scalar node (* excludes scalars):",
prefix=dbg_prefix, data=data)
return

def _get_nodes_by_match_all_filtered(
self, data: Any, yaml_path: YAMLPath, segment_index: int, **kwargs: Any
) -> Generator[Any, None, None]:
"""
Yield immediate child nodes whose children match additional filters.
Parameters:
1. data (ruamel.yaml data) The parsed YAML data to process
2. yaml_path (yamlpath.Path) The YAML Path being processed
3. segment_index (int) Segment index of the YAML Path to process
Keyword Arguments:
* parent (ruamel.yaml node) The parent node from which this query
originates
* parentref (Any) The Index or Key of data within parent
* translated_path (YAMLPath) YAML Path indicating precisely which node
is being evaluated
* ancestry (List[AncestryEntry]) Stack of ancestors preceding the
present node under evaluation
Returns: (Generator[Any, None, None]) Each node coordinate as they are
matched.
"""
dbg_prefix="Processor::_get_nodes_by_match_all_filtered: "
parentref: Any = kwargs.pop("parentref", None)
translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath(""))
ancestry: List[AncestryEntry] = kwargs.pop("ancestry", [])
segments = yaml_path.escaped
pathseg: PathSegment = segments[segment_index]
next_segment_idx: int = segment_index + 1

self.logger.debug(
"FILTERING children in the tree at parentref,"
f" {parentref}, of data:",
prefix=dbg_prefix, data=data)

# There is a filter on this segment. Return nodes from the present
# data if-and-only-if any of their immediate children will match the
# filter. Do not return the child nodes; the caller will continue to
# process subsequent path segments to yield them.
if isinstance(data, dict):
self.logger.debug(
"Iterating over all keys to find ANY matches in data:",
prefix=dbg_prefix, data=data)
for key, val in data.items():
next_translated_path = (
translated_path + YAMLPath.escape_path_section(
key, translated_path.seperator))
next_ancestry = ancestry + [(data, key)]
for filtered_nc in self._get_nodes_by_path_segment(
val, yaml_path, next_segment_idx, parent=data,
parentref=key, translated_path=next_translated_path,
ancestry=next_ancestry
):
self.logger.debug(
"Ignoring yielded child node coordinate to yield its"
" successfully matched, filtered dict val parent for"
f" key, {key}:"
, prefix=dbg_prefix
, data={
'VAL': val
, 'OF_DATA': data
, 'IGNORING': filtered_nc
})
yield NodeCoords(
val, data, key, next_translated_path, next_ancestry,
pathseg
)
break # because we need only the matching parent
return

if isinstance(data, list):
for idx, ele in enumerate(data):
self.logger.debug(
f"Recursing into INDEX '{idx}' at ref '{parentref}' for"
" next-segment matches...", prefix=dbg_prefix)
next_translated_path = translated_path + f"[{idx}]"
next_ancestry = ancestry + [(data, idx)]
for filtered_nc in self._get_nodes_by_path_segment(
ele, yaml_path, next_segment_idx, parent=data,
parentref=idx, translated_path=next_translated_path,
ancestry=next_ancestry
):
self.logger.debug(
"Ignoring yielded child node coordinate to yield its"
" successfully matched, filtered list ele parent for"
f" idx, {idx}:"
, prefix=dbg_prefix
, data={
'ELE': ele
, 'OF_DATA': data
, 'IGNORING': filtered_nc
})
yield NodeCoords(
ele, data, idx, next_translated_path, next_ancestry,
pathseg
)
break # because we need only the matching parent
return

def _get_nodes_by_match_all(
self, data: Any, yaml_path: YAMLPath, segment_index: int, **kwargs: Any
) -> Generator[Any, None, None]:
"""
Yield every immediate child node.
Parameters:
1. data (ruamel.yaml data) The parsed YAML data to process
2. yaml_path (yamlpath.Path) The YAML Path being processed
3. segment_index (int) Segment index of the YAML Path to process
Keyword Arguments:
* parent (ruamel.yaml node) The parent node from which this query
originates
* parentref (Any) The Index or Key of data within parent
* translated_path (YAMLPath) YAML Path indicating precisely which node
is being evaluated
* ancestry (List[AncestryEntry]) Stack of ancestors preceding the
present node under evaluation
Returns: (Generator[Any, None, None]) Each node coordinate as they are
matched.
"""
dbg_prefix="Processor::_get_nodes_by_match_all: "
parent: Any = kwargs.pop("parent", None)
parentref: Any = kwargs.pop("parentref", None)
translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath(""))
ancestry: List[AncestryEntry] = kwargs.pop("ancestry", [])

segments = yaml_path.escaped
next_segment_idx: int = segment_index + 1
filter_results = next_segment_idx < len(segments)

self.logger.debug(
"Processing either FILTERED or UNFILTERED nodes from data:"
, prefix=dbg_prefix, data=data)

if filter_results:
# Of data, yield every node which has children matching next seg
all_coords = self._get_nodes_by_match_all_filtered(
data, yaml_path, segment_index,
parent=parent, parentref=parentref,
translated_path=translated_path, ancestry=ancestry
)
else:
# Of data, yield every node
all_coords = self._get_nodes_by_match_all_unfiltered(
data, yaml_path, segment_index,
parent=parent, parentref=parentref,
translated_path=translated_path, ancestry=ancestry
)

for all_coord in all_coords:
self.logger.debug(
"Yielding matched child node of source data:"
, prefix=dbg_prefix, data={'NODE': all_coord, 'DATA': data})
yield all_coord

def _get_required_nodes(
self, data: Any, yaml_path: YAMLPath, depth: int = 0, **kwargs: Any
) -> Generator[NodeCoords, None, None]:
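To make the filtered/unfiltered split above concrete, here is a hedged, editorial sketch (not part of the commit) using the Set_Locations data from the new tests: the first * carries a following segment, so it goes through _get_nodes_by_match_all_filtered and drops the null Canada entry; the trailing * goes through the unfiltered path, whose CommentedSet branch yields each set entry by name.

from types import SimpleNamespace
from ruamel.yaml import YAML
from yamlpath import Processor
from yamlpath.wrappers import ConsolePrinter

yamldata = """---
Set_Locations:
  United States: !!set
    ? New York
    ? Boston
  Canada:
"""

log = ConsolePrinter(SimpleNamespace(quiet=True, verbose=False, debug=False))
processor = Processor(log, YAML().load(yamldata))

# "Canada" is null, so it has no children satisfying the second * and is
# filtered out; the !!set under "United States" then yields its entries.
print([nc.node for nc in processor.get_nodes("/Set_Locations/*/*", mustexist=True)])
# Expected, per the new test case: ['New York', 'Boston']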
23 changes: 8 additions & 15 deletions yamlpath/yamlpath.py
@@ -798,10 +798,9 @@ def _expand_splats(
segment_len = len(segment_id)
if splat_count == 1:
if segment_len == 1:
# /*/ -> [.=~/.*/]
coal_type = PathSegmentTypes.SEARCH
coal_value = SearchTerms(
False, PathSearchMethods.REGEX, ".", ".*")
# /*/ -> MATCH_ALL
coal_type = PathSegmentTypes.MATCH_ALL
coal_value = None
elif splat_pos == 0:
# /*text/ -> [.$text]
coal_type = PathSegmentTypes.SEARCH
@@ -877,6 +876,10 @@ def _stringify_yamlpath_segments(
)
elif segment_type == PathSegmentTypes.INDEX:
ppath += "[{}]".format(segment_attrs)
elif segment_type == PathSegmentTypes.MATCH_ALL:
if add_sep:
ppath += pathsep
ppath += "*"
elif segment_type == PathSegmentTypes.ANCHOR:
if add_sep:
ppath += "[&{}]".format(segment_attrs)
@@ -886,17 +889,7 @@
ppath += str(segment_attrs)
elif (segment_type == PathSegmentTypes.SEARCH
and isinstance(segment_attrs, SearchTerms)):
terms: SearchTerms = segment_attrs
if (terms.method == PathSearchMethods.REGEX
and terms.attribute == "."
and terms.term == ".*"
and not terms.inverted
):
if add_sep:
ppath += pathsep
ppath += "*"
else:
ppath += str(segment_attrs)
ppath += str(segment_attrs)
elif segment_type == PathSegmentTypes.COLLECTOR:
ppath += str(segment_attrs)
elif segment_type == PathSegmentTypes.TRAVERSE:
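A small, hedged round-trip sketch (editorial, not part of the commit) tying the two changes in this file together: _expand_splats now emits a MATCH_ALL segment for a bare *, and _stringify_yamlpath_segments renders that segment back as "*" directly, replacing the old reverse-mapping of the [.=~/.*/] regex form.

from yamlpath import YAMLPath

path = YAMLPath("Locations.*.*")
print(str(path))
# Expected: Locations.*.* -- each bare * stringifies via the new MATCH_ALL
# branch instead of being reconstructed from a SEARCH regex term.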
