Skip to content

Commit

Permalink
Adds guards around version logic (#789)
Browse files Browse the repository at this point in the history
* Adds guards around version logic

HamiltonNode.version was returning None in certain cases. This
broke graph.version. Culprits: config, and sometimes materializers.

This change:

1. makes the node version for config == name of the node.
2. guards graph.version against computing functions with None
as the version value.
3. after investigating more, materializers aren't the issue. We could
include the materializer code in the originating function if needed though.

So node.version should now hardly ever be None. But if it is
for some strange reason, we should now handle it better.

Adds tests for it.
Also fixed a fixture that was failing for me for the shelve caching
adapter.

* Fixing hashing for 3.8

So different python versions hash differently.
So rather than doing a lot of work to figure out the code
and hash of each node... I am hardcoding the hash for
versions that are different. It appears that 3.8 is the odd
one out -- all the other python versions seem to hash
to the same thing.

* Reverting adding originating function to materializers

We can figure something better out later. Right now what
I had worked, but wasn't useful.
  • Loading branch information
skrawcz authored Mar 28, 2024
1 parent 16efa8f commit 0ead276
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 7 deletions.
1 change: 0 additions & 1 deletion hamilton/function_modifiers/adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,6 @@ def get_input_type_key(key: str) -> str:
key: value for key, value in input_types.items() if key not in resolved_kwargs
}
input_types[node_to_save] = (node_.type, DependencyType.REQUIRED)

save_node = node.Node(
name=artifact_name,
callabl=save_data,
Expand Down
10 changes: 7 additions & 3 deletions hamilton/graph_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,13 @@ def version(self) -> typing.Optional[str]:
The option `strip=True` means docstring and comments are ignored
when hashing the function.
"""
if not self.originating_functions:
return None
if self.originating_functions is None or len(self.originating_functions) == 0:
if self.is_external_input:
# return the name of the config node. (we could add type but skipping for now)
return self.name
return None # this shouldn't happen often.
try:
# return hash of first function. It could be that others are Hamilton framework code.
return hash_source_code(self.originating_functions[0], strip=True)
except (
OSError
Expand Down Expand Up @@ -203,5 +207,5 @@ def version(self) -> str:
Node hashes are in a sorted list, then concatenated as a string before hashing.
To find differences between dataflows, you need to inspect the node level.
"""
sorted_node_versions = sorted([n.version for n in self.nodes])
sorted_node_versions = sorted([n.version for n in self.nodes if n.version is not None])
return hashlib.sha256(str(sorted_node_versions).encode()).hexdigest()
2 changes: 1 addition & 1 deletion tests/lifecycle/test_cache_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def _callable_to_node(callable) -> node.Node:

@pytest.fixture()
def hook(tmp_path: pathlib.Path):
return CacheAdapter(cache_path=str(tmp_path.resolve()))
return CacheAdapter(cache_path=str((tmp_path / "cache.db").resolve()))


@pytest.fixture()
Expand Down
43 changes: 41 additions & 2 deletions tests/test_graph_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@

import pytest

from hamilton import graph_types, node
from hamilton import driver, graph_types, node
from hamilton.node import Node, NodeType

from tests import nodes as test_nodes
from tests.resources.dynamic_parallelism import no_parallel


@pytest.fixture()
Expand Down Expand Up @@ -113,14 +114,52 @@ def node_to_create(required_dep: int, optional_dep: int = 1) -> str:
}


def test_create_hamilton_node_missing_version():
def test_create_hamilton_config_node_version():
"""Config nodes now return the name as the version."""
n = Node("foo", int, node_source=NodeType.EXTERNAL)
hamilton_node = graph_types.HamiltonNode.from_node(n)
# the above will have no specified versions
assert hamilton_node.version == "foo"
assert hamilton_node.as_dict()["version"] == "foo"


def test_create_hamilton_node_missing_version():
"""We contrive a case where originating_functions is None."""

def foo(i: int) -> int:
return i

n = Node("foo", int, "", foo, node_source=NodeType.STANDARD)
hamilton_node = graph_types.HamiltonNode.from_node(n)
# the above will have no specified versions
assert hamilton_node.version is None
assert hamilton_node.as_dict()["version"] is None


def test_hamilton_graph_version_normal():
dr = driver.Builder().with_modules(no_parallel).build()
graph = graph_types.HamiltonGraph.from_graph(dr.graph)
# assumption is for python 3
if sys.version_info.minor == 8:
hash_value = "0a375f3366590453dea8927d4c02c15dc090f8be42e9129d9a1139284eac920c"
else:
hash_value = "3b3487599ccc4fc56995989c6d32b58a90c0b91b8c16b3f453a2793f47436831"
assert graph.version == hash_value


def test_hamilton_graph_version_with_none_originating_functions():
dr = driver.Builder().with_modules(no_parallel).build()
graph = graph_types.HamiltonGraph.from_graph(dr.graph)
# if this gets flakey we should find a specific node to make None
graph.nodes[-1].originating_functions = None
# assumption is for python 3
if sys.version_info.minor == 8:
hash_value = "7d556424cd84b97a395d9b6219f502e8f818f17002ce88f47974266c0cce454a"
else:
hash_value = "781d89517c1744c40a7afcdc49ee8592fbb23955e28d87f1b584d08430a3e837"
assert graph.version == hash_value


def test_json_serializable_dict():
for name, obj in inspect.getmembers(test_nodes):
if inspect.isfunction(obj) and not name.startswith("_"):
Expand Down

0 comments on commit 0ead276

Please sign in to comment.