Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing dtype issues in network.get #217

Merged
merged 9 commits into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
Changelog
=========

Version v1.2.0
--------------

Breaking Changes
~~~~~~~~~~~~~~~~
- ``nodes.get`` and ``edges.get`` (and ``network.get``) no longer return a dataframe
- returns a generator yielding tuples of ``(<population_name>, <dataframe>)`` instead
mgeplf marked this conversation as resolved.
Show resolved Hide resolved
- Removed ``Network.property_dtypes``, ``CircuitIds.index_schema``

Bug Fixes
~~~~~~~~~
- Fixed the `Same property with different dtype` issue with ``nodes.get``, ``edges.get``

Version v1.1.0
--------------

Expand Down
5 changes: 0 additions & 5 deletions bluepysnap/circuit_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,6 @@ def __init__(self, index, sort_index=True):
index = index.sortlevel()[0]
self.index = index

@property
def index_schema(self):
"""Return an empty index with the same names of the wrapped index."""
return pd.MultiIndex.from_tuples([], names=self.index.names)

@classmethod
def _instance(cls, index, sort_index=True):
"""The instance returned by the functions."""
Expand Down
33 changes: 4 additions & 29 deletions bluepysnap/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,22 +48,6 @@ def _populations(self):
def population_names(self):
"""Should define all sorted NetworkObjects population names from the Circuit."""

@cached_property
def property_dtypes(self):
"""Returns all the NetworkObjects property dtypes for the Circuit."""

def _update(d, index, value):
if d.setdefault(index, value) != value:
raise BluepySnapError(
f"Same property with different dtype. {index}: {value}!= {d[index]}"
)

res = {}
for pop in self.values():
for varname, dtype in pop.property_dtypes.items():
_update(res, varname, dtype)
return pd.Series(res)

def keys(self):
"""Returns iterator on the NetworkObjectPopulation names.

Expand Down Expand Up @@ -149,7 +133,7 @@ def ids(self, group=None, sample=None, limit=None):

@abc.abstractmethod
def get(self, group=None, properties=None):
"""Returns the properties of the NetworkObject."""
"""Yields the properties of the NetworkObject."""
ids = self.ids(group)
properties = utils.ensure_list(properties)
# We don t convert to set properties itself to keep the column order.
Expand All @@ -159,14 +143,6 @@ def get(self, group=None, properties=None):
if unknown_props:
raise BluepySnapError(f"Unknown properties required: {unknown_props}")

# Retrieve the dtypes of the selected properties.
# However, the int dtype may not be preserved if some values are NaN.
dtypes = {
column: dtype
for column, dtype in self.property_dtypes.items()
joni-herttuainen marked this conversation as resolved.
Show resolved Hide resolved
if column in properties_set
}
dataframes = [pd.DataFrame(columns=properties, index=ids.index_schema).astype(dtypes)]
for name, pop in sorted(self.items()):
# since ids is sorted, global_pop_ids should be sorted as well
global_pop_ids = ids.filter_population(name)
Expand All @@ -177,10 +153,9 @@ def get(self, group=None, properties=None):
# However, it's a bit more performant than converting the Series to numpy arrays.
pop_df = pd.DataFrame({prop: pop.get(pop_ids, prop) for prop in pop_properties})
pop_df.index = global_pop_ids.index
dataframes.append(pop_df)
res = pd.concat(dataframes)
assert res.index.is_monotonic_increasing, "The index should be already sorted"
return res

# Sort the columns in the given order
yield name, pop_df[[p for p in properties if p in pop_properties]]

@abc.abstractmethod
def __getstate__(self):
Expand Down
2 changes: 2 additions & 0 deletions tests/test_circuit.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import pickle

import pandas as pd
import pytest
from libsonata import SonataError

Expand Down Expand Up @@ -78,6 +79,7 @@ def test_integration():
edge_ids = circuit.edges.afferent_edges(node_ids)
edge_props = circuit.edges.get(edge_ids, properties=["syn_weight", "delay"])
edge_reduced = edge_ids.limit(2)
edge_props = pd.concat(df for _, df in edge_props)
edge_props_reduced = edge_props.loc[edge_reduced]
assert edge_props_reduced["syn_weight"].tolist() == [1, 1]

Expand Down
136 changes: 42 additions & 94 deletions tests/test_edges.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,80 +85,6 @@ def test_property_names(self):
"syn_weight",
}

def test_property_dtypes(self):
expected = pd.Series(
data=[
dtype("float32"),
dtype("float64"),
dtype("float64"),
dtype("float64"),
dtype("float32"),
dtype("float64"),
dtype("float32"),
dtype("float64"),
dtype("int64"),
dtype("int64"),
dtype("float64"),
dtype("float64"),
dtype("float64"),
dtype("float64"),
dtype("float64"),
dtype("float64"),
dtype("float32"),
dtype("float32"),
dtype("float64"),
dtype("float64"),
IDS_DTYPE,
IDS_DTYPE,
dtype("O"),
dtype("int32"),
],
index=[
"syn_weight",
"@dynamics:param1",
"afferent_surface_y",
"afferent_surface_z",
"conductance",
"efferent_center_x",
"delay",
"afferent_center_z",
"efferent_section_id",
"afferent_section_id",
"efferent_center_y",
"afferent_center_x",
"efferent_surface_z",
"afferent_center_y",
"afferent_surface_x",
"efferent_surface_x",
"afferent_section_pos",
"efferent_section_pos",
"efferent_surface_y",
"efferent_center_z",
"@source_node",
"@target_node",
"other1",
"other2",
],
).sort_index()
pdt.assert_series_equal(self.test_obj.property_dtypes.sort_index(), expected)

def test_property_dtypes_fail(self):
a = pd.Series(
data=[dtype("int64"), dtype("float64")], index=["syn_weight", "efferent_surface_z"]
).sort_index()
b = pd.Series(
data=[dtype("int32"), dtype("float64")], index=["syn_weight", "efferent_surface_z"]
).sort_index()

with patch(
"bluepysnap.edges.EdgePopulation.property_dtypes", new_callable=PropertyMock
) as mock:
mock.side_effect = [a, b]
circuit = Circuit(str(TEST_DATA_DIR / "circuit_config.json"))
test_obj = test_module.Edges(circuit)
with pytest.raises(BluepySnapError):
test_obj.property_dtypes.sort_index()

def test_ids(self):
np.random.seed(0)
# single edge ID --> CircuitEdgeIds return populations with the 0 id
Expand Down Expand Up @@ -269,6 +195,8 @@ def test_get(self):
assert tested == ids

tested = self.test_obj.get(ids, properties=self.test_obj.property_names)
tested = pd.concat(df for _, df in tested)

assert len(tested) == 8
assert len(list(tested)) == 24

Expand All @@ -277,9 +205,9 @@ def test_get(self):

# the index of the dataframe is indentical to the CircuitEdgeIds index
pdt.assert_index_equal(tested.index, ids.index)
pdt.assert_frame_equal(
self.test_obj.get([0, 1, 2, 3], properties=self.test_obj.property_names), tested
)
tested2 = self.test_obj.get([0, 1, 2, 3], properties=self.test_obj.property_names)
tested2 = pd.concat(df for _, df in tested2)
pdt.assert_frame_equal(tested2, tested)

# tested columns
tested = self.test_obj.get(ids, properties=["other2", "other1", "@source_node"])
Expand All @@ -305,7 +233,8 @@ def test_get(self):
names=["population", "edge_ids"],
),
)
pdt.assert_frame_equal(tested, expected)
tested = pd.concat(df for _, df in tested)
pdt.assert_frame_equal(tested[expected.columns], expected)

tested = self.test_obj.get(
CircuitEdgeIds.from_dict({"default2": [0, 1, 2, 3]}),
Expand All @@ -328,6 +257,7 @@ def test_get(self):
names=["population", "edge_ids"],
),
)
tested = pd.concat(df for _, df in tested)
pdt.assert_frame_equal(tested, expected)

with pytest.raises(KeyError, match="'default'"):
Expand All @@ -339,8 +269,6 @@ def test_get(self):
)
expected = pd.DataFrame(
{
"other2": np.array([np.NaN, np.NaN, np.NaN, np.NaN], dtype=float),
"other1": np.array([np.NaN, np.NaN, np.NaN, np.NaN], dtype=object),
"@source_node": np.array([2, 0, 0, 2], dtype=int),
},
index=pd.MultiIndex.from_tuples(
Expand All @@ -353,6 +281,7 @@ def test_get(self):
names=["population", "edge_ids"],
),
)
tested = pd.concat(df for _, df in tested)
pdt.assert_frame_equal(tested, expected)

tested = self.test_obj.get(ids, properties="@source_node")
Expand All @@ -374,6 +303,7 @@ def test_get(self):
names=["population", "edge_ids"],
),
)
tested = pd.concat(df for _, df in tested)
pdt.assert_frame_equal(tested, expected)

tested = self.test_obj.get(ids, properties="other2")
Expand All @@ -395,13 +325,14 @@ def test_get(self):
names=["population", "edge_ids"],
),
)
tested = pd.concat(df for _, df in tested)
pdt.assert_frame_equal(tested, expected)

with pytest.raises(BluepySnapError, match="Unknown properties required: {'unknown'}"):
self.test_obj.get(ids, properties=["other2", "unknown"])
next(self.test_obj.get(ids, properties=["other2", "unknown"]))

with pytest.raises(BluepySnapError, match="Unknown properties required: {'unknown'}"):
self.test_obj.get(ids, properties="unknown")
next(self.test_obj.get(ids, properties="unknown"))

with pytest.deprecated_call(
match=(
Expand All @@ -418,6 +349,8 @@ def test_properties_deprecated(self):
):
tested = self.test_obj.properties(ids, properties=["other2", "@source_node"])
expected = self.test_obj.get(ids, properties=["other2", "@source_node"])
tested = pd.concat(df for _, df in tested)
expected = pd.concat(df for _, df in expected)
pdt.assert_frame_equal(tested, expected, check_exact=False)

def test_afferent_nodes(self):
Expand Down Expand Up @@ -486,8 +419,10 @@ def test_pathway_edges(self):
target = CircuitNodeIds.from_dict({"default": [1, 2]})

expected_index = CircuitEdgeIds.from_dict({"default": [1, 2], "default2": [1, 2]})
tested = self.test_obj.pathway_edges(source=source, target=target, properties=properties)
tested = pd.concat(df for _, df in tested)
pdt.assert_frame_equal(
self.test_obj.pathway_edges(source=source, target=target, properties=properties),
tested,
pd.DataFrame(
[
[88.1862],
Expand All @@ -503,8 +438,10 @@ def test_pathway_edges(self):

properties = [Synapse.SOURCE_NODE_ID, "other1"]
expected_index = CircuitEdgeIds.from_dict({"default": [1, 2], "default2": [1, 2]})
tested = self.test_obj.pathway_edges(source=source, target=target, properties=properties)
tested = pd.concat(df for _, df in tested)
pdt.assert_frame_equal(
self.test_obj.pathway_edges(source=source, target=target, properties=properties),
tested,
pd.DataFrame(
[
[0, np.nan],
Expand Down Expand Up @@ -540,8 +477,10 @@ def test_pathway_edges(self):
source = CircuitNodeId("default", 0)
target = CircuitNodeId("default", 1)
expected_index = CircuitEdgeIds.from_dict({"default": [1, 2], "default2": [1, 2]})
tested = self.test_obj.pathway_edges(source=source, target=target, properties=properties)
tested = pd.concat(df for _, df in tested)
pdt.assert_frame_equal(
self.test_obj.pathway_edges(source=source, target=target, properties=properties),
tested,
pd.DataFrame(
[
[0, 1],
Expand Down Expand Up @@ -575,8 +514,10 @@ def test_afferent_edges(self):
assert self.test_obj.afferent_edges(CircuitNodeId("default", 1), None) == expected

properties = [Synapse.AXONAL_DELAY]
tested = self.test_obj.afferent_edges(1, properties)
tested = pd.concat(df for _, df in tested)
pdt.assert_frame_equal(
self.test_obj.afferent_edges(1, properties),
tested,
pd.DataFrame(
[
[88.1862],
Expand All @@ -597,10 +538,12 @@ def test_afferent_edges(self):
expected_index = CircuitEdgeIds.from_dict(
{"default": [0, 1, 2, 3], "default2": [0, 1, 2, 3]}
)
tested = self.test_obj.afferent_edges(
CircuitNodeIds.from_dict({"default": [0, 1]}), properties=properties
)
tested = pd.concat(df for _, df in tested)
pdt.assert_frame_equal(
self.test_obj.afferent_edges(
CircuitNodeIds.from_dict({"default": [0, 1]}), properties=properties
),
tested,
pd.DataFrame(
[
[2, np.nan],
Expand All @@ -626,8 +569,10 @@ def test_efferent_edges(self):
assert self.test_obj.efferent_edges(CircuitNodeId("default", 2), None) == expected

properties = [Synapse.AXONAL_DELAY]
tested = self.test_obj.efferent_edges(2, properties)
tested = pd.concat(df for _, df in tested)
pdt.assert_frame_equal(
self.test_obj.efferent_edges(2, properties),
tested,
pd.DataFrame(
[
[99.8945],
Expand All @@ -645,8 +590,10 @@ def test_efferent_edges(self):
properties = [Synapse.TARGET_NODE_ID, "other1"]
expected_index = CircuitEdgeIds.from_dict({"default": [0, 3], "default2": [0, 3]})

tested = self.test_obj.efferent_edges(2, properties)
tested = pd.concat(df for _, df in tested)
pdt.assert_frame_equal(
self.test_obj.efferent_edges(2, properties),
tested,
pd.DataFrame(
[
[0, np.nan],
Expand All @@ -664,15 +611,17 @@ def test_pair_edges(self):
# no connection between 0 and 2
assert self.test_obj.pair_edges(0, 2, None) == CircuitEdgeIds.from_arrays([], [])
actual = self.test_obj.pair_edges(0, 2, [Synapse.AXONAL_DELAY])
assert actual.empty
assert next(actual, None) is None

assert self.test_obj.pair_edges(2, 0, None) == CircuitEdgeIds.from_tuples(
[("default", 0), ("default2", 0)]
)

properties = [Synapse.AXONAL_DELAY]
tested = self.test_obj.pair_edges(2, 0, properties)
tested = pd.concat(df for _, df in tested)
pdt.assert_frame_equal(
self.test_obj.pair_edges(2, 0, properties),
tested,
pd.DataFrame(
[
[99.8945],
Expand Down Expand Up @@ -776,7 +725,6 @@ def test_pickle(self, tmp_path):
# trigger some cached properties, to makes sure they aren't being pickeld
self.test_obj.size
self.test_obj.property_names
self.test_obj.property_dtypes

with open(pickle_path, "wb") as fd:
pickle.dump(self.test_obj, fd)
Expand Down
Loading