Skip to content

Commit

Permalink
Remove network object level property_dtypes.
Browse files Browse the repository at this point in the history
* fix the functionality to fetch and merge different properties from multiple populations
  • Loading branch information
Joni Herttuainen committed Mar 24, 2023
1 parent aa06b0d commit 2d2095f
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 202 deletions.
5 changes: 0 additions & 5 deletions bluepysnap/circuit_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,6 @@ def __init__(self, index, sort_index=True):
index = index.sortlevel()[0]
self.index = index

@property
def index_schema(self):
    """Build an empty MultiIndex that reuses the level names of the wrapped index."""
    level_names = self.index.names
    return pd.MultiIndex.from_tuples([], names=level_names)

@classmethod
def _instance(cls, index, sort_index=True):
"""The instance returned by the functions."""
Expand Down
100 changes: 61 additions & 39 deletions bluepysnap/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,62 @@
from bluepysnap.exceptions import BluepySnapError


def _gather_properties(populations, ids, properties):
    """Gather the requested properties from several populations into one dataframe.

    Each population is queried only for the requested properties it actually has
    and only for the ids that belong to it. The per-population results of a
    property are concatenated into a single series, and all the series are then
    aligned on ``ids.index``.

    Args:
        populations (list): populations (NodePopulation, EdgePopulation) to consider
        ids (CircuitNodeIds, CircuitEdgeIds): node/edge ids to retrieve
        properties (list): properties to retrieve

    Returns:
        pandas.DataFrame: dataframe indexed by ``ids.index`` with one column per
        entry of ``properties``, in the given order. Entries that no population
        provided are filled in by pandas when the columns are aligned.
    """
    # Loop invariant: build the lookup set once instead of once per population.
    requested = set(properties)

    values = {}  # property name -> list of per-population data chunks
    indices = {}  # property name -> global index matching the concatenated chunks

    # gather properties for each of the population
    for pop in populations:
        global_pop_ids = ids.filter_population(pop.name)
        pop_ids = global_pop_ids.get_ids()

        for prop in requested & pop.property_names:
            data = pop.get(pop_ids, prop)

            # Empty results are skipped so they do not take part in the dtype
            # resolution done when the chunks are concatenated.
            if data.size == 0:
                continue

            values.setdefault(prop, []).append(data)

            # Keep the index in the same order as the chunks appended above.
            if prop in indices:
                indices[prop] = indices[prop].append(global_pop_ids.index)
            else:
                indices[prop] = global_pop_ids.index

    def _serialize(property_name):
        """Concatenate the chunks of one property into a single named series."""
        chunks = values[property_name]

        # If any of the dtypes is a category, force it. Otherwise, let pandas handle it.
        # An isinstance check on the dtype replaces the deprecated
        # pandas.api.types.is_categorical_dtype helper.
        has_categoricals = any(
            isinstance(getattr(chunk, "dtype", None), pd.CategoricalDtype) for chunk in chunks
        )

        return pd.Series(
            np.concatenate(chunks),
            index=indices[property_name],
            name=property_name,
            dtype="category" if has_categoricals else None,
        )

    series = [_serialize(prop) for prop in values]

    # `columns` restores the requested column order; `index` aligns every
    # property on the full set of requested ids.
    return pd.DataFrame(
        pd.concat(series, axis=1) if series else None,
        columns=properties,
        index=ids.index,
    )


class NetworkObject(abc.ABC):
"""Abstract class for the top level NetworkObjects accessor."""

Expand All @@ -48,22 +104,6 @@ def _populations(self):
def population_names(self):
"""Should define all sorted NetworkObjects population names from the Circuit."""

@cached_property
def property_dtypes(self):
    """Collect the dtype of every property found in the populations.

    Returns:
        pandas.Series: dtypes indexed by property name.

    Raises:
        BluepySnapError: if a property is reported with different dtypes.
    """
    collected = {}
    for population in self.values():
        for name, current in population.property_dtypes.items():
            # setdefault registers the first dtype seen and returns the registered one
            known = collected.setdefault(name, current)
            if known != current:
                raise BluepySnapError(
                    f"Same property with different dtype. {name}: {current}!= {known}"
                )
    return pd.Series(collected)

def keys(self):
"""Returns iterator on the NetworkObjectPopulation names.
Expand Down Expand Up @@ -149,33 +189,15 @@ def get(self, group=None, properties=None):
"""Returns the properties of the NetworkObject."""
ids = self.ids(group)
properties = utils.ensure_list(properties)
# We don't convert properties itself to a set, in order to keep the column order.
properties_set = set(properties)

unknown_props = properties_set - self.property_names
unknown_props = set(properties) - self.property_names
if unknown_props:
raise BluepySnapError(f"Unknown properties required: {unknown_props}")

# Retrieve the dtypes of the selected properties.
# However, the int dtype may not be preserved if some values are NaN.
dtypes = {
column: dtype
for column, dtype in self.property_dtypes.items()
if column in properties_set
}
dataframes = [pd.DataFrame(columns=properties, index=ids.index_schema).astype(dtypes)]
for name, pop in sorted(self.items()):
# since ids is sorted, global_pop_ids should be sorted as well
global_pop_ids = ids.filter_population(name)
pop_ids = global_pop_ids.get_ids()
if len(pop_ids) > 0:
pop_properties = properties_set & pop.property_names
# Since the columns are passed as Series, index cannot be specified directly.
# However, it's a bit more performant than converting the Series to numpy arrays.
pop_df = pd.DataFrame({prop: pop.get(pop_ids, prop) for prop in pop_properties})
pop_df.index = global_pop_ids.index
dataframes.append(pop_df)
res = pd.concat(dataframes)
populations = self.values()

res = _gather_properties(populations, ids, properties)

assert res.index.is_monotonic_increasing, "The index should be already sorted"
return res

Expand Down
58 changes: 30 additions & 28 deletions tests/test_dtype_mismatch.py → tests/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,18 @@
_str_dtype = h5py.string_dtype(encoding="utf-8")

MAP_DTYPE = {
"float": float,
"int": int,
"uint": np.uint64,
"object": _str_dtype,
"str": _str_dtype,
"float32": np.float32,
"float": float,
"int8": np.int8,
"int16": np.int16,
"int32": np.int32,
"int": int,
"uint8": np.uint8,
"uint16": np.uint16,
"uint32": np.uint32,
"uint": np.uint64,
}


Expand All @@ -34,7 +37,7 @@ def add_test_field(file_path, population_name, data, data_type):
test_data_path = f"{pop_0_path}/{TEST_FIELD}"

with h5py.File(file_path, "r+") as h5:
if data_type == "categorical":
if data_type == "category":
categorical = pd.Categorical(data)
categories = categorical.categories.values

Expand All @@ -51,35 +54,34 @@ def add_test_field(file_path, population_name, data, data_type):


@pytest.mark.parametrize(
"dtypes",
("dtypes", "expected"),
(
("categorical", "categorical"),
("int", "float"),
("int", "uint"),
("int", "str"),
("int", "int16"),
("int", "int32"),
("int16", "int32"),
("uint32", "int32"),
("uint", "float"),
(("category", "category"), "category"),
(("int8", "int8"), "int8"),
(("uint8", "uint8"), "uint8"),
(("object", "object"), "object"),
(("category", "str"), "category"),
(("category", "int"), "category"),
(("int", "float"), "float"),
(("int", "str"), "object"),
(("int", "int16"), "int"),
(("int", "int32"), "int"),
(("uint32", "int32"), "int"),
(("uint", "float"), "float"),
(("float32", "float"), "float"),
(("int8", "uint8"), "int16"),
(("int16", "uint8"), "int16"),
(("int16", "uint16"), "int32"),
(("int32", "uint32"), "int"),
(("int", "uint32"), "int"),
(("int", "uint"), "float"),
),
)
def test_mismatching_dtypes(dtypes):
def test_resulting_dtypes(dtypes, expected):
with copy_test_data() as (test_dir, config_path):
node_path = Path(test_dir) / "nodes.h5"
for population, data, dtype in zip(TEST_POPULATIONS, TEST_DATA, dtypes):
add_test_field(node_path, population, data, dtype)

with pytest.raises(BluepySnapError, match="Same property with different dtype."):
Circuit(config_path).nodes.property_dtypes


@pytest.mark.parametrize("dtype", list(MAP_DTYPE))
def test_matching_dtypes(dtype):
with copy_test_data() as (test_dir, config_path):
node_path = Path(test_dir) / "nodes.h5"
for population, data in zip(TEST_POPULATIONS, TEST_DATA):
add_test_field(node_path, population, data, dtype)

res = Circuit(config_path).nodes.property_dtypes
assert isinstance(res, pd.Series)
res = Circuit(config_path).nodes.get(properties=TEST_FIELD)
assert res[TEST_FIELD].dtype == expected
76 changes: 1 addition & 75 deletions tests/test_edges/test_edges.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,80 +85,6 @@ def test_property_names(self):
"syn_weight",
}

def test_property_dtypes(self):
    """Check the dtype reported for every edge property of the test object."""
    expected_dtypes = {
        "syn_weight": dtype("float32"),
        "@dynamics:param1": dtype("float64"),
        "afferent_surface_y": dtype("float64"),
        "afferent_surface_z": dtype("float64"),
        "conductance": dtype("float32"),
        "efferent_center_x": dtype("float64"),
        "delay": dtype("float32"),
        "afferent_center_z": dtype("float64"),
        "efferent_section_id": dtype("int64"),
        "afferent_section_id": dtype("int64"),
        "efferent_center_y": dtype("float64"),
        "afferent_center_x": dtype("float64"),
        "efferent_surface_z": dtype("float64"),
        "afferent_center_y": dtype("float64"),
        "afferent_surface_x": dtype("float64"),
        "efferent_surface_x": dtype("float64"),
        "afferent_section_pos": dtype("float32"),
        "efferent_section_pos": dtype("float32"),
        "efferent_surface_y": dtype("float64"),
        "efferent_center_z": dtype("float64"),
        "@source_node": IDS_DTYPE,
        "@target_node": IDS_DTYPE,
        "other1": dtype("O"),
        "other2": dtype("int32"),
    }
    expected = pd.Series(expected_dtypes).sort_index()
    # Both sides are sorted by property name so the comparison is order independent.
    actual = self.test_obj.property_dtypes.sort_index()
    pdt.assert_series_equal(actual, expected)

def test_property_dtypes_fail(self):
    """Check that conflicting dtypes for the same edge property raise an error.

    The patched ``EdgePopulation.property_dtypes`` returns int64 and then int32
    for ``syn_weight`` on successive accesses.
    """
    names = ["syn_weight", "efferent_surface_z"]
    first = pd.Series(data=[dtype("int64"), dtype("float64")], index=names).sort_index()
    second = pd.Series(data=[dtype("int32"), dtype("float64")], index=names).sort_index()

    target = "bluepysnap.edges.EdgePopulation.property_dtypes"
    with patch(target, new_callable=PropertyMock) as mocked_dtypes:
        mocked_dtypes.side_effect = [first, second]
        config_path = str(TEST_DATA_DIR / "circuit_config.json")
        edges = test_module.Edges(Circuit(config_path))
        with pytest.raises(BluepySnapError):
            edges.property_dtypes.sort_index()

def test_ids(self):
np.random.seed(42)
# single edge ID --> CircuitEdgeIds return populations with the 0 id
Expand Down Expand Up @@ -340,7 +266,7 @@ def test_get(self):
expected = pd.DataFrame(
{
"other2": np.array([np.NaN, np.NaN, np.NaN, np.NaN], dtype=float),
"other1": np.array([np.NaN, np.NaN, np.NaN, np.NaN], dtype=object),
"other1": np.array([np.NaN, np.NaN, np.NaN, np.NaN], dtype=float),
"@source_node": np.array([2, 0, 0, 2], dtype=int),
},
index=pd.MultiIndex.from_tuples(
Expand Down
56 changes: 1 addition & 55 deletions tests/test_nodes/test_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,60 +78,6 @@ def test_property_value(self):
assert self.test_obj.property_values("mtype") == {"L2_X", "L7_X", "L9_Z", "L8_Y", "L6_Y"}
assert self.test_obj.property_values("other2") == {10, 11, 12, 13}

def test_property_dtypes(self):
    """Check the dtype reported for every node property of the test object."""
    expected_dtypes = {
        "layer": dtype("int64"),
        "model_template": dtype("O"),
        "model_type": dtype("O"),
        "morphology": dtype("O"),
        "mtype": dtype("O"),
        "rotation_angle_xaxis": dtype("float64"),
        "rotation_angle_yaxis": dtype("float64"),
        "rotation_angle_zaxis": dtype("float64"),
        "x": dtype("float64"),
        "y": dtype("float64"),
        "z": dtype("float64"),
        "@dynamics:holding_current": dtype("float64"),
        "other1": dtype("O"),
        "other2": dtype("int64"),
    }
    expected = pd.Series(expected_dtypes).sort_index()
    # Both sides are sorted by property name so the comparison is order independent.
    actual = self.test_obj.property_dtypes.sort_index()
    pdt.assert_series_equal(actual, expected)

def test_property_dtypes_fail(self):
    """Check that conflicting dtypes for the same node property raise an error.

    The patched ``NodePopulation.property_dtypes`` returns int64 and then int32
    for ``layer`` on successive accesses.
    """
    names = ["layer", "model_template"]
    first = pd.Series(data=[dtype("int64"), dtype("O")], index=names).sort_index()
    second = pd.Series(data=[dtype("int32"), dtype("O")], index=names).sort_index()

    target = "bluepysnap.nodes.NodePopulation.property_dtypes"
    with patch(target, new_callable=PropertyMock) as mocked_dtypes:
        mocked_dtypes.side_effect = [first, second]
        config_path = str(TEST_DATA_DIR / "circuit_config.json")
        nodes = test_module.Nodes(Circuit(config_path))
        with pytest.raises(BluepySnapError):
            nodes.property_dtypes.sort_index()

def test_ids(self):
np.random.seed(42)

Expand Down Expand Up @@ -352,7 +298,7 @@ def test_get(self):
expected = pd.DataFrame(
{
"other2": np.array([np.NaN, np.NaN, np.NaN], dtype=float),
"other1": np.array([np.NaN, np.NaN, np.NaN], dtype=object),
"other1": np.array([np.NaN, np.NaN, np.NaN], dtype=float),
"layer": np.array([2, 6, 6], dtype=int),
},
index=pd.MultiIndex.from_tuples(
Expand Down

0 comments on commit 2d2095f

Please sign in to comment.