Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing dtype issues in network.get #217

Merged
merged 9 commits into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Changelog
=========

Version v1.1.0
Version v2.0.0
--------------

New Features
Expand All @@ -21,8 +21,16 @@ Improvements
- Added kwarg: ``raise_missing_property`` to ``NodePopulation.get``
- Undeprecated calling ``Edges.get`` and ``EdgePopulation.get`` with ``properties=None``

Bug Fixes
~~~~~~~~~
- Fixed the `Same property with different dtype` issue with ``nodes.get``, ``edges.get``

Breaking Changes
~~~~~~~~~~~~~~~~
- ``nodes.get`` and ``edges.get`` (and ``network.get``) no longer return a dataframe
- returns a generator yielding tuples of ``(<population_name>, <dataframe>)`` instead
- to get the previous behavior (all in one dataframe): ``pd.concat(df for _, df in circuit.nodes.get(*args, **kwargs))``
- Removed ``Network.property_dtypes``, ``CircuitIds.index_schema``
- ``Circuit.node_sets``, ``Simulation.node_sets`` returns ``NodeSets`` object initialized with empty dict when node sets file is not present
- ``NodeSet.resolved`` is no longer available
- ``FrameReport.node_set`` returns node_set name instead of resolved node set query
Expand Down
8 changes: 0 additions & 8 deletions bluepysnap/circuit_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,6 @@ def __init__(self, index, sort_index=True):
index = index.sortlevel()[0]
self.index = index

@property
def index_schema(self):
"""Return an empty index with the same names and dtypes of the wrapped index."""
# NOTE: Since pandas 2.1.0, the index needs to contain the explicit dtypes. In pd.concat,
# the dtypes of multi-index are coerced to 'object' if any dataframe has indices with
# dtype='object'
return self.index[:0]

@classmethod
def _instance(cls, index, sort_index=True):
"""The instance returned by the functions."""
Expand Down
33 changes: 4 additions & 29 deletions bluepysnap/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,22 +48,6 @@ def _populations(self):
def population_names(self):
"""Should define all sorted NetworkObjects population names from the Circuit."""

@cached_property
def property_dtypes(self):
"""Returns all the NetworkObjects property dtypes for the Circuit."""

def _update(d, index, value):
if d.setdefault(index, value) != value:
raise BluepySnapError(
f"Same property with different dtype. {index}: {value}!= {d[index]}"
)

res = {}
for pop in self.values():
for varname, dtype in pop.property_dtypes.items():
_update(res, varname, dtype)
return pd.Series(res)

def keys(self):
"""Returns iterator on the NetworkObjectPopulation names.

Expand Down Expand Up @@ -149,7 +133,7 @@ def ids(self, group=None, sample=None, limit=None):

@abc.abstractmethod
def get(self, group=None, properties=None):
"""Returns the properties of the NetworkObject."""
"""Yields the properties of the NetworkObject."""
ids = self.ids(group)
properties = utils.ensure_list(properties)
# We don t convert to set properties itself to keep the column order.
Expand All @@ -159,14 +143,6 @@ def get(self, group=None, properties=None):
if unknown_props:
raise BluepySnapError(f"Unknown properties required: {unknown_props}")

# Retrieve the dtypes of the selected properties.
# However, the int dtype may not be preserved if some values are NaN.
dtypes = {
column: dtype
for column, dtype in self.property_dtypes.items()
joni-herttuainen marked this conversation as resolved.
Show resolved Hide resolved
if column in properties_set
}
dataframes = [pd.DataFrame(columns=properties, index=ids.index_schema).astype(dtypes)]
for name, pop in sorted(self.items()):
# since ids is sorted, global_pop_ids should be sorted as well
global_pop_ids = ids.filter_population(name)
Expand All @@ -177,10 +153,9 @@ def get(self, group=None, properties=None):
# However, it's a bit more performant than converting the Series to numpy arrays.
pop_df = pd.DataFrame({prop: pop.get(pop_ids, prop) for prop in pop_properties})
pop_df.index = global_pop_ids.index
dataframes.append(pop_df)
res = pd.concat(dataframes)
assert res.index.is_monotonic_increasing, "The index should be already sorted"
return res

# Sort the columns in the given order
yield name, pop_df[[p for p in properties if p in pop_properties]]

@abc.abstractmethod
def __getstate__(self):
Expand Down
Loading