-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #77 from DHI/feature/pandas_dataframe_accessor_extension
Feature/pandas dataframe accessor extension
- Loading branch information
Showing
8 changed files
with
610 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from .mikeio1d_accessor import Mikeio1dAccessor # noqa | ||
from .transposed_groupby import TransposedGroupBy # noqa | ||
from .result_reaches_helpers import agg_chainage # noqa | ||
from .result_reaches_helpers import groupby_chainage # noqa | ||
from .various import compact_dataframe | ||
|
||
# Public API of the package. The name must be exactly ``__all__`` (two
# leading and two trailing underscores); any other spelling is just an
# ordinary module variable and is ignored by ``from <package> import *``.
__all__ = [
    "Mikeio1dAccessor",
    "TransposedGroupBy",
    "agg_chainage",
    "groupby_chainage",
    "compact_dataframe",
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
import pandas as pd | ||
|
||
from .transposed_groupby import TransposedGroupBy | ||
from .result_reaches_helpers import agg_chainage | ||
from .result_reaches_helpers import groupby_chainage | ||
from .various import compact_dataframe | ||
|
||
|
||
@pd.api.extensions.register_dataframe_accessor("m1d")
class Mikeio1dAccessor:
    """
    Custom pandas accessor, available on DataFrames as ``df.m1d``.

    Registered through the pandas extension API, see:
    https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors

    The accessor is a thin facade: each public method forwards to a helper
    function or to a pandas operation applied on the transposed DataFrame, so
    that column levels of a MultiIndex can be grouped and queried by name.
    """

    def __init__(self, pandas_obj):
        # Validate first so an unsupported object is never stored.
        self._validate(pandas_obj)
        self._obj = pandas_obj

    @staticmethod
    def _validate(obj):
        """Raise AttributeError unless obj is a DataFrame with MultiIndex columns."""
        if not isinstance(obj, pd.DataFrame):
            raise AttributeError("Mikeio1dAccessor only supports DataFrames.")
        if not isinstance(obj.columns, pd.MultiIndex):
            raise AttributeError("Must have a MultiIndex columns.")

    def _validate_has_chainage(self):
        """Raise ValueError unless the column index has a level named 'chainage'."""
        frame = self._obj
        self._validate(frame)
        level_names = frame.columns.names
        if "chainage" not in level_names:
            raise ValueError("DataFrame must have chainage column.")

    def agg_chainage(self, agg=None) -> pd.DataFrame:
        """
        Aggregate along the chainage level by delegating to the agg_chainage helper.

        When agg is None the helper is called without it, so the helper's own
        default aggregation applies.
        """
        self._validate_has_chainage()
        if agg is None:
            return agg_chainage(self._obj)
        return agg_chainage(self._obj, agg=agg)

    def groupby_chainage(self, *args, **kwargs) -> TransposedGroupBy:
        """
        Group for aggregation along the chainage level by delegating to the
        groupby_chainage helper. All arguments are forwarded unchanged.
        """
        self._validate_has_chainage()
        return groupby_chainage(self._obj, *args, **kwargs)

    def groupby(self, *args, **kwargs) -> TransposedGroupBy:
        """
        Group the columns of the DataFrame.

        The DataFrame is transposed, pd.DataFrame.groupby is called with the
        given arguments, and the result is wrapped in a TransposedGroupBy.
        """
        transposed = self._obj.T
        return TransposedGroupBy(transposed_groupby=transposed.groupby(*args, **kwargs))

    def query(self, *args, **kwargs) -> pd.DataFrame:
        """
        Query the columns of the DataFrame.

        pd.DataFrame.query is evaluated on the transposed DataFrame and the
        selection is transposed back before it is returned.
        """
        selected = self._obj.T.query(*args, **kwargs)
        return selected.T

    def compact(self, *args, **kwargs) -> pd.DataFrame:
        """
        Delegate to compact_dataframe, forwarding all arguments unchanged.
        """
        return compact_dataframe(self._obj, *args, **kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import pandas as pd | ||
|
||
from .transposed_groupby import TransposedGroupBy | ||
|
||
|
||
def groupby_chainage(df: pd.DataFrame, **kwargs) -> TransposedGroupBy:
    """
    Build a groupby over every column level except 'chainage'.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with results. Must have hierarchical column index (e.g. column_mode = 'all').
    **kwargs
        Extra keyword arguments passed on to pd.DataFrame.groupby.

    Returns
    -------
    groupby : TransposedGroupBy
        GroupBy object, which can be used for aggregation.
    """
    # Column levels become row-index levels after transposing, which lets the
    # regular groupby operate on them by name.
    transposed = df.T
    grouping_levels = [name for name in df.columns.names if name != "chainage"]
    return TransposedGroupBy(transposed_groupby=transposed.groupby(grouping_levels, **kwargs))
|
||
|
||
def agg_chainage(df: pd.DataFrame, agg=None, gb_kwargs: "dict | None" = None, **kwargs) -> pd.DataFrame:
    """
    Aggregate results along the chainage axis.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with results. Must have hierarchical column index (e.g. column_mode = 'all').
    agg : function, str, list or dict, optional
        Aggregation function(s) to apply. Same as pandas.DataFrame.agg.
        When omitted or None, ``["first"]`` is used.
    gb_kwargs : dict, optional
        Keyword arguments forwarded to groupby_chainage. When omitted or None,
        no extra arguments are forwarded.
    **kwargs
        Extra keyword arguments forwarded to the ``agg`` call of the groupby.

    Returns
    -------
    df : pd.DataFrame
        DataFrame with aggregated results.
    """
    # None sentinels replace the former mutable defaults (a list and a dict
    # shared between all calls); fresh objects are created per call instead.
    if agg is None:
        agg = ["first"]
    if gb_kwargs is None:
        gb_kwargs = {}

    groupby = groupby_chainage(df, **gb_kwargs)
    return groupby.agg(agg, **kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import pandas as pd | ||
|
||
|
||
class TransposedGroupBy:
    """
    Same as pandas.DataFrameGroupBy, but returns the transpose of the result.

    Useful where a groupby is performed on a transposed DataFrame, and after
    aggregation the result should be transposed back.

    Attribute access is delegated to the wrapped groupby object:

    - callable attributes are wrapped so that a DataFrame result is transposed
      before it is returned (any other result is returned unchanged);
    - non-callable attributes are returned unchanged;
    - names missing on the wrapped object raise AttributeError at lookup time.

    Parameters
    ----------
    transposed_groupby : pandas.DataFrameGroupBy
        A pandas.DataFrameGroupBy object, which is created from a transposed DataFrame.

    Examples
    --------
    >>> df = res.reaches.read(column_mode='all')
    >>> groupby = TransposedGroupBy(df.T.groupby(['quantity']))
    >>> groupby.agg(['mean', 'max'])
    ... # performs agg function, then returns the transpose of the result.
    """

    def __init__(self, transposed_groupby):
        self.transposed_groupby = transposed_groupby

    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails. Never delegate the
        # wrapped attribute itself (it is missing only on a half-constructed
        # instance) or dunder names (probed by copy/pickle and friends):
        # answering those with a wrapper would fake protocol support.
        if name == "transposed_groupby" or (name.startswith("__") and name.endswith("__")):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )

        # Resolve eagerly so unknown names fail here (keeps hasattr honest)
        # instead of when the returned wrapper is eventually called.
        attribute = getattr(self.transposed_groupby, name)
        if not callable(attribute):
            return attribute

        def method(*args, **kwargs):
            result = attribute(*args, **kwargs)
            if isinstance(result, pd.DataFrame):
                return result.T
            return result

        return method
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
from __future__ import annotations | ||
|
||
from dataclasses import fields | ||
|
||
import pandas as pd | ||
|
||
from ..quantities import TimeSeriesId | ||
|
||
|
||
def compact_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Convert a DataFrame with a hierarchical column index to a compact DataFrame.

    A compact DataFrame removes levels where every value matches the TimeSeriesId default value.
    TimeSeriesId fields that are not levels of the column index are skipped, and
    at least one level is always kept.

    Note that the column index of ``df`` is replaced in place; the returned
    object is the same DataFrame that was passed in.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with hierarchical column index.

    Returns
    -------
    df : pd.DataFrame
        Compact DataFrame.

    Raises
    ------
    ValueError
        If the columns of ``df`` are not a pd.MultiIndex.
    """
    index = df.columns

    if not isinstance(index, pd.MultiIndex):
        raise ValueError("DataFrame must have a hierarchical column index to compact.")

    for field in fields(TimeSeriesId):
        if index.nlevels <= 1:
            # pandas cannot drop the only remaining level of an index.
            break
        if field.name not in index.names:
            # Nothing to compact for a field that is not a column level
            # (presumably possible after an aggregation removed the level).
            continue

        level_values = index.get_level_values(field.name)
        if len(level_values.unique()) != 1:
            continue

        level_value = level_values[0]
        # NaN never compares equal to itself, so "both are NaN" is detected
        # through self-inequality and treated as a match.
        both_nan = level_value != level_value and field.default != field.default
        if level_value == field.default or both_nan:
            index = index.droplevel(field.name)

    df.columns = index
    return df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.