Commit
Merge pull request #77 from DHI/feature/pandas_dataframe_accessor_extension

Feature/pandas dataframe accessor extension
ryan-kipawa authored Jan 24, 2024
2 parents d4686ba + f5aea7f commit d41a0f4
Showing 8 changed files with 610 additions and 0 deletions.
13 changes: 13 additions & 0 deletions mikeio1d/pandas_extension/__init__.py
@@ -0,0 +1,13 @@
from .mikeio1d_accessor import Mikeio1dAccessor # noqa
from .transposed_groupby import TransposedGroupBy # noqa
from .result_reaches_helpers import agg_chainage # noqa
from .result_reaches_helpers import groupby_chainage # noqa
from .various import compact_dataframe

__all__ = [
    "Mikeio1dAccessor",
    "TransposedGroupBy",
    "agg_chainage",
    "groupby_chainage",
    "compact_dataframe",
]
80 changes: 80 additions & 0 deletions mikeio1d/pandas_extension/mikeio1d_accessor.py
@@ -0,0 +1,80 @@
import pandas as pd

from .transposed_groupby import TransposedGroupBy
from .result_reaches_helpers import agg_chainage
from .result_reaches_helpers import groupby_chainage
from .various import compact_dataframe


@pd.api.extensions.register_dataframe_accessor("m1d")
class Mikeio1dAccessor:
    """
    This class uses the pandas Extension API to register a custom accessor for DataFrames.
    More information can be found here:
    https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors

    The accessor provides convenience methods for working with DataFrames that have
    MultiIndex columns. It is intended as a facade for various helper functions that
    live elsewhere in the package.
    """

    def __init__(self, pandas_obj):
        self._validate(pandas_obj)
        self._obj = pandas_obj

    @staticmethod
    def _validate(obj):
        if not isinstance(obj, pd.DataFrame):
            raise AttributeError("Mikeio1dAccessor only supports DataFrames.")
        df: pd.DataFrame = obj
        if not isinstance(df.columns, pd.MultiIndex):
            raise AttributeError("DataFrame must have MultiIndex columns.")

    def _validate_has_chainage(self):
        self._validate(self._obj)
        if "chainage" not in self._obj.columns.names:
            raise ValueError("DataFrame must have a 'chainage' column level.")

    def agg_chainage(self, agg=None) -> pd.DataFrame:
        """
        Convenience wrapper for ResultReaches.agg_chainage.
        """
        self._validate_has_chainage()
        kwargs = {}
        if agg is not None:
            kwargs["agg"] = agg

        return agg_chainage(self._obj, **kwargs)

    def groupby_chainage(self, *args, **kwargs) -> TransposedGroupBy:
        """
        Convenience wrapper for pd.DataFrame.groupby. The groupby is performed on
        the columns of the DataFrame, which form a MultiIndex.
        """
        self._validate_has_chainage()
        df: pd.DataFrame = self._obj
        return groupby_chainage(df, *args, **kwargs)

    def groupby(self, *args, **kwargs) -> TransposedGroupBy:
        """
        Convenience wrapper for pd.DataFrame.groupby. The groupby is performed on
        the columns of the DataFrame, which form a MultiIndex.
        """
        df: pd.DataFrame = self._obj
        groupby = TransposedGroupBy(transposed_groupby=df.T.groupby(*args, **kwargs))
        return groupby

    def query(self, *args, **kwargs) -> pd.DataFrame:
        """
        Convenience wrapper for pd.DataFrame.query. The query is performed on
        the columns of the DataFrame, which form a MultiIndex.
        """
        df = self._obj
        return df.T.query(*args, **kwargs).T

    def compact(self, *args, **kwargs) -> pd.DataFrame:
        """
        Convenience wrapper for compact_dataframe.
        """
        df = self._obj
        return compact_dataframe(df, *args, **kwargs)
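
A rough usage sketch of the new accessor (the file name is a placeholder; the read call mirrors the docstring examples elsewhere in this PR):

    from mikeio1d import Res1D

    res = Res1D("network.res1d")                 # placeholder file name
    df = res.reaches.read(column_mode="all")     # DataFrame with MultiIndex columns

    df.m1d.agg_chainage(agg="max")               # aggregate along the chainage level
    df.m1d.groupby("quantity").agg("mean")       # groupby on a column level, transposed back
    df.m1d.query("quantity == 'Discharge'")      # query on column levels ('Discharge' is illustrative)
    df.m1d.compact()                             # drop levels holding only default values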
42 changes: 42 additions & 0 deletions mikeio1d/pandas_extension/result_reaches_helpers.py
@@ -0,0 +1,42 @@
import pandas as pd

from .transposed_groupby import TransposedGroupBy


def groupby_chainage(df: pd.DataFrame, **kwargs) -> TransposedGroupBy:
    """
    Group results for aggregation along the chainage axis.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with results. Must have hierarchical column index (e.g. column_mode = 'all').

    Returns
    -------
    groupby : TransposedGroupBy
        GroupBy object, which can be used for aggregation.
    """
    fixed_level_names = [n for n in df.columns.names if n != "chainage"]
    groupby = TransposedGroupBy(transposed_groupby=df.T.groupby(fixed_level_names, **kwargs))
    return groupby


def agg_chainage(df: pd.DataFrame, agg=["first"], gb_kwargs: dict = {}, **kwargs) -> pd.DataFrame:
    """
    Aggregate results along the chainage axis.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with results. Must have hierarchical column index (e.g. column_mode = 'all').
    agg : function, str, list or dict
        Aggregation function(s) to apply. Same as pandas.DataFrame.agg.
    gb_kwargs : dict
        Keyword arguments passed on to groupby_chainage.

    Returns
    -------
    df : pd.DataFrame
        DataFrame with aggregated results.
    """
    groupby = groupby_chainage(df, **gb_kwargs)
    return groupby.agg(agg, **kwargs)
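
A minimal sketch of calling these helpers directly (df as in the sketch above, read with column_mode='all'):

    from mikeio1d.pandas_extension import agg_chainage, groupby_chainage

    agg_chainage(df, agg=["first", "max"])   # aggregate along the chainage level
    groupby_chainage(df).agg("mean")         # equivalent explicit groupby, transposed back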
34 changes: 34 additions & 0 deletions mikeio1d/pandas_extension/transposed_groupby.py
@@ -0,0 +1,34 @@
import pandas as pd


class TransposedGroupBy:
    """
    Same as pandas.DataFrameGroupBy, but returns the transpose of the result.
    Useful where a groupby is performed on a transposed DataFrame, and after
    aggregation the result should be transposed back.

    Parameters
    ----------
    transposed_groupby : pandas.DataFrameGroupBy
        A pandas.DataFrameGroupBy object, which is created from a transposed DataFrame.

    Examples
    --------
    >>> df = res.reaches.read(column_mode='all')
    >>> groupby = TransposedGroupBy(df.T.groupby(['quantity']))
    >>> groupby.agg(['mean', 'max'])
    ... # performs agg function, then returns the transpose of the result.
    """

    def __init__(self, transposed_groupby):
        self.transposed_groupby = transposed_groupby

    def __getattr__(self, name):
        def method(*args, **kwargs):
            # Delegate to the wrapped groupby; transpose DataFrame results back.
            result = getattr(self.transposed_groupby, name)(*args, **kwargs)
            if isinstance(result, pd.DataFrame):
                return result.T
            return result

        return method
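
The delegation can be sketched with plain pandas (the level names and values below are illustrative):

    import pandas as pd
    from mikeio1d.pandas_extension import TransposedGroupBy

    columns = pd.MultiIndex.from_tuples(
        [("Discharge", 10.0), ("Discharge", 20.0), ("WaterLevel", 10.0)],
        names=["quantity", "chainage"],
    )
    df = pd.DataFrame([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], columns=columns)

    gb = TransposedGroupBy(transposed_groupby=df.T.groupby("quantity"))
    gb.agg("mean")  # aggregated per quantity, transposed back so time stays on the rows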
45 changes: 45 additions & 0 deletions mikeio1d/pandas_extension/various.py
@@ -0,0 +1,45 @@
from __future__ import annotations

from dataclasses import fields

import pandas as pd

from ..quantities import TimeSeriesId


def compact_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Convert a DataFrame with a hierarchical column index to a compact DataFrame.
    A compact DataFrame removes levels where every value matches the TimeSeriesId default value.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with hierarchical column index.

    Returns
    -------
    df : pd.DataFrame
        Compact DataFrame.
    """
    index = df.columns

    is_hierarchical_index = isinstance(index, pd.MultiIndex)
    if not is_hierarchical_index:
        raise ValueError("DataFrame must have a hierarchical column index to compact.")

    for field in fields(TimeSeriesId):
        level_values = index.get_level_values(field.name)
        is_only_one_unique_value = len(level_values.unique()) == 1
        if not is_only_one_unique_value:
            continue
        level_value = level_values[0]
        # NaN never equals itself, so the second clause catches the case where both the
        # level value and the field default are NaN.
        is_all_default_values = (level_value == field.default) or (
            level_value != level_value and field.default != field.default
        )
        if is_all_default_values:
            index = index.droplevel(field.name)

    df.columns = index
    return df
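
A short sketch of the effect (res as in the earlier sketch; which levels get dropped depends on the TimeSeriesId defaults present in the data):

    from mikeio1d.pandas_extension import compact_dataframe

    df = res.reaches.read(column_mode="all")
    compact_dataframe(df)   # same effect as df.m1d.compact(); single-valued default levels are removed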
2 changes: 2 additions & 0 deletions mikeio1d/res1d.py
@@ -46,6 +46,8 @@

from .quantities import TimeSeriesId

from .pandas_extension import Mikeio1dAccessor # noqa: F401

from System import DateTime


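Note that the import above is kept only for its side effect: loading the pandas_extension module registers the accessor, so the m1d namespace becomes available on DataFrames. A minimal check, assuming the top-level package pulls in res1d:

    import mikeio1d  # noqa: F401  (side effect: registers the DataFrame accessor)
    import pandas as pd

    hasattr(pd.DataFrame, "m1d")  # True once the accessor is registered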
