forked from poplarShift/python-data-science-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
/
__init__.py
116 lines (94 loc) · 3.27 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from .switch_backend import *
from .element import *
from .operation import *
from functools import reduce
import holoviews as hv
import pandas as pd
import numpy as np
import param
def _dt(self, attr='month'):
    """
    Return a dim expression that extracts a datetime attribute
    (e.g. 'month', 'year', 'dayofyear') from the values of this
    dim expression, via pandas' ``Series.dt`` accessor.

    NOTE(review): calls a bare name ``dim`` that is not visibly
    imported here — presumably holoviews' dim expression brought in
    by one of the star imports at the top of the file; confirm it is
    actually in scope.
    """
    def get_dt(x, attr):
        # Any attribute exposed by pandas' .dt accessor is accepted;
        # invalid names raise AttributeError at evaluation time.
        # allowed = [attr for attr in dir(a.dt) if not attr.startswith('_')]
        return getattr(pd.Series(x).dt, attr).values
    return dim(self, get_dt, attr)
# Monkey-patch holoviews so expressions can be written as dim('date').dt('month').
hv.dim.dt = _dt
### FLATTEN
def flatten(l):
    """
    Collapse data of a dimensioned container one level down.

    The first child found by traversing the container is cloned and
    handed the container's full flattened data.
    """
    first_child = l.traverse()[1]
    flat_data = l.dframe()
    return first_child.clone(data=flat_data)
### extract all data from object
_sanitize_units = str.maketrans("⁰¹²³⁴⁵⁶⁷⁸⁹⁻μ", "0123456789-u")
_sanitize_units.update({176: 'deg'}) # degree symbol
def _create_column_header(d):
    """
    Build a plain-ASCII column name from an hv.Dimension: the label,
    optionally suffixed with the unit, with typographic characters
    sanitized and spaces replaced by underscores.
    """
    header = d.label
    if d.unit is not None:
        header = header + '_' + d.unit
    header = header.translate(_sanitize_units)
    return header.replace(' ', '_')
def _to_dframe(e):
    """
    Convert one holoviews element into a DataFrame whose columns carry
    descriptive, sanitized headers; rows missing either of the first
    two (key) dimensions are dropped.
    """
    # Map internal dimension names to descriptive sanitized headers.
    renames = {}
    for d in e.dimensions():
        if d.label is not None:
            renames[d.name] = _create_column_header(d)
    # Require the first two dimensions (the key dimensions) to be present.
    key_cols = [d.name for d in e.dimensions()[:2]]
    df = e.dframe().dropna(subset=key_cols, how='any')
    return df.rename(columns=renames)
def get_all_data(obj):
    """
    Produce pandas DataFrame from any holoviews object.

    Every leaf (non-container) element reached by traversing ``obj``
    is converted to a frame, and the frames are combined by successive
    outer merges on their shared columns.

    License
    -------
    GNU-GPLv3, (C) A. R.
    (https://github.com/poplarShift/python-data-science-utils)
    """
    def _outer_merge(left, right):
        # Outer merge keeps every row from both frames; in principle
        # each dimension is unique w/r/t the values it represents.
        return left.merge(right, how='outer')

    # Containers are deep-indexable, elements are not — visit only leaves.
    frames = obj.traverse(fn=_to_dframe, specs=lambda x: not x._deep_indexable)
    return reduce(_outer_merge, frames)
### aggregate & compare vdims over a number of elements
import holoviews.operation.datashader as hd
import datashader as dsh
# import numpy as np, datashader as sh, xarray as xr
# from datashader import transfer_functions as tf
# import reductions as rd
def agg_vdims(elements, vdims=None, N=100):
    """
    Spatially aggregate vdims from a number of holoviews Elements.

    Each element's key dimensions are renamed to those of the first
    element, the selected value dimension is kept, the key dimensions
    are binned into N intervals, and values are averaged per bin.

    Parameters
    ----------
    elements : list
        List of holoviews Elements.
    vdims : list, optional
        List of vdim names, one per Element, to aggregate.
        If None, each Element's first vdim is used.
    N : int
        Number of bins (in each kdim) to aggregate over.

    Returns
    -------
    pandas.DataFrame
        Mean of each selected vdim per binned-kdim combination,
        indexed by bin midpoints.

    Raises
    ------
    ValueError
        If ``elements`` is empty.

    License
    -------
    GNU-GPLv3, (C) A. R.
    (https://github.com/poplarShift/python-data-science-utils)
    """
    if not elements:
        raise ValueError('agg_vdims requires at least one element')
    # Key dimensions of the first element define the common grid.
    kdims0 = [kd.name for kd in elements[0].kdims]
    data = []
    for k, e in enumerate(elements):
        # Pick the vdim to aggregate for this element.
        vdim = e.vdims[0].name if vdims is None else vdims[k]
        kdims = [kd.name for kd in e.kdims]
        # Keep only the key dims plus the selected vdim. (The original
        # selected every dimension, leaving `vdim` unused — contradicting
        # the documented per-element vdim selection.)
        cols = kdims + [vdim]
        renamed = e.data[cols].rename(columns=dict(zip(kdims, kdims0)))
        data.append(renamed.copy())
    df = pd.concat(data, sort=False).dropna(how='all')
    # Bin each key dimension and replace values by the bin midpoints.
    for kd in kdims0:
        df[kd] = pd.IntervalIndex(pd.cut(df[kd], bins=N)).mid
    return df.groupby(kdims0).mean()