Skip to content

Commit

Permalink
CLN: ASV FromDictwithTimestamp (pandas-dev#18527)
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke authored and jorisvandenbossche committed Dec 10, 2017
1 parent 451811d commit 1c1f507
Showing 1 changed file with 19 additions and 72 deletions.
91 changes: 19 additions & 72 deletions asv_bench/benchmarks/frame_ctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
import pandas.util.testing as tm
from pandas import DataFrame, Series, MultiIndex, Timestamp, date_range
try:
from pandas.tseries import offsets
except:
from pandas.tseries.offsets import Nano, Hour
except ImportError:
# For compatibility with older versions
from pandas.core.datetools import * # noqa

from .pandas_vb_common import setup # noqa
Expand All @@ -24,16 +25,16 @@ def setup(self):
self.data2 = {i: {j: float(j) for j in range(100)}
for i in range(2000)}

def time_frame_ctor_list_of_dict(self):
def time_list_of_dict(self):
DataFrame(self.dict_list)

def time_frame_ctor_nested_dict(self):
def time_nested_dict(self):
DataFrame(self.data)

def time_series_ctor_from_dict(self):
def time_dict(self):
Series(self.some_dict)

def time_frame_ctor_nested_dict_int64(self):
def time_nested_dict_int64(self):
# nested dict, integer indexes, regression described in #621
DataFrame(self.data2)

Expand All @@ -46,78 +47,24 @@ def setup(self):
mi = MultiIndex.from_product([range(100), range(100)])
self.s = Series(np.random.randn(10000), index=mi)

def time_frame_from_mi_series(self):
def time_mi_series(self):
DataFrame(self.s)

# ----------------------------------------------------------------------
# From dict with DatetimeIndex with all offsets

# dynamically generate benchmarks for every offset
#
# get_period_count & get_index_for_offset are there because blindly taking each
# offset times 1000 can easily go out of Timestamp bounds and raise errors.
class FromDictwithTimestamp(object):

goal_time = 0.2
params = [Nano(1), Hour(1)]
param_names = ['offset']

def get_period_count(start_date, off):
ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days
if (ten_offsets_in_days == 0):
return 1000
else:
periods = 9 * (Timestamp.max - start_date).days // ten_offsets_in_days
return min(periods, 1000)


def get_index_for_offset(off):
start_date = Timestamp('1/1/1900')
return date_range(start_date,
periods=get_period_count(start_date, off),
freq=off)


all_offsets = offsets.__all__
# extra cases
for off in ['FY5253', 'FY5253Quarter']:
all_offsets.pop(all_offsets.index(off))
all_offsets.extend([off + '_1', off + '_2'])


class FromDictwithTimestampOffsets(object):

params = [all_offsets, [1, 2]]
param_names = ['offset', 'n_steps']

offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1},
'LastWeekOfMonth': {'weekday': 1, 'week': 1},
'FY5253': {'startingMonth': 1, 'weekday': 1},
'FY5253Quarter': {'qtr_with_extra_week': 1,
'startingMonth': 1,
'weekday': 1}}

offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']},
'FY5253Quarter': {'variation': ['nearest', 'last']}}

def setup(self, offset, n_steps):
def setup(self, offset):
N = 10**3
np.random.seed(1234)
extra = False
if offset.endswith("_", None, -1):
extra = int(offset[-1])
offset = offset[:-2]

kwargs = {}
if offset in self.offset_kwargs:
kwargs = self.offset_kwargs[offset]

if extra:
extras = self.offset_extra_cases[offset]
for extra_arg in extras:
kwargs[extra_arg] = extras[extra_arg][extra - 1]

offset = getattr(offsets, offset)
self.idx = get_index_for_offset(offset(n_steps, **kwargs))
self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx)
self.d = self.df.to_dict()

def time_frame_ctor(self, offset, n_steps):
idx = date_range(Timestamp('1/1/1900'), freq=offset, periods=N)
df = DataFrame(np.random.randn(N, 10), index=idx)
self.d = df.to_dict()

def time_dict_with_timestamp_offsets(self, offset):
DataFrame(self.d)


Expand Down

0 comments on commit 1c1f507

Please sign in to comment.