CLN: ASV FromDictwithTimestamp (pandas-dev#18527)

mroeschke · jorisvandenbossche · commit 1c1f5076173c · 2017-12-10T17:27:11.000+01:00
diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py
@@ -2,8 +2,9 @@
 import pandas.util.testing as tm
 from pandas import DataFrame, Series, MultiIndex, Timestamp, date_range
 try:
-    from pandas.tseries import offsets
-except:
+    from pandas.tseries.offsets import Nano, Hour
+except ImportError:
+    # For compatibility with older versions
     from pandas.core.datetools import * # noqa
 
 from .pandas_vb_common import setup # noqa
@@ -24,16 +25,16 @@ def setup(self):
         self.data2 = {i: {j: float(j) for j in range(100)}
                       for i in range(2000)}
 
-    def time_frame_ctor_list_of_dict(self):
+    def time_list_of_dict(self):
         DataFrame(self.dict_list)
 
-    def time_frame_ctor_nested_dict(self):
+    def time_nested_dict(self):
         DataFrame(self.data)
 
-    def time_series_ctor_from_dict(self):
+    def time_dict(self):
         Series(self.some_dict)
 
-    def time_frame_ctor_nested_dict_int64(self):
+    def time_nested_dict_int64(self):
         # nested dict, integer indexes, regression described in #621
         DataFrame(self.data2)
 
@@ -46,78 +47,24 @@ def setup(self):
         mi = MultiIndex.from_product([range(100), range(100)])
         self.s = Series(np.random.randn(10000), index=mi)
 
-    def time_frame_from_mi_series(self):
+    def time_mi_series(self):
         DataFrame(self.s)
 
-# ----------------------------------------------------------------------
-# From dict with DatetimeIndex with all offsets
 
-# dynamically generate benchmarks for every offset
-#
-# get_period_count & get_index_for_offset are there because blindly taking each
-# offset times 1000 can easily go out of Timestamp bounds and raise errors.
+class FromDictwithTimestamp(object):
 
+    goal_time = 0.2
+    params = [Nano(1), Hour(1)]
+    param_names = ['offset']
 
-def get_period_count(start_date, off):
-    ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days
-    if (ten_offsets_in_days == 0):
-        return 1000
-    else:
-        periods = 9 * (Timestamp.max - start_date).days // ten_offsets_in_days
-        return min(periods, 1000)
-
-
-def get_index_for_offset(off):
-    start_date = Timestamp('1/1/1900')
-    return date_range(start_date,
-                      periods=get_period_count(start_date, off),
-                      freq=off)
-
-
-all_offsets = offsets.__all__
-# extra cases
-for off in ['FY5253', 'FY5253Quarter']:
-    all_offsets.pop(all_offsets.index(off))
-    all_offsets.extend([off + '_1', off + '_2'])
-
-
-class FromDictwithTimestampOffsets(object):
-
-    params = [all_offsets, [1, 2]]
-    param_names = ['offset', 'n_steps']
-
-    offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1},
-                     'LastWeekOfMonth': {'weekday': 1, 'week': 1},
-                     'FY5253': {'startingMonth': 1, 'weekday': 1},
-                     'FY5253Quarter': {'qtr_with_extra_week': 1,
-                                       'startingMonth': 1,
-                                       'weekday': 1}}
-
-    offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']},
-                          'FY5253Quarter': {'variation': ['nearest', 'last']}}
-
-    def setup(self, offset, n_steps):
+    def setup(self, offset):
+        N = 10**3
         np.random.seed(1234)
-        extra = False
-        if offset.endswith("_", None, -1):
-            extra = int(offset[-1])
-            offset = offset[:-2]
-
-        kwargs = {}
-        if offset in self.offset_kwargs:
-            kwargs = self.offset_kwargs[offset]
-
-        if extra:
-            extras = self.offset_extra_cases[offset]
-            for extra_arg in extras:
-                kwargs[extra_arg] = extras[extra_arg][extra - 1]
-
-        offset = getattr(offsets, offset)
-        self.idx = get_index_for_offset(offset(n_steps, **kwargs))
-        self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx)
-        self.d = self.df.to_dict()
-
-    def time_frame_ctor(self, offset, n_steps):
+        idx = date_range(Timestamp('1/1/1900'), freq=offset, periods=N)
+        df = DataFrame(np.random.randn(N, 10), index=idx)
+        self.d = df.to_dict()
+
+    def time_dict_with_timestamp_offsets(self, offset):
         DataFrame(self.d)