Skip to content

Commit

Permalink
Add root mean square feature (#813)
Browse files Browse the repository at this point in the history
* root_mean_square feature and unit tests

* removed .coverage.* files

* fix codestyle

* fixed test errors
  • Loading branch information
OliEfr authored Mar 3, 2021
1 parent 23fce40 commit 4fb8967
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 17 deletions.
Empty file.
Empty file.
Binary file added notebooks/examples/pipeline.pkl
Binary file not shown.
32 changes: 16 additions & 16 deletions tests/integrations/test_feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,28 +29,28 @@ def test_pandas(self):
self.assertIn("1__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "1__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 16))
self.assertEqual(X.shape, (100, 18))

X = extract_features(df, column_id="my_id", column_sort="time", column_kind="dimension",
default_fc_parameters=MinimalFCParameters())
self.assertIn("1__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "1__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 16))
self.assertEqual(X.shape, (100, 18))

X = extract_features(df.drop(columns=["dimension"]), column_id="my_id", column_sort="time",
default_fc_parameters=MinimalFCParameters())
self.assertIn("value__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "value__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 8))
self.assertEqual(X.shape, (100, 9))

X = extract_features(df.drop(columns=["dimension", "time"]), column_id="my_id",
default_fc_parameters=MinimalFCParameters())
self.assertIn("value__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "value__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 8))
self.assertEqual(X.shape, (100, 9))

def test_pandas_no_pivot(self):
df = self.df
Expand All @@ -62,7 +62,7 @@ def test_pandas_no_pivot(self):
X = pd.DataFrame(X, columns=["my_id", "variable", "value"])
self.assertIn("1__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "1__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*16, 3))
self.assertEqual(X.shape, (100*18, 3))

X = extract_features(df, column_id="my_id", column_sort="time",
column_kind="dimension",
Expand All @@ -71,7 +71,7 @@ def test_pandas_no_pivot(self):
X = pd.DataFrame(X, columns=["my_id", "variable", "value"])
self.assertIn("1__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "1__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*16, 3))
self.assertEqual(X.shape, (100*18, 3))

X = extract_features(df.drop(columns=["dimension"]), column_id="my_id",
column_sort="time",
Expand All @@ -80,15 +80,15 @@ def test_pandas_no_pivot(self):
X = pd.DataFrame(X, columns=["my_id", "variable", "value"])
self.assertIn("value__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "value__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*8, 3))
self.assertEqual(X.shape, (100*9, 3))

X = extract_features(df.drop(columns=["dimension", "time"]), column_id="my_id",
pivot=False,
default_fc_parameters=MinimalFCParameters())
X = pd.DataFrame(X, columns=["my_id", "variable", "value"])
self.assertIn("value__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "value__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*8, 3))
self.assertEqual(X.shape, (100*9, 3))

def test_dask(self):
df = dd.from_pandas(self.df, npartitions=1)
Expand All @@ -99,30 +99,30 @@ def test_dask(self):
self.assertIn("1__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "1__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 16))
self.assertEqual(X.shape, (100, 18))

X = extract_features(df, column_id="my_id", column_sort="time",
column_kind="dimension",
default_fc_parameters=MinimalFCParameters()).compute()
self.assertIn("1__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "1__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 16))
self.assertEqual(X.shape, (100, 18))

X = extract_features(df.drop(columns=["dimension"]), column_id="my_id",
column_sort="time",
default_fc_parameters=MinimalFCParameters()).compute()
self.assertIn("value__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "value__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 8))
self.assertEqual(X.shape, (100, 9))

X = extract_features(df.drop(columns=["dimension", "time"]), column_id="my_id",
default_fc_parameters=MinimalFCParameters()).compute()
self.assertIn("value__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "value__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 8))
self.assertEqual(X.shape, (100, 9))

def test_dask_no_pivot(self):
df = dd.from_pandas(self.df, npartitions=1)
Expand All @@ -133,27 +133,27 @@ def test_dask_no_pivot(self):
default_fc_parameters=MinimalFCParameters()).compute()
self.assertIn("1__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "1__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*16, 3))
self.assertEqual(X.shape, (100*18, 3))

X = extract_features(df, column_id="my_id", column_sort="time",
column_kind="dimension",
pivot=False,
default_fc_parameters=MinimalFCParameters()).compute()
self.assertIn("1__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "1__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*16, 3))
self.assertEqual(X.shape, (100*18, 3))

X = extract_features(df.drop(columns=["dimension"]), column_id="my_id",
column_sort="time",
pivot=False,
default_fc_parameters=MinimalFCParameters()).compute()
self.assertIn("value__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "value__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*8, 3))
self.assertEqual(X.shape, (100*9, 3))

X = extract_features(df.drop(columns=["dimension", "time"]), column_id="my_id",
pivot=False,
default_fc_parameters=MinimalFCParameters()).compute()
self.assertIn("value__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "value__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*8, 3))
self.assertEqual(X.shape, (100*9, 3))
7 changes: 7 additions & 0 deletions tests/units/feature_extraction/test_feature_calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,13 @@ def test_kurtosis(self):
self.assertAlmostEqualOnAllArrayTypes(kurtosis, [1, 1, 1, 1], 0)
self.assertIsNanOnAllArrayTypes(kurtosis, [1, 1, 1])

def test_root_mean_square(self):
    """Check root_mean_square on a typical series, single samples and empty input."""
    check_value = self.assertAlmostEqualOnAllArrayTypes

    # sqrt((1 + 1 + 1 + 4 + 4) / 5) = sqrt(2.2)
    check_value(root_mean_square, [1, 1, 1, 2, 2], 1.4832396974191)
    check_value(root_mean_square, [0], 0)

    # Empty input is expected to yield NaN rather than a number.
    self.assertIsNanOnAllArrayTypes(root_mean_square, [])

    # Squaring discards the sign, so a lone +1 and a lone -1 both give 1.
    for sample in (1, -1):
        check_value(root_mean_square, [sample], 1)

def test_absolute_sum_of_changes(self):
self.assertEqualOnAllArrayTypes(absolute_sum_of_changes, [1, 1, 1, 1, 2, 1], 2)
self.assertEqualOnAllArrayTypes(absolute_sum_of_changes, [1, -1, 1, -1], 6)
Expand Down
2 changes: 1 addition & 1 deletion tests/units/feature_extraction/test_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,5 +196,5 @@ def test_extraction_runs_through(self):

self.assertCountEqual(extracted_features.columns, ["0__median", "0__standard_deviation", "0__sum_values",
"0__maximum", "0__variance", "0__minimum", "0__mean",
"0__length"])
"0__length", "0__root_mean_square"])
self.assertCountEqual(extracted_features.index, [0, 1])
14 changes: 14 additions & 0 deletions tsfresh/feature_extraction/feature_calculators.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,20 @@ def kurtosis(x):
return pd.Series.kurtosis(x)


@set_property("fctype", "simple")
@set_property("minimal", True)
def root_mean_square(x):
    """
    Returns the root mean square (rms) of the time series, i.e. the square
    root of the mean of the squared values.

    An empty time series has no root mean square; NaN is returned in that case.

    :param x: the time series to calculate the feature of
    :type x: numpy.ndarray
    :return: the value of this feature
    :return type: float
    """
    if len(x) == 0:
        # Use the lowercase spelling: the `np.NaN` alias was removed in
        # NumPy 2.0 and raises AttributeError there.
        return np.nan
    return np.sqrt(np.mean(np.square(x)))


@set_property("fctype", "simple")
def absolute_sum_of_changes(x):
"""
Expand Down

0 comments on commit 4fb8967

Please sign in to comment.