From f65d432573c880b309b2e05d8f51ede6429bd7e4 Mon Sep 17 00:00:00 2001 From: Michael Panchenko Date: Wed, 28 Feb 2024 21:27:30 +0100 Subject: [PATCH] Docstrings --- src/sensai/data_transformation/dft.py | 22 ++++++++++------------ src/sensai/featuregen/feature_generator.py | 2 +- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/sensai/data_transformation/dft.py b/src/sensai/data_transformation/dft.py index 9a87e82b..952c5c9d 100644 --- a/src/sensai/data_transformation/dft.py +++ b/src/sensai/data_transformation/dft.py @@ -552,12 +552,11 @@ def __init__(self, transformation will be applied to each entry in the array. :param fit: whether the rule's transformer shall be fitted. One use case for setting this to False is if a transformer instance instead of a factory is given and the transformer is already fitted. - :param independent_columns: only relevant if the resulting rule matches multiple columns, in which case it is required. - If True, the columns are treated independent and a separate transformation is to be learned for each of them. Note that - this doesn't mean each column will get a separate transformer instance! Rather, the transformer will be fitted - on the array resulting from selecting the matched columns. - If False, all matching columns are treated as a single feature for the purpose of normalisation. - Thus, all columns will be concatenated before fitting the transformer. + :param independent_columns: whether, for the case where the rule matches multiple columns, the columns are independent and a + separate transformation is to be learned for each of them (rather than using the same transformation for all columns and + learning the transformation from the data of all columns). + This parameter must be specified for rules matching more than one column; + None is acceptable for rules matching a single column, in which case None, True, and False all have the same effect. """ # NOTE: keep in sync with Rule! 
if (skip or unsupported) and count_not_none(transformer, transformer_factory) > 0: @@ -633,12 +632,11 @@ def __init__(self, transformation will be applied to each entry in the array. :param fit: whether the rule's transformer shall be fitted. One use case for setting this to False is if a transformer instance instead of a factory is given and the transformer is already fitted. - :param independent_columns: only relevant if the resulting rule matches multiple columns, in which case it is required. - If True, the columns are treated independent and a separate transformation is to be learned for each of them. Note that - this doesn't mean each column will get a separate transformer instance! Rather, the transformer will be fitted - on the array resulting from selecting the matched columns. - If False, all matching columns are treated as a single feature for the purpose of normalisation. - Thus, all columns will be concatenated before fitting the transformer. + :param independent_columns: whether, for the case where the rule matches multiple columns, the columns are independent and a + separate transformation is to be learned for each of them (rather than using the same transformation for all columns and + learning the transformation from the data of all columns). + This parameter must be specified for rules matching more than one column; + None is acceptable for rules matching a single column, in which case None, True, and False all have the same effect. 
""" if (skip or unsupported) and count_not_none(transformer, transformer_factory) > 0: raise ValueError("Passed transformer or transformer_factory while skip=True or unsupported=True") diff --git a/src/sensai/featuregen/feature_generator.py b/src/sensai/featuregen/feature_generator.py index 0d462dfa..bde0ff66 100644 --- a/src/sensai/featuregen/feature_generator.py +++ b/src/sensai/featuregen/feature_generator.py @@ -36,7 +36,7 @@ def __init__(self, add_categorical_default_rules: bool = True): """ :param categorical_feature_names: either a sequence of column names or a regex that is to match all categorical feature names - (which must not only work for the feature generated by this feature generator, i.e., it should not match feature names generated + (which must not only work for the feature generated by this feature generator, i.e. it should not match feature names generated by other feature generators). It will be ensured that the respective columns in the generated data frames will have dtype 'category'. Furthermore, the presence of meta-information can later be leveraged for further transformations, e.g., one-hot encoding.