Skip to content

Commit

Permalink
Improved docstrings, formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
opcode81 committed Aug 3, 2023
1 parent 55ef738 commit 08e6559
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/sensai/data_transformation/dft.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,7 +816,7 @@ def info(self):

class DFTSkLearnTransformer(InvertibleDataFrameTransformer):
"""
Applies a transformer from sklearn.preprocessing to (a subset of the columns of) a data frame.
Applies a transformer from sklearn.preprocessing to (a subset of) the columns of a data frame.
If multiple columns are transformed, they are transformed independently (i.e. each column uses a separately trained transformation).
"""
def __init__(self,
Expand Down
12 changes: 7 additions & 5 deletions src/sensai/featuregen/feature_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import re
from abc import ABC, abstractmethod
from typing import Sequence, List, Union, Callable, Any, Dict, TYPE_CHECKING, Optional, Hashable
from typing import Sequence, List, Union, Callable, Any, Dict, TYPE_CHECKING, Optional

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -43,7 +43,7 @@ def __init__(self,
:param normalisation_rules: Rules to be used by DFTNormalisation (e.g. for constructing an input transformer for a model).
These rules are only relevant if a DFTNormalisation object consuming them is instantiated and used
within a data processing pipeline. They do not affect feature generation.
:param normalisation_rule_template: This parameter can be supplied instead of normalisationRules for the case where
:param normalisation_rule_template: This parameter can be supplied instead of normalisation_rules for the case where
there shall be a single rule that applies to all columns generated by this feature generator that were not labeled as
categorical.
:param add_categorical_default_rules:
Expand All @@ -56,7 +56,7 @@ def __init__(self,
# and can provide them directly in the subclass constructor implementation. Thus it would enable
# nonsensical settings which should be avoided.
if len(normalisation_rules) > 0 and normalisation_rule_template is not None:
raise ValueError(f"normalisationRules should be empty when a normalisationRuleTemplate is provided")
raise ValueError(f"Normalisation rules should be empty when a rule template is provided")

self._generatedColumnNames = None
self.__categoricalFeatureNames = categorical_feature_names
Expand Down Expand Up @@ -700,9 +700,11 @@ def _generate(self, df: pd.DataFrame, ctx=None) -> pd.DataFrame:
if self.flatten:
for target_value in self._targetColumnValues:
# Important: pd.Series.apply should not be used here, as it would label the resulting column as categorical
result_df[f"{column}_{self.targetColumn}_distribution_{target_value}"] = [target_distribution_by_value[value].get(target_value, 0.0) for value in df[column]]
result_df[f"{column}_{self.targetColumn}_distribution_{target_value}"] = \
[target_distribution_by_value[value].get(target_value, 0.0) for value in df[column]]
else:
distributions = [[target_distribution_by_value[value].get(targetValue, 0.0) for targetValue in self._targetColumnValues] for value in df[column]]
distributions = [[target_distribution_by_value[value].get(targetValue, 0.0) for targetValue in self._targetColumnValues]
for value in df[column]]
result_df[f"{column}_{self.targetColumn}_distribution"] = pd.Series(distributions, index=df[column].index)
return result_df

Expand Down

0 comments on commit 08e6559

Please sign in to comment.