Commit e484785: searching searching space

technocreep committed Jul 19, 2023
1 parent: faef03d

Showing 15 changed files with 474 additions and 237 deletions.
8 changes: 4 additions & 4 deletions fedot_ind/api/exper.py
@@ -20,10 +20,10 @@
 # ]

 datasets_good_roc = [
-    'Chinatown',
+    # 'Chinatown',
     # 'Earthquakes',
     # 'Ham',
-    # 'ECG200',
+    'ECG200',
     # 'MiddlePhalanxOutlineCorrect',
     # 'MoteStrain',
     # 'TwoLeadECG'
@@ -52,8 +52,8 @@
         # 'wavelet_basis',
         'data_driven_basis'
     ],
-    tuning_iterations=10,
-    tuning_timeout=2,
+    tuning_iterations=5,
+    tuning_timeout=15,
     use_cache=False,
     timeout=1,
     n_jobs=2,
@@ -15,6 +15,7 @@
 from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum
 from fedot.core.repository.tasks import Task, TaskTypesEnum
 from golem.core.tuning.simultaneous import SimultaneousTuner
+from golem.core.tuning.sequential import SequentialTuner

 from fedot_ind.api.utils.saver_collections import ResultSaver
 from fedot_ind.core.architecture.postprocessing.Analyzer import PerformanceAnalyzer
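Context for the new import: in GOLEM, SimultaneousTuner optimizes the hyperparameters of all pipeline nodes at once, while SequentialTuner tunes them node by node. A minimal sketch of plugging either tuner into a FEDOT pipeline through the standard TunerBuilder flow (this builder code is not part of the diff; train_data and pipeline are assumed to exist):

from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder

tuner = (TunerBuilder(train_data.task)
         .with_tuner(SequentialTuner)                    # or SimultaneousTuner
         .with_metric(ClassificationMetricsEnum.ROCAUC)
         .with_iterations(5)
         .build(train_data))
tuned_pipeline = tuner.tune(pipeline)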
@@ -91,7 +92,7 @@ def _init_input_data(self, X: pd.DataFrame, y: np.ndarray) -> InputData:
             y: numpy array with target values
         Returns:
-            InputData object convinient for FEDOT framework
+            InputData object convenient for FEDOT framework
         """
         is_multivariate_data = self.__check_multivariate_data(X)
39 changes: 0 additions & 39 deletions fedot_ind/core/architecture/settings/hyperparams.py
@@ -1,10 +1,6 @@
 import numpy as np


-def quantile(column, q: str):
-    return np.quantile(a=column, q=q)
-
-
 def softmax(w, theta=1.0) -> np.ndarray:
     """Takes a vector w of S N-element and returns a vectors where each column
     of the vector sums to 1, with elements exponentially proportional to the
@@ -25,45 +21,10 @@ def softmax(w, theta=1.0) -> np.ndarray:
     return dist


-stat_methods_default = {
-    'mean_': np.mean,
-    'median_': np.median,
-    'std_': np.std,
-    'var_': np.var,
-    'q5_': quantile,
-    'q25_': quantile,
-    'q75_': quantile,
-    'q95_': quantile,
-}

-stat_methods_ensemble = {
-    'MeanEnsemble': np.mean,
-    'MedianEnsemble': np.median,
-    'MinEnsemble': np.min,
-    'MaxEnsemble': np.max,
-    'ProductEnsemble': np.prod
-}

-stat_methods_full = {
-    'mean_': np.mean,
-    'median_': np.median,
-    'lambda_less_zero': lambda x: x < 0.01,
-    'std_': np.std,
-    'var_': np.var,
-    'max': np.max,
-    'min': np.min,
-    'q5_': quantile,
-    'q25_': quantile,
-    'q75_': quantile,
-    'q95_': quantile,
-    'sum_': np.sum,
-    'dif_': np.diff
-}

-hyper_param_dict = {'statistical_methods': stat_methods_default,
-                    'statistical_methods_extra': stat_methods_full,
-                    'stat_methods_ensemble': stat_methods_ensemble}


 def select_hyper_param(param_name):
     return hyper_param_dict[param_name]
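These dictionaries do not vanish from the project: elsewhere in this commit, BaseExtractor.py imports stat_methods from fedot_ind/core/operation/transformation/extraction/statistical.py. A plausible sketch of that relocated mapping, assuming it mirrors the definitions deleted above (statistical.py itself is not shown in this diff; the lambdas stand in for the deleted quantile() helper):

import numpy as np

# Assumed contents of fedot_ind/core/operation/transformation/extraction/statistical.py;
# reconstructed from the dictionaries removed above, not from the actual file.
stat_methods = {
    'mean_': np.mean,
    'median_': np.median,
    'std_': np.std,
    'var_': np.var,
    'max_': np.max,
    'min_': np.min,
    'q5_': lambda x: np.quantile(x, 0.05),
    'q25_': lambda x: np.quantile(x, 0.25),
    'q75_': lambda x: np.quantile(x, 0.75),
    'q95_': lambda x: np.quantile(x, 0.95),
    'sum_': np.sum,
}

stat_methods_ensemble = {
    'MeanEnsemble': np.mean,
    'MedianEnsemble': np.median,
    'MinEnsemble': np.min,
    'MaxEnsemble': np.max,
    'ProductEnsemble': np.prod,
}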
4 changes: 2 additions & 2 deletions fedot_ind/core/ensemble/static/RankEnsembler.py
@@ -5,7 +5,7 @@

 from fedot_ind.core.architecture.postprocessing.Analyzer import PerformanceAnalyzer
 from fedot_ind.core.architecture.preprocessing.DatasetLoader import DataLoader
-from fedot_ind.core.architecture.settings.hyperparams import select_hyper_param
+from fedot_ind.core.architecture.settings.hyperparams import stat_methods_ensemble
 from fedot_ind.core.ensemble.BaseEnsembler import BaseEnsemble


@@ -32,7 +32,7 @@ def __init__(self, dataset_name: str, proba_dict, metric_dict):
         self.logger = logging.getLogger(self.__class__.__name__)
         self.best_ensemble_metric = 0

-        self.ensemble_strategy_dict = select_hyper_param('stat_methods_ensemble')
+        self.ensemble_strategy_dict = stat_methods_ensemble
         self.ensemble_strategy = self.ensemble_strategy_dict.keys()

         self.strategy_exclude_list = ['WeightedEnsemble']
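With the registry lookup replaced by a direct import, the ensemble strategies are plain NumPy reductions applied across the models' predicted probabilities. A toy illustration (the arrays are made up):

import numpy as np

# Two hypothetical models' class-probability predictions for two samples.
proba_a = np.array([[0.7, 0.3], [0.2, 0.8]])
proba_b = np.array([[0.6, 0.4], [0.4, 0.6]])
stacked = np.stack([proba_a, proba_b])      # shape: (n_models, n_samples, n_classes)

mean_ensemble = np.mean(stacked, axis=0)    # 'MeanEnsemble'
max_ensemble = np.max(stacked, axis=0)      # 'MaxEnsemble'
prod_ensemble = np.prod(stacked, axis=0)    # 'ProductEnsemble'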
32 changes: 31 additions & 1 deletion fedot_ind/core/models/BaseExtractor.py
@@ -10,6 +10,7 @@

 from fedot_ind.core.metrics.metrics_implementation import *
 from fedot_ind.core.operation.IndustrialCachableOperation import IndustrialCachableOperationImplementation
+from fedot_ind.core.operation.transformation.extraction.statistical import stat_methods
 from fedot_ind.core.operation.utils.cache import DataCacher
@@ -21,7 +22,9 @@ class BaseExtractor(IndustrialCachableOperationImplementation):
     def __init__(self, params: Optional[OperationParameters] = None):
         super().__init__(params)
         self.current_window = None
-        self.n_processes = math.ceil(cpu_count() * 0.7) if cpu_count() > 1 else 1
+        # TODO: get back
+        self.n_processes = 2
+        # self.n_processes = math.ceil(cpu_count() * 0.7) if cpu_count() > 1 else 1
         self.data_type = DataTypesEnum.table
         self.use_cache = params.get('use_cache', False)

@@ -103,3 +106,30 @@ def extract_features(self, train_features: pd.DataFrame,
             return features
         else:
             return self.generate_features_from_ts(train_features, dataset_name)
+
+    @staticmethod
+    def get_statistical_features(time_series: Union[pd.DataFrame, np.ndarray]) -> pd.DataFrame:
+        """
+        Method for creating baseline statistical features for a given time series.
+        Args:
+            time_series: time series for which features are generated
+        Returns:
+            Row vector of statistical features in the form of a pandas DataFrame
+        """
+        names = []
+        vals = []
+        # flatten time series
+        if isinstance(time_series, (pd.DataFrame, pd.Series)):
+            time_series = time_series.values
+        time_series = time_series.flatten()
+
+        for name, method in stat_methods.items():
+            try:
+                vals.append(method(time_series))
+                names.append(name)
+            except ValueError:
+                continue
+        return pd.DataFrame([vals], columns=names)
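A quick, hypothetical usage of the new static method (it assumes the package imports cleanly in your environment). Because stat_methods entries that raise ValueError are skipped, the set of returned columns can depend on the input:

import numpy as np
from fedot_ind.core.models.BaseExtractor import BaseExtractor

ts = np.sin(np.linspace(0, 10, 100))
row = BaseExtractor.get_statistical_features(ts)  # a 1 x n_features DataFrame
print(row.columns.tolist())                       # names taken from stat_methods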
8 changes: 4 additions & 4 deletions fedot_ind/core/models/signal/SignalExtractor.py
@@ -7,7 +7,7 @@

 from fedot_ind.core.metrics.metrics_implementation import *
 from fedot_ind.core.models.signal.WindowedFeaturesExtractor import WindowedFeatureExtractor
-from fedot_ind.core.operation.transformation.extraction.statistical import StatFeaturesExtractor
+# from fedot_ind.core.operation.transformation.extraction.statistical import StatFeaturesExtractor


@@ -17,7 +17,7 @@ class SignalExtractor(WindowedFeatureExtractor):
         use_cache: flag to use cache or not. Defined in Config_Classification.yaml
     Attributes:
         ts_samples_count (int): number of samples in time series
-        aggregator (StatFeaturesExtractor): class to aggregate features
+        # aggregator (StatFeaturesExtractor): class to aggregate features
         wavelet_extractor (WaveletExtractor): class to extract wavelet features
         wavelet (str): current wavelet type
         vis_flag (bool): flag to visualize or not
@@ -29,8 +29,8 @@ def __init__(self, params: Optional[OperationParameters] = None):
     def __init__(self, params: Optional[OperationParameters] = None):
         super().__init__(params)
         self.ts_samples_count = None
-        self.aggregator = StatFeaturesExtractor()
-        self.wavelet_extractor = WaveletExtractor
+        # self.aggregator = StatFeaturesExtractor()
+        # self.wavelet_extractor = WaveletExtractor

         self.wavelet = params.get('wavelet')
         self.vis_flag = False
3 changes: 2 additions & 1 deletion fedot_ind/core/models/signal/WindowedFeaturesExtractor.py
@@ -22,4 +22,5 @@ def apply_window_for_stat_feature(ts_data: pd.DataFrame,
             df = feature_generator(slice_ts)
             df.columns = [x + f'_on_interval: {i} - {i + window_size}' for x in df.columns]
             tmp_list.append(df)
-        return tmp_list
+        return tmp_list
+
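Only the tail of apply_window_for_stat_feature is visible in this hunk. For orientation, a sketch of the whole routine under stated assumptions (non-overlapping windows with a step equal to window_size; the real slicing logic is not shown in this diff):

import pandas as pd

# Reconstructed sketch, not the repository's actual implementation.
def apply_window_for_stat_feature(ts_data: pd.DataFrame,
                                  feature_generator,
                                  window_size: int) -> list:
    tmp_list = []
    for i in range(0, ts_data.shape[1] - window_size + 1, window_size):
        slice_ts = ts_data.iloc[:, i:i + window_size]
        df = feature_generator(slice_ts)
        df.columns = [x + f'_on_interval: {i} - {i + window_size}' for x in df.columns]
        tmp_list.append(df)
    return tmp_list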

91 changes: 64 additions & 27 deletions fedot_ind/core/models/statistical/StatsExtractor.py
@@ -1,51 +1,93 @@
 from multiprocessing import Pool
 from typing import Optional

 import numpy as np
 import pandas as pd

 from fedot.core.data.data import InputData
 from fedot.core.operations.operation_parameters import OperationParameters
 from pandas import Index
 from tqdm import tqdm

 from fedot_ind.core.models.BaseExtractor import BaseExtractor
 from fedot_ind.core.operation.transformation.extraction.statistical import StatFeaturesExtractor


 class StatsExtractor(BaseExtractor):
     """Class responsible for quantile feature generator experiment.
     Args:
         window_mode: Flag for window mode. Defaults to False.
         use_cache: Flag for cache usage. Defaults to False.
     Attributes:
         use_cache (bool): Flag for cache usage.
         aggregator (StatFeaturesExtractor): StatFeaturesExtractor object.
         vis_flag (bool): Flag for visualization.
         train_feats (pd.DataFrame): Train features.
         test_feats (pd.DataFrame): Test features.
     """

     def __init__(self, params: Optional[OperationParameters] = None):
         super().__init__(params)
         self.aggregator = StatFeaturesExtractor()
         self.var_threshold = params.get('var_threshold')
         self.window_mode = params.get('window_mode')
         self.window_size = params.get('window_size')
         self.vis_flag = False
         self.train_feats = None
         self.test_feats = None
         self.n_components = None

-        self.logging_params.update({'WS': self.window_size, 'WM': self.window_mode})
+        self.logging_params.update({'Wsize': self.window_size,
+                                    'Wmode': self.window_mode,
+                                    'VarTh': self.var_threshold})
+        self.relevant_features = None

     def fit(self, input_data: InputData):
         pass

     def _transform(self, input_data: InputData) -> np.array:
         """
         Method for feature generation for all series
         """
         input_data_squeezed = np.squeeze(input_data.features, 3)
         with Pool(self.n_processes) as p:
             v = list(tqdm(p.imap(self.generate_features_from_ts, input_data_squeezed),
                           total=input_data.features.shape[0],
                           desc=f'{self.__class__.__name__} transform',
                           postfix=f'{self.logging_params}',
                           colour='green',
                           unit='ts',
                           ascii=False,
                           position=0,
                           leave=True)
                      )
         stat_features = v[0].columns
         n_components = v[0].shape[0]
         predict = self._clean_predict(np.array(v))
         predict = self.drop_features(predict, stat_features, n_components)
         return predict.values

     def drop_features(self, predict: pd.DataFrame, columns: Index, n_components: int):
         """
         Method for dropping features with low variance
         """
         # Fill columns names for every extracted ts component
         predict = pd.DataFrame(predict,
                                columns=[f'{col}{str(i)}' for i in range(1, n_components+1) for col in columns])

         if self.relevant_features is None:
             reduced_df, self.relevant_features = self.filter_by_var(predict, threshold=self.var_threshold)
             return reduced_df
         else:
             return predict[self.relevant_features]

     def filter_by_var(self, data: pd.DataFrame, threshold: float):
         cols = data.columns
         filtrat = {}

         for col in cols:
             if np.var(data[col].values) > threshold:
                 filtrat.update({col: data[col].values.flatten()})

         return pd.DataFrame(filtrat), list(filtrat.keys())
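filter_by_var walks over the columns one by one; the same filter can be written vectorized. A small sketch assuming identical semantics (population variance, ddof=0 as in np.var, and a strict '>' comparison):

import pandas as pd

def filter_by_var_vectorized(data: pd.DataFrame, threshold: float):
    variances = data.var(axis=0, ddof=0)    # ddof=0 matches np.var's default
    relevant = variances.index[variances > threshold].tolist()
    return data[relevant], relevant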

     def extract_stats_features(self, ts):
         if self.window_mode:
-            aggregator = self.aggregator.create_baseline_features
-            list_of_stat_features_on_interval = self.apply_window_for_stat_feature(ts_data=ts,
-                                                                                   feature_generator=aggregator,
-                                                                                   window_size=self.window_size)
-            aggregation_df = pd.concat(list_of_stat_features_on_interval, axis=1)
+            # aggregator = self.aggregator.create_baseline_features
+            list_of_stat_features = self.apply_window_for_stat_feature(ts_data=ts.T if ts.shape[1] == 1 else ts,
+                                                                       feature_generator=self.get_statistical_features,
+                                                                       window_size=self.window_size)
+            aggregation_df = pd.concat(list_of_stat_features, axis=1)
         else:
-            aggregation_df = self.aggregator.create_baseline_features(ts)
+            aggregation_df = self.get_statistical_features(ts)
         return aggregation_df

     def generate_features_from_ts(self,
@@ -84,14 +126,9 @@ def __get_feature_matrix(self, ts):
         ts_components = [pd.DataFrame(x) for x in ts.values.tolist()]
         if ts_components[0].shape[0] != 1:
             ts_components = [x.T for x in ts_components]
-        tmp_list = []
-        for index, component in enumerate(ts_components):
-            aggregation_df = self.extract_stats_features(component)
-            tmp_list.append(aggregation_df)
-        aggregation_df = pd.concat(tmp_list, axis=0)
-
-        # tmp_list = [self.extract_stats_features(x) for x in ts_components]
-        # aggregation_df = pd.concat(tmp_list, axis=0)
+        tmp_list = [self.extract_stats_features(x) for x in ts_components]
+        aggregation_df = pd.concat(tmp_list, axis=0)

         return aggregation_df
14 changes: 9 additions & 5 deletions fedot_ind/core/operation/IndustrialCachableOperation.py
@@ -40,11 +40,15 @@ def transform(self, input_data: InputData) -> OutputData:
         Method firstly tries to load result from cache. If unsuccessful, it starts to generate features
         """
         # TODO: get back to
-        # operation_parameters = [f'{key}:{value}' for key, value in self.params.to_dict().items()]
-        # class_params = list(self.__dir__())
-        # operational_info = operation_parameters + class_params
-        # hashed_info = self.cacher.hash_info(data=input_data.features.tobytes(),
-        #                                     operation_info=operational_info)
+        # operation_parameters = self.params.to_dict()
+        # class_params = {k:v for k,v in self.__dict__.items() if k not in ['cacher',
+        #                                                                   'params',
+        #                                                                   'n_processes',
+        #                                                                   'logging_params']}
+        #
+        # operation_parameters.update(class_params)
+        # hashed_info = self.cacher.hash_info(data=input_data.features,
+        #                                     operation_info=operation_parameters.__repr__())

         # hashed_info = self.cacher.hash_info(data=input_data.features.tobytes(),
         #                                     operation_info=self.params.to_dict())
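The commented-out block above sketches a cache key derived from the raw feature bytes plus a repr of the operation's parameters and selected attributes. For orientation, a self-contained illustration of that idea (illustrative only: DataCacher's actual hash_info is not shown in this diff, and the md5 choice is an assumption):

import hashlib
import numpy as np

def hash_info(data: np.ndarray, operation_info: str) -> str:
    # Combine the raw array bytes with a textual description of the operation.
    payload = data.tobytes() + operation_info.encode()
    return hashlib.md5(payload).hexdigest()[:10]

# e.g. key = hash_info(input_data.features, repr(operation_parameters))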
9 changes: 3 additions & 6 deletions fedot_ind/core/operation/transformation/basis/data_driven.py
@@ -1,24 +1,21 @@
 import math
-import time
 from multiprocessing import Pool
-from typing import Tuple, TypeVar, Optional
+from typing import Optional, Tuple, TypeVar

 import numpy as np
 import tensorly as tl
 from fedot.core.operations.operation_parameters import OperationParameters
 from pymonad.either import Either
 from pymonad.list import ListMonad
-from sklearn.metrics import f1_score, roc_auc_score
 from tensorly.decomposition import parafac
-from tqdm import tqdm

 from fedot_ind.core.architecture.preprocessing import InputData
 from fedot_ind.core.operation.decomposition.matrix_decomposition.fast_svd import bksvd

 from fedot_ind.core.operation.transformation.basis.abstract_basis import BasisDecompositionImplementation
 from fedot_ind.core.operation.transformation.data.hankel import HankelMatrix
-from fedot_ind.core.operation.transformation.regularization.spectrum import singular_value_hard_threshold, \
-    reconstruct_basis
+from fedot_ind.core.operation.transformation.regularization.spectrum import reconstruct_basis, \
+    singular_value_hard_threshold

 class_type = TypeVar("T", bound="DataDrivenBasis")
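For context, singular_value_hard_threshold (reordered in the import above) conventionally discards singular values that fall below a data-driven cutoff. A sketch in the spirit of that helper, assuming a Gavish-Donoho style threshold for unknown noise (the real implementation in regularization/spectrum.py is not shown in this diff):

import numpy as np

def hard_threshold(singular_values: np.ndarray, beta: float) -> np.ndarray:
    # beta is the aspect ratio n_rows / n_cols of the decomposed matrix, 0 < beta <= 1.
    omega = 0.56 * beta ** 3 - 0.95 * beta ** 2 + 1.82 * beta + 1.43
    tau = omega * np.median(singular_values)
    return singular_values[singular_values >= tau]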
2 changes: 2 additions & 0 deletions fedot_ind/core/operation/transformation/basis/fourier.py
@@ -23,6 +23,8 @@ def __init__(self, params: Optional[OperationParameters] = None):
         self.threshold = params.get('threshold')
         self.basis = None

+        self.logging_params.update({'threshold': self.threshold})
+
     def low_pass(self, input_data):
         fourier_coef = np.fft.rfft(input_data)
         frequencies = np.fft.rfftfreq(input_data.size, d=2e-3 / input_data.size)
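The hunk cuts low_pass off right after the frequency grid is built. A plausible completion as a standalone sketch (treating the threshold as a cutoff frequency is an assumption, as is reusing the d=2e-3 / size sample spacing from the visible lines):

import numpy as np

def low_pass(signal: np.ndarray, threshold: float) -> np.ndarray:
    fourier_coef = np.fft.rfft(signal)
    frequencies = np.fft.rfftfreq(signal.size, d=2e-3 / signal.size)
    fourier_coef[frequencies > threshold] = 0        # zero out the high-frequency band
    return np.fft.irfft(fourier_coef, n=signal.size)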