Commit ed9cc79
add combine components in DDB
v1docq committed Aug 8, 2023
1 parent 9181266 commit ed9cc79
Showing 2 changed files with 38 additions and 13 deletions.
10 changes: 3 additions & 7 deletions
@@ -1,4 +1,5 @@
import numpy as np
+import pandas as pd
from scipy.linalg import qr
from scipy.spatial.distance import cdist
from sklearn.preprocessing import MinMaxScaler
@@ -23,7 +24,7 @@ def _compute_matrix_approximation(self, Ut, block, tensor, rank):
        reconstr_m = tensor_approx @ tensor_approx.T @ tensor
        return reconstr_m

-    def _regularize_rank(self, low_rank, Ut, block, tensor, l_reg: float = 0.2):
+    def _regularize_rank(self, low_rank, Ut, block, tensor, l_reg: float = 1.0):
        spectral_norms, fro_norms = [], []
        list_of_rank = list(range(1, low_rank + 1, 1))
        for rank in list_of_rank:
@@ -32,13 +33,10 @@ def _regularize_rank(self, low_rank, Ut, block, tensor, l_reg: float = 0.2):
            fro_norms.append(abs(np.linalg.norm(tensor - reconstr_m, 'fro')))
        scaled_spectral = MinMaxScaler().fit_transform(np.array(spectral_norms).reshape(-1, 1))
        scaled_fro = MinMaxScaler().fit_transform(np.array(fro_norms).reshape(-1, 1))
-        # scaled_rank = MinMaxScaler().fit_transform(np.array(list_of_rank).reshape(-1, 1))
        aprox_error = (1 - l_reg) * scaled_spectral + l_reg * scaled_fro
        aprox_error = aprox_error.reshape(-1)
        deriviate_of_error = abs(np.diff(aprox_error))
        deriviate_of_error = deriviate_of_error[deriviate_of_error > 0.01]
-        #first_gap_idx = np.where(deriviate_of_error == deriviate_of_error.max())[0][0]
-        #regularized_rank = first_gap_idx+2
        error_threshold = np.median(deriviate_of_error)
        regularized_rank = np.sum(deriviate_of_error <= error_threshold)
        return regularized_rank
@@ -92,7 +90,5 @@ def rsvd(self, tensor, approximation: bool = False, regularized_rank: int = None

        reconstr_tensor = self._compute_matrix_approximation(Ut, sampled_tensor_orto, tensor, regularized_rank)
        U_, S_, V_ = np.linalg.svd(reconstr_tensor, full_matrices=False)
-        # ET1 = St[0] * np.outer(Ut[:, 0], Vt[0, :])
-        # ET2 = St[1] * np.outer(Ut[:, 1], Vt[1, :])
-        # ff = cdist(metric='correlation', XA=ET1, XB=ET2)
+
        return [U_, S_, V_]
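rsvd hands back the SVD factors of the regularized reconstruction as a [U_, S_, V_] list. A hedged usage sketch of consuming such a triple (shapes follow numpy's full_matrices=False convention; illustrative data only):

import numpy as np

rng = np.random.default_rng(1)
U_, S_, V_ = np.linalg.svd(rng.normal(size=(20, 15)), full_matrices=False)

# Full reconstruction from the factors, then a rank-3 truncation.
approx = U_ @ np.diag(S_) @ V_
approx_rank3 = (U_[:, :3] * S_[:3]) @ V_[:3, :]
print(approx.shape, approx_rank3.shape)  # (20, 15) (20, 15)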
41 changes: 35 additions & 6 deletions fedot_ind/core/operation/transformation/basis/data_driven.py
@@ -1,12 +1,14 @@
import math
from multiprocessing import Pool
from typing import Optional, Tuple, TypeVar

+from scipy import stats
import numpy as np
import pandas as pd
import tensorly as tl
from fedot.core.operations.operation_parameters import OperationParameters
from pymonad.either import Either
from pymonad.list import ListMonad
+from scipy.spatial.distance import cdist
from tensorly.decomposition import parafac
from tqdm import tqdm

@@ -38,14 +40,33 @@ def __init__(self, params: Optional[OperationParameters] = None):
        self.window_size = params.get('window_size')
        self.basis = None
        self.SV_threshold = None
-        #self.sv_selector = params.get('sv_selector')
+        # self.sv_selector = params.get('sv_selector')
        self.sv_selector = 'median'
        self.svd_estimator = RSVDDecomposition()
        self.low_rank_approximation = True
        self.logging_params.update({'WS': self.window_size,
                                    'SV_selector': self.sv_selector,
                                    })

+    def _combine_components(self, predict):
+        count = 0
+        grouped_v = []
+        for df in predict:
+            tmp = pd.DataFrame(df)
+            ff = cdist(metric='cosine', XA=tmp.values, XB=tmp.values)
+            if ff[-1, -2] < 0.5:
+                count += 1
+                tmp.iloc[-2, :] = tmp.iloc[-2, :] + tmp.iloc[-1, :]
+                tmp.drop(tmp.tail(1).index, inplace=True)
+            grouped_v.append(tmp.values)
+
+        if count / len(predict) > 0.35:
+            self.SV_threshold = grouped_v[0].shape[0]
+            self.logging_params.update({'SV_thr': self.SV_threshold})
+            return np.array(grouped_v)
+        else:
+            return predict
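The new _combine_components merges the last two components of a sample when their cosine distance is below 0.5, and only commits the reduced shape as SV_threshold when more than 35% of samples triggered a merge. A standalone sketch of the per-sample merge test (toy component rows, not repository data):

import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist

# Rows are components; the last two are nearly collinear on purpose.
components = np.array([[1.0, 0.0, 0.0, 0.0],
                       [0.0, 1.0, 0.0, 0.0],
                       [0.0, 0.0, 1.0, 1.0],
                       [0.0, 0.0, 1.1, 0.9]])
tmp = pd.DataFrame(components)

# Pairwise cosine distances; ff[-1, -2] compares the last two components.
ff = cdist(metric='cosine', XA=tmp.values, XB=tmp.values)
if ff[-1, -2] < 0.5:
    tmp.iloc[-2, :] = tmp.iloc[-2, :] + tmp.iloc[-1, :]  # sum the pair
    tmp.drop(tmp.tail(1).index, inplace=True)            # drop the merged row
print(tmp.shape)  # (3, 4): four components reduced to three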

    def _transform(self, input_data: InputData) -> np.array:
        """Method for transforming all samples

@@ -75,21 +96,29 @@ def _transform(self, input_data: InputData) -> np.array:
                )
            )
        predict = np.array(v)
+        # new_shape = predict[0].shape[0]
+        #
+        # reduce_dimension = True
+        # while reduce_dimension:
+        #     predict = self._combine_components(predict)
+        #     if predict[0].shape[0] == new_shape or predict[0].shape[0] == 1:
+        #         reduce_dimension = False
+        #     new_shape = predict[0].shape[0]
        #predict = self._clean_predict(np.array(v))
        return predict
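The commented-out block sketches an iterative reduction: keep calling _combine_components until the per-sample component count stops shrinking or reaches one. A hedged, runnable rendering of that loop (the combine argument is a stand-in for self._combine_components; the commit itself leaves the loop disabled):

import numpy as np

def iterative_combine(predict, combine):
    # Repeat the combine step until the component count is stable or 1.
    new_shape = predict[0].shape[0]
    reduce_dimension = True
    while reduce_dimension:
        predict = combine(predict)
        if predict[0].shape[0] == new_shape or predict[0].shape[0] == 1:
            reduce_dimension = False
        new_shape = predict[0].shape[0]
    return predict

# Demo combine step: drop the last component while more than two remain.
demo = np.ones((5, 4, 10))  # 5 samples, 4 components, series length 10
result = iterative_combine(demo, lambda p: p[:, :-1, :] if p.shape[1] > 2 else p)
print(result[0].shape)  # (2, 10)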

    def get_threshold(self, data, selector: str):

        selectors = {'median': np.median,
-                     '0.75%': lambda x: np.quantile(x, 0.75),
-                     '0.25%': lambda x: np.quantile(x, 0.25)}
+                     'mode': stats.mode}

        svd_numbers = []
        with tqdm(total=len(data), desc='SVD estimation') as pbar:
            for signal in data:
                svd_numbers.append(self._transform_one_sample(signal, svd_flag=True))
                pbar.update(1)

-        return math.ceil(selectors[selector](svd_numbers))
+        return selectors[selector](svd_numbers).mode[0]
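The 'mode' selector leans on SciPy's legacy ModeResult, where stats.mode returns array-valued mode and count fields, so .mode[0] extracts the scalar; on SciPy >= 1.11 the default keepdims=False makes .mode a scalar and the indexing fails, and np.median (the other selector, and the hardcoded sv_selector default) returns a plain float with no .mode attribute at all. A small compatibility sketch for the modal threshold (assumption: both SciPy generations should be supported):

import numpy as np
from scipy import stats

def modal_threshold(svd_numbers):
    # Most frequent estimated rank, robust to the SciPy mode API change.
    mode = stats.mode(np.asarray(svd_numbers)).mode
    return int(mode[0]) if np.ndim(mode) else int(mode)

print(modal_threshold([3, 4, 4, 5, 4]))  # 4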

    def _transform_one_sample(self, series: np.array, svd_flag: bool = False):
        trajectory_transformer = HankelMatrix(time_series=series, window_size=self.window_size)
@@ -153,7 +182,7 @@ def _get_multidim_basis(self, data):
        return basis

    def evaluate_derivative(self:
-                                class_type,
+                            class_type,
                            coefs: np.array,
                            order: int = 1) -> Tuple[class_type, np.array]:
        basis = type(self)(
