Commit ed9cc79
add combine components in DDB
v1docq committed Aug 8, 2023
1 parent 9181266 commit ed9cc79
Showing 2 changed files with 38 additions and 13 deletions.
10 changes: 3 additions & 7 deletions
@@ -1,4 +1,5 @@
import numpy as np
+import pandas as pd
from scipy.linalg import qr
from scipy.spatial.distance import cdist
from sklearn.preprocessing import MinMaxScaler
@@ -23,7 +24,7 @@ def _compute_matrix_approximation(self, Ut, block, tensor, rank):
        reconstr_m = tensor_approx @ tensor_approx.T @ tensor
        return reconstr_m

-    def _regularize_rank(self, low_rank, Ut, block, tensor, l_reg: float = 0.2):
+    def _regularize_rank(self, low_rank, Ut, block, tensor, l_reg: float = 1.0):
        spectral_norms, fro_norms = [], []
        list_of_rank = list(range(1, low_rank + 1, 1))
        for rank in list_of_rank:
@@ -32,13 +33,10 @@ def _regularize_rank(self, low_rank, Ut, block, tensor, l_reg: float = 0.2):
            fro_norms.append(abs(np.linalg.norm(tensor - reconstr_m, 'fro')))
        scaled_spectral = MinMaxScaler().fit_transform(np.array(spectral_norms).reshape(-1, 1))
        scaled_fro = MinMaxScaler().fit_transform(np.array(fro_norms).reshape(-1, 1))
-        # scaled_rank = MinMaxScaler().fit_transform(np.array(list_of_rank).reshape(-1, 1))
        aprox_error = (1 - l_reg) * scaled_spectral + l_reg * scaled_fro
        aprox_error = aprox_error.reshape(-1)
        deriviate_of_error = abs(np.diff(aprox_error))
        deriviate_of_error = deriviate_of_error[deriviate_of_error > 0.01]
-        #first_gap_idx = np.where(deriviate_of_error == deriviate_of_error.max())[0][0]
-        #regularized_rank = first_gap_idx+2
        error_threshold = np.median(deriviate_of_error)
        regularized_rank = np.sum(deriviate_of_error <= error_threshold)
        return regularized_rank
@@ -92,7 +90,5 @@ def rsvd(self, tensor, approximation: bool = False, regularized_rank: int = None

        reconstr_tensor = self._compute_matrix_approximation(Ut, sampled_tensor_orto, tensor, regularized_rank)
        U_, S_, V_ = np.linalg.svd(reconstr_tensor, full_matrices=False)
-        # ET1 = St[0] * np.outer(Ut[:, 0], Vt[0, :])
-        # ET2 = St[1] * np.outer(Ut[:, 1], Vt[1, :])
-        # ff = cdist(metric='correlation', XA=ET1, XB=ET2)
+
        return [U_, S_, V_]
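rsvd hands back the SVD factors of the regularized reconstruction as a [U_, S_, V_] list. A hedged usage sketch of consuming such a triple (shapes follow numpy's full_matrices=False convention; illustrative data only):

import numpy as np

rng = np.random.default_rng(1)
U_, S_, V_ = np.linalg.svd(rng.normal(size=(20, 15)), full_matrices=False)

# Full reconstruction from the factors, then a rank-3 truncation.
approx = U_ @ np.diag(S_) @ V_
approx_rank3 = (U_[:, :3] * S_[:3]) @ V_[:3, :]
print(approx.shape, approx_rank3.shape)  # (20, 15) (20, 15)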
41 changes: 35 additions & 6 deletions fedot_ind/core/operation/transformation/basis/data_driven.py
@@ -1,12 +1,14 @@
import math
from multiprocessing import Pool
from typing import Optional, Tuple, TypeVar

+from scipy import stats
import numpy as np
import pandas as pd
import tensorly as tl
from fedot.core.operations.operation_parameters import OperationParameters
from pymonad.either import Either
from pymonad.list import ListMonad
+from scipy.spatial.distance import cdist
from tensorly.decomposition import parafac
from tqdm import tqdm

@@ -38,14 +40,33 @@ def __init__(self, params: Optional[OperationParameters] = None):
        self.window_size = params.get('window_size')
        self.basis = None
        self.SV_threshold = None
-        #self.sv_selector = params.get('sv_selector')
+        # self.sv_selector = params.get('sv_selector')
        self.sv_selector = 'median'
        self.svd_estimator = RSVDDecomposition()
        self.low_rank_approximation = True
        self.logging_params.update({'WS': self.window_size,
                                    'SV_selector': self.sv_selector,
                                    })

+    def _combine_components(self, predict):
+        count = 0
+        grouped_v = []
+        for df in predict:
+            tmp = pd.DataFrame(df)
+            ff = cdist(metric='cosine', XA=tmp.values, XB=tmp.values)
+            if ff[-1, -2] < 0.5:
+                count += 1
+                tmp.iloc[-2, :] = tmp.iloc[-2, :] + tmp.iloc[-1, :]
+                tmp.drop(tmp.tail(1).index, inplace=True)
+            grouped_v.append(tmp.values)
+
+        if count / len(predict) > 0.35:
+            self.SV_threshold = grouped_v[0].shape[0]
+            self.logging_params.update({'SV_thr': self.SV_threshold})
+            return np.array(grouped_v)
+        else:
+            return predict
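The new _combine_components merges the last two components of a sample when their cosine distance is below 0.5, and only commits the reduced shape as SV_threshold when more than 35% of samples triggered a merge. A standalone sketch of the per-sample merge test (toy component rows, not repository data):

import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist

# Rows are components; the last two are nearly collinear on purpose.
components = np.array([[1.0, 0.0, 0.0, 0.0],
                       [0.0, 1.0, 0.0, 0.0],
                       [0.0, 0.0, 1.0, 1.0],
                       [0.0, 0.0, 1.1, 0.9]])
tmp = pd.DataFrame(components)

# Pairwise cosine distances; ff[-1, -2] compares the last two components.
ff = cdist(metric='cosine', XA=tmp.values, XB=tmp.values)
if ff[-1, -2] < 0.5:
    tmp.iloc[-2, :] = tmp.iloc[-2, :] + tmp.iloc[-1, :]  # sum the pair
    tmp.drop(tmp.tail(1).index, inplace=True)            # drop the merged row
print(tmp.shape)  # (3, 4): four components reduced to three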

    def _transform(self, input_data: InputData) -> np.array:
        """Method for transforming all samples

@@ -75,21 +96,29 @@ def _transform(self, input_data: InputData) -> np.array:
                )
            )
        predict = np.array(v)
+        # new_shape = predict[0].shape[0]
+        #
+        # reduce_dimension = True
+        # while reduce_dimension:
+        #     predict = self._combine_components(predict)
+        #     if predict[0].shape[0] == new_shape or predict[0].shape[0] == 1:
+        #         reduce_dimension = False
+        #     new_shape = predict[0].shape[0]
        #predict = self._clean_predict(np.array(v))
        return predict
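The commented-out block sketches an iterative reduction: keep calling _combine_components until the per-sample component count stops shrinking or reaches one. A hedged, runnable rendering of that loop (the combine argument is a stand-in for self._combine_components; the commit itself leaves the loop disabled):

import numpy as np

def iterative_combine(predict, combine):
    # Repeat the combine step until the component count is stable or 1.
    new_shape = predict[0].shape[0]
    reduce_dimension = True
    while reduce_dimension:
        predict = combine(predict)
        if predict[0].shape[0] == new_shape or predict[0].shape[0] == 1:
            reduce_dimension = False
        new_shape = predict[0].shape[0]
    return predict

# Demo combine step: drop the last component while more than two remain.
demo = np.ones((5, 4, 10))  # 5 samples, 4 components, series length 10
result = iterative_combine(demo, lambda p: p[:, :-1, :] if p.shape[1] > 2 else p)
print(result[0].shape)  # (2, 10)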

    def get_threshold(self, data, selector: str):

        selectors = {'median': np.median,
-                     '0.75%': lambda x: np.quantile(x, 0.75),
-                     '0.25%': lambda x: np.quantile(x, 0.25)}
+                     'mode': stats.mode}

        svd_numbers = []
        with tqdm(total=len(data), desc='SVD estimation') as pbar:
            for signal in data:
                svd_numbers.append(self._transform_one_sample(signal, svd_flag=True))
                pbar.update(1)

-        return math.ceil(selectors[selector](svd_numbers))
+        return selectors[selector](svd_numbers).mode[0]
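The 'mode' selector leans on SciPy's legacy ModeResult, where stats.mode returns array-valued mode and count fields, so .mode[0] extracts the scalar; on SciPy >= 1.11 the default keepdims=False makes .mode a scalar and the indexing fails, and np.median (the other selector, and the hardcoded sv_selector default) returns a plain float with no .mode attribute at all. A small compatibility sketch for the modal threshold (assumption: both SciPy generations should be supported):

import numpy as np
from scipy import stats

def modal_threshold(svd_numbers):
    # Most frequent estimated rank, robust to the SciPy mode API change.
    mode = stats.mode(np.asarray(svd_numbers)).mode
    return int(mode[0]) if np.ndim(mode) else int(mode)

print(modal_threshold([3, 4, 4, 5, 4]))  # 4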

    def _transform_one_sample(self, series: np.array, svd_flag: bool = False):
        trajectory_transformer = HankelMatrix(time_series=series, window_size=self.window_size)
@@ -153,7 +182,7 @@ def _get_multidim_basis(self, data):
        return basis

    def evaluate_derivative(self:
-                                class_type,
+                            class_type,
                            coefs: np.array,
                            order: int = 1) -> Tuple[class_type, np.array]:
        basis = type(self)(
