From 0facd4ab8bbcbb927807fba183e4276c5762cedf Mon Sep 17 00:00:00 2001 From: Igor Infingardi Date: Wed, 16 Oct 2024 20:52:52 -0300 Subject: [PATCH 1/6] docs: improve compare_variables_for_regression documentation --- bibmon/_bibmon_tools.py | 1646 ++++++++++++++++++++++----------------- 1 file changed, 952 insertions(+), 694 deletions(-) diff --git a/bibmon/_bibmon_tools.py b/bibmon/_bibmon_tools.py index fd598d9..94397ae 100644 --- a/bibmon/_bibmon_tools.py +++ b/bibmon/_bibmon_tools.py @@ -1,695 +1,953 @@ -import numpy as np -import pandas as pd -from datetime import datetime -import matplotlib.pyplot as plt - -############################################################################### - -def create_df_with_dates (array, - start = '2020-01-01 00:00:00', - freq = '1min'): - """ - Parameters - ---------- - array: pandas.DataFrame or numpy.array - Original data. - start: string, optional - Start timestamp. - freq: string, optional - Sampling interval. - Returns - ---------- - df: pandas.DataFrame - Processed data. - """ - df = pd.DataFrame(array) - df.index = pd.date_range(start = start, - periods = df.shape[0], - freq=freq) - return df - -############################################################################### - -def create_df_with_noise (array, - noise_frac, - max_index_for_noise): - """ - Adds artificial measurement noise to data. - - Parameters - ---------- - array: pandas.DataFrame or numpy.array - Original data. - noise_frac: float - Fraction (between 0 and 1) of the total amplitude of the variable - that will be used as the noise standard deviation. - max_index_for_noise: int - Maximum index to consider the amplitude - in the standard deviation calculation. - Returns - ---------- - df: pandas.DataFrame - Processed data. - """ - - df = pd.DataFrame(array) - - sigma_noise = noise_frac*(np.amax(array[:max_index_for_noise,:], - axis=0)- - np.amin(array[:max_index_for_noise,:], - axis=0)) - - for i in range(df.shape[1]): - df.iloc[:,i] = (df.iloc[:,i] + - sigma_noise[i]*np.random.randn(df.shape[0])) - - return df - -############################################################################### - -def align_dfs_by_rows (df1, df2): - """ - Aligns DataFrames by rows. - - Parameters - ---------- - df1, df2: pandas.DataFrame - Original data. - Returns - ---------- - new_df1, new_df2: pandas.DataFrame - Processed data. - """ - - new_df1 = df1.loc[df1.index.isin(df2.index)] - new_df2 = df2.loc[df2.index.isin(df1.index)] - - return new_df1, new_df2 - -############################################################################### - -def spearmanr_dendrogram(df, figsize = (18,8)): - """ - Generates a dendrogram of Spearman correlations. - - Parameters - ---------- - df: pandas.DataFrame - Dados to be analyzed. - figsize: tuple of ints, optional - Figure dimensions. 
- """ - import scipy.cluster.hierarchy - import scipy.stats - - df = pd.DataFrame(df) - - corr = np.round(scipy.stats.spearmanr(df).correlation, 4) - corr_condensed = scipy.cluster.hierarchy.distance.squareform(1-corr) - z = scipy.cluster.hierarchy.linkage(corr_condensed, method='average') - plt.figure(figsize=figsize) - scipy.cluster.hierarchy.dendrogram(z, labels = df.columns.tolist(), - orientation='left', leaf_font_size=16) - plt.xlabel('Spearman Correlation') - plt.ylabel('Variable clusters') - plt.show() - -############################################################################### - -def train_val_test_split (data, start_train, end_train, - end_validation, end_test, - tags_X = None, tags_Y = None): - """ - Separates the data into consecutive portions of - train, validation, and test, returning 3 DataFrames. - It can also separate into predictor variables (X) and - predicted variables (Y), which in this case will return 6 DataFrames. - - Parameters - ---------- - data: pandas.DataFrame - Data to be separated. - start_train: string - Start timestamp of the train portion. - end_train: string - End timestamp of the train portion. - end_validation: string - End timestamp of the validation portion. - end_test: string - End timestamp of the test portion. - tags_X: list of strings - Variables to be considered in the X set. - tags_Y: list of strings - Variables to be considered in the Y set. - Returns - ---------- - : pandas.DataFrames - Separated data. - """ - train_data = data.loc[start_train:end_train] - validation_data = data.loc[end_train:end_validation].iloc[1:,:] - test_data = data.loc[end_validation:end_test].iloc[1:,:] - - if tags_Y is not None: - - if not isinstance(tags_Y, list): tags_Y = [tags_Y] - - train_data_Y = train_data.loc[:,tags_Y] - validation_data_Y = validation_data.loc[:,tags_Y] - test_data_Y = test_data.loc[:,tags_Y] - - if tags_X is not None: - - if not isinstance(tags_X, list): tags_X = [tags_X] - - train_data_X = train_data.loc[:,tags_X] - validation_data_X = validation_data.loc[:,tags_X] - test_data_X = test_data.loc[:,tags_X] - - else: - - dif = train_data.columns.difference(train_data_Y.columns) - train_data_X = train_data[dif] - validation_data_X = validation_data[dif] - test_data_X = test_data[dif] - - return (train_data_X, validation_data_X, test_data_X, - train_data_Y, validation_data_Y, test_data_Y) - - else: - - if tags_X is not None: - - if not isinstance(tags_X, list): tags_Y = [tags_X] - - train_data = train_data.loc[:,tags_X] - validation_data = validation_data.loc[:,tags_X] - test_data = test_data.loc[:,tags_X] - - return (train_data, validation_data, test_data) - -############################################################################### - -def complete_analysis (model, X_train, X_validation, X_test, - Y_train = None , Y_validation = None, Y_test = None, - lim_conf = 0.99, - f_pp_train = ['remove_empty_variables', - 'ffill_nan', - 'remove_frozen_variables', - 'normalize'], - a_pp_train = None, - f_pp_test = ['replace_nan_with_values', - 'normalize'], - a_pp_test = None, - logy = True, - metrics = None, - X_pred_to_plot = None, - count_limit = 1, - count_window_size = 0, - fault_start = None, - fault_end = None): - """ - Performs a complete monitoring analysis, with train, validation, and test. - - Parameters - ---------- - model: BibMon model - Model to be considered in the analysis. - X_train: pandas.DataFrame or numpy.array - Training data X. - X_validation: pandas.DataFrame or numpy.array - Validation data X. 
- X_test: pandas.DataFrame or numpy.array - Test data X. - Y_train: pandas.DataFrame or numpy.array, optional - Training data Y. - Y_validation: pandas.DataFrame or numpy.array, optional - Validation data Y. - Y_test: pandas.DataFrame or numpy.array, optional - Test data Y. - lim_conf: float, optional - Confidence limit for the detection index. - f_pp_train: list, optional - List containing strings with names of functions to be used - in pre-processing the train data (the functions are defined in the - PreProcess class, in the BibMon_Tools.py file). - a_pp_train: dict, optional - Dictionary containing the parameters to be provided - to each function to perform pre-processing of the train data, in - the format {'functionname__argname': argvalue, ...}. - f_pp_test: list, optional - List containing strings with names of functions to be used - in pre-processing the test data (the functions are defined in the - PreProcess class, in the BibMon_Tools.py file). - a_pp_test: dict, optional - Dictionary containing the parameters to be provided - to each function to perform pre-processing of the test data, in - the format {'functionname__argname': argvalue, ...}. - logy: boolean, optional - If use logarithmic scale in the SPE plots. - metrics: list of functions, optional - Functions for calculating metrics to be displayed in the title of - the graph. - X_pred_to_plot: string, optional - In case the model is a reconstruction model (i.e., self.has_Y = False), - indicates which column of X to plot along with the prediction. - count_limit: int, optional - Limit of points to be considered in the window - for the count alarm to sound. - count_window_size: int, optional - Window sizes used in count alarm calculation. - fault_start: string, optional - Start timestamp of the fault. - fault_end: string, optional - End timestamp of the fault. 
- """ - fig, ax = plt.subplots(3,2, figsize = (15,12)) - - cond_to_plot_pred = (model.has_Y or - ((not model.has_Y) and (X_pred_to_plot is not None))) - - ######## TRAINING ######## - - model.fit(X_train, Y_train, - f_pp = f_pp_train, - a_pp = a_pp_train, - f_pp_test = f_pp_test, - a_pp_test = a_pp_test, - lim_conf = lim_conf, - redefine_limit = False) - - # PLOTTING SPE - - model.plot_SPE(ax = ax[0,0], logy = logy) - ax[0,0].set_title('Training') - - # PLOTTING PREDICTIONS - - if cond_to_plot_pred: - model.plot_predictions(ax = ax[0,1], train_or_test = 'train', - X_pred_to_plot = X_pred_to_plot, - metrics = metrics) - - ######## VALIDATION ######## - - model.predict(X_validation, Y_validation, - count_window_size = count_window_size, - redefine_limit = True) - - # PLOTTING SPE - - model.plot_SPE(ax = ax[1,0], train_or_test = 'test', logy = logy) - ax[1,0].set_title('Validation') - - # PLOTTING PREDICTIONS - - if cond_to_plot_pred: - model.plot_predictions(ax = ax[1,1], train_or_test = 'test', - X_pred_to_plot = X_pred_to_plot, - metrics = metrics) - - ######## TEST ######## - - model.predict(X_test, Y_test, - count_window_size = count_window_size, - count_limit = count_limit, - redefine_limit = False) - - # PLOTTING SPE - - model.plot_SPE(ax = ax[2,0], train_or_test = 'test', logy = logy) - ax[2,0].set_title('Test') - - if fault_start is not None: - ax[2,0].axvline(datetime.strptime(str(fault_start), - '%Y-%m-%d %H:%M:%S'), ls = '--') - if fault_end is not None: - ax[2,0].axvline(datetime.strptime(str(fault_end), - '%Y-%m-%d %H:%M:%S'), ls = '--') - - # PLOTTING PREDICTIONS - - if cond_to_plot_pred: - model.plot_predictions(ax = ax[2,1], train_or_test = 'test', - X_pred_to_plot = X_pred_to_plot, - metrics = metrics) - - if fault_start is not None: - ax[2,1].axvline(datetime.strptime(str(fault_start), - '%Y-%m-%d %H:%M:%S'), ls = '--') - if fault_end is not None: - ax[2,1].axvline(datetime.strptime(str(fault_end), - '%Y-%m-%d %H:%M:%S'), ls = '--') - - fig.tight_layout(); - -############################################################################## - -def comparative_table (models, X_train, X_validation, X_test, - Y_train = None , Y_validation = None, Y_test = None, - lim_conf = 0.99, - f_pp_train = ['remove_empty_variables', - 'ffill_nan', - 'remove_frozen_variables', - 'normalize'], - a_pp_train = None, - f_pp_test = ['replace_nan_with_values', - 'normalize'], - a_pp_test = None, - logy = True, metrics = None, - X_pred_to_plot = None, - count_limit = 1, - count_window_size = 0, - fault_start = None, - fault_end = None, - mask = None, - times = True, - plot_SPE = True, - plot_predictions = True, - fit_model = True): - - """ - Performs complete monitoring analysis of multiple models and builds - comparative result tables. - - Parameters - ---------- - models: list of BibMon models - Models to be considered in the analysis. - X_train: pandas.DataFrame or numpy.array - Training data X. - X_validation: pandas.DataFrame or numpy.array - Validation data X. - X_test: pandas.DataFrame or numpy.array - Test data X. - Y_train: pandas.DataFrame or numpy.array, optional - Training data Y. - Y_validation: pandas.DataFrame or numpy.array, optional - Validation data Y. - Y_test: pandas.DataFrame or numpy.array, optional - Test data Y. - lim_conf: float, optional - Confidence limit for the detection index. 
- f_pp_train: list, optional - List containing strings with names of functions to be used - in pre-processing the train data (the functions are defined in the - PreProcess class, in the BibMon_Tools.py file). - a_pp_train: dict, optional - Dictionary containing the parameters to be provided - to each function to perform pre-processing of the train data, in - the format {'functionname__argname': argvalue, ...}. - f_pp_test: list, optional - List containing strings with names of functions to be used - in pre-processing the test data (the functions are defined in the - PreProcess class, in the BibMon_Tools.py file). - a_pp_test: dict, optional - Dictionary containing the parameters to be provided - to each function to perform pre-processing of the test data, in - the format {'functionname__argname': argvalue, ...}. - logy: boolean, optional - If use logarithmic scale in the SPE plots. - metrics: list of functions, optional - Functions for calculating metrics to be displayed in the title of - the graph. - X_pred_to_plot: string, optional - In case the model is a reconstruction model (i.e., self.has_Y = False), - indicates which column of X to plot along with the prediction. - count_limit: int, optional - Limit of points to be considered in the window - for the count alarm to sound. - count_window_size: int, optional - Window sizes used in count alarm calculation. - fault_start: string, optional - Start timestamp of the fault. - fault_end: string, optional - End timestamp of the fault. - mask: numpy.array, optional - Boolean array indicating the indices where the process is - in fault. - times: boolean, optional - If execution times should be calculated. - plot_SPE: boolean, optional - If SPE plots should be plotted. - plot_predictions: boolean, optional - If prediction plots should be plotted. - fit_model: boolean, optional - If models should be trained. - Returns - ---------- - : list of pandas.DataFrames - List with the generated tables (prediction and/or detection). 
- """ - - n = len(models) - - if plot_SPE: - fig_spe, ax_spe = plt.subplots(n, 3, figsize = (15, 4*n)) - - if plot_predictions: - fig_pred, ax_pred = plt.subplots(n, 3, figsize = (15, 4*n)) - - train_metrics = {} - validation_metrics = {} - test_metrics = {} - - detection_alarms = {} - false_detection_rates = {} - false_alarm_rates = {} - - for i in range(len(models)): - - model = models[i] - - cond_to_plot_pred = (model.has_Y or - ((not model.has_Y) and - (X_pred_to_plot is not None))) - - ######## TRAINING ######## - - if fit_model: - model.fit(X_train, Y_train, - f_pp = f_pp_train, - a_pp = a_pp_train, - f_pp_test = f_pp_test, - a_pp_test = a_pp_test, - lim_conf = lim_conf, - redefine_limit = False) - - # TERMS FOR PREDICTION TABLE - - if metrics is not None: - if model.has_Y: - true = model.Y_train_orig - pred = model.Y_train_pred_orig - else: - if X_pred_to_plot is not None: - true = model.X_train_orig[X_pred_to_plot] - pred = model.X_train_pred_orig[X_pred_to_plot] - for mr in metrics: - true, pred = align_dfs_by_rows(true.dropna(), pred) - train_metrics[model.name+': '+mr.__name__] = mr(true, pred) - - # PLOTTING SPE - - if plot_SPE: - model.plot_SPE(ax = ax_spe[i,0], logy = logy) - ax_spe[i,0].set_title('Training') - - # PLOTTING PREDICTIONS - - if plot_predictions: - if cond_to_plot_pred: - model.plot_predictions(ax = ax_pred[i,0], - train_or_test = 'train', - X_pred_to_plot = X_pred_to_plot, - metrics = metrics) - - ######## VALIDATION ######## - - model.predict(X_validation, Y_validation, - count_window_size = count_window_size, - redefine_limit = True) - - # TERMS FOR PREDICTION TABLE - - if metrics is not None: - if model.has_Y: - true = model.Y_test_orig - pred = model.Y_test_pred_orig - else: - if X_pred_to_plot is not None: - true = model.X_test_orig[X_pred_to_plot] - pred = model.X_test_pred_orig[X_pred_to_plot] - for mr in metrics: - true, pred = align_dfs_by_rows(true.dropna(), pred) - validation_metrics[model.name+': '+mr.__name__]= mr(true, pred) - - # PLOTTING SPE - - if plot_SPE: - model.plot_SPE(ax = ax_spe[i,1], train_or_test = 'test', - logy=logy) - ax_spe[i,1].set_title(model.name+'\n\nValidation') - - # PLOTTING PREDICTIONS - - if plot_predictions: - if cond_to_plot_pred: - model.plot_predictions(ax = ax_pred[i,1], - train_or_test = 'test', - X_pred_to_plot = X_pred_to_plot, - metrics = metrics) - ax_pred[i,1].set_title(model.name+'\n\n'+ - ax_pred[i,1].get_title()) - - ######## TEST ######## - - model.predict(X_test, Y_test, - count_window_size = count_window_size, - count_limit = count_limit, - redefine_limit = False) - - # TERMS FOR PREDICTION TABLE - - if metrics is not None: - if model.has_Y: - true = model.Y_test_orig - pred = model.Y_test_pred_orig - else: - if X_pred_to_plot is not None: - true = model.X_test_orig[X_pred_to_plot] - pred = model.X_test_pred_orig[X_pred_to_plot] - for mr in metrics: - true, pred = align_dfs_by_rows(true.dropna(), pred) - test_metrics[model.name+': '+mr.__name__] = mr(true, pred) - - # TERMS FOR DETECTION TABLE - - if mask is None: - if fault_start is not None: - for key, value in model.alarms.items(): - if fault_end is not None: - detection_alarms[model.name+': '+key] = \ - value[fault_start:fault_end][:-1].mean() - false_detection_rates[model.name+': '+key] = \ - pd.concat([value[:fault_start][:-1], - value[fault_end:]]).mean() - else: - detection_alarms[model.name+': '+key] = \ - value[fault_start:fault_end].mean() - false_detection_rates[model.name+': '+key] = \ - value[:fault_start][:-1].mean() - else: - for key, value 
in model.alarms.items(): - detection_alarms[model.name+': '+key] = \ - mask[mask==1].eq(value[value==1]).astype(int).mean() - false_detection_rates[model.name+': '+key] = \ - 1-mask[mask==0].eq(value[value==0]).astype(int).mean() - - # PLOTTING SPE - - if plot_SPE: - - model.plot_SPE(ax = ax_spe[i,2], train_or_test='test', - logy = logy) - ax_spe[i,2].set_title('Test') - - if fault_start is not None: - ax_spe[i,2].axvline(datetime.strptime(str(fault_start), - '%Y-%m-%d %H:%M:%S'), - ls='--') - if fault_end is not None: - ax_spe[i,2].axvline(datetime.strptime(str(fault_end), - '%Y-%m-%d %H:%M:%S'), - ls='--') - - # PLOTTING PREDICTIONS - - if plot_predictions: - if cond_to_plot_pred: - model.plot_predictions(ax = ax_pred[i,2], - train_or_test = 'test', - X_pred_to_plot = X_pred_to_plot, - metrics = metrics) - if fault_start is not None: - ax_pred[i,2].axvline(datetime.strptime(str(fault_start), - '%Y-%m-%d %H:%M:%S'), ls='--') - if fault_end is not None: - ax_pred[i,2].axvline(datetime.strptime(str(fault_end), - '%Y-%m-%d %H:%M:%S'), ls='--') - - if plot_SPE: - fig_spe.tight_layout(); - if plot_predictions: - fig_pred.tight_layout(); - - ######## GENERATING FINAL TABLES ######## - - return_tables = [] - - # PREDICTION - - if metrics is not None: - - prediction_table = pd.DataFrame([train_metrics, validation_metrics, - test_metrics], - index = ['Train', - 'Validation', - 'Test']).T - - models_names = [models[i].name for i in range(len(models))] - metrics_names = [metrics[i].__name__ for i in range(len(metrics))] - - iterables = [models_names, metrics_names] - - index = pd.MultiIndex.from_product(iterables, - names = ['Models', 'Metrics']) - - message = ("The MultiIndex table is not of the right size. "+ - "This could happen when two models have the same name.") - - try: - prediction_table.index = index - except ValueError as err: - raise ValueError(message) from err - - return_tables.append(prediction_table.swaplevel().sort_index(axis=0)) - - # DETECTION - - if fault_start is not None: - - detection_table = pd.DataFrame([detection_alarms, - false_detection_rates], - index = ['FDR','FAR']).T - - models_names = [models[i].name for i in range(len(models))] - alarms_names = [list(model.alarms.keys())[i] for i in - range(len(list(model.alarms.keys())))] - - iterables = [models_names, alarms_names] - - index = pd.MultiIndex.from_product(iterables, - names = ['Models', 'Alarms']) - - detection_table.index = index - - return_tables.append(detection_table.swaplevel().sort_index(axis=0)) - - # COMPUTATIONAL TIMES - - if times: - - times_dict = {} - - times_dict['Train'] = [1e6*models[i].train_time/X_train.shape[0] - for i in range(len(models))] - times_dict['Test'] = [1e6*models[i].test_time/X_test.shape[0] - for i in range(len(models))] - - times_df = pd.DataFrame(times_dict, - index=[models[i].name for i in range(len(models))]) - - return_tables.append(times_df) - +import numpy as np +import pandas as pd +from datetime import datetime +import matplotlib.pyplot as plt + +############################################################################### + +def create_df_with_dates (array, + start = '2020-01-01 00:00:00', + freq = '1min'): + """ + Parameters + ---------- + array: pandas.DataFrame or numpy.array + Original data. + start: string, optional + Start timestamp. + freq: string, optional + Sampling interval. + Returns + ---------- + df: pandas.DataFrame + Processed data. 
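+    Examples
+    ----------
+    A minimal, illustrative call on placeholder data (3 rows, 2 columns,
+    hourly sampling; any array-like input works the same way):
+
+    >>> import numpy as np
+    >>> df = create_df_with_dates(np.zeros((3, 2)), freq = '1h')
+    >>> df.shape
+    (3, 2)
+    >>> str(df.index[1])
+    '2020-01-01 01:00:00'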
+    """
+    df = pd.DataFrame(array)
+    df.index = pd.date_range(start = start, 
+                             periods = df.shape[0], 
+                             freq=freq)
+    return df
+
+###############################################################################
+
+def create_df_with_noise (array, 
+                          noise_frac, 
+                          max_index_for_noise):
+    """
+    Adds artificial measurement noise to data.
+
+    Parameters
+    ----------
+    array: pandas.DataFrame or numpy.array
+        Original data.
+    noise_frac: float
+        Fraction (between 0 and 1) of the total amplitude of the variable 
+        that will be used as the noise standard deviation.
+    max_index_for_noise: int
+        Maximum index to consider the amplitude
+        in the standard deviation calculation.
+    Returns
+    ----------                
+    df: pandas.DataFrame
+        Processed data.
+    """    
+
+    df = pd.DataFrame(array)
+
+    sigma_noise = noise_frac*(np.amax(array[:max_index_for_noise,:],
+                                      axis=0)-
+                              np.amin(array[:max_index_for_noise,:],
+                                      axis=0))
+
+    for i in range(df.shape[1]):
+        df.iloc[:,i] = (df.iloc[:,i] + 
+                        sigma_noise[i]*np.random.randn(df.shape[0]))
+
+    return df
+
+###############################################################################
+
+def align_dfs_by_rows (df1, df2):
+    """
+    Aligns DataFrames by rows.
+
+    Parameters
+    ----------
+    df1, df2: pandas.DataFrame
+        Original data.
+    Returns
+    ----------                
+    new_df1, new_df2: pandas.DataFrame
+        Processed data.
+    """    
+
+    new_df1 = df1.loc[df1.index.isin(df2.index)]
+    new_df2 = df2.loc[df2.index.isin(df1.index)]
+
+    return new_df1, new_df2
+
+###############################################################################
+
+def spearmanr_dendrogram(df, figsize = (18,8)):
+    """
+    Generates a dendrogram of Spearman correlations.
+
+    Parameters
+    ----------
+    df: pandas.DataFrame
+        Data to be analyzed.
+    figsize: tuple of ints, optional
+        Figure dimensions.
+    """
+    import scipy.cluster.hierarchy
+    import scipy.stats
+
+    df = pd.DataFrame(df)
+
+    corr = np.round(scipy.stats.spearmanr(df).correlation, 4)
+    corr_condensed = scipy.cluster.hierarchy.distance.squareform(1-corr)
+    z = scipy.cluster.hierarchy.linkage(corr_condensed, method='average')
+    plt.figure(figsize=figsize)
+    scipy.cluster.hierarchy.dendrogram(z, labels = df.columns.tolist(), 
+                                       orientation='left', leaf_font_size=16)
+    plt.xlabel('Spearman Correlation')
+    plt.ylabel('Variable clusters')
+    plt.show()
+
+###############################################################################
+
+def train_val_test_split (data, start_train, end_train, 
+                          end_validation, end_test, 
+                          tags_X = None, tags_Y = None):
+    """
+    Separates the data into consecutive train, validation, and test
+    portions, returning 3 DataFrames. It can also separate the data into
+    predictor variables (X) and predicted variables (Y), in which case
+    6 DataFrames are returned.
+
+    Parameters
+    ----------
+    data: pandas.DataFrame
+        Data to be separated.
+    start_train: string
+        Start timestamp of the train portion.
+    end_train: string
+        End timestamp of the train portion.
+    end_validation: string
+        End timestamp of the validation portion.
+    end_test: string
+        End timestamp of the test portion.
+    tags_X: list of strings, optional
+        Variables to be considered in the X set.
+    tags_Y: list of strings, optional
+        Variables to be considered in the Y set.
+    Returns
+    ----------                
+    : pandas.DataFrames
+        Separated data.
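+    Examples
+    ----------
+    A minimal sketch on synthetic data (the column names are arbitrary):
+
+    >>> import numpy as np
+    >>> df = create_df_with_dates(np.random.randn(10, 3), freq = '1h')
+    >>> df.columns = ['a', 'b', 'c']
+    >>> X_tr, X_val, X_ts, Y_tr, Y_val, Y_ts = train_val_test_split(df,
+    ...     start_train = '2020-01-01 00:00:00',
+    ...     end_train = '2020-01-01 05:00:00',
+    ...     end_validation = '2020-01-01 07:00:00',
+    ...     end_test = '2020-01-01 09:00:00',
+    ...     tags_Y = 'c')
+    >>> X_tr.shape, Y_tr.shape
+    ((6, 2), (6, 1))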
+    """
+    train_data = data.loc[start_train:end_train]
+    validation_data = data.loc[end_train:end_validation].iloc[1:,:]
+    test_data = data.loc[end_validation:end_test].iloc[1:,:]
+
+    if tags_Y is not None:
+
+        if not isinstance(tags_Y, list): tags_Y = [tags_Y]
+
+        train_data_Y = train_data.loc[:,tags_Y]
+        validation_data_Y = validation_data.loc[:,tags_Y]
+        test_data_Y = test_data.loc[:,tags_Y]
+
+        if tags_X is not None:
+
+            if not isinstance(tags_X, list): tags_X = [tags_X]
+
+            train_data_X = train_data.loc[:,tags_X]
+            validation_data_X = validation_data.loc[:,tags_X]
+            test_data_X = test_data.loc[:,tags_X]
+
+        else:
+
+            dif = train_data.columns.difference(train_data_Y.columns)
+            train_data_X = train_data[dif]
+            validation_data_X = validation_data[dif]
+            test_data_X = test_data[dif]
+
+        return (train_data_X, validation_data_X, test_data_X,
+                train_data_Y, validation_data_Y, test_data_Y)
+
+    else:
+
+        if tags_X is not None:
+
+            if not isinstance(tags_X, list): tags_X = [tags_X]
+
+            train_data = train_data.loc[:,tags_X]
+            validation_data = validation_data.loc[:,tags_X]
+            test_data = test_data.loc[:,tags_X]
+
+        return (train_data, validation_data, test_data)
+
+###############################################################################
+
+def complete_analysis (model, X_train, X_validation, X_test, 
+                       Y_train = None, Y_validation = None, Y_test = None,
+                       lim_conf = 0.99,
+                       f_pp_train = ['remove_empty_variables',
+                                     'ffill_nan',
+                                     'remove_frozen_variables',
+                                     'normalize'],
+                       a_pp_train = None,
+                       f_pp_test = ['replace_nan_with_values',
+                                    'normalize'],
+                       a_pp_test = None,
+                       logy = True, 
+                       metrics = None, 
+                       X_pred_to_plot = None,
+                       count_limit = 1,
+                       count_window_size = 0,
+                       fault_start = None,
+                       fault_end = None):
+    """
+    Performs a complete monitoring analysis, with train, validation, and test.
+
+    Parameters
+    ----------
+    model: BibMon model
+        Model to be considered in the analysis.
+    X_train: pandas.DataFrame or numpy.array
+        Training data X.
+    X_validation: pandas.DataFrame or numpy.array
+        Validation data X.
+    X_test: pandas.DataFrame or numpy.array
+        Test data X.
+    Y_train: pandas.DataFrame or numpy.array, optional
+        Training data Y.
+    Y_validation: pandas.DataFrame or numpy.array, optional
+        Validation data Y.
+    Y_test: pandas.DataFrame or numpy.array, optional
+        Test data Y.
+    lim_conf: float, optional
+        Confidence limit for the detection index.
+    f_pp_train: list, optional
+        List containing strings with names of functions to be used 
+        in pre-processing the train data (the functions are defined in the
+        PreProcess class, in the BibMon_Tools.py file).
+    a_pp_train: dict, optional
+        Dictionary containing the parameters to be provided
+        to each function to perform pre-processing of the train data, in
+        the format {'functionname__argname': argvalue, ...}.
+    f_pp_test: list, optional
+        List containing strings with names of functions to be used 
+        in pre-processing the test data (the functions are defined in the
+        PreProcess class, in the BibMon_Tools.py file).
+    a_pp_test: dict, optional
+        Dictionary containing the parameters to be provided
+        to each function to perform pre-processing of the test data, in
+        the format {'functionname__argname': argvalue, ...}.
+    logy: boolean, optional
+        Whether to use a logarithmic scale in the SPE plots.
+    metrics: list of functions, optional
+        Functions for calculating metrics to be displayed in the title of 
+        the graph.
+ X_pred_to_plot: string, optional + In case the model is a reconstruction model (i.e., self.has_Y = False), + indicates which column of X to plot along with the prediction. + count_limit: int, optional + Limit of points to be considered in the window + for the count alarm to sound. + count_window_size: int, optional + Window sizes used in count alarm calculation. + fault_start: string, optional + Start timestamp of the fault. + fault_end: string, optional + End timestamp of the fault. + """ + fig, ax = plt.subplots(3,2, figsize = (15,12)) + + cond_to_plot_pred = (model.has_Y or + ((not model.has_Y) and (X_pred_to_plot is not None))) + + ######## TRAINING ######## + + model.fit(X_train, Y_train, + f_pp = f_pp_train, + a_pp = a_pp_train, + f_pp_test = f_pp_test, + a_pp_test = a_pp_test, + lim_conf = lim_conf, + redefine_limit = False) + + # PLOTTING SPE + + model.plot_SPE(ax = ax[0,0], logy = logy) + ax[0,0].set_title('Training') + + # PLOTTING PREDICTIONS + + if cond_to_plot_pred: + model.plot_predictions(ax = ax[0,1], train_or_test = 'train', + X_pred_to_plot = X_pred_to_plot, + metrics = metrics) + + ######## VALIDATION ######## + + model.predict(X_validation, Y_validation, + count_window_size = count_window_size, + redefine_limit = True) + + # PLOTTING SPE + + model.plot_SPE(ax = ax[1,0], train_or_test = 'test', logy = logy) + ax[1,0].set_title('Validation') + + # PLOTTING PREDICTIONS + + if cond_to_plot_pred: + model.plot_predictions(ax = ax[1,1], train_or_test = 'test', + X_pred_to_plot = X_pred_to_plot, + metrics = metrics) + + ######## TEST ######## + + model.predict(X_test, Y_test, + count_window_size = count_window_size, + count_limit = count_limit, + redefine_limit = False) + + # PLOTTING SPE + + model.plot_SPE(ax = ax[2,0], train_or_test = 'test', logy = logy) + ax[2,0].set_title('Test') + + if fault_start is not None: + ax[2,0].axvline(datetime.strptime(str(fault_start), + '%Y-%m-%d %H:%M:%S'), ls = '--') + if fault_end is not None: + ax[2,0].axvline(datetime.strptime(str(fault_end), + '%Y-%m-%d %H:%M:%S'), ls = '--') + + # PLOTTING PREDICTIONS + + if cond_to_plot_pred: + model.plot_predictions(ax = ax[2,1], train_or_test = 'test', + X_pred_to_plot = X_pred_to_plot, + metrics = metrics) + + if fault_start is not None: + ax[2,1].axvline(datetime.strptime(str(fault_start), + '%Y-%m-%d %H:%M:%S'), ls = '--') + if fault_end is not None: + ax[2,1].axvline(datetime.strptime(str(fault_end), + '%Y-%m-%d %H:%M:%S'), ls = '--') + + fig.tight_layout(); + +############################################################################## + +def comparative_table (models, X_train, X_validation, X_test, + Y_train = None , Y_validation = None, Y_test = None, + lim_conf = 0.99, + f_pp_train = ['remove_empty_variables', + 'ffill_nan', + 'remove_frozen_variables', + 'normalize'], + a_pp_train = None, + f_pp_test = ['replace_nan_with_values', + 'normalize'], + a_pp_test = None, + logy = True, metrics = None, + X_pred_to_plot = None, + count_limit = 1, + count_window_size = 0, + fault_start = None, + fault_end = None, + mask = None, + times = True, + plot_SPE = True, + plot_predictions = True, + fit_model = True): + + """ + Performs complete monitoring analysis of multiple models and builds + comparative result tables. + + Parameters + ---------- + models: list of BibMon models + Models to be considered in the analysis. + X_train: pandas.DataFrame or numpy.array + Training data X. + X_validation: pandas.DataFrame or numpy.array + Validation data X. 
+ X_test: pandas.DataFrame or numpy.array + Test data X. + Y_train: pandas.DataFrame or numpy.array, optional + Training data Y. + Y_validation: pandas.DataFrame or numpy.array, optional + Validation data Y. + Y_test: pandas.DataFrame or numpy.array, optional + Test data Y. + lim_conf: float, optional + Confidence limit for the detection index. + f_pp_train: list, optional + List containing strings with names of functions to be used + in pre-processing the train data (the functions are defined in the + PreProcess class, in the BibMon_Tools.py file). + a_pp_train: dict, optional + Dictionary containing the parameters to be provided + to each function to perform pre-processing of the train data, in + the format {'functionname__argname': argvalue, ...}. + f_pp_test: list, optional + List containing strings with names of functions to be used + in pre-processing the test data (the functions are defined in the + PreProcess class, in the BibMon_Tools.py file). + a_pp_test: dict, optional + Dictionary containing the parameters to be provided + to each function to perform pre-processing of the test data, in + the format {'functionname__argname': argvalue, ...}. + logy: boolean, optional + If use logarithmic scale in the SPE plots. + metrics: list of functions, optional + Functions for calculating metrics to be displayed in the title of + the graph. + X_pred_to_plot: string, optional + In case the model is a reconstruction model (i.e., self.has_Y = False), + indicates which column of X to plot along with the prediction. + count_limit: int, optional + Limit of points to be considered in the window + for the count alarm to sound. + count_window_size: int, optional + Window sizes used in count alarm calculation. + fault_start: string, optional + Start timestamp of the fault. + fault_end: string, optional + End timestamp of the fault. + mask: numpy.array, optional + Boolean array indicating the indices where the process is + in fault. + times: boolean, optional + If execution times should be calculated. + plot_SPE: boolean, optional + If SPE plots should be plotted. + plot_predictions: boolean, optional + If prediction plots should be plotted. + fit_model: boolean, optional + If models should be trained. + Returns + ---------- + : list of pandas.DataFrames + List with the generated tables (prediction and/or detection). 
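+    Examples
+    ----------
+    A minimal sketch, assuming `X_tr`, `X_val` and `X_ts` come from
+    train_val_test_split() and that the models accept default constructor
+    arguments; 'tag1' is a hypothetical column of X used only for plotting:
+
+    >>> import bibmon
+    >>> from sklearn.metrics import r2_score
+    >>> tables = bibmon.comparative_table([bibmon.PCA(), bibmon.ESN()],
+    ...                                   X_tr, X_val, X_ts,
+    ...                                   metrics = [r2_score],
+    ...                                   X_pred_to_plot = 'tag1')
+    >>> prediction_table, times_table = tables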
+ """ + + n = len(models) + + if plot_SPE: + fig_spe, ax_spe = plt.subplots(n, 3, figsize = (15, 4*n)) + + if plot_predictions: + fig_pred, ax_pred = plt.subplots(n, 3, figsize = (15, 4*n)) + + train_metrics = {} + validation_metrics = {} + test_metrics = {} + + detection_alarms = {} + false_detection_rates = {} + false_alarm_rates = {} + + for i in range(len(models)): + + model = models[i] + + cond_to_plot_pred = (model.has_Y or + ((not model.has_Y) and + (X_pred_to_plot is not None))) + + ######## TRAINING ######## + + if fit_model: + model.fit(X_train, Y_train, + f_pp = f_pp_train, + a_pp = a_pp_train, + f_pp_test = f_pp_test, + a_pp_test = a_pp_test, + lim_conf = lim_conf, + redefine_limit = False) + + # TERMS FOR PREDICTION TABLE + + if metrics is not None: + if model.has_Y: + true = model.Y_train_orig + pred = model.Y_train_pred_orig + else: + if X_pred_to_plot is not None: + true = model.X_train_orig[X_pred_to_plot] + pred = model.X_train_pred_orig[X_pred_to_plot] + for mr in metrics: + true, pred = align_dfs_by_rows(true.dropna(), pred) + train_metrics[model.name+': '+mr.__name__] = mr(true, pred) + + # PLOTTING SPE + + if plot_SPE: + model.plot_SPE(ax = ax_spe[i,0], logy = logy) + ax_spe[i,0].set_title('Training') + + # PLOTTING PREDICTIONS + + if plot_predictions: + if cond_to_plot_pred: + model.plot_predictions(ax = ax_pred[i,0], + train_or_test = 'train', + X_pred_to_plot = X_pred_to_plot, + metrics = metrics) + + ######## VALIDATION ######## + + model.predict(X_validation, Y_validation, + count_window_size = count_window_size, + redefine_limit = True) + + # TERMS FOR PREDICTION TABLE + + if metrics is not None: + if model.has_Y: + true = model.Y_test_orig + pred = model.Y_test_pred_orig + else: + if X_pred_to_plot is not None: + true = model.X_test_orig[X_pred_to_plot] + pred = model.X_test_pred_orig[X_pred_to_plot] + for mr in metrics: + true, pred = align_dfs_by_rows(true.dropna(), pred) + validation_metrics[model.name+': '+mr.__name__]= mr(true, pred) + + # PLOTTING SPE + + if plot_SPE: + model.plot_SPE(ax = ax_spe[i,1], train_or_test = 'test', + logy=logy) + ax_spe[i,1].set_title(model.name+'\n\nValidation') + + # PLOTTING PREDICTIONS + + if plot_predictions: + if cond_to_plot_pred: + model.plot_predictions(ax = ax_pred[i,1], + train_or_test = 'test', + X_pred_to_plot = X_pred_to_plot, + metrics = metrics) + ax_pred[i,1].set_title(model.name+'\n\n'+ + ax_pred[i,1].get_title()) + + ######## TEST ######## + + model.predict(X_test, Y_test, + count_window_size = count_window_size, + count_limit = count_limit, + redefine_limit = False) + + # TERMS FOR PREDICTION TABLE + + if metrics is not None: + if model.has_Y: + true = model.Y_test_orig + pred = model.Y_test_pred_orig + else: + if X_pred_to_plot is not None: + true = model.X_test_orig[X_pred_to_plot] + pred = model.X_test_pred_orig[X_pred_to_plot] + for mr in metrics: + true, pred = align_dfs_by_rows(true.dropna(), pred) + test_metrics[model.name+': '+mr.__name__] = mr(true, pred) + + # TERMS FOR DETECTION TABLE + + if mask is None: + if fault_start is not None: + for key, value in model.alarms.items(): + if fault_end is not None: + detection_alarms[model.name+': '+key] = \ + value[fault_start:fault_end][:-1].mean() + false_detection_rates[model.name+': '+key] = \ + pd.concat([value[:fault_start][:-1], + value[fault_end:]]).mean() + else: + detection_alarms[model.name+': '+key] = \ + value[fault_start:fault_end].mean() + false_detection_rates[model.name+': '+key] = \ + value[:fault_start][:-1].mean() + else: + for key, value 
in model.alarms.items(): + detection_alarms[model.name+': '+key] = \ + mask[mask==1].eq(value[value==1]).astype(int).mean() + false_detection_rates[model.name+': '+key] = \ + 1-mask[mask==0].eq(value[value==0]).astype(int).mean() + + # PLOTTING SPE + + if plot_SPE: + + model.plot_SPE(ax = ax_spe[i,2], train_or_test='test', + logy = logy) + ax_spe[i,2].set_title('Test') + + if fault_start is not None: + ax_spe[i,2].axvline(datetime.strptime(str(fault_start), + '%Y-%m-%d %H:%M:%S'), + ls='--') + if fault_end is not None: + ax_spe[i,2].axvline(datetime.strptime(str(fault_end), + '%Y-%m-%d %H:%M:%S'), + ls='--') + + # PLOTTING PREDICTIONS + + if plot_predictions: + if cond_to_plot_pred: + model.plot_predictions(ax = ax_pred[i,2], + train_or_test = 'test', + X_pred_to_plot = X_pred_to_plot, + metrics = metrics) + if fault_start is not None: + ax_pred[i,2].axvline(datetime.strptime(str(fault_start), + '%Y-%m-%d %H:%M:%S'), ls='--') + if fault_end is not None: + ax_pred[i,2].axvline(datetime.strptime(str(fault_end), + '%Y-%m-%d %H:%M:%S'), ls='--') + + if plot_SPE: + fig_spe.tight_layout(); + if plot_predictions: + fig_pred.tight_layout(); + + ######## GENERATING FINAL TABLES ######## + + return_tables = [] + + # PREDICTION + + if metrics is not None: + + prediction_table = pd.DataFrame([train_metrics, validation_metrics, + test_metrics], + index = ['Train', + 'Validation', + 'Test']).T + + models_names = [models[i].name for i in range(len(models))] + metrics_names = [metrics[i].__name__ for i in range(len(metrics))] + + iterables = [models_names, metrics_names] + + index = pd.MultiIndex.from_product(iterables, + names = ['Models', 'Metrics']) + + message = ("The MultiIndex table is not of the right size. "+ + "This could happen when two models have the same name.") + + try: + prediction_table.index = index + except ValueError as err: + raise ValueError(message) from err + + return_tables.append(prediction_table.swaplevel().sort_index(axis=0)) + + # DETECTION + + if fault_start is not None: + + detection_table = pd.DataFrame([detection_alarms, + false_detection_rates], + index = ['FDR','FAR']).T + + models_names = [models[i].name for i in range(len(models))] + alarms_names = [list(model.alarms.keys())[i] for i in + range(len(list(model.alarms.keys())))] + + iterables = [models_names, alarms_names] + + index = pd.MultiIndex.from_product(iterables, + names = ['Models', 'Alarms']) + + detection_table.index = index + + return_tables.append(detection_table.swaplevel().sort_index(axis=0)) + + # COMPUTATIONAL TIMES + + if times: + + times_dict = {} + + times_dict['Train'] = [1e6*models[i].train_time/X_train.shape[0] + for i in range(len(models))] + times_dict['Test'] = [1e6*models[i].test_time/X_test.shape[0] + for i in range(len(models))] + + times_df = pd.DataFrame(times_dict, + index=[models[i].name for i in range(len(models))]) + + return_tables.append(times_df) + + return return_tables + +############################################################################## +# complete_analysis + comparative_table + +def compare_variables_for_regression (data, start_train, end_train, + end_validation, end_test, + tags, + model, + metrics, count_window_size, count_limit, + fault_start, + fault_end, + tags_X = None, + f_pp_train = ['remove_empty_variables', + 'ffill_nan', + 'remove_frozen_variables', + 'normalize', + 'moving_average_filter'], + a_pp_train = None, + f_pp_test = ['replace_nan_with_values', + 'normalize', + 'moving_average_filter'], + a_pp_test = None, + mask = None + ): + """ + Compare 
variables for regression analysis by training, validating, + and testing a model on different data splits and generating relevant + metrics for prediction and fault detection. + + Parameters + ---------- + data: pandas.DataFrame + DataFrame containing the time series data for the variables of interest. + start_train: string + Start timestamp of the training data. + end_train: string + End timestamp of the training data. + end_validation: string + End timestamp of the validation data. + end_test: string + End timestamp of the test data. + tags: list of strings + List of variable names (tags) to be used as targets (Y) in the model. + + model: BibMon model + Model to be considered in the analysis. + metrics: list of functions, optional + Functions for calculating metrics to be displayed in the title of + the graph. + count_window_size: int, optional + Window sizes used in count alarm calculation. + count_limit: int, optional + Limit of points to be considered in the window + for the count alarm to sound. + fault_start: string, optional + Start timestamp of the fault. + fault_end: string, optional + End timestamp of the fault. + tags_X: list of strings + Variables to be considered in the X set. + f_pp_train: list, optional + List containing strings with names of functions to be used + in pre-processing the train data (the functions are defined in the + PreProcess class, in the BibMon_Tools.py file). + a_pp_train: dict, optional + Dictionary containing the parameters to be provided + to each function to perform pre-processing of the train data, in + the format {'functionname__argname': argvalue, ...}. + f_pp_test: list, optional + List containing strings with names of functions to be used + in pre-processing the test data (the functions are defined in the + PreProcess class, in the BibMon_Tools.py file). + a_pp_test: dict, optional + Dictionary containing the parameters to be provided + to each function to perform pre-processing of the test data, in + the format {'functionname__argname': argvalue, ...}. + mask: numpy.array, optional + Boolean array indicating the indices where the process is + in fault. + + Returns + ---------- + return_tables: list of pandas.DataFrame + A list containing two or more DataFrames, depending on the parameters + passed: + + - **Prediction table**: A DataFrame containing prediction metrics + (such as error metrics) for training, validation, and test data. + The table is multi-indexed, with tags (variables) and metrics as + the indices. Each metric is calculated for each tag and data split + (train, validation, test). + + - **Detection table**: (Optional) A DataFrame that includes detection + metrics, such as False Detection Rate (FDR) and False Alarm Rate (FAR), + for each tag and fault detection alarm. This table is returned if + `fault_start` is provided, and it includes performance information + related to the model’s ability to detect faults during the specified + period. It is also multi-indexed, with tags and alarms as the indices. + + The function returns these tables to provide a detailed analysis of + model performance and fault detection. 
+    """
+
+    return_tables = []
+
+    train_metrics = {}
+    validation_metrics = {}
+    test_metrics = {}
+
+    detection_alarms = {}
+    false_detection_rates = {}
+
+    errorTags = []
+    for tag_Y in tags:
+        try:
+            if not isinstance(tag_Y, list): tag_Y = [tag_Y]
+
+            (X_train, X_validation, 
+            X_test, Y_train, 
+            Y_validation, Y_test) = train_val_test_split(data, 
+                                        start_train = start_train, 
+                                        end_train = end_train, 
+                                        end_validation = end_validation, 
+                                        end_test = end_test, 
+                                        tags_X = tags_X, 
+                                        tags_Y = tag_Y)
+
+            ######## TRAINING ########
+            lim_conf = 0.99
+
+            model.fit(X_train, Y_train, 
+                    f_pp = f_pp_train, 
+                    a_pp = a_pp_train, 
+                    f_pp_test = f_pp_test, 
+                    a_pp_test = a_pp_test, 
+                    lim_conf = lim_conf, 
+                    redefine_limit = False)
+
+            if metrics is not None:
+                if model.has_Y:
+                    true = model.Y_train_orig
+                    pred = model.Y_train_pred_orig
+
+                for mr in metrics:
+                    true, pred = align_dfs_by_rows(true.dropna(), pred)
+                    train_metrics[tag_Y[0] +': '+mr.__name__] = mr(true, pred)
+
+            ######## VALIDATION ########
+            model.predict(X_validation, Y_validation,
+                        count_window_size = count_window_size,
+                        redefine_limit = True)
+
+            if metrics is not None:
+                if model.has_Y:
+                    true = model.Y_test_orig
+                    pred = model.Y_test_pred_orig
+
+                for mr in metrics:
+                    true, pred = align_dfs_by_rows(true.dropna(), pred)
+                    validation_metrics[tag_Y[0] +': '+mr.__name__] = mr(true, pred)
+
+            ######## TEST ########
+
+            model.predict(X_test, Y_test, 
+                        count_window_size = count_window_size,
+                        count_limit = count_limit,
+                        redefine_limit = False)
+
+            if metrics is not None:
+                if model.has_Y:
+                    true = model.Y_test_orig
+                    pred = model.Y_test_pred_orig
+
+                for mr in metrics:
+                    true, pred = align_dfs_by_rows(true.dropna(), pred)
+                    test_metrics[tag_Y[0] +': '+mr.__name__] = mr(true, pred)
+
+            # TERMS FOR DETECTION TABLE
+
+            if mask is None:
+                if fault_start is not None:
+                    for key, value in model.alarms.items():
+                        if fault_end is not None:
+                            detection_alarms[tag_Y[0]+': '+key] = \
+                                value[fault_start:fault_end][:-1].mean()
+                            false_detection_rates[tag_Y[0]+': '+key] = \
+                                pd.concat([value[:fault_start][:-1],
+                                        value[fault_end:]]).mean()
+                        else:
+                            detection_alarms[tag_Y[0]+': '+key] = \
+                                value[fault_start:fault_end].mean()
+                            false_detection_rates[tag_Y[0]+': '+key] = \
+                                value[:fault_start][:-1].mean()
+            else:
+                for key, value in model.alarms.items():
+                    detection_alarms[tag_Y[0]+': '+key] = \
+                        mask[mask==1].eq(value[value==1]).astype(int).mean()
+                    false_detection_rates[tag_Y[0]+': '+key] = \
+                        1-mask[mask==0].eq(value[value==0]).astype(int).mean()
+
+        except ValueError:
+            errorTags.append(tag_Y[0])
+            print("Error with tag: " + tag_Y[0])
+
+    tags = [key for key in tags if key not in errorTags]
+
+    # PREDICTION
+    if metrics is not None:
+
+        prediction_table = pd.DataFrame([train_metrics, validation_metrics, 
+                                        test_metrics], 
+                                        index = ['Train', 
+                                                'Validation', 
+                                                'Test']).T
+
+        tags_names = [tags[i] for i in range(len(tags))]
+        metrics_names = [metrics[i].__name__ for i in range(len(metrics))]
+
+        iterables = [tags_names, metrics_names]
+
+        index = pd.MultiIndex.from_product(iterables, 
+                                        names = ['Tags', 'Metrics'])
+
+        message = ("The MultiIndex table is not of the right size. 
"+ + "This could happen when two tags have the same name.") + + try: + prediction_table.index = index + except ValueError as err: + raise ValueError(message) from err + + return_tables.append(prediction_table.swaplevel().sort_index(axis=0)) + + # DETECTION + if fault_start is not None: + + detection_table = pd.DataFrame([detection_alarms, + false_detection_rates], + index = ['FDR','FAR']).T + + tags_names = [tags[i] for i in range(len(tags))] + alarms_names = [list(model.alarms.keys())[i] for i in + range(len(list(model.alarms.keys())))] + + iterables = [tags_names, alarms_names] + + index = pd.MultiIndex.from_product(iterables, + names = ['Tags', 'Alarms']) + + detection_table.index = index + + return_tables.append(detection_table.swaplevel().sort_index(axis=0)) + return return_tables \ No newline at end of file From db70e72efcd3ebbcb197a86f1d04236c8ae00968 Mon Sep 17 00:00:00 2001 From: Igor Infingardi Date: Thu, 17 Oct 2024 20:57:37 -0300 Subject: [PATCH 2/6] add: documentation --- bibmon/_bibmon_tools.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bibmon/_bibmon_tools.py b/bibmon/_bibmon_tools.py index 94397ae..595f426 100644 --- a/bibmon/_bibmon_tools.py +++ b/bibmon/_bibmon_tools.py @@ -695,7 +695,6 @@ def comparative_table (models, X_train, X_validation, X_test, return return_tables ############################################################################## -# complete_analysis + comparative_table def compare_variables_for_regression (data, start_train, end_train, end_validation, end_test, @@ -736,7 +735,6 @@ def compare_variables_for_regression (data, start_train, end_train, End timestamp of the test data. tags: list of strings List of variable names (tags) to be used as targets (Y) in the model. - model: BibMon model Model to be considered in the analysis. metrics: list of functions, optional From 80587d313141d9be02d6a065868cadf022b42485 Mon Sep 17 00:00:00 2001 From: Laura Date: Thu, 17 Oct 2024 21:20:03 -0300 Subject: [PATCH 3/6] fix documentation --- bibmon/_bibmon_tools.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/bibmon/_bibmon_tools.py b/bibmon/_bibmon_tools.py index 595f426..cfea20e 100644 --- a/bibmon/_bibmon_tools.py +++ b/bibmon/_bibmon_tools.py @@ -696,7 +696,7 @@ def comparative_table (models, X_train, X_validation, X_test, ############################################################################## -def compare_variables_for_regression (data, start_train, end_train, +def evaluate_targets_performance (data, start_train, end_train, end_validation, end_test, tags, model, @@ -717,9 +717,13 @@ def compare_variables_for_regression (data, start_train, end_train, mask = None ): """ - Compare variables for regression analysis by training, validating, - and testing a model on different data splits and generating relevant - metrics for prediction and fault detection. + This function evaluates the performance of all dependent variables (targets) in a regression model. + It systematically tests each target and computes key performance metrics, + including the R² score (a measure of how well the predictions align with actual data), + mean absolute error (quantifying the average prediction error), + false alarm rate (FAR) (representing the frequency of false positives), + and fault detection rate (FDR) (indicating the accuracy in identifying true faults). + The results for each target will be stored in a structured format for detailed analysis. 
Parameters ---------- From a0ee3b76021544b3df02326294f08754eea532c2 Mon Sep 17 00:00:00 2001 From: Igor Infingardi Date: Thu, 17 Oct 2024 21:52:11 -0300 Subject: [PATCH 4/6] doc: documentation for new function targets_performance_comparative_table --- bibmon/_bibmon_tools.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bibmon/_bibmon_tools.py b/bibmon/_bibmon_tools.py index cfea20e..7ead5e3 100644 --- a/bibmon/_bibmon_tools.py +++ b/bibmon/_bibmon_tools.py @@ -696,7 +696,7 @@ def comparative_table (models, X_train, X_validation, X_test, ############################################################################## -def evaluate_targets_performance (data, start_train, end_train, +def targets_performance_comparative_table (data, start_train, end_train, end_validation, end_test, tags, model, @@ -782,7 +782,7 @@ def evaluate_targets_performance (data, start_train, end_train, passed: - **Prediction table**: A DataFrame containing prediction metrics - (such as error metrics) for training, validation, and test data. + (R² score and mean absolute error) for training, validation, and test regression. The table is multi-indexed, with tags (variables) and metrics as the indices. Each metric is calculated for each tag and data split (train, validation, test). @@ -952,4 +952,4 @@ def evaluate_targets_performance (data, start_train, end_train, return_tables.append(detection_table.swaplevel().sort_index(axis=0)) - return return_tables \ No newline at end of file + return return_tables From 70885e6a3ca63b2a462bcf513d27fa4a7052b52d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Afr=C3=A2nio=20Melo?= <40374017+afraniomelo@users.noreply.github.com> Date: Tue, 7 Jan 2025 17:57:37 -0300 Subject: [PATCH 5/6] Adjustments to incorporate PR 58. --- bibmon/__init__.py | 6 ++-- bibmon/_bibmon_tools.py | 68 +++++++++++++++++++---------------------- 2 files changed, 35 insertions(+), 39 deletions(-) diff --git a/bibmon/__init__.py b/bibmon/__init__.py index 3dd3ee7..2b309d8 100644 --- a/bibmon/__init__.py +++ b/bibmon/__init__.py @@ -5,11 +5,11 @@ from ._sklearn_regressor import sklearnRegressor from ._preprocess import PreProcess from ._load_data import load_tennessee_eastman, load_real_data -from ._bibmon_tools import train_val_test_split, complete_analysis, comparative_table, spearmanr_dendrogram, create_df_with_dates, create_df_with_noise, align_dfs_by_rows +from ._bibmon_tools import train_val_test_split, complete_analysis, comparative_table, targets_comparative_table, spearmanr_dendrogram, create_df_with_dates, create_df_with_noise, align_dfs_by_rows __all__ = ['Autoencoder','PCA','ESN','SBM', - 'sklearnRegressor', 'PreProcess', + 'sklearnRegressor', 'PreProcess', 'load_tennessee_eastman', 'load_real_data', - 'train_val_test_split', 'complete_analysis', 'comparative_table', + 'train_val_test_split', 'complete_analysis', 'comparative_table','targets_comparative_table' 'spearmanr_dendrogram', 'create_df_with_dates', 'create_df_with_noise', 'align_dfs_by_rows'] diff --git a/bibmon/_bibmon_tools.py b/bibmon/_bibmon_tools.py index 7ead5e3..c9bbd7f 100644 --- a/bibmon/_bibmon_tools.py +++ b/bibmon/_bibmon_tools.py @@ -696,37 +696,37 @@ def comparative_table (models, X_train, X_validation, X_test, ############################################################################## -def targets_performance_comparative_table (data, start_train, end_train, - end_validation, end_test, - tags, - model, - metrics, count_window_size, count_limit, - fault_start, - fault_end, - tags_X = None, - f_pp_train = 
['remove_empty_variables', - 'ffill_nan', - 'remove_frozen_variables', - 'normalize', - 'moving_average_filter'], - a_pp_train = None, - f_pp_test = ['replace_nan_with_values', - 'normalize', - 'moving_average_filter'], - a_pp_test = None, - mask = None - ): +def targets_comparative_table (model, data, + start_train, end_train, + end_validation, end_test, + tags, + metrics, + fault_start=None, + fault_end=None, + count_window_size=0, count_limit=1, + lim_conf=0.99, + tags_X = None, + f_pp_train = ['remove_empty_variables', + 'ffill_nan', + 'remove_frozen_variables', + 'normalize'], + a_pp_train = None, + f_pp_test = ['replace_nan_with_values', + 'normalize'], + a_pp_test = None, + mask = None): """ This function evaluates the performance of all dependent variables (targets) in a regression model. It systematically tests each target and computes key performance metrics, - including the R² score (a measure of how well the predictions align with actual data), - mean absolute error (quantifying the average prediction error), - false alarm rate (FAR) (representing the frequency of false positives), + including fitting metrics such as the R² score (a measure of how well the predictions align with actual data) and + mean absolute error (quantifying the average prediction error), and also + alarm metrics such as false alarm rate (FAR) (representing the frequency of false positives), and fault detection rate (FDR) (indicating the accuracy in identifying true faults). - The results for each target will be stored in a structured format for detailed analysis. Parameters ---------- + model: BibMon model + Model to be considered in the analysis. data: pandas.DataFrame DataFrame containing the time series data for the variables of interest. start_train: string @@ -739,20 +739,20 @@ def targets_performance_comparative_table (data, start_train, end_train, End timestamp of the test data. tags: list of strings List of variable names (tags) to be used as targets (Y) in the model. - model: BibMon model - Model to be considered in the analysis. - metrics: list of functions, optional + metrics: list of functions Functions for calculating metrics to be displayed in the title of the graph. + fault_start: string, optional + Start timestamp of the fault. + fault_end: string, optional + End timestamp of the fault. count_window_size: int, optional Window sizes used in count alarm calculation. count_limit: int, optional Limit of points to be considered in the window for the count alarm to sound. - fault_start: string, optional - Start timestamp of the fault. - fault_end: string, optional - End timestamp of the fault. + lim_conf: float, optional + Confidence limit for the detection index. tags_X: list of strings Variables to be considered in the X set. f_pp_train: list, optional @@ -782,7 +782,7 @@ def targets_performance_comparative_table (data, start_train, end_train, passed: - **Prediction table**: A DataFrame containing prediction metrics - (R² score and mean absolute error) for training, validation, and test regression. + (such as R² score and mean absolute error) for training, validation, and test regression. The table is multi-indexed, with tags (variables) and metrics as the indices. Each metric is calculated for each tag and data split (train, validation, test). @@ -793,9 +793,6 @@ def targets_performance_comparative_table (data, start_train, end_train, `fault_start` is provided, and it includes performance information related to the model’s ability to detect faults during the specified period. 
It is also multi-indexed, with tags and alarms as the indices. - - The function returns these tables to provide a detailed analysis of - model performance and fault detection. """ return_tables = [] @@ -823,7 +820,6 @@ def targets_performance_comparative_table (data, start_train, end_train, tags_Y = tag_Y) ######## TRAINING ######## - lim_conf = 0.99 model.fit(X_train, Y_train, f_pp = f_pp_train, From 47d432ceb0791fcb8be283a1b3d1fbc5ed9f4737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Afr=C3=A2nio=20Melo?= <40374017+afraniomelo@users.noreply.github.com> Date: Tue, 7 Jan 2025 18:04:45 -0300 Subject: [PATCH 6/6] Update __init__.py bug correction --- bibmon/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bibmon/__init__.py b/bibmon/__init__.py index 2b309d8..a2eb249 100644 --- a/bibmon/__init__.py +++ b/bibmon/__init__.py @@ -10,6 +10,7 @@ __all__ = ['Autoencoder','PCA','ESN','SBM', 'sklearnRegressor', 'PreProcess', 'load_tennessee_eastman', 'load_real_data', - 'train_val_test_split', 'complete_analysis', 'comparative_table','targets_comparative_table' + 'train_val_test_split', 'complete_analysis', 'comparative_table', + 'targets_comparative_table', 'spearmanr_dendrogram', 'create_df_with_dates', 'create_df_with_noise', 'align_dfs_by_rows']
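---

A minimal usage sketch of the function introduced by this series, as exposed
after PATCH 5/6 and 6/6 (`bibmon.targets_comparative_table`). The DataFrame
`df`, the tag names, the timestamps, and the `sklearnRegressor` constructor
argument below are hypothetical placeholders, not part of the patches:

    import bibmon
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score, mean_absolute_error

    # wrap a scikit-learn regressor as a BibMon model
    # (constructor argument assumed; adjust to the actual API)
    model = bibmon.sklearnRegressor(LinearRegression())

    # with metrics and a fault window given, the function returns one
    # table of fit metrics per split and one table of FDR/FAR per tag
    pred_table, detect_table = bibmon.targets_comparative_table(
        model, df,
        start_train    = '2020-01-01 00:00:00',
        end_train      = '2020-01-05 00:00:00',
        end_validation = '2020-01-07 00:00:00',
        end_test       = '2020-01-10 00:00:00',
        tags           = ['tag1', 'tag2'],
        metrics        = [r2_score, mean_absolute_error],
        fault_start    = '2020-01-08 00:00:00',
        fault_end      = '2020-01-09 00:00:00')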