Probabilistic Generic Model and Gaussian Naive Bayes Model #60

Open · wants to merge 15 commits into base: dev
38 changes: 38 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,38 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, add screenshots to help explain your problem.

**Desktop (please complete the following information):**
- OS: [e.g. iOS]
- Browser [e.g. chrome, safari]
- Version [e.g. 22]

**Smartphone (please complete the following information):**
- Device: [e.g. iPhone6]
- OS: [e.g. iOS8.1]
- Browser [e.g. stock browser, safari]
- Version [e.g. 22]

**Additional context**
Add any other context about the problem here.
17 changes: 17 additions & 0 deletions .github/ISSUE_TEMPLATE/enhancement.md
@@ -0,0 +1,17 @@
---
name: Enhancement
about: Propose an idea to enhance the existing functionalities of BibMon.
title: ''
labels: ''
assignees: ''

---

## How we are today
Provide a brief description of how BibMon currently addresses the topic related to this issue. Highlight any limitations, challenges, or areas that need improvement.

## Proposed Enhancement
Explain the proposed enhancement. Include its objectives, expected benefits, and any relevant background information. Reference relevant literature or examples where applicable to support your proposal.

## Implementation
Describe how the enhancement will integrate with the existing structure of the package or specify any modifications that will be needed. Include any potential challenges and how they might be addressed.
17 changes: 17 additions & 0 deletions .github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,17 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''

---

## How we are today
Provide a brief description of how BibMon currently addresses the topic related to this issue. Highlight any limitations, challenges, or areas that need improvement.

## Proposed New Feature
Explain the proposed new feature. Include its objectives, expected benefits, and any relevant background information. Reference relevant literature or examples where applicable to support your proposal.

## Implementation
Describe how the the new feature will integrate with the existing structure of the package or specify any modifications that will be needed. Include any potential challenges and how they might be addressed.
89 changes: 89 additions & 0 deletions bibmon/_gaussian_model.py
@@ -0,0 +1,89 @@
import numpy as np
import pandas as pd

from ._probabilistic_genereic_model import ProbabilisticGenericModel

class GaussianModel(ProbabilisticGenericModel):
    def __init__(self):
        self.mean = None
        self.std = None

    def train_core(self):
        """
        Fits a normal distribution to the training data.
        """
        self.mean = self.X_train.mean()
        self.std = self.X_train.std()

    def predict_proba(self, X):
        """
        Computes the probability density of each observation in X
        under the fitted distribution.

        Parameters
        ----------
        X: numpy.array
            Input data for prediction.

        Returns
        -------
        probabilities: numpy.array
            Computed probability densities.
        """
        from scipy.stats import norm
        probabilities = norm.pdf(X, loc=self.mean, scale=self.std)
        return probabilities

    def pre_train(self, X_train, Y_train=None, *args, **kwargs):
        """
        Preprocessing specific to the Gaussian model.
        """
        self.X_train = pd.DataFrame(X_train)
        # Possible preprocessing steps

    def pre_test(self, X_test, Y_test=None, *args, **kwargs):
        """
        Preprocessing specific to the Gaussian model.
        """
        self.X_test = pd.DataFrame(X_test)
        # Possible preprocessing steps

    def plot_histogram_with_pdf(self, feature_name=None, bins=30):
        """
        Plots the histogram of the training data with the fitted PDF overlaid.
        """
        import matplotlib.pyplot as plt
        from scipy.stats import norm
        import seaborn as sns

        if feature_name is None:
            # If no feature is specified, use the first one
            feature_name = self.X_train.columns[0]

        data = self.X_train[feature_name]

        plt.figure(figsize=(10, 6))
        sns.histplot(data, bins=bins, kde=False, stat='density', label='Data')

        x_axis = np.linspace(data.min(), data.max(), 100)
        pdf = norm.pdf(x_axis, self.mean[feature_name], self.std[feature_name])
        plt.plot(x_axis, pdf, color='red', label='Fitted PDF')
        plt.title(f'Histogram and fitted PDF for {feature_name}')
        plt.xlabel(feature_name)
        plt.ylabel('Density')
        plt.legend()
        plt.show()

    def plot_qq(self, feature_name=None):
        """
        Plots the Q-Q plot to check the normality of the data.
        """
        import scipy.stats as stats
        import matplotlib.pyplot as plt

        if feature_name is None:
            feature_name = self.X_train.columns[0]

        data = self.X_train[feature_name]

        stats.probplot(data, dist="norm", plot=plt)
        plt.title(f"Q-Q plot for {feature_name}")
        plt.show()
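
For reviewers, a minimal usage sketch of the class above. This is not part of the diff; the column name `temperature`, the synthetic data, and the package-level import path are illustrative assumptions.

    import numpy as np
    import pandas as pd

    from bibmon._gaussian_model import GaussianModel  # assumed import path in this PR

    # Hypothetical training data: one roughly normally distributed process variable
    rng = np.random.default_rng(0)
    X_train = pd.DataFrame({'temperature': rng.normal(loc=350.0, scale=5.0, size=1000)})

    model = GaussianModel()
    model.fit(X_train)                    # pre_train + train_core (fits mean and std)

    # Density of new observations under the fitted normal distribution
    X_new = np.array([[348.0], [370.0]])
    print(model.predict_proba(X_new))     # the 370.0 observation should get a much lower density

    model.plot_histogram_with_pdf('temperature')
    model.plot_qq('temperature')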
4 changes: 3 additions & 1 deletion bibmon/_load_data.py
@@ -58,7 +58,9 @@ def load_tennessee_eastman (train_id = 0, test_id = 0):

else:

-        train_df = create_df_with_dates(filepath, sep='\s+', names=tags,
+        train_df = create_df_with_dates(pd.read_csv(filepath,
+                                                     sep = '\s+',
+                                                     names = tags),
                                         freq = '3min')

with pkg_resources.path(tennessee_eastman, file_test) as filepath:
51 changes: 51 additions & 0 deletions bibmon/_probabilistic_genereic_model.py
@@ -0,0 +1,51 @@
import time

import numpy as np
import pandas as pd
from scipy.stats import norm
from abc import ABC, abstractmethod

class ProbabilisticGenericModel(ABC):
    @abstractmethod
    def train_core(self):
        pass

    @abstractmethod
    def predict_proba(self, X):
        pass

    def set_hyperparameters(self, params_dict):
        for key, value in params_dict.items():
            setattr(self, key, value)

    def load_model(self, *args, **kwargs):
        pass

    def pre_train(self, X_train, Y_train=None, *args, **kwargs):
        # Ensure the data is numeric
        self.X_train = pd.DataFrame(X_train).astype(float)
        self.Y_train = pd.DataFrame(Y_train)

    def train(self, *args, **kwargs):
        start_time = time.time()
        self.train_core()
        end_time = time.time()
        self.train_time = end_time - start_time

    def pre_test(self, X_test, Y_test=None, *args, **kwargs):
        # Ensure the data is numeric
        self.X_test = pd.DataFrame(X_test).astype(float)
        self.Y_test = pd.DataFrame(Y_test)

    def test(self, *args, **kwargs):
        start_time = time.time()
        probabilities = self.predict_proba(self.X_test.values)
        end_time = time.time()
        self.test_time = end_time - start_time
        self.probabilities = pd.DataFrame(probabilities, index=self.X_test.index)

    def fit(self, X_train, Y_train=None, *args, **kwargs):
        self.pre_train(X_train, Y_train, *args, **kwargs)
        self.train(*args, **kwargs)

    def predict(self, X_test, Y_test=None, *args, **kwargs):
        self.pre_test(X_test, Y_test, *args, **kwargs)
        self.test(*args, **kwargs)
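
As a sanity check on the template-method flow above (pre_train → train_core and pre_test → predict_proba, driven by fit/predict), here is a deliberately trivial hypothetical subclass. It is only a sketch to illustrate the contract, not part of the PR; the class name and import path are assumptions.

    import numpy as np
    import pandas as pd

    from bibmon._probabilistic_genereic_model import ProbabilisticGenericModel  # assumed path


    class ConstantDensityModel(ProbabilisticGenericModel):
        """Toy subclass: assigns the same density to every observation."""

        def train_core(self):
            # Only records the number of training features
            self.n_features_ = self.X_train.shape[1]

        def predict_proba(self, X):
            # Uniform "density" of 1.0 for every sample
            return np.ones(len(X))


    toy = ConstantDensityModel()
    toy.fit(pd.DataFrame({'x': [1.0, 2.0, 3.0]}))
    toy.predict(pd.DataFrame({'x': [4.0, 5.0]}))
    print(toy.probabilities)              # DataFrame of ones, indexed like X_test
    print(toy.train_time, toy.test_time)  # timings recorded by train()/test()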
134 changes: 134 additions & 0 deletions bibmon/gaussian_naive_bayes.py
@@ -0,0 +1,134 @@
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_recall_curve, average_precision_score

from ._probabilistic_genereic_model import ProbabilisticGenericModel


class GNB(ProbabilisticGenericModel):
    def __init__(self):
        self.classes = None
        self.class_priors = None
        self.means = None
        self.variances = None

    def train_core(self):
        """
        Trains the Gaussian Naive Bayes model by estimating the mean, variance
        and prior probability of each class.
        """
        X = self.X_train.values
        y = self.Y_train.values.flatten()

        # Identify the unique classes
        self.classes = np.unique(y)
        n_classes = len(self.classes)
        n_features = X.shape[1]

        # Initialize the parameters
        self.means = np.zeros((n_classes, n_features))
        self.variances = np.zeros((n_classes, n_features))
        self.class_priors = np.zeros(n_classes)

        # Compute means, variances and priors for each class
        for idx, cls in enumerate(self.classes):
            X_c = X[y == cls]
            self.means[idx, :] = np.mean(X_c, axis=0)
            # Add a small value to the variances to avoid division by zero
            self.variances[idx, :] = np.var(X_c, axis=0) + 1e-9
            self.class_priors[idx] = X_c.shape[0] / X.shape[0]

    def predict_proba(self, X):
        """
        Computes the class probabilities for the given test samples.
        """
        X = np.asarray(X)
        n_samples, n_features = X.shape
        n_classes = len(self.classes)
        log_probs = np.zeros((n_samples, n_classes))

        # Compute the log-probabilities for each class
        for idx, cls in enumerate(self.classes):
            mean = self.means[idx]
            var = self.variances[idx]
            prior = np.log(self.class_priors[idx])

            # Compute the log-likelihood
            log_likelihood = -0.5 * np.sum(np.log(2. * np.pi * var))
            log_likelihood -= 0.5 * np.sum(((X - mean) ** 2) / var, axis=1)
            log_probs[:, idx] = prior + log_likelihood

        # Normalize the log-probabilities using the log-sum-exp trick
        log_probs -= np.max(log_probs, axis=1, keepdims=True)
        probs = np.exp(log_probs)
        probs /= np.sum(probs, axis=1, keepdims=True)

        return probs

    def predict(self, X):
        """
        Predicts the class of each sample based on the maximum probability.
        """
        probs = self.predict_proba(X)
        class_indices = np.argmax(probs, axis=1)
        return self.classes[class_indices]

    def plot_confusion_matrix(self, X_test, Y_test):
        """
        Plots the confusion matrix for the given test data.
        """
        y_pred = self.predict(X_test)
        cm = confusion_matrix(Y_test, y_pred, labels=self.classes)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=self.classes)
        disp.plot(cmap=plt.cm.Blues)
        plt.title('Confusion Matrix')
        plt.show()

    def plot_roc_curve(self, X_test, Y_test):
        """
        Plots the ROC curve for the given test data.
        """
        if len(self.classes) != 2:
            print("The ROC curve is only applicable to binary classification problems.")
            return

        y_score = self.predict_proba(X_test)[:, 1]
        fpr, tpr, _ = roc_curve(Y_test, y_score, pos_label=self.classes[1])
        roc_auc = auc(fpr, tpr)

        plt.figure()
        plt.plot(fpr, tpr, color='darkorange', lw=2,
                 label='ROC curve (area = %0.2f)' % roc_auc)
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        plt.xlim([-0.01, 1.0])
        plt.ylim([0.0, 1.01])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curve')
        plt.legend(loc="lower right")
        plt.show()

    def plot_precision_recall_curve(self, X_test, Y_test):
        """
        Plots the precision-recall curve for the given test data.
        """
        if len(self.classes) != 2:
            print("The precision-recall curve is only applicable to binary classification problems.")
            return

        y_score = self.predict_proba(X_test)[:, 1]
        precision, recall, _ = precision_recall_curve(Y_test, y_score, pos_label=self.classes[1])
        average_precision = average_precision_score(Y_test, y_score, pos_label=self.classes[1])

        plt.figure()
        plt.step(recall, precision, where='post', color='b', alpha=0.2,
                 label='Precision-Recall (AP = %0.2f)' % average_precision)
        plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')

        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Precision-Recall Curve')
        plt.legend(loc="upper right")
        plt.show()
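
A minimal end-to-end sketch for the classifier above, using synthetic data. The class labels, sample sizes, and import path are illustrative assumptions, not part of the diff.

    import numpy as np
    import pandas as pd

    from bibmon.gaussian_naive_bayes import GNB  # assumed import path in this PR

    # Synthetic binary problem: two Gaussian blobs in 2-D
    rng = np.random.default_rng(42)
    X0 = rng.normal(loc=[0.0, 0.0], scale=1.0, size=(200, 2))
    X1 = rng.normal(loc=[3.0, 3.0], scale=1.0, size=(200, 2))
    X = pd.DataFrame(np.vstack([X0, X1]), columns=['x1', 'x2'])
    y = pd.Series([0] * 200 + [1] * 200)

    gnb = GNB()
    gnb.fit(X, y)                       # estimates per-class means, variances and priors

    X_new = np.array([[0.2, -0.1], [2.8, 3.1]])
    print(gnb.predict(X_new))           # expected: [0 1]
    print(gnb.predict_proba(X_new))     # class posteriors, rows sum to 1

    # Binary-only diagnostic plots
    gnb.plot_confusion_matrix(X.values, y.values)
    gnb.plot_roc_curve(X.values, y.values)
    gnb.plot_precision_recall_curve(X.values, y.values)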
2 changes: 2 additions & 0 deletions docs/source/conf.py
@@ -32,6 +32,8 @@
# ones.
extensions = ['sphinx.ext.autodoc','sphinx.ext.napoleon','myst_nb']

myst_enable_extensions = ["dollarmath", "amsmath"]

# Add any paths that contain templates here, relative to this directory.
#templates_path = ['_templates']
