Merge pull request #37 from WenjieDu/dev

Merge `dev` into `main`
WenjieDu · Mar 20, 2023 · 818e7ef · 818e7ef
2 parents 4116399 + 8c584b5
commit 818e7ef
Show file tree

Hide file tree

Showing 22 changed files with 344 additions and 88 deletions.
diff --git a/README.md b/README.md
@@ -26,7 +26,7 @@
     </a>
     <!-- PyPI download number -->
     <a alt='PyPI download number' href='https://pepy.tech/project/pypots'>
-        <img src='https://static.pepy.tech/personalized-badge/pypots?period=total&units=international_system&left_color=grey&right_color=blue&left_text=Downloads'>
+        <img src='https://static.pepy.tech/personalized-badge/pypots?period=total&units=none&left_color=grey&right_color=blue&left_text=Downloads'>
     </a>
     <!-- Zenodo DOI -->
     <a alt='Zenodo DOI' href='https://zenodo.org/badge/latestdoi/475477908'>
@@ -86,7 +86,7 @@ mae = cal_mae(imputation, X_intact, indicating_mask)  # calculate mean absolute
 | Task                          | Type           | Algorithm                                                                | Year | Reference |        
 |-------------------------------|----------------|--------------------------------------------------------------------------|------|-----------|
 | Imputation                    | Neural Network | SAITS (Self-Attention-based Imputation for Time Series)                  | 2023 | [^1]      |
-| Imputation                    | Neural Network | Transformer                                                              | 2017 | [^2] [^1] |
+| Imputation                    | Neural Network | Transformer                                                              | 2017 | [^1] [^2] |
 | Imputation,<br>Classification | Neural Network | BRITS (Bidirectional Recurrent Imputation for Time Series)               | 2018 | [^3]      |
 | Imputation                    | Naive          | LOCF (Last Observation Carried Forward)                                  | -    | -         |
 | Classification                | Neural Network | GRU-D                                                                    | 2018 | [^4]      |
@@ -120,14 +120,14 @@ The documentation and tutorials are under construction. And a short paper introd
 Thank you all for your attention! 😃
 
 
-[^1]: Du, W., Cote, D., & Liu, Y. (2023). [SAITS: Self-Attention-based Imputation for Time Series](https://doi.org/10.1016/j.eswa.2023.119619). Expert systems with applications.
-[^2]: Vaswani, A., Shazeer, N.M., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., & Polosukhin, I. (2017). [Attention is All you Need](https://papers.nips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html). NeurIPS 2017.
-[^3]: Cao, W., Wang, D., Li, J., Zhou, H., Li, L., & Li, Y. (2018). [BRITS: Bidirectional Recurrent Imputation for Time Series](https://papers.nips.cc/paper/2018/hash/734e6bfcd358e25ac1db0a4241b95651-Abstract.html). NeurIPS 2018.
-[^4]: Che, Z., Purushotham, S., Cho, K., Sontag, D.A., & Liu, Y. (2018). [Recurrent Neural Networks for Multivariate Time Series with Missing Values](https://www.nature.com/articles/s41598-018-24271-9). Scientific Reports, 8.
-[^5]: Zhang, X., Zeman, M., Tsiligkaridis, T., & Zitnik, M. (2022). [Graph-Guided Network for Irregularly Sampled Multivariate Time Series](https://arxiv.org/abs/2110.05357). ICLR 2022.
-[^6]: Ma, Q., Chen, C., Li, S., & Cottrell, G. W. (2021). [Learning Representations for Incomplete Time Series Clustering](https://ojs.aaai.org/index.php/AAAI/article/view/17070). AAAI 2021.
-[^7]: Jong, J.D., Emon, M.A., Wu, P., Karki, R., Sood, M., Godard, P., Ahmad, A., Vrooman, H.A., Hofmann-Apitius, M., & Fröhlich, H. (2019). [Deep learning for clustering of multivariate clinical patient trajectories with missing values](https://academic.oup.com/gigascience/article/8/11/giz134/5626377). GigaScience, 8.
-[^8]: Chen, X., & Sun, L. (2021). [Bayesian Temporal Factorization for Multidimensional Time Series Prediction](https://arxiv.org/abs/1910.06366). IEEE transactions on pattern analysis and machine intelligence, PP.
+[^1]: Du, W., Cote, D., & Liu, Y. (2023). [SAITS: Self-Attention-based Imputation for Time Series](https://doi.org/10.1016/j.eswa.2023.119619). *Expert Systems with Applications*.
+[^2]: Vaswani, A., Shazeer, N.M., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., & Polosukhin, I. (2017). [Attention is All you Need](https://papers.nips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html). *NeurIPS* 2017.
+[^3]: Cao, W., Wang, D., Li, J., Zhou, H., Li, L., & Li, Y. (2018). [BRITS: Bidirectional Recurrent Imputation for Time Series](https://papers.nips.cc/paper/2018/hash/734e6bfcd358e25ac1db0a4241b95651-Abstract.html). *NeurIPS* 2018.
+[^4]: Che, Z., Purushotham, S., Cho, K., Sontag, D.A., & Liu, Y. (2018). [Recurrent Neural Networks for Multivariate Time Series with Missing Values](https://www.nature.com/articles/s41598-018-24271-9). *Scientific Reports*.
+[^5]: Zhang, X., Zeman, M., Tsiligkaridis, T., & Zitnik, M. (2022). [Graph-Guided Network for Irregularly Sampled Multivariate Time Series](https://arxiv.org/abs/2110.05357). *ICLR* 2022.
+[^6]: Ma, Q., Chen, C., Li, S., & Cottrell, G. W. (2021). [Learning Representations for Incomplete Time Series Clustering](https://ojs.aaai.org/index.php/AAAI/article/view/17070). *AAAI* 2021.
+[^7]: Jong, J.D., Emon, M.A., Wu, P., Karki, R., Sood, M., Godard, P., Ahmad, A., Vrooman, H.A., Hofmann-Apitius, M., & Fröhlich, H. (2019). [Deep learning for clustering of multivariate clinical patient trajectories with missing values](https://academic.oup.com/gigascience/article/8/11/giz134/5626377). *GigaScience*.
+[^8]: Chen, X., & Sun, L. (2021). [Bayesian Temporal Factorization for Multidimensional Time Series Prediction](https://arxiv.org/abs/1910.06366). *IEEE Transactions on Pattern Analysis and Machine Intelligence*.
 
 <details>
 <summary>🏠 Visits</summary>

diff --git a/pypots/__init__.py b/pypots/__init__.py
@@ -5,5 +5,13 @@
 # Created by Wenjie Du <[email protected]>
 # License: GPL-v3
 
-
 from .__version__ import version as __version__
+
+__all__ = [
+    "data",
+    "imputation",
+    "classification",
+    "clustering",
+    "forecasting",
+    "utils",
+]
diff --git a/pypots/base.py b/pypots/base.py
@@ -4,11 +4,15 @@
 
 # Created by Wenjie Du <[email protected]>
 # License: GPL-v3
+
+import os
 from abc import ABC
 
 import numpy as np
 import torch
-from torch.utils.tensorboard import SummaryWriter
+
+from pypots.utils.logging import logger
+from pypots.utils.files import create_dir_if_not_exist
 
 
 class BaseModel(ABC):
@@ -24,7 +28,7 @@ def __init__(self, device):
                 if torch.cuda.is_available() and torch.cuda.device_count() > 0
                 else "cpu"
             )
-            print("No given device, using default device:", self.device)
+            logger.info(f"No given device, using default device: {self.device}")
         else:
             self.device = device
 
@@ -136,21 +140,41 @@ def save_logs_to_tensorboard(self, saving_path):
         # tb_summary_writer = SummaryWriter(saving_path)
         # tb_summary_writer.add_custom_scalars(self.logger)
         # tb_summary_writer.close()
-        # print(f'Log saved successfully to {saving_path}.')
+        # logger.info(f'Log saved successfully to {saving_path}.')
 
-    def save_model(self, saving_path):
+    def save_model(self, saving_dir, name, overwrite=False):
         """Save the model to a disk file.
 
+        A .pypots extension will be appended to the filename if it does not already have one.
+        Please note that such an extension is not required; it only indicates that the saved model comes from the PyPOTS framework, so that people can distinguish it.
+
         Parameters
         ----------
-        saving_path : str,
-            The given path to save the model.
+        saving_dir : str,
+            The given directory to save the model.
+
+        name : str,
+            The file name of the model to be saved.
+
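+        overwrite : bool,
+            Whether to overwrite the model file if it already exists. If False (the default) and the file exists, the saving operation is aborted.
+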
         """
+        name = name + ".pypots" if name.split(".")[-1] != "pypots" else name
+        saving_path = os.path.join(saving_dir, name)
+        if os.path.exists(saving_path):
+            if overwrite:
+                logger.warning(
+                    f"File {saving_path} exists. Argument `overwrite` is True. Overwriting now..."
+                )
+            else:
+                logger.error(f"File {saving_path} exists. Saving operation aborted.")
+                return
         try:
+            create_dir_if_not_exist(saving_dir)
             torch.save(self.model, saving_path)
+            logger.info(f"Saved successfully to {saving_path}.")
         except Exception as e:
-            print(e)
-        print(f"Saved successfully to {saving_path}.")
+            raise RuntimeError(f'{e} Failed to save the model to "{saving_path}"!')
 
     def load_model(self, model_path):
         """Load the saved model from a disk file.
@@ -174,7 +198,7 @@ def load_model(self, model_path):
                 self.model = loaded_model.model
         except Exception as e:
             raise e
-        print(f"Model loaded successfully from {model_path}.")
+        logger.info(f"Model loaded successfully from {model_path}.")
 
 
 class BaseNNModel(BaseModel):
@@ -202,6 +226,6 @@ def __init__(
     def _print_model_size(self):
         """Print the number of trainable parameters in the initialized NN model."""
         num_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
-        print(
+        logger.info(
             f"Model initialized successfully. Number of the trainable parameters: {num_params}"
         )
diff --git a/pypots/classification/base.py b/pypots/classification/base.py
@@ -12,6 +12,7 @@
 import torch
 
 from pypots.base import BaseModel, BaseNNModel
+from pypots.utils.logging import logger
 
 
 class BaseClassifier(BaseModel):
@@ -116,12 +117,12 @@ def _train_model(self, training_loader, val_loader=None):
 
                     mean_val_loss = np.mean(epoch_val_loss_collector)
                     self.logger["validating_loss"].append(mean_val_loss)
-                    print(
+                    logger.info(
                         f"epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}"
                     )
                     mean_loss = mean_val_loss
                 else:
-                    print(f"epoch {epoch}: training loss {mean_train_loss:.4f}")
+                    logger.info(f"epoch {epoch}: training loss {mean_train_loss:.4f}")
                     mean_loss = mean_train_loss
 
                 if mean_loss < self.best_loss:
@@ -131,12 +132,12 @@ def _train_model(self, training_loader, val_loader=None):
                 else:
                     self.patience -= 1
                     if self.patience == 0:
-                        print(
+                        logger.info(
                             "Exceeded the training patience. Terminating the training procedure..."
                         )
                         break
         except Exception as e:
-            print(f"Exception: {e}")
+            logger.info(f"Exception: {e}")
             if self.best_model_dict is None:
                 raise RuntimeError(
                     "Training got interrupted. Model was not get trained. Please try fit() again."
@@ -151,4 +152,4 @@ def _train_model(self, training_loader, val_loader=None):
         if np.equal(self.best_loss, float("inf")):
             raise ValueError("Something is wrong. best_loss is Nan after training.")
 
-        print("Finished training.")
+        logger.info("Finished training.")
diff --git a/pypots/classification/raindrop.py b/pypots/classification/raindrop.py
@@ -27,6 +27,10 @@
 from torch.nn.parameter import Parameter
 from torch.utils.data import DataLoader
 
+from pypots.classification.base import BaseNNClassifier
+from pypots.data.dataset_for_grud import DatasetForGRUD
+from pypots.utils.logging import logger
+
 try:
     from torch_geometric.nn.conv import MessagePassing
     from torch_geometric.nn.inits import glorot
@@ -35,15 +39,12 @@
     from torch_scatter import scatter
     from torch_sparse import SparseTensor
 except ImportError as e:
-    print(
+    logger.error(
         f"{e}\n"
         "torch_geometric is missing, "
         "please install it with 'pip install torch_geometric' or 'conda install -c pyg pyg'"
     )
 
-from pypots.classification.base import BaseNNClassifier
-from pypots.data.dataset_for_grud import DatasetForGRUD
-
 
 class PositionalEncodingTF(nn.Module):
     """Generate positional encoding according to time information."""
@@ -96,7 +97,7 @@ def __init__(
         edge_dim: Optional[int] = None,
         bias: bool = True,
         root_weight: bool = True,
-        **kwargs
+        **kwargs,
     ):
         kwargs.setdefault("aggr", "add")
         super().__init__(node_dim=0, **kwargs)

diff --git a/pypots/clustering/__init__.py b/pypots/clustering/__init__.py
@@ -8,4 +8,7 @@
 from pypots.clustering.crli import CRLI
 from pypots.clustering.vader import VaDER
 
-__all__ = ["CRLI", "VaDER"]
+__all__ = [
+    "CRLI",
+    "VaDER",
+]
diff --git a/pypots/clustering/base.py b/pypots/clustering/base.py
@@ -12,6 +12,7 @@
 import torch
 
 from pypots.base import BaseModel, BaseNNModel
+from pypots.utils.logging import logger
 
 
 class BaseClusterer(BaseModel):
@@ -110,12 +111,12 @@ def _train_model(self, training_loader, val_loader=None):
 
                     mean_val_loss = np.mean(epoch_val_loss_collector)
                     self.logger["validating_loss"].append(mean_val_loss)
-                    print(
+                    logger.info(
                         f"epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}"
                     )
                     mean_loss = mean_val_loss
                 else:
-                    print(f"epoch {epoch}: training loss {mean_train_loss:.4f}")
+                    logger.info(f"epoch {epoch}: training loss {mean_train_loss:.4f}")
                     mean_loss = mean_train_loss
 
                 if mean_loss < self.best_loss:
@@ -125,12 +126,12 @@ def _train_model(self, training_loader, val_loader=None):
                 else:
                     self.patience -= 1
                     if self.patience == 0:
-                        print(
+                        logger.info(
                             "Exceeded the training patience. Terminating the training procedure..."
                         )
                         break
         except Exception as e:
-            print(f"Exception: {e}")
+            logger.info(f"Exception: {e}")
             if self.best_model_dict is None:
                 raise RuntimeError(
                     "Training got interrupted. Model was not get trained. Please try fit() again."
@@ -145,4 +146,4 @@ def _train_model(self, training_loader, val_loader=None):
         if np.equal(self.best_loss, float("inf")):
             raise ValueError("Something is wrong. best_loss is Nan after training.")
 
-        print("Finished training.")
+        logger.info("Finished training.")
diff --git a/pypots/clustering/crli.py b/pypots/clustering/crli.py
@@ -16,6 +16,7 @@
 
 from pypots.clustering.base import BaseNNClusterer
 from pypots.data.dataset_for_grud import DatasetForGRUD
+from pypots.utils.logging import logger
 from pypots.utils.metrics import cal_mse
 
 RNN_CELL = {
@@ -437,7 +438,7 @@ def _train_model(self, training_loader, val_loader=None):
                 )  # mean training loss of the current epoch
                 self.logger["training_loss_generator"].append(mean_train_G_loss)
                 self.logger["training_loss_discriminator"].append(mean_train_D_loss)
-                print(
+                logger.info(
                     f"epoch {epoch}: "
                     f"training loss_generator {mean_train_G_loss:.4f}, "
                     f"train loss_discriminator {mean_train_D_loss:.4f}"
@@ -451,12 +452,12 @@ def _train_model(self, training_loader, val_loader=None):
                 else:
                     self.patience -= 1
                     if self.patience == 0:
-                        print(
+                        logger.info(
                             "Exceeded the training patience. Terminating the training procedure..."
                         )
                         break
         except Exception as e:
-            print(f"Exception: {e}")
+            logger.info(f"Exception: {e}")
             if self.best_model_dict is None:
                 raise RuntimeError(
                     "Training got interrupted. Model was not get trained. Please try fit() again."
@@ -471,7 +472,7 @@ def _train_model(self, training_loader, val_loader=None):
         if np.equal(self.best_loss, float("inf")):
             raise ValueError("Something is wrong. best_loss is Nan after training.")
 
-        print("Finished training.")
+        logger.info("Finished training.")
 
     def cluster(self, X):
         X = self.check_input(self.n_steps, self.n_features, X)

diff --git a/pypots/clustering/vader.py b/pypots/clustering/vader.py
@@ -21,6 +21,7 @@
 
 from pypots.clustering.base import BaseNNClusterer
 from pypots.data.dataset_for_grud import DatasetForGRUD
+from pypots.utils.logging import logger
 from pypots.utils.metrics import cal_mse
 
 
@@ -478,12 +479,12 @@ def _train_model(self, training_loader, val_loader=None):
 
                     mean_val_loss = np.mean(epoch_val_loss_collector)
                     self.logger["validating_loss"].append(mean_val_loss)
-                    print(
+                    logger.info(
                         f"epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}"
                     )
                     mean_loss = mean_val_loss
                 else:
-                    print(f"epoch {epoch}: training loss {mean_train_loss:.4f}")
+                    logger.info(f"epoch {epoch}: training loss {mean_train_loss:.4f}")
                     mean_loss = mean_train_loss
 
                 if mean_loss < self.best_loss:
@@ -493,12 +494,12 @@ def _train_model(self, training_loader, val_loader=None):
                 else:
                     self.patience -= 1
                     if self.patience == 0:
-                        print(
+                        logger.info(
                             "Exceeded the training patience. Terminating the training procedure..."
                         )
                         break
         except Exception as e:
-            print(f"Exception: {e}")
+            logger.info(f"Exception: {e}")
             if self.best_model_dict is None:
                 raise RuntimeError(
                     "Training got interrupted. Model was not get trained. Please try fit() again."
@@ -513,7 +514,7 @@ def _train_model(self, training_loader, val_loader=None):
         if np.equal(self.best_loss, float("inf")):
             raise ValueError("Something is wrong. best_loss is Nan after training.")
 
-        print("Finished training.")
+        logger.info("Finished training.")
 
     def cluster(self, X):
         X = self.check_input(self.n_steps, self.n_features, X)

diff --git a/pypots/data/load_specific_datasets.py b/pypots/data/load_specific_datasets.py
@@ -7,6 +7,7 @@
 
 import pandas as pd
 import tsdb
+from pypots.utils.logging import logger
 
 SUPPORTED_DATASETS = [
     "physionet_2012",
@@ -80,7 +81,7 @@ def load_specific_dataset(dataset_name, use_cache=True):
         e.g. standardizing and splitting.
 
     """
-    print(
+    logger.info(
         f"Loading the dataset {dataset_name} with TSDB (https://github.com/WenjieDu/Time_Series_Database)..."
     )
     assert dataset_name in SUPPORTED_DATASETS, (
@@ -89,7 +90,7 @@ def load_specific_dataset(dataset_name, use_cache=True):
         f"please create an issue on GitHub "
         f"https://github.com/WenjieDu/PyPOTS/issues"
     )
-    print(f"Starting preprocessing {dataset_name}...")
+    logger.info(f"Starting preprocessing {dataset_name}...")
     data = tsdb.load_dataset(dataset_name, use_cache)
     data = PREPROCESSING[dataset_name](data)
     return data