docs: update and fix format (#56)
tilman151 authored Jan 31, 2024
1 parent ac70405 commit 494052f
Showing 11 changed files with 52 additions and 17 deletions.
3 changes: 2 additions & 1 deletion docs/use_cases/feature_extraction.md
@@ -3,7 +3,8 @@ It may be useful to extract hand-crafted features, i.e. RMS or P2P, from this vi
The [RulDataModule][rul_datasets.core.RulDataModule] provides the option to use a custom feature extractor on each window of data.

The feature extractor can be anything that can be called as a function.
-It should take a numpy array with the shape `[num_windows, window_size, num_features]` and return another array.
+It should take a feature array with the shape `[num_windows, window_size, num_features]` and a target array with the shape `[num_windows]`.
+The return value should be the transformed feature and target arrays.
Depending on whether a `window_size` is supplied to the data module, the expected output shape of the feature extractor is:

* `window_size is None`: `[num_new_windows, new_window_size, features]`
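For illustration, a minimal sketch of such a callable, assuming a `window_size` is also passed to the data module so the extractor may collapse each window to a flat feature vector (the RMS computation and the `feature_extractor`/`window_size` argument names are assumptions, not taken from this diff):

```python
import numpy as np


def rms_extractor(features: np.ndarray, targets: np.ndarray):
    """Collapse each window to its per-channel RMS value."""
    # features: [num_windows, window_size, num_features] -> [num_windows, num_features]
    rms = np.sqrt(np.mean(features**2, axis=1))
    return rms, targets
```

Such an extractor could then be handed to the data module, e.g. `RulDataModule(reader, batch_size=32, feature_extractor=rms_extractor, window_size=10)`.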
14 changes: 9 additions & 5 deletions docs/use_cases/libraries.md
@@ -9,24 +9,28 @@ The RUL Datasets library implements several data modules that are 100% compatibl
import pytorch_lightning as pl
import rul_datasets

-import rul_estimator
+import rul_estimator # (1)!


cmapss_fd1 = rul_datasets.CmapssReader(fd=1)
dm = rul_datasets.RulDataModule(cmapss_fd1, batch_size=32)

-my_rul_estimator = rul_estimator.MyRulEstimator() # (1)!
+my_rul_estimator = rul_estimator.MyRulEstimator() # (2)!

trainer = pl.Trainer(max_epochs=100)
-trainer.fit(my_rul_estimator, dm) # (2)!
+trainer.fit(my_rul_estimator, dm) # (3)!

trainer.test(my_rul_estimator, dm)
```

-1. This should be a subclass of [LightningModule][pytorch_lightning.core.LightningModule].
-2. The trainer calls the data module's `prepare_data` and `setup` functions automatically.
+1. This is a hypothetical module containing your model.
+2. This should be a subclass of [LightningModule][lightning.pytorch.core.LightningModule].
+3. The trainer calls the data module's `prepare_data` and `setup` functions automatically.

The RUL datasets library loads all data into memory at once and uses the main process for creating batches, i.e. `num_workers=0` for all dataloaders.
Unnecessary copies are avoided by using shared memory for both Numpy and PyTorch.
This means that modifying a batch directly, e.g., `features += 1` should be avoided.
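A minimal illustration of the distinction (a sketch only, not part of the library's docs):

```python
features, targets = batch     # a batch yielded by one of the dataloaders

new_features = features + 1   # fine: allocates a fresh tensor
features += 1                 # avoid: mutates the tensor living in shared memory
```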

When data is held in memory, multiple data loading processes are unnecessary and may even slow down training.
The warning produced by PyTorch Lightning that `num_workers` is too low is, therefore, suppressed.

4 changes: 2 additions & 2 deletions docs/use_cases/tasks.md
@@ -96,8 +96,8 @@ For validation and testing, the data module returns data loaders for the full la
## Unsupervised Domain Adaption

Unsupervised domain adaption uses a labeled dataset from a source domain to train a model for a target domain for which only unlabeled data is available.
-All included dataset consist of multiple sub-datasets that can be viewed as different domains.
-As the sub-dataset still bear a sufficient similarity to each other, domain adaption between them should be possible.
+All included datasets consist of multiple sub-datasets that can be viewed as different domains.
+As the sub-datasets still bear a sufficient similarity to each other, domain adaption between them should be possible.
The `get_compatible` function is useful to construct a reader for a different sub-dataset from an existing one:

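A minimal sketch of what such a snippet might look like (the exact keyword arguments of `get_compatible` are assumed here, not taken from this diff):

```pycon
>>> import rul_datasets
>>> fd1 = rul_datasets.CmapssReader(fd=1)
>>> fd3 = fd1.get_compatible(fd=3)  # same settings as fd1, but for sub-dataset FD003
```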
6 changes: 5 additions & 1 deletion rul_datasets/adaption.py
@@ -134,6 +134,7 @@ def train_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
Args:
*args: Ignored. Only for adhering to parent class interface.
**kwargs: Ignored. Only for adhering to parent class interface.
Returns:
The training data loader
"""
@@ -156,6 +157,7 @@ def val_dataloader(self, *args: Any, **kwargs: Any) -> List[DataLoader]:
Args:
*args: Ignored. Only for adhering to parent class interface.
**kwargs: Ignored. Only for adhering to parent class interface.
Returns:
The source, target and an optional paired validation data loader.
"""
@@ -184,6 +186,7 @@ def test_dataloader(self, *args: Any, **kwargs: Any) -> List[DataLoader]:
Args:
*args: Ignored. Only for adhering to parent class interface.
**kwargs: Ignored. Only for adhering to parent class interface.
Returns:
The source and target test data loader.
"""
@@ -323,9 +326,10 @@ def split_healthy(
targets: List of target time series.
by_max_rul: Whether to split healthy and degrading data by max RUL value.
by_steps: Split healthy and degrading data after this number of time steps.
Returns:
healthy: Dataset of healthy data.
-degrading: Dataset of degrading data.
+degraded: Dataset of degrading data.
"""
if not by_max_rul and (by_steps is None):
raise ValueError("Either 'by_max_rul' or 'by_steps' need to be set.")
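A hypothetical call to `split_healthy`, assuming the feature and target lists were loaded beforehand and that the first 30 time steps of each run count as healthy (the values are illustrative only):

```python
from rul_datasets.adaption import split_healthy

# features: list of run arrays, targets: list of matching RUL arrays
healthy, degraded = split_healthy(features, targets, by_steps=30)
```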
1 change: 1 addition & 0 deletions rul_datasets/baseline.py
@@ -111,6 +111,7 @@ def test_dataloader(self, *args: Any, **kwargs: Any) -> List[DataLoader]:
Args:
*args: Passed down to each data module.
**kwargs: Passed down to each data module.
Returns:
The test dataloaders of all sub-datasets.
"""
6 changes: 6 additions & 0 deletions rul_datasets/core.py
@@ -190,6 +190,7 @@ def is_mutually_exclusive(self, other: "RulDataModule") -> bool:
Args:
other: Data module to check exclusivity against.
Returns:
Whether both data modules are mutually exclusive.
"""
@@ -254,6 +255,7 @@ def load_split(
split: The desired split to load.
alias: The split as which the loaded data should be treated.
degraded_only: Whether to only load degraded samples.
Returns:
The feature and target tensors of the split's runs.
"""
@@ -319,6 +321,7 @@ def train_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
Args:
*args: Ignored. Only for adhering to parent class interface.
**kwargs: Ignored. Only for adhering to parent class interface.
Returns:
The training data loader
"""
@@ -347,6 +350,7 @@ def val_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
Args:
*args: Ignored. Only for adhering to parent class interface.
**kwargs: Ignored. Only for adhering to parent class interface.
Returns:
The validation data loader
"""
@@ -370,6 +374,7 @@ def test_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
Args:
*args: Ignored. Only for adhering to parent class interface.
**kwargs: Ignored. Only for adhering to parent class interface.
Returns:
The test data loader
"""
@@ -395,6 +400,7 @@ def to_dataset(self, split: str, alias: Optional[str] = None) -> "RulDataset":
Args:
split: The split to place inside the dataset.
alias: The split the loaded data should be treated as.
Returns:
A dataset containing the requested split.
"""
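Taken together, a hypothetical workflow with these `RulDataModule` helpers might look as follows (the argument values are illustrative and not prescribed by this diff):

```python
import rul_datasets

dm = rul_datasets.RulDataModule(rul_datasets.CmapssReader(fd=1), batch_size=32)
dm.prepare_data()
dm.setup("fit")

features, targets = dm.load_split("dev", degraded_only=True)  # tensors of the degraded dev runs
dev_dataset = dm.to_dataset("dev")                             # the same split wrapped as a dataset
```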
6 changes: 6 additions & 0 deletions rul_datasets/reader/abstract.py
@@ -139,6 +139,7 @@ def default_window_size(self, fd: int) -> int:
Args:
fd: The index of a sub-dataset.
Returns:
The default window size for the sub-dataset.
"""
@@ -170,6 +171,7 @@ def load_complete_split(
Args:
split: The name of the split to load.
alias: The split as which the loaded data should be treated.
Returns:
features: The complete, scaled features of the desired split.
targets: The capped target values corresponding to the features.
@@ -194,6 +196,7 @@ def load_split(
Args:
split: The desired split to load.
alias: The split as which the loaded data should be treated.
Returns:
features: The scaled, truncated features of the desired split.
targets: The truncated targets of the desired split.
@@ -250,6 +253,7 @@ def get_compatible(
percent_fail_runs: Override this value in the new reader.
truncate_val: Override this value in the new reader.
consolidate_window_size: How to consolidate the window size of the readers.
Returns:
A compatible reader with optional overrides.
"""
@@ -300,6 +304,7 @@ def get_complement(
Args:
percent_broken: Override this value in the new reader.
truncate_val: Override this value in the new reader.
Returns:
A compatible reader with all development runs missing in this one.
"""
@@ -338,6 +343,7 @@ def is_mutually_exclusive(self, other: "AbstractReader") -> bool:
Args:
other: The reader to check exclusivity against.
Returns:
Whether the readers are mutually exclusive.
"""
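As a hedged example of how `get_complement` and `is_mutually_exclusive` relate (the `percent_fail_runs` value is made up):

```python
import rul_datasets

fd1_front = rul_datasets.CmapssReader(fd=1, percent_fail_runs=0.8)  # sees 80% of the dev runs
fd1_rest = fd1_front.get_complement()                               # gets the remaining 20%

assert fd1_front.is_mutually_exclusive(fd1_rest)
```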
3 changes: 3 additions & 0 deletions rul_datasets/reader/saving.py
@@ -42,6 +42,7 @@ def load(save_path: str, memmap: bool = False) -> Tuple[np.ndarray, np.ndarray]:
save_path: Path that was supplied to the
[save][rul_datasets.reader.saving.save] function.
memmap: whether to use memmap to avoid loading the whole run into memory
Returns:
features: The feature array saved in `save_path`
targets: The target array saved in `save_path`
@@ -64,6 +65,7 @@ def load_multiple(
Args:
save_paths: The list of run files to load.
memmap: See [load][rul_datasets.reader.saving.load]
Returns:
features: The feature arrays saved in `save_paths`
targets: The target arrays saved in `save_paths`
@@ -84,6 +86,7 @@ def exists(save_path: str) -> bool:
Args:
save_path: the `save_path` the [save][rul_datasets.reader.saving.save]
function was called with
Returns:
Whether the files exist
"""
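A small, hypothetical round trip with these helpers (the save path is made up; it would normally be the one handed to `save` earlier):

```python
from rul_datasets.reader import saving

if saving.exists("./FD001_run_000"):
    features, targets = saving.load("./FD001_run_000", memmap=True)
    feature_list, target_list = saving.load_multiple(
        ["./FD001_run_000", "./FD001_run_001"]
    )
```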
7 changes: 6 additions & 1 deletion rul_datasets/reader/scaling.py
@@ -86,6 +86,7 @@ def partial_fit(
Args:
features: The feature array to be scaled.
operation_conditions: The condition values compared against the boundaries.
Returns:
The partially fitted scaler.
"""
@@ -112,6 +113,7 @@ def transform(
Args:
features: The features to be scaled.
operation_conditions: The condition values compared against the boundaries.
Returns:
The scaled features.
"""
@@ -160,6 +162,7 @@ def fit_scaler(
features: The RUL features.
scaler: The scaler to be fit. Defaults to a StandardScaler.
operation_conditions: The operation conditions for condition aware scaling.
Returns:
The fitted scaler.
"""
@@ -218,6 +221,7 @@ def load_scaler(save_path: str) -> Scaler:
Args:
save_path: The path the scaler was saved to.
Returns:
The loaded scaler.
"""
@@ -233,7 +237,7 @@ def scale_features(
operation_conditions: Optional[List[np.ndarray]] = None,
) -> List[np.ndarray]:
"""
-Scaler the RUL features with a given scaler.
+Scale the RUL features with a given scaler.
The features can have a shape of `[num_time_steps, channels]` or `[num_windows,
window_size, channels]`. The scaler needs to work on the channel dimension. If it
@@ -246,6 +250,7 @@
features: The RUL features to be scaled.
scaler: The already fitted scaler.
operation_conditions: The operation conditions for condition aware scaling.
Returns:
The scaled features.
"""
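A hedged sketch of the intended scaling workflow (the scaler choice and the random data are purely illustrative):

```python
import numpy as np
from sklearn.preprocessing import MinMaxScaler

from rul_datasets.reader import scaling

features = [np.random.randn(100, 30, 14) for _ in range(3)]  # three windowed runs

scaler = scaling.fit_scaler(features, MinMaxScaler())  # omit the scaler for a StandardScaler
scaled = scaling.scale_features(features, scaler)      # same shapes, scaled per channel
```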
3 changes: 3 additions & 0 deletions rul_datasets/ssl.py
@@ -115,6 +115,7 @@ def train_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
Args:
*args: Ignored. Only for adhering to parent class interface.
**kwargs: Ignored. Only for adhering to parent class interface.
Returns:
The training data loader
"""
@@ -132,6 +133,7 @@ def val_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
Args:
*args: Ignored. Only for adhering to parent class interface.
**kwargs: Ignored. Only for adhering to parent class interface.
Returns:
The labeled validation data loader.
"""
@@ -144,6 +146,7 @@ def test_dataloader(self, *args: Any, **kwargs: Any) -> DataLoader:
Args:
*args: Ignored. Only for adhering to parent class interface.
**kwargs: Ignored. Only for adhering to parent class interface.
Returns:
The labeled test data loader.
"""
16 changes: 9 additions & 7 deletions rul_datasets/utils.py
@@ -12,16 +12,17 @@

def get_files_in_path(path: str, condition: Optional[Callable] = None) -> List[str]:
"""
Return the paths of all files in a path that satisfy a condition in alphabetical
order.
If the condition is `None` all files are returned.
Args:
path: the path to look into
condition: the include-condition for files
Returns:
all files that satisfy the condition in alphabetical order
"""
if condition is None:
feature_files = [f for f in os.listdir(path)]
@@ -83,6 +84,7 @@ def extract_windows(
window_size: length of the sliding window
dilation: dilation of the sliding window
mode: create windows either in memory or on disk
Returns:
array of sliding windows
"""
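For instance, a hypothetical call (the positional signature of `extract_windows` is assumed from the docstring above):

```python
import numpy as np

from rul_datasets import utils

signal = np.random.randn(1000, 2)  # [num_time_steps, channels]
windows = utils.extract_windows(signal, window_size=30, dilation=1)
print(windows.shape)  # expected: (971, 30, 2)
```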
