From f322cffb92905e5eaa2175d3f515f31b1d5d352c Mon Sep 17 00:00:00 2001 From: Reinder Vos de Wael Date: Thu, 3 Aug 2023 16:05:04 -0400 Subject: [PATCH] Various review comments (#11) * Update README * Various minor code cleanups * Further minor code cleanups * Lock python version to allow scipy update * Various review comments * Fix faulty merge --------- Co-authored-by: Reinder Vos de Wael --- README.md | 2 +- src/ba_timeseries_gradients/cli.py | 186 +----------------- src/ba_timeseries_gradients/gradients.py | 9 +- src/ba_timeseries_gradients/parser.py | 223 ++++++++++++++++++++++ src/ba_timeseries_gradients/utils.py | 3 - tests/integration/test_integration_cli.py | 6 +- tests/unit/test_parser.py | 67 +++++++ tests/unit/test_unit_cli.py | 6 - 8 files changed, 304 insertions(+), 198 deletions(-) create mode 100644 src/ba_timeseries_gradients/parser.py create mode 100644 tests/unit/test_parser.py diff --git a/README.md b/README.md index a8f0414..0887ad8 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![L-GPL License](https://img.shields.io/badge/license-L--GPL-blue.svg)](LICENSE) -This is a command line interface (CLI) for running BrainSpace on BIDS-compliant datasets. Gradients are computed for volumetric files in NIFTI format, or surface files in GIFTI format. For more details on BrainSpace, see the [BrainSpace documentation](https://brainspace.readthedocs.io/en/latest/). +This is a command line interface (CLI) for running BrainSpace on BIDS-compliant datasets. BrainSpace is a toolbox for macroscale gradient mapping, a common method to reduce the dimensionality of neuroimaging data. Gradients are computed for volumetric files in NIFTI format, or surface files in GIFTI format. For more details on BrainSpace, see the [BrainSpace documentation](https://brainspace.readthedocs.io/en/latest/). 
## Installation diff --git a/src/ba_timeseries_gradients/cli.py b/src/ba_timeseries_gradients/cli.py index 3a71042..feb59de 100755 --- a/src/ba_timeseries_gradients/cli.py +++ b/src/ba_timeseries_gradients/cli.py @@ -2,11 +2,10 @@ """ Command line interface for ba_timeseries_gradients. """ import argparse import logging -import pathlib import bids -from ba_timeseries_gradients import exceptions, gradients, logs, utils +from ba_timeseries_gradients import exceptions, gradients, logs, parser, utils LOGGER_NAME = logs.LOGGER_NAME @@ -18,10 +17,9 @@ def main() -> None: The main function that runs the ba_timeseries_gradients command line interface. """ logger.debug("Parsing command line arguments...") - parser = _get_parser() - args = parser.parse_args() + args = parser.get_parser().parse_args() - logger.setLevel(args.verbose) + logger.setLevel(logging.getLevelName(args.verbose.upper())) logger.debug("Getting input files...") files = _get_bids_files(args) @@ -49,180 +47,6 @@ def main() -> None: utils.save(output_gradients, lambdas, output_file) -def _get_parser() -> argparse.ArgumentParser: - """ - Returns an ArgumentParser object with the command line arguments for the ba_timeseries_gradients CLI. - - Returns: - argparse.ArgumentParser: An ArgumentParser object with the command line arguments. - - Notes: - Arguments in the bids_group must have a `dest` value equivalent to `bids_`, where - is the name of the argument in the BIDS specification. - """ - parser = argparse.ArgumentParser( - prog="ba_timeseries_gradients", - description="""Computes gradients for a BIDS dataset. 
If the target - files are volumetric, they must be in NIFTI format, and a parcellation - file must be provided.""", - epilog="""Issues can be reported at: https://github.com/cmi-dair/ba_timeseries_gradients.""", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - - mandatory_group = parser.add_argument_group("Mandatory arguments") - bids_group = parser.add_argument_group("BIDS arguments") - brainspace_group = parser.add_argument_group("BrainSpace arguments") - other_group = parser.add_argument_group("Other arguments") - - mandatory_group.add_argument( - "bids_dir", - type=pathlib.Path, - help="BIDS directory containing the input files.", - ) - mandatory_group.add_argument( - "output_dir", - type=pathlib.Path, - help="Output directory.", - ) - mandatory_group.add_argument( - "analysis_level", - type=str, - help="Level of the analysis that will be performed.", - choices=["group"], - ) - bids_group.add_argument( - "--subject", - required=False, - default=None, - type=str, - action="append", - dest="bids_subject", - help="The subject regex to use for searching BIDS files.", - ) - bids_group.add_argument( - "--session", - required=False, - default=None, - type=str, - action="append", - dest="bids_session", - help="The session to include for finding BIDS files.", - ) - bids_group.add_argument( - "--suffix", - required=False, - default="bold", - type=str, - dest="bids_suffix", - help="Suffix to use for finding BIDS files.", - ) - bids_group.add_argument( - "--run", - required=False, - default=None, - type=int, - action="append", - dest="bids_run", - help="The runs to include, may be supplied multiple times.", - ) - bids_group.add_argument( - "--task", - required=False, - default=None, - type=str, - action="append", - dest="bids_task", - help="The tasks to include, may be supplied multiple times.", - ) - bids_group.add_argument( - "--space", - required=False, - default=None, - type=str, - dest="bids_space", - help="The space to use for finding BIDS files.", - ) 
- bids_group.add_argument( - "--extension", - required=False, - default=".nii.gz", - type=str, - dest="bids_extension", - help="Extension to use for finding BIDS files.", - ) - bids_group.add_argument( - "--datatype", - required=False, - default=None, - type=str, - dest="bids_datatype", - ) - brainspace_group.add_argument( - "--parcellation", - required=False, - default=None, - type=pathlib.Path, - help="Parcellation to use for similarity calculation. Must be a GIFTI or NIFTI file, obligatory if input files are NIFTI.", - ) - brainspace_group.add_argument( - "--dimensionality_reduction", - required=False, - default="dm", - type=str, - help="Dimensionality reduction method to use. Must be one of 'pca', 'le', or 'dm'.", - ) - brainspace_group.add_argument( - "--kernel", - required=False, - default="cosine", - type=str, - help="Kernel to use for similarity calculation. Must be one of: 'pearson', 'spearman', 'cosine', 'normalized_angle', 'gaussian'.", - ) - brainspace_group.add_argument( - "--sparsity", - required=False, - default=0.9, - type=float, - help="Sparsity to use for similarity calculation. Must be a float between 0 and 1.", - ) - brainspace_group.add_argument( - "--n_components", - required=False, - default=10, - type=int, - help="Number of components to use for dimensionality reduction. Must be an integer.", - ) - other_group.add_argument( - "--force", - required=False, - action="store_true", - help="Force overwrite of output file if it already exists.", - ) - other_group.add_argument( - "--verbose", - required=False, - default=logging.INFO, - type=int, - help="Verbosity level. 
Must be one of: 10 (DEBUG), 20 (INFO), 30 (WARNING), 40 (ERROR), 50 (CRITICAL).", - ) - other_group.add_argument( - "--output_format", - required=False, - default="h5", - type=str, - help="Output file format", - choices=["h5", "json"], - ) - other_group.add_argument( - "--dry-run", - required=False, - action="store_true", - help="Do not run the pipeline, only show what input files would be used. Note that dry run is logged at the logging.INFO level.", - ) - - return parser - - def _raise_invalid_input(args: argparse.Namespace, files: list[str]) -> None: """ Check if the input arguments are valid. @@ -235,7 +59,6 @@ def _raise_invalid_input(args: argparse.Namespace, files: list[str]) -> None: InputError: If the output file already exists and the force flag is not set. InputError: If no input files are found. InputError: If input files are volume files and no parcellation is provided. - InputError: If the output format is not one of: 'hdf5', or 'json'. """ if (args.output_dir / "gradients.h5").exists() and not args.force: raise exceptions.InputError( @@ -252,8 +75,7 @@ def _raise_invalid_input(args: argparse.Namespace, files: list[str]) -> None: def _get_bids_files(args: argparse.Namespace) -> list[str]: - """ - Get the list of input files from the BIDS directory. + """Get the list of input files from the BIDS directory. Args: args: The parsed command-line arguments. diff --git a/src/ba_timeseries_gradients/gradients.py b/src/ba_timeseries_gradients/gradients.py index f8cba65..3b2d80e 100644 --- a/src/ba_timeseries_gradients/gradients.py +++ b/src/ba_timeseries_gradients/gradients.py @@ -26,8 +26,7 @@ def compute_gradients( n_components: int = 10, sparsity: float = 0.9, ) -> tuple[np.ndarray, np.ndarray]: - """ - Computes the gradients for a collection of files. + """Computes the gradients for a collection of files. Args: files: A collection of file paths containing timeseries data. 
@@ -39,7 +38,8 @@ def compute_gradients( sparsity: The sparsity level to use for the gradient computation. Returns: - The computed gradients as a numpy array. + numpy.ndarray: The computed gradients. + numpy.ndarray: The computed lambdas. Notes: The gradient computation is performed using the BrainSpace package. @@ -77,6 +77,9 @@ def _get_connectivity_matrix( Returns: A connectivity matrix as a numpy array. """ + if not files: + raise ValueError("No files provided.") + if parcellation_file: logger.debug("Loading parcellation data...") parcellation = nib.load(parcellation_file) diff --git a/src/ba_timeseries_gradients/parser.py b/src/ba_timeseries_gradients/parser.py new file mode 100644 index 0000000..d156973 --- /dev/null +++ b/src/ba_timeseries_gradients/parser.py @@ -0,0 +1,223 @@ +"""The parser module for the ba_timeseries_gradients CLI.""" +import argparse +import pathlib + + +def get_parser() -> argparse.ArgumentParser: + """ + Returns an ArgumentParser object with the command line arguments for the ba_timeseries_gradients CLI. + + Returns: + argparse.ArgumentParser: An ArgumentParser object with the command line arguments. + + Notes: + Arguments in the bids_group must have a `dest` value equivalent to `bids_`, where + is the name of the argument in the BIDS specification. + """ + parser = argparse.ArgumentParser( + prog="ba_timeseries_gradients", + description="""Computes gradients for a BIDS dataset. 
If the target + files are volumetric, they must be in NIFTI format, and a parcellation + file must be provided.""", + epilog="""Issues can be reported at: https://github.com/cmi-dair/ba_timeseries_gradients.""", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + mandatory_group = parser.add_argument_group("Mandatory arguments") + bids_group = parser.add_argument_group("BIDS arguments") + brainspace_group = parser.add_argument_group("BrainSpace arguments") + other_group = parser.add_argument_group("Other arguments") + + mandatory_group.add_argument( + "bids_dir", + type=_path_exists, + help="BIDS directory containing the input files.", + ) + mandatory_group.add_argument( + "output_dir", + type=pathlib.Path, + help="Output directory.", + ) + mandatory_group.add_argument( + "analysis_level", + type=str, + help="Level of the analysis that will be performed.", + choices=["group"], + ) + bids_group.add_argument( + "--subject", + required=False, + default=None, + type=str, + action="append", + dest="bids_subject", + help="The subject regex to use for searching BIDS files.", + ) + bids_group.add_argument( + "--session", + required=False, + default=None, + type=str, + action="append", + dest="bids_session", + help="The session to include for finding BIDS files.", + ) + bids_group.add_argument( + "--suffix", + required=False, + default="bold", + type=str, + dest="bids_suffix", + help="Suffix to use for finding BIDS files.", + ) + bids_group.add_argument( + "--run", + required=False, + default=None, + type=str, + action="append", + dest="bids_run", + help="The runs to include, may be supplied multiple times.", + ) + bids_group.add_argument( + "--task", + required=False, + default=None, + type=str, + action="append", + dest="bids_task", + help="The tasks to include, may be supplied multiple times.", + ) + bids_group.add_argument( + "--space", + required=False, + default=None, + type=str, + dest="bids_space", + help="The space to use for finding BIDS files.", + ) 
+ bids_group.add_argument( + "--extension", + required=False, + default=".nii.gz", + type=str, + dest="bids_extension", + help="Extension to use for finding BIDS files.", + ) + bids_group.add_argument( + "--datatype", + required=False, + default=None, + type=str, + dest="bids_datatype", + ) + brainspace_group.add_argument( + "--parcellation", + required=False, + default=None, + type=_path_exists, + help="Parcellation to use for similarity calculation. Must be a GIFTI or NIFTI file, obligatory if input files are NIFTI.", + ) + brainspace_group.add_argument( + "--dimensionality_reduction", + required=False, + default="dm", + type=str, + help="Dimensionality reduction method to use.", + choices=["pca", "le", "dm"], + ) + brainspace_group.add_argument( + "--kernel", + required=False, + default="cosine", + type=str, + help="Kernel to use for similarity calculation.", + choices=["pearson", "spearman", "cosine", "normalized_angle", "gaussian"], + ) + brainspace_group.add_argument( + "--sparsity", + required=False, + default=0.9, + type=_is_between_zero_and_one, + help="Sparsity to use for similarity calculation. Must be a float between 0 and 1.", + ) + brainspace_group.add_argument( + "--n_components", + required=False, + default=10, + type=_is_positive_integer, + help="Number of components to use for dimensionality reduction. 
Must be an integer.", + ) + other_group.add_argument( + "--force", + required=False, + action="store_true", + help="Force overwrite of output file if it already exists.", + ) + other_group.add_argument( + "--verbose", + required=False, + default="info", + type=str, + help="Verbosity level.", + choices=["debug", "info", "warning", "error", "critical"], + ) + other_group.add_argument( + "--output_format", + required=False, + default="h5", + type=str, + help="Output file format", + choices=["h5", "json"], + ) + other_group.add_argument( + "--dry-run", + required=False, + action="store_true", + help="Do not run the pipeline, only show what input files would be used. Note that dry run is logged at the logging.INFO level.", + ) + + return parser + + +def _path_exists(path_str: str) -> pathlib.Path: + """Checks if an argument is an existing path. + + Args: + path_str: The path to check. + + Returns: + pathlib.Path: The path as a pathlib.Path if it exists. + """ + path = pathlib.Path(path_str) + if path.exists(): + return path + raise argparse.ArgumentTypeError(f"{path} does not exist.") + + +def _is_between_zero_and_one(val: str) -> float: + """Checks if an argument is between 0 (inclusive) and 1 (exclusive). + + Args: + val: The argument to check. + + Returns: + float: The argument as a float if it is between 0 (inclusive) and 1 (exclusive). + """ + if 0 <= float(val) < 1: + return float(val) + raise argparse.ArgumentTypeError(f"{val} is not in range [0, 1).") + + +def _is_positive_integer(value: str) -> int: + """Checks if an argument is greater than 0. + + Args: + value: The argument to check. + + Returns: + int: The argument as an int if it is greater than 0.
+ """ + if value.isdigit() and int(value) > 0: + return int(value) + raise argparse.ArgumentTypeError("Argument is not a positive integer.") diff --git a/src/ba_timeseries_gradients/utils.py b/src/ba_timeseries_gradients/utils.py index aaec718..6e1c4f6 100644 --- a/src/ba_timeseries_gradients/utils.py +++ b/src/ba_timeseries_gradients/utils.py @@ -46,9 +46,6 @@ def save_hdf5( with h5py.File(filename, "w") as h5_file: h5_file.create_dataset("gradients", data=output_gradients) h5_file.create_dataset("lambdas", data=lambdas) - with h5py.File(filename, "w") as h5_file: - h5_file.create_dataset("gradients", data=output_gradients) - h5_file.create_dataset("lambdas", data=lambdas) def save_json( diff --git a/tests/integration/test_integration_cli.py b/tests/integration/test_integration_cli.py index 7286c36..aadb639 100644 --- a/tests/integration/test_integration_cli.py +++ b/tests/integration/test_integration_cli.py @@ -40,7 +40,7 @@ class MockParser: sparsity: float = 0.1 n_components: int = 10 force: bool = False - verbose: int = 0 + verbose: str = "info" dry_run: bool = False def parse_args(self, *args: Any) -> MockParser: @@ -107,7 +107,7 @@ def test_volume_input( mock_parser.output_dir = pathlib.Path(output_dir) mock_parser.parcellation = nifti_parcellation_file mocker.patch( - "ba_timeseries_gradients.cli._get_parser", + "ba_timeseries_gradients.parser.get_parser", return_value=mock_parser, ) mocker.patch( @@ -137,7 +137,7 @@ def test_surface_input( mock_parser.output_dir = pathlib.Path(output_dir) mock_parser.parcellation = surface_parcellation_file mocker.patch( - "ba_timeseries_gradients.cli._get_parser", + "ba_timeseries_gradients.parser.get_parser", return_value=mock_parser, ) mocker.patch( diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py new file mode 100644 index 0000000..6aac16a --- /dev/null +++ b/tests/unit/test_parser.py @@ -0,0 +1,67 @@ +"""Tests for the parser module.""" +# pylint: disable=protected-access +import argparse +import 
pathlib +import tempfile + +import pytest + +from ba_timeseries_gradients import parser + + +def test_get_parser() -> None: + """Test the get_parser function to ensure it returns an instance of argparse.ArgumentParser.""" + parser_object = parser.get_parser() + assert isinstance(parser_object, argparse.ArgumentParser) + + +def test_path_exists_success() -> None: + """Test the path_exists function for a successful case.""" + with tempfile.TemporaryDirectory() as temp_dir: + assert parser._path_exists(temp_dir) == pathlib.Path(temp_dir) + + +def test_path_exists_failure() -> None: + """Test the path_exists function for a failure case.""" + with pytest.raises(argparse.ArgumentTypeError): + parser._path_exists("not_a_path") + + +def test_is_positive_integer_success() -> None: + """Test the _is_positive_integer function for a successful case.""" + assert parser._is_positive_integer("1") == 1 + + +def test_is_positive_integer_failure_not_int() -> None: + """Test the _is_positive_integer function for a float.""" + with pytest.raises(argparse.ArgumentTypeError): + parser._is_positive_integer("0.5") + + +def test_is_positive_integer_failure_negative() -> None: + """Test the _is_positive_integer function for a negative integer.""" + with pytest.raises(argparse.ArgumentTypeError): + parser._is_positive_integer("-1") + + +def test_is_positive_integer_failure_zero() -> None: + """Test the _is_positive_integer function for zero.""" + with pytest.raises(argparse.ArgumentTypeError): + parser._is_positive_integer("0") + + +def test_is_between_zero_and_one_success() -> None: + """Test the _is_between_zero_and_one function for a successful case.""" + assert parser._is_between_zero_and_one("0") == 0 + + +def test_is_between_zero_and_one_failure_is_one() -> None: + """Test the _is_between_zero_and_one function for a failure case.""" + with pytest.raises(argparse.ArgumentTypeError): + parser._is_between_zero_and_one("1") + + +def test_is_between_zero_and_one_failure_is_negative() -> 
None: + """Test the _is_between_zero_and_one function for a failure case.""" + with pytest.raises(argparse.ArgumentTypeError): + parser._is_between_zero_and_one("-1") diff --git a/tests/unit/test_unit_cli.py b/tests/unit/test_unit_cli.py index 32c6b77..133fd20 100644 --- a/tests/unit/test_unit_cli.py +++ b/tests/unit/test_unit_cli.py @@ -39,12 +39,6 @@ def mock_files() -> list[str]: return ["/path/to/bids/sub-01/func/sub-01_task-rest_bold.nii.gz"] -def test_get_parser() -> None: - """Test the _get_parser function to ensure it returns an instance of argparse.ArgumentParser.""" - parser = cli._get_parser() - assert isinstance(parser, argparse.ArgumentParser) - - def test_raise_invalid_input_existing_output_file(mock_args: mock.MagicMock) -> None: """Test _raise_invalid_input when output file already exists.""" mock_output_file = mock.MagicMock()