From 03e6db084c82e2da4cd35f4091544215b3339000 Mon Sep 17 00:00:00 2001 From: Reinder Vos de Wael Date: Thu, 3 Aug 2023 10:45:17 -0400 Subject: [PATCH] Various minor code cleanups (#9) * Update README * Various minor code cleanups --------- Co-authored-by: Reinder Vos de Wael --- README.md | 42 +++++++++++++++++------ src/ba_timeseries_gradients/cli.py | 23 +++++++++---- src/ba_timeseries_gradients/exceptions.py | 6 +--- src/ba_timeseries_gradients/gradients.py | 22 +++++------- src/ba_timeseries_gradients/utils.py | 17 +++++---- tests/integration/test_integration_cli.py | 3 +- tests/unit/test_unit_cli.py | 10 ------ tests/unit/test_unit_exceptions.py | 5 +-- tests/unit/test_unit_gradients.py | 16 --------- tests/unit/test_unit_utils.py | 2 +- 10 files changed, 69 insertions(+), 77 deletions(-) diff --git a/README.md b/README.md index 4184528..a8f0414 100644 --- a/README.md +++ b/README.md @@ -5,31 +5,51 @@ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![L-GPL License](https://img.shields.io/badge/license-L--GPL-blue.svg)](LICENSE) -This is a command line interface (CLI) running BrainSpace on BIDS-compliant datasets. Gradients are computed for volumetric files in NIFTI format, or surface files in GIFTI format. For more details on BrainSpace, see the [BrainSpace documentation](https://brainspace.readthedocs.io/en/latest/). +This is a command line interface (CLI) for running BrainSpace on BIDS-compliant datasets. Gradients are computed for volumetric files in NIFTI format, or surface files in GIFTI format. For more details on BrainSpace, see the [BrainSpace documentation](https://brainspace.readthedocs.io/en/latest/). ## Installation -The recommended approaches for installing ba-timeseries-gradients is through Docker or PyPi. To build it as a Docker image, download the repository and run the following command from the repository root: +For local installation the recommended approach is through Poetry. To install through Poetry, run the following commands: ```bash -docker build -t ba_timeseries_gradients . +pip install poetry +poetry install ``` -To install through PyPi, run the following command: +## Usage + +The basic usage of the ba_timeseries_gradients CLI is as follows for Poetry-based installations: ```bash -pip install ba_timeseries_gradients +ba_timeseries_gradients [OPTIONS] BIDS_DIR OUTPUT_DIR ANALYSIS_LEVEL ``` -## Usage +The `BIDS_DIR` is the path to the BIDS directory containing the dataset to be analyzed. The `OUTPUT_DIR` is the path to the directory where the output will be saved. The `ANALYSIS_LEVEL` is the level of analysis to be performed, which can currently only be `group`. -The basic usage of the ba_timeseries_gradients CLI is as follows, depending on whether you've installed through PyPi or Docker: +For a full list of options, see: ```bash -docker run --volume LOCAL_BIDS_DIR:BIDS_DIR --volume LOCAL_OUTPUT_DIR:OUTPUT_DIR ba_timeseries_gradients [OPTIONS] BIDS_DIR OUTPUT_DIR ANALYSIS_LEVEL -ba_timeseries_gradients [OPTIONS] BIDS_DIR OUTPUT_DIR ANALYSIS_LEVEL +ba_timeseries_gradients --help ``` -The `BIDS_DIR` is the path to the BIDS directory containing the dataset to be analyzed. The `OUTPUT_DIR` is the path to the directory where the output will be saved. The `ANALYSIS_LEVEL` is the level of analysis to be performed, which can currently only be `group`. +It is highly recommended to include options to filter the dataset for specific files. See the BIDS arguments section in the help for more details. -For a complete list of options see `ba_timeseries_gradients -h`. It is highly recommended to include options to filter the dataset for specific files. See the BIDS arguments section in the help for more details. +You can also run the CLI through Docker. To do so, run the following command: + +```bash +docker run \ + --volume LOCAL_BIDS_DIR:BIDS_DIR \ + --volume LOCAL_OUTPUT_DIR:OUTPUT_DIR \ + ghcr.io/cmi-dair/ba-timeseries-gradients:main \ + [OPTIONS] BIDS_DIR OUTPUT_DIR ANALYSIS_LEVEL +``` + +Similarly, the CLI can also be run in Singularity as follows: + +```bash +singularity run \ + --bind LOCAL_BIDS_DIR:BIDS_DIR \ + --bind LOCAL_OUTPUT_DIR:OUTPUT_DIR \ + docker://ghcr.io/cmi-dair/ba-timeseries-gradients:main \ + [OPTIONS] BIDS_DIR OUTPUT_DIR ANALYSIS_LEVEL +``` diff --git a/src/ba_timeseries_gradients/cli.py b/src/ba_timeseries_gradients/cli.py index d8ab479..12916c0 100755 --- a/src/ba_timeseries_gradients/cli.py +++ b/src/ba_timeseries_gradients/cli.py @@ -25,6 +25,12 @@ def main(): logger.debug("Getting input files...") files = _get_bids_files(args) + output_file = args.output_dir / ("gradients." + args.output_format) + + if args.dry_run: + logger.info("Detected input files:\n%s", "\n".join(files)) + logger.info("Output file: %s", output_file) + return logger.debug("Checking input validity.") _raise_invalid_input(args, files) @@ -39,8 +45,8 @@ def main(): sparsity=args.sparsity, ) - logger.info("Saving gradient map to %s...", args.output_dir) - utils.save(output_gradients, lambdas, args) + logger.info("Saving gradient map to %s.", output_file) + utils.save(output_gradients, lambdas, output_file) def _get_parser() -> argparse.ArgumentParser: @@ -202,10 +208,16 @@ def _get_parser() -> argparse.ArgumentParser: other_group.add_argument( "--output_format", required=False, - default="hdf5", + default="h5", type=str, help="Output file format", - choices=["hdf5", "json"], + choices=["h5", "json"], + ) + other_group.add_argument( + "--dry-run", + required=False, + action="store_true", + help="Do not run the pipeline, only show what input files would be used. Note that dry run is logged at the logging.INFO level.", ) return parser @@ -238,9 +250,6 @@ def _raise_invalid_input(args: argparse.Namespace, files: list[str]) -> None: "Must provide a parcellation if input files are volume files." ) - if args.output_format not in ("hdf5", "json"): - raise exceptions.InputError("Output format must be one of: 'hdf5', or 'json'.") - def _get_bids_files(args: argparse.Namespace) -> list[str]: """ diff --git a/src/ba_timeseries_gradients/exceptions.py b/src/ba_timeseries_gradients/exceptions.py index df80bd1..102532d 100644 --- a/src/ba_timeseries_gradients/exceptions.py +++ b/src/ba_timeseries_gradients/exceptions.py @@ -13,8 +13,8 @@ class BaseLoggingError(Exception): def __init__(self, message: str): self.message = message - super().__init__(self.message) logger.error(self.message) + super().__init__(self.message) class InputError(BaseLoggingError): @@ -23,7 +23,3 @@ class InputError(BaseLoggingError): class InternalError(BaseLoggingError): """Exception raised when an internal error occurs. These should never happen.""" - - -class BrainSpaceError(BaseLoggingError): - """Exception raised when a BrainSpace error occurs.""" diff --git a/src/ba_timeseries_gradients/gradients.py b/src/ba_timeseries_gradients/gradients.py index 0e101e8..c1c34f3 100644 --- a/src/ba_timeseries_gradients/gradients.py +++ b/src/ba_timeseries_gradients/gradients.py @@ -51,19 +51,14 @@ def compute_gradients( connectivity_matrix = _get_connectivity_matrix(files, parcellation_file) logger.info("Computing gradients...") - try: - gradient_map = gradient.GradientMaps( - n_components=n_components, - kernel=kernel, - approach=approach, - alignment=None, - random_state=0, - ) - gradient_map.fit(connectivity_matrix, sparsity=sparsity) - except Exception as exc_info: - raise exceptions.BrainSpaceError( - f"An error occurred in BrainSpace: {exc_info}" - ) from exc_info + gradient_map = gradient.GradientMaps( + n_components=n_components, + kernel=kernel, + approach=approach, + alignment=None, + random_state=0, + ) + gradient_map.fit(connectivity_matrix, sparsity=sparsity) return gradient_map.gradients_, gradient_map.lambdas_ @@ -92,6 +87,7 @@ def _get_connectivity_matrix( for index, filename in enumerate(files): logger.debug("Processing file %s of %s...", index + 1, len(files)) timeseries = _get_nifti_gifti_data(nib.load(filename)).squeeze() + timeseries_permuted = np.swapaxes(timeseries, 0, -1) timeseries_2d = timeseries_permuted.reshape(timeseries_permuted.shape[0], -1) diff --git a/src/ba_timeseries_gradients/utils.py b/src/ba_timeseries_gradients/utils.py index 7e0f151..7771872 100644 --- a/src/ba_timeseries_gradients/utils.py +++ b/src/ba_timeseries_gradients/utils.py @@ -1,5 +1,4 @@ """ Utility functions for the BrainSpace runner. """ -import argparse import json import pathlib @@ -10,18 +9,18 @@ def save( - output_gradients: np.ndarray, lambdas: np.ndarray, args: argparse.Namespace + output_gradients: np.ndarray, lambdas: np.ndarray, filename: str | pathlib.Path ) -> None: """ Saves a numpy array to a file with the given filename. Args: output_gradients: The numpy array to save. + lambdas: The lambdas to save. filename: The filename to save the array to. """ - extension = "h5" if args.output_format == "hdf5" else args.output_format - filename = args.output_dir / f"gradients.{extension}" + filename = pathlib.Path(filename) if filename.suffix == ".h5": save_hdf5(output_gradients, lambdas, filename) @@ -42,9 +41,9 @@ def save_hdf5( filename: The filename to save the array to. """ - with h5py.File(filename, "w") as fb: - fb.create_dataset("gradients", data=output_gradients) - fb.create_dataset("lambdas", data=lambdas) + with h5py.File(filename, "w") as h5_file: + h5_file.create_dataset("gradients", data=output_gradients) + h5_file.create_dataset("lambdas", data=lambdas) def save_json( @@ -58,11 +57,11 @@ def save_json( filename: The filename to save the array to. """ - with open(filename, "w", encoding="utf-8") as fb: + with open(filename, "w", encoding="utf-8") as file_buffer: json.dump( { "gradients": output_gradients.tolist(), "lambdas": lambdas.tolist(), }, - fb, + file_buffer, ) diff --git a/tests/integration/test_integration_cli.py b/tests/integration/test_integration_cli.py index 1fdf646..809dd90 100644 --- a/tests/integration/test_integration_cli.py +++ b/tests/integration/test_integration_cli.py @@ -33,12 +33,13 @@ class MockParser: extension: str = ".nii.gz" dimensionality_reduction: str = "dm" parcellation: str | None = None - output_format = "hdf5" + output_format = "h5" kernel: str = "cosine" sparsity: float = 0.1 n_components: int = 10 force: bool = False verbose: int = 0 + dry_run: bool = False def parse_args(self, *args): """Return self.""" diff --git a/tests/unit/test_unit_cli.py b/tests/unit/test_unit_cli.py index 1485a7f..b0a3ba2 100644 --- a/tests/unit/test_unit_cli.py +++ b/tests/unit/test_unit_cli.py @@ -73,13 +73,3 @@ def test_raise_invalid_input_no_parcellation(mock_args, mock_files) -> None: cli._raise_invalid_input(mock_args, mock_files) assert "Must provide a parcellation" in str(exc_info.value) - - -def test_raise_invalid_input_invalid_output_format(mock_args, mock_files) -> None: - """Test _raise_invalid_input when an invalid output format is provided.""" - mock_args.output_format = "invalid_format" - - with pytest.raises(exceptions.InputError) as exc_info: - cli._raise_invalid_input(mock_args, mock_files) - - assert "Output format must be" in str(exc_info.value) diff --git a/tests/unit/test_unit_exceptions.py b/tests/unit/test_unit_exceptions.py index ec7d6a5..ae74539 100644 --- a/tests/unit/test_unit_exceptions.py +++ b/tests/unit/test_unit_exceptions.py @@ -10,15 +10,12 @@ [ exceptions.BaseLoggingError, exceptions.InputError, - exceptions.BrainSpaceError, exceptions.InternalError, ], ) def test_logging_error( mocker: pytest_mock.MockFixture, - exception_type: exceptions.BaseLoggingError - | exceptions.InputError - | exceptions.BrainSpaceError, + exception_type: exceptions.BaseLoggingError | exceptions.InputError, ): """ Test that a BaseLoggingError is raised with the correct message and that the error is logged. diff --git a/tests/unit/test_unit_gradients.py b/tests/unit/test_unit_gradients.py index 9b30d88..c7c4576 100644 --- a/tests/unit/test_unit_gradients.py +++ b/tests/unit/test_unit_gradients.py @@ -20,22 +20,6 @@ def test_compute_gradients(mocker: pytest_mock.MockFixture) -> None: assert np.allclose(actual_lambdas, 0) -def test_compute_gradients_brainspace_error(mocker: pytest_mock.MockFixture) -> None: - """Test that the compute_gradients function raises an error when - BrainSpace raises an error.""" - mocker.patch( - "ba_timeseries_gradients.gradients._get_connectivity_matrix", - return_value=np.ones((3, 3)), - ) - mocker.patch( - "brainspace.gradient.GradientMaps", - side_effect=Exception("Error"), - ) - - with pytest.raises(exceptions.BrainSpaceError): - gradients.compute_gradients(files=[]) - - def test_connevtivity_matrix_from_2d_success(mocker: pytest_mock.MockerFixture) -> None: """Test that the connectivity matrix is computed correctly from a 2D timeseries.""" diff --git a/tests/unit/test_unit_utils.py b/tests/unit/test_unit_utils.py index 7bf85ce..ff3e7af 100644 --- a/tests/unit/test_unit_utils.py +++ b/tests/unit/test_unit_utils.py @@ -68,4 +68,4 @@ def test_save_internal_error() -> None: parser = MockArgparse(pathlib.Path("."), "txt") with pytest.raises(exceptions.InternalError): - utils.save(np.array([1, 2, 3]), np.array([2, 3, 4]), parser) # type: ignore[arg-type] + utils.save(np.array([1, 2, 3]), np.array([2, 3, 4]), "wrong.extension") # type: ignore[arg-type]