Add labels as data rather than as group
EmmaRenauld committed Feb 15, 2024
1 parent e02b44f commit 493299e
Showing 2 changed files with 10 additions and 13 deletions.
19 changes: 8 additions & 11 deletions dwi_ml/data/hdf5/hdf5_creation.py
@@ -11,6 +11,7 @@
 from dipy.io.utils import is_header_compatible
 from dipy.tracking.utils import length
 import h5py
+from scilpy.image.labels import get_data_as_labels

 from dwi_ml.data.hdf5.utils import format_nb_blocs_connectivity
 from dwi_ml.data.processing.streamlines.data_augmentation import \
@@ -45,7 +46,7 @@ def format_filelist(filenames, enforce_presence, folder=None) -> List[str]:
                 else:
                     logging.warning(msg)
             else:
-                new_files.extend(f)
+                new_files.extend(tmp)
         else:
             if not Path(f).is_file():
                 msg = "File not found: {}".format(f)
@@ -309,7 +310,6 @@ def _verify_subjects_list(self):
                                 "testing set!".format(ignored_subj))
         return unique_subjs

-
     def _check_files_presence(self):
         """
         Verifying now the list of files. Prevents stopping after a long
@@ -334,6 +334,7 @@ def flatten_list(a_list):
         config_file_list = [
             nested_lookup('files', self.groups_config),
             nested_lookup('connectivity_matrix', self.groups_config),
+            nested_lookup('connectivity_labels', self.groups_config),
             nested_lookup('std_mask', self.groups_config)]
         config_file_list = flatten_list(config_file_list)

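For reference (not part of the commit), nested_lookup comes from the nested-lookup package and returns every value found for a key anywhere in a nested dict, which is why the single added line is enough to pull the new label files into the file-presence check. A small sketch with a hypothetical groups_config:

    from nested_lookup import nested_lookup

    # Hypothetical config, mimicking the structure hdf5_creation.py reads.
    groups_config = {
        "streamlines": {
            "type": "streamlines",
            "connectivity_matrix": "matrix.npy",
            "connectivity_labels": "labels.nii.gz",
        }
    }

    print(nested_lookup('connectivity_labels', groups_config))
    # ['labels.nii.gz']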
@@ -472,9 +473,6 @@ def _process_one_volume_group(self, group: str, subj_id: str,
         else:
             # Load subject's standardization mask. Can be a list of files.
             std_masks = self.groups_config[group]['std_mask']
-            if isinstance(std_masks, str):
-                std_masks = [std_masks]
-
             std_masks = format_filelist(std_masks,
                                         self.enforce_files_presence,
                                         folder=subj_input_dir)
@@ -606,7 +604,8 @@ def _create_streamline_groups(self, ref, subj_input_dir, subj_id,
         if len(sft.data_per_point) > 0:
             logging.debug('sft contained data_per_point. Data not kept.')
         if len(sft.data_per_streamline) > 0:
-            logging.debug('sft contained data_per_streamlines. Data not kept.')
+            logging.debug('sft contained data_per_streamlines. Data not '
+                          'kept.')

         # Accessing private Dipy values, but necessary.
         # We need to deconstruct the streamlines into arrays with
@@ -721,11 +720,9 @@ def _process_one_streamline_group(
                 self.groups_config[group]['connectivity_nb_blocs'])
             conn_info = ['from_blocs', nb_blocs]
         else:
-            labels_group = self.groups_config[group]['connectivity_labels']
-            if labels_group not in self.volume_groups:
-                raise ValueError("connectivity_labels_volume must be "
-                                 "an existing volume group.")
-            conn_info = ['from_labels', labels_group]
+            labels_file = self.groups_config[group]['connectivity_labels']
+            labels_data = get_data_as_labels(nib.load(labels_file))
+            conn_info = ['from_labels', labels_data]

         conn_file = subj_dir.joinpath(
             self.groups_config[group]['connectivity_matrix'])
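This hunk is the core of the commit: connectivity_labels no longer has to name an existing volume group; the labels file is loaded on the spot and the data array itself travels in conn_info. A minimal sketch of the new code path, with a hypothetical file name:

    import nibabel as nib
    from scilpy.image.labels import get_data_as_labels

    labels_file = "sub-01__labels.nii.gz"  # hypothetical path from the config
    # get_data_as_labels returns the image data as an integer label map.
    labels_data = get_data_as_labels(nib.load(labels_file))
    conn_info = ['from_labels', labels_data]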
4 changes: 2 additions & 2 deletions source/2_A_creating_the_hdf5.rst
@@ -83,7 +83,7 @@ To create the hdf5 file, you will need a config file such as below. HDF groups w
         "files": ["tractograms/bundle1.trk", "tractograms/wholebrain.trk", "tractograms/*__wholebrain.trk"], ----> Will get, for instance, sub1000__bundle1.trk
         "connectivity_matrix": "my_file.npy",
         "connectivity_nb_blocs": 6 ---> OR
-        "connectivity_labels": labels_volume
+        "connectivity_labels": labels_volume_group
     }
     "bad_streamlines": {
         "type": "streamlines",
@@ -136,7 +136,7 @@ Additional attributes for streamlines groups:
 - **connectivity_matrix**: The name of the connectivity matrix to associate to the streamline group. This matrix will probably be used as a means of validation during training. Then, you also need to explain how the matrix was created, so that you can create the connectivity matrix of the streamlines being validated, in order to compare it with the expected result. ONE of the two next options must be given:
     - **connectivity_nb_blocs**: This explains that the connectivity matrix was created by dividing the volume space into regular blocs. See dwiml_compute_connectivity_matrix_from_blocs for a description. The value should be either an integer or a list of three integers.
-    - **connectivity_labels_volume**: This explains that the connectivity matrix was created by dividing the cortex into a list of regions associated with labels. The value must be the name of another volume group in the same config file, which refers to a map with one label per region. NOTE: This option is offered in preparation of future use only. Currently, you can create the hdf5 with this option, but connectivity computation using labels is not yet implemented in dwi_ml.
+    - **connectivity_labels**: This explains that the connectivity matrix was created by dividing the cortex into a list of regions associated with labels. The value must be the name of the associated labels file (typically a nifti file filled with integers).

 2.4. Creating the hdf5
 **********************
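To make the two documented options concrete, here is a hypothetical pair of streamlines-group entries (file names invented, written as Python dicts for brevity); exactly one of connectivity_nb_blocs or connectivity_labels should be given:

    # Hypothetical config fragments; not taken from the dwi_ml documentation.
    group_with_blocs = {
        "type": "streamlines",
        "files": ["tractograms/wholebrain.trk"],
        "connectivity_matrix": "my_file.npy",
        "connectivity_nb_blocs": 6,              # or a list such as [6, 6, 6]
    }
    group_with_labels = {
        "type": "streamlines",
        "files": ["tractograms/wholebrain.trk"],
        "connectivity_matrix": "my_file.npy",
        "connectivity_labels": "labels.nii.gz",  # labels file (nifti of integers)
    }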
