From 1f4a2b9813b0d6f8ccc2765d4e4b567c8b718a15 Mon Sep 17 00:00:00 2001 From: Altana Namsaraeva <99650244+namsaraeva@users.noreply.github.com> Date: Wed, 11 Sep 2024 11:16:12 +0200 Subject: [PATCH 01/56] Update requirements_dev.txt --- requirements_dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index bb4f4adc..9057b769 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -36,7 +36,7 @@ pyqt5 lxml_html_clean ashlar @ git+https://github.com/labsyspharm/ashlar.git@master networkx -py-lmd @ git+https://github.com/MannLabs/py-lmd.git@refs/pull/11/head#egg=py-lmd +py-lmd #packages for building the documentation sphinx From edc1274e9acb1fd547460ee4ab38581f969493f6 Mon Sep 17 00:00:00 2001 From: Altana Namsaraeva <99650244+namsaraeva@users.noreply.github.com> Date: Wed, 11 Sep 2024 11:16:48 +0200 Subject: [PATCH 02/56] Update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1fe95023..7a2f2d12 100755 --- a/requirements.txt +++ b/requirements.txt @@ -36,4 +36,4 @@ pyqt5 lxml_html_clean ashlar @ git+https://github.com/labsyspharm/ashlar.git@master networkx -py-lmd @ git+https://github.com/MannLabs/py-lmd.git@refs/pull/11/head#egg=py-lmd +py-lmd From 42f94f7cd2a1ac9fc20549e2f80a101f278a936f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:04:02 +0100 Subject: [PATCH 03/56] cleanup not required variables --- examples/notebooks | 2 +- src/scportrait/pipeline/_base.py | 21 +++++++++++++++++++++ src/scportrait/pipeline/extraction.py | 10 ++++++---- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/examples/notebooks b/examples/notebooks index 17b3196e..bd8f8f04 160000 --- a/examples/notebooks +++ b/examples/notebooks @@ -1 +1 @@ -Subproject commit 17b3196ec1459f7cbc3a155e6e3285ec64b25db9 +Subproject commit bd8f8f041aa02bd4d18f1be4655f9e7cc4dfa307 diff --git a/src/scportrait/pipeline/_base.py b/src/scportrait/pipeline/_base.py index ab35274c..fcc50d97 100644 --- a/src/scportrait/pipeline/_base.py +++ b/src/scportrait/pipeline/_base.py @@ -91,6 +91,27 @@ def _clean_log_file(self): if os.path.exists(log_file_path): os.remove(log_file_path) + + # def _clear_cache(self, vars_to_delete=None): + # """Helper function to help clear memory usage. Mainly relevant for GPU based segmentations. + + # Args: + # vars_to_delete (list): List of variable names (as strings) to delete. + # """ + + # # delete all specified variables + # if vars_to_delete is not None: + # for var_name in vars_to_delete: + # if var_name in globals(): + # del globals()[var_name] + + # if torch.cuda.is_available(): + # torch.cuda.empty_cache() + + # if torch.backends.mps.is_available(): + # torch.mps.empty_cache() + + # gc.collect() def _clear_cache(self, vars_to_delete=None): """Helper function to help clear memory usage. 
Mainly relevant for GPU based segmentations.""" diff --git a/src/scportrait/pipeline/extraction.py b/src/scportrait/pipeline/extraction.py index ff51dfdf..9eb46647 100644 --- a/src/scportrait/pipeline/extraction.py +++ b/src/scportrait/pipeline/extraction.py @@ -877,7 +877,7 @@ def process(self, partial=False, n_cells=None, seed=42): else: # set up function for multi-threaded processing f = func_partial(self._extract_classes_multi, self.px_centers) - batched_args = self._generate_batched_args(args) + args = self._generate_batched_args(args) self.log(f"Running in multiprocessing mode with {self.threads} threads.") with mp.get_context("fork").Pool( @@ -885,17 +885,19 @@ def process(self, partial=False, n_cells=None, seed=42): ) as pool: # both spawn and fork work but fork is faster so forcing fork here results = list( tqdm( - pool.imap(f, batched_args), - total=len(batched_args), + pool.imap(f, args), + total=len(args), desc="Processing cell batches", ) ) pool.close() pool.join() - print("multiprocessing done.") self.save_index_to_remove = flatten(results) + #cleanup memory and remove any no longer required variables + del results, args + #self._clear_cache(vars_to_delete=["results", "args"]) # this is not working as expected at the moment so need to manually delete the variables stop_extraction = timeit.default_timer() # calculate duration From 2c1ea3cdff311c454b9a219441b3dd3b6b4797f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:04:16 +0100 Subject: [PATCH 04/56] manually delete variables --- src/scportrait/pipeline/extraction.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/scportrait/pipeline/extraction.py b/src/scportrait/pipeline/extraction.py index 9eb46647..592723a0 100644 --- a/src/scportrait/pipeline/extraction.py +++ b/src/scportrait/pipeline/extraction.py @@ -651,7 +651,8 @@ def _transfer_tempmmap_to_hdf5(self): ) # increase to 64 bit otherwise information may become truncated self.log("single-cell index created.") - self._clear_cache(vars_to_delete=[cell_ids]) + del cell_ids + #self._clear_cache(vars_to_delete=[cell_ids]) # this is not working as expected so we will just delete the variable directly _, c, x, y = _tmp_single_cell_data.shape single_cell_data = hf.create_dataset( @@ -668,7 +669,8 @@ def _transfer_tempmmap_to_hdf5(self): single_cell_data[ix] = _tmp_single_cell_data[i] self.log("single-cell data created") - self._clear_cache(vars_to_delete=[single_cell_data]) + del single_cell_data + #self._clear_cache(vars_to_delete=[single_cell_data]) # this is not working as expected so we will just delete the variable directly # also transfer labelled index to HDF5 index_labelled = _tmp_single_cell_index[keep_index] @@ -684,7 +686,8 @@ def _transfer_tempmmap_to_hdf5(self): hf.create_dataset("single_cell_index_labelled", data=index_labelled, chunks=None, dtype=dt) self.log("single-cell index labelled created.") - self._clear_cache(vars_to_delete=[index_labelled]) + del index_labelled + #self._clear_cache(vars_to_delete=[index_labelled]) # this is not working as expected so we will just delete the variable directly hf.create_dataset( "channel_information", @@ -695,7 +698,9 @@ def _transfer_tempmmap_to_hdf5(self): self.log("channel information created.") # cleanup memory - self._clear_cache(vars_to_delete=[_tmp_single_cell_index, index_labelled]) + del _tmp_single_cell_index + #self._clear_cache(vars_to_delete=[_tmp_single_cell_index]) # this is not 
working as expected so we will just delete the variable directly + os.remove(self._tmp_single_cell_data_path) os.remove(self._tmp_single_cell_index_path) From 80410363bb8f439e209b9d7d159252a064ba414e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:04:34 +0100 Subject: [PATCH 05/56] properly display figure --- src/scportrait/pipeline/extraction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scportrait/pipeline/extraction.py b/src/scportrait/pipeline/extraction.py index 592723a0..662b96f5 100644 --- a/src/scportrait/pipeline/extraction.py +++ b/src/scportrait/pipeline/extraction.py @@ -636,7 +636,7 @@ def _transfer_tempmmap_to_hdf5(self): axs[i].imshow(img, vmin=0, vmax=1) axs[i].axis("off") fig.tight_layout() - fig.show() + plt.show(fig) self.log("Transferring extracted single cells to .hdf5") From 9542b24be82b033b749c688170ad7e02799f4979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Fri, 20 Dec 2024 17:22:49 +0100 Subject: [PATCH 06/56] standardize log output --- src/scportrait/pipeline/extraction.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/scportrait/pipeline/extraction.py b/src/scportrait/pipeline/extraction.py index 662b96f5..afec8da8 100644 --- a/src/scportrait/pipeline/extraction.py +++ b/src/scportrait/pipeline/extraction.py @@ -813,7 +813,6 @@ def process(self, partial=False, n_cells=None, seed=42): # directory where intermediate results should be saved cache: "/mnt/temp/cache" """ - total_time_start = timeit.default_timer() start_setup = timeit.default_timer() @@ -876,7 +875,7 @@ def process(self, partial=False, n_cells=None, seed=42): self.log("Running in single threaded mode.") results = [] - for arg in tqdm(args): + for arg in tqdm(args, total = len(args), desc = "Processing cell batches"): x = f(arg) results.append(x) else: @@ -919,7 +918,6 @@ def process(self, partial=False, n_cells=None, seed=42): self.DEFAULT_LOG_NAME = "processing.log" # change log name back to default self._post_extraction_cleanup() - total_time_stop = timeit.default_timer() total_time = total_time_stop - total_time_start From bf0d2cdb20cffd094d2923faff269e41b0708e8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Wed, 8 Jan 2025 16:20:57 +0100 Subject: [PATCH 07/56] update git submodule version --- examples/notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/notebooks b/examples/notebooks index bd8f8f04..5a9b127f 160000 --- a/examples/notebooks +++ b/examples/notebooks @@ -1 +1 @@ -Subproject commit bd8f8f041aa02bd4d18f1be4655f9e7cc4dfa307 +Subproject commit 5a9b127f06a39d326931728a0cf9850848fca205 From 6a590919830db433b093134036c574bad80e1f63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Wed, 8 Jan 2025 16:21:49 +0100 Subject: [PATCH 08/56] fix pre-commit issues --- src/scportrait/pipeline/_base.py | 2 +- src/scportrait/pipeline/extraction.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/scportrait/pipeline/_base.py b/src/scportrait/pipeline/_base.py index fcc50d97..4bc63d87 100644 --- a/src/scportrait/pipeline/_base.py +++ b/src/scportrait/pipeline/_base.py @@ -91,7 +91,7 @@ def _clean_log_file(self): if os.path.exists(log_file_path): os.remove(log_file_path) - + # def 
_clear_cache(self, vars_to_delete=None): # """Helper function to help clear memory usage. Mainly relevant for GPU based segmentations. diff --git a/src/scportrait/pipeline/extraction.py b/src/scportrait/pipeline/extraction.py index afec8da8..ad672ff3 100644 --- a/src/scportrait/pipeline/extraction.py +++ b/src/scportrait/pipeline/extraction.py @@ -652,7 +652,7 @@ def _transfer_tempmmap_to_hdf5(self): self.log("single-cell index created.") del cell_ids - #self._clear_cache(vars_to_delete=[cell_ids]) # this is not working as expected so we will just delete the variable directly + # self._clear_cache(vars_to_delete=[cell_ids]) # this is not working as expected so we will just delete the variable directly _, c, x, y = _tmp_single_cell_data.shape single_cell_data = hf.create_dataset( @@ -670,7 +670,7 @@ def _transfer_tempmmap_to_hdf5(self): self.log("single-cell data created") del single_cell_data - #self._clear_cache(vars_to_delete=[single_cell_data]) # this is not working as expected so we will just delete the variable directly + # self._clear_cache(vars_to_delete=[single_cell_data]) # this is not working as expected so we will just delete the variable directly # also transfer labelled index to HDF5 index_labelled = _tmp_single_cell_index[keep_index] @@ -687,7 +687,7 @@ def _transfer_tempmmap_to_hdf5(self): self.log("single-cell index labelled created.") del index_labelled - #self._clear_cache(vars_to_delete=[index_labelled]) # this is not working as expected so we will just delete the variable directly + # self._clear_cache(vars_to_delete=[index_labelled]) # this is not working as expected so we will just delete the variable directly hf.create_dataset( "channel_information", @@ -699,7 +699,7 @@ def _transfer_tempmmap_to_hdf5(self): # cleanup memory del _tmp_single_cell_index - #self._clear_cache(vars_to_delete=[_tmp_single_cell_index]) # this is not working as expected so we will just delete the variable directly + # self._clear_cache(vars_to_delete=[_tmp_single_cell_index]) # this is not working as expected so we will just delete the variable directly os.remove(self._tmp_single_cell_data_path) os.remove(self._tmp_single_cell_index_path) @@ -875,7 +875,7 @@ def process(self, partial=False, n_cells=None, seed=42): self.log("Running in single threaded mode.") results = [] - for arg in tqdm(args, total = len(args), desc = "Processing cell batches"): + for arg in tqdm(args, total=len(args), desc="Processing cell batches"): x = f(arg) results.append(x) else: @@ -899,9 +899,9 @@ def process(self, partial=False, n_cells=None, seed=42): self.save_index_to_remove = flatten(results) - #cleanup memory and remove any no longer required variables + # cleanup memory and remove any no longer required variables del results, args - #self._clear_cache(vars_to_delete=["results", "args"]) # this is not working as expected at the moment so need to manually delete the variables + # self._clear_cache(vars_to_delete=["results", "args"]) # this is not working as expected at the moment so need to manually delete the variables stop_extraction = timeit.default_timer() # calculate duration From dbea03215333fadcab226bf12f81d234370d4538 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Wed, 8 Jan 2025 16:28:05 +0100 Subject: [PATCH 09/56] ensure up to date spatialdata version --- requirements.txt | 2 +- requirements_dev.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index b12d8e47..dc723bac 
100755 --- a/requirements.txt +++ b/requirements.txt @@ -29,7 +29,7 @@ torch pytorch-lightning torchvision -spatialdata +spatialdata>=0.2.0 napari-spatialdata pyqt5 lxml_html_clean diff --git a/requirements_dev.txt b/requirements_dev.txt index 088c9fbb..1333aaf0 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -29,7 +29,7 @@ torch pytorch-lightning torchvision -spatialdata +spatialdata>=0.2.0 napari-spatialdata pyqt5 lxml_html_clean From e6b2148b36c37a6d322675c2700540ef80aed171 Mon Sep 17 00:00:00 2001 From: Niklas Schmacke Date: Thu, 9 Jan 2025 12:09:37 +0100 Subject: [PATCH 10/56] Added option to compress hdf5 with gzip --- src/scportrait/pipeline/extraction.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/scportrait/pipeline/extraction.py b/src/scportrait/pipeline/extraction.py index afec8da8..615dacb3 100644 --- a/src/scportrait/pipeline/extraction.py +++ b/src/scportrait/pipeline/extraction.py @@ -57,7 +57,13 @@ def __init__(self, *args, **kwargs): self.overwrite_run_path = self.overwrite def _get_compression_type(self): - self.compression_type = "lzf" if self.compression else None + if (self.compression) or (self.compression == "lzf"): + self.compression_type = "lzf" + return self.compression_type + elif self.compression == "gzip": + self.compression_type = "gzip" + return self.compression_type + self.compression_type = None return self.compression_type def _check_config(self): @@ -655,18 +661,25 @@ def _transfer_tempmmap_to_hdf5(self): #self._clear_cache(vars_to_delete=[cell_ids]) # this is not working as expected so we will just delete the variable directly _, c, x, y = _tmp_single_cell_data.shape + print(_tmp_single_cell_data.shape) + print(self.image_size) + print(keep_index.shape) single_cell_data = hf.create_dataset( "single_cell_data", shape=(len(keep_index), c, x, y), chunks=(1, 1, self.image_size, self.image_size), - compression=self.compression_type, + # compression=self.compression_type, + compression='gzip', #was lzf, gzip works dtype=np.float16, + # rdcc_nbytes=5242880000, # 5gb 1024 * 1024 * 5000 + # rdcc_w0=1, + # rdcc_nslots=50000, ) # populate dataset in loop to prevent loading of entire dataset into memory # this is required to process large datasets to not run into memory issues for ix, i in enumerate(keep_index): - single_cell_data[ix] = _tmp_single_cell_data[i] + single_cell_data[ix] = _tmp_single_cell_data[i] self.log("single-cell data created") del single_cell_data From d53eab4bdfdda87c7766fced8da41fc0546d22cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Thu, 9 Jan 2025 13:16:58 +0100 Subject: [PATCH 11/56] add workaround for sdata objects with multiple labels supports some labels not being in the scportrait compatible format --- src/scportrait/pipeline/_utils/sdata_io.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/scportrait/pipeline/_utils/sdata_io.py b/src/scportrait/pipeline/_utils/sdata_io.py index 47017a1c..89c480e0 100644 --- a/src/scportrait/pipeline/_utils/sdata_io.py +++ b/src/scportrait/pipeline/_utils/sdata_io.py @@ -71,10 +71,12 @@ def _read_sdata(self) -> SpatialData: _sdata = SpatialData() _sdata.write(self.sdata_path, overwrite=True) + allowed_labels = ["seg_all_nucleus", "seg_all_cytosol"] for key in _sdata.labels: - segmentation_object = _sdata.labels[key] - if not hasattr(segmentation_object.attrs, "cell_ids"): - segmentation_object = 
spLabels2DModel().convert(segmentation_object, classes=None) + if key in allowed_labels: + segmentation_object = _sdata.labels[key] + if not hasattr(segmentation_object.attrs, "cell_ids"): + segmentation_object = spLabels2DModel().convert(segmentation_object, classes=None) return _sdata From 1a2a1b2b59d032b41a7e161e56b7318ba6725731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:23:42 +0100 Subject: [PATCH 12/56] initial implementation to explicitly specify segmentation masks to be used for extraction --- src/scportrait/pipeline/extraction.py | 68 ++++++++++++++++++--------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/src/scportrait/pipeline/extraction.py b/src/scportrait/pipeline/extraction.py index 0b83ead3..53473b32 100644 --- a/src/scportrait/pipeline/extraction.py +++ b/src/scportrait/pipeline/extraction.py @@ -267,24 +267,53 @@ def _get_segmentation_info(self): f"Found no segmentation masks with key {self.segmentation_key}. Cannot proceed with extraction." ) - # get relevant segmentation masks to perform extraction on - nucleus_key = f"{self.segmentation_key}_nucleus" + #intialize default values to track what should be extracted + self.nucleus_key = None + self.cytosol_key = None + self.extract_nucleus_mask = False + self.extract_cytosol_mask = False + + if "segmentation_mask" in self.config: + allowed_mask_values = ["nucleus", "cytosol"] + allowed_mask_values = [f"{self.segmentation_key}_{x}" for x in allowed_mask_values] + + if isinstance(self.config["segmentation_mask"], str): + assert (self.config["segmentation_mask"] in allowed_mask_values) + + if "nucleus" in self.main_segmenation_mask: + self.nucleus_key = self.main_segmenation_mask + self.extract_nucleus_mask = True + + elif "cytosol" in self.main_segmenation_mask: + self.cytosol_key = self.main_segmenation_mask + self.extract_cytosol_mask = True + else: + raise ValueError(f"Segmentation mask {self.main_segmenation_mask} is not a valid mask to extract from.") - if nucleus_key in relevant_masks: - self.extract_nucleus_mask = True - self.nucleus_key = nucleus_key - else: - self.extract_nucleus_mask = False - self.nucleus_key = None + elif isinstance(self.config["segmentation_mask"], list): + assert all(x in allowed_mask_values for x in self.config["segmentation_mask"]) - cytosol_key = f"{self.segmentation_key}_cytosol" + for x in self.config["segmentation_mask"]: + if "nucleus" in x: + self.nucleus_key = x + self.extract_nucleus_mask = True + if "cytosol" in x: + self.cytosol_key = x + self.extract_cytosol_mask = True - if cytosol_key in relevant_masks: - self.extract_cytosol_mask = True - self.cytosol_key = cytosol_key else: - self.extract_cytosol_mask = False - self.cytosol_key = None + # get relevant segmentation masks to perform extraction on + nucleus_key = f"{self.segmentation_key}_nucleus" + + if nucleus_key in relevant_masks: + self.extract_nucleus_mask = True + self.nucleus_key = nucleus_key + + cytosol_key = f"{self.segmentation_key}_cytosol" + + if cytosol_key in relevant_masks: + self.extract_cytosol_mask = True + self.cytosol_key = cytosol_key self.n_masks = np.sum([self.extract_nucleus_mask, self.extract_cytosol_mask]) self.masks = [x for x in [self.nucleus_key, self.cytosol_key] if x is not None] @@ -661,25 +690,18 @@ def _transfer_tempmmap_to_hdf5(self): # self._clear_cache(vars_to_delete=[cell_ids]) # this is not working as expected so we will just delete the variable directly _, c, x, y = 
_tmp_single_cell_data.shape - print(_tmp_single_cell_data.shape) - print(self.image_size) - print(keep_index.shape) single_cell_data = hf.create_dataset( "single_cell_data", shape=(len(keep_index), c, x, y), chunks=(1, 1, self.image_size, self.image_size), - # compression=self.compression_type, - compression='gzip', #was lzf, gzip works + compression=self.compression_type, dtype=np.float16, - # rdcc_nbytes=5242880000, # 5gb 1024 * 1024 * 5000 - # rdcc_w0=1, - # rdcc_nslots=50000, ) # populate dataset in loop to prevent loading of entire dataset into memory # this is required to process large datasets to not run into memory issues for ix, i in enumerate(keep_index): - single_cell_data[ix] = _tmp_single_cell_data[i] + single_cell_data[ix] = _tmp_single_cell_data[i] self.log("single-cell data created") del single_cell_data From f7d2c341c6465dc808267fbee5ee4be859df7e2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:29:11 +0100 Subject: [PATCH 13/56] fix bug incorrectly saved mask names --- src/scportrait/pipeline/extraction.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/scportrait/pipeline/extraction.py b/src/scportrait/pipeline/extraction.py index 53473b32..ab8a6de5 100644 --- a/src/scportrait/pipeline/extraction.py +++ b/src/scportrait/pipeline/extraction.py @@ -280,15 +280,15 @@ def _get_segmentation_info(self): if isinstance(self.config["segmentation_mask"], str): assert (self.config["segmentation_mask"] in allowed_mask_values) - if "nucleus" in self.main_segmenation_mask: - self.nucleus_key = self.main_segmenation_mask + if "nucleus" in self.config["segmentation_mask"]: + self.nucleus_key = self.config["segmentation_mask"] self.extract_nucleus_mask = True - elif "cytosol" in self.main_segmenation_mask: - self.cytosol_key = self.main_segmenation_mask + elif "cytosol" in self.config["segmentation_mask"]: + self.cytosol_key = self.config["segmentation_mask"] self.extract_cytosol_mask = True else: - raise ValueError(f"Segmentation mask {self.main_segmenation_mask} is not a valid mask to extract from.") + raise ValueError(f"Segmentation mask {self.config['segmentation_mask']} is not a valid mask to extract from.") elif isinstance(self.config["segmentation_mask"], list): assert all(x in allowed_mask_values for x in self.config["segmentation_mask"]) From 7da9a873b6b07d906a6a76fb5a4db4783c03ddf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:50:07 +0100 Subject: [PATCH 14/56] fix precommit issues --- src/scportrait/pipeline/extraction.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/scportrait/pipeline/extraction.py b/src/scportrait/pipeline/extraction.py index ab8a6de5..1060dc0c 100644 --- a/src/scportrait/pipeline/extraction.py +++ b/src/scportrait/pipeline/extraction.py @@ -267,7 +267,7 @@ def _get_segmentation_info(self): f"Found no segmentation masks with key {self.segmentation_key}. Cannot proceed with extraction." 
) - #intialize default values to track what should be extracted + # intialize default values to track what should be extracted self.nucleus_key = None self.cytosol_key = None self.extract_nucleus_mask = False @@ -278,7 +278,7 @@ def _get_segmentation_info(self): allowed_mask_values = [f"{self.segmentation_key}_{x}" for x in allowed_mask_values] if isinstance(self.config["segmentation_mask"], str): - assert (self.config["segmentation_mask"] in allowed_mask_values) + assert self.config["segmentation_mask"] in allowed_mask_values if "nucleus" in self.config["segmentation_mask"]: self.nucleus_key = self.config["segmentation_mask"] @@ -288,7 +288,9 @@ def _get_segmentation_info(self): self.cytosol_key = self.config["segmentation_mask"] self.extract_cytosol_mask = True else: - raise ValueError(f"Segmentation mask {self.config['segmentation_mask']} is not a valid mask to extract from.") + raise ValueError( + f"Segmentation mask {self.config['segmentation_mask']} is not a valid mask to extract from." + ) elif isinstance(self.config["segmentation_mask"], list): assert all(x in allowed_mask_values for x in self.config["segmentation_mask"]) From d4663d8d759a9a3bed04118f980c4cafcb4e7226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 11 Jan 2025 12:45:15 +0100 Subject: [PATCH 15/56] Fix typo in file naming --- src/scportrait/pipeline/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scportrait/pipeline/_base.py b/src/scportrait/pipeline/_base.py index 4bc63d87..fdca9b64 100644 --- a/src/scportrait/pipeline/_base.py +++ b/src/scportrait/pipeline/_base.py @@ -158,7 +158,7 @@ class ProcessingStep(Logable): DEFAULT_SEGMENTATION_DIR_NAME = "segmentation" DEFAULT_TILES_FOLDER = "tiles" - DEFAULT_EXTRACTIN_DIR_NAME = "extraction" + DEFAULT_EXTRACTION_DIR_NAME = "extraction" DEFAULT_DATA_DIR = "data" DEFAULT_IMAGE_DTYPE = np.uint16 From fbac9fe0f0e477b269a9d5a93b0b7714fb42fb26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 11 Jan 2025 12:45:30 +0100 Subject: [PATCH 16/56] relocate removed classes file to extraction directory --- src/scportrait/pipeline/extraction.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/scportrait/pipeline/extraction.py b/src/scportrait/pipeline/extraction.py index 1060dc0c..e7cddf01 100644 --- a/src/scportrait/pipeline/extraction.py +++ b/src/scportrait/pipeline/extraction.py @@ -450,9 +450,10 @@ def _get_label_info(self, arg): def _save_removed_classes(self, classes): # define path where classes should be saved + filtered_path = os.path.join( self.project_location, - self.DEFAULT_SEGMENTATION_DIR_NAME, + self.DEFAULT_EXTRACTION_DIR_NAME, self.DEFAULT_REMOVED_CLASSES_FILE, ) From 22bd181e84dbb576d2ffa0eaabd5363ed0d2148a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 11 Jan 2025 12:47:21 +0100 Subject: [PATCH 17/56] ruff linting --- src/scportrait/pipeline/extraction.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/scportrait/pipeline/extraction.py b/src/scportrait/pipeline/extraction.py index e7cddf01..87c6b46c 100644 --- a/src/scportrait/pipeline/extraction.py +++ b/src/scportrait/pipeline/extraction.py @@ -450,7 +450,6 @@ def _get_label_info(self, arg): def _save_removed_classes(self, classes): # define path where classes should be saved - filtered_path = os.path.join( self.project_location, 
self.DEFAULT_EXTRACTION_DIR_NAME, From d922f3ed6b20066b159c8bf5af7511108ae3361d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Wed, 15 Jan 2025 13:34:13 +0100 Subject: [PATCH 18/56] ensure angles are always stored as a list --- src/scportrait/tools/ml/transforms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scportrait/tools/ml/transforms.py b/src/scportrait/tools/ml/transforms.py index b9769895..08baba1c 100644 --- a/src/scportrait/tools/ml/transforms.py +++ b/src/scportrait/tools/ml/transforms.py @@ -19,7 +19,7 @@ def __init__(self, choices=4, include_zero=True): delta = (360 - angles[-1]) / 2 angles = angles + delta - self.choices = angles + self.choices = angles.tolist() def __call__(self, tensor): angle = random.choice(self.choices) From a2419bb3078c8ba13f38df1911827fdf1c29cab7 Mon Sep 17 00:00:00 2001 From: Niklas Schmacke Date: Fri, 17 Jan 2025 19:09:18 +0100 Subject: [PATCH 19/56] Fixed gzip compression --- src/scportrait/pipeline/extraction.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/scportrait/pipeline/extraction.py b/src/scportrait/pipeline/extraction.py index 87c6b46c..1d032ebe 100644 --- a/src/scportrait/pipeline/extraction.py +++ b/src/scportrait/pipeline/extraction.py @@ -57,13 +57,13 @@ def __init__(self, *args, **kwargs): self.overwrite_run_path = self.overwrite def _get_compression_type(self): - if (self.compression) or (self.compression == "lzf"): + if (self.compression == True) or (self.compression == "lzf"): self.compression_type = "lzf" - return self.compression_type elif self.compression == "gzip": self.compression_type = "gzip" - return self.compression_type - self.compression_type = None + else: + self.compression_type = None + self.log(f"Compression algorithm: {self.compression_type}") return self.compression_type def _check_config(self): From 144025f46458013578f4c536bef77e683e66d977 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Fri, 17 Jan 2025 19:25:29 +0100 Subject: [PATCH 20/56] improve behaviour of mask matching behaviour for deprecated config keys will use the default parameters as if nothing was specified --- src/scportrait/pipeline/segmentation/workflows.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/scportrait/pipeline/segmentation/workflows.py b/src/scportrait/pipeline/segmentation/workflows.py index 008a2729..d5f2614d 100644 --- a/src/scportrait/pipeline/segmentation/workflows.py +++ b/src/scportrait/pipeline/segmentation/workflows.py @@ -653,7 +653,9 @@ def _check_for_mask_matching_filtering(self) -> None: else: # add deprecation warning for old config setup if "filter_status" in self.config.keys(): - Warning("filter_status is deprecated, please use match_masks instead Will not perform filtering.") + self.filter_match_masks = True + self.mask_matching_filtering_threshold = 0.95 + Warning("filter_status is deprecated, please use match_masks instead. 
Will use default settings for mask matching.") # default behaviour that this filtering should be performed, otherwise another additional step is required before extraction self.filter_match_masks = True From 696ea5e97f9e96371f2ea7478397a18dde7a1500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Fri, 17 Jan 2025 19:29:57 +0100 Subject: [PATCH 21/56] only check loaded segmentation masks if available --- src/scportrait/pipeline/project.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/scportrait/pipeline/project.py b/src/scportrait/pipeline/project.py index ae20d64c..864195d0 100644 --- a/src/scportrait/pipeline/project.py +++ b/src/scportrait/pipeline/project.py @@ -888,9 +888,10 @@ def load_input_from_sdata( # ensure that the provided nucleus and cytosol segmentations fullfill the scPortrait requirements # requirements are: # 1. The nucleus segmentation mask and the cytosol segmentation mask must contain the same ids - assert ( - self.sdata[self.nuc_seg_name].attrs["cell_ids"] == self.sdata[self.cyto_seg_name].attrs["cell_ids"] - ), "The nucleus segmentation mask and the cytosol segmentation mask must contain the same ids." + if self.nuc_seg_status in self.sdata.keys() and self.cyto_seg_status in self.sdata.keys(): + assert ( + self.sdata[self.nuc_seg_name].attrs["cell_ids"] == self.sdata[self.cyto_seg_name].attrs["cell_ids"] + ), "The nucleus segmentation mask and the cytosol segmentation mask must contain the same ids." # 2. the nucleus segmentation ids and the cytosol segmentation ids need to match # THIS NEEDS TO BE IMPLEMENTED HERE From 622a45f7b925687f2467c1ab2f00125f4131b4ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Fri, 17 Jan 2025 19:33:03 +0100 Subject: [PATCH 22/56] ruff linting --- src/scportrait/pipeline/extraction.py | 2 +- src/scportrait/pipeline/segmentation/workflows.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/scportrait/pipeline/extraction.py b/src/scportrait/pipeline/extraction.py index 1d032ebe..505f6081 100644 --- a/src/scportrait/pipeline/extraction.py +++ b/src/scportrait/pipeline/extraction.py @@ -57,7 +57,7 @@ def __init__(self, *args, **kwargs): self.overwrite_run_path = self.overwrite def _get_compression_type(self): - if (self.compression == True) or (self.compression == "lzf"): + if (self.compression is True) or (self.compression == "lzf"): self.compression_type = "lzf" elif self.compression == "gzip": self.compression_type = "gzip" diff --git a/src/scportrait/pipeline/segmentation/workflows.py b/src/scportrait/pipeline/segmentation/workflows.py index d5f2614d..94ed8c44 100644 --- a/src/scportrait/pipeline/segmentation/workflows.py +++ b/src/scportrait/pipeline/segmentation/workflows.py @@ -655,7 +655,9 @@ def _check_for_mask_matching_filtering(self) -> None: if "filter_status" in self.config.keys(): self.filter_match_masks = True self.mask_matching_filtering_threshold = 0.95 - Warning("filter_status is deprecated, please use match_masks instead. Will use default settings for mask matching.") + Warning( + "filter_status is deprecated, please use match_masks instead. Will use default settings for mask matching." 
+ ) # default behaviour that this filtering should be performed, otherwise another additional step is required before extraction self.filter_match_masks = True From e34129ebbb7b8c0a0d586208408065c251e323fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 18 Jan 2025 14:25:46 +0100 Subject: [PATCH 23/56] remove unnecessary print statement --- src/scportrait/pipeline/segmentation/segmentation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/scportrait/pipeline/segmentation/segmentation.py b/src/scportrait/pipeline/segmentation/segmentation.py index 9b2edcd9..5d8a0f9a 100644 --- a/src/scportrait/pipeline/segmentation/segmentation.py +++ b/src/scportrait/pipeline/segmentation/segmentation.py @@ -742,7 +742,6 @@ def _resolve_sharding(self, sharding_plan): local_hf = h5py.File(local_output, "r") local_hdf_labels = local_hf.get(self.DEFAULT_MASK_NAME)[:] - print(type(local_hdf_labels)) shifted_map, edge_labels = shift_labels( local_hdf_labels, class_id_shift, From 43f4a9814b6dc7031b247902fa0efe683fe03f9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 18 Jan 2025 15:55:06 +0100 Subject: [PATCH 24/56] fix multiprocessing worker naming issue when multiple runs are done sequentially --- src/scportrait/pipeline/segmentation/segmentation.py | 5 +++-- src/scportrait/pipeline/segmentation/workflows.py | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/scportrait/pipeline/segmentation/segmentation.py b/src/scportrait/pipeline/segmentation/segmentation.py index 5d8a0f9a..63d62490 100644 --- a/src/scportrait/pipeline/segmentation/segmentation.py +++ b/src/scportrait/pipeline/segmentation/segmentation.py @@ -901,8 +901,9 @@ def _resolve_sharding(self, sharding_plan): if not self.deep_debug: self._cleanup_shards(sharding_plan) - def _initializer_function(self, gpu_id_list): + def _initializer_function(self, gpu_id_list, n_processes): current_process().gpu_id_list = gpu_id_list + current_process().n_processes = n_processes def _perform_segmentation(self, shard_list): # get GPU status @@ -920,7 +921,7 @@ def _perform_segmentation(self, shard_list): with mp.get_context(self.context).Pool( processes=self.n_processes, initializer=self._initializer_function, - initargs=[self.gpu_id_list], + initargs=[self.gpu_id_list, self.n_processes], ) as pool: list( tqdm( diff --git a/src/scportrait/pipeline/segmentation/workflows.py b/src/scportrait/pipeline/segmentation/workflows.py index 94ed8c44..2ad5d0cb 100644 --- a/src/scportrait/pipeline/segmentation/workflows.py +++ b/src/scportrait/pipeline/segmentation/workflows.py @@ -15,6 +15,7 @@ from skimage.filters import median from skimage.morphology import binary_erosion, dilation, disk, erosion from skimage.segmentation import watershed +import _multiprocessing from scportrait.pipeline._utils.segmentation import ( contact_filter, @@ -1353,6 +1354,9 @@ def _check_gpu_status(self): gpu_id_list = current.gpu_id_list cpu_id = int(cpu_name[cpu_name.find("-") + 1 :]) - 1 + if cpu_id >= len(gpu_id_list): + cpu_id = cpu_id%current.n_processes + # track gpu_id and update GPU status self.gpu_id = gpu_id_list[cpu_id] self.status = "multi_GPU" From 419c3454a7f1e9f5c27d719c80f557c03607a5b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 18 Jan 2025 19:03:38 +0100 Subject: [PATCH 25/56] ruff linting --- 
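
Context for the fix in patches 24/25 above: Python names pool workers with a counter that keeps incrementing across sequential pools in the same interpreter (a second run spawns ForkPoolWorker-3, -4, ...), so indexing a fixed GPU list by the raw worker number eventually overflows the list. Wrapping the index with modulo n_processes restores a valid mapping. Below is a minimal, self-contained sketch of that idea, assuming a hypothetical two-GPU setup; the helper names are illustrative and not part of scPortrait's API.

import multiprocessing as mp
from multiprocessing import current_process


def _initializer(gpu_id_list, n_processes):
    # mirror the patch: stash the GPU list and the pool size on each worker process
    current_process().gpu_id_list = gpu_id_list
    current_process().n_processes = n_processes


def _resolve_gpu(_task):
    current = current_process()
    name = current.name  # e.g. "ForkPoolWorker-3" in a second, sequential pool
    worker_idx = int(name[name.find("-") + 1 :]) - 1
    if worker_idx >= len(current.gpu_id_list):
        worker_idx = worker_idx % current.n_processes  # wrap instead of overflowing
    return name, current.gpu_id_list[worker_idx]


if __name__ == "__main__":
    gpus = [0, 1]  # hypothetical device ids
    for _run in range(2):  # the second pool's workers are numbered 3 and 4
        with mp.get_context("fork").Pool(2, initializer=_initializer, initargs=(gpus, 2)) as pool:
            print(pool.map(_resolve_gpu, range(2)))

Without the modulo wrap, the second loop iteration would index past the end of the GPU list, which is exactly the "multiple runs done sequentially" failure mode the commit message describes.
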
src/scportrait/pipeline/segmentation/workflows.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/scportrait/pipeline/segmentation/workflows.py b/src/scportrait/pipeline/segmentation/workflows.py index 2ad5d0cb..b677597f 100644 --- a/src/scportrait/pipeline/segmentation/workflows.py +++ b/src/scportrait/pipeline/segmentation/workflows.py @@ -15,7 +15,6 @@ from skimage.filters import median from skimage.morphology import binary_erosion, dilation, disk, erosion from skimage.segmentation import watershed -import _multiprocessing from scportrait.pipeline._utils.segmentation import ( contact_filter, @@ -1355,7 +1354,7 @@ def _check_gpu_status(self): cpu_id = int(cpu_name[cpu_name.find("-") + 1 :]) - 1 if cpu_id >= len(gpu_id_list): - cpu_id = cpu_id%current.n_processes + cpu_id = cpu_id % current.n_processes # track gpu_id and update GPU status self.gpu_id = gpu_id_list[cpu_id] From 405ea46fa390fccd3c963f0a544c116a533551d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:47:32 +0100 Subject: [PATCH 26/56] relocate import statements that require java to lazy import statement this allows you to not have a working java installation to import and work with scportrait as long as you do not require the stitching capabilities --- src/scportrait/tools/stitch/_stitch.py | 39 +++++++++++++++++--------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/src/scportrait/tools/stitch/_stitch.py b/src/scportrait/tools/stitch/_stitch.py index 5b21148d..90ead29f 100644 --- a/src/scportrait/tools/stitch/_stitch.py +++ b/src/scportrait/tools/stitch/_stitch.py @@ -21,13 +21,7 @@ from scportrait.io.daskmmap import dask_array_from_path from scportrait.processing.images._image_processing import rescale_image from scportrait.tools.stitch._utils.ashlar_plotting import plot_edge_quality, plot_edge_scatter -from scportrait.tools.stitch._utils.filereaders import ( - BioformatsReaderRescale, - FilePatternReaderRescale, -) from scportrait.tools.stitch._utils.filewriters import write_ome_zarr, write_spatialdata, write_tif, write_xml -from scportrait.tools.stitch._utils.parallelized_ashlar import ParallelEdgeAligner, ParallelMosaic - class Stitcher: """ @@ -65,7 +59,7 @@ def __init__( do_intensity_rescale: bool | str = True, rescale_range: tuple = (1, 99), channel_order: list[str] = None, - reader_type=FilePatternReaderRescale, + reader_type="FilePatternReaderRescale", orientation: dict = None, plot_QC: bool = True, overwrite: bool = False, @@ -112,8 +106,13 @@ def __init__( """ self._lazy_imports() + # workaround for lazy imports of module + if self.reader_type == "FilePatternReaderRescale": + self.reader_type = self.FilePatternReaderRescale + if orientation is None: orientation = {"flip_x": False, "flip_y": True} + self.input_dir = input_dir self.slidename = slidename self.outdir = outdir @@ -158,10 +157,21 @@ def _lazy_imports(self): from ashlar.reg import EdgeAligner, Mosaic from ashlar.scripts.ashlar import process_axis_flip + from scportrait.tools.stitch._utils.filereaders import ( + BioformatsReaderRescale, + FilePatternReaderRescale, + ) + + from scportrait.tools.stitch._utils.parallelized_ashlar import ParallelEdgeAligner, ParallelMosaic + self.ashlar_thumbnail = thumbnail self.ashlar_EdgeAligner = EdgeAligner self.ashlar_Mosaic = Mosaic self.ashlar_process_axis_flip = process_axis_flip + self.BioformatsReaderRescale = BioformatsReaderRescale + self.FilePatternReaderRescale = 
FilePatternReaderRescale + self.ParallelEdgeAligner = ParallelEdgeAligner + self.ParallelMosaic = ParallelMosaic def __exit__(self): self._clear_cache() @@ -294,14 +304,14 @@ def _initialize_reader(self): """ Initialize the reader for reading image tiles. """ - if self.reader_type == FilePatternReaderRescale: + if self.reader_type == self.FilePatternReaderRescale: self.reader = self.reader_type( self.input_dir, self.pattern, self.overlap, rescale_range=self.rescale_range, ) - elif self.reader_type == BioformatsReaderRescale: + elif self.reader_type == self.BioformatsReaderRescale: self.reader = self.reader_type(self.input_dir, rescale_range=self.rescale_range) # setup correct orientation of slide (this depends on microscope used to generate the data) @@ -564,7 +574,7 @@ class ParallelStitcher(Stitcher): do_intensity_rescale (bool or "full_image", optional): Flag to indicate whether to rescale image intensities (default is True). Alternatively, set to "full_image" to rescale the entire image. rescale_range (tuple or dict, optional): If all channels should be rescaled to the same range pass a tuple with the percentiles for rescaling (default is (1, 99)). Alternatively, a dictionary can be passed with the channel names as keys and the percentiles as values if each channel should be rescaled to a different range. channel_order (list, optional): Order of channels in the generated output mosaic. If none (default value) the order of the channels is left unchanged. - reader_type (class, optional): Type of reader to use for reading image tiles (default is FilePatternReaderRescale). + reader_type (class, optional): Type of reader to use for reading image tiles (default is "FilePatternReaderRescale"). orientation (dict, optional): Dictionary specifying which dimensions of the slide to flip (default is {'flip_x': False, 'flip_y': True}). plot_QC (bool, optional): Flag to indicate whether to plot quality control (QC) figures (default is True). overwrite (bool, optional): Flag to indicate whether to overwrite the output directory if it already exists (default is False). @@ -588,7 +598,7 @@ def __init__( WGAchannel: str = None, channel_order: list[str] = None, overwrite: bool = False, - reader_type=FilePatternReaderRescale, + reader_type="FilePatternReaderRescale", orientation=None, cache: str = None, threads: int = 20, @@ -613,8 +623,9 @@ def __init__( overwrite, cache, ) + # dirty fix to avoide multithreading error with BioformatsReader until this can be fixed - if self.reader_type == BioformatsReaderRescale: + if self.reader_type == self.BioformatsReaderRescale: threads = 1 print( "BioformatsReaderRescale does not support multithreading for calculating the error threshold currently. Proceeding with 1 thread." @@ -632,7 +643,7 @@ def _initialize_aligner(self): Returns: aligner (ParallelEdgeAligner): Initialized ParallelEdgeAligner object. 
""" - aligner = ParallelEdgeAligner( + aligner = self.ParallelEdgeAligner( self.reader, channel=self.stitching_channel_id, filter_sigma=self.filter_sigma, @@ -644,7 +655,7 @@ def _initialize_aligner(self): return aligner def _initialize_mosaic(self): - mosaic = ParallelMosaic( + mosaic =self.ParallelMosaic( self.aligner, self.aligner.mosaic_shape, verbose=True, channels=self.channels, n_threads=self.threads ) return mosaic From e3fbd309976f7343bf3af73c43269c6b5a883805 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 25 Jan 2025 20:10:44 +0100 Subject: [PATCH 27/56] adapt selection workflow to work with new py-lmd version + improves selection performance even for large datasets see https://github.com/MannLabs/py-lmd/pull/11 for more information --- src/scportrait/pipeline/selection.py | 195 +++++++++++++++++++++++---- 1 file changed, 168 insertions(+), 27 deletions(-) diff --git a/src/scportrait/pipeline/selection.py b/src/scportrait/pipeline/selection.py index 0afb36a8..cc26c30c 100644 --- a/src/scportrait/pipeline/selection.py +++ b/src/scportrait/pipeline/selection.py @@ -4,8 +4,19 @@ from alphabase.io import tempmmap from lmd.lib import SegmentationLoader +import h5py +import timeit +import pandas as pd +import pickle +from scipy.sparse import coo_array +from tqdm.auto import tqdm +from functools import partial as func_partial +import multiprocessing as mp + from scportrait.pipeline._base import ProcessingStep +from scportrait.pipeline._utils.helper import flatten +import matplotlib.pyplot as plt class LMDSelection(ProcessingStep): """ @@ -13,19 +24,58 @@ class LMDSelection(ProcessingStep): This method class relies on the functionality of the pylmd library. """ - # define all valid path optimization methods used with the "path_optimization" argument in the configuration - VALID_PATH_OPTIMIZERS = ["none", "hilbert", "greedy"] - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self._check_config() self.name = None self.cell_sets = None self.calibration_marker = None + self.deep_debug = False #flag for deep debugging by developers + + def _check_config(self): + assert "segmentation_channel" in self.config, "segmentation_channel not defined in config" + self.segmentation_channel_to_select = self.config["segmentation_channel"] + + # check for optional config parameters + + #this defines how large the box mask around the center of a cell is for the coordinate extraction + #assumption is that all pixels belonging to each mask are within the box otherwise they will be cut off during cutting contour generation + + if "cell_width" in self.config: + self.cell_radius = self.config["cell_width"] + else: + self.cell_radius = 100 + + if "threads" in self.config: + self.threads = self.config["threads"] + assert self.threads > 0, "threads must be greater than 0" + assert isinstance(self.threads, int), "threads must be an integer" + else: + self.threads = 10 + + if "batch_size_coordinate_extraction" in self.config: + self.batch_size = self.config["batch_size_coordinate_extraction"] + assert self.batch_size > 0, "batch_size_coordinate_extraction must be greater than 0" + assert isinstance(self.batch_size, int), "batch_size_coordinate_extraction must be an integer" + else: + self.batch_size = 100 + + if "orientation_transform" in self.config: + self.orientation_transform = self.config["orientation_transform"] + else: + self.orientation_transform = np.array([[0, -1], [1, 0]]) + 
self.config["orientation_transform"] = self.orientation_transform #ensure its also in config so its passed on to the segmentation loader + + if "processes_cell_sets" in self.config: + self.processes_cell_sets = self.config["processes_cell_sets"] + assert self.processes_cell_sets > 0, "processes_cell_sets must be greater than 0" + assert isinstance(self.processes_cell_sets, int), "processes_cell_sets must be an integer" + else: + self.processes_cell_sets = 1 + def _setup_selection(self): - # set orientation transform - self.config["orientation_transform"] = np.array([[0, -1], [1, 0]]) # configure name of extraction if self.name is None: @@ -39,6 +89,102 @@ def _setup_selection(self): savename = name.replace(" ", "_") + ".xml" self.savepath = os.path.join(self.directory, savename) + #check that the segmentation label exists + assert self.segmentation_channel_to_select in self.project.filehandler.get_sdata()._shared_keys, f"Segmentation channel {self.segmentation_channel_to_select} not found in sdata." + + def __get_coords(self, + cell_ids: list, + centers:list[tuple[int, int]], + width:int = 60) -> list[tuple[int, np.ndarray]]: + results = [] + + _sdata = self.project.filehandler.get_sdata() + for i, _id in enumerate(cell_ids): + values = centers[i] + + x_start = np.max([int(values[0]) - width, 0]) + y_start = np.max([int(values[1]) - width, 0]) + + x_end = x_start + width*2 + y_end = y_start + width*2 + + _cropped = _sdata[self.segmentation_channel_to_select][slice(x_start, x_end), slice(y_start, y_end)].compute() + + #optional plotting output for deep debugging + if self.deep_debug: + if self.threads == 1: + plt.figure() + plt.imshow(_cropped) + plt.show() + else: + raise ValueError("Deep debug is not supported with multiple threads.") + + sparse = coo_array(_cropped == _id) + + if 0 in sparse: + Warning(f"Cell {i} with id {_id} is potentially not fully contained in the bounding mask. 
Consider increasing the value for the 'cell_width' parameter in your config.") + + x = sparse.coords[0] + x_start + y = sparse.coords[1] + y_start + + results.append((_id, np.array(list(zip(x, y, strict = True))))) + + return(results) + + def _get_coords_multi(self, width:int, arg: tuple[list[int], np.ndarray]) -> list[tuple[int, np.ndarray]]: + cell_ids, centers = arg + results = self.__get_coords(cell_ids, centers, width) + return(results) + + def _get_coords(self, + cell_ids: list, + centers:list[tuple[int, int]], + width:int = 60, + batch_size:int = 100, + threads:int = 10) -> dict: + + #create batches + n_batches = int(np.ceil(len(cell_ids)/batch_size)) + slices = [(i*batch_size, i*batch_size + batch_size) for i in range(n_batches - 1)] + slices.append(((n_batches - 1)*batch_size, len(cell_ids))) + + batched_args = [(cell_ids[start:end], centers[start:end]) for start, end in slices] + + f = func_partial(self._get_coords_multi, + width + ) + + if threads == 1: # if only one thread is used, the function is called directly to avoid the overhead of multiprocessing + results = [f(arg) for arg in batched_args] + else: + with mp.get_context(self.context).Pool(processes=threads) as pool: + results = list(tqdm( + pool.imap(f, batched_args), + total=len(batched_args), + desc="Processing cell batches", + ) + ) + pool.close() + pool.join() + + results = flatten(results) + return(dict(results)) + + def _get_cell_ids(self, cell_sets: list[dict]) -> list[int]: + cell_ids = [] + for cell_set in cell_sets: + if "classes" in cell_set: + cell_ids.extend(cell_set["classes"]) + else: + Warning(f"Cell set {cell_set['name']} does not contain any classes.") + return(cell_ids) + + def _get_centers(self, cell_ids: list[int]) -> list[tuple[int, int]]: + _sdata = self.project.filehandler.get_sdata() + centers = _sdata["centers_cells"].compute() + centers = centers.loc[cell_ids, :] + return(centers[["y", "x"]].values.tolist()) #needs to be returned as yx to match the coordinate system as saved in spatialdataobjects + def _post_processing_cleanup(self, vars_to_delete: list | None = None): if vars_to_delete is not None: self._clear_cache(vars_to_delete=vars_to_delete) @@ -51,7 +197,6 @@ def _post_processing_cleanup(self, vars_to_delete: list | None = None): def process( self, - segmentation_name: str, cell_sets: list[dict], calibration_marker: np.array, name: str | None = None, @@ -61,9 +206,9 @@ def process( Under the hood this method relies on the pylmd library and utilizies its `SegmentationLoader` Class. Args: - segmentation_name (str): Name of the segmentation to be used for shape generation in the sdata object. cell_sets (list of dict): List of dictionaries containing the sets of cells which should be sorted into a single well. Mandatory keys for each dictionary are: name, classes. Optional keys are: well. calibration_marker (numpy.array): Array of size ‘(3,2)’ containing the calibration marker coordinates in the ‘(row, column)’ format. + name (str, optional): Name of the output file. If not provided, the name will be generated based on the names of the cell sets or if also not specified set to "selected_cells". Example: @@ -77,7 +222,6 @@ def process( # A numpy Array of shape (3, 2) should be passed. calibration_marker = np.array([marker_0, marker_1, marker_2]) - # Sets of cells can be defined by providing a name and a list of classes in a dictionary. 
cells_to_select = [{"name": "dataset1", "classes": [1, 2, 3]}] @@ -122,7 +266,7 @@ def process( convolution_smoothing: 25 # fold reduction of datapoints for compression - poly_compression_factor: 30 + rdp: 0.7 # Optimization of the cutting path inbetween shapes # optimized paths improve the cutting time and the microscopes focus @@ -160,32 +304,29 @@ def process( self._setup_selection() - ## TO Do - # check if classes and seglookup table already exist as pickle file - # if not create them - # else load them and proceed with selection - - # load segmentation from hdf5 - self.path_seg_mask = self.filehandler._load_seg_to_memmap( - [segmentation_name], tmp_dir_abs_path=self._tmp_dir_path - ) + print("Here", flush=True) - segmentation = tempmmap.mmap_array_from_path(self.path_seg_mask) + start_time = timeit.default_timer() + cell_ids = self._get_cell_ids(cell_sets) + centers = self._get_centers(cell_ids) + coord_index = self._get_coords(cell_ids = cell_ids, + centers = centers, + width = self.cell_radius, + batch_size = self.batch_size, + threads = self.threads) + self.log(f"Coordinate lookup index calculation took {timeit.default_timer() - start_time} seconds.") - # create segmentation loader sl = SegmentationLoader( config=self.config, verbose=self.debug, processes=self.config["processes_cell_sets"], ) - if len(segmentation.shape) == 3: - segmentation = np.squeeze(segmentation) - else: - raise ValueError(f"Segmentation shape is not correct. Expected 2D array, got {segmentation.shape}") + shape_collection = sl(None, + self.cell_sets, + self.calibration_marker, + coords_lookup=coord_index) - # get shape collections - shape_collection = sl(segmentation, self.cell_sets, self.calibration_marker) if self.debug: shape_collection.plot(calibration=True) @@ -196,4 +337,4 @@ def process( self.log(f"Saved output at {self.savepath}") # perform post processing cleanup - self._post_processing_cleanup(vars_to_delete=[shape_collection, sl, segmentation]) + self._post_processing_cleanup(vars_to_delete=[shape_collection, sl, coord_index]) From bf3a793792ffb963c9d079019ca8ee7a7b1efabf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 25 Jan 2025 20:12:44 +0100 Subject: [PATCH 28/56] fix remove deprecated parameter --- src/scportrait/pipeline/project.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/scportrait/pipeline/project.py b/src/scportrait/pipeline/project.py index 864195d0..caa76f2f 100644 --- a/src/scportrait/pipeline/project.py +++ b/src/scportrait/pipeline/project.py @@ -1080,7 +1080,6 @@ def select( self, cell_sets: list[dict], calibration_marker: np.ndarray | None = None, - segmentation_name: str = "seg_all_nucleus", name: str | None = None, ): """ @@ -1096,10 +1095,8 @@ def select( raise ValueError("No nucleus or cytosol segmentation loaded. Please load a segmentation first.") assert self.sdata is not None, "No sdata object loaded." - assert segmentation_name in self.sdata.labels, f"Segmentation {segmentation_name} not found in sdata object." 
self.selection_f( - segmentation_name=segmentation_name, cell_sets=cell_sets, calibration_marker=calibration_marker, name=name, From c475a6b429d203e1aedab36663f66ef40d364b07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 25 Jan 2025 20:13:59 +0100 Subject: [PATCH 29/56] fix incorrect check for edge pixels in image crop --- src/scportrait/pipeline/selection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scportrait/pipeline/selection.py b/src/scportrait/pipeline/selection.py index cc26c30c..8e4c2123 100644 --- a/src/scportrait/pipeline/selection.py +++ b/src/scportrait/pipeline/selection.py @@ -121,7 +121,7 @@ def __get_coords(self, sparse = coo_array(_cropped == _id) - if 0 in sparse: + if 0 in sparse.coords[0] or 0 in sparse.coords[1] or width*2 - 1 in sparse.coords[0] or width*2 - 1 in sparse.coords[1]: Warning(f"Cell {i} with id {_id} is potentially not fully contained in the bounding mask. Consider increasing the value for the 'cell_width' parameter in your config.") x = sparse.coords[0] + x_start From 555323858fbb7c792c47faabf2ece5c303c04b2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 25 Jan 2025 20:16:01 +0100 Subject: [PATCH 30/56] fix remove debugging statement --- src/scportrait/pipeline/selection.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/scportrait/pipeline/selection.py b/src/scportrait/pipeline/selection.py index 8e4c2123..997e33a4 100644 --- a/src/scportrait/pipeline/selection.py +++ b/src/scportrait/pipeline/selection.py @@ -304,8 +304,6 @@ def process( self._setup_selection() - print("Here", flush=True) - start_time = timeit.default_timer() cell_ids = self._get_cell_ids(cell_sets) centers = self._get_centers(cell_ids) From 1af6fe1ffae7daf1606992601b431f3acd588c7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 25 Jan 2025 20:33:52 +0100 Subject: [PATCH 31/56] ensure uptodate py-lmd version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index de7f4aa0..a143389b 100755 --- a/requirements.txt +++ b/requirements.txt @@ -35,4 +35,4 @@ pyqt5 lxml_html_clean ashlar>=1.19.0 networkx -py-lmd +py-lmd>=1.3.0 From 733c58a07ff82ed19cc004c14fe8e74eb8394011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 25 Jan 2025 20:34:29 +0100 Subject: [PATCH 32/56] ensure most uptodate py-lmd version --- requirements_dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index e705f1ff..8d807d29 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -35,7 +35,7 @@ pyqt5 lxml_html_clean ashlar>=1.19.0 networkx -py-lmd +py-lmd>=1.3.0 #packages for building the documentation sphinx From cdcf9bb1cbcf2cb01e1e0f788e644e465c66a40b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 25 Jan 2025 20:38:24 +0100 Subject: [PATCH 33/56] ruff linting --- src/scportrait/pipeline/selection.py | 122 +++++++++++++------------ src/scportrait/tools/stitch/_stitch.py | 4 +- 2 files changed, 65 insertions(+), 61 deletions(-) diff --git a/src/scportrait/pipeline/selection.py b/src/scportrait/pipeline/selection.py index 997e33a4..a3c10746 100644 --- 
a/src/scportrait/pipeline/selection.py +++ b/src/scportrait/pipeline/selection.py @@ -1,22 +1,21 @@ +import multiprocessing as mp import os +import pickle +import timeit +from functools import partial as func_partial +import h5py +import matplotlib.pyplot as plt import numpy as np +import pandas as pd from alphabase.io import tempmmap from lmd.lib import SegmentationLoader - -import h5py -import timeit -import pandas as pd -import pickle from scipy.sparse import coo_array from tqdm.auto import tqdm -from functools import partial as func_partial -import multiprocessing as mp from scportrait.pipeline._base import ProcessingStep from scportrait.pipeline._utils.helper import flatten -import matplotlib.pyplot as plt class LMDSelection(ProcessingStep): """ @@ -32,7 +31,7 @@ def __init__(self, *args, **kwargs): self.cell_sets = None self.calibration_marker = None - self.deep_debug = False #flag for deep debugging by developers + self.deep_debug = False # flag for deep debugging by developers def _check_config(self): assert "segmentation_channel" in self.config, "segmentation_channel not defined in config" @@ -40,8 +39,8 @@ def _check_config(self): # check for optional config parameters - #this defines how large the box mask around the center of a cell is for the coordinate extraction - #assumption is that all pixels belonging to each mask are within the box otherwise they will be cut off during cutting contour generation + # this defines how large the box mask around the center of a cell is for the coordinate extraction + # assumption is that all pixels belonging to each mask are within the box otherwise they will be cut off during cutting contour generation if "cell_width" in self.config: self.cell_radius = self.config["cell_width"] @@ -66,7 +65,9 @@ def _check_config(self): self.orientation_transform = self.config["orientation_transform"] else: self.orientation_transform = np.array([[0, -1], [1, 0]]) - self.config["orientation_transform"] = self.orientation_transform #ensure its also in config so its passed on to the segmentation loader + self.config["orientation_transform"] = ( + self.orientation_transform + ) # ensure its also in config so its passed on to the segmentation loader if "processes_cell_sets" in self.config: self.processes_cell_sets = self.config["processes_cell_sets"] @@ -76,7 +77,6 @@ def _check_config(self): self.processes_cell_sets = 1 def _setup_selection(self): - # configure name of extraction if self.name is None: try: @@ -89,13 +89,14 @@ def _setup_selection(self): savename = name.replace(" ", "_") + ".xml" self.savepath = os.path.join(self.directory, savename) - #check that the segmentation label exists - assert self.segmentation_channel_to_select in self.project.filehandler.get_sdata()._shared_keys, f"Segmentation channel {self.segmentation_channel_to_select} not found in sdata." + # check that the segmentation label exists + assert ( + self.segmentation_channel_to_select in self.project.filehandler.get_sdata()._shared_keys + ), f"Segmentation channel {self.segmentation_channel_to_select} not found in sdata." 
- def __get_coords(self, - cell_ids: list, - centers:list[tuple[int, int]], - width:int = 60) -> list[tuple[int, np.ndarray]]: + def __get_coords( + self, cell_ids: list, centers: list[tuple[int, int]], width: int = 60 + ) -> list[tuple[int, np.ndarray]]: results = [] _sdata = self.project.filehandler.get_sdata() @@ -105,12 +106,14 @@ def __get_coords(self, x_start = np.max([int(values[0]) - width, 0]) y_start = np.max([int(values[1]) - width, 0]) - x_end = x_start + width*2 - y_end = y_start + width*2 + x_end = x_start + width * 2 + y_end = y_start + width * 2 - _cropped = _sdata[self.segmentation_channel_to_select][slice(x_start, x_end), slice(y_start, y_end)].compute() + _cropped = _sdata[self.segmentation_channel_to_select][ + slice(x_start, x_end), slice(y_start, y_end) + ].compute() - #optional plotting output for deep debugging + # optional plotting output for deep debugging if self.deep_debug: if self.threads == 1: plt.figure() @@ -121,44 +124,48 @@ def __get_coords(self, sparse = coo_array(_cropped == _id) - if 0 in sparse.coords[0] or 0 in sparse.coords[1] or width*2 - 1 in sparse.coords[0] or width*2 - 1 in sparse.coords[1]: - Warning(f"Cell {i} with id {_id} is potentially not fully contained in the bounding mask. Consider increasing the value for the 'cell_width' parameter in your config.") + if ( + 0 in sparse.coords[0] + or 0 in sparse.coords[1] + or width * 2 - 1 in sparse.coords[0] + or width * 2 - 1 in sparse.coords[1] + ): + Warning( + f"Cell {i} with id {_id} is potentially not fully contained in the bounding mask. Consider increasing the value for the 'cell_width' parameter in your config." + ) x = sparse.coords[0] + x_start y = sparse.coords[1] + y_start - results.append((_id, np.array(list(zip(x, y, strict = True))))) + results.append((_id, np.array(list(zip(x, y, strict=True))))) - return(results) + return results - def _get_coords_multi(self, width:int, arg: tuple[list[int], np.ndarray]) -> list[tuple[int, np.ndarray]]: + def _get_coords_multi(self, width: int, arg: tuple[list[int], np.ndarray]) -> list[tuple[int, np.ndarray]]: cell_ids, centers = arg results = self.__get_coords(cell_ids, centers, width) - return(results) - - def _get_coords(self, - cell_ids: list, - centers:list[tuple[int, int]], - width:int = 60, - batch_size:int = 100, - threads:int = 10) -> dict: + return results - #create batches - n_batches = int(np.ceil(len(cell_ids)/batch_size)) - slices = [(i*batch_size, i*batch_size + batch_size) for i in range(n_batches - 1)] - slices.append(((n_batches - 1)*batch_size, len(cell_ids))) + def _get_coords( + self, cell_ids: list, centers: list[tuple[int, int]], width: int = 60, batch_size: int = 100, threads: int = 10 + ) -> dict: + # create batches + n_batches = int(np.ceil(len(cell_ids) / batch_size)) + slices = [(i * batch_size, i * batch_size + batch_size) for i in range(n_batches - 1)] + slices.append(((n_batches - 1) * batch_size, len(cell_ids))) batched_args = [(cell_ids[start:end], centers[start:end]) for start, end in slices] - f = func_partial(self._get_coords_multi, - width - ) + f = func_partial(self._get_coords_multi, width) - if threads == 1: # if only one thread is used, the function is called directly to avoid the overhead of multiprocessing + if ( + threads == 1 + ): # if only one thread is used, the function is called directly to avoid the overhead of multiprocessing results = [f(arg) for arg in batched_args] else: - with mp.get_context(self.context).Pool(processes=threads) as pool: - results = list(tqdm( + with 
mp.get_context(self.context).Pool(processes=threads) as pool: + results = list( + tqdm( pool.imap(f, batched_args), total=len(batched_args), desc="Processing cell batches", @@ -168,7 +175,7 @@ def _get_coords(self, pool.join() results = flatten(results) - return(dict(results)) + return dict(results) def _get_cell_ids(self, cell_sets: list[dict]) -> list[int]: cell_ids = [] @@ -177,13 +184,15 @@ def _get_cell_ids(self, cell_sets: list[dict]) -> list[int]: cell_ids.extend(cell_set["classes"]) else: Warning(f"Cell set {cell_set['name']} does not contain any classes.") - return(cell_ids) + return cell_ids def _get_centers(self, cell_ids: list[int]) -> list[tuple[int, int]]: _sdata = self.project.filehandler.get_sdata() centers = _sdata["centers_cells"].compute() centers = centers.loc[cell_ids, :] - return(centers[["y", "x"]].values.tolist()) #needs to be returned as yx to match the coordinate system as saved in spatialdataobjects + return centers[ + ["y", "x"] + ].values.tolist() # needs to be returned as yx to match the coordinate system as saved in spatialdataobjects def _post_processing_cleanup(self, vars_to_delete: list | None = None): if vars_to_delete is not None: @@ -307,11 +316,10 @@ def process( start_time = timeit.default_timer() cell_ids = self._get_cell_ids(cell_sets) centers = self._get_centers(cell_ids) - coord_index = self._get_coords(cell_ids = cell_ids, - centers = centers, - width = self.cell_radius, - batch_size = self.batch_size, - threads = self.threads) + print("Here", flush=True) + coord_index = self._get_coords( + cell_ids=cell_ids, centers=centers, width=self.cell_radius, batch_size=self.batch_size, threads=self.threads + ) self.log(f"Coordinate lookup index calculation took {timeit.default_timer() - start_time} seconds.") sl = SegmentationLoader( @@ -320,11 +328,7 @@ def process( processes=self.config["processes_cell_sets"], ) - shape_collection = sl(None, - self.cell_sets, - self.calibration_marker, - coords_lookup=coord_index) - + shape_collection = sl(None, self.cell_sets, self.calibration_marker, coords_lookup=coord_index) if self.debug: shape_collection.plot(calibration=True) diff --git a/src/scportrait/tools/stitch/_stitch.py b/src/scportrait/tools/stitch/_stitch.py index 90ead29f..01938a0f 100644 --- a/src/scportrait/tools/stitch/_stitch.py +++ b/src/scportrait/tools/stitch/_stitch.py @@ -23,6 +23,7 @@ from scportrait.tools.stitch._utils.ashlar_plotting import plot_edge_quality, plot_edge_scatter from scportrait.tools.stitch._utils.filewriters import write_ome_zarr, write_spatialdata, write_tif, write_xml + class Stitcher: """ Class for stitching of image tiles to assemble a mosaic. 
@@ -161,7 +162,6 @@ def _lazy_imports(self): BioformatsReaderRescale, FilePatternReaderRescale, ) - from scportrait.tools.stitch._utils.parallelized_ashlar import ParallelEdgeAligner, ParallelMosaic self.ashlar_thumbnail = thumbnail @@ -655,7 +655,7 @@ def _initialize_aligner(self): return aligner def _initialize_mosaic(self): - mosaic =self.ParallelMosaic( + mosaic = self.ParallelMosaic( self.aligner, self.aligner.mosaic_shape, verbose=True, channels=self.channels, n_threads=self.threads ) return mosaic From d1d56dd0ae864411d1e523e5129d605102db31c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 25 Jan 2025 21:00:10 +0100 Subject: [PATCH 34/56] fix ruff issues --- src/scportrait/pipeline/_utils/helper.py | 2 +- src/scportrait/pipeline/selection.py | 6 +++--- src/scportrait/tools/stitch/_stitch.py | 9 ++++----- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/scportrait/pipeline/_utils/helper.py b/src/scportrait/pipeline/_utils/helper.py index 9b11fa2a..9b2b4fed 100644 --- a/src/scportrait/pipeline/_utils/helper.py +++ b/src/scportrait/pipeline/_utils/helper.py @@ -3,7 +3,7 @@ T = TypeVar("T") -def flatten(nested_list: list[list[T]]) -> list[T]: +def flatten(nested_list: list[list[T]]) -> list[T | tuple[T]]: """Flatten a list of lists into a single list. Args: diff --git a/src/scportrait/pipeline/selection.py b/src/scportrait/pipeline/selection.py index a3c10746..87b10e81 100644 --- a/src/scportrait/pipeline/selection.py +++ b/src/scportrait/pipeline/selection.py @@ -148,7 +148,7 @@ def _get_coords_multi(self, width: int, arg: tuple[list[int], np.ndarray]) -> li def _get_coords( self, cell_ids: list, centers: list[tuple[int, int]], width: int = 60, batch_size: int = 100, threads: int = 10 - ) -> dict: + ) -> dict[int, np.ndarray]: # create batches n_batches = int(np.ceil(len(cell_ids) / batch_size)) slices = [(i * batch_size, i * batch_size + batch_size) for i in range(n_batches - 1)] @@ -174,8 +174,8 @@ def _get_coords( pool.close() pool.join() - results = flatten(results) - return dict(results) + results = flatten(results) # type: ignore + return dict(results) # type: ignore def _get_cell_ids(self, cell_sets: list[dict]) -> list[int]: cell_ids = [] diff --git a/src/scportrait/tools/stitch/_stitch.py b/src/scportrait/tools/stitch/_stitch.py index 01938a0f..00efc14c 100644 --- a/src/scportrait/tools/stitch/_stitch.py +++ b/src/scportrait/tools/stitch/_stitch.py @@ -23,7 +23,6 @@ from scportrait.tools.stitch._utils.ashlar_plotting import plot_edge_quality, plot_edge_scatter from scportrait.tools.stitch._utils.filewriters import write_ome_zarr, write_spatialdata, write_tif, write_xml - class Stitcher: """ Class for stitching of image tiles to assemble a mosaic. 
@@ -107,10 +106,6 @@ def __init__( """ self._lazy_imports() - # workaround for lazy imports of module - if self.reader_type == "FilePatternReaderRescale": - self.reader_type = self.FilePatternReaderRescale - if orientation is None: orientation = {"flip_x": False, "flip_y": True} @@ -139,6 +134,10 @@ def __init__( self.orientation = orientation self.reader_type = reader_type + # workaround for lazy imports of module + if self.reader_type == "FilePatternReaderRescale": + self.reader_type = self.FilePatternReaderRescale + # workflow setup self.plot_QC = plot_QC self.overwrite = overwrite From 57a572b204d6ca6f8eaaac786ab8a461e760958f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sat, 25 Jan 2025 21:00:27 +0100 Subject: [PATCH 35/56] ruff linting --- src/scportrait/tools/stitch/_stitch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/scportrait/tools/stitch/_stitch.py b/src/scportrait/tools/stitch/_stitch.py index 00efc14c..e1020d64 100644 --- a/src/scportrait/tools/stitch/_stitch.py +++ b/src/scportrait/tools/stitch/_stitch.py @@ -23,6 +23,7 @@ from scportrait.tools.stitch._utils.ashlar_plotting import plot_edge_quality, plot_edge_scatter from scportrait.tools.stitch._utils.filewriters import write_ome_zarr, write_spatialdata, write_tif, write_xml + class Stitcher: """ Class for stitching of image tiles to assemble a mosaic. From e569264046f43a820a732b8532bbddedb69ff866 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sun, 26 Jan 2025 22:53:49 +0100 Subject: [PATCH 36/56] add helper function to read config files --- src/scportrait/pipeline/_utils/helper.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/scportrait/pipeline/_utils/helper.py b/src/scportrait/pipeline/_utils/helper.py index 9b2b4fed..1f6c0123 100644 --- a/src/scportrait/pipeline/_utils/helper.py +++ b/src/scportrait/pipeline/_utils/helper.py @@ -1,7 +1,15 @@ from typing import TypeVar +import yaml T = TypeVar("T") +def read_config(config_path: str) -> dict: + with open(config_path) as stream: + try: + config = yaml.safe_load(stream) + except yaml.YAMLError as exc: + print(exc) + return config def flatten(nested_list: list[list[T]]) -> list[T | tuple[T]]: """Flatten a list of lists into a single list. From f4c73bf892e97ce92cb98ddc5f1a333a6f294153 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sun, 26 Jan 2025 22:54:08 +0100 Subject: [PATCH 37/56] if config is passed as a string automatically read --- src/scportrait/pipeline/_base.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/scportrait/pipeline/_base.py b/src/scportrait/pipeline/_base.py index fdca9b64..12ff9ae0 100644 --- a/src/scportrait/pipeline/_base.py +++ b/src/scportrait/pipeline/_base.py @@ -9,6 +9,7 @@ import numpy as np import torch +from scportrait.pipeline._utils.helper import read_config class Logable: """Create log entries. 
@@ -183,7 +184,11 @@ def __init__( self.debug = debug self.overwrite = overwrite self.project_location = project_location - self.config = config + + if isinstance(config, str): + self.config = read_config(config) + else: + self.config = config self.overwrite = overwrite self.project = project From 9873f6f62dae3895c771d7143735253c6f797d88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sun, 26 Jan 2025 22:54:26 +0100 Subject: [PATCH 38/56] utilize new read_config function --- src/scportrait/pipeline/project.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/scportrait/pipeline/project.py b/src/scportrait/pipeline/project.py index caa76f2f..ebd44192 100644 --- a/src/scportrait/pipeline/project.py +++ b/src/scportrait/pipeline/project.py @@ -23,7 +23,6 @@ import numpy as np import psutil import xarray -import yaml from alphabase.io import tempmmap from napari_spatialdata import Interactive from ome_zarr.io import parse_url @@ -33,6 +32,7 @@ from scportrait.io import daskmmap from scportrait.pipeline._base import Logable +from scportrait.pipeline._utils.helper import read_config from scportrait.pipeline._utils.sdata_io import sdata_filehandler from scportrait.pipeline._utils.spatialdata_helper import ( calculate_centroids, @@ -94,7 +94,7 @@ class Project(Logable): def __init__( self, project_location: str, - config_path: str, + config_path: str = None, segmentation_f=None, extraction_f=None, featurization_f=None, @@ -185,11 +185,7 @@ def _load_config_from_file(self, file_path): if not os.path.isfile(file_path): raise ValueError(f"Your config path {file_path} is invalid.") - with open(file_path) as stream: - try: - self.config = yaml.safe_load(stream) - except yaml.YAMLError as exc: - print(exc) + self.config = read_config(file_path) def _get_config_file(self, config_path: str | None = None) -> None: """Load the config file for the project. If no config file is passed the default config file in the project directory is loaded. 
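Note on patches 36-38: read_config is a thin wrapper around yaml.safe_load, and ProcessingStep now calls it automatically whenever its config argument is a string instead of a pre-parsed dict. A minimal usage sketch under those patches; the file name config.yml is a placeholder and the step class at the end is hypothetical:

    # hedged sketch of the new config handling; "config.yml" is a placeholder path
    from scportrait.pipeline._utils.helper import read_config

    config = read_config("config.yml")  # YAML file parsed into a dict via yaml.safe_load
    assert isinstance(config, dict)

    # equivalently, a ProcessingStep subclass can now be handed the path directly,
    # since _base.py reads string configs itself (hypothetical subclass name):
    # step = MySegmentationStep(config="config.yml", directory="out")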
From 7e04994faab886388d7ddf4d3b5985a5cdf7a8ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sun, 26 Jan 2025 22:54:48 +0100 Subject: [PATCH 39/56] turn off overwriting for classification directory --- src/scportrait/pipeline/project.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/scportrait/pipeline/project.py b/src/scportrait/pipeline/project.py index ebd44192..6f597aa3 100644 --- a/src/scportrait/pipeline/project.py +++ b/src/scportrait/pipeline/project.py @@ -305,7 +305,7 @@ def _setup_featurization_f(self, featurization_f): self.featurization_directory, project_location=self.project_location, debug=self.debug, - overwrite=self.overwrite, + overwrite=False, #this needs to be set to false as the featurization step should not remove previously created features project=self, filehandler=self.filehandler, ) @@ -1063,6 +1063,8 @@ def featurize( # setup overwrite if specified in call if overwrite is not None: self.featurization_f.overwrite_run_path = overwrite + if overwrite is None: + self.featurization_f.overwrite_run_path = True # update the number of masks that are available in the segmentation object self.featurization_f.n_masks = sum([self.nuc_seg_status, self.cyto_seg_status]) From 4b4def2a716ec0ec1e2bfcf98be8710f3cd701e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sun, 26 Jan 2025 22:59:10 +0100 Subject: [PATCH 40/56] fix reading of config if method is a key in config ensure that only the config parameters relevant to that method are read --- src/scportrait/pipeline/_base.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/scportrait/pipeline/_base.py b/src/scportrait/pipeline/_base.py index 12ff9ae0..6a67c180 100644 --- a/src/scportrait/pipeline/_base.py +++ b/src/scportrait/pipeline/_base.py @@ -186,7 +186,11 @@ def __init__( self.project_location = project_location if isinstance(config, str): - self.config = read_config(config) + config = read_config(config) + if self.__class__.__name__ in config.keys(): + self.config = config[self.__class__.__name__ ] + else: + self.config = config else: self.config = config self.overwrite = overwrite From db03a2370351b8e494b31f8c4a6c5a6f5bc96365 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Sun, 26 Jan 2025 23:03:23 +0100 Subject: [PATCH 41/56] make datatype optional for directory creation --- src/scportrait/pipeline/featurization.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/scportrait/pipeline/featurization.py b/src/scportrait/pipeline/featurization.py index 316b6aaf..39d45dd9 100644 --- a/src/scportrait/pipeline/featurization.py +++ b/src/scportrait/pipeline/featurization.py @@ -36,6 +36,7 @@ def __init__(self, *args, **kwargs): self.model = None self.transforms = None self.expected_imagesize = None + self.data_type = None self._setup_channel_selection() @@ -59,7 +60,10 @@ def _setup_output(self): if not os.path.isdir(self.directory): os.makedirs(self.directory) - self.run_path = os.path.join(self.directory, f"{self.data_type}_{self.label}") + if self.data_type is None: + self.run_path = os.path.join(self.directory, self.label) + else: + self.run_path = os.path.join(self.directory, f"{self.data_type}_{self.label}") if not os.path.isdir(self.run_path): os.makedirs(self.run_path) From e72c3366ed6c842a769b7f671edbcae77c9c7a94 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Mon, 27 Jan 2025 01:51:26 +0100 Subject: [PATCH 42/56] add from_project parameter start implementing support for running methods outside of the scportrait project structure --- src/scportrait/pipeline/_base.py | 24 +++++++++++++++++++----- src/scportrait/pipeline/project.py | 4 ++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/scportrait/pipeline/_base.py b/src/scportrait/pipeline/_base.py index 6a67c180..fe07217e 100644 --- a/src/scportrait/pipeline/_base.py +++ b/src/scportrait/pipeline/_base.py @@ -177,13 +177,30 @@ class ProcessingStep(Logable): DEFAULT_SELECTION_DIR_NAME = "selection" def __init__( - self, config, directory, project_location, debug=False, overwrite=False, project=None, filehandler=None + self, + config, + directory = None, + project_location = None, + debug=False, + overwrite=False, + project=None, + filehandler=None, + from_project:bool = False, ): super().__init__(directory=directory) self.debug = debug self.overwrite = overwrite - self.project_location = project_location + if from_project: + self.project_run = True + self.project_location = project_location + self.project = project + self.filehandler = filehandler + else: + self.project_run = False + self.project_location = None + self.project = None + self.filehandler = None if isinstance(config, str): config = read_config(config) @@ -195,9 +212,6 @@ def __init__( self.config = config self.overwrite = overwrite - self.project = project - self.filehandler = filehandler - self.get_context() self.deep_debug = False diff --git a/src/scportrait/pipeline/project.py b/src/scportrait/pipeline/project.py index 6f597aa3..138344d9 100644 --- a/src/scportrait/pipeline/project.py +++ b/src/scportrait/pipeline/project.py @@ -253,6 +253,7 @@ def _setup_segmentation_f(self, segmentation_f): overwrite=self.overwrite, project=None, filehandler=self.filehandler, + from_project=True, ) def _setup_extraction_f(self, extraction_f): @@ -281,6 +282,7 @@ def _setup_extraction_f(self, extraction_f): overwrite=self.overwrite, project=self, filehandler=self.filehandler, + from_project=True, ) def _setup_featurization_f(self, featurization_f): @@ -308,6 +310,7 @@ def _setup_featurization_f(self, featurization_f): overwrite=False, #this needs to be set to false as the featurization step should not remove previously created features project=self, filehandler=self.filehandler, + from_project=True, ) def _setup_selection(self, selection_f): @@ -335,6 +338,7 @@ def _setup_selection(self, selection_f): overwrite=self.overwrite, project=self, filehandler=self.filehandler, + from_project=True, ) def update_featurization_f(self, featurization_f): From 07098c40e941d95fb1202577793726b2a4d6eecd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Mon, 27 Jan 2025 01:51:39 +0100 Subject: [PATCH 43/56] save n_masks to single-cell dataset --- src/scportrait/pipeline/extraction.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/scportrait/pipeline/extraction.py b/src/scportrait/pipeline/extraction.py index 505f6081..20bfbb75 100644 --- a/src/scportrait/pipeline/extraction.py +++ b/src/scportrait/pipeline/extraction.py @@ -732,6 +732,12 @@ def _transfer_tempmmap_to_hdf5(self): dtype=h5py.special_dtype(vlen=str), ) + hf.create_dataset( + "n_masks", + data=self.n_masks, + dtype=int, + ) + self.log("channel information created.") # cleanup memory From 
cde2998ff46cdfa3100486c8065d5dbf0d086a2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Mon, 27 Jan 2025 01:51:53 +0100 Subject: [PATCH 44/56] read masks from hdf5 if not already provided --- src/scportrait/pipeline/featurization.py | 129 +++++++++++++++-------- 1 file changed, 85 insertions(+), 44 deletions(-) diff --git a/src/scportrait/pipeline/featurization.py b/src/scportrait/pipeline/featurization.py index 39d45dd9..32089592 100644 --- a/src/scportrait/pipeline/featurization.py +++ b/src/scportrait/pipeline/featurization.py @@ -6,6 +6,7 @@ from functools import partial as func_partial import numpy as np +import h5py import pandas as pd import pytorch_lightning as pl import torch @@ -17,7 +18,6 @@ from scportrait.tools.ml.datasets import HDF5SingleCellDataset from scportrait.tools.ml.plmodels import MultilabelSupervisedModel - class _FeaturizationBase(ProcessingStep): PRETRAINED_MODEL_NAMES = [ "autophagy_classifier", @@ -170,10 +170,22 @@ def _setup_inference_device(self): self.inference_device = self._detect_automatic_inference_device() self.log(f"Automatically configured inferece device to {self.inference_device}") - def _general_setup(self): + def _get_nmasks(self): + if "n_masks" not in self.__dict__.keys(): + try: + self.n_masks = h5py.File(self.extraction_file, "r")["n_masks"][()].item() + except Exception as e: + raise ValueError( + f"Could not extract number of masks from HDF5 file. Error: {e}" + ) from e + + def _general_setup(self, extraction_dir: str, return_results: bool = False): """Helper function to execute all setup functions that are common to all featurization steps.""" - self._setup_output() + self.extraction_file = extraction_dir + if not return_results: + self._setup_output() + self._get_nmasks() self._setup_log_transform() self._setup_inference_device() @@ -784,8 +796,8 @@ def _setup_transforms(self) -> None: return - def _setup(self): - self._general_setup() + def _setup(self, extraction_dir: str, return_results: bool): + self._general_setup(extraction_dir=extraction_dir, return_results=return_results) self._get_model_specs() self._get_network_dir() @@ -803,7 +815,7 @@ def _setup(self): self._setup_encoders() self._setup_transforms() - def process(self, extraction_dir: str, size: int = 0): + def process(self, extraction_dir: str, size: int = 0, return_results: bool = False): """ Perform classification on the provided HDF5 dataset. @@ -880,7 +892,7 @@ class based on the previous single-cell extraction. Therefore, only the second a self.log("Started MLClusterClassifier classification.") # perform setup - self._setup() + self._setup(extraction_dir = extraction_dir, return_results=return_results) self.dataloader = self.generate_dataloader( extraction_dir, @@ -890,21 +902,28 @@ class based on the previous single-cell extraction. 
Therefore, only the second a ) # perform inference + all_results = [] for model in self.models: self.log(f"Starting inference for model encoder {model.__name__}") results = self.inference(self.dataloader, model) - output_name = f"inference_{model.__name__}" - path = os.path.join(self.run_path, f"{output_name}.csv") + if not return_results: + output_name = f"inference_{model.__name__}" + path = os.path.join(self.run_path, f"{output_name}.csv") - self._write_results_csv(results, path) - self._write_results_sdata(results, label=f"{self.label}_{model.__name__}") - - self.log(f"Results saved to file: {path}") + self._write_results_csv(results, path) + self._write_results_sdata(results, label=f"{self.label}_{model.__name__}") + else: + all_results.append(results) - # perform post processing cleanup - if not self.deep_debug: - self._post_processing_cleanup() + if return_results: + self._clear_cache() + return all_results + else: + self.log(f"Results saved to file: {path}") + # perform post processing cleanup + if not self.deep_debug: + self._post_processing_cleanup() class EnsembleClassifier(_FeaturizationBase): @@ -956,8 +975,8 @@ def _load_models(self): memory_usage = self._get_gpu_memory_usage() self.log(f"GPU memory usage after loading models: {memory_usage}") - def _setup(self): - self._general_setup() + def _setup(self, extraction_dir: str): + self._general_set(extraction_dir=extraction_dir) self._get_model_specs() self._setup_transforms() @@ -969,7 +988,7 @@ def _setup(self): self._load_models() - def process(self, extraction_dir, size=0): + def process(self, extraction_dir:str, size:int = 0, return_results:bool = False): """ Function called to perform classification on the provided HDF5 dataset. @@ -1024,7 +1043,7 @@ class based on the previous single-cell extraction. Therefore, no parameters nee self.log("Starting Ensemble Classification") - self._setup() + self._setup(extraction_dir=extraction_dir, return_results=return_results) self.dataloader = self.generate_dataloader( extraction_dir, @@ -1034,19 +1053,28 @@ class based on the previous single-cell extraction. 
Therefore, no parameters nee ) # perform inference + all_results = {} for model_name, model in zip(self.model_names, self.model, strict=False): self.log(f"Starting inference for model {model_name}") results = self.inference(self.dataloader, model) output_name = f"ensemble_inference_{model_name}" - path = os.path.join(self.run_path, f"{output_name}.csv") - self._write_results_csv(results, path) - self._write_results_sdata(results, label=model_name) + if not return_results: + path = os.path.join(self.run_path, f"{output_name}.csv") - # perform post processing cleanup - if not self.deep_debug: - self._post_processing_cleanup() + self._write_results_csv(results, path) + self._write_results_sdata(results, label=model_name) + else: + all_results[model_name] = results + + if return_results: + self._clear_cache() + return all_results + else: + # perform post processing cleanup + if not self.deep_debug: + self._post_processing_cleanup() ####### CellFeaturization based on Classic Featurecalculation ####### @@ -1083,10 +1111,19 @@ def _setup_transforms(self): return def _get_channel_specs(self): - if "channel_names" in self.project.__dict__.keys(): - self.channel_names = self.project.channel_names + if self.project is None: + try: + with h5py.File(self.extraction_file, "r") as f: + self.channel_names = list(f["channel_information"][:].astype(str)) + except Exception as e: + raise ValueError( + f"Could not extract channel names from HDF5 file. Please provide channel names manually. Error: {e}" + ) from e else: - self.channel_names = self.project.input_image.c.values + if "channel_names" in self.project.__dict__.keys(): + self.channel_names = self.project.channel_names + else: + self.channel_names = self.project.input_image.c.values def _generate_column_names( self, @@ -1298,12 +1335,12 @@ def __init__(self, *args, **kwargs): self.channel_selection = None # ensure that all images are passed to the function - def _setup(self): - self._general_setup() + def _setup(self, extraction_dir:str, return_results:bool): + self._general_setup(extraction_dir=extraction_dir, return_results=return_results) self._setup_transforms() self._get_channel_specs() - def process(self, extraction_dir, size=0): + def process(self, extraction_dir: str, size: int =0, return_results: bool = False): """ Perform featurization on the provided HDF5 dataset. 
@@ -1358,7 +1395,7 @@ def process(self, extraction_dir, size=0): self.log("Started CellFeaturization of all available channels.") # perform setup - self._setup() + self._setup(extraction_dir=extraction_dir, return_results=return_results) self.dataloader = self.generate_dataloader( extraction_dir, @@ -1388,15 +1425,19 @@ def process(self, extraction_dir, size=0): column_names=self.column_names, ) - output_name = "calculated_image_features" - path = os.path.join(self.run_path, f"{output_name}.csv") + if return_results: + self._clear_cache() + return results + else: + output_name = "calculated_image_features" + path = os.path.join(self.run_path, f"{output_name}.csv") - self._write_results_csv(results, path) - self._write_results_sdata(results) + self._write_results_csv(results, path) + self._write_results_sdata(results) - # perform post processing cleanup - if not self.deep_debug: - self._post_processing_cleanup() + # perform post processing cleanup + if not self.deep_debug: + self._post_processing_cleanup() class CellFeaturizer_single_channel(_cellFeaturizerBase): @@ -1412,17 +1453,17 @@ def _setup_channel_selection(self): self.channel_selection = [0, self.channel_selection] return - def _setup(self): - self._general_setup() + def _setup(self, extraction_dir:str, return_results:bool): + self._general_setup(extraction_dir=extraction_dir, return_results=return_results) self._setup_channel_selection() self._setup_transforms() self._get_channel_specs() - def process(self, extraction_dir, size=0): + def process(self, extraction_dir, size=0, return_results: bool = False): self.log(f"Started CellFeaturization of selected channel {self.channel_selection}.") # perform setup - self._setup() + self._setup(extraction_dir=extraction_dir, return_results=return_results) self.dataloader = self.generate_dataloader( extraction_dir, From 72f0c26e906d62193c3512e367a43af4c83162e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Mon, 27 Jan 2025 01:54:11 +0100 Subject: [PATCH 45/56] fix ruff issues + linting --- src/scportrait/pipeline/_base.py | 9 +++++---- src/scportrait/pipeline/_utils/helper.py | 3 +++ src/scportrait/pipeline/featurization.py | 21 ++++++++++----------- src/scportrait/pipeline/project.py | 2 +- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/scportrait/pipeline/_base.py b/src/scportrait/pipeline/_base.py index fe07217e..d241cdb1 100644 --- a/src/scportrait/pipeline/_base.py +++ b/src/scportrait/pipeline/_base.py @@ -11,6 +11,7 @@ from scportrait.pipeline._utils.helper import read_config + class Logable: """Create log entries. 
@@ -179,13 +180,13 @@ class ProcessingStep(Logable): def __init__( self, config, - directory = None, - project_location = None, + directory=None, + project_location=None, debug=False, overwrite=False, project=None, filehandler=None, - from_project:bool = False, + from_project: bool = False, ): super().__init__(directory=directory) @@ -205,7 +206,7 @@ def __init__( if isinstance(config, str): config = read_config(config) if self.__class__.__name__ in config.keys(): - self.config = config[self.__class__.__name__ ] + self.config = config[self.__class__.__name__] else: self.config = config else: diff --git a/src/scportrait/pipeline/_utils/helper.py b/src/scportrait/pipeline/_utils/helper.py index 1f6c0123..e9301048 100644 --- a/src/scportrait/pipeline/_utils/helper.py +++ b/src/scportrait/pipeline/_utils/helper.py @@ -1,8 +1,10 @@ from typing import TypeVar + import yaml T = TypeVar("T") + def read_config(config_path: str) -> dict: with open(config_path) as stream: try: @@ -11,6 +13,7 @@ def read_config(config_path: str) -> dict: print(exc) return config + def flatten(nested_list: list[list[T]]) -> list[T | tuple[T]]: """Flatten a list of lists into a single list. diff --git a/src/scportrait/pipeline/featurization.py b/src/scportrait/pipeline/featurization.py index 32089592..28124a79 100644 --- a/src/scportrait/pipeline/featurization.py +++ b/src/scportrait/pipeline/featurization.py @@ -5,8 +5,8 @@ from contextlib import redirect_stdout from functools import partial as func_partial -import numpy as np import h5py +import numpy as np import pandas as pd import pytorch_lightning as pl import torch @@ -18,6 +18,7 @@ from scportrait.tools.ml.datasets import HDF5SingleCellDataset from scportrait.tools.ml.plmodels import MultilabelSupervisedModel + class _FeaturizationBase(ProcessingStep): PRETRAINED_MODEL_NAMES = [ "autophagy_classifier", @@ -175,9 +176,7 @@ def _get_nmasks(self): try: self.n_masks = h5py.File(self.extraction_file, "r")["n_masks"][()].item() except Exception as e: - raise ValueError( - f"Could not extract number of masks from HDF5 file. Error: {e}" - ) from e + raise ValueError(f"Could not extract number of masks from HDF5 file. Error: {e}") from e def _general_setup(self, extraction_dir: str, return_results: bool = False): """Helper function to execute all setup functions that are common to all featurization steps.""" @@ -892,7 +891,7 @@ class based on the previous single-cell extraction. Therefore, only the second a self.log("Started MLClusterClassifier classification.") # perform setup - self._setup(extraction_dir = extraction_dir, return_results=return_results) + self._setup(extraction_dir=extraction_dir, return_results=return_results) self.dataloader = self.generate_dataloader( extraction_dir, @@ -975,8 +974,8 @@ def _load_models(self): memory_usage = self._get_gpu_memory_usage() self.log(f"GPU memory usage after loading models: {memory_usage}") - def _setup(self, extraction_dir: str): - self._general_set(extraction_dir=extraction_dir) + def _setup(self, extraction_dir: str, return_results: bool): + self._general_setup(extraction_dir=extraction_dir, return_results=return_results) self._get_model_specs() self._setup_transforms() @@ -988,7 +987,7 @@ def _setup(self, extraction_dir: str): self._load_models() - def process(self, extraction_dir:str, size:int = 0, return_results:bool = False): + def process(self, extraction_dir: str, size: int = 0, return_results: bool = False): """ Function called to perform classification on the provided HDF5 dataset. 
@@ -1335,12 +1334,12 @@ def __init__(self, *args, **kwargs): self.channel_selection = None # ensure that all images are passed to the function - def _setup(self, extraction_dir:str, return_results:bool): + def _setup(self, extraction_dir: str, return_results: bool): self._general_setup(extraction_dir=extraction_dir, return_results=return_results) self._setup_transforms() self._get_channel_specs() - def process(self, extraction_dir: str, size: int =0, return_results: bool = False): + def process(self, extraction_dir: str, size: int = 0, return_results: bool = False): """ Perform featurization on the provided HDF5 dataset. @@ -1453,7 +1452,7 @@ def _setup_channel_selection(self): self.channel_selection = [0, self.channel_selection] return - def _setup(self, extraction_dir:str, return_results:bool): + def _setup(self, extraction_dir: str, return_results: bool): self._general_setup(extraction_dir=extraction_dir, return_results=return_results) self._setup_channel_selection() self._setup_transforms() diff --git a/src/scportrait/pipeline/project.py b/src/scportrait/pipeline/project.py index 138344d9..90f2bc38 100644 --- a/src/scportrait/pipeline/project.py +++ b/src/scportrait/pipeline/project.py @@ -307,7 +307,7 @@ def _setup_featurization_f(self, featurization_f): self.featurization_directory, project_location=self.project_location, debug=self.debug, - overwrite=False, #this needs to be set to false as the featurization step should not remove previously created features + overwrite=False, # this needs to be set to false as the featurization step should not remove previously created features project=self, filehandler=self.filehandler, from_project=True, From 4d10cfb1ee4b58ead38a3b62afefb9d12e4fa7a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Mon, 27 Jan 2025 11:15:41 +0100 Subject: [PATCH 46/56] add support for passing multiple HDF5 paths to featurizers --- src/scportrait/pipeline/featurization.py | 82 ++++++++++++++++++------ 1 file changed, 63 insertions(+), 19 deletions(-) diff --git a/src/scportrait/pipeline/featurization.py b/src/scportrait/pipeline/featurization.py index 28124a79..4303a2ba 100644 --- a/src/scportrait/pipeline/featurization.py +++ b/src/scportrait/pipeline/featurization.py @@ -109,6 +109,23 @@ def _detect_automatic_inference_device(self): return inference_device + def _get_nmasks(self): + if "n_masks" not in self.__dict__.keys(): + if isinstance(self.extraction_file, str): + with h5py.File(self.extraction_file, "r") as f: + self.n_masks = f["n_masks"][()].item() + if isinstance(self.extraction_file, list): + n_masks = [] + for file in self.extraction_file: + with h5py.File(file, "r") as f: + n_masks.append(f["n_masks"][()].item()) + assert (x == n_masks[0] for x in n_masks), "number of masks are not consistent over all passed HDF5 files." + self.n_masks = n_masks[0] + try: + self.n_masks = h5py.File(self.extraction_file, "r")["n_masks"][()].item() + except Exception as e: + raise ValueError(f"Could not extract number of masks from HDF5 file. Error: {e}") from e + def _setup_inference_device(self): """ Configure the featurization run to use the specified inference device. 
@@ -171,13 +188,6 @@ def _setup_inference_device(self): self.inference_device = self._detect_automatic_inference_device() self.log(f"Automatically configured inferece device to {self.inference_device}") - def _get_nmasks(self): - if "n_masks" not in self.__dict__.keys(): - try: - self.n_masks = h5py.File(self.extraction_file, "r")["n_masks"][()].item() - except Exception as e: - raise ValueError(f"Could not extract number of masks from HDF5 file. Error: {e}") from e - def _general_setup(self, extraction_dir: str, return_results: bool = False): """Helper function to execute all setup functions that are common to all featurization steps.""" @@ -406,7 +416,8 @@ def configure_transforms(self, selected_transforms: list): def generate_dataloader( self, - extraction_dir: str, + extraction_dir: str | list[str], + labels: int | list[int] = 0, selected_transforms: transforms.Compose = transforms.Compose([]), size: int = 0, seed: int | None = 42, @@ -443,11 +454,20 @@ def generate_dataloader( self.log(f"Expected image size is set to {self.expected_imagesize}. Resizing images to this size.") t = transforms.Compose([t, transforms.Resize(self.expected_imagesize)]) + if isinstance(extraction_dir, list): + assert isinstance(labels, list), "If multiple directories are provided, multiple labels must be provided." + paths = extraction_dir + labels = labels + elif isinstance(extraction_dir, str): + assert isinstance(labels, int), "If only one directory is provided, only one label must be provided." + paths = [extraction_dir] + labels = [labels] + f = io.StringIO() with redirect_stdout(f): dataset = dataset_class( - dir_list=[extraction_dir], - dir_labels=[0], + dir_list=paths, + dir_labels=labels, transform=t, return_id=True, select_channel=self.channel_selection, @@ -814,7 +834,11 @@ def _setup(self, extraction_dir: str, return_results: bool): self._setup_encoders() self._setup_transforms() - def process(self, extraction_dir: str, size: int = 0, return_results: bool = False): + def process(self, + extraction_dir: str, + labels: int | list[int] = 0, + size: int = 0, + return_results: bool = False): """ Perform classification on the provided HDF5 dataset. @@ -895,6 +919,7 @@ class based on the previous single-cell extraction. Therefore, only the second a self.dataloader = self.generate_dataloader( extraction_dir, + labels = labels, selected_transforms=self.transforms, size=size, dataset_class=self.DEFAULT_DATA_LOADER, @@ -987,7 +1012,11 @@ def _setup(self, extraction_dir: str, return_results: bool): self._load_models() - def process(self, extraction_dir: str, size: int = 0, return_results: bool = False): + def process(self, + extraction_dir: str, + labels: int | list[int] = 0, + size: int = 0, + return_results: bool = False): """ Function called to perform classification on the provided HDF5 dataset. @@ -1046,6 +1075,7 @@ class based on the previous single-cell extraction. Therefore, no parameters nee self.dataloader = self.generate_dataloader( extraction_dir, + labels = labels, selected_transforms=self.transforms, size=size, dataset_class=self.DEFAULT_DATA_LOADER, @@ -1111,13 +1141,16 @@ def _setup_transforms(self): def _get_channel_specs(self): if self.project is None: - try: + if isinstance(self.extraction_file, str): with h5py.File(self.extraction_file, "r") as f: self.channel_names = list(f["channel_information"][:].astype(str)) - except Exception as e: - raise ValueError( - f"Could not extract channel names from HDF5 file. Please provide channel names manually. 
Error: {e}" - ) from e + if isinstance(self.extraction_file, list): + channel_names = [] + for file in self.extraction_file: + with h5py.File(file, "r") as f: + channel_names.append(list(f["channel_information"][:].astype(str))) + assert (x == channel_names[0] for x in channel_names), "Channel names are not consistent over all passed HDF5 files." + self.channel_names = channel_names[0] else: if "channel_names" in self.project.__dict__.keys(): self.channel_names = self.project.channel_names @@ -1339,7 +1372,11 @@ def _setup(self, extraction_dir: str, return_results: bool): self._setup_transforms() self._get_channel_specs() - def process(self, extraction_dir: str, size: int = 0, return_results: bool = False): + def process(self, + extraction_dir: str | list[str], + labels: int | list[int] = 0, + size: int = 0, + return_results: bool = False): """ Perform featurization on the provided HDF5 dataset. @@ -1398,6 +1435,7 @@ def process(self, extraction_dir: str, size: int = 0, return_results: bool = Fal self.dataloader = self.generate_dataloader( extraction_dir, + labels = labels, selected_transforms=self.transforms, size=size, dataset_class=self.DEFAULT_DATA_LOADER, @@ -1458,7 +1496,12 @@ def _setup(self, extraction_dir: str, return_results: bool): self._setup_transforms() self._get_channel_specs() - def process(self, extraction_dir, size=0, return_results: bool = False): + def process(self, + extraction_dir: str | list[str], + labels: int | list[int] = 0, + size=0, + return_results: bool = False): + self.log(f"Started CellFeaturization of selected channel {self.channel_selection}.") # perform setup @@ -1466,6 +1509,7 @@ def process(self, extraction_dir, size=0, return_results: bool = False): self.dataloader = self.generate_dataloader( extraction_dir, + labels = labels, selected_transforms=self.transforms, size=size, dataset_class=self.DEFAULT_DATA_LOADER, From d51dc9460b9910086d696a4826a2a6a35d803154 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Mon, 27 Jan 2025 14:58:40 +0100 Subject: [PATCH 47/56] fix incorrect definition of from_project parameter in a project run --- src/scportrait/pipeline/segmentation/segmentation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/scportrait/pipeline/segmentation/segmentation.py b/src/scportrait/pipeline/segmentation/segmentation.py index 63d62490..82e1e8f7 100644 --- a/src/scportrait/pipeline/segmentation/segmentation.py +++ b/src/scportrait/pipeline/segmentation/segmentation.py @@ -85,6 +85,7 @@ def __init__( overwrite, project, filehandler, + from_project: bool = False, **kwargs, ): super().__init__( @@ -95,6 +96,7 @@ def __init__( overwrite=overwrite, project=project, filehandler=filehandler, + from_project=from_project, ) if self.directory is not None: From 94d4f77ad4bdfbd1bfada00bfb4e9bba37551551 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Mon, 27 Jan 2025 15:01:41 +0100 Subject: [PATCH 48/56] ruff linting + fix typing issues --- src/scportrait/pipeline/featurization.py | 51 ++++++++++-------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/src/scportrait/pipeline/featurization.py b/src/scportrait/pipeline/featurization.py index 4303a2ba..9a0cbee5 100644 --- a/src/scportrait/pipeline/featurization.py +++ b/src/scportrait/pipeline/featurization.py @@ -119,7 +119,9 @@ def _get_nmasks(self): for file in self.extraction_file: with h5py.File(file, "r") as f: 
n_masks.append(f["n_masks"][()].item()) - assert (x == n_masks[0] for x in n_masks), "number of masks are not consistent over all passed HDF5 files." + assert ( + x == n_masks[0] for x in n_masks + ), "number of masks are not consistent over all passed HDF5 files." self.n_masks = n_masks[0] try: self.n_masks = h5py.File(self.extraction_file, "r")["n_masks"][()].item() @@ -188,7 +190,7 @@ def _setup_inference_device(self): self.inference_device = self._detect_automatic_inference_device() self.log(f"Automatically configured inferece device to {self.inference_device}") - def _general_setup(self, extraction_dir: str, return_results: bool = False): + def _general_setup(self, extraction_dir: str | list[str], return_results: bool = False): """Helper function to execute all setup functions that are common to all featurization steps.""" self.extraction_file = extraction_dir @@ -834,11 +836,7 @@ def _setup(self, extraction_dir: str, return_results: bool): self._setup_encoders() self._setup_transforms() - def process(self, - extraction_dir: str, - labels: int | list[int] = 0, - size: int = 0, - return_results: bool = False): + def process(self, extraction_dir: str, labels: int | list[int] = 0, size: int = 0, return_results: bool = False): """ Perform classification on the provided HDF5 dataset. @@ -919,7 +917,7 @@ class based on the previous single-cell extraction. Therefore, only the second a self.dataloader = self.generate_dataloader( extraction_dir, - labels = labels, + labels=labels, selected_transforms=self.transforms, size=size, dataset_class=self.DEFAULT_DATA_LOADER, @@ -1012,11 +1010,7 @@ def _setup(self, extraction_dir: str, return_results: bool): self._load_models() - def process(self, - extraction_dir: str, - labels: int | list[int] = 0, - size: int = 0, - return_results: bool = False): + def process(self, extraction_dir: str, labels: int | list[int] = 0, size: int = 0, return_results: bool = False): """ Function called to perform classification on the provided HDF5 dataset. @@ -1075,7 +1069,7 @@ class based on the previous single-cell extraction. Therefore, no parameters nee self.dataloader = self.generate_dataloader( extraction_dir, - labels = labels, + labels=labels, selected_transforms=self.transforms, size=size, dataset_class=self.DEFAULT_DATA_LOADER, @@ -1149,7 +1143,9 @@ def _get_channel_specs(self): for file in self.extraction_file: with h5py.File(file, "r") as f: channel_names.append(list(f["channel_information"][:].astype(str))) - assert (x == channel_names[0] for x in channel_names), "Channel names are not consistent over all passed HDF5 files." + assert ( + x == channel_names[0] for x in channel_names + ), "Channel names are not consistent over all passed HDF5 files." 
                self.channel_names = channel_names[0]
        else:
            if "channel_names" in self.project.__dict__.keys():
@@ -1367,16 +1363,14 @@ def __init__(self, *args, **kwargs):
            self.channel_selection = None  # ensure that all images are passed to the function

-    def _setup(self, extraction_dir: str, return_results: bool):
+    def _setup(self, extraction_dir: str | list[str], return_results: bool):
        self._general_setup(extraction_dir=extraction_dir, return_results=return_results)
        self._setup_transforms()
        self._get_channel_specs()

-    def process(self,
-                extraction_dir: str | list[str],
-                labels: int | list[int] = 0,
-                size: int = 0,
-                return_results: bool = False):
+    def process(
+        self, extraction_dir: str | list[str], labels: int | list[int] = 0, size: int = 0, return_results: bool = False
+    ):
        """
        Perform featurization on the provided HDF5 dataset.

@@ -1435,7 +1429,7 @@ def process(

        self.dataloader = self.generate_dataloader(
            extraction_dir,
-            labels = labels,
+            labels=labels,
            selected_transforms=self.transforms,
            size=size,
            dataset_class=self.DEFAULT_DATA_LOADER,
@@ -1490,18 +1484,15 @@ def _setup_channel_selection(self):
            self.channel_selection = [0, self.channel_selection]
        return

-    def _setup(self, extraction_dir: str, return_results: bool):
+    def _setup(self, extraction_dir: str | list[str], return_results: bool):
        self._general_setup(extraction_dir=extraction_dir, return_results=return_results)
        self._setup_channel_selection()
        self._setup_transforms()
        self._get_channel_specs()

-    def process(self,
-                extraction_dir: str | list[str],
-                labels: int | list[int] = 0,
-                size=0,
-                return_results: bool = False):
-
+    def process(
+        self, extraction_dir: str | list[str], labels: int | list[int] = 0, size=0, return_results: bool = False
+    ):
        self.log(f"Started CellFeaturization of selected channel {self.channel_selection}.")

        # perform setup
@@ -1509,7 +1500,7 @@ def process(self,

        self.dataloader = self.generate_dataloader(
            extraction_dir,
-            labels = labels,
+            labels=labels,
            selected_transforms=self.transforms,
            size=size,
            dataset_class=self.DEFAULT_DATA_LOADER,
From 06835c5643eea892f56610c531ef21dcf8da99d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com>
Date: Mon, 27 Jan 2025 15:05:31 +0100
Subject: [PATCH 49/56] fix: remove unnecessary print statement
---
 src/scportrait/pipeline/selection.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/scportrait/pipeline/selection.py b/src/scportrait/pipeline/selection.py
index 87b10e81..aa53235c 100644
--- a/src/scportrait/pipeline/selection.py
+++ b/src/scportrait/pipeline/selection.py
@@ -316,7 +316,6 @@ def process(
        start_time = timeit.default_timer()
        cell_ids = self._get_cell_ids(cell_sets)
        centers = self._get_centers(cell_ids)
-        print("Here", flush=True)
        coord_index = self._get_coords(
            cell_ids=cell_ids, centers=centers, width=self.cell_radius, batch_size=self.batch_size, threads=self.threads
        )
From 8547d0fc777a8659279acbda3a81eef62b2a2a5b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com>
Date: Mon, 27 Jan 2025 15:19:14 +0100
Subject: [PATCH 50/56] silence dask warning

Addresses #139.
---
 src/scportrait/__init__.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/scportrait/__init__.py b/src/scportrait/__init__.py
index 972a0f85..159650a1 100644
--- a/src/scportrait/__init__.py
+++ b/src/scportrait/__init__.py
@@ -1,7 +1,13 @@
 """Top-level package for scPortrait"""

+# silence warnings
+import warnings
+
 from scportrait import io
 from
scportrait import pipeline as pipeline from scportrait import plotting as pl from scportrait import processing as pp from scportrait import tools as tl + +# silence warning from spatialdata resulting in an older dask version see #139 +warnings.filterwarnings("ignore", message="ignoring keyword argument 'read_only'") From a4373e8a8a04cfb912e4f1b2aee685299f4d871d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Mon, 27 Jan 2025 15:22:02 +0100 Subject: [PATCH 51/56] silence cellpose warning addresses #141 --- src/scportrait/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/scportrait/__init__.py b/src/scportrait/__init__.py index 159650a1..772f8753 100644 --- a/src/scportrait/__init__.py +++ b/src/scportrait/__init__.py @@ -11,3 +11,8 @@ # silence warning from spatialdata resulting in an older dask version see #139 warnings.filterwarnings("ignore", message="ignoring keyword argument 'read_only'") + +# silence warning from cellpose resulting in missing parameter set in model call see #141 +warnings.filterwarnings( + "ignore", message=r"You are using `torch.load` with `weights_only=False`.*", category=FutureWarning +) From 46b2fc1e415d8b60a8c24c87c13a8dda14c5ec51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Mon, 27 Jan 2025 17:03:12 +0100 Subject: [PATCH 52/56] update example notebooks --- examples/notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/notebooks b/examples/notebooks index 5a9b127f..c37c8473 160000 --- a/examples/notebooks +++ b/examples/notebooks @@ -1 +1 @@ -Subproject commit 5a9b127f06a39d326931728a0cf9850848fca205 +Subproject commit c37c8473d5a61923185d4a24d76d87c697037cb2 From c2073d1e2f16dc87a337ab5827e87da077d9aa34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com> Date: Mon, 27 Jan 2025 18:05:47 +0100 Subject: [PATCH 53/56] fix incorrect logic in check for segmentation masks --- src/scportrait/pipeline/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scportrait/pipeline/project.py b/src/scportrait/pipeline/project.py index 90f2bc38..c219ee3b 100644 --- a/src/scportrait/pipeline/project.py +++ b/src/scportrait/pipeline/project.py @@ -1093,7 +1093,7 @@ def select( self._check_sdata_status() - if not self.nuc_seg_status or not self.cyto_seg_status: + if not self.nuc_seg_status and not self.cyto_seg_status: raise ValueError("No nucleus or cytosol segmentation loaded. Please load a segmentation first.") assert self.sdata is not None, "No sdata object loaded." 
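Note on PATCH 53: with the previous `or`, the guard in select() raised as soon as either the nucleus or the cytosol segmentation was missing, although selection only needs one of the two masks; the corrected `and` raises only when both are absent. A self-contained truth-table sketch, with plain booleans standing in for self.nuc_seg_status and self.cyto_seg_status:

    # truth table for the guard fixed in PATCH 53; True means the mask is loaded
    for nuc, cyto in [(True, True), (True, False), (False, True), (False, False)]:
        old = not nuc or not cyto   # raised in three of the four cases
        new = not nuc and not cyto  # raises only when both masks are missing
        print(f"nuc={nuc}, cyto={cyto} -> old raises {old}, new raises {new}")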
From e059a315221195481441659cee3cff7bb8c570dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com>
Date: Mon, 27 Jan 2025 18:14:50 +0100
Subject: [PATCH 54/56] update notebook submodule commit number
---
 examples/notebooks | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/notebooks b/examples/notebooks
index c37c8473..904a93f3 160000
--- a/examples/notebooks
+++ b/examples/notebooks
@@ -1 +1 @@
-Subproject commit c37c8473d5a61923185d4a24d76d87c697037cb2
+Subproject commit 904a93f389dcb2d5b6ee0c172c48dcc173ca127d
From 864d25dac39b5c9112fd768656d3bf3fb18c252f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com>
Date: Fri, 31 Jan 2025 12:57:49 +0100
Subject: [PATCH 55/56] update py-lmd version to 1.3.1

This fixes a bug in py-lmd; in addition, py-lmd 1.3.1 supports numpy<=2.1.
---
 requirements.txt     | 2 +-
 requirements_dev.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 1a80991b..59b101aa 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -35,4 +35,4 @@ pyqt5
 lxml_html_clean
 ashlar>=1.19.0
 networkx
-py-lmd>=1.3.0
+py-lmd>=1.3.1
diff --git a/requirements_dev.txt b/requirements_dev.txt
index 57e26bb3..9ff75f16 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -35,7 +35,7 @@ pyqt5
 lxml_html_clean
 ashlar>=1.19.0
 networkx
-py-lmd>=1.3.0
+py-lmd>=1.3.1

#packages for building the documentation
sphinx
From 2070779697823abc92cd4f0987c77906f3a1d4be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sophia=20M=C3=A4dler?= <15019107+sophiamaedler@users.noreply.github.com>
Date: Fri, 31 Jan 2025 13:13:31 +0100
Subject: [PATCH 56/56] update submodule commit number
---
 examples/notebooks | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/notebooks b/examples/notebooks
index 904a93f3..d5ea844b 160000
--- a/examples/notebooks
+++ b/examples/notebooks
@@ -1 +1 @@
-Subproject commit 904a93f389dcb2d5b6ee0c172c48dcc173ca127d
+Subproject commit d5ea844b033e18d5fc3c82213c8bfde93465d47f
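Closing note: the series ends by tightening the py-lmd pin to >=1.3.1 in both requirements files. A quick, hedged way to check that a local environment already satisfies the new pin; this assumes the distribution is installed under the name py-lmd and that the packaging library is available:

    # environment check for the py-lmd pin; importlib.metadata is standard library,
    # packaging must be installed separately
    from importlib.metadata import version
    from packaging.version import Version

    installed = Version(version("py-lmd"))
    assert installed >= Version("1.3.1"), f"py-lmd {installed} is older than the 1.3.1 pin"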