From 92c8a2b4faefa9c3709f31a4bc2297606dcedc3b Mon Sep 17 00:00:00 2001 From: Lee Newberg Date: Mon, 16 Sep 2024 14:59:18 -0400 Subject: [PATCH 1/5] ENH: Make tensorflow,torch,zarr packages optional --- .github/workflows/build-test-package.yml | 2 +- histomics_stream/configure.py | 6 +++--- pyproject.toml | 13 +++++++++++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-test-package.yml b/.github/workflows/build-test-package.yml index 53c48e9..77fa663 100644 --- a/.github/workflows/build-test-package.yml +++ b/.github/workflows/build-test-package.yml @@ -41,7 +41,7 @@ jobs: - name: Install histomics_stream run: | - pip install . + pip install .[tensorflow,torch,zarr] - name: Lint with flake8 run: | diff --git a/histomics_stream/configure.py b/histomics_stream/configure.py index 306c20b..253c6fd 100644 --- a/histomics_stream/configure.py +++ b/histomics_stream/configure.py @@ -26,8 +26,6 @@ import re import itk -import large_image -import large_image_source_tiff import numpy as np import scipy.interpolate @@ -185,7 +183,6 @@ def __call__(self, slide): # Do the work. if not re.compile(r"\.zarr$").search(filename): - # create large_image, prioritizing tiff source over openslide try: import large_image_source_tiff @@ -1060,6 +1057,9 @@ def read_large_image( # ChunkLocations.read_large_image._num_chunks += 1 # print(f"{chunk_name} begin {datetime.datetime.now()}") + import large_image + import large_image_source_tiff + ts = ( large_image_source_tiff.open(filename) if os.path.splitext(filename)[1] in (".tif", ".tiff", ".svs") diff --git a/pyproject.toml b/pyproject.toml index 45ace60..3acb64b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ readme = "README.md" requires-python = ">=3.6" authors = [{name = "Lee A. Newberg", email = "lee.newberg@kitware.com"}] maintainers = [{name = "Lee A. 
Newberg", email = "lee.newberg@kitware.com"}] -keywords = ["tensorflow", "whole slide image", "stream", "machine learning"] +keywords = ["tensorflow", "torch", "whole slide image", "stream", "machine learning"] classifiers = ["License :: OSI Approved :: Apache Software License"] dependencies = [ "imagecodecs", @@ -16,11 +16,20 @@ dependencies = [ "numcodecs", "numpy", "scipy", +] +dynamic = ["version", "description"] + +[project.optional-dependencies] +tensorflow = [ "tensorflow<3.0.0", + "keras", +] +torch = [ "torch<2.0.0", +] +zarr = [ "zarr", ] -dynamic = ["version", "description"] [project.urls] Source = "https://github.com/DigitalSlideArchive/HistomicsStream" From 57269f0fe9b3bf2172b7b196a59efbf4f2bb3eb0 Mon Sep 17 00:00:00 2001 From: Lee Newberg Date: Mon, 16 Sep 2024 15:37:30 -0400 Subject: [PATCH 2/5] DOC: Document the torch and tensorflow optional packages --- README.md | 4 +++- example/pytorch_stream.ipynb | 2 +- example/tensorflow_stream.ipynb | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6207526..00038b5 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,9 @@ sudo apt install -y python3-openslide openslide-tools pip install histomics_stream 'large_image[openslide]' \ scikit_image --find-links https://girder.github.io/large_image_wheels ``` -Note that additional image readers can be supported by using, e.g., `large_image[openslide,ometiff,openjpeg,bioformats]` instead of `large_image[openslide]`. +Additional functionality is supported with subpackages, e.g., `histomics_stream[tensorflow,torch,zarr]`. These packages are optional when histomics_stream is used only for masking and/or organizing image tiles into larger image chunks that are more efficient to read than individual image tiles. However, if you are creating a tensorflow `Dataset` or a pytorch `DataLoader` then you will need the corresponding packages. 
+ +Additional image readers can be supported by using, e.g., `large_image[openslide,ometiff,openjpeg,bioformats]` instead of `large_image[openslide]`. After launching `python3`, import the `histomics_stream` package with: diff --git a/example/pytorch_stream.ipynb b/example/pytorch_stream.ipynb index 0b17e53..653782b 100644 --- a/example/pytorch_stream.ipynb +++ b/example/pytorch_stream.ipynb @@ -33,7 +33,7 @@ "!apt update\n", "!apt install -y python3-openslide openslide-tools\n", "!pip install 'large_image[openslide,tiff]' --find-links https://girder.github.io/large_image_wheels\n", - "!pip install histomics_stream\n", + "!pip install histomics_stream[torch]\n", "\n", "# Get other packages used in this notebook\n", "# N.B. itkwidgets works with jupyter<=3.0.0\n", diff --git a/example/tensorflow_stream.ipynb b/example/tensorflow_stream.ipynb index 1e1d4c5..cd8e052 100644 --- a/example/tensorflow_stream.ipynb +++ b/example/tensorflow_stream.ipynb @@ -33,7 +33,7 @@ "!apt update\n", "!apt install -y python3-openslide openslide-tools\n", "!pip install 'large_image[openslide,tiff]' --find-links https://girder.github.io/large_image_wheels\n", - "!pip install histomics_stream\n", + "!pip install histomics_stream[tensorflow]\n", "\n", "# Get other packages used in this notebook\n", "# N.B. 
itkwidgets works with jupyter<=3.0.0\n", From 0c310902472cd57d805c2660da874fb5ded39bde Mon Sep 17 00:00:00 2001 From: Lee Newberg Date: Mon, 16 Sep 2024 18:21:53 -0400 Subject: [PATCH 3/5] TEST: Update URLs for test case images --- test/test_mask.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/test_mask.py b/test/test_mask.py index 83bbdaa..7041143 100644 --- a/test/test_mask.py +++ b/test/test_mask.py @@ -27,12 +27,10 @@ def test_mask_threshold(): wsi_path = pooch.retrieve( fname="TCGA-AN-A0G0-01Z-00-DX1.svs", url=( - "https://drive.google.com/uc" + "https://drive.usercontent.google.com/download" "?export=download" "&id=19agE_0cWY582szhOVxp9h3kozRfB4CvV" "&confirm=t" - "&uuid=6f2d51e7-9366-4e98-abc7-4f77427dd02c" - "&at=ALgDtswlqJJw1KU7P3Z1tZNcE01I:1679111148632" ), known_hash="d046f952759ff6987374786768fc588740eef1e54e4e295a684f3bd356c8528f", path=str(pooch.os_cache("pooch")) + os.sep + "wsi", @@ -43,9 +41,10 @@ def test_mask_threshold(): mask_path = pooch.retrieve( fname="TCGA-AN-A0G0-01Z-00-DX1.mask.png", url=( - "https://drive.google.com/uc" + "https://drive.usercontent.google.com/download" "?export=download" "&id=17GOOHbL8Bo3933rdIui82akr7stbRfta" + "&confirm=t" ), known_hash="bb657ead9fd3b8284db6ecc1ca8a1efa57a0e9fd73d2ea63ce6053fbd3d65171", path=str(pooch.os_cache("pooch")) + os.sep + "wsi", From a69556e2c802ceed282c11ed448f66253e62aa23 Mon Sep 17 00:00:00 2001 From: Lee Newberg Date: Mon, 16 Sep 2024 19:06:07 -0400 Subject: [PATCH 4/5] TEST: Perform GitHub testing with Pythons 3.8-3.11 Also, run `pip install --upgrade typing-extensions` on GitHub after installing the tensorflow dependency of histomics_stream because tensorflow under Python 3.8 inappropriately downgrades typing-extensions and that breaks a dependency of large_image. 
--- .github/workflows/build-test-package.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-test-package.yml b/.github/workflows/build-test-package.yml index 77fa663..9ca7ffa 100644 --- a/.github/workflows/build-test-package.yml +++ b/.github/workflows/build-test-package.yml @@ -8,7 +8,7 @@ jobs: strategy: max-parallel: 2 matrix: - python-version: ["3.6", "3.7", "3.8", "3.9"] + python-version: ["3.8", "3.9", "3.10", "3.11"] include: - flake8-python-git-tag: "" @@ -42,6 +42,10 @@ jobs: - name: Install histomics_stream run: | pip install .[tensorflow,torch,zarr] + # With Python 3.8, tensorflow downgrades typing-extensions, which appears to + # be unnecessary and breaks a dependency of large_image, so we overrule that + # next. + pip install --upgrade typing-extensions - name: Lint with flake8 run: | From 8b3bab7febedd3a1f55ff90ee18c486cc803bb45 Mon Sep 17 00:00:00 2001 From: Lee Newberg Date: Tue, 17 Sep 2024 12:33:15 -0400 Subject: [PATCH 5/5] BUG: Python 3.11 requires random.sample(_sequence_, int) --- histomics_stream/configure.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/histomics_stream/configure.py b/histomics_stream/configure.py index 253c6fd..0e34f93 100644 --- a/histomics_stream/configure.py +++ b/histomics_stream/configure.py @@ -589,7 +589,7 @@ def __call__(self, slide): if 0 <= self.randomly_select < len(slide["tiles"]): # Choose a subset of the tiles randomly slide["tiles"] = dict( - random.sample(slide["tiles"].items(), self.randomly_select) + random.sample(sorted(slide["tiles"].items()), self.randomly_select) ) def check_mask_filename(self, mask_filename): @@ -806,7 +806,7 @@ def __call__(self, slide): if 0 <= self.randomly_select < len(slide["tiles"]): # Choose a subset of the tiles randomly slide["tiles"] = dict( - random.sample(slide["tiles"].items(), self.randomly_select) + random.sample(sorted(slide["tiles"].items()), self.randomly_select) )