DigitalSlideArchive · Leengit · Sep 17, 2024 · Sep 16, 2024 · Sep 16, 2024 · Sep 16, 2024
diff --git a/.github/workflows/build-test-package.yml b/.github/workflows/build-test-package.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       max-parallel: 2
       matrix:
-        python-version: ["3.6", "3.7", "3.8", "3.9"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
 
         include:
           - flake8-python-git-tag: ""
@@ -41,7 +41,11 @@ jobs:
 
       - name: Install histomics_stream
         run: |
-          pip install .
+          pip install .[tensorflow,torch,zarr]
+          # With Python 3.8, installing tensorflow downgrades typing-extensions. The
+          # downgrade appears to be unnecessary and it breaks a dependency of
+          # large_image, so we override it by upgrading typing-extensions next.
+          pip install --upgrade typing-extensions
 
       - name: Lint with flake8
         run: |

diff --git a/README.md b/README.md
@@ -16,7 +16,9 @@ sudo apt install -y python3-openslide openslide-tools
 pip install histomics_stream 'large_image[openslide]' \
   scikit_image --find-links https://girder.github.io/large_image_wheels
 ```
-Note that additional image readers can be supported by using, e.g., `large_image[openslide,ometiff,openjpeg,bioformats]` instead of `large_image[openslide]`.
+Additional functionality is supported through optional extras, e.g., `histomics_stream[tensorflow,torch,zarr]`. These extras are not needed when histomics_stream is used only for masking and/or organizing image tiles into larger image chunks that are more efficient to read than individual image tiles. However, if you are creating a tensorflow `Dataset` or a pytorch `DataLoader` then you will need the corresponding extra (`tensorflow` or `torch`).
+
+Additional image readers can be supported by using, e.g., `large_image[openslide,ometiff,openjpeg,bioformats]` instead of `large_image[openslide]`.
 
 After launching `python3`, import the `histomics_stream` package with:
 

diff --git a/example/pytorch_stream.ipynb b/example/pytorch_stream.ipynb
@@ -33,7 +33,7 @@
     "!apt update\n",
     "!apt install -y python3-openslide openslide-tools\n",
     "!pip install 'large_image[openslide,tiff]' --find-links https://girder.github.io/large_image_wheels\n",
-    "!pip install histomics_stream\n",
+    "!pip install histomics_stream[torch]\n",
     "\n",
     "# Get other packages used in this notebook\n",
     "# N.B. itkwidgets works with jupyter<=3.0.0\n",

diff --git a/example/tensorflow_stream.ipynb b/example/tensorflow_stream.ipynb
@@ -33,7 +33,7 @@
     "!apt update\n",
     "!apt install -y python3-openslide openslide-tools\n",
     "!pip install 'large_image[openslide,tiff]' --find-links https://girder.github.io/large_image_wheels\n",
-    "!pip install histomics_stream\n",
+    "!pip install histomics_stream[tensorflow]\n",
     "\n",
     "# Get other packages used in this notebook\n",
     "# N.B. itkwidgets works with jupyter<=3.0.0\n",

diff --git a/histomics_stream/configure.py b/histomics_stream/configure.py
@@ -26,8 +26,6 @@
 import re
 
 import itk
-import large_image
-import large_image_source_tiff
 import numpy as np
 import scipy.interpolate
 
@@ -185,7 +183,6 @@ def __call__(self, slide):
 
         # Do the work.
         if not re.compile(r"\.zarr$").search(filename):
-
             # create large_image, prioritizing tiff source over openslide
             try:
                 import large_image_source_tiff
@@ -592,7 +589,7 @@ def __call__(self, slide):
         if 0 <= self.randomly_select < len(slide["tiles"]):
             # Choose a subset of the tiles randomly
             slide["tiles"] = dict(
-                random.sample(slide["tiles"].items(), self.randomly_select)
+                random.sample(sorted(slide["tiles"].items()), self.randomly_select)
             )
 
     def check_mask_filename(self, mask_filename):
@@ -809,7 +806,7 @@ def __call__(self, slide):
         if 0 <= self.randomly_select < len(slide["tiles"]):
             # Choose a subset of the tiles randomly
             slide["tiles"] = dict(
-                random.sample(slide["tiles"].items(), self.randomly_select)
+                random.sample(sorted(slide["tiles"].items()), self.randomly_select)
             )
 
 
@@ -1060,6 +1057,9 @@ def read_large_image(
         # ChunkLocations.read_large_image._num_chunks += 1
 
         # print(f"{chunk_name} begin {datetime.datetime.now()}")
+        import large_image
+        import large_image_source_tiff
+
         ts = (
             large_image_source_tiff.open(filename)
             if os.path.splitext(filename)[1] in (".tif", ".tiff", ".svs")

diff --git a/pyproject.toml b/pyproject.toml
@@ -8,19 +8,28 @@ readme = "README.md"
 requires-python = ">=3.6"
 authors = [{name = "Lee A. Newberg", email = "[email protected]"}]
 maintainers = [{name = "Lee A. Newberg", email = "[email protected]"}]
-keywords = ["tensorflow", "whole slide image", "stream", "machine learning"]
+keywords = ["tensorflow", "torch", "whole slide image", "stream", "machine learning"]
 classifiers = ["License :: OSI Approved :: Apache Software License"]
 dependencies = [
     "imagecodecs",
     "itk",
     "numcodecs",
     "numpy",
     "scipy",
+]
+dynamic = ["version", "description"]
+
+[project.optional-dependencies]
+tensorflow = [
     "tensorflow<3.0.0",
+    "keras",
+]
+torch = [
     "torch<2.0.0",
+]
+zarr = [
     "zarr",
 ]
-dynamic = ["version", "description"]
 
 [project.urls]
 Source = "https://github.com/DigitalSlideArchive/HistomicsStream"

diff --git a/test/test_mask.py b/test/test_mask.py
@@ -27,12 +27,10 @@ def test_mask_threshold():
     wsi_path = pooch.retrieve(
         fname="TCGA-AN-A0G0-01Z-00-DX1.svs",
         url=(
-            "https://drive.google.com/uc"
+            "https://drive.usercontent.google.com/download"
             "?export=download"
             "&id=19agE_0cWY582szhOVxp9h3kozRfB4CvV"
             "&confirm=t"
-            "&uuid=6f2d51e7-9366-4e98-abc7-4f77427dd02c"
-            "&at=ALgDtswlqJJw1KU7P3Z1tZNcE01I:1679111148632"
         ),
         known_hash="d046f952759ff6987374786768fc588740eef1e54e4e295a684f3bd356c8528f",
         path=str(pooch.os_cache("pooch")) + os.sep + "wsi",
@@ -43,9 +41,10 @@ def test_mask_threshold():
     mask_path = pooch.retrieve(
         fname="TCGA-AN-A0G0-01Z-00-DX1.mask.png",
         url=(
-            "https://drive.google.com/uc"
+            "https://drive.usercontent.google.com/download"
             "?export=download"
             "&id=17GOOHbL8Bo3933rdIui82akr7stbRfta"
+            "&confirm=t"
         ),
         known_hash="bb657ead9fd3b8284db6ecc1ca8a1efa57a0e9fd73d2ea63ce6053fbd3d65171",
         path=str(pooch.os_cache("pooch")) + os.sep + "wsi",