Skip to content

Commit

Permalink
Add CytoDataFrame segmentation mask outline capabilities for image di…
Browse files Browse the repository at this point in the history
…splay in notebooks (#59)

* add draw_outline_on_image method

* add class metadata for handling mask data

* linting

* make CytoDataFrame adjustments for segmentation masks

* rerun with example notebook for outlier display

* adjust test for mask outlines

* add in try except for image processing

* correct documentation

Co-Authored-By: Jenna Tomkinson <[email protected]>

---------

Co-authored-by: Jenna Tomkinson <[email protected]>
  • Loading branch information
d33bs and jenna-tomkinson authored Jul 25, 2024
1 parent b0957e3 commit 82ffe71
Show file tree
Hide file tree
Showing 7 changed files with 416 additions and 62 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ cython_debug/

# test data ignores
*.tif
*.tiff
*.sqlite
*.parquet
*.zip
Expand Down
79 changes: 41 additions & 38 deletions docs/examples/cosmicqc_in_a_nutshell.ipynb

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion docs/examples/cosmicqc_in_a_nutshell.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,14 @@

# set a context directory for images associated with the dataset
image_context_dir = pathlib.Path(data_path).parent / "Plate_2_images"
mask_context_dir = pathlib.Path(data_path).parent / "Plate_2_masks"

# create a cosmicqc CytoDataFrame (single-cell DataFrame)
scdf = cosmicqc.CytoDataFrame(data=data_path, data_context_dir=image_context_dir)
scdf = cosmicqc.CytoDataFrame(
data=data_path,
data_context_dir=image_context_dir,
data_mask_context_dir=mask_context_dir,
)

# display the dataframe
scdf
Expand Down
3 changes: 3 additions & 0 deletions src/cosmicqc/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ def identify_outliers(
axis=1,
),
data_context_dir=df._custom_attrs["data_context_dir"],
data_mask_context_dir=df._custom_attrs["data_mask_context_dir"],
)
)

Expand Down Expand Up @@ -289,6 +290,7 @@ def label_outliers( # noqa: PLR0913
axis=1,
),
data_context_dir=df._custom_attrs["data_context_dir"],
data_mask_context_dir=df._custom_attrs["data_mask_context_dir"],
)

# for multiple outlier processing
Expand All @@ -315,6 +317,7 @@ def label_outliers( # noqa: PLR0913
result = CytoDataFrame(
labeled_df.loc[:, ~labeled_df.columns.duplicated()],
data_context_dir=df._custom_attrs["data_context_dir"],
data_mask_context_dir=df._custom_attrs["data_mask_context_dir"],
)

# export the file if specified
Expand Down
114 changes: 98 additions & 16 deletions src/cosmicqc/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
Union,
)

import numpy as np
import pandas as pd
import plotly
import plotly.colors as pc
Expand All @@ -34,7 +35,7 @@
from pandas.io.formats import (
format as fmt,
)
from PIL import Image
from PIL import Image, ImageDraw

# provide backwards compatibility for Self type in earlier Python versions.
# see: https://peps.python.org/pep-0484/#annotating-instance-and-class-methods
Expand All @@ -61,11 +62,12 @@ class CytoDataFrame(pd.DataFrame):

_metadata: ClassVar = ["_custom_attrs"]

def __init__(
def __init__( # noqa: PLR0912
self: CytoDataFrame_type,
data: Union[CytoDataFrame_type, pd.DataFrame, str, pathlib.Path],
data_context_dir: Optional[str] = None,
data_bounding_box: Optional[pd.DataFrame] = None,
data_mask_context_dir: Optional[str] = None,
**kwargs: Dict[str, Any],
) -> None:
"""
Expand All @@ -78,6 +80,8 @@ def __init__(
Directory context for the image data within the DataFrame.
data_bounding_box (Optional[pd.DataFrame]):
Bounding box data for the DataFrame images.
data_mask_context_dir: Optional[str]:
Directory context for the mask data for images.
**kwargs:
Additional keyword arguments to pass to the pandas read functions.
"""
Expand All @@ -86,16 +90,23 @@ def __init__(
"data_source": None,
"data_context_dir": None,
"data_bounding_box": None,
"data_mask_context_dir": None,
}

if data_context_dir is not None:
self._custom_attrs["data_context_dir"] = data_context_dir

if data_mask_context_dir is not None:
self._custom_attrs["data_mask_context_dir"] = data_mask_context_dir

if isinstance(data, CytoDataFrame):
self._custom_attrs["data_source"] = data._custom_attrs["data_source"]
self._custom_attrs["data_context_dir"] = data._custom_attrs[
"data_context_dir"
]
self._custom_attrs["data_mask_context_dir"] = data._custom_attrs[
"data_mask_context_dir"
]
super().__init__(data)
elif isinstance(data, (pd.DataFrame, pd.Series)):
self._custom_attrs["data_source"] = (
Expand Down Expand Up @@ -157,6 +168,7 @@ def __getitem__(self: CytoDataFrame_type, key: Union[int, str]) -> Any: # noqa:
super().__getitem__(key),
data_context_dir=self._custom_attrs["data_context_dir"],
data_bounding_box=self._custom_attrs["data_bounding_box"],
data_mask_context_dir=self._custom_attrs["data_mask_context_dir"],
)

def _wrap_method(
Expand Down Expand Up @@ -190,6 +202,7 @@ def _wrap_method(
result,
data_context_dir=self._custom_attrs["data_context_dir"],
data_bounding_box=self._custom_attrs["data_bounding_box"],
data_mask_context_dir=self._custom_attrs["data_mask_context_dir"],
)
return result

Expand Down Expand Up @@ -591,33 +604,103 @@ def find_image_columns(self: CytoDataFrame_type) -> bool:
]

@staticmethod
def draw_outline_on_image(
    actual_image_path: str, mask_image_path: str
) -> Image.Image:
    """
    Draw the outline of a segmentation mask on top of an image.

    The image at ``actual_image_path`` is read and converted to RGBA.
    The mask at ``mask_image_path`` is read as 8-bit grayscale and
    binarized (any non-zero pixel becomes foreground). Its contours are
    traced and drawn as semi-transparent green lines on a transparent
    layer, which is then alpha-composited over the image.

    Args:
        actual_image_path (str):
            Path to the image file to annotate.
        mask_image_path (str):
            Path to the mask image file.

    Returns:
        PIL.Image.Image:
            An RGBA image combining the original image with the
            mask outline.

    Note:
        No exceptions are caught here; errors raised while reading
        either file propagate to the caller.
    """
    # Load the image to annotate and convert it to RGBA for compositing.
    # NOTE(review): the uint8 cast does not rescale pixel values, so data
    # outside 0-255 (e.g. 16-bit TIFFs) is not preserved - confirm inputs.
    image_array = skimage.io.imread(actual_image_path)
    base_image = Image.fromarray(np.uint8(image_array)).convert("RGBA")

    # Load the mask as grayscale. The context manager closes the underlying
    # file once the pixel data has been copied into the array.
    with Image.open(mask_image_path) as mask_file:
        mask_array = np.array(mask_file.convert("L"))

    # Binarize so that every non-zero pixel is treated as foreground (255).
    mask_array[mask_array > 0] = 255

    # Trace the boundary between background (0) and foreground (255).
    contours = skimage.measure.find_contours(mask_array, level=128)

    # Transparent layer, same size as the image, to hold the outline.
    outline_layer = Image.new("RGBA", base_image.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(outline_layer)

    for contour in contours:
        # Contour points arrive as (row, column); swap them to (x, y) image
        # coordinates and convert to plain ints for drawing.
        points = [(x, y) for y, x in np.round(contour).astype(int).tolist()]
        draw.line(points, fill=(0, 255, 0, 200), width=2)

    # Combine the image with the outline layer.
    return Image.alpha_composite(base_image, outline_layer)

def process_image_data_as_html_display(
self: CytoDataFrame_type,
data_value: Any, # noqa: ANN401
bounding_box: Tuple[int, int, int, int],
data_context_dir: Optional[str] = None,
) -> str:
if not pathlib.Path(data_value).is_file():
if not pathlib.Path(
candidate_path := (f"{data_context_dir}/{data_value}")
candidate_path := (
f"{self._custom_attrs['data_context_dir']}/{data_value}"
)
).is_file():
return data_value
else:
data_value = candidate_path
pass

try:
if self._custom_attrs["data_mask_context_dir"] is not None and (
matching_mask_file := list(
pathlib.Path(self._custom_attrs["data_mask_context_dir"]).glob(
f"{pathlib.Path(candidate_path).stem}*"
)
)
):
pil_image = self.draw_outline_on_image(
actual_image_path=candidate_path,
mask_image_path=matching_mask_file[0],
)

else:
# Read the TIFF image from the byte array
tiff_image = skimage.io.imread(candidate_path)

# Read the TIFF image from the byte array
tiff_image = skimage.io.imread(data_value)
# Convert the image array to a PIL Image
pil_image = Image.fromarray(tiff_image)

# Convert the image array to a PIL Image
pil_image = Image.fromarray(tiff_image)
cropped_img = pil_image.crop(bounding_box)

cropped_img = pil_image.crop(bounding_box)
# Save the PIL Image as PNG to a BytesIO object
png_bytes_io = BytesIO()
cropped_img.save(png_bytes_io, format="PNG")

# Save the PIL Image as PNG to a BytesIO object
png_bytes_io = BytesIO()
cropped_img.save(png_bytes_io, format="PNG")
# Get the PNG byte data
png_bytes = png_bytes_io.getvalue()

# Get the PNG byte data
png_bytes = png_bytes_io.getvalue()
except (FileNotFoundError, ValueError):
# return the raw data value if we run into an exception of some kind
print("Unable to process image from {candidate_path}")
return data_value

return (
'<img src="data:image/png;base64,'
Expand Down Expand Up @@ -694,7 +777,6 @@ def _repr_html_(
data.loc[display_indices, image_col] = data.loc[display_indices].apply(
lambda row: self.process_image_data_as_html_display(
data_value=row[image_col],
data_context_dir=self._custom_attrs["data_context_dir"],
bounding_box=(
row["Cytoplasm_AreaShape_BoundingBoxMinimum_X"],
row["Cytoplasm_AreaShape_BoundingBoxMinimum_Y"],
Expand Down
Loading

0 comments on commit 82ffe71

Please sign in to comment.