Skip to content

Commit

Permalink
Internal change
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 503209447
  • Loading branch information
Google Health authored and 12dmodel committed Jan 19, 2023
1 parent d8eb6be commit 88cc373
Show file tree
Hide file tree
Showing 9 changed files with 656 additions and 0 deletions.
1 change: 1 addition & 0 deletions breast_survival_prediction/CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
We are currently not accepting contributions for this project.
12 changes: 12 additions & 0 deletions breast_survival_prediction/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Breast Cancer Survival Prediction: Stage-2 Featurization Code

This repo contain code to reproduce the stage2 featurization of all three
components of the automatic Nottingham grading system. This is described in the
paper "Deep learning models for histologic grading of breast cancer and
association with disease prognosis"
[[Link](https://doi.org/10.1038/s41523-022-00478-y)].
For an example of how the code should be used, please refer to example.ipynb.

NOTE: the content of this research code repository (i) is not intended to be a
medical device; and (ii) is not intended for clinical use of any kind, including
but not limited to diagnosis or prognosis.
256 changes: 256 additions & 0 deletions breast_survival_prediction/example.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "kIbu0zjWkPZn"
},
"source": [
"This notebook illustrates the usage of stage-2 features as described in \"Deep learning models for histologic grading of breast cancer and association with disease prognosis\"."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "eN2-OtOHSUbp"
},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "tIbsTm9fyMp5"
},
"source": [
"# Mitotic Count"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ymkgekfhSpTA"
},
"outputs": [],
"source": [
"def generate_mitotic_heatmap(heatmap_size, list_of_coordinates, mitosis_size=6):\n",
" \"\"\"Generates mitotic heatmap with the given list of coordinates.\n",
" \n",
" Args:\n",
" heatmap_size: size of the heatmap to generate\n",
" list_of_coordinate: coordinates (tuple) of center of the mitoses.\n",
" mitosis_size: size of each mitosis.\n",
" Returns:\n",
" Heatmaps that represent mitosis detection.\n",
" \"\"\"\n",
" half_mitosis_size = int(mitosis_size / 2)\n",
" heatmap = np.zeros(heatmap_size)\n",
" for coord in list_of_coordinates:\n",
" y, x = coord\n",
" y = y - half_mitosis_size\n",
" x = x - half_mitosis_size\n",
" heatmap[y :(y + mitosis_size - 1), x:(x + mitosis_size - 1)] = 1\n",
" return heatmap\n",
"\n",
"\n",
"def detect_and_calc_density(heatmap,\n",
" detection_th=0.5,\n",
" morph_erode_size=4,\n",
" window_size=128,\n",
" stride=64):\n",
" \"\"\"Combined steps of detection and density calculation.\n",
"\n",
" Args:\n",
" heatmap: 2D array of shape (height, width) that represent probability of\n",
" mitotic activity.\n",
" detection_th: detection threshold, see mc_util.heatmap_to_list.\n",
" morph_erode_size: size of structuring element for detection cleanup, see\n",
" mc_util.heatmap_to_list.\n",
" stride: density window stride, see mc_util.calculate_density.\n",
" window_size: density window size, see mc_util.calculate_density.\n",
"\n",
" Returns:\n",
" Dict of detection and density.\n",
" \"\"\"\n",
" # Resize mask so it is in the same size as the heatmap.\n",
"\n",
" detection = mc_util.heatmap_to_list(\n",
" heatmap,\n",
" detection_th,\n",
" morph_erode_size=morph_erode_size)\n",
" heatmap_size = heatmap.shape\n",
"\n",
" density = mc_util.calculate_density(\n",
" detection, heatmap_size, window_size, stride)\n",
" return {'density': density, 'detection': detection}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"executionInfo": {
"elapsed": 326,
"status": "ok",
"timestamp": 1659999029378,
"user": {
"displayName": "",
"userId": ""
},
"user_tz": 420
},
"id": "ggoAd2z3ZOI0",
"outputId": "a164b777-c101-4470-f022-07c6632c098f"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Input mitosis list: [(256, 650), (265, 467), (267, 514), (279, 443), (287, 458), (288, 438), (294, 744), (297, 314), (298, 627), (299, 616)]\n",
"Detected mitosis list: [(255, 649), (264, 466), (266, 513), (278, 442), (286, 457), (287, 437), (293, 743), (296, 313), (297, 626), (298, 615)]\n",
"calculated_features: [0. 0. 0. 0.00012207 0.0004425 ]\n"
]
}
],
"source": [
"np.random.seed(0)\n",
"\n",
"heatmap_size = (1024, 1024)\n",
"n_mitosis = 100\n",
"list_of_mitosis = [(np.random.randint(256, 768), np.random.randint(256, 768)) for _ in range(n_mitosis)]\n",
"\n",
"\n",
"heatmap = generate_mitotic_heatmap(heatmap_size, list_of_mitosis)\n",
"res = detect_and_calc_density(heatmap)\n",
"mc_features = stage2_features.mc_featurizer(res)\n",
"detected_mitosis = [(int(x[0]), int(x[1])) for x in res['detection']]\n",
"print('Input mitosis list: ', sorted(list_of_mitosis, key=lambda x: x[0])[:10])\n",
"print('Detected mitosis list:',sorted(detected_mitosis, key=lambda x: x[0])[:10])\n",
"print('calculated_features:', mc_features)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6O-4d1q5yOiX"
},
"source": [
"# Nuclear Pleomorphism and Tubule Formation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "DHNUaryHzC2i"
},
"outputs": [],
"source": [
"ic_heatmap = [\n",
" np.zeros((5, 4)),\n",
"[\n",
" [np.nan, np.nan, np.nan, np.nan,],\n",
" [1, 1, 1, 0],\n",
" [1, 1, 1, 0],\n",
" [1, 1, 1, 0],\n",
" [1, np.nan, np.nan, np.nan,],\n",
"]\n",
"]\n",
"# NP/TF 1 heatmap: 0.4 of IC area, 0.33 of non-IC area\n",
"nptf1_heatmap = [\n",
" [0, 0, 0, 0],\n",
" [1, 1, 1, 1],\n",
" [1, 0, 0, 0],\n",
" [0, 0, 0, 0],\n",
" [0, 0, 0, 0],\n",
"]\n",
"# NP/TF 2 heatmap: 0.2 of IC area, 0.66 of non-IC area\n",
"nptf2_heatmap = [\n",
" [0, 0, 0, 0],\n",
" [0, 0, 0, 0],\n",
" [0, 1, 1, 1],\n",
" [0, 0, 0, 1],\n",
" [0, 0, 0, 0],\n",
"]\n",
"# NP/TF 3 heatmap: 0.4 of IC area, 0.0 of non-IC area\n",
"nptf3_heatmap = [\n",
" [0, 0, 0, 0],\n",
" [0, 0, 0, 0],\n",
" [0, 0, 0, 0],\n",
" [1, 1, 1, 0],\n",
" [1, 0, 0, 0],\n",
"]\n",
"\n",
"# IC Heatmap is expected to be 3D with last channel representing the probability of being invasive carcinoma.\n",
"ic_heatmap = np.dstack(ic_heatmap)\n",
"nptf_heatmap = np.dstack([nptf1_heatmap, nptf2_heatmap, nptf3_heatmap])\n",
"\n",
"tmap = {'ic_heatmap': ic_heatmap, 'heatmap': nptf_heatmap}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"executionInfo": {
"elapsed": 288,
"status": "ok",
"timestamp": 1659999030423,
"user": {
"displayName": "",
"userId": ""
},
"user_tz": 420
},
"id": "NKwIr4KSgsxd",
"outputId": "263e1c38-3c1b-4ae4-f3e0-c3d58e676fdb"
},
"outputs": [
{
"data": {
"text/plain": [
"array([0.4 , 0.2 , 0.4 , 0.33333333, 0.66666667,\n",
" 0. ])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stage2_features.np_tf_featurizer(tmap)"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"last_runtime": {
"build_target": "",
"kind": "local"
},
"name": "Example Usages of Stage-2 Featurization",
"provenance": [
{
"file_id": "1e_Vb20SWXN6aL_IDabLRxLZppERlI5Uj",
"timestamp": 1659388964753
}
]
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
91 changes: 91 additions & 0 deletions breast_survival_prediction/mitotic_features_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""Utilities function for calculating second-stage features for mitotic model."""

from typing import List, Optional, Tuple
import cv2
import numpy as np
import PIL.Image


def heatmap_to_list(
hm: np.ndarray,
detection_th: float,
mask: Optional[np.ndarray] = None,
morph_erode_size: Optional[int] = None) -> List[Tuple[float]]:
"""Detect mitosis on the heatmap.
Args:
hm: 2D heatmap output from the mitotic model.
detection_th: Probability threshold for detection (float between 0-1).
mask: Area not to consider such as out of tissue or out of tumor.
morph_erode_size: Size of morphological eroding structuring element. This is
used as the clean up step.
Returns:
List of (row, column) heatmap-coordinate of the detected centroid.
"""
binarized_hm = hm > detection_th
if mask is not None:
binarized_hm = binarized_hm * mask
if morph_erode_size and morph_erode_size > 0:
binarized_hm = cv2.morphologyEx(
binarized_hm.astype('uint8'), cv2.MORPH_ERODE,
np.ones(morph_erode_size))
# into one connected component, see
# https://docs.opencv.org/3.4/dd/d46/imgproc_8hpp.html
# Index 3 is the centroid among other info.
detected_centroids = cv2.connectedComponentsWithStats(
binarized_hm.astype('uint8'), 8, cv2.CV_32S
)[3]
# The centroids include the background label, so we slice from the second
# element onward.
detected_centroids = detected_centroids[1:]
# convert to (row, column) format so that this is consistent with indexing.
return [(pt[1], pt[0]) for pt in detected_centroids]


def calculate_density(detected_centroids: List[Tuple[float]],
heatmap_shape: List[int],
window_size: int,
stride: int,
mask: Optional[np.ndarray] = None) -> np.ndarray:
"""Calculate density map.
Args:
detected_centroids: list of detected centroids in original heatmap's
coordinate. This can be the output of the heatmap_to_list function.
heatmap_shape: original heatmap shape (row, column).
window_size: size of windows to calculate density.
stride: overlap between each density windows. This is what control the size
of density map, similar to prediction_size in the inference pipeline.
mask: binary mask that specify area to compute density. If set, things
outside of mask will get NaN.
Returns:
numpy array representing the density map.
"""
density_shape = (int(heatmap_shape[0] // stride),
int(heatmap_shape[1] // stride))
window_area = window_size**2
density_map = np.zeros(density_shape)
density_map[:] = np.nan

def _is_inside(pt, tl):
y, x = pt[0], pt[1]
yb, xb = tl[0], tl[1]
return (x >= xb) and (x <= xb + window_size) and (y >= yb) and (
y <= yb + window_size)

if mask is not None:
# PIL expects (width, height), but the shape is
# (row, column) = (height, width)
m_pil = PIL.Image.fromarray(mask).resize(
(density_shape[1], density_shape[0]), PIL.Image.NEAREST)
mask = np.array(m_pil)
for i in range(density_shape[0]):
for j in range(density_shape[1]):
topleft = [i * stride, j * stride]
if (mask is not None) and (not mask[i][j]):
continue
inbox = [_is_inside(pt, topleft) for pt in detected_centroids]
density_map[i][j] = float(np.sum(inbox))
return density_map / window_area
Loading

0 comments on commit 88cc373

Please sign in to comment.