Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

extract shape of full-resolution image, estimate visium pixel size #219

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 193 additions & 1 deletion src/spatialdata_io/readers/visium.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
import os
import re
from collections.abc import Mapping
from enum import Enum
from pathlib import Path
from types import MappingProxyType
from typing import Any
from typing import Any, Tuple

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -277,3 +278,194 @@ def _read_image(image_file: Path, imread_kwargs: dict[str, Any]) -> Any:
else:
raise ValueError(f"Image shape {im.shape} is not supported.")
return image


def get_sdata_res(sdata: SpatialData):
    """
    Return the (c, y, x) shape of the full-resolution image of a Visium SpatialData object.

    The first image listed in ``sdata.images`` is used, and the sizes are read from
    its ``"scale0"`` level (the highest-resolution level of the multi-scale image).

    Parameters
    ----------
    sdata : SpatialData
        A SpatialData object whose first image is stored in a multi-scale format
        with a ``"scale0"`` level exposing ``c``, ``y`` and ``x`` dimensions.

    Returns
    -------
    shape : tuple
        The image shape as ``(channels, height, width)``:
        - c: Number of image channels.
        - y: Image height in pixels.
        - x: Image width in pixels.
    """
    first_image = list(sdata.images.keys())[0]
    # Mapping from dimension name to its length on the full-resolution level.
    dim_sizes = sdata.images[first_image]["scale0"].dims
    return tuple(dim_sizes[axis] for axis in ("c", "y", "x"))


class SpotPacking(Enum):
    """Layout of the spots on a spatial-transcriptomics slide.

    For the orange crate packing see:
    https://kb.10xgenomics.com/hc/en-us/articles/360041426992-Where-can-I-find-the-Space-Ranger-barcode-whitelist-and-their-coordinates-on-the-slide
    """

    ORANGE_CRATE_PACKING = 0
    GRID_PACKING = 1


def find_pixel_size_visium(
    my_df: pd.DataFrame, inter_spot_dist: float = 100.0, packing: SpotPacking = SpotPacking.ORANGE_CRATE_PACKING
) -> tuple[float, int]:
    """
    Estimate the pixel size of an image in um/px from the spot coordinates in that image.

    Spots are visited in ``array_row`` order. Each of the first four spots that shares
    its array row with at least one other spot is paired with the spot of largest
    ``array_col`` on the same row. The pair spanning the most inter-spot intervals
    provides the estimate ``inter_spot_dist / (pixel distance / number of intervals)``.

    Parameters
    ----------
    my_df : pd.DataFrame
        Coordinates of each spot in the image. It must contain the columns
        ``'pxl_row_in_fullres'``, ``'pxl_col_in_fullres'``, ``'array_col'`` and
        ``'array_row'``. The dataframe is not modified.
    inter_spot_dist : float, optional
        Distance in um between two neighbouring spots on the same row. Defaults to 100.
    packing : SpotPacking, optional
        Layout of the spots on the slide. Defaults to ``SpotPacking.ORANGE_CRATE_PACKING``.

    Returns
    -------
    pixel_size : float
        Approximation of the pixel size in um/px.
    n_intervals : int
        Number of inter-spot intervals separating the two spots the estimate was
        computed from.

    Raises
    ------
    ValueError
        If no two spots share an array row, or if none of the sampled spot pairs
        spans at least one inter-spot interval with a non-zero pixel distance.
    """
    # Only the first few spots having a row-mate are examined.
    max_candidates = 4
    # ``array_col`` differences are halved unless the spots lie on a plain grid,
    # i.e. row neighbours are taken to be two array columns apart in orange crate packing.
    col_step = 1 if packing == SpotPacking.GRID_PACKING else 2

    # A stable sort keeps the choice of sampled spots deterministic; resetting the
    # index makes label lookups unambiguous even if the input index has duplicates.
    # ``sort_values`` returns a new frame, so the caller's dataframe is left untouched.
    df = my_df.sort_values("array_row", kind="mergesort").reset_index(drop=True)

    spots_per_row = df["array_row"].map(df["array_row"].value_counts())
    candidates = df[spots_per_row > 1].head(max_candidates)
    if candidates.empty:
        raise ValueError("Pixel size estimation failed. Couldn't find two spots on the same row")

    best_pixel_size = 0.0
    max_intervals = 0
    for _, start_spot in candidates.iterrows():
        same_row = df[df["array_row"] == start_spot["array_row"]]
        end_spot = df.loc[same_row["array_col"].idxmax()]

        n_intervals = abs(end_spot["array_col"] - start_spot["array_col"]) // col_step
        # Written as ``not >`` so that a NaN interval count is skipped as well.
        if not n_intervals > max_intervals:
            continue

        dist_px = np.sqrt(
            (start_spot["pxl_col_in_fullres"] - end_spot["pxl_col_in_fullres"]) ** 2
            + (start_spot["pxl_row_in_fullres"] - end_spot["pxl_row_in_fullres"]) ** 2
        )
        if dist_px == 0:
            # Distinct array positions with identical pixel coordinates would yield
            # an infinite pixel size; such a pair carries no usable information.
            continue

        max_intervals = int(n_intervals)
        best_pixel_size = float(inter_spot_dist / (dist_px / n_intervals))

    if max_intervals == 0:
        raise ValueError(
            "Pixel size estimation failed. The sampled spots on a shared row are less than one "
            "inter-spot interval apart; check the `packing` argument and the spot coordinates"
        )

    return best_pixel_size, max_intervals


def create_df_coord_visium(data: SpatialData) -> pd.DataFrame:
    """
    Create a DataFrame with coordinates and array indices from Visium SpatialData.

    The pixel coordinates are read from the geometries of the first shapes element of
    ``data`` and the array indices from ``data.tables["table"].obs``. The result is a
    new DataFrame; the shapes element stored in ``data`` is left unmodified.

    Parameters
    ----------
    data : SpatialData
        A SpatialData object containing Visium spatial information, including shapes
        (geometries exposing ``x`` and ``y``) and a table named ``"table"`` whose
        ``obs`` holds the ``array_row`` and ``array_col`` columns.

    Returns
    -------
    df_coord : pandas.DataFrame
        A DataFrame indexed like the shapes element with the following columns:
        - 'pxl_row_in_fullres': ``y`` coordinate of each spot geometry.
        - 'pxl_col_in_fullres': ``x`` coordinate of each spot geometry.
        - 'array_row': Row index of the spot in the Visium array.
        - 'array_col': Column index of the spot in the Visium array.

    Raises
    ------
    ValueError
        If the number of table observations differs from the number of shapes.
    """
    tissue_name = list(data.shapes.keys())[0]
    shapes_df = data.shapes[tissue_name]
    obs = data.tables["table"].obs

    # NOTE(review): shapes and table observations are matched by position, not by
    # index label; this assumes both are stored in the same spot order - confirm
    # against the reader that builds them.
    df_coord = pd.DataFrame(
        {
            "pxl_row_in_fullres": shapes_df.geometry.apply(lambda geom: geom.y).to_numpy(),
            "pxl_col_in_fullres": shapes_df.geometry.apply(lambda geom: geom.x).to_numpy(),
            "array_row": list(obs["array_row"]),
            "array_col": list(obs["array_col"]),
        },
        index=shapes_df.index,
    )
    return df_coord


def calculate_pixel_size_from_visium(
    path: str,
    dataset_id: str,
    counts_file: str,
    fullres_image_file: str,
    tissue_positions_file: str,
    scalefactors_file: str,
    inter_spot_dist: float = 100.0,
) -> SpatialData:
    """
    Read a Visium dataset and annotate it with its scale0 image shape and estimated pixel size.

    The dataset is loaded with :func:`visium`. The spot coordinates are then used to
    estimate the pixel size, and the ``(c, y, x)`` shape of the scale0 image is
    extracted. Both values are written to ``tables["table"].uns``.

    Parameters
    ----------
    path : str
        Path to the directory containing the data.
    dataset_id : str
        ID of the dataset to use.
    counts_file : str
        Path to the filtered feature barcode matrix (counts file).
    fullres_image_file : str
        Path to the full-resolution image file (usually tissue_hires_image.png).
    tissue_positions_file : str
        Path to the tissue positions file (usually tissue_positions_list.csv).
    scalefactors_file : str
        Path to the scalefactors file (usually scalefactors_json.json).
    inter_spot_dist : float, optional
        Distance in um between two spots in a Visium field. Defaults to 100.

    Returns
    -------
    visium_sdata : SpatialData
        SpatialData object with ``"image_shape"`` and ``"pixel_size"`` stored in
        ``tables['table'].uns``.
    """
    reader_kwargs = {
        "path": path,
        "dataset_id": dataset_id,
        "counts_file": counts_file,
        "fullres_image_file": fullres_image_file,
        "tissue_positions_file": tissue_positions_file,
        "scalefactors_file": scalefactors_file,
    }
    sdata = visium(**reader_kwargs)

    # The pixel size is estimated first, then the image shape is read, so a failure
    # in the estimation surfaces before anything is written to the table.
    estimate = find_pixel_size_visium(create_df_coord_visium(sdata), inter_spot_dist)
    scale0_shape = get_sdata_res(sdata)

    sdata.tables["table"].uns["image_shape"] = scale0_shape
    sdata.tables["table"].uns["pixel_size"] = estimate[0]
    return sdata
Loading