From 3980b21e218131f22458541e2de67d79c9b0e897 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Fri, 16 Aug 2024 15:38:19 +0200 Subject: [PATCH] fix for old visium data (<2.x) with header in tissue positions file (>=2.x) --- src/spatialdata_io/readers/visium.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py index 7e09de96..e5feaa7a 100644 --- a/src/spatialdata_io/readers/visium.py +++ b/src/spatialdata_io/readers/visium.py @@ -4,7 +4,6 @@ import os import re from collections.abc import Mapping -from functools import partial from pathlib import Path from types import MappingProxyType from typing import Any @@ -138,7 +137,6 @@ def visium( if (path / "spatial" / VisiumKeys.SPOTS_FILE_1).exists() or ( tissue_positions_file is not None and str(VisiumKeys.SPOTS_FILE_1) in str(tissue_positions_file) ): - read_coords = partial(pd.read_csv, header=None, index_col=0) tissue_positions_file = ( path / "spatial" / VisiumKeys.SPOTS_FILE_1 if tissue_positions_file is None @@ -147,7 +145,6 @@ def visium( elif (path / "spatial" / VisiumKeys.SPOTS_FILE_2).exists() or ( tissue_positions_file is not None and str(VisiumKeys.SPOTS_FILE_2) in str(tissue_positions_file) ): - read_coords = partial(pd.read_csv, header=0, index_col=0) tissue_positions_file = ( path / "spatial" / VisiumKeys.SPOTS_FILE_2 if tissue_positions_file is None @@ -155,11 +152,18 @@ def visium( ) else: raise ValueError(f"Cannot find `tissue_positions` file in `{path}`.") - coords = read_coords(tissue_positions_file) + coords = pd.read_csv(tissue_positions_file, header=None, index_col=0) - # to handle spaceranger_version < 2.0.0, where no column names are provided - if "in_tissue" not in coords.columns: - coords.columns = ["in_tissue", "array_row", "array_col", "pxl_row_in_fullres", "pxl_col_in_fullres"] + if tissue_positions_file.name == VisiumKeys.SPOTS_FILE_1: + # spaceranger_version < 2.0.0 but header detected: https://github.com/scverse/spatialdata-io/issues/146 + if "in_tissue" in coords.iloc[0].tolist(): + coords = pd.read_csv(tissue_positions_file, header=0, index_col=0) + else: + assert "in_tissue" not in coords.columns + coords.columns = ["in_tissue", "array_row", "array_col", "pxl_row_in_fullres", "pxl_col_in_fullres"] + else: + assert tissue_positions_file.name == VisiumKeys.SPOTS_FILE_2 + coords = pd.read_csv(tissue_positions_file, header=0, index_col=0) adata.obs = pd.merge(adata.obs, coords, how="left", left_index=True, right_index=True) coords = adata.obs[[VisiumKeys.SPOTS_X, VisiumKeys.SPOTS_Y]].values