diff --git a/src/troutpy/pl/plotting.py b/src/troutpy/pl/plotting.py index 0264fab..d07f5c8 100644 --- a/src/troutpy/pl/plotting.py +++ b/src/troutpy/pl/plotting.py @@ -608,46 +608,26 @@ def spatial_interactions( This function generates a scatter plot showing the positions of target cells, source cells, and extracellular RNA transcripts within a spatial omics dataset. The target and source cells are highlighted in different colors, while the RNA transcripts are shown as points at their respective positions. Optionally, a background image (e.g., tissue section) can be displayed. Parameters: - sdata : AnnData - An AnnData object containing the spatial omics data, including transcript expression and cell positions. - layer : str, optional, default: 'extracellular_transcripts_enriched' - The layer in the AnnData object that contains the extracellular RNA transcript data. - gene : str, optional, default: 'Arc' - The gene of interest to be visualized in terms of its spatial interaction with source and target cells. - gene_key : str, optional, default: 'feature_name' - The column name in the AnnData object used to identify the gene. - cell_id_key : str, optional, default: 'cell_id' - The column name in the AnnData object used to identify individual cells. - color_target : str, optional, default: 'blue' - The color to be used for target cells in the plot. - color_source : str, optional, default: 'red' - The color to be used for source cells in the plot. - color_transcript : str, optional, default: 'green' - The color to be used for the RNA transcripts in the plot. - spatial_key : str, optional, default: 'spatial' - The key in the AnnData object that stores the spatial coordinates of the cells. - img : Optional[Union[bool, Sequence]], optional, default: None - A background image to overlay on the plot, such as a tissue section. Can be set to `None` to omit. - img_alpha : Optional[float], optional, default: None - The transparency level of the background image. 
Ignored if `img` is `None`. - image_cmap : Optional[Colormap], optional, default: None - The colormap to be used for the background image, if applicable. - size : Optional[Union[float, Sequence[float]]], optional, default: 8 - The size of the scatter plot points for the cells and transcripts. - alpha : float, optional, default: 0.6 - The transparency level for the scatter plot points. - title : Optional[Union[str, Sequence[str]]], optional, default: None - The title of the plot. If `None`, the gene name is used. - legend_loc : Optional[str], optional, default: 'best' - The location of the legend in the plot. - figsize : Tuple[float, float], optional, default: (10, 10) - The dimensions of the plot in inches. - dpi : Optional[int], optional, default: 100 - The resolution (dots per inch) for the plot. - save : Optional[Union[str, Path]], optional, default: None - The path to save the plot image. If `None`, the plot is displayed but not saved. - **kwargs : Additional keyword arguments - Any additional arguments passed to the `scatter` or `imshow` functions for customizing plot appearance. + sdata (AnnData): An AnnData object containing the spatial omics data, including transcript expression and cell positions. + layer (str): The layer in the AnnData object that contains the extracellular RNA transcript data. + gene (str): The gene of interest to be visualized in terms of its spatial interaction with source and target cells. + gene_key (str): The column name in the AnnData object used to identify the gene. + cell_id_key (str): The column name in the AnnData object used to identify individual cells. + color_target (str): The color to be used for target cells in the plot. + color_source (str): The color to be used for source cells in the plot. + color_transcript (str): The color to be used for the RNA transcripts in the plot. + spatial_key (str): The key in the AnnData object that stores the spatial coordinates of the cells. 
+ img (Optional[Union[bool, Sequence]]): A background image to overlay on the plot, such as a tissue section. Can be set to `None` to omit. + img_alpha (Optional[float]): The transparency level of the background image. Ignored if `img` is `None`. + image_cmap (Optional[Colormap]): The colormap to be used for the background image, if applicable. + size (Optional[Union[float, Sequence[float]]]): The size of the scatter plot points for the cells and transcripts. + alpha (float): The transparency level for the scatter plot points. + title (Optional[Union[str, Sequence[str]]]): The title of the plot. If `None`, the gene name is used. + legend_loc (Optional[str]): The location of the legend in the plot. + figsize (Tuple[float, float]): The dimensions of the plot in inches. + dpi (Optional[int]): The resolution (dots per inch) for the plot. + save (Optional[Union[str, Path]]): The path to save the plot image. If `None`, the plot is displayed but not saved. + **kwargs : Any additional arguments passed to the `scatter` or `imshow` functions for customizing plot appearance. Returns: None diff --git a/src/troutpy/pp/compute.py b/src/troutpy/pp/compute.py index a878215..9ec7c8f 100644 --- a/src/troutpy/pp/compute.py +++ b/src/troutpy/pp/compute.py @@ -42,35 +42,23 @@ def define_extracellular( This function identifies extracellular transcripts based on the specified method and updates the spatial data object accordingly. Parameters: - sdata : SpatialData - A spatial data object containing transcriptomic information. - layer : str, optional (default: 'transcripts') - The layer in `sdata.points` containing the transcript data to process. - method : str, optional (default: 'segmentation_free') - The method to define extracellular transcripts. Options: - - 'segmentation_free': Uses segmentation-free clustering results. - - 'nuclei': Uses overlap with nuclear annotations to classify extracellular transcripts. 
- 'cells': Classifies transcripts not assigned to a cell as extracellular. - min_prop_of_extracellular : float, optional (default: 0.8) - - Minimum proportion of transcripts in a cluster required to be extracellular for it to be classified as such (used only with 'segmentation_free' method). - unassigned_to_cell_tag : str, optional (default: 'UNASSIGNED')Tag indicating transcripts not assigned to any cell. - copy : bool, optional (default: False) If True, returns a copy of the updated spatial data. If False, updates the `sdata` object in-place. + sdata (SpatialData): A spatial data object containing transcriptomic information. + layer (str): The layer in `sdata.points` containing the transcript data to process. + method (str): The method to define extracellular transcripts. Options: + - 'segmentation_free': Uses segmentation-free clustering results. + - 'nuclei': Uses overlap with nuclear annotations to classify extracellular transcripts. + - 'cells': Classifies transcripts not assigned to a cell as extracellular. + min_prop_of_extracellular (float, optional): Minimum proportion of transcripts in a cluster required to be extracellular for it to be classified as such (used only with 'segmentation_free' method). + unassigned_to_cell_tag (str, optional): Tag indicating transcripts not assigned to any cell. + copy (bool): If True, returns a copy of the updated spatial data. If False, updates the `sdata` object in-place. Returns: - Optional[SpatialData]: - If `copy` is True, returns a copy of the updated `sdata` object.Otherwise, updates the `sdata` object in-place and returns None. + Optional[SpatialData]: If `copy` is True, returns a copy of the updated `sdata` object. Otherwise, updates the `sdata` object in-place and returns None. Notes: - - The 'segmentation_free' method uses clustering results to determine extracellular transcripts. - - The 'nuclei' method assumes transcripts outside nuclei are extracellular. 
- - The 'cells' method classifies transcripts unassigned to cells as extracellular. - - Example: - ```python - updated_sdata = define_extracellular( - sdata, method='segmentation_free', min_prop_of_extracellular=0.9, copy=True - ) - ``` + - The 'segmentation_free' method uses clustering results to determine extracellular transcripts. + - The 'nuclei' method assumes transcripts outside nuclei are extracellular. + - The 'cells' method classifies transcripts unassigned to cells as extracellular. """ # Compute the data layer data = sdata.points[layer].compute() @@ -108,16 +96,12 @@ def compute_crosstab(data, xvar: str = '', yvar: str = ''): Compute a crosstabulation (contingency table) of two categorical variables from the given DataFrame. Parameters: - data : pandas.DataFrame - The input DataFrame containing the data to be analyzed. - xvar : str, optional - The name of the column to use as the rows of the crosstab. Default is an empty string. - yvar : str, optional - The name of the column to use as the columns of the crosstab. Default is an empty string. + data (pandas.DataFrame): The input DataFrame containing the data to be analyzed. + xvar (str, optional): The name of the column to use as the rows of the crosstab. Default is an empty string. + yvar (str, optional): The name of the column to use as the columns of the crosstab. Default is an empty string. Returns: - pandas.DataFrame - A DataFrame representing the crosstab of the specified variables, with counts of occurrences for each combination of categories. + crosstab_data (pandas.DataFrame): A DataFrame representing the crosstab of the specified variables, with counts of occurrences for each combination of categories. 
""" crosstab_data = pd.crosstab(data[xvar], data[yvar]) return crosstab_data diff --git a/src/troutpy/pp/format.py b/src/troutpy/pp/format.py index 1d10cba..cde05fa 100644 --- a/src/troutpy/pp/format.py +++ b/src/troutpy/pp/format.py @@ -13,17 +13,14 @@ def format_adata(input_path, outpath_dummy, xlimits, ylimits): Processes and formats AnnData and transcripts by loading data, merging cell information,applying spatial filters, and saving the processed data to a dummy output directory. Parameters: - input_path (str): Path to the input directory containing: - - 'cell_feature_matrix.h5' - - 'cells.parquet' - - 'transcripts.parquet' - outpath_dummy (str): Path to the output directory where processed files will be saved. - xlimits (list or tuple of two ints): Spatial limits for the x-coordinate filtering [min_x, max_x]. - ylimits (list or tuple of two ints): Spatial limits for the y-coordinate filtering [min_y, max_y]. + input_path (str): Path to the input directory containing: 'cell_feature_matrix.h5', 'cells.parquet', 'transcripts.parquet' + outpath_dummy (str): Path to the output directory where processed files will be saved. + xlimits (list or tuple of two ints): Spatial limits for the x-coordinate filtering [min_x, max_x]. + ylimits (list or tuple of two ints): Spatial limits for the y-coordinate filtering [min_y, max_y]. Raises: - FileNotFoundError: If required input files are not found in the input_path. - ValueError: If xlimits or ylimits are not properly defined. + FileNotFoundError: If required input files are not found in the input_path. + ValueError: If xlimits or ylimits are not properly defined. """ # Validate input limits if not (isinstance(xlimits, (list | tuple)) and len(xlimits) == 2):