From 340af850a4cb6e635b23b4352647a2cc16469474 Mon Sep 17 00:00:00 2001 From: sergiomarco25 Date: Fri, 3 Jan 2025 10:06:55 +0100 Subject: [PATCH] correct2 --- src/troutpy/pl/plotting.py | 51 +++++++++++++------------- src/troutpy/pp/compute.py | 9 ++--- src/troutpy/tl/NMF.py | 31 +++++++--------- src/troutpy/tl/__init__.py | 2 +- src/troutpy/tl/interactions.py | 63 ++++++++++++--------------------- src/troutpy/tl/quantify_xrna.py | 21 ++++++----- 6 files changed, 73 insertions(+), 104 deletions(-) diff --git a/src/troutpy/pl/plotting.py b/src/troutpy/pl/plotting.py index a0d1d94..99e09f7 100644 --- a/src/troutpy/pl/plotting.py +++ b/src/troutpy/pl/plotting.py @@ -410,7 +410,7 @@ def proportion_above_threshold( output_path:str='',format='pdf' ): """ Plot top and bottom percentile of features - + Plots the top and bottom percentiles of features with the highest and lowest proportions above a threshold, or visualizes a specific list of transcripts. Parameters: @@ -578,8 +578,7 @@ def apply_exrnaH_to_cellular_to_create_cellularW(adata_extracellular_with_nmf, a Parameters: ----------- adata_extracellular_with_nmf : AnnData - An AnnData object containing the extracellular RNA data with the NMF results. - The H matrix is expected to be stored in `adata.uns['H_nmf']`. + An AnnData object containing the extracellular RNA data with the NMF results. The H matrix is expected to be stored in `adata.uns['H_nmf']`. adata_annotated_cellular : AnnData An AnnData object containing the cellular RNA data with annotated gene expression values. @@ -901,35 +900,35 @@ def interactions_with_arrows( save: Optional[Union[str, Path]] = None, **kwargs ): - """ - Visualizes interactions between source and target cells using arrows, along with transcript locations. + """Visualizes interactions between source and target cells using arrows, along with transcript locations. The function plots arrows from source to target cells based on transcript proximity, color-coding source and target cells, and transcript locations. An optional image layer can be overlaid behind the plot. Parameters: - sdata (AnnData): The AnnData object containing the spatial omics data. - layer (str, optional): The key in `sdata` for the extracellular transcript layer to analyze. Default is 'extracellular_transcripts_enriched'. - gene (str, optional): The gene of interest. Default is 'Arc'. - gene_key (str, optional): The key for gene names in the data. Default is 'feature_name'. - cell_id_key (str, optional): The key for cell IDs. Default is 'cell_id'. - color_target (str, optional): Color for the target cells. Default is 'blue'. - color_source (str, optional): Color for the source cells. Default is 'red'. - color_transcript (str, optional): Color for the transcript locations. Default is 'green'. - spatial_key (str, optional): The key for spatial coordinates in `sdata`. Default is 'spatial'. - img (Optional[Union[bool, Sequence]], optional): Optional background image (e.g., tissue section) to display behind the plot. - img_alpha (Optional[float], optional): Transparency level for the background image. Default is None (no image). - image_cmap (Optional[Colormap], optional): Colormap for the image. Default is None. - size (Optional[Union[float, Sequence[float]]], optional): Size of the plotted points (cells and transcripts). Default is 8. - alpha (float, optional): Transparency level for plotted points. Default is 0.6. - title (Optional[Union[str, Sequence[str]]], optional): Title of the plot. Default is the gene name. - legend_loc (Optional[str], optional): Location of the legend on the plot. Default is 'best'. - figsize (Tuple[float, float], optional): Size of the plot. Default is (10, 10). - dpi (Optional[int], optional): Resolution of the plot. Default is 100. - save (Optional[Union[str, Path]], optional): If provided, the path where the plot will be saved. - **kwargs: Additional arguments passed to the `scatter` and `imshow` functions for customization. + ---------- + - sdata (AnnData): The AnnData object containing the spatial omics data. + - layer (str, optional): The key in `sdata` for the extracellular transcript layer to analyze. Default is 'extracellular_transcripts_enriched'. + - gene (str, optional): The gene of interest. Default is 'Arc'. + - gene_key (str, optional): The key for gene names in the data. Default is 'feature_name'. + - cell_id_key (str, optional): The key for cell IDs. Default is 'cell_id'. + - color_target (str, optional): Color for the target cells. Default is 'blue'. + - color_source (str, optional): Color for the source cells. Default is 'red'. + - color_transcript (str, optional): Color for the transcript locations. Default is 'green'. + - spatial_key (str, optional): The key for spatial coordinates in `sdata`. Default is 'spatial'. + - img (Optional[Union[bool, Sequence]], optional): Optional background image (e.g., tissue section) to display behind the plot. + - img_alpha (Optional[float], optional): Transparency level for the background image. Default is None (no image). + - image_cmap (Optional[Colormap], optional): Colormap for the image. Default is None. + - size (Optional[Union[float, Sequence[float]]], optional): Size of the plotted points (cells and transcripts). Default is 8. + - alpha (float, optional): Transparency level for plotted points. Default is 0.6. + - title (Optional[Union[str, Sequence[str]]], optional): Title of the plot. Default is the gene name. + - legend_loc (Optional[str], optional): Location of the legend on the plot. Default is 'best'. + - figsize (Tuple[float, float], optional): Size of the plot. Default is (10, 10). + - dpi (Optional[int], optional): Resolution of the plot. Default is 100. + - save (Optional[Union[str, Path]], optional): If provided, the path where the plot will be saved. + - **kwargs: Additional arguments passed to the `scatter` and `imshow` functions for customization. Returns: - None: The function displays or saves a plot of interactions between cells and transcripts. + - None: The function displays or saves a plot of interactions between cells and transcripts. Notes: The plot will show arrows from source to target cells, with different colors for source, target, and transcript points. diff --git a/src/troutpy/pp/compute.py b/src/troutpy/pp/compute.py index 8243c48..5f6cd4f 100644 --- a/src/troutpy/pp/compute.py +++ b/src/troutpy/pp/compute.py @@ -54,12 +54,9 @@ def define_extracellular( - 'nuclei': Uses overlap with nuclear annotations to classify extracellular transcripts. - 'cells': Classifies transcripts not assigned to a cell as extracellular. min_prop_of_extracellular : float, optional (default: 0.8) - Minimum proportion of transcripts in a cluster required to be extracellular for it to be classified as such (used only with 'segmentation_free' method). - unassigned_to_cell_tag : str, optional (default: 'UNASSIGNED') - Tag indicating transcripts not assigned to any cell. - copy : bool, optional (default: False) - - If True, returns a copy of the updated spatial data. - - If False, updates the `sdata` object in-place. + - Minimum proportion of transcripts in a cluster required to be extracellular for it to be classified as such (used only with 'segmentation_free' method). + unassigned_to_cell_tag : str, optional (default: 'UNASSIGNED')Tag indicating transcripts not assigned to any cell. + copy : bool, optional (default: False) If True, returns a copy of the updated spatial data. If False, updates the `sdata` object in-place. Returns: Optional[SpatialData]: diff --git a/src/troutpy/tl/NMF.py b/src/troutpy/tl/NMF.py index 41ac6e4..942b462 100644 --- a/src/troutpy/tl/NMF.py +++ b/src/troutpy/tl/NMF.py @@ -48,35 +48,28 @@ def nmf( Parameters: ---------- - sdata : spatial data object + - sdata : spatial data object Input spatial data containing transcript and bin data. - - layer : str, optional + - layer : str, optional Layer name of the data that contains extracellular transcripts (default: 'extracellular_transcripts_enriched'). - - feature_key : str, optional + - feature_key : str, optional Column name for the transcript feature (default: 'feature_name'). - - bin_key : str, optional + - bin_key : str, optional Column name for bin IDs (default: 'bin_id'). - - density_table_key : str, optional + - density_table_key : str, optional Key to retrieve the density table from sdata (default: 'segmentation_free_table'). - - n_components : int, optional + - n_components : int, optional Number of components for NMF (default: 20). - - subsample_percentage : float, optional + - subsample_percentage : float, optional Percentage of data to use for NMF (default: 0.1). - - random_state : int, optional + - random_state : int, optional Random state for NMF initialization for reproducibility (default: None). Returns: ------- - sdata : Updated spatial data object with NMF components stored. + - sdata : Updated spatial data object with NMF components stored. """ - if all==False: + if not all: # Extract the DataFrame with feature_name and bin_id df = sdata.points[layer][[feature_key, bin_key]].compute() # Filter the density table to include only the relevant bin_ids and feature_names @@ -110,10 +103,10 @@ def apply_exrna_factors_to_cells(sdata, layer_factors='nmf_data'): Parameters: sdata (AnnData): The AnnData object containing both extracellular and cellular data. layer_factors (str, optional): The key in `sdata` that contains the extracellular RNA data with NMF factors. Default is 'nmf_data'. - + Returns: AnnData: The updated `sdata` object with annotated cellular data that includes the applied exRNA factors as new columns. - + Notes: The function assumes that the extracellular RNA data is stored in `sdata[layer_factors]` and that the NMF factor loadings are stored in the `uns` attribute of the extracellular dataset as 'H_nmf'. The factor scores are added to the `obs` attribute of the cellular data. """ diff --git a/src/troutpy/tl/__init__.py b/src/troutpy/tl/__init__.py index 03323b8..dfd04f7 100644 --- a/src/troutpy/tl/__init__.py +++ b/src/troutpy/tl/__init__.py @@ -1,4 +1,4 @@ -from .source_cell import create_xrna_metadata,compute_source_cells,distance_to_source_cell,compute_distant_cells_prop,get_proportion_expressed_per_cell_type +from .source_cell import compute_source_cells,distance_to_source_cell,compute_distant_cells_prop,get_proportion_expressed_per_cell_type from .target_cell import calculate_target_cells,define_target_by_celltype from .estimate_density import colocalization_proportion from .quantify_xrna import spatial_variability,create_xrna_metadata,quantify_overexpression,extracellular_enrichment,spatial_colocalization diff --git a/src/troutpy/tl/interactions.py b/src/troutpy/tl/interactions.py index 3c4cfb6..a76cb92 100644 --- a/src/troutpy/tl/interactions.py +++ b/src/troutpy/tl/interactions.py @@ -16,20 +16,16 @@ def get_number_of_communication_genes( ) -> pd.DataFrame: """Compute the number of exchanged genes between any two cell types - Args: - source_proportions (pd.DataFrame): A data frame (Gene name x Cell Type) with - proportion of cells per cell type expressing corresponding gene - target_proportions : A data frame - (Gene name x Cell Type) with proportion of cells per cell type being the - physically clostest cell to transcripts of corresponding gene. - Defaults to 0.2. - source_proportion_threshold (float, optional): The threshold to consider a cell - type to be a significant source of a gene. Defaults to 0.2. - target_proportion_threshold (float, optional): The threshold to consider a cell - type to be a significant target of a gene. Defaults to 0.2. + Parameters: + - source_proportions (pd.DataFrame): A data frame (Gene name x Cell Type) with + - proportion of cells per cell type expressing corresponding gene + - target_proportions : A data frame + - (Gene name x Cell Type) with proportion of cells per cell type being the physically clostest cell to transcripts of corresponding gene. Defaults to 0.2. + - source_proportion_threshold (float, optional): The threshold to consider a cell type to be a significant source of a gene. Defaults to 0.2. + - target_proportion_threshold (float, optional): The threshold to consider a cell type to be a significant target of a gene. Defaults to 0.2. Returns: - pd.DataFrame: _description_ + - pd.DataFrame: _description_ """ # filter the source and target cell types by defining signficant proportions @@ -67,59 +63,44 @@ def get_gene_interaction_strength( output_path: str = '', # Directory to save the plot format: str = 'pdf' # Format to save the plot (e.g., pdf, png) ) -> None: - """ - Computes and visualizes the interaction strength for a specific gene between source and target cell types. + """Computes and visualizes the interaction strength for a specific gene between source and target cell types. - This function calculates the interaction strength between source and target cell types for a specified gene - by multiplying the proportions of the gene in the source and target cell types. The interaction matrix can - be visualized using a chord diagram, with the option to save the resulting plot. + This function calculates the interaction strength between source and target cell types for a specified gene by multiplying the proportions of the gene in the source and target cell types. The interaction matrix can be visualized using a chord diagram, with the option to save the resulting plot. Parameters: ---------- - source_proportions : pd.DataFrame - A DataFrame where rows represent genes and columns represent source cell types. Each value indicates - the proportion of the gene in the respective source cell type. + - source_proportions : pd.DataFrame + A DataFrame where rows represent genes and columns represent source cell types. Each value indicates the proportion of the gene in the respective source cell type. - target_proportions : pd.DataFrame + - target_proportions : pd.DataFrame A DataFrame where rows represent genes and columns represent target cell types. Each value indicates the proportion of the gene in the respective target cell type. - gene_symbol : str, optional + - gene_symbol : str, optional The gene symbol for which the interaction strength is to be computed and visualized (default: ''). - return_interactions : bool, optional + - return_interactions : bool, optional If True, returns the interaction matrix as a NumPy array (default: False). - save : bool, optional + - save : bool, optional If True, saves the chord diagram plot to the specified output path (default: False). - output_path : str, optional - The directory path where the plot will be saved. If `save=True`, this path will be used to store the file - (default: ''). A 'figures' subdirectory is created if it doesn't exist. + - output_path : str, optional + The directory path where the plot will be saved. If `save=True`, this path will be used to store the file (default: ''). A 'figures' subdirectory is created if it doesn't exist. - format : str, optional + - format : str, optional The file format for saving the plot (e.g., 'pdf', 'png'). This is used only if `save=True` (default: 'pdf'). Returns: ------- - None or np.ndarray - If `return_interactions=True`, the function returns the interaction matrix as a NumPy array. Otherwise, - the function generates a chord diagram plot. + - None or np.ndarray + If `return_interactions=True`, the function returns the interaction matrix as a NumPy array. Otherwise, the function generates a chord diagram plot. Notes: ----- - - The function computes the interaction matrix by multiplying the proportions of the gene in the source and - target cell types. + - The function computes the interaction matrix by multiplying the proportions of the gene in the source and target cell types. - The chord diagram visualizes the interaction strength between the cell types. - If `save=True`, the plot is saved in the specified format and location. - - Example: - ------- - To compute and visualize the interaction strength for a specific gene: - - >>> get_gene_specific_interaction_strength(source_proportions, target_proportions, gene_symbol='MYC', save=True, output_path='results', format='png') - - This will save the plot as a PNG file in the 'results/figures' directory. """ # Ensure the target proportions have the same cell type columns as the source proportions diff --git a/src/troutpy/tl/quantify_xrna.py b/src/troutpy/tl/quantify_xrna.py index 39bc1b2..2dd437a 100644 --- a/src/troutpy/tl/quantify_xrna.py +++ b/src/troutpy/tl/quantify_xrna.py @@ -309,31 +309,30 @@ def spatial_colocalization( n_threads=1, threshold_colocalized=1,copy=False ): - """ - Computes spatial variability of extracellular RNA using Moran's I. + """Computes spatial variability of extracellular RNA using Moran's I. Parameters: ----------- - sdata : SpatialData + - sdata : SpatialData The spatial transcriptomics dataset in SpatialData format. - coords_keys : list of str, optional + - coords_keys : list of str, optional The keys for spatial coordinates in the dataset (default: ['x', 'y']). - gene_id_key : str, optional + - gene_id_key : str, optional The key for gene identifiers in the dataset (default: 'feature_name'). - n_neighbors : int, optional + - n_neighbors : int, optional Number of neighbors to use for computing spatial neighbors (default: 10). - resolution : int, optional + - resolution : int, optional The resolution for kernel density estimation (default: 1000). - binsize : int, optional + - binsize : int, optional The binsize for kernel density estimation (default: 20). - n_threads : int, optional + - n_threads : int, optional The number of threads for LazyKDE processing (default: 1). - spatial_autocorr_mode : str, optional + - spatial_autocorr_mode : str, optional The mode for spatial autocorrelation computation (default: "moran"). Returns: -------- - pd.DataFrame + - pd.DataFrame A DataFrame containing Moran's I values for each gene, indexed by gene names. """ # Step 1: Extract and preprocess data