From 340af850a4cb6e635b23b4352647a2cc16469474 Mon Sep 17 00:00:00 2001
From: sergiomarco25 <sergiomarco.salas@scilifelab.se>
Date: Fri, 3 Jan 2025 10:06:55 +0100
Subject: [PATCH] Clean up docstring formatting and drop duplicate create_xrna_metadata import

---
 src/troutpy/pl/plotting.py      | 51 +++++++++++++-------------
 src/troutpy/pp/compute.py       |  9 ++---
 src/troutpy/tl/NMF.py           | 31 +++++++---------
 src/troutpy/tl/__init__.py      |  2 +-
 src/troutpy/tl/interactions.py  | 63 ++++++++++++---------------------
 src/troutpy/tl/quantify_xrna.py | 21 ++++++-----
 6 files changed, 73 insertions(+), 104 deletions(-)

diff --git a/src/troutpy/pl/plotting.py b/src/troutpy/pl/plotting.py
index a0d1d94..99e09f7 100644
--- a/src/troutpy/pl/plotting.py
+++ b/src/troutpy/pl/plotting.py
@@ -410,7 +410,7 @@ def proportion_above_threshold(
     output_path:str='',format='pdf'
 ):
     """ Plot top and bottom percentile of features
-    
+
     Plots the top and bottom percentiles of features with the highest and lowest proportions above a threshold, or visualizes a specific list of transcripts.
 
     Parameters:
@@ -578,8 +578,7 @@ def apply_exrnaH_to_cellular_to_create_cellularW(adata_extracellular_with_nmf, a
     Parameters:
     -----------
     adata_extracellular_with_nmf : AnnData
-        An AnnData object containing the extracellular RNA data with the NMF results. 
-        The H matrix is expected to be stored in `adata.uns['H_nmf']`.
+        An AnnData object containing the extracellular RNA data with the NMF results. The H matrix is expected to be stored in `adata.uns['H_nmf']`.
     adata_annotated_cellular : AnnData
         An AnnData object containing the cellular RNA data with annotated gene expression values.
 
@@ -901,35 +900,35 @@ def interactions_with_arrows(
     save: Optional[Union[str, Path]] = None,
     **kwargs
 ):
-    """
-    Visualizes interactions between source and target cells using arrows, along with transcript locations.
+    """Visualizes interactions between source and target cells using arrows, along with transcript locations.
 
     The function plots arrows from source to target cells based on transcript proximity, color-coding source and target cells, and transcript locations. An optional image layer can be overlaid behind the plot. 
 
     Parameters:
-    sdata (AnnData): The AnnData object containing the spatial omics data.
-    layer (str, optional): The key in `sdata` for the extracellular transcript layer to analyze. Default is 'extracellular_transcripts_enriched'.
-    gene (str, optional): The gene of interest. Default is 'Arc'.
-    gene_key (str, optional): The key for gene names in the data. Default is 'feature_name'.
-    cell_id_key (str, optional): The key for cell IDs. Default is 'cell_id'.
-    color_target (str, optional): Color for the target cells. Default is 'blue'.
-    color_source (str, optional): Color for the source cells. Default is 'red'.
-    color_transcript (str, optional): Color for the transcript locations. Default is 'green'.
-    spatial_key (str, optional): The key for spatial coordinates in `sdata`. Default is 'spatial'.
-    img (Optional[Union[bool, Sequence]], optional): Optional background image (e.g., tissue section) to display behind the plot.
-    img_alpha (Optional[float], optional): Transparency level for the background image. Default is None (no image).
-    image_cmap (Optional[Colormap], optional): Colormap for the image. Default is None.
-    size (Optional[Union[float, Sequence[float]]], optional): Size of the plotted points (cells and transcripts). Default is 8.
-    alpha (float, optional): Transparency level for plotted points. Default is 0.6.
-    title (Optional[Union[str, Sequence[str]]], optional): Title of the plot. Default is the gene name.
-    legend_loc (Optional[str], optional): Location of the legend on the plot. Default is 'best'.
-    figsize (Tuple[float, float], optional): Size of the plot. Default is (10, 10).
-    dpi (Optional[int], optional): Resolution of the plot. Default is 100.
-    save (Optional[Union[str, Path]], optional): If provided, the path where the plot will be saved.
-    **kwargs: Additional arguments passed to the `scatter` and `imshow` functions for customization.
+    ----------
+    - sdata (AnnData): The AnnData object containing the spatial omics data.
+    - layer (str, optional): The key in `sdata` for the extracellular transcript layer to analyze. Default is 'extracellular_transcripts_enriched'.
+    - gene (str, optional): The gene of interest. Default is 'Arc'.
+    - gene_key (str, optional): The key for gene names in the data. Default is 'feature_name'.
+    - cell_id_key (str, optional): The key for cell IDs. Default is 'cell_id'.
+    - color_target (str, optional): Color for the target cells. Default is 'blue'.
+    - color_source (str, optional): Color for the source cells. Default is 'red'.
+    - color_transcript (str, optional): Color for the transcript locations. Default is 'green'.
+    - spatial_key (str, optional): The key for spatial coordinates in `sdata`. Default is 'spatial'.
+    - img (Optional[Union[bool, Sequence]], optional): Optional background image (e.g., tissue section) to display behind the plot.
+    - img_alpha (Optional[float], optional): Transparency level for the background image. Default is None (no image).
+    - image_cmap (Optional[Colormap], optional): Colormap for the image. Default is None.
+    - size (Optional[Union[float, Sequence[float]]], optional): Size of the plotted points (cells and transcripts). Default is 8.
+    - alpha (float, optional): Transparency level for plotted points. Default is 0.6.
+    - title (Optional[Union[str, Sequence[str]]], optional): Title of the plot. Default is the gene name.
+    - legend_loc (Optional[str], optional): Location of the legend on the plot. Default is 'best'.
+    - figsize (Tuple[float, float], optional): Size of the plot. Default is (10, 10).
+    - dpi (Optional[int], optional): Resolution of the plot. Default is 100.
+    - save (Optional[Union[str, Path]], optional): If provided, the path where the plot will be saved.
+    - **kwargs: Additional arguments passed to the `scatter` and `imshow` functions for customization.
 
     Returns:
-    None: The function displays or saves a plot of interactions between cells and transcripts.
+    - None: The function displays or saves a plot of interactions between cells and transcripts.
 
     Notes:
     The plot will show arrows from source to target cells, with different colors for source, target, and transcript points.
diff --git a/src/troutpy/pp/compute.py b/src/troutpy/pp/compute.py
index 8243c48..5f6cd4f 100644
--- a/src/troutpy/pp/compute.py
+++ b/src/troutpy/pp/compute.py
@@ -54,12 +54,9 @@ def define_extracellular(
             - 'nuclei': Uses overlap with nuclear annotations to classify extracellular transcripts.
             - 'cells': Classifies transcripts not assigned to a cell as extracellular.
         min_prop_of_extracellular : float, optional (default: 0.8)
-            Minimum proportion of transcripts in a cluster required to be extracellular for it to be classified as such (used only with 'segmentation_free' method).
-        unassigned_to_cell_tag : str, optional (default: 'UNASSIGNED')
-            Tag indicating transcripts not assigned to any cell.
-        copy : bool, optional (default: False)
-            - If True, returns a copy of the updated spatial data. 
-            - If False, updates the `sdata` object in-place.
+            - Minimum proportion of transcripts in a cluster required to be extracellular for it to be classified as such (used only with 'segmentation_free' method).
+        unassigned_to_cell_tag : str, optional (default: 'UNASSIGNED') Tag indicating transcripts not assigned to any cell.
+        copy : bool, optional (default: False) If True, returns a copy of the updated spatial data. If False, updates the `sdata` object in-place.
 
     Returns:
         Optional[SpatialData]:
diff --git a/src/troutpy/tl/NMF.py b/src/troutpy/tl/NMF.py
index 41ac6e4..942b462 100644
--- a/src/troutpy/tl/NMF.py
+++ b/src/troutpy/tl/NMF.py
@@ -48,35 +48,28 @@ def nmf(
 
     Parameters:
     ----------
-    sdata : spatial data object
+    - sdata : spatial data object
         Input spatial data containing transcript and bin data.
-  
-    layer : str, optional
+    - layer : str, optional
         Layer name of the data that contains extracellular transcripts (default: 'extracellular_transcripts_enriched').
-    
-    feature_key : str, optional
+    - feature_key : str, optional
         Column name for the transcript feature (default: 'feature_name').
-    
-    bin_key : str, optional
+    - bin_key : str, optional
         Column name for bin IDs (default: 'bin_id').
-    
-    density_table_key : str, optional
+    - density_table_key : str, optional
         Key to retrieve the density table from sdata (default: 'segmentation_free_table').
-    
-    n_components : int, optional
+    - n_components : int, optional
         Number of components for NMF (default: 20).
-   
-    subsample_percentage : float, optional
+    - subsample_percentage : float, optional
         Percentage of data to use for NMF (default: 0.1).
-   
-    random_state : int, optional
+    - random_state : int, optional
         Random state for NMF initialization for reproducibility (default: None).
 
     Returns:
     -------
-    sdata : Updated spatial data object with NMF components stored.
+    - sdata : Updated spatial data object with NMF components stored.
     """
-    if all==False:
+    if not all:
     # Extract the DataFrame with feature_name and bin_id
          df = sdata.points[layer][[feature_key, bin_key]].compute()
          # Filter the density table to include only the relevant bin_ids and feature_names
@@ -110,10 +103,10 @@ def apply_exrna_factors_to_cells(sdata, layer_factors='nmf_data'):
     Parameters:
     sdata (AnnData): The AnnData object containing both extracellular and cellular data.
     layer_factors (str, optional): The key in `sdata` that contains the extracellular RNA data with NMF factors. Default is 'nmf_data'.
-    
+
     Returns:
     AnnData: The updated `sdata` object with annotated cellular data that includes the applied exRNA factors as new columns.
-    
+
     Notes:
     The function assumes that the extracellular RNA data is stored in `sdata[layer_factors]` and that the NMF factor loadings are stored in the `uns` attribute of the extracellular dataset as 'H_nmf'. The factor scores are added to the `obs` attribute of the cellular data.
     """
diff --git a/src/troutpy/tl/__init__.py b/src/troutpy/tl/__init__.py
index 03323b8..dfd04f7 100644
--- a/src/troutpy/tl/__init__.py
+++ b/src/troutpy/tl/__init__.py
@@ -1,4 +1,4 @@
-from .source_cell import create_xrna_metadata,compute_source_cells,distance_to_source_cell,compute_distant_cells_prop,get_proportion_expressed_per_cell_type
+from .source_cell import compute_source_cells,distance_to_source_cell,compute_distant_cells_prop,get_proportion_expressed_per_cell_type
 from .target_cell import calculate_target_cells,define_target_by_celltype
 from .estimate_density import colocalization_proportion
 from .quantify_xrna import spatial_variability,create_xrna_metadata,quantify_overexpression,extracellular_enrichment,spatial_colocalization
diff --git a/src/troutpy/tl/interactions.py b/src/troutpy/tl/interactions.py
index 3c4cfb6..a76cb92 100644
--- a/src/troutpy/tl/interactions.py
+++ b/src/troutpy/tl/interactions.py
@@ -16,20 +16,16 @@ def get_number_of_communication_genes(
     ) -> pd.DataFrame:
     """Compute the number of exchanged genes between any two cell types
 
-    Args:
-        source_proportions (pd.DataFrame): A data frame (Gene name x Cell Type) with 
-        proportion of cells per cell type expressing corresponding gene 
-        target_proportions : A data frame 
-        (Gene name x Cell Type) with proportion of cells per cell type being the 
-        physically clostest cell to transcripts of corresponding gene. 
-        Defaults to 0.2.
-        source_proportion_threshold (float, optional): The threshold to consider a cell
-        type to be a significant source of a gene. Defaults to 0.2.
-        target_proportion_threshold (float, optional): The threshold to consider a cell
-        type to be a significant target of a gene. Defaults to 0.2.
+    Parameters:
+        - source_proportions (pd.DataFrame): A data frame (Gene name x Cell Type) with the
+          proportion of cells per cell type expressing the corresponding gene.
+        - target_proportions (pd.DataFrame): A data frame (Gene name x Cell Type) with the proportion of cells
+          per cell type being the physically closest cell to transcripts of the corresponding gene.
+        - source_proportion_threshold (float, optional): The threshold to consider a cell type to be a significant source of a gene. Defaults to 0.2.
+        - target_proportion_threshold (float, optional): The threshold to consider a cell type to be a significant target of a gene. Defaults to 0.2.
 
     Returns:
-        pd.DataFrame: _description_
+        - pd.DataFrame: The number of exchanged genes between any two cell types.
     """
     
     # filter the source and target cell types by defining signficant proportions
@@ -67,59 +63,44 @@ def get_gene_interaction_strength(
     output_path: str = '',             # Directory to save the plot
     format: str = 'pdf'                # Format to save the plot (e.g., pdf, png)
 ) -> None:
-    """
-    Computes and visualizes the interaction strength for a specific gene between source and target cell types.
+    """Computes and visualizes the interaction strength for a specific gene between source and target cell types.
 
-    This function calculates the interaction strength between source and target cell types for a specified gene
-    by multiplying the proportions of the gene in the source and target cell types. The interaction matrix can 
-    be visualized using a chord diagram, with the option to save the resulting plot.
+    This function calculates the interaction strength between source and target cell types for a specified gene by multiplying the proportions of the gene in the source and target cell types. The interaction matrix can be visualized using a chord diagram, with the option to save the resulting plot.
 
     Parameters:
     ----------
-    source_proportions : pd.DataFrame
-        A DataFrame where rows represent genes and columns represent source cell types. Each value indicates 
-        the proportion of the gene in the respective source cell type.
+    - source_proportions : pd.DataFrame
+        A DataFrame where rows represent genes and columns represent source cell types. Each value indicates the proportion of the gene in the respective source cell type.
 
-    target_proportions : pd.DataFrame
+    - target_proportions : pd.DataFrame
         A DataFrame where rows represent genes and columns represent target cell types. Each value indicates 
         the proportion of the gene in the respective target cell type.
 
-    gene_symbol : str, optional
+    - gene_symbol : str, optional
         The gene symbol for which the interaction strength is to be computed and visualized (default: '').
 
-    return_interactions : bool, optional
+    - return_interactions : bool, optional
         If True, returns the interaction matrix as a NumPy array (default: False).
 
-    save : bool, optional
+    - save : bool, optional
         If True, saves the chord diagram plot to the specified output path (default: False).
 
-    output_path : str, optional
-        The directory path where the plot will be saved. If `save=True`, this path will be used to store the file 
-        (default: ''). A 'figures' subdirectory is created if it doesn't exist.
+    - output_path : str, optional
+        The directory path where the plot will be saved. If `save=True`, this path will be used to store the file (default: ''). A 'figures' subdirectory is created if it doesn't exist.
 
-    format : str, optional
+    - format : str, optional
         The file format for saving the plot (e.g., 'pdf', 'png'). This is used only if `save=True` (default: 'pdf').
 
     Returns:
     -------
-    None or np.ndarray
-        If `return_interactions=True`, the function returns the interaction matrix as a NumPy array. Otherwise, 
-        the function generates a chord diagram plot.
+    - None or np.ndarray
+        If `return_interactions=True`, the function returns the interaction matrix as a NumPy array. Otherwise, the function generates a chord diagram plot.
 
     Notes:
     -----
-    - The function computes the interaction matrix by multiplying the proportions of the gene in the source and 
-      target cell types.
+    - The function computes the interaction matrix by multiplying the proportions of the gene in the source and target cell types.
     - The chord diagram visualizes the interaction strength between the cell types.
     - If `save=True`, the plot is saved in the specified format and location.
-
-    Example:
-    -------
-    To compute and visualize the interaction strength for a specific gene:
-
-    >>> get_gene_specific_interaction_strength(source_proportions, target_proportions, gene_symbol='MYC', save=True, output_path='results', format='png')
-
-    This will save the plot as a PNG file in the 'results/figures' directory.
     """
 
     # Ensure the target proportions have the same cell type columns as the source proportions
diff --git a/src/troutpy/tl/quantify_xrna.py b/src/troutpy/tl/quantify_xrna.py
index 39bc1b2..2dd437a 100644
--- a/src/troutpy/tl/quantify_xrna.py
+++ b/src/troutpy/tl/quantify_xrna.py
@@ -309,31 +309,30 @@ def spatial_colocalization(
     n_threads=1, 
     threshold_colocalized=1,copy=False
 ):
-    """
-    Computes spatial variability of extracellular RNA using Moran's I.
+    """Computes spatial variability of extracellular RNA using Moran's I.
 
     Parameters:
     -----------
-    sdata : SpatialData
+    - sdata : SpatialData
         The spatial transcriptomics dataset in SpatialData format.
-    coords_keys : list of str, optional
+    - coords_keys : list of str, optional
         The keys for spatial coordinates in the dataset (default: ['x', 'y']).
-    gene_id_key : str, optional
+    - gene_id_key : str, optional
         The key for gene identifiers in the dataset (default: 'feature_name').
-    n_neighbors : int, optional
+    - n_neighbors : int, optional
         Number of neighbors to use for computing spatial neighbors (default: 10).
-    resolution : int, optional
+    - resolution : int, optional
         The resolution for kernel density estimation (default: 1000).
-    binsize : int, optional
+    - binsize : int, optional
         The binsize for kernel density estimation (default: 20).
-    n_threads : int, optional
+    - n_threads : int, optional
         The number of threads for LazyKDE processing (default: 1).
-    spatial_autocorr_mode : str, optional
+    - spatial_autocorr_mode : str, optional
         The mode for spatial autocorrelation computation (default: "moran").
 
     Returns:
     --------
-    pd.DataFrame
+    - pd.DataFrame
         A DataFrame containing Moran's I values for each gene, indexed by gene names.
     """
     # Step 1: Extract and preprocess data