From d5bce61a66741a1453bc7563a5522aac68299030 Mon Sep 17 00:00:00 2001 From: Zhenyi Wang Date: Tue, 22 Oct 2024 10:48:24 +0800 Subject: [PATCH] update --- .Rproj.user/F5A33326/sources/per/t/9679D688 | 6 +-- .../F5A33326/sources/per/t/9679D688-contents | 46 +++++++++-------- 01-HemaScope_installation_tutorial.Rmd | 42 ++++++++-------- _book/installation.html | 50 ++++++++++--------- _book/search_index.json | 2 +- docs/installation.html | 50 ++++++++++--------- docs/search_index.json | 2 +- 7 files changed, 103 insertions(+), 95 deletions(-) diff --git a/.Rproj.user/F5A33326/sources/per/t/9679D688 b/.Rproj.user/F5A33326/sources/per/t/9679D688 index 2764fa4..b955871 100644 --- a/.Rproj.user/F5A33326/sources/per/t/9679D688 +++ b/.Rproj.user/F5A33326/sources/per/t/9679D688 @@ -3,7 +3,7 @@ "path": "D:/GitHub/HemaScope_Tutorial/01-HemaScope_installation_tutorial.Rmd", "project_path": "01-HemaScope_installation_tutorial.Rmd", "type": "r_markdown", - "hash": "797006684", + "hash": "1225064144", "contents": "", "dirty": false, "created": 1729562861292.0, @@ -16,11 +16,11 @@ "scrollLine": "123" }, "folds": "", - "lastKnownWriteTime": 1729562922, + "lastKnownWriteTime": 1729565172, "encoding": "UTF-8", "collab_server": "", "source_window": "", - "last_content_update": 1729562922491, + "last_content_update": 1729565172, "read_only": false, "read_only_alternatives": [] } \ No newline at end of file diff --git a/.Rproj.user/F5A33326/sources/per/t/9679D688-contents b/.Rproj.user/F5A33326/sources/per/t/9679D688-contents index 4ccc49d..244d40a 100644 --- a/.Rproj.user/F5A33326/sources/per/t/9679D688-contents +++ b/.Rproj.user/F5A33326/sources/per/t/9679D688-contents @@ -34,6 +34,10 @@ conda config --set show_channel_urls true ## Install R +<<<<<<< Updated upstream + +======= +>>>>>>> Stashed changes - R 4.3.3 ``` @@ -85,27 +89,27 @@ BiocManager::install("BiocNeighbors") - From CRAN ``` -install.packages(c("doMC", -"doRNG", -"shinyjs", -"shiny", -"shinyWidgets", -"shinydashboard", 
-"slickR", -"phateR", -"gelnet", -"parallelDist", -"kableExtra", -"transport", -"feather", -"markdown", -"ggalluvial", -"forcats", -"mcmc", -"MCMCpack", -"fields", -"getopt", -"osfr")) +install.packages("doMC") +install.packages("doRNG") +install.packages("shinyjs") +install.packages("shiny") +install.packages("shinyWidgets") +install.packages("shinydashboard") +install.packages("slickR") +install.packages("phateR") +install.packages("gelnet") +install.packages("parallelDist") +install.packages("kableExtra") +install.packages("transport") +install.packages("feather") +install.packages("markdown") +install.packages("ggalluvial") +install.packages("forcats") +install.packages("mcmc") +install.packages("MCMCpack") +install.packages("fields") +install.packages("getopt") +install.packages("osfr") ``` - From GitHub diff --git a/01-HemaScope_installation_tutorial.Rmd b/01-HemaScope_installation_tutorial.Rmd index 706e17d..244d40a 100644 --- a/01-HemaScope_installation_tutorial.Rmd +++ b/01-HemaScope_installation_tutorial.Rmd @@ -89,27 +89,27 @@ BiocManager::install("BiocNeighbors") - From CRAN ``` -install.packages(c("doMC", -"doRNG", -"shinyjs", -"shiny", -"shinyWidgets", -"shinydashboard", -"slickR", -"phateR", -"gelnet", -"parallelDist", -"kableExtra", -"transport", -"feather", -"markdown", -"ggalluvial", -"forcats", -"mcmc", -"MCMCpack", -"fields", -"getopt", -"osfr")) +install.packages("doMC") +install.packages("doRNG") +install.packages("shinyjs") +install.packages("shiny") +install.packages("shinyWidgets") +install.packages("shinydashboard") +install.packages("slickR") +install.packages("phateR") +install.packages("gelnet") +install.packages("parallelDist") +install.packages("kableExtra") +install.packages("transport") +install.packages("feather") +install.packages("markdown") +install.packages("ggalluvial") +install.packages("forcats") +install.packages("mcmc") +install.packages("MCMCpack") +install.packages("fields") +install.packages("getopt") 
+install.packages("osfr") ``` - From GitHub diff --git a/_book/installation.html b/_book/installation.html index 3f61c28..abb7067 100644 --- a/_book/installation.html +++ b/_book/installation.html @@ -377,9 +377,11 @@

2.2 Set the channels in conda

2.3 Install R

-
    -
  • R 4.3.3
  • -
+

<<<<<<< Updated upstream + +======= +>>>>>>> Stashed changes +- R 4.3.3

conda install R-base=4.3.3
@@ -423,27 +425,27 @@

2.4 Install required R-packages
  • From CRAN
  • -
    install.packages(c("doMC",
    -"doRNG",
    -"shinyjs",
    -"shiny",
    -"shinyWidgets",
    -"shinydashboard",
    -"slickR",
    -"phateR",
    -"gelnet",
    -"parallelDist",
    -"kableExtra",
    -"transport",
    -"feather",
    -"markdown",
    -"ggalluvial",
    -"forcats",
    -"mcmc",
    -"MCMCpack",
    -"fields",
    -"getopt",
    -"osfr"))
    +
    install.packages("doMC")
    +install.packages("doRNG")
    +install.packages("shinyjs")
    +install.packages("shiny")
    +install.packages("shinyWidgets")
    +install.packages("shinydashboard")
    +install.packages("slickR")
    +install.packages("phateR")
    +install.packages("gelnet")
    +install.packages("parallelDist")
    +install.packages("kableExtra")
    +install.packages("transport")
    +install.packages("feather")
    +install.packages("markdown")
    +install.packages("ggalluvial")
    +install.packages("forcats")
    +install.packages("mcmc")
    +install.packages("MCMCpack")
    +install.packages("fields")
    +install.packages("getopt")
    +install.packages("osfr")
    • From GitHub
    diff --git a/_book/search_index.json b/_book/search_index.json index 4c8f48c..40803c4 100644 --- a/_book/search_index.json +++ b/_book/search_index.json @@ -1 +1 @@ -[["index.html", "HemaScope Tutorial 1 Introduction", " HemaScope Tutorial HemaScope team 2024-10-22 1 Introduction HemaScope is a specialized bioinformatics toolkit designed for analyzing both single-cell and spatial transcriptome sequencing data from hematopoietic cells, including myeloid and lymphoid lineages. We have developed an R package named HemaScopeR, a Shiny interface named HemaScopeShiny, and a cloud platform named HemaScopeCloud. This tutorial introduces how to install and use the R package and Shiny interface, as well as how to access and operate the cloud platform. "],["installation.html", "2 Installation 2.1 Create a new conda environment and activate it 2.2 Set the channels in conda 2.3 Install R 2.4 Install required R-packages 2.5 Create the required python (v.3.9.12) virtual environment 2.6 The installed packages with versions", " 2 Installation 2.1 Create a new conda environment and activate it conda create --name HemaScope_env conda activate HemaScope_env 2.2 Set the channels in conda # Add the default channel conda config --add channels defaults # Add default channel URLs conda config --add default_channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main conda config --add default_channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r conda config --add default_channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2 # Add custom channels conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2 conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/menpo conda config --add channels 
https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch-lts conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/simpleitk conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/deepmodeling # Set to show channel URLs conda config --set show_channel_urls true 2.3 Install R R 4.3.3 conda install R-base=4.3.3 2.4 Install required R-packages From conda conda install -c conda-forge r-devtools=2.4.5 -y conda install -c conda-forge r-Seurat=4.3.0.1 -y conda install -c conda-forge r-Rfast2=0.1.5.1 -y conda install -c conda-forge r-hdf5r=1.3.10 -y conda install -c conda-forge r-ggpubr=0.6.0 -y conda install pwwang::r-seuratwrappers -y conda install -c bioconda bioconductor-monocle=2.28.0 -y conda install -c bioconda bioconductor-slingshot=2.8.0 -y conda install -c bioconda bioconductor-GSVA=1.48.2 -y conda install -c bioconda bioconductor-org.Mm.eg.db=3.17.0 -y conda install -c bioconda bioconductor-org.Hs.eg.db=3.17.0 -y conda install -c bioconda bioconductor-scran=1.28.1 -y conda install -c bioconda bioconductor-AUCell=1.22.0 -y conda install -c bioconda bioconductor-RcisTarget=1.20.0 -y conda install -c bioconda bioconductor-GENIE3=1.24.0 -y conda install -c bioconda bioconductor-biomaRt=2.56.1 -y conda install -c bioconda r-velocyto.r=0.6 -y #conda install -c bioconda bioconductor-limma=3.56.2 -y Enter the R language environment We suggest users do not manually update any already installed R packages during the installation of the following R packages. R From BiocManager # BiocManager(version = "1.30.23") should already be installed as a dependency of r-seuratwrappers. # If it is not installed, please run the following code to install it. 
# install.packages("BiocManager",version="1.30.23") BiocManager::install("ComplexHeatmap") BiocManager::install("scmap") BiocManager::install("clusterProfiler") BiocManager::install("BiocNeighbors") From CRAN install.packages(c("doMC", "doRNG", "shinyjs", "shiny", "shinyWidgets", "shinydashboard", "slickR", "phateR", "gelnet", "parallelDist", "kableExtra", "transport", "feather", "markdown", "ggalluvial", "forcats", "mcmc", "MCMCpack", "fields", "getopt", "osfr")) From GitHub tips: Sometimes network connection issues may occur, resulting in an error message indicating that GitHub cannot be connected. Please try installing again when the network conditions improve. Usage limitations: Sometimes an API rate limit error occurs, and a GitHub token is needed to provide the GitHub API rate limit. The steps to resolve this are as follows: Register for an account or log in to an existing account on the GitHub website. Then click on your profile picture in the top right corner, go to the dropdown menu and select “Settings.” Next, find “Developer settings” and click on it, then find “Personal access tokens (classic).” Click on it, then click “Create new token (classic).” Create a new token by first naming it anything you like. Then choose the expiration time for the token. Finally, check the “repo” box; the token will be used to download code repositories from GitHub. Click “Generate token.” Copy the generated token password. After that, set the token in the environment variable in R. Since we are using conda, enter R by typing R in the terminal. Then, enter the command: usethis::edit_r_environ(). This will open a file. Press the i key to edit. Paste the token you copied into the code area as follows: GITHUB_TOKEN=“your_token”. Then press Esc, type :wq! (force save). After that, you need to exit Linux and re-enter R. Close and reopen the terminal to apply the environment variable. Reopen Linux, activate the conda environment, and enter R again. 
devtools::install_github("sqjin/CellChat") devtools::install_github("immunogenomics/presto") devtools::install_github("aertslab/SCENIC@140ad6b") devtools::install_github("pzhulab/abcCellmap@f44c14b") devtools::install_github("navinlabcode/copykat@d7d6569") devtools::install_github('chris-mcginnis-ucsf/DoubletFinder@8c7f76e') devtools::install_github("mojaveazure/seurat-disk@877d4e1") devtools::install_github(c("hfang-bristol/dnet")) Install HemaScopeR from github devtools::install_github(repo="ZhenyiWangTHU/HemaScopeR", dep = FALSE) 2.5 Create the required python (v.3.9.12) virtual environment Run the init_miniconda function to create the miniconda virtual environments for the scRNA-seq pipeline and ST pipeline of 10X Visium data and MERFISH data. library(HemaScopeR) init_miniconda() (Optional) Run the init_miniconda_stereo function to create the miniconda virtual environment for the stereo-seq data. init_miniconda_stereo() 2.6 The installed packages with versions R packages with versions Package Version ------- ------- Python packages with versions Package Version ------------------------ -------------- "],["integrated-scrna-seq-pipeline.html", "3 Integrated scRNA-seq pipeline", " 3 Integrated scRNA-seq pipeline Load the R packages. 
# sc libraries library(Seurat) library(phateR) library(DoubletFinder) library(monocle) library(slingshot) library(GSVA) library(limma) library(plyr) library(dplyr) library(org.Mm.eg.db) library(org.Hs.eg.db) library(CellChat) library(velocyto.R) library(SeuratWrappers) library(stringr) library(scran) library(ggpubr) library(viridis) library(pheatmap) library(parallel) library(reticulate) library(SCENIC) library(feather) library(AUCell) library(RcisTarget) library(Matrix) library(foreach) library(doParallel) library(clusterProfiler) # st libraries library(RColorBrewer) library(Rfast2) library(SeuratDisk) library(abcCellmap) library(biomaRt) library(copykat) library(gelnet) library(ggplot2) library(parallelDist) library(patchwork) library(markdown) # getpot library(getopt) library(tools) # HemaScopeR library(HemaScopeR) Run the integrated scRNA-seq pipeline. scRNASeq_10x_pipeline( # input and output input.data.dirs = c('./SRR7881399/outs/filtered_feature_bc_matrix', './SRR7881400/outs/filtered_feature_bc_matrix', './SRR7881401/outs/filtered_feature_bc_matrix', './SRR7881402/outs/filtered_feature_bc_matrix', './SRR7881403/outs/filtered_feature_bc_matrix', './SRR7881404/outs/filtered_feature_bc_matrix', './SRR7881405/outs/filtered_feature_bc_matrix', './SRR7881406/outs/filtered_feature_bc_matrix', './SRR7881407/outs/filtered_feature_bc_matrix', './SRR7881408/outs/filtered_feature_bc_matrix', './SRR7881409/outs/filtered_feature_bc_matrix', './SRR7881410/outs/filtered_feature_bc_matrix', './SRR7881411/outs/filtered_feature_bc_matrix', './SRR7881412/outs/filtered_feature_bc_matrix', './SRR7881413/outs/filtered_feature_bc_matrix', './SRR7881414/outs/filtered_feature_bc_matrix', './SRR7881415/outs/filtered_feature_bc_matrix', './SRR7881416/outs/filtered_feature_bc_matrix', './SRR7881417/outs/filtered_feature_bc_matrix', './SRR7881418/outs/filtered_feature_bc_matrix', './SRR7881419/outs/filtered_feature_bc_matrix', './SRR7881420/outs/filtered_feature_bc_matrix', 
'./SRR7881421/outs/filtered_feature_bc_matrix', './SRR7881422/outs/filtered_feature_bc_matrix', './SRR7881423/outs/filtered_feature_bc_matrix'), project.names = c( 'SRR7881399', 'SRR7881400', 'SRR7881401', 'SRR7881402', 'SRR7881403', 'SRR7881404', 'SRR7881405', 'SRR7881406', 'SRR7881407', 'SRR7881408', 'SRR7881409', 'SRR7881410', 'SRR7881411', 'SRR7881412', 'SRR7881413', 'SRR7881414', 'SRR7881415', 'SRR7881416', 'SRR7881417', 'SRR7881418', 'SRR7881419', 'SRR7881420', 'SRR7881421', 'SRR7881422', 'SRR7881423'), output.dir = './output/', pythonPath = '/home/anaconda3/envs/HemaScopeR/bin/python', # quality control and preprocessing gene.column = 2, min.cells = 10, min.feature = 200, mt.pattern = '^MT-', nFeature_RNA.limit = 200, percent.mt.limit = 20, scale.factor = 10000, nfeatures = 3000, ndims = 50, vars.to.regress = NULL, PCs = 1:35, resolution = 0.4, n.neighbors = 50, # remove doublets doublet.percentage = 0.04, doublerFinderwraper.PCs = 1:20, doublerFinderwraper.pN = 0.25, doublerFinderwraper.pK = 0.1, # phateR phate.knn = 50, phate.npca = 20, phate.t = 10, phate.ndim = 2, min.pct = 0.25, logfc.threshold = 0.25, # visualization ViolinPlot.cellTypeOrders = as.character(1:22), ViolinPlot.cellTypeColors = NULL, Org = 'hsa', loom.files.path = c( './SRR7881399/velocyto/SRR7881399.loom', './SRR7881400/velocyto/SRR7881400.loom', './SRR7881401/velocyto/SRR7881401.loom', './SRR7881402/velocyto/SRR7881402.loom', './SRR7881403/velocyto/SRR7881403.loom', './SRR7881404/velocyto/SRR7881404.loom', './SRR7881405/velocyto/SRR7881405.loom', './SRR7881406/velocyto/SRR7881406.loom', './SRR7881407/velocyto/SRR7881407.loom', './SRR7881408/velocyto/SRR7881408.loom', './SRR7881409/velocyto/SRR7881409.loom', './SRR7881410/velocyto/SRR7881410.loom', './SRR7881411/velocyto/SRR7881411.loom', './SRR7881412/velocyto/SRR7881412.loom', './SRR7881413/velocyto/SRR7881413.loom', './SRR7881414/velocyto/SRR7881414.loom', './SRR7881415/velocyto/SRR7881415.loom', 
'./SRR7881416/velocyto/SRR7881416.loom', './SRR7881417/velocyto/SRR7881417.loom', './SRR7881418/velocyto/SRR7881418.loom', './SRR7881419/velocyto/SRR7881419.loom', './SRR7881420/velocyto/SRR7881420.loom', './SRR7881421/velocyto/SRR7881421.loom', './SRR7881422/velocyto/SRR7881422.loom', './SRR7881423/velocyto/SRR7881423.loom'), # cell cycle cellcycleCutoff = NULL, # cell chat sorting = FALSE, ncores = 10, # Verbose = FALSE, # activeEachStep Whether_load_previous_results = FALSE, Step1_Input_Data = TRUE, Step1_Input_Data.type = 'cellranger-count', Step2_Quality_Control = TRUE, Step2_Quality_Control.RemoveBatches = TRUE, Step2_Quality_Control.RemoveDoublets = TRUE, Step3_Clustering = TRUE, Step4_Identify_Cell_Types = TRUE, Step4_Use_Which_Labels = 'clustering', Step4_Cluster_Labels = NULL, Step4_Changed_Labels = NULL, Step4_run_sc_CNV = TRUE, Step5_Visualization = TRUE, Step6_Find_DEGs = TRUE, Step7_Assign_Cell_Cycle = TRUE, Step8_Calculate_Heterogeneity = TRUE, Step9_Violin_Plot_for_Marker_Genes = TRUE, Step10_Calculate_Lineage_Scores = TRUE, Step11_GSVA = TRUE, Step11_GSVA.identify.cellType.features=TRUE, Step11_GSVA.identify.diff.features=FALSE, Step11_GSVA.comparison.design=NULL, Step12_Construct_Trajectories = TRUE, Step12_Construct_Trajectories.clusters = c('3','6','9','10','11','14','15','19'), Step12_Construct_Trajectories.monocle = TRUE, Step12_Construct_Trajectories.slingshot = TRUE, Step12_Construct_Trajectories.scVelo = TRUE, Step13_TF_Analysis = TRUE, Step14_Cell_Cell_Interaction = TRUE, Step15_Generate_the_Report = TRUE ) "],["step-by-step-scrna-seq-pipeline.html", "4 Step-by-step scRNA-seq Pipeline 4.1 Before you begin 4.2 Step 1. Load the input data 4.3 Step 2. Quality Control 4.4 Step 3. Clustering 4.5 Step 4. Identify Cell Types 4.6 Step 5. Visualization 4.7 Step 6. Find DEGs 4.8 Step 7. Assign Cell Cycles 4.9 Step 8. Calculate Heterogeneity 4.10 Step 9. Violin Plot for Marker Genes 4.11 Step 10. Calculate Lineage Scores 4.12 Step 11. 
GSVA 4.13 Step 12. Construct Trajectories 4.14 Step 13. TF Analysis 4.15 Step 14. Cell-Cell Interaction", " 4 Step-by-step scRNA-seq Pipeline 4.1 Before you begin Load the R packages. library(Seurat) library(phateR) library(DoubletFinder) library(monocle) library(slingshot) library(GSVA) library(limma) library(plyr) library(dplyr) library(org.Mm.eg.db) library(org.Hs.eg.db) library(CellChat) library(velocyto.R) library(SeuratWrappers) library(stringr) library(scran) library(ggpubr) library(viridis) library(pheatmap) library(parallel) library(reticulate) library(SCENIC) library(feather) library(AUCell) library(RcisTarget) library(Matrix) library(foreach) library(doParallel) library(clusterProfiler) # st libraries library(RColorBrewer) library(Rfast2) library(SeuratDisk) library(abcCellmap) library(biomaRt) library(copykat) library(gelnet) library(ggplot2) library(parallelDist) library(patchwork) library(markdown) library(getopt) library(tools) library(HemaScopeR) Set the paths for the output results, and the Python installation. output.dir = './output' pythonPath = '/home/anaconda3/envs/HemaScopeR/bin/python' Create folders for saving the results of HemaScopeR analysis. wdir <- getwd() if(is.null(pythonPath)==FALSE){ reticulate::use_python(pythonPath) }else{print('Please set the path of Python.')} if (!file.exists(paste0(output.dir, '/HemaScopeR_results'))) { dir.create(paste0(output.dir, '/HemaScopeR_results'),recursive =T) } output.dir <- paste0(output.dir,'/HemaScopeR_results') if (!file.exists(paste0(output.dir, '/RDSfiles/'))) { dir.create(paste0(output.dir, '/RDSfiles/')) } #set the path for loading previous results, if necessary previous_results_path <- paste0(output.dir, '/RDSfiles/') # if (Whether_load_previous_results) { # print('Loading the previous results...') # Load_previous_results(previous_results_path = previous_results_path) # } 4.2 Step 1. Load the input data Create a folder for step1 print('Step1. 
Input data.') if (!file.exists(paste0(output.dir, '/Step1.Input_data/'))) { dir.create(paste0(output.dir, '/Step1.Input_data/')) } Set the parameters for loading the data sets. input.data.dirs = c('./SRR7881399/outs/filtered_feature_bc_matrix')#, #'./SRR7881400/outs/filtered_feature_bc_matrix', #'./SRR7881401/outs/filtered_feature_bc_matrix', #'./SRR7881402/outs/filtered_feature_bc_matrix', #'./SRR7881403/outs/filtered_feature_bc_matrix' project.names = c('SRR7881399')#, #'SRR7881400', #'SRR7881401', #'SRR7881402', #'SRR7881403' gene.column = 2 min.cells = 10 min.feature = 200 mt.pattern = '^MT-' # set '^mt-' for mouse data Step1_Input_Data.type = 'cellranger-count' loom.files.path ="./SRR7881399/loom" Load the data sets file.copy(from = input.data.dirs, to = paste0(output.dir,'/Step1.Input_data/'), recursive = TRUE) if(Step1_Input_Data.type == 'cellranger-count'){ if(length(input.data.dirs) > 1){ input.data.list <- c() for (i in 1:length(input.data.dirs)) { sc_data.temp <- Read10X(data.dir = input.data.dirs[i], gene.column = gene.column) sc_object.temp <- CreateSeuratObject(counts = sc_data.temp, project = project.names[i], min.cells = min.cells, min.feature = min.feature) sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern) input.data.list <- c(input.data.list, sc_object.temp)} }else{ sc_data <- Read10X(data.dir = input.data.dirs, gene.column = gene.column) sc_object <- CreateSeuratObject(counts = sc_data, project = project.names, min.cells = min.cells, min.feature = min.feature) sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern) } }else if(Step1_Input_Data.type == 'Seurat'){ if(length(input.data.dirs) > 1){ input.data.list <- c() for (i in 1:length(input.data.dirs)) { sc_object.temp <- readRDS(input.data.dirs[i]) sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern) input.data.list <- c(input.data.list, sc_object.temp) } }else{ sc_object <- 
readRDS(input.data.dirs) sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern) } }else if(Step1_Input_Data.type == 'Matrix'){ if(length(input.data.dirs) > 1){ input.data.list <- c() for (i in 1:length(input.data.dirs)) { sc_data.temp <- readRDS(input.data.dirs[i]) sc_object.temp <- CreateSeuratObject(counts = sc_data.temp, project = project.names[i], min.cells = min.cells, min.feature = min.feature) sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern) input.data.list <- c(input.data.list, sc_object.temp)} }else{ sc_data <- readRDS(input.data.dirs) sc_object <- CreateSeuratObject(counts = sc_data, project = project.names, min.cells = min.cells, min.feature = min.feature) sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern) } }else{ stop('Please input data generated by the cellranger-count software, or a Seurat object, or a gene expression matrix. HemaScopeR does not support other formats of input data.') } Save the variables after executing each step, if necessary. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } 4.3 Step 2. Quality Control In this step, the following quality control steps will be performed: Normalize data using the LogNormalize method. Find variable features using the vst method. Scale data using the identified variable features and specified variables to regress out. Perform principal component analysis (PCA) on the scaled data. Find K nearest neighbors based on PCA dimensions. Perform clustering analysis based on the found neighbors. Optionally, remove doublets using doubletFinder. Optionally, integrate multiple datasets by removing batch effects. 
4.3.1 Function arguments: nFeature_RNA.limit: The cutoff of the minimum number of detected genes in each cell. percent.mt.limit: The cutoff of the maximum percentage of mitochondria genes in each cell. scale.factor: The scale factor for the ‘data’ slot in the seurat object. nfeatures: The number of selected highly variable features for down stream analysis. ndims: The number of principle components in PCA. vars.to.regress: Variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito. (ScaleData in Seurat) PCs: Which dimensions to use as input features.(RunTSNE and RunUMAP in Seurat) resolution: Value of the resolution parameter, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities. (FindClusters in Seurat) n.neighbors: Defines k for the k-nearest neighbor algorithm. (FindNeighbors in Seurat) percentage: Assuming ‘percentage’ doublet formation rate - tailor for your dataset. The default value is 0.05. doublerFinderwraper.PCs Which dimensions to use as input features for doubletFinder. doublerFinderwraper.pN: The percentage of real-artifical data for doubletFinder. doublerFinderwraper.pK: The pK parameter controls the doublet cell detection by determining the number of nearest neighbors and influencing the calculation of pANN scores and the final cell classification results. Adjusting the pK value allows optimization of the doublet cell detection process based on specific data and analysis requirements. 4.3.2 codes for running step2 Create a folder for saving the results of quality control. print('Step2. Quality control.') if (!file.exists(paste0(output.dir, '/Step2.Quality_control/'))) { dir.create(paste0(output.dir, '/Step2.Quality_control/')) } Set the parameters for quality control. 
# quality control nFeature_RNA.limit = 200 percent.mt.limit = 20 # preprocessing nfeatures = 3000 scale.factor = 10000 ndims = 50 vars.to.regress = NULL PCs = 1:35 resolution = 0.4 n.neighbors = 50 # removing doublets Step2_Quality_Control.RemoveDoublets = TRUE doublet.percentage = 0.04 doublerFinderwraper.PCs = 1:20 doublerFinderwraper.pN = 0.25 doublerFinderwraper.pK = 0.1 # removing batch effect Step2_Quality_Control.RemoveBatches = TRUE Run the quality control process. if(length(input.data.dirs) > 1){ # preprocess and quality control for multiple scRNA-Seq data sets sc_object <- QC_multiple_scRNASeq(seuratObjects = input.data.list, datasetID = project.names, output.dir = paste0(output.dir,'/Step2.Quality_control/'), Step2_Quality_Control.RemoveBatches = Step2_Quality_Control.RemoveBatches, Step2_Quality_Control.RemoveDoublets = Step2_Quality_Control.RemoveDoublets, nFeature_RNA.limit = nFeature_RNA.limit, percent.mt.limit = percent.mt.limit, scale.factor = scale.factor, nfeatures = nfeatures, ndims = ndims, vars.to.regress = vars.to.regress, PCs = PCs, resolution = resolution, n.neighbors = n.neighbors, percentage = doublet.percentage, doublerFinderwraper.PCs = doublerFinderwraper.PCs, doublerFinderwraper.pN = doublerFinderwraper.pN, doublerFinderwraper.pK = doublerFinderwraper.pK ) }else{ # preprocess and quality control for single scRNA-Seq data set sc_object <- QC_single_scRNASeq(sc_object = sc_object, datasetID = project.names, output.dir = paste0(output.dir,'/Step2.Quality_control/'), Step2_Quality_Control.RemoveDoublets = Step2_Quality_Control.RemoveDoublets, nFeature_RNA.limit = nFeature_RNA.limit, percent.mt.limit = percent.mt.limit, scale.factor = scale.factor, nfeatures = nfeatures, vars.to.regress = vars.to.regress, ndims = ndims, PCs = PCs, resolution = resolution, n.neighbors = n.neighbors, percentage = doublet.percentage, doublerFinderwraper.PCs = doublerFinderwraper.PCs, doublerFinderwraper.pN = doublerFinderwraper.pN, doublerFinderwraper.pK = 
doublerFinderwraper.pK) } Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } 4.3.3 Outputs Figure 4.1: Violin plots showing the nFeature, nCount and percent.mt for each sample Figure 4.2: Figures showing the correlation between nFeature and nCount, as well as between nCount and percent.mt Figure 4.3: Figures showing the variable features used for downstream analysis Figure 4.4: ElbowPlot showing suitable number of PCs used for further analysis Figure 4.5: UMAP plot showing doublets found by DoubletFinder 4.4 Step 3. Clustering Create a folder for saving the results of Louvain clustering. print('Step3. Clustering.') if (!file.exists(paste0(output.dir, '/Step3.Clustering/'))) { dir.create(paste0(output.dir, '/Step3.Clustering/')) } Set the parameters for clustering. PCs = 1:35 resolution = 0.4 n.neighbors = 50 Run Louvian clustering. 
if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){graph.name <- 'integrated_snn'}else{graph.name <- 'RNA_snn'} sc_object <- FindNeighbors(sc_object, dims = PCs, k.param = n.neighbors, force.recalc = TRUE) sc_object <- FindClusters(sc_object, resolution = resolution, graph.name = graph.name) sc_object@meta.data$seurat_clusters <- as.character(as.numeric(sc_object@meta.data$seurat_clusters)) # plot clustering pdf(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','tsne_cluster.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "tsne", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() pdf(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','umap_cluster.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "umap", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','tsne_cluster.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "tsne", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','umap_cluster.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "umap", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.6: UMAP plot showing clustering results 4.5 Step 4. 
Identify Cell Types In this step, users can predict the cell types of hematopoietic cells by implementing two approaches (Scmap and Seurat) through abcCellmap packages. Cells are labeled by 43 different RNA clusters according to unsupervised clustering of single-cell transcriptional profiles, and also labeled by 32 immunophenotypic cell types. In addition, users can use Copykat to measure copy number variation (CNV) and determine the ploidy of each cell. 4.5.1 codes for running abcCellmap Create a folder for saving the results of cell type identification. print('Step4. Identify cell types automatically.') if (!file.exists(paste0(output.dir, '/Step4.Identify_Cell_Types/'))) { dir.create(paste0(output.dir, '/Step4.Identify_Cell_Types/')) } Set the path for the database. databasePath = "~/HemaScopeR/database/" Set the parameters for cell type identification. Step4_Use_Which_Labels = 'clustering' Step4_Cluster_Labels = NULL Step4_Changed_Labels = NULL Org = 'hsa' ncores = 10 Run the cell type identification process. 
sc_object <- run_cell_annotation(object = sc_object, assay = 'RNA', species = Org, output.dir = paste0(output.dir,'/Step4.Identify_Cell_Types/')) if(Org == 'hsa'){ load(paste0(databasePath,"/HematoMap.reference.rdata")) #the data can be downloaded via the link https://cloud.tsinghua.edu.cn/d/759fd04333274d3f9946 if(length(intersect(rownames(HematoMap.reference), rownames(sc_object))) < 1000){ HematoMap.reference <- RenameGenesSeurat(obj = HematoMap.reference, newnames = toupper(rownames(HematoMap.reference)), gene.use = rownames(HematoMap.reference), de.assay = "RNA", lassays = "RNA") } if(sc_object@active.assay == 'integrated'){ DefaultAssay(sc_object) <- 'RNA' sc_object <- mapDataToRef(ref_object = HematoMap.reference, ref_labels = HematoMap.reference@meta.data$CellType, query_object = sc_object, PCs = PCs, output.dir = paste0(output.dir, '/Step4.Identify_Cell_Types/')) DefaultAssay(sc_object) <- 'integrated' }else{ sc_object <- mapDataToRef(ref_object = HematoMap.reference, ref_labels = HematoMap.reference@meta.data$CellType, query_object = sc_object, PCs = PCs, output.dir = paste0(output.dir, '/Step4.Identify_Cell_Types/')) } } Set the cell labels. 
# set the cell labels if(Step4_Use_Which_Labels == 'clustering'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$seurat_clusters Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.1'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$Seurat.RNACluster Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.2'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$scmap.RNACluster Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.3'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$Seurat.Immunophenotype Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.4'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$scmap.Immunophenotype Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'HematoMap'){ if(Org == 'hsa'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$predicted.id Idents(sc_object) <- sc_object@meta.data$selectLabels }else{print("'HematoMap' is only applicable to human data ('Org' = 'hsa').")} }else if(Step4_Use_Which_Labels == 'changeLabels'){ if (!is.null(Step4_Cluster_Labels) && !is.null(Step4_Changed_Labels) && length(Step4_Cluster_Labels) == length(Step4_Changed_Labels)){ sc_object@meta.data$selectLabels <- plyr::mapvalues(sc_object@meta.data$seurat_clusters, from = as.character(Step4_Cluster_Labels), to = as.character(Step4_Changed_Labels), warn_missing = FALSE) Idents(sc_object) <- sc_object@meta.data$selectLabels }else{ print("Please input the 'Step4_Cluster_Labels' parameter as Seurat clustering labels, and the 'Step4_Changed_Labels' parameter as new labels. 
Please note that these two parameters should be of equal length.") } }else{ print('Please set the "Step4_Use_Which_Labels" parameter as "clustering", "abcCellmap.1", "abcCellmap.2", "HematoMap" or "changeLabels".') } Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.7: UMAP plots showing cell type annotation results Figure 4.8: Immunophenotype and RNACluster label predicted by scmap Figure 4.9: Immunophenotype and RNACluster label predicted by Seurat 4.5.2 codes for running the CNV analysis sc_CNV(sc_object=sc_object, save_path=paste0(output.dir,'/Step4.Identify_Cell_Types/'), assay = 'RNA', LOW.DR = 0.05, #refer to the Copykat documentation for detailed explanations of the parameters UP.DR = 0.1, win.size = 25, distance = "euclidean", genome = NULL, n.cores = ncores, #note: this step will take a long time, using more ncores could shorten the running time species = Org) Figure 4.10: copykat heatmap Figure 4.11: UMAP plot showing CNV state predicted by copykat 4.6 Step 5. Visualization In this step, users can obtain statistics on the numbers and proportions of cell groups, and can also use three dimensionality reduction methods (TSNE, UMAP, phateR) to visualize the results. 4.6.1 codes for performing three dimensionality reduction methods Create a folder for saving the visualization results. print('Step5. Visualization.') if (!file.exists(paste0(output.dir, '/Step5.Visualization/'))) { dir.create(paste0(output.dir, '/Step5.Visualization/')) } Perform visualization using UMAP and TSNE.
# plot cell types pdf(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','tsne cell types.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "tsne", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() pdf(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','umap cell types.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "umap", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','tsne cell types.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "tsne", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','umap cell types.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "umap", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() Figure 4.12: UMAP and TSNE visualization Set the parameters for phateR. phate.knn = 50 #The number of nearest neighbors to consider in the phateR algorithm. Default 50. phate.npca = 20 #The number of principal components to use in the phateR algorithm. Default 20. phate.t = 10 #The t-value for the phateR algorithm, which controls the level of exploration. Default 10. phate.ndim = 2 #The number of dimensions for the output embedding in the phateR algorithm. Default 2. Run phateR for dimensional reduction and visualization. 
# run phateR if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){ DefaultAssay(sc_object) <- 'integrated' }else{ DefaultAssay(sc_object) <- 'RNA'} if(!is.null(pythonPath)){ run_phateR(sc_object = sc_object, output.dir = paste0(output.dir,'/Step5.Visualization/'), pythonPath = pythonPath, phate.knn = phate.knn, phate.npca = phate.npca, phate.t = phate.t, phate.ndim = phate.ndim) } Figure 4.13: phateR result 4.6.2 codes for calculating the proportions The statistical results for the numbers and proportions of cell groups. # statistical results cells_labels <- as.data.frame(cbind(rownames(sc_object@meta.data), as.character(sc_object@meta.data$selectLabels))) colnames(cells_labels) <- c('cell_id', 'cluster_id') cluster_counts <- cells_labels %>% group_by(cluster_id) %>% summarise(count = n()) total_cells <- nrow(cells_labels) cluster_counts <- cluster_counts %>% mutate(proportion = count / total_cells) cluster_counts <- as.data.frame(cluster_counts) cluster_counts$percentages <- scales::percent(cluster_counts$proportion, accuracy = 0.1) cluster_counts <- cluster_counts[,-which(colnames(cluster_counts)=='proportion')] cluster_counts$cluster_id_count_percentages <- paste(cluster_counts$cluster_id, " (", cluster_counts$count, ' cells; ', cluster_counts$percentages, ")", sep='') cluster_counts <- cluster_counts[order(cluster_counts$count, decreasing = TRUE),] cluster_counts <- rbind(cluster_counts, c('Total', sum(cluster_counts$count), '100%', 'all cells')) sc_object@meta.data$cluster_id_count_percentages <- mapvalues(sc_object@meta.data$selectLabels, from=cluster_counts$cluster_id, to=cluster_counts$cluster_id_count_percentages, warn_missing=FALSE) colnames(sc_object@meta.data)[which(colnames(sc_object@meta.data) == 'cluster_id_count_percentages')] <- paste('Total ', nrow(sc_object@meta.data), ' cells', sep='') cluster_counts <- cluster_counts[,-which(colnames(cluster_counts)=='cluster_id_count_percentages')] colnames(cluster_counts) <- c('Cell 
types', 'Cell counts', 'Percentages') # names(colorvector) <- mapvalues(names(colorvector), # from=cluster_counts$cluster_id, # to=cluster_counts$cluster_id_count_percentages, # warn_missing=FALSE) write.csv(cluster_counts, file=paste(paste0(output.dir, '/Step5.Visualization/'), '/cell types_cell counts_percentages.csv', sep=''), quote=FALSE, row.names=FALSE) The UMAP visualization. pdf(paste(paste0(output.dir, '/Step5.Visualization'), '/cell types_cell counts_percentages_umap.pdf', sep=''), width = 14, height = 6) print(DimPlot(sc_object, reduction = "umap", group.by = paste('Total ', nrow(sc_object@meta.data), ' cells', sep=''), label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.14: UMAP plot showing cell types and their corresponding proportions 4.7 Step 6. Find DEGs In this step, users can find DEGs (differentially expressed genes) across different cell type groups using FindAllMarkers, use GPTCelltype to predict cell labels, perform GO and KEGG enrichment analysis, and perform subnetwork analysis for each cell type group. 4.7.1 codes for finding DEGs Set the parameters for identifying differentially expressed genes. min.pct = 0.25 logfc.threshold = 0.25 Create a folder for the DEGs analysis. print('Step6. Find DEGs.') if (!file.exists(paste0(output.dir, '/Step6.Find_DEGs/'))) { dir.create(paste0(output.dir, '/Step6.Find_DEGs/')) } Identify DEGs using the Wilcoxon Rank-Sum Test.
sc_object.markers <- FindAllMarkers(sc_object, only.pos = TRUE, min.pct = min.pct, logfc.threshold = logfc.threshold) write.csv(sc_object.markers, file = paste0(paste0(output.dir, '/Step6.Find_DEGs/'),'sc_object.markerGenes.csv'), quote=FALSE) # visualization sc_object.markers.top5 <- sc_object.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC) pdf(paste0(paste0(output.dir, '/Step6.Find_DEGs/'), 'sc_object_markerGenesTop5.pdf'), width = 0.5*length(unique(sc_object.markers.top5$gene)), height = 0.5*length(unique(Idents(sc_object)))) print(DotPlot(sc_object, features = unique(sc_object.markers.top5$gene), cols=c("lightgrey",'red'))+theme(axis.text.x =element_text(angle = 45, vjust = 1, hjust = 1))) dev.off() png(paste0(paste0(output.dir, '/Step6.Find_DEGs/'), 'sc_object_markerGenesTop5.png'), width = 20*length(unique(sc_object.markers.top5$gene)), height = 30*length(unique(Idents(sc_object)))) print(DotPlot(sc_object, features = unique(sc_object.markers.top5$gene), cols=c("lightgrey",'red'))+theme(axis.text.x =element_text(angle = 45, vjust = 1, hjust = 1))) dev.off() Figure 4.15: Dotplot showing marker genes of each cell type group 4.7.2 codes for using GPTCelltype Set the parameters for GPTCelltype. your_openai_API_key = '' tissuename = 'human bone marrow' gptmodel = 'gpt-3.5' Use GPTCelltype to assist cell type annotation. GPT_annotation( marker.genes = sc_object.markers, your_openai_API_key = your_openai_API_key, tissuename = tissuename, gptmodel = gptmodel, output.dir = paste0(output.dir, '/Step6.Find_DEGs/')) 4.7.3 Perform GO and KEGG enrichment. 
# GO enrichment if(Org=='mmu'){ OrgDb <- 'org.Mm.eg.db' }else if(Org=='hsa'){ OrgDb <- 'org.Hs.eg.db' }else{ stop("Org should be 'mmu' or 'hsa'.") } HemaScopeREnrichment(DEGs=sc_object.markers, OrgDb=OrgDb, output.dir=paste0(output.dir, '/Step6.Find_DEGs/')) Figure 4.16: Barplot showing GO(BP)and KEGG enrichment results of each cell type group 4.7.4 Perform subnetwork analysis Create a folder for saving the results of gene network analysis. if (!file.exists(paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/'))) { dir.create(paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/')) } Perform gene network analysis. OpenXGR_SAG(sc_object.markers = sc_object.markers, output.dir = paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/'), subnet.size = 10) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.17: Figure showing subnetwork of each cell type group identified by OpenXGR 4.8 Step 7. Assign Cell Cycles This step assigns cell cycle phases by analyzing cell cycle-related genes and generates plots of the cell cycle analysis results. 4.8.1 Function arguments: sc_object: A Seurat object containing single-cell RNA sequencing data. counts_matrix: The ‘counts’ slot in the Seurat object. data_matrix: The ‘data’ slot in the Seurat object. cellcycleCutoff: The cutoff value for distinguishing between cycling and quiescent cells. Cells with a G1G2Score below this cutoff are considered quiescent. cellTypeOrders: The order of cell types for visualization. If not provided, the function will use the unique cell types in the input Seurat object. databasePath: The path to the database required for the analysis. 
Org: A character vector specifying the species of cell cycle genes, can be ‘mmu’ (mouse) or ‘hsa’ (human). 4.8.2 codes for step7 Create a folder for saving the results of cell cycle analysis. print('Step7. Assign cell cycles.') if (!file.exists(paste0(output.dir, '/Step7.Assign_cell_cycles/'))) { dir.create(paste0(output.dir, '/Step7.Assign_cell_cycles/')) } Set the parameters for the cell cycle analysis. cellcycleCutoff = NULL Run the cell cycle analysis. datasets.before.batch.removal <- readRDS(paste0(paste0(output.dir, '/RDSfiles/'),'datasets.before.batch.removal.rds')) sc_object <- cellCycle(sc_object=sc_object, counts_matrix = GetAssayData(object = datasets.before.batch.removal, slot = "counts")%>%as.matrix(), data_matrix = GetAssayData(object = datasets.before.batch.removal, slot = "data")%>%as.matrix(), cellcycleCutoff = cellcycleCutoff, cellTypeOrders = unique(sc_object@meta.data$selectLabels), output.dir=paste0(output.dir, '/Step7.Assign_cell_cycles/'), databasePath = databasePath, Org = Org) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } 4.8.3 Outputs Figure 4.18: Barplot showing the proportion of different cell cycle within each cell type group Figure 4.19: Density plot showing the distribution of cell cycle scores 4.9 Step 8. Calculate Heterogeneity This step quantifies cell heterogeneity by computing Spearman correlation coefficients between cells within the same cell type groups. 4.9.1 Function arguments: expression_matrix: A numeric matrix representing the expression data, where rows are genes and columns are cells. The matrix should be appropriately preprocessed and filtered before using this function. 
cell_types_groups: A data frame specifying cell type annotations for each cell, including cell type labels and group information. cellTypeOrders: The order of cell types for visualization. If not provided, the function will use the unique cell types in the input cell_types_groups. 4.9.2 codes for step8 Create a folder for saving the results of heterogeneity calculation. print('Step8. Calculate heterogeneity.') if (!file.exists(paste0(output.dir, '/Step8.Calculate_heterogeneity/'))) { dir.create(paste0(output.dir, '/Step8.Calculate_heterogeneity/')) } Run heterogeneity calculation process. expression_matrix <- GetAssayData(object = datasets.before.batch.removal, slot = "data")%>%as.matrix() expression_matrix <- expression_matrix[,rownames(sc_object@meta.data)] cell_types_groups <- as.data.frame(cbind(sc_object@meta.data$selectLabels, sc_object@meta.data$datasetID)) colnames(cell_types_groups) <- c('clusters', 'datasetID') if(is.null(ViolinPlot.cellTypeOrders)){ cellTypes_orders <- unique(sc_object@meta.data$selectLabels) }else{ cellTypes_orders <- ViolinPlot.cellTypeOrders } heterogeneity(expression_matrix = expression_matrix, cell_types_groups = cell_types_groups, cellTypeOrders = cellTypes_orders, output.dir = paste0(output.dir, '/Step8.Calculate_heterogeneity/')) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.20: Box plot showing the Spearman correlation coefficients between cells within the same cell type groups(here we take data including more samples as an example) 4.10 Step 9. Violin Plot for Marker Genes This step generates violin plots for marker genes across different cell types. 
4.10.1 Function arguments: dataMatrix: A data frame or matrix representing the expression data, where rows are cells and columns are genes. features: A character vector specifying the marker genes to plot in the violin plots. CellTypes: A factor vector containing cell type annotations for each cell. cellTypeOrders: A character vector specifying the order of cell types for plotting. Defaults to unique values in CellTypes. cellTypeColors: A character vector specifying the colors to use for cell type groups. Defaults to a color palette. 4.10.2 codes for step9 Create a folder for saving the violin plots of marker genes. print('Step9. Violin plot for marker genes.') if (!file.exists(paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/'))) { dir.create(paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/')) } Run violin plot visualization. if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){ DefaultAssay(sc_object) <- 'integrated' }else{ DefaultAssay(sc_object) <- 'RNA'} dataMatrix <- GetAssayData(object = sc_object, slot = "scale.data") if(is.null(marker.genes)&(Org == 'mmu')){ # mpp genes are from 'The bone marrow microenvironment at single cell resolution' # the other genes are from 'single cell characterization of haematopoietic progenitors and their trajectories in homeostasis and perturbed haematopoiesis' # the aliases of these genes were changed in gecodeM16:Gpr64 -> Adgrg2, Sdpr -> Cavin2, Hbb-b1 -> Hbb-bs, Sfpi1 -> Spi1 HSC_lineage_signatures <- c('Slamf1', 'Itga2b', 'Kit', 'Ly6a', 'Bmi1', 'Gata2', 'Hlf', 'Meis1', 'Mpl', 'Mcl1', 'Gfi1', 'Gfi1b', 'Hoxb5') Mpp_genes <- c('Mki67', 'Mpo', 'Elane', 'Ctsg', 'Calr') Erythroid_lineage_signatures <- c('Klf1', 'Gata1', 'Mpl', 'Epor', 'Vwf', 'Zfpm1', 'Fhl1', 'Adgrg2', 'Cavin2','Gypa', 'Tfrc', 'Hbb-bs', 'Hbb-y') Lymphoid_lineage_signatures <- c('Tcf3', 'Ikzf1', 'Notch1', 'Flt3', 'Dntt', 'Btg2', 'Tcf7', 'Rag1', 'Ptprc', 'Ly6a', 'Blnk') Myeloid_lineage_signatures <- c('Gfi1', 'Spi1', 'Mpo', 
'Csf2rb', 'Csf1r', 'Gfi1b', 'Hk3', 'Csf2ra', 'Csf3r', 'Sp1', 'Fcgr3') marker.genes <- c(HSC_lineage_signatures, Mpp_genes, Erythroid_lineage_signatures, Lymphoid_lineage_signatures, Myeloid_lineage_signatures) }else if(is.null(marker.genes)&(Org == 'hsa')){ HSPCs_lineage_signatures <- c('CD34','KIT','AVP','FLT3','MME','CD7','CD38','CSF1R','FCGR1A','MPO','ELANE','IL3RA') Myeloids_lineage_signatures <- c('LYZ','CD36','MPO','FCGR1A','CD4','CD14','CD300E','ITGAX','FCGR3A','FLT3','AXL', 'SIGLEC6','CLEC4C','IRF4','LILRA4','IL3RA','IRF8','IRF7','XCR1','CD1C','THBD', 'MRC1','CD34','KIT','ITGA2B','PF4','CD9','ENG','KLF','TFRC') B_cells_lineage_signatures <- c('CD79A','IGLL1','RAG1','RAG2','VPREB1','MME','IL7R','DNTT','MKI67','PCNA','TCL1A','MS4A1','IGHD','CD27','IGHG3') T_NK_cells_lineage_signatures <- c('CD3D','CD3E','CD8A','CCR7','IL7R','SELL','KLRG1','CD27','GNLY', 'NKG7','PDCD1','TNFRSF9','LAG3','CD160','CD4','CD40LG','IL2RA', 'FOXP3','DUSP4','IL2RB','KLRF1','FCGR3A','NCAM1','XCL1','MKI67','PCNA','KLRF') marker.genes <- c(HSPCs_lineage_signatures, Myeloids_lineage_signatures, B_cells_lineage_signatures, T_NK_cells_lineage_signatures) } if(is.null(ViolinPlot.cellTypeOrders)){ ViolinPlot.cellTypeOrders <- unique(sc_object@meta.data$selectLabels) } if(is.null(ViolinPlot.cellTypeColors)){ ViolinPlot.cellTypeColors <- viridis::viridis(length(unique(sc_object@meta.data$selectLabels))) } combinedViolinPlot(dataMatrix = dataMatrix, features = marker.genes, CellTypes = sc_object@meta.data$selectLabels, cellTypeOrders = ViolinPlot.cellTypeOrders, cellTypeColors = ViolinPlot.cellTypeColors, Org = Org, output.dir = paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/'), databasePath = databasePath) Save the variables. 
# Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.21: Violin plot showing the expression of marker genes between cell type groups 4.11 Step 10. Calculate Lineage Scores This step calculates lineage scores for specified gene sets based on the provided expression data. It then generates a heatmap of lineage scores and a heatmap of gene expression patterns. 4.11.1 Function arguments: expression_matrix: A data frame or matrix representing the expression data, where rows are cells and columns are genes. cellTypes: A character vector specifying cell type annotations for each cell. e.g. c(“HSC”,“HSC”,“HSC”,“MPP1”,“MPP2”,“MPP2”,“MPP2” …) cellTypes_orders: A character vector specifying the order of cell types for plotting. e.g. c(“HSC”,“MPP1”,“MPP2”) cellTypes_colors: A character vector specifying the colors to use for cell type groups. e.g. c(“HSC” = ‘#006d2c’,“MPP1” = ‘#4292c6’,“MPP2”= ‘#810f7c’). groups: A character vector specifying groups or clusters within each cell type. groups_orders: A character vector specifying the order of groups or clusters for plotting. groups_colors: A character vector specifying the colors to use for group or cluster annotations. e.g. c(‘group1’=‘#d73027’,‘group2’=‘#2171b5’) lineage.genelist: A list of gene sets representing lineage markers. lineage.names: A character vector specifying the names of the lineages. 4.11.2 codes for step10 Create a folder for saving the results of lineage score calculation. print('Step10. 
Calculate lineage scores.') # we use normalized data here if (!file.exists(paste0(output.dir, '/Step10.Calculate_lineage_scores/'))) { dir.create(paste0(output.dir, '/Step10.Calculate_lineage_scores/')) } Run lineage score calculation. if(is.null(lineage.genelist)&is.null(lineage.names)&(Org == 'mmu')){ lineage.genelist <- c(list(HSC_lineage_signatures), list(Mpp_genes), list(Erythroid_lineage_signatures), list(Lymphoid_lineage_signatures), list(Myeloid_lineage_signatures)) lineage.names <- c('HSC_lineage_signatures', 'Mpp_genes', 'Erythroid_lineage_signatures', 'Lymphoid_lineage_signatures', 'Myeloid_lineage_signatures') }else if(is.null(lineage.genelist)&is.null(lineage.names)&(Org == 'hsa')){ lineage.genelist <- c(list(HSPCs_lineage_signatures), list(Myeloids_lineage_signatures), list(B_cells_lineage_signatures), list(T_NK_cells_lineage_signatures)) lineage.names <- c('HSPCs_lineage_signatures', 'Myeloids_lineage_signatures', 'B_cells_lineage_signatures', 'T_NK_cells_lineage_signatures') } if(is.null(ViolinPlot.cellTypeOrders)){ cellTypes_orders <- unique(sc_object@meta.data$selectLabels) }else{ cellTypes_orders <- ViolinPlot.cellTypeOrders } lineageScores(expression_matrix = expression_matrix, cellTypes = sc_object@meta.data$selectLabels, cellTypes_orders = cellTypes_orders, cellTypes_colors = ViolinPlot.cellTypeColors, groups = sc_object@meta.data$datasetID, groups_orders = unique(sc_object@meta.data$datasetID), groups_colors = groups_colors, lineage.genelist = lineage.genelist, lineage.names = lineage.names, Org = Org, output.dir = paste0(output.dir, '/Step10.Calculate_lineage_scores/'), databasePath = databasePath) Save the variables. 
# Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.22: Heatmap showing the expression of lineage genes for each cell Figure 4.23: Heatmap showing the score of lineage signatures for each cell 4.12 Step 11. GSVA This step runs GSVA analysis, which calculates enrichment scores for gene sets in each cell using the provided gene list. It also performs differential GSVA analysis between specified cell groups and generates heatmaps of the results. 4.12.1 Function arguments: sc_object: A Seurat object containing the single-cell RNA-seq data. GSVA.genelist: A list of gene sets for GSVA analysis. GSVA.cellTypes: A character vector specifying the cell types or labels for each cell. GSVA.cellTypes.orders: A character vector specifying the order of cell types for visualization. GSVA.cellGroups: A character vector specifying the cell groups or conditions for each cell. GSVA.identify.cellType.features: Logical. If TRUE, identify cell type-specific features. GSVA.identify.diff.features: Logical. If TRUE, identify differentially expressed features between cell groups. GSVA.comparison.design: A list specifying the experimental design for differential GSVA analysis. OrgDB: An organism-specific annotation database (OrgDb) for gene symbol conversion. e.g. org.Mm.eg.db or org.Hs.eg.db. 4.12.2 codes for running step11 Create a folder for saving the results of GSVA. print('Step11. GSVA.') if (!file.exists(paste0(output.dir, '/Step11.GSVA/'))) { dir.create(paste0(output.dir, '/Step11.GSVA/')) } Run GSVA. 
setwd(wdir) if(Org=='mmu'){ load(paste0(databasePath,"/mouse_c2_v5p2.rdata")) GSVA.genelist <- Mm.c2 assign('OrgDB', org.Mm.eg.db) }else if(Org=='hsa'){ load(paste0(databasePath,"/human_c2_v5p2.rdata")) GSVA.genelist <- Hs.c2 assign('OrgDB', org.Hs.eg.db) }else{ stop("Org should be 'mmu' or 'hsa'.") } if(is.null(ViolinPlot.cellTypeOrders)){ cellTypes_orders <- unique(sc_object@meta.data$selectLabels) }else{ cellTypes_orders <- ViolinPlot.cellTypeOrders } run_GSVA(sc_object = sc_object, GSVA.genelist = GSVA.genelist, GSVA.cellTypes = sc_object@meta.data$selectLabels, GSVA.cellTypes.orders = cellTypes_orders, GSVA.cellGroups = sc_object@meta.data$datasetID, GSVA.identify.cellType.features = Step11_GSVA.identify.cellType.features, GSVA.identify.diff.features = Step11_GSVA.identify.diff.features, GSVA.comparison.design = Step11_GSVA.comparison.design, OrgDB = OrgDB, output.dir = paste0(output.dir, '/Step11.GSVA/')) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.24: GSVA Heatmap showing the enriched pathways of each cell type group 4.13 Step 12. Construct Trajectories In this step, users are allowed to construct trajectories using three methods including Monocle2, slingshot and scVelo. 4.13.1 data preparation Load gene symbols and ensemble IDs. 
DefaultAssay(sc_object) <- 'RNA' countsSlot <- GetAssayData(object = sc_object, slot = "counts") gene_metadata <- as.data.frame(rownames(countsSlot)) rownames(gene_metadata) <- gene_metadata[,1] if(Org == 'mmu'){ load(paste0(databasePath,"/mouseGeneSymbolandEnsembleID.rdata")) gene_metadata $ ensembleID <- mapvalues(x = gene_metadata[,1], from = mouseGeneSymbolandEnsembleID$geneName, to = mouseGeneSymbolandEnsembleID$ensemblIDNoDot, warn_missing = FALSE) }else if(Org == 'hsa'){ load(paste0(databasePath,"/humanGeneSymbolandEnsembleID.rdata")) gene_metadata $ ensembleID <- mapvalues(x = gene_metadata[,1], from = humanGeneSymbolandEnsembleID$geneName, to = humanGeneSymbolandEnsembleID$ensemblIDNoDot, warn_missing = FALSE) } colnames(gene_metadata) <- c('gene_short_name','ensembleID') Create folders for saving the results of trajectory construction. print('Step12. Construct trajectories.') if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/')) } if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/monocle2/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/monocle2/')) } if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/slingshot/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/slingshot/')) } if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/scVelo/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/scVelo/')) } Prepare the input data. 
if(is.null(Step12_Construct_Trajectories.clusters)){ sc_object.subset <- sc_object countsSlot.subset <- GetAssayData(object = sc_object.subset, slot = "counts") }else{ sc_object.subset <- subset(sc_object, subset = selectLabels %in% Step12_Construct_Trajectories.clusters) countsSlot.subset <- GetAssayData(object = sc_object.subset, slot = "counts") } 4.13.2 monocle2 Running monocle2 involves several steps: Creating a Monocle cellDataSet using the provided cellData, phenoData, and featureData. Estimating size factors, dispersions, and detecting highly variable genes. Performing differential gene expression analysis to identify genes associated with cell state changes. Ordering cells along the inferred trajectories and reducing dimensionality. Generating and saving trajectory plots, including cell trajectory by “State” and by “Cell Types.” 4.13.2.1 Function arguments: cellData: A matrix of gene expression values, where columns represent cells and rows represent genes. phenoData: A data frame containing cell metadata, such as cell labels or other relevant information. featureData: A data frame containing information about features (genes) in the dataset. lowerDetectionLimit: The lower detection limit for gene expression. Genes with expression values below this limit will be treated as non-detected. expressionFamily: The family of the expression distribution used in Monocle analysis. cellTypes: A character vector specifying cell types or labels used for coloring in trajectory plots. monocle.orders: A character vector specifying the order of cell types in the Monocle analysis. monocle.colors: A character vector specifying colors for cell types in trajectory plots. 
4.13.2.2 codes for running monocle2 phenoData <- sc_object.subset@meta.data featureData <- gene_metadata run_monocle(cellData = countsSlot.subset, phenoData = phenoData, featureData = featureData, lowerDetectionLimit = 0.5, expressionFamily = VGAM::negbinomial.size(), cellTypes='selectLabels', monocle.orders=Step12_Construct_Trajectories.clusters, monocle.colors = ViolinPlot.cellTypeColors, output.dir = paste0(output.dir, '/Step12.Construct_trajectories/monocle2/')) Figure 4.25: Figures showing cells in different trajectory states (left) and corresponding cell type groups (right) 4.13.3 Slingshot Running Slingshot to infer cell trajectories and lineage relationships involves several steps: Constructs a Slingshot object using PCA embeddings, cell types, start clusters, and end clusters. Computes and plots the trajectory curves. Computes and plots pseudotime values along the trajectory. 4.13.3.1 Function arguments: slingshot.PCAembeddings: A matrix containing the PCA embeddings of the single-cell data, typically obtained from dimensionality reduction techniques like PCA. slingshot.cellTypes: A character vector specifying cell types or labels for each cell. slingshot.start.clus: A character vector specifying the initial cluster(s) from which cell trajectories should start. slingshot.end.clus: A character vector specifying the target cluster(s) where cell trajectories should end. slingshot.colors: A vector of colors corresponding to cell types for plotting. If not provided, default colors will be used. 
4.13.3.2 codes for running Slingshot if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){ DefaultAssay(sc_object.subset) <- 'integrated' }else{ DefaultAssay(sc_object.subset) <- 'RNA'} run_slingshot(slingshot.PCAembeddings = Embeddings(sc_object.subset, reduction = "pca")[, PCs], slingshot.cellTypes = sc_object.subset@meta.data$selectLabels, slingshot.start.clus = slingshot.start.clus, slingshot.end.clus = slingshot.end.clus, slingshot.colors = slingshot.colors, output.dir = paste0(output.dir, '/Step12.Construct_trajectories/slingshot/')) Figure 4.26: Figures showing slingshot curve and inferred pseudotime value 4.13.4 scVelo scVelo is implemented in Python, and it takes a Seurat object, cell embeddings, and cell type information as input. The process of data preparation includes the following steps: Format the Seurat object metadata, including cell types and sample names. Extract the spliced, unspliced, and ambiguous count matrices from the Seurat object. Combine the metadata and cell embeddings. Write the necessary input files for scVelo analysis, including cell embeddings, count matrices, and metadata. 4.13.4.1 Function arguments: sc_object: A Seurat object containing the single-cell RNA-seq data. loom.files.path: A character vector specifying the path(s) to the loom files for scVelo analysis. scvelo.reduction: A character specifying the reduction method used for scVelo analysis (default is ‘pca’). scvelo.column: A character specifying the column in the Seurat object metadata containing cell types. 
4.13.4.2 codes for running Scvelo if((!is.null(loom.files.path))&(!is.null(pythonPath))){ prepareDataForScvelo(sc_object = sc_object.subset, loom.files.path = loom.files.path, scvelo.reduction = 'pca', scvelo.column = 'selectLabels', output.dir = paste0(output.dir, '/Step12.Construct_trajectories/scVelo/')) reticulate::py_run_string(paste0("import os\\noutputDir = '", output.dir, "'")) reticulate::py_run_file(file.path(system.file(package = "HemaScopeR"), "python/sc_run_scvelo.py"), convert = FALSE) } Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.27: Figure showing trajectory predicted by scvelo 4.14 Step 13. TF Analysis This step runs SCENIC (Single-Cell Regulatory Network Inference and Clustering) analysis, including the construction of a co-expression network, gene filtering, correlation, and the GENIE3 algorithm to infer regulatory networks. 4.14.1 Function arguments: countMatrix: A matrix containing the raw counts of the single-cell RNA-seq data. cellTypes: A character vector specifying the cell types or labels for each cell. datasetID: A character vector specifying the dataset IDs for each cell. cellTypes_colors: A named vector of colors for cell type visualization. cellTypes_orders: A character vector specifying the desired order of cell types. groups_colors: A named vector of colors for grouping visualization. groups_orders: A character vector specifying the desired order of groups. Org: A character vector specifying the organism (‘mmu’ for mouse or ‘hsa’ for human). 4.14.2 codes for running step13 Create folders for saving the results of TF analysis. print('Step13. 
TF analysis.') if (!file.exists(paste0(output.dir, '/Step13.TF_analysis/'))) { dir.create(paste0(output.dir, '/Step13.TF_analysis/')) } Run SCENIC to perform TF analysis. run_SCENIC(countMatrix = countsSlot, cellTypes = sc_object@meta.data$selectLabels, datasetID = sc_object@meta.data$datasetID, cellTypes_colors = Step13_TF_Analysis.cellTypes_colors, cellTypes_orders = unique(sc_object@meta.data$selectLabels), groups_colors = Step13_TF_Analysis.groups_colors, groups_orders = unique(sc_object@meta.data$datasetID), Org = Org, output.dir = paste0(output.dir, '/Step13.TF_analysis/'), pythonPath = pythonPath, databasePath = databasePath) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.28: Heatmap showing predicted regulon activity for each cell Figure 4.29: Heatmap showing statistics of regulons 4.15 Step 14. Cell-Cell Interaction The step takes expression data, cluster labels, and other parameters to perform cell-cell communication analysis using the CellChat package. It includes the following steps: Data input and preprocessing. Initialization of a CellChat object. Set the ligand-receptor interaction database based on the specified organism. Preprocess the expression data for cell-cell communication analysis. Identify overexpressed genes and interactions. Project data based on protein-protein interaction networks. Inference of cell-cell communication network. Visualization of the communication network. Systems analysis of cell-cell communication network. 4.15.1 Function arguments: data.input: A matrix of expression data, where rows represent genes and columns represent cells. Row names should be in the format of gene symbols. 
labels: A vector of cluster labels for each cell, corresponding to the columns of data.input. cell.orders: A character vector specifying the order of cell types or clusters in the analysis. cell.colors: A character vector specifying colors for cell types or clusters in the analysis. sample.names: A vector of sample or cell names, corresponding to the columns of data.input. Org: A string indicating the organism used in the analysis. It should be either “mmu” (mouse) or “hsa” (human). sorting: A logical value indicating whether to consider cell population size in communication analysis. 4.15.2 codes for running step14 Create folders for saving the results of cell-cell interaction analysis. print('Step14. Cell-cell interaction.') if (!file.exists(paste0(output.dir, '/Step14.Cell_cell_interection/'))) { dir.create(paste0(output.dir, '/Step14.Cell_cell_interection/')) } Run CellChat to perform cell-cell interaction analysis. tempwd <- getwd() run_CellChat(data.input=countsSlot, labels = sc_object@meta.data$selectLabels, cell.orders = ViolinPlot.cellTypeOrders, cell.colors = ViolinPlot.cellTypeColors, sample.names = rownames(sc_object@meta.data), Org = Org, sorting = sorting, output.dir = paste0(output.dir, '/Step14.Cell_cell_interection/')) setwd(tempwd) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.30: Figures showing the interaction number and strength between each cell group Figure 4.31: Heatmap showing the strength of incoming and outgoing signals for each cell type group across various pathways. 
Figure 4.32: Figure showing LRs interaction between each cell type group "],["integrated-st-pipeline.html", "5 Integrated ST pipeline 5.1 For 10X Visium data 5.2 For MERFISH data 5.3 For stereo-seq data", " 5 Integrated ST pipeline Load the R packages. # sc libraries library(Seurat) library(phateR) library(DoubletFinder) library(monocle) library(slingshot) library(URD) library(GSVA) library(limma) library(plyr) library(dplyr) library(org.Mm.eg.db) library(org.Hs.eg.db) library(CellChat) library(velocyto.R) library(SeuratWrappers) library(stringr) library(scran) library(ggpubr) library(viridis) library(pheatmap) library(parallel) library(reticulate) library(SCENIC) library(feather) library(AUCell) library(RcisTarget) library(Matrix) library(foreach) library(doParallel) library(clusterProfiler) library(OpenXGR) # st libraries library(RColorBrewer) library(Rfast2) library(SeuratDisk) library(abcCellmap) library(biomaRt) library(copykat) library(gelnet) library(ggplot2) library(parallelDist) library(patchwork) library(markdown) # getpot library(getopt) library(tools) # HemaScopeR library(HemaScopeR) 5.1 For 10X Visium data Run the integrated 10X Visium pipeline. 
st_10x_visium_pipeline( input.data.dir = 'path/to/data', output.dir = '.', sampleName = 'Hema_ST', # For Step1 Loading rds.file = FALSE, filename = "filtered_feature_bc_matrix.h5", assay = "Spatial", slice = "slice1", filter.matrix = TRUE, to.upper = FALSE, # For Step2 QC Step2_QC = TRUE, min.gene = 200, min.nUMI = 500, max.gene = Inf, max.nUMI = Inf, min.spot = 0, bool.remove.mito = FALSE, species = 'mouse', # 'human' or 'mouse' # For Step3 Clustering Step3_Clustering = TRUE, normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.8, # For Step4 Find DEGs Step4_Find_DEGs = TRUE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', # For Step5 SVF Step5_SVFs = TRUE, selection.method = 'moransi', n.top.show = 10, n.col.show = 5, # For Step6 Interaction Step6_Interaction = TRUE, commot.signaling_type = 'Secreted Signaling', commot.database = 'CellChat', commot.min_cell_pct = 0.05, commot.dis_thr = 500, commot.n_permutations = 100, # For Step7 CNV analysis Step7_CNV = TRUE, copykat.genome = NULL, copykat.LOW.DR = 0.05, copykat.UP.DR = 0.1, copykat.win.size = 25, copykat.distance = "euclidean", copykat.n.cores = 1, # For Step8 Deconvolution Step8_Deconvolution = TRUE, cell2loc.sc.h5ad.dir = NULL, cell2loc.sc.max.epoch = 1000, cell2loc.st.max.epoch = 10000, cell2loc.use.gpu = TRUE, cell2loc.use.dataset = 'LymphNode', # For Step9 Cellcycle Step9_Cellcycle = TRUE, s.features = NULL, g2m.features = NULL, # For Step10 Niche Step10_Niche = TRUE, coexistence.method = 'correlation', Niche.cluster.n = 4, # settings pythonPath = 'path/to/python', verbose = FALSE, genReport = TRUE ) 5.2 For MERFISH data Run the integrated MERFISH pipeline. 
st_MERFISH_pipeline( input.data.dir, output.dir, sampleName = 'Hema_MERFISH', fov = 'fov', tech = 'Vizgen', # For Step1 Loading rds.file = FALSE, assay = NULL, Vizgen.z = 3L, Akoya.type = 'inform', # For Step2 QC Step2_QC = TRUE, min.gene = 20, min.nUMI = 50, max.gene = Inf, max.nUMI = Inf, min.spot = 0, bool.remove.mito = FALSE, species = 'mouse', # 'human' or 'mouse' # For Step3 Clustering Step3_Clustering = TRUE, normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.4, # For Step4 Find DEGs Step4_Find_DEGs = TRUE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', # For Step5 SVF Step5_SVFs = TRUE, selection.method = 'moransi', n.top.show = 10, n.col.show = 5, # For Step6 Interaction Step6_Interaction = TRUE, h5ad_path = NULL, counts_path = NULL, coordinates_path = NULL, coordinates_index_col = 0, counts_transpose = TRUE, commot.signaling_type = 'Secreted Signaling', commot.database = 'CellChat', commot.min_cell_pct = 0.05, commot.dis_thr = 500, commot.n_permutations = 100, # For Step7 Cellcycle Step7_Cellcycle = TRUE, s.features = NULL, g2m.features = NULL, verbose = FALSE, pythonPath = NULL ) 5.3 For stereo-seq data Run the integrated stereo-seq pipeline. 
st_stereo_pipeline( input.data.dir, output.dir, sampleName = 'Hema_stereo', # For Step1 Loading data_type = 'gem', sep = '\\t', bin_type = 'bins', bin_size = 100, spot_diameter = 80, is_sparse = TRUE, gene_list = NULL, region = NULL, assay = 'Spatial', # For Step2 QC Step2_QC = TRUE, min.gene = 20, min.nUMI = 50, max.gene = Inf, max.nUMI = Inf, min.spot = 0, bool.remove.mito = FALSE, species = 'mouse', # 'human' or 'mouse' # For Step3 Clustering Step3_Clustering = TRUE, normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.1, max.n.cluster = 30, # For Step4 Find DEGs Step4_Find_DEGs = TRUE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', # For Step5 SVF Step5_SVFs = TRUE, selection.method = 'moransi', n.top.show = 10, n.col.show = 5, # For Step6 Interaction Step6_Interaction = TRUE, h5ad_path = NULL, counts_path = NULL, coordinates_path = NULL, coordinates_index_col = 0, counts_transpose = TRUE, commot.signaling_type = 'Secreted Signaling', commot.database = 'CellChat', commot.min_cell_pct = 0.05, commot.dis_thr = 500, commot.n_permutations = 100, # For Step7 Cellcycle Step7_Cellcycle = TRUE, s.features = NULL, g2m.features = NULL, verbose = FALSE, pythonPath = NULL ) "],["stey-by-step-st-seq-pipeline.html", "6 Step-by-step st-seq pipeline 6.1 Step 1. Data loading 6.2 Step 2. Quality Control 6.3 Step 3. Clustering 6.4 Step 4. DEGs 6.5 Step 5. Spatially variable features 6.6 Step 6. Spatial interaction 6.7 Step 7. CNV analysis 6.8 Step 8. Deconvolution 6.9 Step 9. Cell cycle 6.10 Step 10. Niche analysis", " 6 Step-by-step st-seq pipeline 6.1 Step 1. Data loading The st_Loading_Data function is designed for loading 10X Visium spatial transcriptomics data from Space Ranger. It will load data from input.data.dir and output it in the SeuratObject format. 6.1.1 Function arguments: input.data.dir: The directory where the input data is stored. output.dir: The directory where the processed output will be saved. 
If not specified, the output is saved in the current working directory. Default is ‘.’. sampleName: A string naming the sample. Default is ‘Hema_ST’. rds.file: A boolean indicating if the input data is in RDS file format rather than the typical output of Space Ranger. Default is FALSE. filename: The name of the file to be loaded if the data is not in RDS format. Default is “filtered_feature_bc_matrix.h5”. assay: The specific assay to apply to the data. Default is ‘Spatial’. slice: The image slice identifier for the spatial data. Default is ‘slice1’. filter.matrix: A boolean indicating whether to load the filtered matrix. Default is TRUE. to.upper: A boolean indicating whether to convert feature names to upper case. Default is FALSE. 6.1.2 Function behavior: Directory Creation: The function first checks if the output.dir exists; if not, it creates it. RDS File Handling: If rds.file is TRUE, it reads the RDS file, ensuring the specified assay and slice are present in the Seurat object. Non-RDS File Handling: If rds.file is FALSE, it loads the data using Load10X_Spatial from Seurat. Saving the Object: Uses SaveH5Seurat and Convert to save the Seurat object in rds and h5ad formats. File Copying: Copies any necessary files (filter matrix, spatial image) to the output.dir. Return Value: Returns the processed Seurat object. 6.1.3 An example: st_obj <- st_Loading_Data( input.data.dir = 'path/to/data', output.dir = '.', sampleName = 'Hema_ST', rds.file = FALSE, filename = 'filtered_feature_bc_matrix.h5', assay = 'Spatial', slice = 'slice1', filter.matrix = TRUE, to.upper = FALSE ) 6.1.4 Outputs: Spatial transcriptome data in rds and h5ad formats 6.2 Step 2. Quality Control The QC_Spatial function performs basic quality control on a SeuratObject containing 10X visium data and returns the filtered SeuratObject. It provides options to set thresholds for the number of genes, nUMI (unique molecular identifiers), and spots expressing each gene. 
It also allows for the removal of mitochondrial genes based on species. 6.2.1 Function arguments: st_obj: A SeuratObject of 10X visium data. output.dir: A character string specifying the path to store the results and figures. Default is the current working directory. min.gene: An integer representing the minimum number of genes detected in a spot. Default is 200. max.gene: An integer representing the maximum number of genes detected in a spot. Default is Inf (no upper limit). min.nUMI: An integer representing the minimum number of nUMI detected in a spot. Default is 500. max.nUMI: An integer representing the maximum number of nUMI detected in a spot. Default is Inf (no upper limit). min.spot: An integer representing the minimum number of spots expressing each gene. Default is 3. species: A character string representing the species of sample, either ‘human’ or ‘mouse’. bool.remove.mito: A boolean value indicating whether to remove mitochondrial genes. Default is TRUE. SpatialColors: A function that interpolates a set of given colors to create new color palettes and color ramps. Default is a color palette with reversed Spectral colors from RColorBrewer. 6.2.2 Function behavior: Plots and saves the spatial distribution of nUMI and nGene. Plots and saves violin plots for nUMI and nGene. Identifies and marks low-quality spots based on nUMI and nGene thresholds. Plots the spatial distribution of quality. Plots and saves a histogram for the number of spots expressing each gene. Plots the spatial distribution of mitochondrial genes. Saves the raw SeuratObject before filtering. Removes low-quality spots and genes with fewer occurrences. Optionally removes mitochondrial genes. Saves the filtered SeuratObject. Returns the filtered st_obj. 
6.2.3 An example: st_obj <- QC_Spatial( st_obj = st_obj, output.dir = '.', min.gene = 200, min.nUMI = 500, max.gene = Inf, max.nUMI = Inf, min.spot = 3, species = 'human', bool.remove.mito = TRUE, SpatialColors = colorRampPalette(colors = rev(x = brewer.pal(n = 11, name = "Spectral"))) ) 6.2.4 Outputs: Figures showing the spatial distribution of nUMI and nGene. Violin plots of nUMI and nGene. Figures showing the quality. Histograms for the number of spots expressing each gene. Figures showing the spatial distribution of mitochondrial genes. Raw and filtered SeuratObject. 6.3 Step 3. Clustering The st_Clustering function is designed to perform clustering analysis on spatial transcriptomics data. It integrates several key steps including data normalization, dimensionality reduction, clustering, and visualization. The function saves the results and visualizations to output.dir. 6.3.1 Function arguments: st_obj: The input spatial transcriptomics seurat object that contains the data to be clustered. output.dir: The directory where the output files will be saved. Default is the current directory (‘.’). normalization.method: The method used for data normalization. Default is ‘SCTransform’. npcs: The number of principal components to use in PCA. Default is 50. pcs.used: The principal components to use for clustering. Default is the first 10 PCs (1:10). resolution: The resolution parameter for the clustering algorithm. Default is 0.8. verbose: A logical flag to print progress messages. Default is FALSE. 6.3.2 Function behavior: Data Normalization and PCA: Depending on the normalization.method, the function either uses SCTransform or a standard normalization method followed by scaling and variable feature detection. Performs PCA on the normalized data. Clustering and Dimensionality Reduction: Finds nearest neighbors using the specified principal components (pcs.used). Identifies clusters using the specified resolution. 
Performs UMAP and t-SNE for visualization of the clusters. Visualization: Generates spatial, UMAP, and t-SNE plots of the clusters with customized color schemes. Saves these plots as images in the specified directory. Saving Results: Saves the updated st_obj as an RDS file. Exports the metadata of st_obj to a CSV file. Return Value: Returns the updated st_obj containing the clustering results. 6.3.3 An example: st_obj <- st_Clustering( st_obj = st_obj, output.dir = '.', normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.8, verbose = FALSE ) 6.3.4 Outputs: Figures showing the results of clustering. SeuratObject in rds format. 6.4 Step 4. DEGs The st_Find_DEGs function is designed to identify differentially expressed genes (DEGs) in spatial transcriptomics data. It performs differential expression analysis based on clustering results, visualizes the top markers, and saves the results to output.dir. 6.4.1 Function arguments: st_obj: The input spatial transcriptomics object containing the data for DEG analysis. output.dir: The directory where output files will be saved. Default is the current directory (‘.’). ident.label: The metadata label used for identifying clusters. Default is 'seurat_clusters'. only.pos: A logical flag to include only positive markers. Default is TRUE. min.pct: The minimum fraction of cells expressing the gene in either cluster. Default is 0.25. logfc.threshold: The log fold change threshold for considering a gene differentially expressed. Default is 0.25. test.use: The statistical test to use for differential expression analysis. Default is 'wilcox'. verbose: A logical flag to print progress messages. Default is FALSE. 6.4.2 Function behavior: Set Identifiers: Sets the cluster identifiers in the spatial transcriptomics object (st_obj) based on the specified ident.label. 
Find Differentially Expressed Genes (DEGs): Performs differential expression analysis using the specified parameters (only.pos, min.pct, logfc.threshold, test.use). Top Marker Genes: Selects the top 5 marker genes for each cluster based on the highest average log fold change. Visualization: Generates a dot plot for the top DEGs and saves the plot as an image in the specified directory. Saving Results: Saves the DEG results as a CSV file. Return Value: Returns the data frame containing the identified DEGs. 6.4.3 An example: st.markers <- st_Find_DEGs( st_obj = st_obj, output.dir = '.', ident.label = 'seurat_clusters', only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', verbose = FALSE ) 6.4.4 Outputs: Dot plots showing markers. CSV file containing the information of markers. 6.5 Step 5. Spatially variable features The st_SpatiallyVariableFeatures function identifies and visualizes spatially variable features (SVFs) in spatial transcriptomics data. It integrates the identification of spatially variable features using a specified method, saves the results to a directory, and creates visualizations of the top spatially variable features. 6.5.1 Function arguments: st_obj: The input spatial transcriptomics object containing the data for analysis. output.dir: The directory where output files will be saved. Default is the current directory. assay: The assay to be used for finding spatially variable features. Default is 'SCT'. selection.method: The method used for selecting spatially variable features. Default is 'moransi'. n.top.show: The number of top spatially variable features to visualize. Default is 10. n.col: The number of columns for the visualization grid. Default is 5. verbose: A logical flag to print progress messages. Default is FALSE. 6.5.2 Function behavior: Identify Spatially Variable Features: Identifies spatially variable features using the specified method and assay. Suppresses warnings during the process. 
Save Metadata: Extracts metadata features and saves them as a CSV file in output.dir. Visualization: Selects the top n.top.show spatially variable features. Generates and saves a spatial feature plot of these features in the specified directory. Return Value: Returns the updated st_obj containing the identified spatially variable features. 6.5.3 An example: st_obj <- st_SpatiallyVariableFeatures( st_obj = st_obj, output.dir = '.', assay = st_obj@active.assay, selection.method = 'moransi', n.top.show = 10, n.col = 5, verbose = FALSE ) 6.5.4 Outputs: Figures showing SVFs. CSV file containing the information of SVFs. 6.6 Step 6. Spatial interaction The st_Interaction function is used to identify and visualize interactions between clusters based on spatial transcriptomics data. It utilizes Commot to analyze spatial interactions, identify pathway activities, and assess the strength and significance of interactions. 6.6.1 Function arguments: st_data_path: Path to the spatial transcriptomics data. metadata_path: Path to the metadata associated with the spatial transcriptomics data. library_id: Identifier for the spatial transcriptomics library. Default is 'Hema_ST'. label_key: Key in the metadata to identify cell clusters. Default is 'seurat_clusters'. save_path: The directory where output files will be saved. Default is the current directory. species: The species of the spatial transcriptomics data. Default is 'human'. signaling_type: Type of signaling interactions to consider. Default is 'Secreted Signaling'. database: Database to be used for the analysis. Default is 'CellChat'. min_cell_pct: Minimum percentage of cells to consider for interaction analysis. Default is 0.05. dis_thr: Distance threshold for defining interactions. Default is 500. n_permutations: Number of permutations for assessing significance. Default is 100. pythonPath: The path to the Python environment containing Commot to use for the analysis. Default is ‘.’. 
6.6.2 Function behavior: Commot Analysis: Uses Commot to perform interaction analysis, identifying interactions within and between clusters. Visualization: Generates visualizations of pathway interactions and interactions between ligand-receptors (LRs) within and between clusters, and saves them in save_path. 6.6.3 An example: st_Interaction( st_data_path = 'path/to/data', metadata_path = 'path/to/metadata', library_id = 'Hema_ST', label_key = 'seurat_clusters', save_path = '.', species = 'human', signaling_type = 'Secreted Signaling', database = 'CellChat', min_cell_pct = 0.05, dis_thr = 500, n_permutations = 100, pythonPath = 'path/to/python' ) 6.6.4 Outputs: Dot plot showing pathway interaction between and within clusters. Dot plot showing LRs interaction between and within clusters. The information of each LR and pathway. 6.7 Step 7. CNV analysis The st_CNV function identifies and visualizes copy number variations (CNVs) in spatial transcriptomics data. It uses CopyKAT to perform the CNV analysis, saves the results, and generates visual representations of CNV states. 6.7.1 Function arguments: st_obj: The input spatial transcriptomics object containing the data for analysis. save_path: The directory where output files will be saved. assay: The assay to be used for CNV analysis. Default is 'Spatial'. LOW.DR: The lower threshold for the dropout rate in CopyKAT. Default is 0.05. UP.DR: The upper threshold for the dropout rate in CopyKAT. Default is 0.1. win.size: The window size for the CNV analysis. Default is 25. distance: The distance metric to be used for the analysis. Default is \"euclidean\". genome: The genome version to be used, ‘hg20’ or ‘mm10’. Default is \"hg20\". n.cores: The number of cores to be used for parallel processing. Default is 1. species: The species of the spatial transcriptomics data. Default is 'human'. 6.7.2 Function behavior: CopyKAT Analysis: Runs CopyKAT pipeline to perform CNV analysis using the provided parameters. 
Saving Results: Saves the CopyKAT results as an RDS file. Plotting: Generates plots of the CNV states and saves them in save_path. Updating Metadata: Updates the spatial transcriptomics object with CNV state metadata. Return Value: Returns the updated st_obj containing the CNV state information. 6.7.3 An example: st_obj <- st_CNV( st_obj = st_obj, save_path = '.', assay = 'Spatial', LOW.DR = 0.05, UP.DR = 0.1, win.size = 25, distance = "euclidean", genome = 'hg20', n.cores = 1, species = 'human' ) 6.7.4 Outputs: Figures showing the predicted CNV states. Figures showing the CNV heatmap. rds files of results of copykat. 6.8 Step 8. Deconvolution The st_Deconvolution function aims to perform spatial deconvolution analysis on spatial transcriptomics data to estimate the cell-type composition and abundance in different regions. The function utilizes cell2location to infer cell-type abundance and spatial distributions, allowing for the visualization and interpretation of spatially resolved cell populations within the tissue. 6.8.1 Function arguments: st.data.dir: Path to the spatial transcriptomics data. sc.h5ad.dir: Path to the single-cell RNA-seq data in h5ad format. Default is NULL. library_id: Identifier for the spatial transcriptomics library. Default is 'Hema_ST'. st_obj: Spatial transcriptomics object containing the data for analysis. Default is NULL. save_path: The directory where output files will be saved. Default is NULL. sc.labels.key: Key in the single-cell metadata to identify cell clusters. Default is 'seurat_clusters'. species: The species of the spatial transcriptomics data. Default is 'mouse'. sc.max.epoch: Maximum number of epochs used for single-cell deconvolution. Default is 1000. st.max.epoch: Maximum number of epochs used for spatial deconvolution. Default is 10000. use.gpu: Logical value indicating whether to use GPU for computation. Default is FALSE. use.Dataset: The dataset to be used for analysis, such as 'HematoMap' or 'LymphNode'. 
pythonPath: The path to the Python environment containing cell2location to use for the analysis. Default is ‘.’. 6.8.2 Function behavior: Deconvolution Analysis: Performs the spatial deconvolution analysis using the provided spatial transcriptomics and single-cell RNA-seq data. Post-Analysis Processing: Processes the deconvolution results and visualizes the spatial distribution of inferred cell types within the tissue. Returning Results: If a Seurat object is provided, the updated Seurat object with cell type information is returned. 6.8.3 An example: st_obj <- st_Deconvolution( st.data.dir = 'path/to/data', library_id = 'Hema_ST', sc.h5ad.dir = NULL, st_obj = st_obj, save_path = '.', sc.labels.key = 'seurat_clusters', species = 'human', sc.max.epoch = 1000, st.max.epoch = 10000, use.gpu = FALSE, use.Dataset = 'LymphNode', pythonPath = 'path/to/python' ) 6.8.4 Outputs: Figures showing the predicted abundance of each cell-type. The parameters of trained cell2location model. 6.9 Step 9. Cell cycle The st_Cell_cycle function is used to assess the cell cycle phase scores in spatial transcriptomics data. It calculates S phase and G2M phase scores based on the expression of designated cell cycle-related genes and visualizes these scores in spatial and dimensionality-reduced plots. 6.9.1 Function arguments: st_obj: The input Seurat object containing the data for analysis. save_path: The directory where the output images will be saved. Default is the current directory. s.features: A list of genes associated with the S phase. Default is NULL (using genes from Seurat). g2m.features: A list of genes associated with the G2M phase. Default is NULL (using genes from Seurat). species: The species of the spatial transcriptomics data. Default is 'human'. FeatureColors.bi: A color palette for visualization. Default is a two-color ramp palette. 6.9.2 Function behavior: Gene Feature Assignment: Assigns S phase and G2M phase gene lists based on the specified species or provided input. 
Cell Cycle Scoring: Calculates the S phase and G2M phase scores in the data. Spatial Visualization: Generates spatial feature plots to visualize the S phase and G2M phase scores using the specified color palette and saves the plots as images. Dimensionality-Reduced Plot Visualization: If UMAP or tSNE dimensionality reduction is available in the st_obj, feature plots of the S phase and G2M phase scores are generated in the reduced space and saved as images. Return Value: Returns the updated st_obj containing the cell cycle phase scores. 6.9.3 An example: st_obj <- st_Cell_cycle( st_obj = st_obj, save_path = '.', s.features = NULL, g2m.features = NULL, species = 'human', FeatureColors.bi = colorRampPalette(colors = rev(x = brewer.pal(n = 11, name = 'RdYlBu'))) ) 6.9.4 Outputs: Figures showing S scores. Figures showing G2M scores. 6.10 Step 10. Niche analysis The st_NicheAnalysis function is designed to perform niche analysis on spatial transcriptomics data, enabling the exploration of spatial niches or microenvironments within the tissue. The function encompasses co-occurrence analysis, niche clustering, and niche interaction analysis to uncover the spatial relationships and characteristics of different cell populations or features. 6.10.1 Function arguments: st_obj: The input SeuratObject containing the spatial transcriptomics data for analysis. features: A vector of features representing features (for example, cell types from deconvolution) for niche analysis. save_path: The directory where the analysis results and visualizations will be saved. Default is the current directory. coexistence.method: The method for co-occurrence analysis, accepting 'correlation' or 'Wasserstein'. Default is 'correlation'. kmeans.n: The number of clusters for niche clustering. Default is 4. st_data_path: A path containing the ‘spatial’ file and ‘filtered_feature_bc_matrix.h5’ file, required for niche interaction visualization. slice: The slice to be used for analysis. 
Default is 'slice1'. species: The species of the sample data. Default is 'mouse'. pythonPath: The path to the Python environment containing Commot to use for the analysis. Default is ‘.’. 6.10.2 Function behavior: Co-occurrence Score Calculation: Calculates the co-occurrence scores between the specified features using the chosen coexistence method (‘correlation’ or ‘Wasserstein’). Niche Clustering: Utilizes k-means clustering to identify distinct spatial niches based on the expression profiles of the selected features and visualizes the clustering results. Niche Interaction Visualization: If the st_data_path is provided, performs niche interaction visualization using Commot, which is based on the provided spatial transcriptomics data and generates visualizations of niche interactions within the tissue. Return Value: Returns the updated st_obj with niche analysis results and visualizations. 6.10.3 An example: tmp <- read.csv('path/to/cell2loc_res.csv', row.names = 1) features <- colnames(tmp) if(!all(features %in% names(st_obj@meta.data))){ common.barcodes <- intersect(colnames(st_obj), rownames(tmp)) tmp <- tmp[common.barcodes, ] st_obj <- st_obj[, common.barcodes] st_obj <- AddMetaData(st_obj, metadata = tmp) } st_obj <- st_NicheAnalysis( st_obj, features = features, save_path = '.', coexistence.method = 'correlation', kmeans.n = 4, st_data_path = 'path/to/data', slice = 'slice1', species = 'human', pythonPath = 'path/to/python' ) 6.10.4 Outputs: Figures showing the co-existence results. Figures showing the spatial distribution of each niche. Figures showing the composition of each niche. Figures showing the results of interactions using Commot. "],["step-by-step-shiny.html", "7 Step-by-step shiny 7.1 Step 1. Enter R and get the path of the installed R packages 7.2 Step 2. Run shiny code 7.3 Step 3. 
Use HemaScopeShiny via the GUI", " 7 Step-by-step shiny #You can run shiny on Linux or on the Rstudio web page Choice 1:Run shiny on Linux - Enter Linux, activate the HemaScope environment,install radian package then you can enter the R environment on Linux and run shiny code radian -You can see “r$>” . It means you have entered the R environment on Linux. app_path <- system.file("shinyapp/shiny_sc_st_all.R", package = "HemaScopeR") #The path where shiny_sc_st_all.R is located #Run shiny code shiny::runApp(app_path,launch.browser = FALSE,host = "xx.xx.xx.xx") #host parameter:Your server IP address You’ll see a page like the one below,copy link Open the link with a browser,you can see HemaScopeR shiny home page. Choice2:Run shiny on Rstudio web page 7.1 Step 1. Enter R and get the path of the installed R packages Enter the R environment in the Linux command line. R Get the path of the installed R packages in the R command line. .libPaths() For example, “/An/example/of/the/path/to/installed/R/packages” 7.2 Step 2. Run shiny code .libPaths("/An/example/of/the/path/to/installed/R/packages") app_path <- system.file("shinyapp/shiny_sc_st_all.R", package = "HemaScopeR") #The path where shiny_sc_st_all.R is located #Run shiny code shiny::runApp(app_path,launch.browser = FALSE,host = "xx.xx.xx.xx") #host parameter:Your server IP address 7.3 Step 3. Use HemaScopeShiny via the GUI Start interface. A UI page appears with two buttons: “Start scRNA-seq Analysis” and “Start st-seq Analysis.” Users can click the corresponding button based on their needs to enter the respective analysis page. * The figure showing the start interface. Begin a new analysis, continue the previous analysis, or return to the start interface When clicking the “Start scRNA-seq pipeline” or “Start ST-seq pipeline” button, you will be directed to a second page. This page contains three buttons: “Begin New Analysis,” “Continue Previous Analysis”, and “Back to Home”. 
If you need to begin a new analysis of scRNA-seq or st-seq data from the first step, click “Begin New Analysis”. If you have already used Shiny to complete several steps (e.g., steps 1, 2, and 3), but the analysis was interrupted during step 4 because Shiny closed unexpectedly, click “Continue Previous Analysis” to resume from step 4. Please note: users should follow the analysis steps sequentially and not skip steps. For example, analyzing steps 1, 2, and 3 and then jumping directly to step 6 is incorrect. The proper analysis sequence should be step 1, 2, 3, 4, 5, 6, … N. The figure showing the interface for beginning a new analysis, continuing the previous analysis, or returning to the start interface. 7.3.1 scRNA-seq pipeline When the user clicks the “Start scRNA-seq pipeline – Begin New Analysis” button, they will enter the single-cell analysis page. The sidebar of this page includes the following buttons: Step 1. Input Data Step 2. Quality Control Step 3. Clustering Step 4. Identify Cell Types Step 5. Visualization Step 6. Find Differential Genes Step 7. Assign Cell Cycles Step 8. Calculate Heterogeneity Step 9. Violin Plot for Marker Genes Step 10. Calculate Lineage Scores Step 11. GSVA Step 12. Construct Trajectories Step 13. Transcription Factors Analysis Step 14. Cell-Cell Interaction Step 15. Generate the Report Back to Prior Page The figure showing the scRNA-seq pipeline. Please start the analysis from step 1 and do not skip any steps. The correct analysis sequence is steps 1 through 15: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15. To return to the previous page, click “Back to Prior Page”. If Shiny unexpectedly exits during data analysis in the Begin New Analysis process (for example, while analyzing Step 5), and the analysis of Step 5 is interrupted, the user will need to restart Shiny by running shinyApp(ui, server). This will bring up the Home page. 
The user should click the “Start scRNA-seq pipeline–Continue Previous Analysis” button, enter the Job ID displayed on the UI page during the Step 1.Input data step, and then select the step that did not complete successfully (e.g., Step 5). After entering the necessary parameters for Step 5, click “Run Step 5” to resume the analysis. Once Step 5 is completed, the user should proceed by selecting Step 6, entering the required parameters, and clicking “Run Step 6” to analyze Step 6, and so on, until all scRNA-seq steps are completed. Note that the default parameters for each step are the same as those in Begin New Analysis. After clicking “Run Step,” do not perform any other operations on the parameter page. Wait until the current step’s analysis is complete, and the results for that step will appear on the UI page. The “Start scRNA-seq pipeline–Continue Previous Analysis” page contains the following buttons: Back to Prior Page: Click to return to the previous page. Enter your Job ID: Enter the Job ID displayed on the page during the Begin New Analysis–Step1.Input data step. Choose a step you want to analyze: Select the step you want to continue analyzing. 7.3.1.1 Step 1 (scRNA-seq pipeline). Input Data The figure showing the step 1 of scRNA-seq pipeline. Enter data path: Input multiple file paths separated by semicolons, for example: /path1/file1/data1;/path2/file2/data2;/path2/file2/data3. For a single file, use: /path2/file2/data2. Enter project name: When entering multiple files, you must also input multiple project names, separated by semicolons. The number of project names must match the number of input files. Example: projectname1;projectname2;projectname3. For a single file, use: projectname1. Enter output path: Specify the path where the results will be output. You can view the results of each step in this path. Example: /home/username/output. Enter the path of database: The path where the database is stored and it varies for each user. 
Example: /home/username/database. Select Data Type: There are three options: “cellranger-count”, “Seurat”, “Matrix”. Choose according to the type of input data. Gene Column (default: 2): The column where gene names are located; the default is column 2. Minimum Cells (default: 10): The minimum number of cells for filtering; the default is 10. Minimum Features (default: 200): The minimum number of genes that must be detected in each cell; the default is 200. Mt Pattern (default: ‘^MT-’): Mitochondrial pattern; for humans use ^MT-, for mice use ^mt-. After entering the above parameters, click the “LoadData” button to load the data. Once the data is successfully loaded, you will see “OK! Data dimensions” indicating that the data loading is complete, and you will be provided with a JobID. Make sure to note this JobID, as it is crucial. If HemaScopeShiny unexpectedly exits, you can click “Continue Previous Analysis”, enter the JobID, and continue loading the previous analysis results without starting from step 1 again. The JobID is very important! Please note: After clicking the “LoadData” button, do not modify any other parameters on the page. The Step 2-14 pages will consist of three sections: 1) parameter input, 2) result output file names, and 3) generated result figures. If the respective step produces result figures, they will be displayed. Users can switch between images by clicking the arrows on the left or right of the figure. If no figures are generated for the current step, a message stating “NO Figure!” will be displayed. All output files generated at each step are stored in the output directory specified by the user. The UI page will display only the file names, which can be downloaded by clicking on the file name links. 7.3.1.2 Step 2 (scRNA-seq pipeline). Quality Control The figure showing the step 2 of scRNA-seq pipeline. nFeature_RNA.limit: Minimum number of genes detected per cell. 
Default value: 200 percent.mt.limit: Threshold for filtering mitochondrial genes. Default value: 20 scale.factor: Normalization factor. Default value: 10,000 nfeatures: Number of highly variable genes. Default value: 3,000 ndims: Number of dimensions used. Default value: 50 vars.to.regress: Variables to regress. Default value: NULL PCs: Number of principal components used for clustering. Default value: 1:35 resolution: Resolution parameter for clustering. Default value: 0.4 n.neighbors: k.param parameter in the FindNeighbors function. Default value: 50 doublet.percentage: Doublet rate. Default value: 0.04 doubletFinderWrapper.PCs: Number of principal components used for doublet removal. Default value: 1:20 doubletFinderWrapper.pN: Number of artificial doublets defined for removal. Default value: 0.25 doubletFinderWrapper.pK: Represents the fraction of merged real artificial data. Default value: 0.1 (pK should be adjusted according to each scRNA-seq dataset) Step2_Quality_Control.RemoveBatches: Whether to remove detected batches. Default value: TRUE Step2_Quality_Control.RemoveDoublets: Whether to remove detected doublets. Default value: TRUE Click the “Run Step 2” button to start the process. After clicking the “Run Step 2” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 2 completed” message will appear. After a short while, the result files generated by Step 2 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.3 Step 3 (scRNA-seq pipeline). Clustering The figure showing the step 3 of scRNA-seq pipeline. PCs for clustering (default: 1:20): Principal components used for clustering. Default value: 1:20 n.neighbors for clustering (default: 50): k.param parameter in the FindNeighbors function. Default value: 50 resolution for clustering (default: 0.4): Resolution used for clustering. 
Default value: 0.4 Click the “Run Step 3” button to start the process. After clicking the “Run Step 3” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 3 completed” message will appear. After a short while, the result files generated by Step 3 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.4 Step 4 (scRNA-seq pipeline). Identify Cell Types The figure showing the step 4 of scRNA-seq pipeline. Choose organism: ‘hsa’ for human, ‘mmu’ for mouse Choose Labels: Cell labels, default value: clustering Run CNV: TRUE if copy number variation (CNV) analysis is to be performed CPU cores for parallel processing: Number of CPU cores for parallel processing, default value: 10 Click the “Run Step 4” button to start the process. After clicking the “Run Step 4” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 4 completed” message will appear. After a short while, the result files generated by Step 4 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.5 Step 5 (scRNA-seq pipeline). Visualization The figure showing the step 5 of scRNA-seq pipeline. Nearest neighbors for PhateR analysis (default: 50): phate.knn parameter, the number of nearest neighbors to consider in the PhateR algorithm. Default value: 50 Principal components for PhateR (default: 20): phate.npca parameter, the number of principal components to use in the PhateR algorithm. Default value: 20 t parameter for PhateR (default: 10): phate.t parameter, the t value for the PhateR algorithm. Default value: 10 Dimensions for PhateR (default: 2): phate.ndim parameter, the number of dimensions for embedding output in the PhateR algorithm. Default value: 2 Click the “Run Step 5” button to start the process. 
After clicking the “Run Step 5” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 5 completed” message will appear. After a short while, the result files generated by Step 5 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.6 Step 6 (scRNA-seq pipeline). Find Differential Genes The figure showing the step 6 of scRNA-seq pipeline. Minimum gene percentage for differential detection (default: 0.25): The minimum fraction of cells expressing a gene in any cluster. Default value: 0.25 Log-fold threshold for gene analysis (default: 0.25): The log-fold change threshold for differential gene expression analysis. Default value: 0.25 Click the “Run Step 6” button to start the process. After clicking the “Run Step 6” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 6 completed” message will appear. After a short while, the result files generated by Step 6 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.7 Step 7 (scRNA-seq pipeline). Assign Cell Cycles The figure showing the step 7 of scRNA-seq pipeline. Define cell cycle cutoff (default: NULL): The cutoff value used to distinguish between cycling and non-cycling cells. Default value: NULL Click the “Run Step 7” button to start the process. After clicking the “Run Step 7” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 7 completed” message will appear. After a short while, the result files generated by Step 7 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.8 Step 8 (scRNA-seq pipeline). Calculate Heterogeneity The figure showing the step 8 of scRNA-seq pipeline. 
Order cell types: The order of cell types for visualization. If not provided, the function will use the unique cell types from the input cell_types_groups. Default value: NULL Click the “Run Step 8” button to start the process. After clicking the “Run Step 8” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 8 completed” message will appear. After a short while, the result files generated by Step 8 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.9 Step 9 (scRNA-seq pipeline). Violin Plot for Marker Genes The figure showing the step 9 of scRNA-seq pipeline. Enter marker genes for violin plot (separate by ‘,’): The marker genes for the violin plot. Default value is the built-in marker genes: NULL. Set the hexadecimal codes of colors for cell types (separate by ‘,’): Specify the colors for cell types. The default is the color palette: NULL. Click the “Run Step 9” button to start the process. After clicking the “Run Step 9” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 9 completed” message will appear. After a short while, the result files generated by Step 9 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.10 Step 10 (scRNA-seq pipeline). Calculate Lineage Scores The figure showing the step 10 of scRNA-seq pipeline. The gene sets for calculating lineage scores: The gene sets used for calculating lineage scores. Default value: NULL. The names for the lineages: The names of the lineages. Default value: NULL. The hexadecimal codes of colors for groups: Specify the colors to be used for different group annotations. The default is the color palette: NULL. Click the “Run Step 10” button to start the process. 
After clicking the “Run Step 10” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 10 completed” message will appear. After a short while, the result files generated by Step 10 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.11 Step 11 (scRNA-seq pipeline). GSVA The figure showing the step 11 of scRNA-seq pipeline. Option to identify cell type-specific GSVA terms: Whether to identify cell type-specific GSVA terms. Default value: TRUE. Option to identify differential GSVA terms: Whether to identify differential GSVA terms. Default value: TRUE. Click the “Run Step 11” button to start the process. After clicking the “Run Step 11” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 11 completed” message will appear. After a short while, the result files generated by Step 11 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.12 Step 12 (scRNA-seq pipeline). Construct Trajectories The figure showing the step 12 of scRNA-seq pipeline. Set the cell types for constructing trajectories: The cell types to be used for trajectory analysis. Different cell types should be separated by commas. Default value: “all.” Option to run monocle2: Whether to perform Monocle2 trajectory analysis. Default value: TRUE. Option to run slingshot: Whether to perform Slingshot trajectory analysis. Default value: TRUE. Option to run scVelo: Whether to perform scVelo trajectory analysis. Default value: TRUE. Enter the paths of loom files: Specify the paths to the loom files for scVelo analysis. Default value: NULL. Click the “Run Step 12” button to start the process. After clicking the “Run Step 12” button, please do not interact with other parameters or buttons on the page. 
Once the process is complete, a “Step 12 completed” message will appear. After a short while, the result files generated by Step 12 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.13 Step 13 (scRNA-seq pipeline). Transcription Factors Analysis The figure showing the step 13 of scRNA-seq pipeline. Set the hexadecimal codes of colors for cell types: Colors used for visualizing cell types. Default value: NULL (color palette). Set the hexadecimal codes of colors for groups: Colors used for visualizing groups. Default value: NULL (color palette). Click the “Run Step 13” button to start the process. After clicking the “Run Step 13” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 13 completed” message will appear. After a short while, the result files generated by Step 13 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.14 Step 14 (scRNA-seq pipeline). Cell-Cell Interaction The figure showing the step 14 of scRNA-seq pipeline. The cell groups were sorted: Whether to consider the size (number) of cell groups in the cell communication analysis. Default value: TRUE. Click the “Run Step 14” button to start the process. After clicking the “Run Step 14” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 14 completed” message will appear. After a short while, the result files generated by Step 14 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.15 Step 15 (scRNA-seq pipeline). Generate the Report The figure showing the step 15 of scRNA-seq pipeline. Click “Run Step 15” to generate the analysis report. 
7.3.2 ST-pipeline When the user clicks the button “Start ST-seq pipeline–Begin New Analysis,” they will be taken to the empty analysis page. The page sidebar includes the following buttons: Please start the analysis from Step 1 and do not skip any steps. The correct analysis sequence is Step 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, and 11. To return to the previous page, please click “Back to Prior Page.” Step 1. Input Data Step 2. Quality Control Step 3. Clustering Step 4. Find Differential Genes Step 5. Spatially Variable Features Step 6. Spatial Interaction Step 7. CNV Analysis Step 8. Deconvolution Step 9. Cell Cycle Analysis Step 10. Niche Analysis Step 11. Generate the Report Back to Prior Page In “Begin New Analysis,” users start analyzing data from Step1. If Shiny unexpectedly exits during the analysis process (for example, if you are analyzing Step5 and Shiny crashes, causing Step5 to fail), users need to restart Shiny by running shinyApp(ui, server). This will bring up the Home page. Users should click the “Start ST-seq pipeline–Continue Previous Analysis” button. They need to enter the JobID displayed in the UI page during the Step1.Input data step and then select the step that did not complete successfully to continue the analysis. For example, if Step5 failed, select Step5, enter the necessary parameters, and click “Run Step5” to continue the analysis. After Step5 finishes, select Step6, enter the parameters for Step6, and click “Run Step6” to analyze Step6, and so on for all subsequent steps. Please note that the default parameters for each step are the same as those in “Begin New Analysis.” After clicking “Run Step,” do not make any other changes to the parameter page. Wait until the current step completes, and the results file for the current step will appear on the UI page. The “Start ST-seq pipeline–Continue Previous Analysis” page includes the following buttons: Back to Prior Page: Click to return to the previous page. 
Enter your Job ID: Enter the JobID displayed in the “Begin New Analysis–Step1.Input data” step. Choose a step you want to analyze: Select the step you want to continue analyzing. 7.3.2.1 Step 1 (st-seq pipeline). Input Data The figure showing the step 1 of st-seq pipeline. Enter data path: The directory where the input data is stored. The input data should be 10X Visium spatial transcriptomics data. Only one dataset can be input at a time; unlike single-cell data, multiple datasets cannot be entered simultaneously. Enter sample name: A string for naming the sample. The default value is ‘Hema_ST’. Enter output path: The directory where processed outputs will be saved. For example: /home/username/output. Enter the path of Python: The path to the Python executable, as that in scRNA-seq pipeline. After entering the parameters above, click the “LoadData” button to load the data. Once the data is loaded, the system will provide a JobID, which should be noted. If Shiny unexpectedly exits, you can click “Continue Previous Analysis” and enter the JobID to resume loading the previous analysis results, avoiding the need to restart from Step 1. The JobID is very important! Please note: After clicking the “LoadData” button, do not make further changes to other parameters on the page. The Step 2-10 pages will have three sections: Parameter input Result output file names Generated result plots If a step generates result plots, they will be displayed. Users can switch between images by clicking the arrows on either side of the plot. If no result plots are generated for the current step, users will be informed with “NO Figure!” The result files generated for each step are stored in the output path specified by the user. The UI page will only display the file names, and clicking on the file name links will allow downloading the files. 7.3.2.2 Step 2 (st-seq pipeline). Quality Control The figure showing the step 2 of st-seq pipeline. 
min.gene (default: 200): Specifies the minimum number of genes detected in a spot. The default value is 200. min.nUMI (default: 500): Specifies the minimum number of nUMIs detected in a spot. The default value is 500. max.gene (default: Inf): Specifies the maximum number of genes detected in a spot. The default value is Inf (no upper limit). max.nUMI (default: Inf): Specifies the maximum number of nUMIs detected in a spot. The default value is Inf (no upper limit). min.spot (default: 0): Specifies the minimum number of spots where each gene is expressed. bool.remove.mito: Whether to remove mitochondrial genes. The default value is TRUE. species: Specifies the species: human/mouse. Click “Run Step2” to proceed. After clicking the “Run Step2” button, please do not modify any other parameters on the page. Once Step 2 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.3 Step 3 (st-seq pipeline). Clustering The figure showing the step 3 of st-seq pipeline. normalization.method (default: ‘SCTransform’): The method for data normalization. The default value is ‘SCTransform’. npcs (default: 50): The number of principal components (PCs) to use in PCA. The default value is 50. pcs.used (default: 1:10): The number of PCs used for clustering analysis. The default value is the first 10 PCs (1:10). resolution (default: 0.8): The resolution parameter for the clustering algorithm. The default value is 0.8. Click “Run Step3” to proceed. After clicking the “Run Step3” button, please do not modify any other parameters on the page. Once Step 3 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.4 Step 4 (st-seq pipeline). Find Differential Genes The figure showing the step 4 of st-seq pipeline. only.pos: A logical flag to include only positive markers. The default value is TRUE. 
min.pct (default: 0.25): The minimum fraction of cells expressing the gene in any cluster. The default value is 0.25. logfc.threshold (default: 0.25): The log-fold change threshold for considering differentially expressed genes. The default value is 0.25. test.use (default: ‘wilcox’): The statistical test used for differential expression analysis. The default value is ‘wilcox’. Click “Run Step4” to proceed. After clicking the “Run Step4” button, please do not modify any other parameters on the page. Once Step 4 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.5 Step 5 (st-seq pipeline). Spatially variable features The figure showing the step 5 of st-seq pipeline. selection.method (default: ‘moransi’): The method used for selecting spatially variable features. The default value is ‘moransi’. n.top.show (default: 10): The number of top spatially variable features to visualize. The default value is 10. n.col.show (default: 5): The number of columns in the visualization grid. The default value is 5. Click “Run Step5” to proceed. After clicking the “Run Step5” button, please do not modify any other parameters on the page. Once Step 5 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.6 Step 6 (st-seq pipeline). Spatial interaction The figure showing the step 6 of st-seq pipeline. commot.signaling_type (default: ‘Secreted Signaling’): The type of signaling interaction to consider. The default value is ‘Secreted Signaling’. commot.database (default: ‘CellChat’): The database used for the analysis. The default value is ‘CellChat’. commot.min_cell_pct (default: 0.05): The minimum cell percentage to consider in interaction analysis. The default value is 0.05. commot.dis_thr (default: 500): The distance threshold used to define interactions. The default value is 500. 
commot.n_permutations (default: 100): The number of permutations used to assess significance. The default value is 100. Click “Run Step6” to proceed. After clicking the “Run Step6” button, please do not modify any other parameters on the page. Once Step 6 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.7 Step 7 (st-seq pipeline). CNV analysis The figure showing the step 7 of st-seq pipeline. copykat.genome (default: ‘NULL’): The genome version used, either ‘hg20’ or ‘mm10’. The default value is “hg20”. copykat.LOW.DR (default: 0.05): The lower dropout rate threshold in CopyKAT. The default value is 0.05. copykat.UP.DR (default: 0.1): The upper dropout rate threshold in CopyKAT. The default value is 0.1. copykat.win.size (default: 25): The window size for CNV analysis. The default value is 25. copykat.distance (default: ‘euclidean’): The distance metric used for analysis. The default value is “euclidean”. copykat.n.cores (default: 1): The number of cores used for parallel processing. The default value is 1. Click “Run Step7” to proceed. After clicking the “Run Step7” button, please do not modify any other parameters on the page. Once Step 7 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.8 Step 8 (st-seq pipeline). Deconvolution The figure showing the step 8 of st-seq pipeline. cell2loc.sc.h5ad.dir (default: ‘NULL’): The path to the h5ad format single-cell RNA-seq data. The default value is NULL. cell2loc.sc.max.epoch (default: 1000): The maximum number of epochs for single-cell deconvolution. The default value is 1000. cell2loc.st.max.epoch (default: 10000): The maximum number of epochs for spatial deconvolution. The default value is 10000. cell2loc.use.gpu (default: FALSE): A logical value indicating whether to use GPU for computation. The default value is FALSE. 
Click “Run Step8” to proceed. After clicking the “Run Step8” button, please do not modify any other parameters on the page. Once Step 8 is completed, the result files will appear in the UI and will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.9 Step 9 (st-seq pipeline). Cell cycle analysis The figure showing the step 9 of st-seq pipeline. The gene sets for calculating S phase scores (e.g. “gene1,gene2,gene3”): A list of genes associated with the S phase. The default value is NULL (uses genes from Seurat). The gene sets for calculating G2M phase scores (e.g. “gene1,gene2,gene3”): A list of genes associated with the G2M phase. The default value is NULL (uses genes from Seurat). Click “Run Step9” to proceed. After clicking the “Run Step9” button, please do not modify any other parameters on the page. Once Step 9 is completed, the result files will appear in the UI and will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.10 Step 10 (st-seq pipeline). Niche analysis The figure showing the step 10 of st-seq pipeline. Nich.cluster.n (default: 4): The number of clusters for niche clustering. The default value is 4. Click “Run Step10” to proceed. After clicking the “Run Step10” button, please do not modify any other parameters on the page. Once Step 10 is completed, the result files will appear in the UI and will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.11 Step 11 (st-seq pipeline). Generate the Report The figure showing the step 11 of st-seq pipeline. Click “Run Step11” to generate the analysis report. "],["operation-manual-for-the-hemascopecloud.html", "8 Operation Manual for the HemaScopeCloud 8.1 User Login 8.2 Homepage 8.3 Data Page 8.4 Analysis Page 8.5 Projects page", " 8 Operation Manual for the HemaScopeCloud 8.1 User Login 8.1.1 Enter the URL in a web browser: https://hemascope.hiplot.cn/?home=hemascope and click to access the login page. 
Figure 8.1: Login Page 8.1.2 To obtain free computational resources: Enter your login email, click “Get Code,” input the verification code received in your email, and then click “Login” to complete the login and access the system homepage. 8.1.3 To browse HemaScopeCloud without needing computational resources: Click the “View without Login” button to access the system homepage. You can view demo analysis projects. If you click the button to initiate an analysis, the platform will prompt: “Please log in for analysis!” 8.2 Homepage Figure 8.2: Homepage The left side features a menu bar containing Home, Data, Analysis, Project, and Help. And the upper right section includes statistics on analysis project status, usage of analysis projects, a quick entry for creating new analysis projects, and statistics on allocated storage capacity usage. Statistics on Analysis Project Status Pending Analysis:Waiting for analysis, not yet submitted for analysis. Pending Resources:Waiting for resources, analysis submitted and awaiting resource allocation. Analyzing:Currently analyzing. Completed:Analysis completed. Error:An error occurred during analysis. Total:Total of all analysis statuses. Usage Statistics for Analysis Projects: Number of used analysis projects / Total number of allocated analysis projects. The current allocation for the system is 50 projects. For additional free computational resources, please contact the developer. Quick Entry for Creating New Analysis Projects: Supports quick access to the new analysis project pages corresponding to two pipelines. Storage Capacity Usage: Used Storage Resources / Allocated Storage Resources. The lower section displays the most recently run analysis projects. By default, it shows demo projects upon initial entry. Clicking the “View” option on an entry in the Projects section allows you to access and analyze that specific analysis project. 
8.3 Data Page The Data page includes storage for Demo sample project data as well as Personal project data. Data under the Demo tab can be downloaded, while the Personal tab allows users to create new folders and upload files. 8.4 Analysis Page It lists two analysis pipelines: sc_HemaScopeCloud and st_HemaScopeCloud, serving as entries for creating new analysis projects. Click the Analysis button to access the new project and execution page for that pipeline. Figure 8.3: Select Analysis Pipeline Page Figure 8.4: Enter the Analysis Pipeline Page Create New Analysis Project Click the Analysis button under the sc_HemaScopeCloud to enter the new project page for that pipeline. Project Name:Enter the name of the analysis project for identification purposes. Input Data:Click Upload to upload local analysis files. Single and multiple file uploads are supported. Uploaded files must comply with the pipeline’s input file requirements; otherwise, an error will occur during execution. Sample Name:Click Add to enter the sample names, which should correspond to the uploaded analysis files. Items marked with * are required fields. Click the Run button to initiate the analysis:For the scRNA-seq pipeline,this will trigger step1-4; for the st-seq pipeline, it will trigger step1-5. Each subsequent analysis step requires clicking Run on the relevant step page to submit. Before submission, ensure that the previous step has generated result files; otherwise, a notification will indicate that the analysis cannot proceed. Load Demo Data HemaScopeCloud supports loading pre-configured analysis demo files and default parameters to quickly initiate analysis projects. On the new project page, click Load Demo Data to load files from the demo project and fill the required fields. Then, click the Run button to execute the analysis for the demo project. Figure 8.5: Load Demo Data After clicking Run, you will be redirected to the detailed page of the analysis project. 
Analysis Project Detail Page Notifications Waiting for resources…Do not submit repeatedly: This indicates that the submission is waiting for resources. Do not click the Run button again. Analyzing…Do not submit repeatedly: This indicates that the project is currently analyzing. Do not click the Run button again. Analysis Steps, Current Analysis Step: Displays all stepwise analysis processes and the current step. Click on different steps to navigate to the corresponding analysis step page. For the initial analysis, you must complete the previous step before proceeding to the next one. Refresh Button: Used to refresh the current page. Results: This tab stores the results of the completed step. Visualization: For steps that involve visualizations, the results will be found under the visualization tab. History:Click on Run History to view all historical runs of that step. Status:Corresponds to the analysis status of the project. Log:Click this button to view the run log. Parameter Settings:Used for entering parameter values. Figure 8.6: Analysis Project Page Figure 8.7: Analysis Project Result Page Figure 8.8: This step of the analysis project displays ‘Waiting for resources…Do not submit repeatedlly’ Figure 8.9: This step of the analysis project displays ‘Analyzing…Do not submit repeatedly’ Figure 8.10: History Page Note: For steps that have already been completed (except for the first step), you can adjust the parameters and click Run to perform multiple analyses. The results page will retain only the latest analysis results. 8.5 Projects page The homege includes analysis projects created by the user as well as pre-configured demo analysis projects provided by the system. Figure 8.11: Demo projects and user’s personal projects Clicking “View” allows you to navigate to the analysis project for review and step-by-step analysis. 
Figure 8.12: Click ‘View’ to access the analysis project Figure 8.13: Enter the detailed analysis project page "]] +[["index.html", "HemaScope Tutorial 1 Introduction", " HemaScope Tutorial HemaScope team 2024-10-22 1 Introduction HemaScope is a specialized bioinformatics toolkit designed for analyzing both single-cell and spatial transcriptome sequencing data from hematopoietic cells, including myeloid and lymphoid lineages. We have developed an R package named HemaScopeR, a Shiny interface named HemaScopeShiny, and a cloud platform named HemaScopeCloud. This tutorial introduces how to install and use the R package and Shiny interface, as well as how to access and operate the cloud platform. "],["installation.html", "2 Installation 2.1 Create a new conda environment and activate it 2.2 Set the channels in conda 2.3 Install R 2.4 Install required R-packages 2.5 Create the required python (v.3.9.12) virtual environment 2.6 The installed packages with versions", " 2 Installation 2.1 Create a new conda environment and activate it conda create --name HemaScope_env conda activate HemaScope_env 2.2 Set the channels in conda # Add the default channel conda config --add channels defaults # Add default channel URLs conda config --add default_channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main conda config --add default_channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r conda config --add default_channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2 # Add custom channels conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2 conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/menpo conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch conda config --add channels 
https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch-lts conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/simpleitk conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/deepmodeling # Set to show channel URLs conda config --set show_channel_urls true 2.3 Install R R 4.3.3 conda install R-base=4.3.3 2.4 Install required R-packages From conda conda install -c conda-forge r-devtools=2.4.5 -y conda install -c conda-forge r-Seurat=4.3.0.1 -y conda install -c conda-forge r-Rfast2=0.1.5.1 -y conda install -c conda-forge r-hdf5r=1.3.10 -y conda install -c conda-forge r-ggpubr=0.6.0 -y conda install pwwang::r-seuratwrappers -y conda install -c bioconda bioconductor-monocle=2.28.0 -y conda install -c bioconda bioconductor-slingshot=2.8.0 -y conda install -c bioconda bioconductor-GSVA=1.48.2 -y conda install -c bioconda bioconductor-org.Mm.eg.db=3.17.0 -y conda install -c bioconda bioconductor-org.Hs.eg.db=3.17.0 -y conda install -c bioconda bioconductor-scran=1.28.1 -y conda install -c bioconda bioconductor-AUCell=1.22.0 -y conda install -c bioconda bioconductor-RcisTarget=1.20.0 -y conda install -c bioconda bioconductor-GENIE3=1.24.0 -y conda install -c bioconda bioconductor-biomaRt=2.56.1 -y conda install -c bioconda r-velocyto.r=0.6 -y #conda install -c bioconda bioconductor-limma=3.56.2 -y Enter the R language environment We suggest users do not manually update any already installed R packages during the installation of the following R packages. R From BiocManager # BiocManager(version = "1.30.23") should already be installed as a dependency of r-seuratwrappers. # If it is not installed, please run the following code to install it.
# install.packages("BiocManager",version="1.30.23") BiocManager::install("ComplexHeatmap") BiocManager::install("scmap") BiocManager::install("clusterProfiler") BiocManager::install("BiocNeighbors") From CRAN install.packages("doMC") install.packages("doRNG") install.packages("shinyjs") install.packages("shiny") install.packages("shinyWidgets") install.packages("shinydashboard") install.packages("slickR") install.packages("phateR") install.packages("gelnet") install.packages("parallelDist") install.packages("kableExtra") install.packages("transport") install.packages("feather") install.packages("markdown") install.packages("ggalluvial") install.packages("forcats") install.packages("mcmc") install.packages("MCMCpack") install.packages("fields") install.packages("getopt") install.packages("osfr") From GitHub Tips: Sometimes network connection issues may occur, resulting in an error message indicating that GitHub cannot be reached. Please try installing again when the network conditions improve. Usage limitations: Sometimes an API rate limit error occurs, and a GitHub token is needed to raise the GitHub API rate limit. The steps to resolve this are as follows: Register for an account or log in to an existing account on the GitHub website. Then click on your profile picture in the top right corner, go to the dropdown menu and select “Settings.” Next, find “Developer settings” and click on it, then find “Personal access tokens (classic).” Click on it, then click “Create new token (classic).” Create a new token by first naming it anything you like. Then choose the expiration time for the token. Finally, check the “repo” box; the token will be used to download code repositories from GitHub. Click “Generate token.” Copy the generated token password. After that, set the token in the environment variable in R. Since we are using conda, enter R by typing R in the terminal. Then, enter the command: usethis::edit_r_environ(). This will open a file. Press the i key to edit.
Paste the token you copied into the code area as follows: GITHUB_TOKEN=“your_token”. Then press Esc, type :wq! (force save). After that, you need to exit Linux and re-enter R. Close and reopen the terminal to apply the environment variable. Reopen Linux, activate the conda environment, and enter R again. devtools::install_github("sqjin/CellChat") devtools::install_github("immunogenomics/presto") devtools::install_github("aertslab/SCENIC@140ad6b") devtools::install_github("pzhulab/abcCellmap@f44c14b") devtools::install_github("navinlabcode/copykat@d7d6569") devtools::install_github('chris-mcginnis-ucsf/DoubletFinder@8c7f76e') devtools::install_github("mojaveazure/seurat-disk@877d4e1") devtools::install_github(c("hfang-bristol/dnet")) Install HemaScopeR from github devtools::install_github(repo="ZhenyiWangTHU/HemaScopeR", dep = FALSE) 2.5 Create the required python (v.3.9.12) virtual environment Run the init_miniconda function to create the miniconda virtual environments for the scRNA-seq pipeline and ST pipeline of 10X Visium data and MERFISH data. library(HemaScopeR) init_miniconda() (Optional) Run the init_miniconda_stereo function to create the miniconda virtual environment for the stereo-seq data. init_miniconda_stereo() 2.6 The installed packages with versions R packages with versions Package Version ------- ------- Python packages with versions Package Version ------------------------ -------------- "],["integrated-scrna-seq-pipeline.html", "3 Integrated scRNA-seq pipeline", " 3 Integrated scRNA-seq pipeline Load the R packages. 
# sc libraries library(Seurat) library(phateR) library(DoubletFinder) library(monocle) library(slingshot) library(GSVA) library(limma) library(plyr) library(dplyr) library(org.Mm.eg.db) library(org.Hs.eg.db) library(CellChat) library(velocyto.R) library(SeuratWrappers) library(stringr) library(scran) library(ggpubr) library(viridis) library(pheatmap) library(parallel) library(reticulate) library(SCENIC) library(feather) library(AUCell) library(RcisTarget) library(Matrix) library(foreach) library(doParallel) library(clusterProfiler) # st libraries library(RColorBrewer) library(Rfast2) library(SeuratDisk) library(abcCellmap) library(biomaRt) library(copykat) library(gelnet) library(ggplot2) library(parallelDist) library(patchwork) library(markdown) # getopt library(getopt) library(tools) # HemaScopeR library(HemaScopeR) Run the integrated scRNA-seq pipeline. scRNASeq_10x_pipeline( # input and output input.data.dirs = c('./SRR7881399/outs/filtered_feature_bc_matrix', './SRR7881400/outs/filtered_feature_bc_matrix', './SRR7881401/outs/filtered_feature_bc_matrix', './SRR7881402/outs/filtered_feature_bc_matrix', './SRR7881403/outs/filtered_feature_bc_matrix', './SRR7881404/outs/filtered_feature_bc_matrix', './SRR7881405/outs/filtered_feature_bc_matrix', './SRR7881406/outs/filtered_feature_bc_matrix', './SRR7881407/outs/filtered_feature_bc_matrix', './SRR7881408/outs/filtered_feature_bc_matrix', './SRR7881409/outs/filtered_feature_bc_matrix', './SRR7881410/outs/filtered_feature_bc_matrix', './SRR7881411/outs/filtered_feature_bc_matrix', './SRR7881412/outs/filtered_feature_bc_matrix', './SRR7881413/outs/filtered_feature_bc_matrix', './SRR7881414/outs/filtered_feature_bc_matrix', './SRR7881415/outs/filtered_feature_bc_matrix', './SRR7881416/outs/filtered_feature_bc_matrix', './SRR7881417/outs/filtered_feature_bc_matrix', './SRR7881418/outs/filtered_feature_bc_matrix', './SRR7881419/outs/filtered_feature_bc_matrix', './SRR7881420/outs/filtered_feature_bc_matrix',
'./SRR7881421/outs/filtered_feature_bc_matrix', './SRR7881422/outs/filtered_feature_bc_matrix', './SRR7881423/outs/filtered_feature_bc_matrix'), project.names = c( 'SRR7881399', 'SRR7881400', 'SRR7881401', 'SRR7881402', 'SRR7881403', 'SRR7881404', 'SRR7881405', 'SRR7881406', 'SRR7881407', 'SRR7881408', 'SRR7881409', 'SRR7881410', 'SRR7881411', 'SRR7881412', 'SRR7881413', 'SRR7881414', 'SRR7881415', 'SRR7881416', 'SRR7881417', 'SRR7881418', 'SRR7881419', 'SRR7881420', 'SRR7881421', 'SRR7881422', 'SRR7881423'), output.dir = './output/', pythonPath = '/home/anaconda3/envs/HemaScopeR/bin/python', # quality control and preprocessing gene.column = 2, min.cells = 10, min.feature = 200, mt.pattern = '^MT-', nFeature_RNA.limit = 200, percent.mt.limit = 20, scale.factor = 10000, nfeatures = 3000, ndims = 50, vars.to.regress = NULL, PCs = 1:35, resolution = 0.4, n.neighbors = 50, # remove doublets doublet.percentage = 0.04, doublerFinderwraper.PCs = 1:20, doublerFinderwraper.pN = 0.25, doublerFinderwraper.pK = 0.1, # phateR phate.knn = 50, phate.npca = 20, phate.t = 10, phate.ndim = 2, min.pct = 0.25, logfc.threshold = 0.25, # visualization ViolinPlot.cellTypeOrders = as.character(1:22), ViolinPlot.cellTypeColors = NULL, Org = 'hsa', loom.files.path = c( './SRR7881399/velocyto/SRR7881399.loom', './SRR7881400/velocyto/SRR7881400.loom', './SRR7881401/velocyto/SRR7881401.loom', './SRR7881402/velocyto/SRR7881402.loom', './SRR7881403/velocyto/SRR7881403.loom', './SRR7881404/velocyto/SRR7881404.loom', './SRR7881405/velocyto/SRR7881405.loom', './SRR7881406/velocyto/SRR7881406.loom', './SRR7881407/velocyto/SRR7881407.loom', './SRR7881408/velocyto/SRR7881408.loom', './SRR7881409/velocyto/SRR7881409.loom', './SRR7881410/velocyto/SRR7881410.loom', './SRR7881411/velocyto/SRR7881411.loom', './SRR7881412/velocyto/SRR7881412.loom', './SRR7881413/velocyto/SRR7881413.loom', './SRR7881414/velocyto/SRR7881414.loom', './SRR7881415/velocyto/SRR7881415.loom', 
'./SRR7881416/velocyto/SRR7881416.loom', './SRR7881417/velocyto/SRR7881417.loom', './SRR7881418/velocyto/SRR7881418.loom', './SRR7881419/velocyto/SRR7881419.loom', './SRR7881420/velocyto/SRR7881420.loom', './SRR7881421/velocyto/SRR7881421.loom', './SRR7881422/velocyto/SRR7881422.loom', './SRR7881423/velocyto/SRR7881423.loom'), # cell cycle cellcycleCutoff = NULL, # cell chat sorting = FALSE, ncores = 10, # Verbose = FALSE, # activeEachStep Whether_load_previous_results = FALSE, Step1_Input_Data = TRUE, Step1_Input_Data.type = 'cellranger-count', Step2_Quality_Control = TRUE, Step2_Quality_Control.RemoveBatches = TRUE, Step2_Quality_Control.RemoveDoublets = TRUE, Step3_Clustering = TRUE, Step4_Identify_Cell_Types = TRUE, Step4_Use_Which_Labels = 'clustering', Step4_Cluster_Labels = NULL, Step4_Changed_Labels = NULL, Step4_run_sc_CNV = TRUE, Step5_Visualization = TRUE, Step6_Find_DEGs = TRUE, Step7_Assign_Cell_Cycle = TRUE, Step8_Calculate_Heterogeneity = TRUE, Step9_Violin_Plot_for_Marker_Genes = TRUE, Step10_Calculate_Lineage_Scores = TRUE, Step11_GSVA = TRUE, Step11_GSVA.identify.cellType.features=TRUE, Step11_GSVA.identify.diff.features=FALSE, Step11_GSVA.comparison.design=NULL, Step12_Construct_Trajectories = TRUE, Step12_Construct_Trajectories.clusters = c('3','6','9','10','11','14','15','19'), Step12_Construct_Trajectories.monocle = TRUE, Step12_Construct_Trajectories.slingshot = TRUE, Step12_Construct_Trajectories.scVelo = TRUE, Step13_TF_Analysis = TRUE, Step14_Cell_Cell_Interaction = TRUE, Step15_Generate_the_Report = TRUE ) "],["step-by-step-scrna-seq-pipeline.html", "4 Step-by-step scRNA-seq Pipeline 4.1 Before you begin 4.2 Step 1. Load the input data 4.3 Step 2. Quality Control 4.4 Step 3. Clustering 4.5 Step 4. Identify Cell Types 4.6 Step 5. Visualization 4.7 Step 6. Find DEGs 4.8 Step 7. Assign Cell Cycles 4.9 Step 8. Calculate Heterogeneity 4.10 Step 9. Violin Plot for Marker Genes 4.11 Step 10. Calculate Lineage Scores 4.12 Step 11. 
GSVA 4.13 Step 12. Construct Trajectories 4.14 Step 13. TF Analysis 4.15 Step 14. Cell-Cell Interaction", " 4 Step-by-step scRNA-seq Pipeline 4.1 Before you begin Load the R packages. library(Seurat) library(phateR) library(DoubletFinder) library(monocle) library(slingshot) library(GSVA) library(limma) library(plyr) library(dplyr) library(org.Mm.eg.db) library(org.Hs.eg.db) library(CellChat) library(velocyto.R) library(SeuratWrappers) library(stringr) library(scran) library(ggpubr) library(viridis) library(pheatmap) library(parallel) library(reticulate) library(SCENIC) library(feather) library(AUCell) library(RcisTarget) library(Matrix) library(foreach) library(doParallel) library(clusterProfiler) # st libraries library(RColorBrewer) library(Rfast2) library(SeuratDisk) library(abcCellmap) library(biomaRt) library(copykat) library(gelnet) library(ggplot2) library(parallelDist) library(patchwork) library(markdown) library(getopt) library(tools) library(HemaScopeR) Set the paths for the output results, and the Python installation. output.dir = './output' pythonPath = '/home/anaconda3/envs/HemaScopeR/bin/python' Create folders for saving the results of HemaScopeR analysis. wdir <- getwd() if(is.null(pythonPath)==FALSE){ reticulate::use_python(pythonPath) }else{print('Please set the path of Python.')} if (!file.exists(paste0(output.dir, '/HemaScopeR_results'))) { dir.create(paste0(output.dir, '/HemaScopeR_results'),recursive =T) } output.dir <- paste0(output.dir,'/HemaScopeR_results') if (!file.exists(paste0(output.dir, '/RDSfiles/'))) { dir.create(paste0(output.dir, '/RDSfiles/')) } #set the path for loading previous results, if necessary previous_results_path <- paste0(output.dir, '/RDSfiles/') # if (Whether_load_previous_results) { # print('Loading the previous results...') # Load_previous_results(previous_results_path = previous_results_path) # } 4.2 Step 1. Load the input data Create a folder for step1 print('Step1. 
Input data.') if (!file.exists(paste0(output.dir, '/Step1.Input_data/'))) { dir.create(paste0(output.dir, '/Step1.Input_data/')) } Set the parameters for loading the data sets. input.data.dirs = c('./SRR7881399/outs/filtered_feature_bc_matrix')#, #'./SRR7881400/outs/filtered_feature_bc_matrix', #'./SRR7881401/outs/filtered_feature_bc_matrix', #'./SRR7881402/outs/filtered_feature_bc_matrix', #'./SRR7881403/outs/filtered_feature_bc_matrix' project.names = c('SRR7881399')#, #'SRR7881400', #'SRR7881401', #'SRR7881402', #'SRR7881403' gene.column = 2 min.cells = 10 min.feature = 200 mt.pattern = '^MT-' # set '^mt-' for mouse data Step1_Input_Data.type = 'cellranger-count' loom.files.path ="./SRR7881399/loom" Load the data sets file.copy(from = input.data.dirs, to = paste0(output.dir,'/Step1.Input_data/'), recursive = TRUE) if(Step1_Input_Data.type == 'cellranger-count'){ if(length(input.data.dirs) > 1){ input.data.list <- c() for (i in 1:length(input.data.dirs)) { sc_data.temp <- Read10X(data.dir = input.data.dirs[i], gene.column = gene.column) sc_object.temp <- CreateSeuratObject(counts = sc_data.temp, project = project.names[i], min.cells = min.cells, min.feature = min.feature) sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern) input.data.list <- c(input.data.list, sc_object.temp)} }else{ sc_data <- Read10X(data.dir = input.data.dirs, gene.column = gene.column) sc_object <- CreateSeuratObject(counts = sc_data, project = project.names, min.cells = min.cells, min.feature = min.feature) sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern) } }else if(Step1_Input_Data.type == 'Seurat'){ if(length(input.data.dirs) > 1){ input.data.list <- c() for (i in 1:length(input.data.dirs)) { sc_object.temp <- readRDS(input.data.dirs[i]) sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern) input.data.list <- c(input.data.list, sc_object.temp) } }else{ sc_object <- 
readRDS(input.data.dirs) sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern) } }else if(Step1_Input_Data.type == 'Matrix'){ if(length(input.data.dirs) > 1){ input.data.list <- c() for (i in 1:length(input.data.dirs)) { sc_data.temp <- readRDS(input.data.dirs[i]) sc_object.temp <- CreateSeuratObject(counts = sc_data.temp, project = project.names[i], min.cells = min.cells, min.feature = min.feature) sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern) input.data.list <- c(input.data.list, sc_object.temp)} }else{ sc_data <- readRDS(input.data.dirs) sc_object <- CreateSeuratObject(counts = sc_data, project = project.names, min.cells = min.cells, min.feature = min.feature) sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern) } }else{ stop('Please input data generated by the cellranger-count software, or a Seurat object, or a gene expression matrix. HemaScopeR does not support other formats of input data.') } Save the variables after executing each step, if necessary. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } 4.3 Step 2. Quality Control In this step, the following quality control steps will be performed: Normalize data using the LogNormalize method. Find variable features using the vst method. Scale data using the identified variable features and specified variables to regress out. Perform principal component analysis (PCA) on the scaled data. Find K nearest neighbors based on PCA dimensions. Perform clustering analysis based on the found neighbors. Optionally, remove doublets using doubletFinder. Optionally, integrate multiple datasets by removing batch effects. 
4.3.1 Function arguments: nFeature_RNA.limit: The cutoff of the minimum number of detected genes in each cell. percent.mt.limit: The cutoff of the maximum percentage of mitochondrial genes in each cell. scale.factor: The scale factor for the ‘data’ slot in the Seurat object. nfeatures: The number of selected highly variable features for downstream analysis. ndims: The number of principal components in PCA. vars.to.regress: Variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito. (ScaleData in Seurat) PCs: Which dimensions to use as input features. (RunTSNE and RunUMAP in Seurat) resolution: Value of the resolution parameter, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities. (FindClusters in Seurat) n.neighbors: Defines k for the k-nearest neighbor algorithm. (FindNeighbors in Seurat) percentage: Assuming ‘percentage’ doublet formation rate - tailor for your dataset. The default value is 0.05. doublerFinderwraper.PCs: Which dimensions to use as input features for doubletFinder. doublerFinderwraper.pN: The percentage of real-artificial data for doubletFinder. doublerFinderwraper.pK: The pK parameter controls the doublet cell detection by determining the number of nearest neighbors and influencing the calculation of pANN scores and the final cell classification results. Adjusting the pK value allows optimization of the doublet cell detection process based on specific data and analysis requirements. 4.3.2 codes for running step2 Create a folder for saving the results of quality control. print('Step2. Quality control.') if (!file.exists(paste0(output.dir, '/Step2.Quality_control/'))) { dir.create(paste0(output.dir, '/Step2.Quality_control/')) } Set the parameters for quality control.
# quality control nFeature_RNA.limit = 200 percent.mt.limit = 20 # preprocessing nfeatures = 3000 scale.factor = 10000 ndims = 50 vars.to.regress = NULL PCs = 1:35 resolution = 0.4 n.neighbors = 50 # removing doublets Step2_Quality_Control.RemoveDoublets = TRUE doublet.percentage = 0.04 doublerFinderwraper.PCs = 1:20 doublerFinderwraper.pN = 0.25 doublerFinderwraper.pK = 0.1 # removing batch effect Step2_Quality_Control.RemoveBatches = TRUE Run the quality control process. if(length(input.data.dirs) > 1){ # preprocess and quality control for multiple scRNA-Seq data sets sc_object <- QC_multiple_scRNASeq(seuratObjects = input.data.list, datasetID = project.names, output.dir = paste0(output.dir,'/Step2.Quality_control/'), Step2_Quality_Control.RemoveBatches = Step2_Quality_Control.RemoveBatches, Step2_Quality_Control.RemoveDoublets = Step2_Quality_Control.RemoveDoublets, nFeature_RNA.limit = nFeature_RNA.limit, percent.mt.limit = percent.mt.limit, scale.factor = scale.factor, nfeatures = nfeatures, ndims = ndims, vars.to.regress = vars.to.regress, PCs = PCs, resolution = resolution, n.neighbors = n.neighbors, percentage = doublet.percentage, doublerFinderwraper.PCs = doublerFinderwraper.PCs, doublerFinderwraper.pN = doublerFinderwraper.pN, doublerFinderwraper.pK = doublerFinderwraper.pK ) }else{ # preprocess and quality control for single scRNA-Seq data set sc_object <- QC_single_scRNASeq(sc_object = sc_object, datasetID = project.names, output.dir = paste0(output.dir,'/Step2.Quality_control/'), Step2_Quality_Control.RemoveDoublets = Step2_Quality_Control.RemoveDoublets, nFeature_RNA.limit = nFeature_RNA.limit, percent.mt.limit = percent.mt.limit, scale.factor = scale.factor, nfeatures = nfeatures, vars.to.regress = vars.to.regress, ndims = ndims, PCs = PCs, resolution = resolution, n.neighbors = n.neighbors, percentage = doublet.percentage, doublerFinderwraper.PCs = doublerFinderwraper.PCs, doublerFinderwraper.pN = doublerFinderwraper.pN, doublerFinderwraper.pK = 
doublerFinderwraper.pK) } Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } 4.3.3 Outputs Figure 4.1: Violin plots showing the nFeature, nCount and percent.mt for each sample Figure 4.2: Figures showing the correlation between nFeature and nCount, as well as between nCount and percent.mt Figure 4.3: Figures showing the variable features used for downstream analysis Figure 4.4: ElbowPlot showing suitable number of PCs used for further analysis Figure 4.5: UMAP plot showing doublets found by DoubletFinder 4.4 Step 3. Clustering Create a folder for saving the results of Louvain clustering. print('Step3. Clustering.') if (!file.exists(paste0(output.dir, '/Step3.Clustering/'))) { dir.create(paste0(output.dir, '/Step3.Clustering/')) } Set the parameters for clustering. PCs = 1:35 resolution = 0.4 n.neighbors = 50 Run Louvain clustering.
if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){graph.name <- 'integrated_snn'}else{graph.name <- 'RNA_snn'} sc_object <- FindNeighbors(sc_object, dims = PCs, k.param = n.neighbors, force.recalc = TRUE) sc_object <- FindClusters(sc_object, resolution = resolution, graph.name = graph.name) sc_object@meta.data$seurat_clusters <- as.character(as.numeric(sc_object@meta.data$seurat_clusters)) # plot clustering pdf(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','tsne_cluster.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "tsne", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() pdf(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','umap_cluster.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "umap", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','tsne_cluster.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "tsne", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','umap_cluster.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "umap", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.6: UMAP plot showing clustering results 4.5 Step 4. 
Identify Cell Types In this step, users can predict the cell types of hematopoietic cells by implementing two approaches (Scmap and Seurat) through abcCellmap packages. Cells are labeled by 43 different RNA clusters according to unsupervised clustering of single-cell transcriptional profiles, and also labeled by 32 immunophenotypic cell types. In addition, users can use Copykat to measure copy number variation (CNV) and determine the ploidy of each cell. 4.5.1 codes for running abcCellmap Create a folder for saving the results of cell type identification. print('Step4. Identify cell types automatically.') if (!file.exists(paste0(output.dir, '/Step4.Identify_Cell_Types/'))) { dir.create(paste0(output.dir, '/Step4.Identify_Cell_Types/')) } Set the path for the database. databasePath = "~/HemaScopeR/database/" Set the parameters for cell type identification. Step4_Use_Which_Labels = 'clustering' Step4_Cluster_Labels = NULL Step4_Changed_Labels = NULL Org = 'hsa' ncores = 10 Run the cell type identification process. 
sc_object <- run_cell_annotation(object = sc_object, assay = 'RNA', species = Org, output.dir = paste0(output.dir,'/Step4.Identify_Cell_Types/')) if(Org == 'hsa'){ load(paste0(databasePath,"/HematoMap.reference.rdata")) #the data can be downloaded via the link https://cloud.tsinghua.edu.cn/d/759fd04333274d3f9946 if(length(intersect(rownames(HematoMap.reference), rownames(sc_object))) < 1000){ HematoMap.reference <- RenameGenesSeurat(obj = HematoMap.reference, newnames = toupper(rownames(HematoMap.reference)), gene.use = rownames(HematoMap.reference), de.assay = "RNA", lassays = "RNA") } if(sc_object@active.assay == 'integrated'){ DefaultAssay(sc_object) <- 'RNA' sc_object <- mapDataToRef(ref_object = HematoMap.reference, ref_labels = HematoMap.reference@meta.data$CellType, query_object = sc_object, PCs = PCs, output.dir = paste0(output.dir, '/Step4.Identify_Cell_Types/')) DefaultAssay(sc_object) <- 'integrated' }else{ sc_object <- mapDataToRef(ref_object = HematoMap.reference, ref_labels = HematoMap.reference@meta.data$CellType, query_object = sc_object, PCs = PCs, output.dir = paste0(output.dir, '/Step4.Identify_Cell_Types/')) } } Set the cell labels. 
# set the cell labels if(Step4_Use_Which_Labels == 'clustering'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$seurat_clusters Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.1'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$Seurat.RNACluster Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.2'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$scmap.RNACluster Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.3'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$Seurat.Immunophenotype Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.4'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$scmap.Immunophenotype Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'HematoMap'){ if(Org == 'hsa'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$predicted.id Idents(sc_object) <- sc_object@meta.data$selectLabels }else{print("'HematoMap' is only applicable to human data ('Org' = 'hsa').")} }else if(Step4_Use_Which_Labels == 'changeLabels'){ if (!is.null(Step4_Cluster_Labels) && !is.null(Step4_Changed_Labels) && length(Step4_Cluster_Labels) == length(Step4_Changed_Labels)){ sc_object@meta.data$selectLabels <- plyr::mapvalues(sc_object@meta.data$seurat_clusters, from = as.character(Step4_Cluster_Labels), to = as.character(Step4_Changed_Labels), warn_missing = FALSE) Idents(sc_object) <- sc_object@meta.data$selectLabels }else{ print("Please input the 'Step4_Cluster_Labels' parameter as Seurat clustering labels, and the 'Step4_Changed_Labels' parameter as new labels. 
Please note that these two parameters should be of equal length.") } }else{ print('Please set the "Step4_Use_Which_Labels" parameter as "clustering", "abcCellmap.1", "abcCellmap.2", "HematoMap" or "changeLabels".') } Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.7: UMAP plots showing cell type annotation results Figure 4.8: Immunophenotype and RNACluster label predicted by scmap Figure 4.9: Immunophenotype and RNACluster label predicted by Seurat 4.5.2 codes for running the CNV analysis sc_CNV(sc_object=sc_object, save_path=paste0(output.dir,'/Step4.Identify_Cell_Types/'), assay = 'RNA', LOW.DR = 0.05, #refer to the Copykat documentation for detailed explanations of the parameters UP.DR = 0.1, win.size = 25, distance = "euclidean", genome = NULL, n.cores = ncores, #note: this step will take a long time, using more ncores could shorten the running time species = Org) Figure 4.10: copykat heatmap Figure 4.11: UMAP plot showing CNV state predicted by copykat 4.6 Step 5. Visualization In this step, users can obtain statistics on the numbers and proportions of cell groups, and can also use three dimensionality reduction methods (TSNE, UMAP, phateR) to visualize the results. 4.6.1 codes for performing three dimensionality reduction methods Create a folder for saving the visualization results. print('Step5. Visualization.') if (!file.exists(paste0(output.dir, '/Step5.Visualization/'))) { dir.create(paste0(output.dir, '/Step5.Visualization/')) } Perform visualization using UMAP and TSNE. 
# plot cell types pdf(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','tsne cell types.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "tsne", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() pdf(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','umap cell types.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "umap", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','tsne cell types.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "tsne", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','umap cell types.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "umap", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() Figure 4.12: UMAP and TSNE visualization Set the parameters for phateR. phate.knn = 50 #The number of nearest neighbors to consider in the phateR algorithm. Default 50. phate.npca = 20 #The number of principal components to use in the phateR algorithm. Default 20. phate.t = 10 #The t-value for the phateR algorithm, which controls the level of exploration. Default 10. phate.ndim = 2 #The number of dimensions for the output embedding in the phateR algorithm. Default 2. Run phateR for dimensional reduction and visualization. 
# run phateR if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){ DefaultAssay(sc_object) <- 'integrated' }else{ DefaultAssay(sc_object) <- 'RNA'} if(!is.null(pythonPath)){ run_phateR(sc_object = sc_object, output.dir = paste0(output.dir,'/Step5.Visualization/'), pythonPath = pythonPath, phate.knn = phate.knn, phate.npca = phate.npca, phate.t = phate.t, phate.ndim = phate.ndim) } Figure 4.13: phateR result 4.6.2 codes for calculating the proportions The statistical results for the numbers and proportions of cell groups. # statistical results cells_labels <- as.data.frame(cbind(rownames(sc_object@meta.data), as.character(sc_object@meta.data$selectLabels))) colnames(cells_labels) <- c('cell_id', 'cluster_id') cluster_counts <- cells_labels %>% group_by(cluster_id) %>% summarise(count = n()) total_cells <- nrow(cells_labels) cluster_counts <- cluster_counts %>% mutate(proportion = count / total_cells) cluster_counts <- as.data.frame(cluster_counts) cluster_counts$percentages <- scales::percent(cluster_counts$proportion, accuracy = 0.1) cluster_counts <- cluster_counts[,-which(colnames(cluster_counts)=='proportion')] cluster_counts$cluster_id_count_percentages <- paste(cluster_counts$cluster_id, " (", cluster_counts$count, ' cells; ', cluster_counts$percentages, ")", sep='') cluster_counts <- cluster_counts[order(cluster_counts$count, decreasing = TRUE),] cluster_counts <- rbind(cluster_counts, c('Total', sum(cluster_counts$count), '100%', 'all cells')) sc_object@meta.data$cluster_id_count_percentages <- mapvalues(sc_object@meta.data$selectLabels, from=cluster_counts$cluster_id, to=cluster_counts$cluster_id_count_percentages, warn_missing=FALSE) colnames(sc_object@meta.data)[which(colnames(sc_object@meta.data) == 'cluster_id_count_percentages')] <- paste('Total ', nrow(sc_object@meta.data), ' cells', sep='') cluster_counts <- cluster_counts[,-which(colnames(cluster_counts)=='cluster_id_count_percentages')] colnames(cluster_counts) <- c('Cell 
types', 'Cell counts', 'Percentages') # names(colorvector) <- mapvalues(names(colorvector), # from=cluster_counts$cluster_id, # to=cluster_counts$cluster_id_count_percentages, # warn_missing=FALSE) write.csv(cluster_counts, file=paste(paste0(output.dir, '/Step5.Visualization/'), '/cell types_cell counts_percentages.csv', sep=''), quote=FALSE, row.names=FALSE) The UMAP visualization. pdf(paste(paste0(output.dir, '/Step5.Visualization'), '/cell types_cell counts_percentages_umap.pdf', sep=''), width = 14, height = 6) print(DimPlot(sc_object, reduction = "umap", group.by = paste('Total ', nrow(sc_object@meta.data), ' cells', sep=''), label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.14: UMAP plot showing cell type and corresponding proportion 4.7 Step 6. Find DEGs In this step, users can find DEGs (differentially expressed genes) across different cell type group using FindAllMarkers, use GPTCelltype to predict cell label, perform GO and KEGG enrichment analysis, and perform subnetwork analysis for each cell type group. 4.7.1 codes for finding DEGs Set the parameters for identifying differentially expressed genes. min.pct = 0.25 logfc.threshold = 0.25 Create a folder for the DEGs analysis. print('Step6. Find DEGs.') if (!file.exists(paste0(output.dir, '/Step6.Find_DEGs/'))) { dir.create(paste0(output.dir, '/Step6.Find_DEGs/')) } Identify DEGs using Wilcoxon Rank-Sum Test. 
sc_object.markers <- FindAllMarkers(sc_object, only.pos = TRUE, min.pct = min.pct, logfc.threshold = logfc.threshold) write.csv(sc_object.markers, file = paste0(paste0(output.dir, '/Step6.Find_DEGs/'),'sc_object.markerGenes.csv'), quote=FALSE) # visualization sc_object.markers.top5 <- sc_object.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC) pdf(paste0(paste0(output.dir, '/Step6.Find_DEGs/'), 'sc_object_markerGenesTop5.pdf'), width = 0.5*length(unique(sc_object.markers.top5$gene)), height = 0.5*length(unique(Idents(sc_object)))) print(DotPlot(sc_object, features = unique(sc_object.markers.top5$gene), cols=c("lightgrey",'red'))+theme(axis.text.x =element_text(angle = 45, vjust = 1, hjust = 1))) dev.off() png(paste0(paste0(output.dir, '/Step6.Find_DEGs/'), 'sc_object_markerGenesTop5.png'), width = 20*length(unique(sc_object.markers.top5$gene)), height = 30*length(unique(Idents(sc_object)))) print(DotPlot(sc_object, features = unique(sc_object.markers.top5$gene), cols=c("lightgrey",'red'))+theme(axis.text.x =element_text(angle = 45, vjust = 1, hjust = 1))) dev.off() Figure 4.15: Dotplot showing marker genes of each cell type group 4.7.2 codes for using GPTCelltype Set the parameters for GPTCelltype. your_openai_API_key = '' tissuename = 'human bone marrow' gptmodel = 'gpt-3.5' Use GPTCelltype to assist cell type annotation. GPT_annotation( marker.genes = sc_object.markers, your_openai_API_key = your_openai_API_key, tissuename = tissuename, gptmodel = gptmodel, output.dir = paste0(output.dir, '/Step6.Find_DEGs/')) 4.7.3 Perform GO and KEGG enrichment. 
# GO enrichment if(Org=='mmu'){ OrgDb <- 'org.Mm.eg.db' }else if(Org=='hsa'){ OrgDb <- 'org.Hs.eg.db' }else{ stop("Org should be 'mmu' or 'hsa'.") } HemaScopeREnrichment(DEGs=sc_object.markers, OrgDb=OrgDb, output.dir=paste0(output.dir, '/Step6.Find_DEGs/')) Figure 4.16: Barplot showing GO(BP)and KEGG enrichment results of each cell type group 4.7.4 Perform subnetwork analysis Create a folder for saving the results of gene network analysis. if (!file.exists(paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/'))) { dir.create(paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/')) } Perform gene network analysis. OpenXGR_SAG(sc_object.markers = sc_object.markers, output.dir = paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/'), subnet.size = 10) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.17: Figure showing subnetwork of each cell type group identified by OpenXGR 4.8 Step 7. Assign Cell Cycles This step assigns cell cycle phases by analyzing cell cycle-related genes and generates plots of the cell cycle analysis results. 4.8.1 Function arguments: sc_object: A Seurat object containing single-cell RNA sequencing data. counts_matrix: The ‘counts’ slot in the Seurat object. data_matrix: The ‘data’ slot in the Seurat object. cellcycleCutoff: The cutoff value for distinguishing between cycling and quiescent cells. Cells with a G1G2Score below this cutoff are considered quiescent. cellTypeOrders: The order of cell types for visualization. If not provided, the function will use the unique cell types in the input Seurat object. databasePath: The path to the database required for the analysis. 
Org: A character vector specifying the species of cell cycle genes, can be ‘mmu’ (mouse) or ‘hsa’ (human). 4.8.2 codes for step7 Create a folder for saving the results of cell cycle analysis. print('Step7. Assign cell cycles.') if (!file.exists(paste0(output.dir, '/Step7.Assign_cell_cycles/'))) { dir.create(paste0(output.dir, '/Step7.Assign_cell_cycles/')) } Set the parameters for the cell cycle analysis. cellcycleCutoff = NULL Run the cell cycle analysis. datasets.before.batch.removal <- readRDS(paste0(paste0(output.dir, '/RDSfiles/'),'datasets.before.batch.removal.rds')) sc_object <- cellCycle(sc_object=sc_object, counts_matrix = GetAssayData(object = datasets.before.batch.removal, slot = "counts")%>%as.matrix(), data_matrix = GetAssayData(object = datasets.before.batch.removal, slot = "data")%>%as.matrix(), cellcycleCutoff = cellcycleCutoff, cellTypeOrders = unique(sc_object@meta.data$selectLabels), output.dir=paste0(output.dir, '/Step7.Assign_cell_cycles/'), databasePath = databasePath, Org = Org) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } 4.8.3 Outputs Figure 4.18: Barplot showing the proportion of different cell cycle within each cell type group Figure 4.19: Density plot showing the distribution of cell cycle scores 4.9 Step 8. Calculate Heterogeneity This step quantifies cell heterogeneity by computing Spearman correlation coefficients between cells within the same cell type groups. 4.9.1 Function arguments: expression_matrix: A numeric matrix representing the expression data, where rows are genes and columns are cells. The matrix should be appropriately preprocessed and filtered before using this function. 
cell_types_groups: A data frame specifying cell type annotations for each cell, including cell type labels and group information. cellTypeOrders: The order of cell types for visualization. If not provided, the function will use the unique cell types in the input cell_types_groups. 4.9.2 codes for step8 Create a folder for saving the results of heterogeneity calculation. print('Step8. Calculate heterogeneity.') if (!file.exists(paste0(output.dir, '/Step8.Calculate_heterogeneity/'))) { dir.create(paste0(output.dir, '/Step8.Calculate_heterogeneity/')) } Run the heterogeneity calculation process. expression_matrix <- GetAssayData(object = datasets.before.batch.removal, slot = "data")%>%as.matrix() expression_matrix <- expression_matrix[,rownames(sc_object@meta.data)] cell_types_groups <- as.data.frame(cbind(sc_object@meta.data$selectLabels, sc_object@meta.data$datasetID)) colnames(cell_types_groups) <- c('clusters', 'datasetID') if(is.null(ViolinPlot.cellTypeOrders)){ cellTypes_orders <- unique(sc_object@meta.data$selectLabels) }else{ cellTypes_orders <- ViolinPlot.cellTypeOrders } heterogeneity(expression_matrix = expression_matrix, cell_types_groups = cell_types_groups, cellTypeOrders = cellTypes_orders, output.dir = paste0(output.dir, '/Step8.Calculate_heterogeneity/')) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.20: Box plot showing the Spearman correlation coefficients between cells within the same cell type groups (here, a dataset with more samples is used as an example) 4.10 Step 9. Violin Plot for Marker Genes This step generates violin plots for marker genes across different cell types. 
4.10.1 Function arguments: dataMatrix: A data frame or matrix representing the expression data, where rows are cells and columns are genes. features: A character vector specifying the marker genes to plot in the violin plots. CellTypes: A factor vector containing cell type annotations for each cell. cellTypeOrders: A character vector specifying the order of cell types for plotting. Defaults to unique values in CellTypes. cellTypeColors: A character vector specifying the colors to use for cell type groups. Defaults to a color palette. 4.10.2 codes for step9 Create a folder for saving the violin plots of marker genes. print('Step9. Violin plot for marker genes.') if (!file.exists(paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/'))) { dir.create(paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/')) } Run violin plot visualization. if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){ DefaultAssay(sc_object) <- 'integrated' }else{ DefaultAssay(sc_object) <- 'RNA'} dataMatrix <- GetAssayData(object = sc_object, slot = "scale.data") if(is.null(marker.genes)&(Org == 'mmu')){ # mpp genes are from 'The bone marrow microenvironment at single cell resolution' # the other genes are from 'single cell characterization of haematopoietic progenitors and their trajectories in homeostasis and perturbed haematopoiesis' # the aliases of these genes were changed in gecodeM16:Gpr64 -> Adgrg2, Sdpr -> Cavin2, Hbb-b1 -> Hbb-bs, Sfpi1 -> Spi1 HSC_lineage_signatures <- c('Slamf1', 'Itga2b', 'Kit', 'Ly6a', 'Bmi1', 'Gata2', 'Hlf', 'Meis1', 'Mpl', 'Mcl1', 'Gfi1', 'Gfi1b', 'Hoxb5') Mpp_genes <- c('Mki67', 'Mpo', 'Elane', 'Ctsg', 'Calr') Erythroid_lineage_signatures <- c('Klf1', 'Gata1', 'Mpl', 'Epor', 'Vwf', 'Zfpm1', 'Fhl1', 'Adgrg2', 'Cavin2','Gypa', 'Tfrc', 'Hbb-bs', 'Hbb-y') Lymphoid_lineage_signatures <- c('Tcf3', 'Ikzf1', 'Notch1', 'Flt3', 'Dntt', 'Btg2', 'Tcf7', 'Rag1', 'Ptprc', 'Ly6a', 'Blnk') Myeloid_lineage_signatures <- c('Gfi1', 'Spi1', 'Mpo', 
'Csf2rb', 'Csf1r', 'Gfi1b', 'Hk3', 'Csf2ra', 'Csf3r', 'Sp1', 'Fcgr3') marker.genes <- c(HSC_lineage_signatures, Mpp_genes, Erythroid_lineage_signatures, Lymphoid_lineage_signatures, Myeloid_lineage_signatures) }else if(is.null(marker.genes)&(Org == 'hsa')){ HSPCs_lineage_signatures <- c('CD34','KIT','AVP','FLT3','MME','CD7','CD38','CSF1R','FCGR1A','MPO','ELANE','IL3RA') Myeloids_lineage_signatures <- c('LYZ','CD36','MPO','FCGR1A','CD4','CD14','CD300E','ITGAX','FCGR3A','FLT3','AXL', 'SIGLEC6','CLEC4C','IRF4','LILRA4','IL3RA','IRF8','IRF7','XCR1','CD1C','THBD', 'MRC1','CD34','KIT','ITGA2B','PF4','CD9','ENG','KLF','TFRC') B_cells_lineage_signatures <- c('CD79A','IGLL1','RAG1','RAG2','VPREB1','MME','IL7R','DNTT','MKI67','PCNA','TCL1A','MS4A1','IGHD','CD27','IGHG3') T_NK_cells_lineage_signatures <- c('CD3D','CD3E','CD8A','CCR7','IL7R','SELL','KLRG1','CD27','GNLY', 'NKG7','PDCD1','TNFRSF9','LAG3','CD160','CD4','CD40LG','IL2RA', 'FOXP3','DUSP4','IL2RB','KLRF1','FCGR3A','NCAM1','XCL1','MKI67','PCNA','KLRF') marker.genes <- c(HSPCs_lineage_signatures, Myeloids_lineage_signatures, B_cells_lineage_signatures, T_NK_cells_lineage_signatures) } if(is.null(ViolinPlot.cellTypeOrders)){ ViolinPlot.cellTypeOrders <- unique(sc_object@meta.data$selectLabels) } if(is.null(ViolinPlot.cellTypeColors)){ ViolinPlot.cellTypeColors <- viridis::viridis(length(unique(sc_object@meta.data$selectLabels))) } combinedViolinPlot(dataMatrix = dataMatrix, features = marker.genes, CellTypes = sc_object@meta.data$selectLabels, cellTypeOrders = ViolinPlot.cellTypeOrders, cellTypeColors = ViolinPlot.cellTypeColors, Org = Org, output.dir = paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/'), databasePath = databasePath) Save the variables. 
# Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.21: Violin plot showing the expression of marker genes between cell type groups 4.11 Step 10. Calculate Lineage Scores This step calculates lineage scores for specified gene sets based on the provided expression data. It then generates a heatmap of lineage scores and a heatmap of gene expression patterns. 4.11.1 Function arguments: expression_matrix: A data frame or matrix representing the expression data, where rows are cells and columns are genes. cellTypes: A character vector specifying cell type annotations for each cell. e.g. c(“HSC”,“HSC”,“HSC”,“MPP1”,“MPP2”,“MPP2”,“MPP2” …) cellTypes_orders: A character vector specifying the order of cell types for plotting. e.g. c(“HSC”,“MPP1”,“MPP2”) cellTypes_colors: A character vector specifying the colors to use for cell type groups. e.g. c(“HSC” = ‘#006d2c’,“MPP1” = ‘#4292c6’,“MPP2”= ‘#810f7c’). groups: A character vector specifying groups or clusters within each cell type. groups_orders: A character vector specifying the order of groups or clusters for plotting. groups_colors: A character vector specifying the colors to use for group or cluster annotations. e.g. c(‘group1’=‘#d73027’,‘group2’=‘#2171b5’) lineage.genelist: A list of gene sets representing lineage markers. lineage.names: A character vector specifying the names of the lineages. 4.11.2 codes for step10 Create a folder for saving the results of lineage score calculation. print('Step10. 
Calculate lineage scores.') # we use normalized data here if (!file.exists(paste0(output.dir, '/Step10.Calculate_lineage_scores/'))) { dir.create(paste0(output.dir, '/Step10.Calculate_lineage_scores/')) } Run lineage score calculation. if(is.null(lineage.genelist)&is.null(lineage.names)&(Org == 'mmu')){ lineage.genelist <- c(list(HSC_lineage_signatures), list(Mpp_genes), list(Erythroid_lineage_signatures), list(Lymphoid_lineage_signatures), list(Myeloid_lineage_signatures)) lineage.names <- c('HSC_lineage_signatures', 'Mpp_genes', 'Erythroid_lineage_signatures', 'Lymphoid_lineage_signatures', 'Myeloid_lineage_signatures') }else if(is.null(lineage.genelist)&is.null(lineage.names)&(Org == 'hsa')){ lineage.genelist <- c(list(HSPCs_lineage_signatures), list(Myeloids_lineage_signatures), list(B_cells_lineage_signatures), list(T_NK_cells_lineage_signatures)) lineage.names <- c('HSPCs_lineage_signatures', 'Myeloids_lineage_signatures', 'B_cells_lineage_signatures', 'T_NK_cells_lineage_signatures') } if(is.null(ViolinPlot.cellTypeOrders)){ cellTypes_orders <- unique(sc_object@meta.data$selectLabels) }else{ cellTypes_orders <- ViolinPlot.cellTypeOrders } lineageScores(expression_matrix = expression_matrix, cellTypes = sc_object@meta.data$selectLabels, cellTypes_orders = cellTypes_orders, cellTypes_colors = ViolinPlot.cellTypeColors, groups = sc_object@meta.data$datasetID, groups_orders = unique(sc_object@meta.data$datasetID), groups_colors = groups_colors, lineage.genelist = lineage.genelist, lineage.names = lineage.names, Org = Org, output.dir = paste0(output.dir, '/Step10.Calculate_lineage_scores/'), databasePath = databasePath) Save the variables. 
# Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.22: Heatmap showing the expression of lineage genes for each cell Figure 4.23: Heatmap showing the score of lineage signatures for each cell 4.12 Step 11. GSVA This step runs GSVA analysis, which calculates enrichment scores for gene sets in each cell using the provided gene list. It also performs differential GSVA analysis between specified cell groups and generates heatmaps of the results. 4.12.1 Function arguments: sc_object: A Seurat object containing the single-cell RNA-seq data. GSVA.genelist: A list of gene sets for GSVA analysis. GSVA.cellTypes: A character vector specifying the cell types or labels for each cell. GSVA.cellTypes.orders: A character vector specifying the order of cell types for visualization. GSVA.cellGroups: A character vector specifying the cell groups or conditions for each cell. GSVA.identify.cellType.features: Logical. If TRUE, identify cell type-specific features. GSVA.identify.diff.features: Logical. If TRUE, identify differentially expressed features between cell groups. GSVA.comparison.design: A list specifying the experimental design for differential GSVA analysis. OrgDB: An organism-specific annotation database (OrgDb) for gene symbol conversion. e.g. org.Mm.eg.db or org.Hs.eg.db. 4.12.2 codes for running step11 Create a folder for saving the results of GSVA. print('Step11. GSVA.') if (!file.exists(paste0(output.dir, '/Step11.GSVA/'))) { dir.create(paste0(output.dir, '/Step11.GSVA/')) } Run GSVA. 
setwd(wdir) if(Org=='mmu'){ load(paste0(databasePath,"/mouse_c2_v5p2.rdata")) GSVA.genelist <- Mm.c2 assign('OrgDB', org.Mm.eg.db) }else if(Org=='hsa'){ load(paste0(databasePath,"/human_c2_v5p2.rdata")) GSVA.genelist <- Hs.c2 assign('OrgDB', org.Hs.eg.db) }else{ stop("Org should be 'mmu' or 'hsa'.") } if(is.null(ViolinPlot.cellTypeOrders)){ cellTypes_orders <- unique(sc_object@meta.data$selectLabels) }else{ cellTypes_orders <- ViolinPlot.cellTypeOrders } run_GSVA(sc_object = sc_object, GSVA.genelist = GSVA.genelist, GSVA.cellTypes = sc_object@meta.data$selectLabels, GSVA.cellTypes.orders = cellTypes_orders, GSVA.cellGroups = sc_object@meta.data$datasetID, GSVA.identify.cellType.features = Step11_GSVA.identify.cellType.features, GSVA.identify.diff.features = Step11_GSVA.identify.diff.features, GSVA.comparison.design = Step11_GSVA.comparison.design, OrgDB = OrgDB, output.dir = paste0(output.dir, '/Step11.GSVA/')) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.24: GSVA Heatmap showing the enriched pathways of each cell type group 4.13 Step 12. Construct Trajectories In this step, users are allowed to construct trajectories using three methods including Monocle2, slingshot and scVelo. 4.13.1 data preparation Load gene symbols and ensemble IDs. 
DefaultAssay(sc_object) <- 'RNA' countsSlot <- GetAssayData(object = sc_object, slot = "counts") gene_metadata <- as.data.frame(rownames(countsSlot)) rownames(gene_metadata) <- gene_metadata[,1] if(Org == 'mmu'){ load(paste0(databasePath,"/mouseGeneSymbolandEnsembleID.rdata")) gene_metadata $ ensembleID <- mapvalues(x = gene_metadata[,1], from = mouseGeneSymbolandEnsembleID$geneName, to = mouseGeneSymbolandEnsembleID$ensemblIDNoDot, warn_missing = FALSE) }else if(Org == 'hsa'){ load(paste0(databasePath,"/humanGeneSymbolandEnsembleID.rdata")) gene_metadata $ ensembleID <- mapvalues(x = gene_metadata[,1], from = humanGeneSymbolandEnsembleID$geneName, to = humanGeneSymbolandEnsembleID$ensemblIDNoDot, warn_missing = FALSE) } colnames(gene_metadata) <- c('gene_short_name','ensembleID') Create folders for saving the results of trajectory construction. print('Step12. Construct trajectories.') if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/')) } if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/monocle2/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/monocle2/')) } if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/slingshot/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/slingshot/')) } if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/scVelo/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/scVelo/')) } Prepare the input data. 
if(is.null(Step12_Construct_Trajectories.clusters)){ sc_object.subset <- sc_object countsSlot.subset <- GetAssayData(object = sc_object.subset, slot = "counts") }else{ sc_object.subset <- subset(sc_object, subset = selectLabels %in% Step12_Construct_Trajectories.clusters) countsSlot.subset <- GetAssayData(object = sc_object.subset, slot = "counts") } 4.13.2 monocle2 Running monocle2 involves several steps: Creating a Monocle cellDataSet using the provided cellData, phenoData, and featureData. Estimating size factors, dispersions, and detecting highly variable genes. Performing differential gene expression analysis to identify genes associated with cell state changes. Ordering cells along the inferred trajectories and reducing dimensionality. Generating and saving trajectory plots, including cell trajectory by “State” and by “Cell Types.” 4.13.2.1 Function arguments: cellData: A matrix of gene expression values, where columns represent cells and rows represent genes. phenoData: A data frame containing cell metadata, such as cell labels or other relevant information. featureData: A data frame containing information about features (genes) in the dataset. lowerDetectionLimit: The lower detection limit for gene expression. Genes with expression values below this limit will be treated as non-detected. expressionFamily: The family of the expression distribution used in Monocle analysis. cellTypes: A character vector specifying cell types or labels used for coloring in trajectory plots. monocle.orders: A character vector specifying the order of cell types in the Monocle analysis. monocle.colors: A character vector specifying colors for cell types in trajectory plots. 
4.13.2.2 codes for running monocle2 phenoData <- sc_object.subset@meta.data featureData <- gene_metadata run_monocle(cellData = countsSlot.subset, phenoData = phenoData, featureData = featureData, lowerDetectionLimit = 0.5, expressionFamily = VGAM::negbinomial.size(), cellTypes='selectLabels', monocle.orders=Step12_Construct_Trajectories.clusters, monocle.colors = ViolinPlot.cellTypeColors, output.dir = paste0(output.dir, '/Step12.Construct_trajectories/monocle2/')) Figure 4.25: Figures showing cells in different trajectory states (left) and corresponding cell type groups (right) 4.13.3 Slingshot Running Slingshot to infer cell trajectories and lineage relationships involves several steps: Constructs a Slingshot object using PCA embeddings, cell types, start clusters, and end clusters. Computes and plots the trajectory curves. Computes and plots pseudotime values along the trajectory. 4.13.3.1 Function arguments: slingshot.PCAembeddings: A matrix containing the PCA embeddings of the single-cell data, typically obtained from dimensionality reduction techniques like PCA. slingshot.cellTypes: A character vector specifying cell types or labels for each cell. slingshot.start.clus: A character vector specifying the initial cluster(s) from which cell trajectories should start. slingshot.end.clus: A character vector specifying the target cluster(s) where cell trajectories should end. slingshot.colors: A vector of colors corresponding to cell types for plotting. If not provided, default colors will be used. 
4.13.3.2 codes for running Slingshot if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){ DefaultAssay(sc_object.subset) <- 'integrated' }else{ DefaultAssay(sc_object.subset) <- 'RNA'} run_slingshot(slingshot.PCAembeddings = Embeddings(sc_object.subset, reduction = "pca")[, PCs], slingshot.cellTypes = sc_object.subset@meta.data$selectLabels, slingshot.start.clus = slingshot.start.clus, slingshot.end.clus = slingshot.end.clus, slingshot.colors = slingshot.colors, output.dir = paste0(output.dir, '/Step12.Construct_trajectories/slingshot/')) Figure 4.26: Figures showing slingshot curve and inferred pseudotime value 4.13.4 scVelo scVelo is implemented in Python, and it takes a Seurat object, cell embeddings, and cell type information as input. The process of data preparation includes the following steps: Format the Seurat object metadata, including cell types and sample names. Extract the spliced, unspliced, and ambiguous count matrices from the Seurat object. Combine the metadata and cell embeddings. Write the necessary input files for scVelo analysis, including cell embeddings, count matrices, and metadata. 4.13.4.1 Function arguments: sc_object: A Seurat object containing the single-cell RNA-seq data. loom.files.path: A character vector specifying the path(s) to the loom files for scVelo analysis. scvelo.reduction: A character specifying the reduction method used for scVelo analysis (default is ‘pca’). scvelo.column: A character specifying the column in the Seurat object metadata containing cell types. 
4.13.4.2 codes for running Scvelo if((!is.null(loom.files.path))&(!is.null(pythonPath))){ prepareDataForScvelo(sc_object = sc_object.subset, loom.files.path = loom.files.path, scvelo.reduction = 'pca', scvelo.column = 'selectLabels', output.dir = paste0(output.dir, '/Step12.Construct_trajectories/scVelo/')) reticulate::py_run_string(paste0("import os\\noutputDir = '", output.dir, "'")) reticulate::py_run_file(file.path(system.file(package = "HemaScopeR"), "python/sc_run_scvelo.py"), convert = FALSE) } Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.27: Figure showing trajectory predicted by scvelo 4.14 Step 13. TF Analysis This step runs SCENIC (Single-Cell Regulatory Network Inference and Clustering) analysis, including the construction of a co-expression network, gene filtering, correlation, and the GENIE3 algorithm to infer regulatory networks. 4.14.1 Function arguments: countMatrix: A matrix containing the raw counts of the single-cell RNA-seq data. cellTypes: A character vector specifying the cell types or labels for each cell. datasetID: A character vector specifying the dataset IDs for each cell. cellTypes_colors: A named vector of colors for cell type visualization. cellTypes_orders: A character vector specifying the desired order of cell types. groups_colors: A named vector of colors for grouping visualization. groups_orders: A character vector specifying the desired order of groups. Org: A character vector specifying the organism (‘mmu’ for mouse or ‘hsa’ for human). 4.14.2 codes for running step13 Create folders for saving the results of TF analysis. print('Step13. 
TF analysis.') if (!file.exists(paste0(output.dir, '/Step13.TF_analysis/'))) { dir.create(paste0(output.dir, '/Step13.TF_analysis/')) } Run SCENIC to perform TF analysis. run_SCENIC(countMatrix = countsSlot, cellTypes = sc_object@meta.data$selectLabels, datasetID = sc_object@meta.data$datasetID, cellTypes_colors = Step13_TF_Analysis.cellTypes_colors, cellTypes_orders = unique(sc_object@meta.data$selectLabels), groups_colors = Step13_TF_Analysis.groups_colors, groups_orders = unique(sc_object@meta.data$datasetID), Org = Org, output.dir = paste0(output.dir, '/Step13.TF_analysis/'), pythonPath = pythonPath, databasePath = databasePath) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.28: Heatmap showing predicted regulon activity for each cell Figure 4.29: Heatmap showing statistics of regulons 4.15 Step 14. Cell-Cell Interaction The step takes expression data, cluster labels, and other parameters to perform cell-cell communication analysis using the CellChat package. It includes the following steps: Data input and preprocessing. Initialization of a CellChat object. Set the ligand-receptor interaction database based on the specified organism. Preprocess the expression data for cell-cell communication analysis. Identify overexpressed genes and interactions. Project data based on protein-protein interaction networks. Inference of cell-cell communication network. Visualization of the communication network. Systems analysis of cell-cell communication network. 4.15.1 Function arguments: data.input: A matrix of expression data, where rows represent genes and columns represent cells. Row names should be in the format of gene symbols. 
labels: A vector of cluster labels for each cell, corresponding to the columns of data.input. cell.orders: A character vector specifying the order of cell types or clusters in the analysis. cell.colors: A character vector specifying colors for cell types or clusters in the analysis. sample.names: A vector of sample or cell names, corresponding to the columns of data.input. Org: A string indicating the organism used in the analysis. It should be either “mmu” (mouse) or “hsa” (human). sorting: A logical value indicating whether to consider cell population size in communication analysis. 4.15.2 codes for running step14 Create folders for saving the results of cell-cell interaction analysis. print('Step14. Cell-cell interaction.') if (!file.exists(paste0(output.dir, '/Step14.Cell_cell_interection/'))) { dir.create(paste0(output.dir, '/Step14.Cell_cell_interection/')) } Run CellChat to perform cell-cell interaction analysis. tempwd <- getwd() run_CellChat(data.input=countsSlot, labels = sc_object@meta.data$selectLabels, cell.orders = ViolinPlot.cellTypeOrders, cell.colors = ViolinPlot.cellTypeColors, sample.names = rownames(sc_object@meta.data), Org = Org, sorting = sorting, output.dir = paste0(output.dir, '/Step14.Cell_cell_interection/')) setwd(tempwd) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.30: Figures showing the interaction number and strength between each cell group Figure 4.31: Heatmap showing the strength of incoming and outgoing signals for each cell type group across various pathways. 
Figure 4.32: Figure showing LRs interaction between each cell type group "],["integrated-st-pipeline.html", "5 Integrated ST pipeline 5.1 For 10X Visium data 5.2 For MERFISH data 5.3 For stereo-seq data", " 5 Integrated ST pipeline Load the R packages. # sc libraries library(Seurat) library(phateR) library(DoubletFinder) library(monocle) library(slingshot) library(URD) library(GSVA) library(limma) library(plyr) library(dplyr) library(org.Mm.eg.db) library(org.Hs.eg.db) library(CellChat) library(velocyto.R) library(SeuratWrappers) library(stringr) library(scran) library(ggpubr) library(viridis) library(pheatmap) library(parallel) library(reticulate) library(SCENIC) library(feather) library(AUCell) library(RcisTarget) library(Matrix) library(foreach) library(doParallel) library(clusterProfiler) library(OpenXGR) # st libraries library(RColorBrewer) library(Rfast2) library(SeuratDisk) library(abcCellmap) library(biomaRt) library(copykat) library(gelnet) library(ggplot2) library(parallelDist) library(patchwork) library(markdown) # getopt library(getopt) library(tools) # HemaScopeR library(HemaScopeR) 5.1 For 10X Visium data Run the integrated 10X Visium pipeline. 
st_10x_visium_pipeline( input.data.dir = 'path/to/data', output.dir = '.', sampleName = 'Hema_ST', # For Step1 Loading rds.file = FALSE, filename = "filtered_feature_bc_matrix.h5", assay = "Spatial", slice = "slice1", filter.matrix = TRUE, to.upper = FALSE, # For Step2 QC Step2_QC = TRUE, min.gene = 200, min.nUMI = 500, max.gene = Inf, max.nUMI = Inf, min.spot = 0, bool.remove.mito = FALSE, species = 'mouse', # 'human' or 'mouse' # For Step3 Clustering Step3_Clustering = TRUE, normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.8, # For Step4 Find DEGs Step4_Find_DEGs = TRUE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', # For Step5 SVF Step5_SVFs = TRUE, selection.method = 'moransi', n.top.show = 10, n.col.show = 5, # For Step6 Interaction Step6_Interaction = TRUE, commot.signaling_type = 'Secreted Signaling', commot.database = 'CellChat', commot.min_cell_pct = 0.05, commot.dis_thr = 500, commot.n_permutations = 100, # For Step7 CNV analysis Step7_CNV = TRUE, copykat.genome = NULL, copykat.LOW.DR = 0.05, copykat.UP.DR = 0.1, copykat.win.size = 25, copykat.distance = "euclidean", copykat.n.cores = 1, # For Step8 Deconvolution Step8_Deconvolution = TRUE, cell2loc.sc.h5ad.dir = NULL, cell2loc.sc.max.epoch = 1000, cell2loc.st.max.epoch = 10000, cell2loc.use.gpu = TRUE, cell2loc.use.dataset = 'LymphNode', # For Step9 Cellcycle Step9_Cellcycle = TRUE, s.features = NULL, g2m.features = NULL, # For Step10 Niche Step10_Niche = TRUE, coexistence.method = 'correlation', Niche.cluster.n = 4, # settings pythonPath = 'path/to/python', verbose = FALSE, genReport = TRUE ) 5.2 For MERFISH data Run the integrated MERFISH pipeline. 
st_MERFISH_pipeline( input.data.dir, output.dir, sampleName = 'Hema_MERFISH', fov = 'fov', tech = 'Vizgen', # For Step1 Loading rds.file = FALSE, assay = NULL, Vizgen.z = 3L, Akoya.type = 'inform', # For Step2 QC Step2_QC = TRUE, min.gene = 20, min.nUMI = 50, max.gene = Inf, max.nUMI = Inf, min.spot = 0, bool.remove.mito = FALSE, species = 'mouse', # 'human' or 'mouse' # For Step3 Clustering Step3_Clustering = TRUE, normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.4, # For Step4 Find DEGs Step4_Find_DEGs = TRUE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', # For Step5 SVF Step5_SVFs = TRUE, selection.method = 'moransi', n.top.show = 10, n.col.show = 5, # For Step6 Interaction Step6_Interaction = TRUE, h5ad_path = NULL, counts_path = NULL, coordinates_path = NULL, coordinates_index_col = 0, counts_transpose = TRUE, commot.signaling_type = 'Secreted Signaling', commot.database = 'CellChat', commot.min_cell_pct = 0.05, commot.dis_thr = 500, commot.n_permutations = 100, # For Step7 Cellcycle Step7_Cellcycle = TRUE, s.features = NULL, g2m.features = NULL, verbose = FALSE, pythonPath = NULL ) 5.3 For stereo-seq data Run the integrated stereo-seq pipeline. 
st_stereo_pipeline( input.data.dir, output.dir, sampleName = 'Hema_stereo', # For Step1 Loading data_type = 'gem', sep = '\\t', bin_type = 'bins', bin_size = 100, spot_diameter = 80, is_sparse = TRUE, gene_list = NULL, region = NULL, assay = 'Spatial', # For Step2 QC Step2_QC = TRUE, min.gene = 20, min.nUMI = 50, max.gene = Inf, max.nUMI = Inf, min.spot = 0, bool.remove.mito = FALSE, species = 'mouse', # 'human' or 'mouse' # For Step3 Clustering Step3_Clustering = TRUE, normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.1, max.n.cluster = 30, # For Step4 Find DEGs Step4_Find_DEGs = TRUE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', # For Step5 SVF Step5_SVFs = TRUE, selection.method = 'moransi', n.top.show = 10, n.col.show = 5, # For Step6 Interaction Step6_Interaction = TRUE, h5ad_path = NULL, counts_path = NULL, coordinates_path = NULL, coordinates_index_col = 0, counts_transpose = TRUE, commot.signaling_type = 'Secreted Signaling', commot.database = 'CellChat', commot.min_cell_pct = 0.05, commot.dis_thr = 500, commot.n_permutations = 100, # For Step7 Cellcycle Step7_Cellcycle = TRUE, s.features = NULL, g2m.features = NULL, verbose = FALSE, pythonPath = NULL ) "],["stey-by-step-st-seq-pipeline.html", "6 Step-by-step st-seq pipeline 6.1 Step 1. Data loading 6.2 Step 2. Quality Control 6.3 Step 3. Clustering 6.4 Step 4. DEGs 6.5 Step 5. Spatially variable features 6.6 Step 6. Spatial interaction 6.7 Step 7. CNV analysis 6.8 Step 8. Deconvolution 6.9 Step 9. Cell cycle 6.10 Step 10. Niche analysis", " 6 Step-by-step st-seq pipeline 6.1 Step 1. Data loading The st_Loading_Data function is designed for loading 10X Visium spatial transcriptomics data from Space Ranger. It will load data from input.data.dir and output it in the SeuratObject format. 6.1.1 Function arguments: input.data.dir: The directory where the input data is stored. output.dir: The directory where the processed output will be saved. 
If not specified, the output is saved in the current working directory. Default is ‘.’. sampleName: A string naming the sample. Default is ‘Hema_ST’. rds.file: A boolean indicating if the input data is in RDS file format rather than the typical output of Space Ranger. Default is FALSE. filename: The name of the file to be loaded if the data is not in RDS format. Default is “filtered_feature_bc_matrix.h5”. assay: The specific assay to apply to the data. Default is ‘Spatial’. slice: The image slice identifier for the spatial data. Default is ‘slice1’. filter.matrix: A boolean indicating whether to load filtered matrix. Default is TRUE. to.upper: A boolean indicating whether to convert feature names to upper form. Default is FALSE. 6.1.2 Function behavior: Directory Creation: The function first checks if the output.dir exists; if not, it creates it. RDS File Handling: If rds.file is TRUE, it reads the RDS file, ensuring the specified assay and slice are present in the Seurat object. Non-RDS File Handling: If rds.file is FALSE, it loads the data using Load10X_Spatial from Seurat. Saving the Object: Uses SaveH5Seurat and Convert to save the Seurat object in rds and h5ad formats. File Copying: Copies any necessary files (filter matrix, spatial image) to the output.dir. Return Value: Returns the processed Seurat object. 6.1.3 An example: st_obj <- st_Loading_Data( input.data.dir = 'path/to/data', output.dir = '.', sampleName = 'Hema_ST', rds.file = FALSE, filename = 'filtered_feature_bc_matrix.h5', assay = 'Spatial', slice = 'slice1', filter.matrix = TRUE, to.upper = FALSE ) 6.1.4 Outputs: Spatial transcriptome data in rds and h5ad formats 6.2 Step 2. Quality Control The QC_Spatial function performs basic quality control on a SeuratObject containing 10X visium data and returns the filtered SeuratObject. It provides options to set thresholds for the number of genes, nUMI (unique molecular identifiers), and spots expressing each gene. 
It also allows for the removal of mitochondrial genes based on species. 6.2.1 Function arguments: st_obj: A SeuratObject of 10X visium data. output.dir: A character string specifying the path to store the results and figures. Default is the current working directory. min.gene: An integer representing the minimum number of genes detected in a spot. Default is 200. max.gene: An integer representing the maximum number of genes detected in a spot. Default is Inf (no upper limit). min.nUMI: An integer representing the minimum number of nUMI detected in a spot. Default is 500. max.nUMI: An integer representing the maximum number of nUMI detected in a spot. Default is Inf (no upper limit). min.spot: An integer representing the minimum number of spots expressing each gene. Default is 3. species: A character string representing the species of sample, either ‘human’ or ‘mouse’. bool.remove.mito: A boolean value indicating whether to remove mitochondrial genes. Default is TRUE. SpatialColors: A function that interpolates a set of given colors to create new color palettes and color ramps. Default is a color palette with reversed Spectral colors from RColorBrewer. 6.2.2 Function behavior: Plots and saves the spatial distribution of nUMI and nGene. Plots and saves violin plots for nUMI and nGene. Identifies and marks low-quality spots based on nUMI and nGene thresholds. Plots the spatial distribution of quality. Plots and saves a histogram for the number of spots expressing each gene. Plots the spatial distribution of mitochondrial genes. Saves the raw SeuratObject before filtering. Removes low-quality spots and genes with fewer occurrences. Optionally removes mitochondrial genes. Saves the filtered SeuratObject. Returns the filtered st_obj. 
6.2.3 An example: st_obj <- QC_Spatial( st_obj = st_obj, output.dir = '.', min.gene = 200, min.nUMI = 500, max.gene = Inf, max.nUMI = Inf, min.spot = 3, species = 'human', bool.remove.mito = TRUE, SpatialColors = colorRampPalette(colors = rev(x = brewer.pal(n = 11, name = "Spectral"))) ) 6.2.4 Outputs: Figures showing the spatial distribution of nUMI and nGene. Violin plots of nUMI and nGene. Figures showing the quality. Histograms for the number of spots expressing each gene. Figures showing the spatial distribution of mitochondrial genes. Raw and filtered SeuratObject. 6.3 Step 3. Clustering The st_Clustering function is designed to perform clustering analysis on spatial transcriptomics data. It integrates several key steps including data normalization, dimensionality reduction, clustering, and visualization. The function saves the results and visualizations to output.dir. 6.3.1 Function arguments: st_obj: The input spatial transcriptomics seurat object that contains the data to be clustered. output.dir: The directory where the output files will be saved. Default is the current directory (‘.’). normalization.method: The method used for data normalization. Default is ‘SCTransform’. npcs: The number of principal components to use in PCA. Default is 50. pcs.used: The principal components to use for clustering. Default is the first 10 PCs (1:10). resolution: The resolution parameter for the clustering algorithm. Default is 0.8. verbose: A logical flag to print progress messages. Default is FALSE. 6.3.2 Function behavior: Data Normalization and PCA: Depending on the normalization.method, the function either uses SCTransform or a standard normalization method followed by scaling and variable feature detection. Performs PCA on the normalized data. Clustering and Dimensionality Reduction: Finds nearest neighbors using the specified principal components (pcs.used). Identifies clusters using the specified resolution. 
Performs UMAP and t-SNE for visualization of the clusters. Visualization: Generates spatial, UMAP, and t-SNE plots of the clusters with customized color schemes. Saves these plots as images in the specified directory. Saving Results: Saves the updated st_obj as an RDS file. Exports the metadata of st_obj to a CSV file. Return Value: Returns the updated st_obj containing the clustering results. 6.3.3 An example: st_obj <- st_Clustering( st_obj = st_obj, output.dir = '.', normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.8, verbose = FALSE ) 6.3.4 Outputs: Figures showing the results of clustering. SeuratObject in rds format. 6.4 Step 4. DEGs The st_Find_DEGs function is designed to identify differentially expressed genes (DEGs) in spatial transcriptomics data. It performs differential expression analysis based on clustering results, visualizes the top markers, and saves the results to output.dir. 6.4.1 Function arguments: st_obj: The input spatial transcriptomics object containing the data for DEG analysis. output.dir: The directory where output files will be saved. Default is the current directory (‘.’). ident.label: The metadata label used for identifying clusters. Default is 'seurat_clusters'. only.pos: A logical flag to include only positive markers. Default is TRUE. min.pct: The minimum fraction of cells expressing the gene in either cluster. Default is 0.25. logfc.threshold: The log fold change threshold for considering a gene differentially expressed. Default is 0.25. test.use: The statistical test to use for differential expression analysis. Default is 'wilcox'. verbose: A logical flag to print progress messages. Default is FALSE. 6.4.2 Function behavior: Set Identifiers: Sets the cluster identifiers in the spatial transcriptomics object (st_obj) based on the specified ident.label. 
Find Differentially Expressed Genes (DEGs): Performs differential expression analysis using the specified parameters (only.pos, min.pct, logfc.threshold, test.use). Top Marker Genes: Selects the top 5 marker genes for each cluster based on the highest average log fold change. Visualization: Generates a dot plot for the top DEGs and saves the plot as an image in the specified directory. Saving Results: Saves the DEG results as a CSV file. Return Value: Returns the data frame containing the identified DEGs. 6.4.3 An example: st.markers <- st_Find_DEGs( st_obj = st_obj, output.dir = '.', ident.label = 'seurat_clusters', only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', verbose = FALSE ) 6.4.4 Outputs: Dot plots showing markers. CSV file containing the information of markers. 6.5 Step 5. Spatially variable features The st_SpatiallyVariableFeatures function identifies and visualizes spatially variable features (SVFs) in spatial transcriptomics data. It integrates the identification of spatially variable features using a specified method, saves the results to a directory, and creates visualizations of the top spatially variable features. 6.5.1 Function arguments: st_obj: The input spatial transcriptomics object containing the data for analysis. output.dir: The directory where output files will be saved. Default is the current directory. assay: The assay to be used for finding spatially variable features. Default is 'SCT'. selection.method: The method used for selecting spatially variable features. Default is 'moransi'. n.top.show: The number of top spatially variable features to visualize. Default is 10. n.col: The number of columns for the visualization grid. Default is 5. verbose: A logical flag to print progress messages. Default is FALSE. 6.5.2 Function behavior: Identify Spatially Variable Features: Identifies spatially variable features using the specified method and assay. Suppresses warnings during the process. 
Save Metadata: Extracts metadata features and saves them as a CSV file in output.dir. Visualization: Selects the top n.top.show spatially variable features. Generates and saves a spatial feature plot of these features in the specified directory. Return Value: Returns the updated st_obj containing the identified spatially variable features. 6.5.3 An example: st_obj <- st_SpatiallyVariableFeatures( st_obj = st_obj, output.dir = '.', assay = st_obj@active.assay, selection.method = 'moransi', n.top.show = 10, n.col = 5, verbose = FALSE ) 6.5.4 Outputs: Figures showing SVFs. CSV file containing the information of SVFs. 6.6 Step 6. Spatial interaction The st_Interaction function is used to identify and visualize interactions between clusters based on spatial transcriptomics data. It utilizes Commot to analyze spatial interactions, identify pathway activities, and assess the strength and significance of interactions. 6.6.1 Function arguments: st_data_path: Path to the spatial transcriptomics data. metadata_path: Path to the metadata associated with the spatial transcriptomics data. library_id: Identifier for the spatial transcriptomics library. Default is 'Hema_ST'. label_key: Key in the metadata to identify cell clusters. Default is 'seurat_clusters'. save_path: The directory where output files will be saved. Default is the current directory. species: The species of the spatial transcriptomics data. Default is 'human'. signaling_type: Type of signaling interactions to consider. Default is 'Secreted Signaling'. database: Database to be used for the analysis. Default is 'CellChat'. min_cell_pct: Minimum percentage of cells to consider for interaction analysis. Default is 0.05. dis_thr: Distance threshold for defining interactions. Default is 500. n_permutations: Number of permutations for assessing significance. Default is 100. pythonPath: The path to the Python environment containing Commot to use for the analysis. Default is ‘.’. 
6.6.2 Function behavior: Commot Analysis: Uses Commot to perform interaction analysis, identifying interactions within and between clusters. Visualization: Generates visualizations of pathway interactions and interactions between ligand-receptors (LRs) within and between clusters, and saves them in save_path. 6.6.3 An example: st_Interaction( st_data_path = 'path/to/data', metadata_path = 'path/to/metadata', library_id = 'Hema_ST', label_key = 'seurat_clusters', save_path = '.', species = 'human', signaling_type = 'Secreted Signaling', database = 'CellChat', min_cell_pct = 0.05, dis_thr = 500, n_permutations = 100, pythonPath = 'path/to/python' ) 6.6.4 Outputs: Dot plot showing pathway interaction between and within clusters. Dot plot showing LRs interaction between and within clusters. The information of each LR and pathway. 6.7 Step 7. CNV analysis The st_CNV function identifies and visualizes copy number variations (CNVs) in spatial transcriptomics data. It uses CopyKAT to perform the CNV analysis, saves the results, and generates visual representations of CNV states. 6.7.1 Function arguments: st_obj: The input spatial transcriptomics object containing the data for analysis. save_path: The directory where output files will be saved. assay: The assay to be used for CNV analysis. Default is 'Spatial'. LOW.DR: The lower threshold for the dropout rate in CopyKAT. Default is 0.05. UP.DR: The upper threshold for the dropout rate in CopyKAT. Default is 0.1. win.size: The window size for the CNV analysis. Default is 25. distance: The distance metric to be used for the analysis. Default is \"euclidean\". genome: The genome version to be used, ‘hg20’ or ‘mm10’. Default is \"hg20\". n.cores: The number of cores to be used for parallel processing. Default is 1. species: The species of the spatial transcriptomics data. Default is 'human'. 6.7.2 Function behavior: CopyKAT Analysis: Runs CopyKAT pipeline to perform CNV analysis using the provided parameters. 
Saving Results: Saves the CopyKAT results as an RDS file. Plotting: Generates plots of the CNV states and saves them in save_path. Updating Metadata: Updates the spatial transcriptomics object with CNV state metadata. Return Value: Returns the updated st_obj containing the CNV state information. 6.7.3 An example: st_obj <- st_CNV( st_obj = st_obj, save_path = '.', assay = 'Spatial', LOW.DR = 0.05, UP.DR = 0.1, win.size = 25, distance = "euclidean", genome = 'hg20', n.cores = 1, species = 'human' ) 6.7.4 Outputs: Figures showing the predicted CNV states. Figures showing the CNV heatmap. rds files of results of copykat. 6.8 Step 8. Deconvolution The st_Deconvolution function aims to perform spatial deconvolution analysis on spatial transcriptomics data to estimate the cell-type composition and abundance in different regions. The function utilizes cell2location to infer cell-type abundance and spatial distributions, allowing for the visualization and interpretation of spatially resolved cell populations within the tissue. 6.8.1 Function arguments: st.data.dir: Path to the spatial transcriptomics data. sc.h5ad.dir: Path to the single-cell RNA-seq data in h5ad format. Default is NULL. library_id: Identifier for the spatial transcriptomics library. Default is 'Hema_ST'. st_obj: Spatial transcriptomics object containing the data for analysis. Default is NULL. save_path: The directory where output files will be saved. Default is NULL. sc.labels.key: Key in the single-cell metadata to identify cell clusters. Default is 'seurat_clusters'. species: The species of the spatial transcriptomics data. Default is 'mouse'. sc.max.epoch: Maximum number of epochs used for single-cell deconvolution. Default is 1000. st.max.epoch: Maximum number of epochs used for spatial deconvolution. Default is 10000. use.gpu: Logical value indicating whether to use GPU for computation. Default is FALSE. use.Dataset: The dataset to be used for analysis, such as 'HematoMap' or 'LymphNode'. 
pythonPath: The path to the Python environment containing cell2location to use for the analysis. Default is ‘.’. 6.8.2 Function behavior: Deconvolution Analysis: Performs the spatial deconvolution analysis using the provided spatial transcriptomics and single-cell RNA-seq data. Post-Analysis Processing: Processes the deconvolution results and visualizes the spatial distribution of inferred cell types within the tissue. Returning Results: If a Seurat object is provided, the updated Seurat object with cell type information is returned. 6.8.3 An example: st_obj <- st_Deconvolution( st.data.dir = 'path/to/data', library_id = 'Hema_ST', sc.h5ad.dir = NULL, st_obj = st_obj, save_path = '.', sc.labels.key = 'seurat_clusters', species = 'human', sc.max.epoch = 1000, st.max.epoch = 10000, use.gpu = FALSE, use.Dataset = 'LymphNode', pythonPath = 'path/to/python' ) 6.8.4 Outputs: Figures showing the predicted abundance of each cell-type. The parameters of trained cell2location model. 6.9 Step 9. Cell cycle The st_Cell_cycle function is used to assess the cell cycle phase scores in spatial transcriptomics data. It calculates S phase and G2M phase scores based on the expression of designated cell cycle-related genes and visualizes these scores in spatial and dimensionality-reduced plots. 6.9.1 Function arguments: st_obj: The input Seurat object containing the data for analysis. save_path: The directory where the output images will be saved. Default is the current directory. s.features: A list of genes associated with the S phase. Default is NULL (using genes from Seurat). g2m.features: A list of genes associated with the G2M phase. Default is NULL (using genes from Seurat). species: The species of the spatial transcriptomics data. Default is 'human'. FeatureColors.bi: A color palette for visualization. Default is a two-color ramp palette. 6.9.2 Function behavior: Gene Feature Assignment: Assigns S phase and G2M phase gene lists based on the specified species or provided input. 
Cell Cycle Scoring: Calculates the S phase and G2M phase scores in the data. Spatial Visualization: Generates spatial feature plots to visualize the S phase and G2M phase scores using the specified color palette and saves the plots as images. Dimensionality-Reduced Plot Visualization: If UMAP or tSNE dimensionality reduction is available in the st_obj, feature plots of the S phase and G2M phase scores are generated in the reduced space and saved as images. Return Value: Returns the updated st_obj containing the cell cycle phase scores. 6.9.3 An example: st_obj <- st_Cell_cycle( st_obj = st_obj, save_path = '.', s.features = NULL, g2m.features = NULL, species = 'human', FeatureColors.bi = colorRampPalette(colors = rev(x = brewer.pal(n = 11, name = 'RdYlBu'))) ) 6.9.4 Outputs: Figures showing S scores. Figures showing G2M scores. 6.10 Step 10. Niche analysis The st_NicheAnalysis function is designed to perform niche analysis on spatial transcriptomics data, enabling the exploration of spatial niches or microenvironments within the tissue. The function encompasses co-occurrence analysis, niche clustering, and niche interaction analysis to uncover the spatial relationships and characteristics of different cell populations or features. 6.10.1 Function arguments: st_obj: The input SeuratObject containing the spatial transcriptomics data for analysis. features: A vector of features representing features (for example, cell types from deconvolution) for niche analysis. save_path: The directory where the analysis results and visualizations will be saved. Default is the current directory. coexistence.method: The method for co-occurrence analysis, accepting 'correlation' or 'Wasserstein'. Default is 'correlation'. kmeans.n: The number of clusters for niche clustering. Default is 4. st_data_path: A path containing the ‘spatial’ file and ‘filtered_feature_bc_matrix.h5’ file, required for niche interaction visualization. slice: The slice to be used for analysis. 
Default is 'slice1'. species: The species of the sample data. Default is 'mouse'. pythonPath: The path to the Python environment containing Commot to use for the analysis. Default is ‘.’. 6.10.2 Function behavior: Co-occurrence Score Calculation: Calculates the co-occurrence scores between the specified features using the chosen coexistence method (‘correlation’ or ‘Wasserstein’). Niche Clustering: Utilizes k-means clustering to identify distinct spatial niches based on the expression profiles of the selected features and visualizes the clustering results. Niche Interaction Visualization: If the st_data_path is provided, performs niche interaction visualization using Commot, which is based on the provided spatial transcriptomics data and generates visualizations of niche interactions within the tissue. Return Value: Returns the updated st_obj with niche analysis results and visualizations. 6.10.3 An example: tmp <- read.csv('path/to/cell2loc_res.csv', row.names = 1) features <- colnames(tmp) if(!all(features %in% names(st_obj@meta.data))){ common.barcodes <- intersect(colnames(st_obj), rownames(tmp)) tmp <- tmp[common.barcodes, ] st_obj <- st_obj[, common.barcodes] st_obj <- AddMetaData(st_obj, metadata = tmp) } st_obj <- st_NicheAnalysis( st_obj, features = features, save_path = '.', coexistence.method = 'correlation', kmeans.n = 4, st_data_path = 'path/to/data', slice = 'slice1', species = 'human', pythonPath = 'path/to/python' ) 6.10.4 Outputs: Figures showing the co-existence results. Figures showing the spatial distribution of each niche. Figures showing the composition of each niche. Figures showing the results of interactions using Commot. "],["step-by-step-shiny.html", "7 Step-by-step shiny 7.1 Step 1. Enter R and get the path of the installed R packages 7.2 Step 2. Run shiny code 7.3 Step 3.
Use HemaScopeShiny via the GUI", " 7 Step-by-step shiny #You can run shiny on Linux or on the Rstudio web page Choice 1:Run shiny on Linux - Enter Linux, activate the HemaScope environment, and install the radian package; then you can enter the R environment on Linux and run the shiny code radian -You can see “r$>”. It means you have entered the R environment on Linux. app_path <- system.file("shinyapp/shiny_sc_st_all.R", package = "HemaScopeR") #The path where shiny_sc_st_all.R is located #Run shiny code shiny::runApp(app_path,launch.browser = FALSE,host = "xx.xx.xx.xx") #host parameter:Your server IP address You’ll see a page like the one below; copy the link. Open the link with a browser, and you can see the HemaScopeR shiny home page. Choice 2:Run shiny on Rstudio web page 7.1 Step 1. Enter R and get the path of the installed R packages Enter the R environment in the Linux command line. R Get the path of the installed R packages in the R command line. .libPaths() For example, “/An/example/of/the/path/to/installed/R/packages” 7.2 Step 2. Run shiny code .libPaths("/An/example/of/the/path/to/installed/R/packages") app_path <- system.file("shinyapp/shiny_sc_st_all.R", package = "HemaScopeR") #The path where shiny_sc_st_all.R is located #Run shiny code shiny::runApp(app_path,launch.browser = FALSE,host = "xx.xx.xx.xx") #host parameter:Your server IP address 7.3 Step 3. Use HemaScopeShiny via the GUI Start interface. A UI page appears with two buttons: “Start scRNA-seq Analysis” and “Start st-seq Analysis.” Users can click the corresponding button based on their needs to enter the respective analysis page. * The figure showing the start interface. Begin a new analysis, continue the previous analysis, or return to the start interface When clicking the “Start scRNA-seq pipeline” or “Start ST-seq pipeline” button, you will be directed to a second page. This page contains three buttons: “Begin New Analysis,” “Continue Previous Analysis”, and “Back to Home”.
If you need to begin a new analysis of scRNA-seq or st-seq data from the first step, click “Begin New Analysis”. If you have already used Shiny to complete several steps (e.g., steps 1, 2, and 3), but the analysis was interrupted during step 4 because Shiny closed unexpectedly, click “Continue Previous Analysis” to resume from step 4. Please note: users should follow the analysis steps sequentially and not skip steps. For example, analyzing steps 1, 2, and 3 and then jumping directly to step 6 is incorrect. The proper analysis sequence should be step 1, 2, 3, 4, 5, 6, … N. The figure showing the interface for beginning a new analysis, continuing the previous analysis, or returning to the start interface. 7.3.1 scRNA-seq pipeline When the user clicks the “Start scRNA-seq pipeline – Begin New Analysis” button, they will enter the single-cell analysis page. The sidebar of this page includes the following buttons: Step 1. Input Data Step 2. Quality Control Step 3. Clustering Step 4. Identify Cell Types Step 5. Visualization Step 6. Find Differential Genes Step 7. Assign Cell Cycles Step 8. Calculate Heterogeneity Step 9. Violin Plot for Marker Genes Step 10. Calculate Lineage Scores Step 11. GSVA Step 12. Construct Trajectories Step 13. Transcription Factors Analysis Step 14. Cell-Cell Interaction Step 15. Generate the Report Back to Prior Page The figure showing the scRNA-seq pipeline. Please start the analysis from step 1 and do not skip any steps. The correct analysis sequence is steps 1 through 15: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15. To return to the previous page, click “Back to Prior Page”. If Shiny unexpectedly exits during data analysis in the Begin New Analysis process (for example, while analyzing Step 5), and the analysis of Step 5 is interrupted, the user will need to restart Shiny by running shinyApp(ui, server). This will bring up the Home page.
The user should click the “Start scRNA-seq pipeline–Continue Previous Analysis” button, enter the Job ID displayed on the UI page during the Step 1.Input data step, and then select the step that did not complete successfully (e.g., Step 5). After entering the necessary parameters for Step 5, click “Run Step 5” to resume the analysis. Once Step 5 is completed, the user should proceed by selecting Step 6, entering the required parameters, and clicking “Run Step 6” to analyze Step 6, and so on, until all scRNA-seq steps are completed. Note that the default parameters for each step are the same as those in Begin New Analysis. After clicking “Run Step,” do not perform any other operations on the parameter page. Wait until the current step’s analysis is complete, and the results for that step will appear on the UI page. The “Start scRNA-seq pipeline–Continue Previous Analysis” page contains the following buttons: Back to Prior Page: Click to return to the previous page. Enter your Job ID: Enter the Job ID displayed on the page during the Begin New Analysis–Step1.Input data step. Choose a step you want to analyze: Select the step you want to continue analyzing. 7.3.1.1 Step 1 (scRNA-seq pipeline). Input Data The figure showing the step 1 of scRNA-seq pipeline. Enter data path: Input multiple file paths separated by semicolons, for example: /path1/file1/data1;/path2/file2/data2;/path2/file2/data3. For a single file, use: /path2/file2/data2. Enter project name: When entering multiple files, you must also input multiple project names, separated by semicolons. The number of project names must match the number of input files. Example: projectname1;projectname2;projectname3. For a single file, use: projectname1. Enter output path: Specify the path where the results will be output. You can view the results of each step in this path. Example: /home/username/output. Enter the path of database: The path where the database is stored and it varies for each user. 
Example: /home/username/database. Select Data Type: There are three options: “cellranger-count”, “Seurat”, “Matrix”. Choose according to the type of input data. Gene Column (default: 2): The column where gene names are located; the default is column 2. Minimum Cells (default: 10): The minimum number of cells for filtering; the default is 10. Minimum Features (default: 200): The minimum number of genes that must be detected in each cell; the default is 200. Mt Pattern (default: ‘^MT-’): Mitochondrial pattern; for humans use ^MT-, for mice use ^mt-. After entering the above parameters, click the “LoadData” button to load the data. Once the data is successfully loaded, you will see “OK! Data dimensions” indicating that the data loading is complete, and you will be provided with a JobID. Make sure to note this JobID, as it is crucial. If HemaScopeShiny unexpectedly exits, you can click “Continue Previous Analysis”, enter the JobID, and continue loading the previous analysis results without starting from step 1 again. The JobID is very important! Please note: After clicking the “LoadData” button, do not modify any other parameters on the page. The Step 2-14 pages will consist of three sections: 1) parameter input, 2) result output file names, and 3) generated result figures. If the respective step produces result figures, they will be displayed. Users can switch between images by clicking the arrows on the left or right of the figure. If no figures are generated for the current step, a message stating “NO Figure!” will be displayed. All output files generated at each step are stored in the output directory specified by the user. The UI page will display only the file names, which can be downloaded by clicking on the file name links. 7.3.1.2 Step 2 (scRNA-seq pipeline). Quality Control The figure showing the step 2 of scRNA-seq pipeline. nFeature_RNA.limit: Minimum number of genes detected per cell. 
Default value: 200 percent.mt.limit: Threshold for filtering mitochondrial genes. Default value: 20 scale.factor: Normalization factor. Default value: 10,000 nfeatures: Number of highly variable genes. Default value: 3,000 ndims: Number of dimensions used. Default value: 50 vars.to.regress: Variables to regress. Default value: NULL PCs: Number of principal components used for clustering. Default value: 1:35 resolution: Resolution parameter for clustering. Default value: 0.4 n.neighbors: k.param parameter in the FindNeighbors function. Default value: 50 doublet.percentage: Doublet rate. Default value: 0.04 doubletFinderWrapper.PCs: Number of principal components used for doublet removal. Default value: 1:20 doubletFinderWrapper.pN: Number of artificial doublets defined for removal. Default value: 0.25 doubletFinderWrapper.pK: Represents the fraction of merged real artificial data. Default value: 0.1 (pK should be adjusted according to each scRNA-seq dataset) Step2_Quality_Control.RemoveBatches: Whether to remove detected batches. Default value: TRUE Step2_Quality_Control.RemoveDoublets: Whether to remove detected doublets. Default value: TRUE Click the “Run Step 2” button to start the process. After clicking the “Run Step 2” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 2 completed” message will appear. After a short while, the result files generated by Step 2 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.3 Step 3 (scRNA-seq pipeline). Clustering The figure showing the step 3 of scRNA-seq pipeline. PCs for clustering (default: 1:20): Principal components used for clustering. Default value: 1:20 n.neighbors for clustering (default: 50): k.param parameter in the FindNeighbors function. Default value: 50 resolution for clustering (default: 0.4): Resolution used for clustering. 
Default value: 0.4 Click the “Run Step 3” button to start the process. After clicking the “Run Step 3” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 3 completed” message will appear. After a short while, the result files generated by Step 3 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.4 Step 4 (scRNA-seq pipeline). Identify Cell Types The figure showing the step 4 of scRNA-seq pipeline. Choose organism: ‘hsa’ for human, ‘mmu’ for mouse Choose Labels: Cell labels, default value: clustering Run CNV: TRUE if copy number variation (CNV) analysis is to be performed CPU cores for parallel processing: Number of CPU cores for parallel processing, default value: 10 Click the “Run Step 4” button to start the process. After clicking the “Run Step 4” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 4 completed” message will appear. After a short while, the result files generated by Step 4 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.5 Step 5 (scRNA-seq pipeline). Visualization The figure showing the step 5 of scRNA-seq pipeline. Nearest neighbors for PhateR analysis (default: 50): phate.knn parameter, the number of nearest neighbors to consider in the PhateR algorithm. Default value: 50 Principal components for PhateR (default: 20): phate.npca parameter, the number of principal components to use in the PhateR algorithm. Default value: 20 t parameter for PhateR (default: 10): phate.t parameter, the t value for the PhateR algorithm. Default value: 10 Dimensions for PhateR (default: 2): phate.ndim parameter, the number of dimensions for embedding output in the PhateR algorithm. Default value: 2 Click the “Run Step 5” button to start the process. 
After clicking the “Run Step 5” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 5 completed” message will appear. After a short while, the result files generated by Step 5 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.6 Step 6 (scRNA-seq pipeline). Find Differential Genes The figure showing the step 6 of scRNA-seq pipeline. Minimum gene percentage for differential detection (default: 0.25): The minimum fraction of cells expressing a gene in any cluster. Default value: 0.25 Log-fold threshold for gene analysis (default: 0.25): The log-fold change threshold for differential gene expression analysis. Default value: 0.25 Click the “Run Step 6” button to start the process. After clicking the “Run Step 6” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 6 completed” message will appear. After a short while, the result files generated by Step 6 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.7 Step 7 (scRNA-seq pipeline). Assign Cell Cycles The figure showing the step 7 of scRNA-seq pipeline. Define cell cycle cutoff (default: NULL): The cutoff value used to distinguish between cycling and non-cycling cells. Default value: NULL Click the “Run Step 7” button to start the process. After clicking the “Run Step 7” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 7 completed” message will appear. After a short while, the result files generated by Step 7 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.8 Step 8 (scRNA-seq pipeline). Calculate Heterogeneity The figure showing the step 8 of scRNA-seq pipeline. 
Order cell types: The order of cell types for visualization. If not provided, the function will use the unique cell types from the input cell_types_groups. Default value: NULL Click the “Run Step 8” button to start the process. After clicking the “Run Step 8” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 8 completed” message will appear. After a short while, the result files generated by Step 8 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.9 Step 9 (scRNA-seq pipeline). Violin Plot for Marker Genes The figure showing the step 9 of scRNA-seq pipeline. Enter marker genes for violin plot (separate by ‘,’): The marker genes for the violin plot. Default value is the built-in marker genes: NULL. Set the hexadecimal codes of colors for cell types (separate by ‘,’): Specify the colors for cell types. The default is the color palette: NULL. Click the “Run Step 9” button to start the process. After clicking the “Run Step 9” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 9 completed” message will appear. After a short while, the result files generated by Step 9 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.10 Step 10 (scRNA-seq pipeline). Calculate Lineage Scores The figure showing the step 10 of scRNA-seq pipeline. The gene sets for calculating lineage scores: The gene sets used for calculating lineage scores. Default value: NULL. The names for the lineages: The names of the lineages. Default value: NULL. The hexadecimal codes of colors for groups: Specify the colors to be used for different group annotations. The default is the color palette: NULL. Click the “Run Step 10” button to start the process.
After clicking the “Run Step 10” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 10 completed” message will appear. After a short while, the result files generated by Step 10 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.11 Step 11 (scRNA-seq pipeline). GSVA The figure showing the step 11 of scRNA-seq pipeline. Option to identify cell type-specific GSVA terms: Whether to identify cell type-specific GSVA terms. Default value: TRUE. Option to identify differential GSVA terms: Whether to identify differential GSVA terms. Default value: TRUE. Click the “Run Step 11” button to start the process. After clicking the “Run Step 11” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 11 completed” message will appear. After a short while, the result files generated by Step 11 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.12 Step 12 (scRNA-seq pipeline). Construct Trajectories The figure showing the step 12 of scRNA-seq pipeline. Set the cell types for constructing trajectories: The cell types to be used for trajectory analysis. Different cell types should be separated by commas. Default value: “all.” Option to run monocle2: Whether to perform Monocle2 trajectory analysis. Default value: TRUE. Option to run slingshot: Whether to perform Slingshot trajectory analysis. Default value: TRUE. Option to run scVelo: Whether to perform scVelo trajectory analysis. Default value: TRUE. Enter the paths of loom files: Specify the paths to the loom files for scVelo analysis. Default value: NULL. Click the “Run Step 12” button to start the process. After clicking the “Run Step 12” button, please do not interact with other parameters or buttons on the page. 
Once the process is complete, a “Step 12 completed” message will appear. After a short while, the result files generated by Step 12 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.13 Step 13 (scRNA-seq pipeline). Transcription Factors Analysis The figure showing the step 13 of scRNA-seq pipeline. Set the hexadecimal codes of colors for cell types: Colors used for visualizing cell types. Default value: NULL (color palette). Set the hexadecimal codes of colors for groups: Colors used for visualizing groups. Default value: NULL (color palette). Click the “Run Step 13” button to start the process. After clicking the “Run Step 13” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 13 completed” message will appear. After a short while, the result files generated by Step 13 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.14 Step 14 (scRNA-seq pipeline). Cell-Cell Interaction The figure showing the step 14 of scRNA-seq pipeline. The cell groups were sorted: Whether to consider the size (number) of cell groups in the cell communication analysis. Default value: TRUE. Click the “Run Step 14” button to start the process. After clicking the “Run Step 14” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 14 completed” message will appear. After a short while, the result files generated by Step 14 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.15 Step 15 (scRNA-seq pipeline). Generate the Report The figure showing the step 15 of scRNA-seq pipeline. Click “Run Step 15” to generate the analysis report. 
7.3.2 ST-pipeline When the user clicks the button “Start ST-seq pipeline–Begin New Analysis,” they will be taken to the empty analysis page. The page sidebar includes the following buttons: Please start the analysis from Step 1 and do not skip any steps. The correct analysis sequence is Step 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, and 11. To return to the previous page, please click “Back to Prior Page.” Step 1. Input Data Step 2. Quality Control Step 3. Clustering Step 4. Find Differential Genes Step 5. Spatially Variable Features Step 6. Spatial Interaction Step 7. CNV Analysis Step 8. Deconvolution Step 9. Cell Cycle Analysis Step 10. Niche Analysis Step 11. Generate the Report Back to Prior Page In “Begin New Analysis,” users start analyzing data from Step1. If Shiny unexpectedly exits during the analysis process (for example, if you are analyzing Step5 and Shiny crashes, causing Step5 to fail), users need to restart Shiny by running shinyApp(ui, server). This will bring up the Home page. Users should click the “Start ST-seq pipeline–Continue Previous Analysis” button. They need to enter the JobID displayed in the UI page during the Step1.Input data step and then select the step that did not complete successfully to continue the analysis. For example, if Step5 failed, select Step5, enter the necessary parameters, and click “Run Step5” to continue the analysis. After Step5 finishes, select Step6, enter the parameters for Step6, and click “Run Step6” to analyze Step6, and so on for all subsequent steps. Please note that the default parameters for each step are the same as those in “Begin New Analysis.” After clicking “Run Step,” do not make any other changes to the parameter page. Wait until the current step completes, and the results file for the current step will appear on the UI page. The “Start ST-seq pipeline–Continue Previous Analysis” page includes the following buttons: Back to Prior Page: Click to return to the previous page. 
Enter your Job ID: Enter the JobID displayed in the “Begin New Analysis–Step1.Input data” step. Choose a step you want to analyze: Select the step you want to continue analyzing. 7.3.2.1 Step 1 (st-seq pipeline). Input Data The figure showing the step 1 of st-seq pipeline. Enter data path: The directory where the input data is stored. The input data should be 10X Visium spatial transcriptomics data. Only one dataset can be input at a time; unlike single-cell data, multiple datasets cannot be entered simultaneously. Enter sample name: A string for naming the sample. The default value is ‘Hema_ST’. Enter output path: The directory where processed outputs will be saved. For example: /home/username/output. Enter the path of Python: The path to the Python executable, as that in scRNA-seq pipeline. After entering the parameters above, click the “LoadData” button to load the data. Once the data is loaded, the system will provide a JobID, which should be noted. If Shiny unexpectedly exits, you can click “Continue Previous Analysis” and enter the JobID to resume loading the previous analysis results, avoiding the need to restart from Step 1. The JobID is very important! Please note: After clicking the “LoadData” button, do not make further changes to other parameters on the page. The Step 2-10 pages will have three sections: Parameter input Result output file names Generated result plots If a step generates result plots, they will be displayed. Users can switch between images by clicking the arrows on either side of the plot. If no result plots are generated for the current step, users will be informed with “NO Figure!” The result files generated for each step are stored in the output path specified by the user. The UI page will only display the file names, and clicking on the file name links will allow downloading the files. 7.3.2.2 Step 2 (st-seq pipeline). Quality Control The figure showing the step 2 of st-seq pipeline. 
min.gene (default: 200): Specifies the minimum number of genes detected in a spot. The default value is 200. min.nUMI (default: 500): Specifies the minimum number of nUMIs detected in a spot. The default value is 500. max.gene (default: Inf): Specifies the maximum number of genes detected in a spot. The default value is Inf (no upper limit). max.nUMI (default: Inf): Specifies the maximum number of nUMIs detected in a spot. The default value is Inf (no upper limit). min.spot (default: 0): Specifies the minimum number of spots where each gene is expressed. bool.remove.mito: Whether to remove mitochondrial genes. The default value is TRUE. species: Specifies the species: human/mouse. Click “Run Step2” to proceed. After clicking the “Run Step2” button, please do not modify any other parameters on the page. Once Step 2 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.3 Step 3 (st-seq pipeline). Clustering The figure showing the step 3 of st-seq pipeline. normalization.method (default: ‘SCTransform’): The method for data normalization. The default value is ‘SCTransform’. npcs (default: 50): The number of principal components (PCs) to use in PCA. The default value is 50. pcs.used (default: 1:10): The number of PCs used for clustering analysis. The default value is the first 10 PCs (1:10). resolution (default: 0.8): The resolution parameter for the clustering algorithm. The default value is 0.8. Click “Run Step3” to proceed. After clicking the “Run Step3” button, please do not modify any other parameters on the page. Once Step 3 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.4 Step 4 (st-seq pipeline). Find Differential Genes The figure showing the step 4 of st-seq pipeline. only.pos: A logical flag to include only positive markers. The default value is TRUE. 
min.pct (default: 0.25): The minimum fraction of cells expressing the gene in any cluster. The default value is 0.25. logfc.threshold (default: 0.25): The log-fold change threshold for considering differentially expressed genes. The default value is 0.25. test.use (default: ‘wilcox’): The statistical test used for differential expression analysis. The default value is ‘wilcox’. Click “Run Step4” to proceed. After clicking the “Run Step4” button, please do not modify any other parameters on the page. Once Step 4 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.5 Step 5 (st-seq pipeline). Spatially variable features The figure showing the step 5 of st-seq pipeline. selection.method (default: ‘moransi’): The method used for selecting spatially variable features. The default value is ‘moransi’. n.top.show (default: 10): The number of top spatially variable features to visualize. The default value is 10. n.col.show (default: 5): The number of columns in the visualization grid. The default value is 5. Click “Run Step5” to proceed. After clicking the “Run Step5” button, please do not modify any other parameters on the page. Once Step 5 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.6 Step 6 (st-seq pipeline). Spatial interaction The figure showing the step 6 of st-seq pipeline. commot.signaling_type (default: ‘Secreted Signaling’): The type of signaling interaction to consider. The default value is ‘Secreted Signaling’. commot.database (default: ‘CellChat’): The database used for the analysis. The default value is ‘CellChat’. commot.min_cell_pct (default: 0.05): The minimum cell percentage to consider in interaction analysis. The default value is 0.05. commot.dis_thr (default: 500): The distance threshold used to define interactions. The default value is 500. 
commot.n_permutations (default: 100): The number of permutations used to assess significance. The default value is 100. Click “Run Step6” to proceed. After clicking the “Run Step6” button, please do not modify any other parameters on the page. Once Step 6 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.7 Step 7 (st-seq pipeline). CNV analysis The figure showing the step 7 of st-seq pipeline. copykat.genome (default: ‘NULL’): The genome version used, either ‘hg20’ or ‘mm10’. The default value is “hg20”. copykat.LOW.DR (default: 0.05): The lower dropout rate threshold in CopyKAT. The default value is 0.05. copykat.UP.DR (default: 0.1): The upper dropout rate threshold in CopyKAT. The default value is 0.1. copykat.win.size (default: 25): The window size for CNV analysis. The default value is 25. copykat.distance (default: ‘euclidean’): The distance metric used for analysis. The default value is “euclidean”. copykat.n.cores (default: 1): The number of cores used for parallel processing. The default value is 1. Click “Run Step7” to proceed. After clicking the “Run Step7” button, please do not modify any other parameters on the page. Once Step 7 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.8 Step 8 (st-seq pipeline). Deconvolution The figure showing the step 8 of st-seq pipeline. cell2loc.sc.h5ad.dir (default: ‘NULL’): The path to the h5ad format single-cell RNA-seq data. The default value is NULL. cell2loc.sc.max.epoch (default: 1000): The maximum number of epochs for single-cell deconvolution. The default value is 1000. cell2loc.st.max.epoch (default: 10000): The maximum number of epochs for spatial deconvolution. The default value is 10000. cell2loc.use.gpu (default: FALSE): A logical value indicating whether to use GPU for computation. The default value is FALSE. 
Click “Run Step8” to proceed. After clicking the “Run Step8” button, please do not modify any other parameters on the page. Once Step 8 is completed, the result files will appear in the UI and will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.9 Step 9 (st-seq pipeline). Cell cycle analysis The figure showing the step 9 of st-seq pipeline. The gene sets for calculating S phase scores (e.g. “gene1,gene2,gene3”): A list of genes associated with the S phase. The default value is NULL (uses genes from Seurat). The gene sets for calculating G2M phase scores (e.g. “gene1,gene2,gene3”): A list of genes associated with the G2M phase. The default value is NULL (uses genes from Seurat). Click “Run Step9” to proceed. After clicking the “Run Step9” button, please do not modify any other parameters on the page. Once Step 9 is completed, the result files will appear in the UI and will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.10 Step 10 (st-seq pipeline). Niche analysis The figure showing the step 10 of st-seq pipeline. Nich.cluster.n (default: 4): The number of clusters for niche clustering. The default value is 4. Click “Run Step10” to proceed. After clicking the “Run Step10” button, please do not modify any other parameters on the page. Once Step 10 is completed, the result files will appear in the UI and will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.11 Step 11 (st-seq pipeline). Generate the Report The figure showing the step 11 of st-seq pipeline. Click “Run Step11” to generate the analysis report. "],["operation-manual-for-the-hemascopecloud.html", "8 Operation Manual for the HemaScopeCloud 8.1 User Login 8.2 Homepage 8.3 Data Page 8.4 Analysis Page 8.5 Projects page", " 8 Operation Manual for the HemaScopeCloud 8.1 User Login 8.1.1 Enter the URL in a web browser: https://hemascope.hiplot.cn/?home=hemascope and click to access the login page. 
Figure 8.1: Login Page 8.1.2 To obtain free computational resources: Enter your login email, click “Get Code,” input the verification code received in your email, and then click “Login” to complete the login and access the system homepage. 8.1.3 To browse HemaScopeCloud without needing computational resources: Click the “View without Login” button to access the system homepage. You can view demo analysis projects. If you click the button to initiate an analysis, the platform will prompt: “Please log in for analysis!” 8.2 Homepage Figure 8.2: Homepage The left side features a menu bar containing Home, Data, Analysis, Project, and Help. And the upper right section includes statistics on analysis project status, usage of analysis projects, a quick entry for creating new analysis projects, and statistics on allocated storage capacity usage. Statistics on Analysis Project Status Pending Analysis:Waiting for analysis, not yet submitted for analysis. Pending Resources:Waiting for resources, analysis submitted and awaiting resource allocation. Analyzing:Currently analyzing. Completed:Analysis completed. Error:An error occurred during analysis. Total:Total of all analysis statuses. Usage Statistics for Analysis Projects: Number of used analysis projects / Total number of allocated analysis projects. The current allocation for the system is 50 projects. For additional free computational resources, please contact the developer. Quick Entry for Creating New Analysis Projects: Supports quick access to the new analysis project pages corresponding to two pipelines. Storage Capacity Usage: Used Storage Resources / Allocated Storage Resources. The lower section displays the most recently run analysis projects. By default, it shows demo projects upon initial entry. Clicking the “View” option on an entry in the Projects section allows you to access and analyze that specific analysis project. 
8.3 Data Page The Data page includes storage for Demo sample project data as well as Personal project data. Data under the Demo tab can be downloaded, while the Personal tab allows users to create new folders and upload files. 8.4 Analysis Page It lists two analysis pipelines: sc_HemaScopeCloud and st_HemaScopeCloud, serving as entries for creating new analysis projects. Click the Analysis button to access the new project and execution page for that pipeline. Figure 8.3: Select Analysis Pipeline Page Figure 8.4: Enter the Analysis Pipeline Page Create New Analysis Project Click the Analysis button under the sc_HemaScopeCloud to enter the new project page for that pipeline. Project Name:Enter the name of the analysis project for identification purposes. Input Data:Click Upload to upload local analysis files. Single and multiple file uploads are supported. Uploaded files must comply with the pipeline’s input file requirements; otherwise, an error will occur during execution. Sample Name:Click Add to enter the sample names, which should correspond to the uploaded analysis files. Items marked with * are required fields. Click the Run button to initiate the analysis:For the scRNA-seq pipeline,this will trigger step1-4; for the st-seq pipeline, it will trigger step1-5. Each subsequent analysis step requires clicking Run on the relevant step page to submit. Before submission, ensure that the previous step has generated result files; otherwise, a notification will indicate that the analysis cannot proceed. Load Demo Data HemaScopeCloud supports loading pre-configured analysis demo files and default parameters to quickly initiate analysis projects. On the new project page, click Load Demo Data to load files from the demo project and fill the required fields. Then, click the Run button to execute the analysis for the demo project. Figure 8.5: Load Demo Data After clicking Run, you will be redirected to the detailed page of the analysis project. 
Analysis Project Detail Page Notifications Waiting for resources…Do not submit repeatedly: This indicates that the submission is waiting for resources. Do not click the Run button again. Analyzing…Do not submit repeatedly: This indicates that the project is currently analyzing. Do not click the Run button again. Analysis Steps, Current Analysis Step: Displays all stepwise analysis processes and the current step. Click on different steps to navigate to the corresponding analysis step page. For the initial analysis, you must complete the previous step before proceeding to the next one. Refresh Button: Used to refresh the current page. Results: This tab stores the results of the completed step. Visualization: For steps that involve visualizations, the results will be found under the visualization tab. History:Click on Run History to view all historical runs of that step. Status:Corresponds to the analysis status of the project. Log:Click this button to view the run log. Parameter Settings:Used for entering parameter values. Figure 8.6: Analysis Project Page Figure 8.7: Analysis Project Result Page Figure 8.8: This step of the analysis project displays ‘Waiting for resources…Do not submit repeatedly’ Figure 8.9: This step of the analysis project displays ‘Analyzing…Do not submit repeatedly’ Figure 8.10: History Page Note: For steps that have already been completed (except for the first step), you can adjust the parameters and click Run to perform multiple analyses. The results page will retain only the latest analysis results. 8.5 Projects page The homepage includes analysis projects created by the user as well as pre-configured demo analysis projects provided by the system. Figure 8.11: Demo projects and user’s personal projects Clicking “View” allows you to navigate to the analysis project for review and step-by-step analysis. 
Figure 8.12: Click ‘View’ to access the analysis project Figure 8.13: Enter the detailed analysis project page "]] diff --git a/docs/installation.html b/docs/installation.html index 3f61c28..abb7067 100644 --- a/docs/installation.html +++ b/docs/installation.html @@ -377,9 +377,11 @@

    2.2 Set the channels in conda

    2.3 Install R

    -
      -
    • R 4.3.3
    • -
    +

    <<<<<<< Updated upstream + +======= +>>>>>>> Stashed changes +- R 4.3.3

    conda install R-base=4.3.3
    @@ -423,27 +425,27 @@

    2.4 Install required R-packages
  • From CRAN
  • -
    install.packages(c("doMC",
    -"doRNG",
    -"shinyjs",
    -"shiny",
    -"shinyWidgets",
    -"shinydashboard",
    -"slickR",
    -"phateR",
    -"gelnet",
    -"parallelDist",
    -"kableExtra",
    -"transport",
    -"feather",
    -"markdown",
    -"ggalluvial",
    -"forcats",
    -"mcmc",
    -"MCMCpack",
    -"fields",
    -"getopt",
    -"osfr"))
    +
    install.packages("doMC")
    +install.packages("doRNG")
    +install.packages("shinyjs")
    +install.packages("shiny")
    +install.packages("shinyWidgets")
    +install.packages("shinydashboard")
    +install.packages("slickR")
    +install.packages("phateR")
    +install.packages("gelnet")
    +install.packages("parallelDist")
    +install.packages("kableExtra")
    +install.packages("transport")
    +install.packages("feather")
    +install.packages("markdown")
    +install.packages("ggalluvial")
    +install.packages("forcats")
    +install.packages("mcmc")
    +install.packages("MCMCpack")
    +install.packages("fields")
    +install.packages("getopt")
    +install.packages("osfr")
    • From GitHub
    diff --git a/docs/search_index.json b/docs/search_index.json index 4c8f48c..40803c4 100644 --- a/docs/search_index.json +++ b/docs/search_index.json @@ -1 +1 @@ -[["index.html", "HemaScope Tutorial 1 Introduction", " HemaScope Tutorial HemaScope team 2024-10-22 1 Introduction HemaScope is a specialized bioinformatics toolkit designed for analyzing both single-cell and spatial transcriptome sequencing data from hematopoietic cells, including myeloid and lymphoid lineages. We have developed an R package named HemaScopeR, a Shiny interface named HemaScopeShiny, and a cloud platform named HemaScopeCloud. This tutorial introduces how to install and use the R package and Shiny interface, as well as how to access and operate the cloud platform. "],["installation.html", "2 Installation 2.1 Create a new conda environment and activate it 2.2 Set the channels in conda 2.3 Install R 2.4 Install required R-packages 2.5 Create the required python (v.3.9.12) virtual environment 2.6 The installed packages with versions", " 2 Installation 2.1 Create a new conda environment and activate it conda create --name HemaScope_env conda activate HemaScope_env 2.2 Set the channels in conda # Add the default channel conda config --add channels defaults # Add default channel URLs conda config --add default_channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main conda config --add default_channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r conda config --add default_channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2 # Add custom channels conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2 conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/menpo conda config --add channels 
https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch-lts conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/simpleitk conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/deepmodeling # Set to show channel URLs conda config --set show_channel_urls true 2.3 Install R R 4.3.3 conda install R-base=4.3.3 2.4 Install required R-packages From conda conda install -c conda-forge r-devtools=2.4.5 -y conda install -c conda-forge r-Seurat=4.3.0.1 -y conda install -c conda-forge r-Rfast2=0.1.5.1 -y conda install -c conda-forge r-hdf5r=1.3.10 -y conda install -c conda-forge r-ggpubr=0.6.0 -y conda install pwwang::r-seuratwrappers -y conda install -c bioconda bioconductor-monocle=2.28.0 -y conda install -c bioconda bioconductor-slingshot=2.8.0 -y conda install -c bioconda bioconductor-GSVA=1.48.2 -y conda install -c bioconda bioconductor-org.Mm.eg.db=3.17.0 -y conda install -c bioconda bioconductor-org.Hs.eg.db=3.17.0 -y conda install -c bioconda bioconductor-scran=1.28.1 -y conda install -c bioconda bioconductor-AUCell=1.22.0 -y conda install -c bioconda bioconductor-RcisTarget=1.20.0 -y conda install -c bioconda bioconductor-GENIE3=1.24.0 -y conda install -c bioconda bioconductor-biomaRt=2.56.1 -y conda install -c bioconda r-velocyto.r=0.6 -y #conda install -c bioconda bioconductor-limma=3.56.2 -y Enter the R language environment We suggest users do not manually update any already installed R packages during the installation of the following R packages. R From BiocManager # BiocManager(version = "1.30.23") should already be installed as a dependency of r-seuratwrappers. # If it is not installed, please run the following code to install it. 
# install.packages("BiocManager",version="1.30.23") BiocManager::install("ComplexHeatmap") BiocManager::install("scmap") BiocManager::install("clusterProfiler") BiocManager::install("BiocNeighbors") From CRAN install.packages(c("doMC", "doRNG", "shinyjs", "shiny", "shinyWidgets", "shinydashboard", "slickR", "phateR", "gelnet", "parallelDist", "kableExtra", "transport", "feather", "markdown", "ggalluvial", "forcats", "mcmc", "MCMCpack", "fields", "getopt", "osfr")) From GitHub tips: Sometimes network connection issues may occur, resulting in an error message indicating that GitHub cannot be connected. Please try installing again when the network conditions improve. Usage limitations: Sometimes an API rate limit error occurs, and a GitHub token is needed to provide the GitHub API rate limit. The steps to resolve this are as follows: Register for an account or log in to an existing account on the GitHub website. Then click on your profile picture in the top right corner, go to the dropdown menu and select “Settings.” Next, find “Developer settings” and click on it, then find “Personal access tokens (classic).” Click on it, then click “Create new token (classic).” Create a new token by first naming it anything you like. Then choose the expiration time for the token. Finally, check the “repo” box; the token will be used to download code repositories from GitHub. Click “Generate token.” Copy the generated token password. After that, set the token in the environment variable in R. Since we are using conda, enter R by typing R in the terminal. Then, enter the command: usethis::edit_r_environ(). This will open a file. Press the i key to edit. Paste the token you copied into the code area as follows: GITHUB_TOKEN=“your_token”. Then press Esc, type :wq! (force save). After that, you need to exit Linux and re-enter R. Close and reopen the terminal to apply the environment variable. Reopen Linux, activate the conda environment, and enter R again. 
devtools::install_github("sqjin/CellChat") devtools::install_github("immunogenomics/presto") devtools::install_github("aertslab/SCENIC@140ad6b") devtools::install_github("pzhulab/abcCellmap@f44c14b") devtools::install_github("navinlabcode/copykat@d7d6569") devtools::install_github('chris-mcginnis-ucsf/DoubletFinder@8c7f76e') devtools::install_github("mojaveazure/seurat-disk@877d4e1") devtools::install_github(c("hfang-bristol/dnet")) Install HemaScopeR from github devtools::install_github(repo="ZhenyiWangTHU/HemaScopeR", dep = FALSE) 2.5 Create the required python (v.3.9.12) virtual environment Run the init_miniconda function to create the miniconda virtual environments for the scRNA-seq pipeline and ST pipeline of 10X Visium data and MERFISH data. library(HemaScopeR) init_miniconda() (Optional) Run the init_miniconda_stereo function to create the miniconda virtual environment for the stereo-seq data. init_miniconda_stereo() 2.6 The installed packages with versions R packages with versions Package Version ------- ------- Python packages with versions Package Version ------------------------ -------------- "],["integrated-scrna-seq-pipeline.html", "3 Integrated scRNA-seq pipeline", " 3 Integrated scRNA-seq pipeline Load the R packages. 
# sc libraries library(Seurat) library(phateR) library(DoubletFinder) library(monocle) library(slingshot) library(GSVA) library(limma) library(plyr) library(dplyr) library(org.Mm.eg.db) library(org.Hs.eg.db) library(CellChat) library(velocyto.R) library(SeuratWrappers) library(stringr) library(scran) library(ggpubr) library(viridis) library(pheatmap) library(parallel) library(reticulate) library(SCENIC) library(feather) library(AUCell) library(RcisTarget) library(Matrix) library(foreach) library(doParallel) library(clusterProfiler) # st libraries library(RColorBrewer) library(Rfast2) library(SeuratDisk) library(abcCellmap) library(biomaRt) library(copykat) library(gelnet) library(ggplot2) library(parallelDist) library(patchwork) library(markdown) # getpot library(getopt) library(tools) # HemaScopeR library(HemaScopeR) Run the integrated scRNA-seq pipeline. scRNASeq_10x_pipeline( # input and output input.data.dirs = c('./SRR7881399/outs/filtered_feature_bc_matrix', './SRR7881400/outs/filtered_feature_bc_matrix', './SRR7881401/outs/filtered_feature_bc_matrix', './SRR7881402/outs/filtered_feature_bc_matrix', './SRR7881403/outs/filtered_feature_bc_matrix', './SRR7881404/outs/filtered_feature_bc_matrix', './SRR7881405/outs/filtered_feature_bc_matrix', './SRR7881406/outs/filtered_feature_bc_matrix', './SRR7881407/outs/filtered_feature_bc_matrix', './SRR7881408/outs/filtered_feature_bc_matrix', './SRR7881409/outs/filtered_feature_bc_matrix', './SRR7881410/outs/filtered_feature_bc_matrix', './SRR7881411/outs/filtered_feature_bc_matrix', './SRR7881412/outs/filtered_feature_bc_matrix', './SRR7881413/outs/filtered_feature_bc_matrix', './SRR7881414/outs/filtered_feature_bc_matrix', './SRR7881415/outs/filtered_feature_bc_matrix', './SRR7881416/outs/filtered_feature_bc_matrix', './SRR7881417/outs/filtered_feature_bc_matrix', './SRR7881418/outs/filtered_feature_bc_matrix', './SRR7881419/outs/filtered_feature_bc_matrix', './SRR7881420/outs/filtered_feature_bc_matrix', 
'./SRR7881421/outs/filtered_feature_bc_matrix', './SRR7881422/outs/filtered_feature_bc_matrix', './SRR7881423/outs/filtered_feature_bc_matrix'), project.names = c( 'SRR7881399', 'SRR7881400', 'SRR7881401', 'SRR7881402', 'SRR7881403', 'SRR7881404', 'SRR7881405', 'SRR7881406', 'SRR7881407', 'SRR7881408', 'SRR7881409', 'SRR7881410', 'SRR7881411', 'SRR7881412', 'SRR7881413', 'SRR7881414', 'SRR7881415', 'SRR7881416', 'SRR7881417', 'SRR7881418', 'SRR7881419', 'SRR7881420', 'SRR7881421', 'SRR7881422', 'SRR7881423'), output.dir = './output/', pythonPath = '/home/anaconda3/envs/HemaScopeR/bin/python', # quality control and preprocessing gene.column = 2, min.cells = 10, min.feature = 200, mt.pattern = '^MT-', nFeature_RNA.limit = 200, percent.mt.limit = 20, scale.factor = 10000, nfeatures = 3000, ndims = 50, vars.to.regress = NULL, PCs = 1:35, resolution = 0.4, n.neighbors = 50, # remove doublets doublet.percentage = 0.04, doublerFinderwraper.PCs = 1:20, doublerFinderwraper.pN = 0.25, doublerFinderwraper.pK = 0.1, # phateR phate.knn = 50, phate.npca = 20, phate.t = 10, phate.ndim = 2, min.pct = 0.25, logfc.threshold = 0.25, # visualization ViolinPlot.cellTypeOrders = as.character(1:22), ViolinPlot.cellTypeColors = NULL, Org = 'hsa', loom.files.path = c( './SRR7881399/velocyto/SRR7881399.loom', './SRR7881400/velocyto/SRR7881400.loom', './SRR7881401/velocyto/SRR7881401.loom', './SRR7881402/velocyto/SRR7881402.loom', './SRR7881403/velocyto/SRR7881403.loom', './SRR7881404/velocyto/SRR7881404.loom', './SRR7881405/velocyto/SRR7881405.loom', './SRR7881406/velocyto/SRR7881406.loom', './SRR7881407/velocyto/SRR7881407.loom', './SRR7881408/velocyto/SRR7881408.loom', './SRR7881409/velocyto/SRR7881409.loom', './SRR7881410/velocyto/SRR7881410.loom', './SRR7881411/velocyto/SRR7881411.loom', './SRR7881412/velocyto/SRR7881412.loom', './SRR7881413/velocyto/SRR7881413.loom', './SRR7881414/velocyto/SRR7881414.loom', './SRR7881415/velocyto/SRR7881415.loom', 
'./SRR7881416/velocyto/SRR7881416.loom', './SRR7881417/velocyto/SRR7881417.loom', './SRR7881418/velocyto/SRR7881418.loom', './SRR7881419/velocyto/SRR7881419.loom', './SRR7881420/velocyto/SRR7881420.loom', './SRR7881421/velocyto/SRR7881421.loom', './SRR7881422/velocyto/SRR7881422.loom', './SRR7881423/velocyto/SRR7881423.loom'), # cell cycle cellcycleCutoff = NULL, # cell chat sorting = FALSE, ncores = 10, # Verbose = FALSE, # activeEachStep Whether_load_previous_results = FALSE, Step1_Input_Data = TRUE, Step1_Input_Data.type = 'cellranger-count', Step2_Quality_Control = TRUE, Step2_Quality_Control.RemoveBatches = TRUE, Step2_Quality_Control.RemoveDoublets = TRUE, Step3_Clustering = TRUE, Step4_Identify_Cell_Types = TRUE, Step4_Use_Which_Labels = 'clustering', Step4_Cluster_Labels = NULL, Step4_Changed_Labels = NULL, Step4_run_sc_CNV = TRUE, Step5_Visualization = TRUE, Step6_Find_DEGs = TRUE, Step7_Assign_Cell_Cycle = TRUE, Step8_Calculate_Heterogeneity = TRUE, Step9_Violin_Plot_for_Marker_Genes = TRUE, Step10_Calculate_Lineage_Scores = TRUE, Step11_GSVA = TRUE, Step11_GSVA.identify.cellType.features=TRUE, Step11_GSVA.identify.diff.features=FALSE, Step11_GSVA.comparison.design=NULL, Step12_Construct_Trajectories = TRUE, Step12_Construct_Trajectories.clusters = c('3','6','9','10','11','14','15','19'), Step12_Construct_Trajectories.monocle = TRUE, Step12_Construct_Trajectories.slingshot = TRUE, Step12_Construct_Trajectories.scVelo = TRUE, Step13_TF_Analysis = TRUE, Step14_Cell_Cell_Interaction = TRUE, Step15_Generate_the_Report = TRUE ) "],["step-by-step-scrna-seq-pipeline.html", "4 Step-by-step scRNA-seq Pipeline 4.1 Before you begin 4.2 Step 1. Load the input data 4.3 Step 2. Quality Control 4.4 Step 3. Clustering 4.5 Step 4. Identify Cell Types 4.6 Step 5. Visualization 4.7 Step 6. Find DEGs 4.8 Step 7. Assign Cell Cycles 4.9 Step 8. Calculate Heterogeneity 4.10 Step 9. Violin Plot for Marker Genes 4.11 Step 10. Calculate Lineage Scores 4.12 Step 11. 
GSVA 4.13 Step 12. Construct Trajectories 4.14 Step 13. TF Analysis 4.15 Step 14. Cell-Cell Interaction", " 4 Step-by-step scRNA-seq Pipeline 4.1 Before you begin Load the R packages. library(Seurat) library(phateR) library(DoubletFinder) library(monocle) library(slingshot) library(GSVA) library(limma) library(plyr) library(dplyr) library(org.Mm.eg.db) library(org.Hs.eg.db) library(CellChat) library(velocyto.R) library(SeuratWrappers) library(stringr) library(scran) library(ggpubr) library(viridis) library(pheatmap) library(parallel) library(reticulate) library(SCENIC) library(feather) library(AUCell) library(RcisTarget) library(Matrix) library(foreach) library(doParallel) library(clusterProfiler) # st libraries library(RColorBrewer) library(Rfast2) library(SeuratDisk) library(abcCellmap) library(biomaRt) library(copykat) library(gelnet) library(ggplot2) library(parallelDist) library(patchwork) library(markdown) library(getopt) library(tools) library(HemaScopeR) Set the paths for the output results, and the Python installation. output.dir = './output' pythonPath = '/home/anaconda3/envs/HemaScopeR/bin/python' Create folders for saving the results of HemaScopeR analysis. wdir <- getwd() if(is.null(pythonPath)==FALSE){ reticulate::use_python(pythonPath) }else{print('Please set the path of Python.')} if (!file.exists(paste0(output.dir, '/HemaScopeR_results'))) { dir.create(paste0(output.dir, '/HemaScopeR_results'),recursive =T) } output.dir <- paste0(output.dir,'/HemaScopeR_results') if (!file.exists(paste0(output.dir, '/RDSfiles/'))) { dir.create(paste0(output.dir, '/RDSfiles/')) } #set the path for loading previous results, if necessary previous_results_path <- paste0(output.dir, '/RDSfiles/') # if (Whether_load_previous_results) { # print('Loading the previous results...') # Load_previous_results(previous_results_path = previous_results_path) # } 4.2 Step 1. Load the input data Create a folder for step1 print('Step1. 
Input data.') if (!file.exists(paste0(output.dir, '/Step1.Input_data/'))) { dir.create(paste0(output.dir, '/Step1.Input_data/')) } Set the parameters for loading the data sets. input.data.dirs = c('./SRR7881399/outs/filtered_feature_bc_matrix')#, #'./SRR7881400/outs/filtered_feature_bc_matrix', #'./SRR7881401/outs/filtered_feature_bc_matrix', #'./SRR7881402/outs/filtered_feature_bc_matrix', #'./SRR7881403/outs/filtered_feature_bc_matrix' project.names = c('SRR7881399')#, #'SRR7881400', #'SRR7881401', #'SRR7881402', #'SRR7881403' gene.column = 2 min.cells = 10 min.feature = 200 mt.pattern = '^MT-' # set '^mt-' for mouse data Step1_Input_Data.type = 'cellranger-count' loom.files.path ="./SRR7881399/loom" Load the data sets file.copy(from = input.data.dirs, to = paste0(output.dir,'/Step1.Input_data/'), recursive = TRUE) if(Step1_Input_Data.type == 'cellranger-count'){ if(length(input.data.dirs) > 1){ input.data.list <- c() for (i in 1:length(input.data.dirs)) { sc_data.temp <- Read10X(data.dir = input.data.dirs[i], gene.column = gene.column) sc_object.temp <- CreateSeuratObject(counts = sc_data.temp, project = project.names[i], min.cells = min.cells, min.feature = min.feature) sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern) input.data.list <- c(input.data.list, sc_object.temp)} }else{ sc_data <- Read10X(data.dir = input.data.dirs, gene.column = gene.column) sc_object <- CreateSeuratObject(counts = sc_data, project = project.names, min.cells = min.cells, min.feature = min.feature) sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern) } }else if(Step1_Input_Data.type == 'Seurat'){ if(length(input.data.dirs) > 1){ input.data.list <- c() for (i in 1:length(input.data.dirs)) { sc_object.temp <- readRDS(input.data.dirs[i]) sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern) input.data.list <- c(input.data.list, sc_object.temp) } }else{ sc_object <- 
readRDS(input.data.dirs) sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern) } }else if(Step1_Input_Data.type == 'Matrix'){ if(length(input.data.dirs) > 1){ input.data.list <- c() for (i in 1:length(input.data.dirs)) { sc_data.temp <- readRDS(input.data.dirs[i]) sc_object.temp <- CreateSeuratObject(counts = sc_data.temp, project = project.names[i], min.cells = min.cells, min.feature = min.feature) sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern) input.data.list <- c(input.data.list, sc_object.temp)} }else{ sc_data <- readRDS(input.data.dirs) sc_object <- CreateSeuratObject(counts = sc_data, project = project.names, min.cells = min.cells, min.feature = min.feature) sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern) } }else{ stop('Please input data generated by the cellranger-count software, or a Seurat object, or a gene expression matrix. HemaScopeR does not support other formats of input data.') } Save the variables after executing each step, if necessary. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } 4.3 Step 2. Quality Control In this step, the following quality control steps will be performed: Normalize data using the LogNormalize method. Find variable features using the vst method. Scale data using the identified variable features and specified variables to regress out. Perform principal component analysis (PCA) on the scaled data. Find K nearest neighbors based on PCA dimensions. Perform clustering analysis based on the found neighbors. Optionally, remove doublets using doubletFinder. Optionally, integrate multiple datasets by removing batch effects. 
4.3.1 Function arguments: nFeature_RNA.limit: The cutoff of the minimum number of detected genes in each cell. percent.mt.limit: The cutoff of the maximum percentage of mitochondrial genes in each cell. scale.factor: The scale factor for the ‘data’ slot in the Seurat object. nfeatures: The number of selected highly variable features for downstream analysis. ndims: The number of principal components in PCA. vars.to.regress: Variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito. (ScaleData in Seurat) PCs: Which dimensions to use as input features. (RunTSNE and RunUMAP in Seurat) resolution: Value of the resolution parameter, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities. (FindClusters in Seurat) n.neighbors: Defines k for the k-nearest neighbor algorithm. (FindNeighbors in Seurat) percentage: Assuming ‘percentage’ doublet formation rate - tailor for your dataset. The default value is 0.05. doublerFinderwraper.PCs: Which dimensions to use as input features for doubletFinder. doublerFinderwraper.pN: The percentage of real-artificial data for doubletFinder. doublerFinderwraper.pK: The pK parameter controls the doublet cell detection by determining the number of nearest neighbors and influencing the calculation of pANN scores and the final cell classification results. Adjusting the pK value allows optimization of the doublet cell detection process based on specific data and analysis requirements. 4.3.2 codes for running step2 Create a folder for saving the results of quality control. print('Step2. Quality control.') if (!file.exists(paste0(output.dir, '/Step2.Quality_control/'))) { dir.create(paste0(output.dir, '/Step2.Quality_control/')) } Set the parameters for quality control. 
# quality control nFeature_RNA.limit = 200 percent.mt.limit = 20 # preprocessing nfeatures = 3000 scale.factor = 10000 ndims = 50 vars.to.regress = NULL PCs = 1:35 resolution = 0.4 n.neighbors = 50 # removing doublets Step2_Quality_Control.RemoveDoublets = TRUE doublet.percentage = 0.04 doublerFinderwraper.PCs = 1:20 doublerFinderwraper.pN = 0.25 doublerFinderwraper.pK = 0.1 # removing batch effect Step2_Quality_Control.RemoveBatches = TRUE Run the quality control process. if(length(input.data.dirs) > 1){ # preprocess and quality control for multiple scRNA-Seq data sets sc_object <- QC_multiple_scRNASeq(seuratObjects = input.data.list, datasetID = project.names, output.dir = paste0(output.dir,'/Step2.Quality_control/'), Step2_Quality_Control.RemoveBatches = Step2_Quality_Control.RemoveBatches, Step2_Quality_Control.RemoveDoublets = Step2_Quality_Control.RemoveDoublets, nFeature_RNA.limit = nFeature_RNA.limit, percent.mt.limit = percent.mt.limit, scale.factor = scale.factor, nfeatures = nfeatures, ndims = ndims, vars.to.regress = vars.to.regress, PCs = PCs, resolution = resolution, n.neighbors = n.neighbors, percentage = doublet.percentage, doublerFinderwraper.PCs = doublerFinderwraper.PCs, doublerFinderwraper.pN = doublerFinderwraper.pN, doublerFinderwraper.pK = doublerFinderwraper.pK ) }else{ # preprocess and quality control for single scRNA-Seq data set sc_object <- QC_single_scRNASeq(sc_object = sc_object, datasetID = project.names, output.dir = paste0(output.dir,'/Step2.Quality_control/'), Step2_Quality_Control.RemoveDoublets = Step2_Quality_Control.RemoveDoublets, nFeature_RNA.limit = nFeature_RNA.limit, percent.mt.limit = percent.mt.limit, scale.factor = scale.factor, nfeatures = nfeatures, vars.to.regress = vars.to.regress, ndims = ndims, PCs = PCs, resolution = resolution, n.neighbors = n.neighbors, percentage = doublet.percentage, doublerFinderwraper.PCs = doublerFinderwraper.PCs, doublerFinderwraper.pN = doublerFinderwraper.pN, doublerFinderwraper.pK = 
doublerFinderwraper.pK) } Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } 4.3.3 Outputs Figure 4.1: Violin plots showing the nFeature, nCount and percent.mt for each sample Figure 4.2: Figures showing the correlation between nFeature and nCount, as well as between nCount and percent.mt Figure 4.3: Figures showing the variable features used for downstream analysis Figure 4.4: ElbowPlot showing suitable number of PCs used for further analysis Figure 4.5: UMAP plot showing doublets found by DoubletFinder 4.4 Step 3. Clustering Create a folder for saving the results of Louvain clustering. print('Step3. Clustering.') if (!file.exists(paste0(output.dir, '/Step3.Clustering/'))) { dir.create(paste0(output.dir, '/Step3.Clustering/')) } Set the parameters for clustering. PCs = 1:35 resolution = 0.4 n.neighbors = 50 Run Louvain clustering. 
if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){graph.name <- 'integrated_snn'}else{graph.name <- 'RNA_snn'} sc_object <- FindNeighbors(sc_object, dims = PCs, k.param = n.neighbors, force.recalc = TRUE) sc_object <- FindClusters(sc_object, resolution = resolution, graph.name = graph.name) sc_object@meta.data$seurat_clusters <- as.character(as.numeric(sc_object@meta.data$seurat_clusters)) # plot clustering pdf(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','tsne_cluster.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "tsne", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() pdf(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','umap_cluster.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "umap", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','tsne_cluster.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "tsne", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','umap_cluster.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "umap", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.6: UMAP plot showing clustering results 4.5 Step 4. 
Identify Cell Types In this step, users can predict the cell types of hematopoietic cells by implementing two approaches (Scmap and Seurat) through the abcCellmap package. Cells are labeled by 43 different RNA clusters according to unsupervised clustering of single-cell transcriptional profiles, and also labeled by 32 immunophenotypic cell types. In addition, users can use Copykat to measure copy number variation (CNV) and determine the ploidy of each cell. 4.5.1 codes for running abcCellmap Create a folder for saving the results of cell type identification. print('Step4. Identify cell types automatically.') if (!file.exists(paste0(output.dir, '/Step4.Identify_Cell_Types/'))) { dir.create(paste0(output.dir, '/Step4.Identify_Cell_Types/')) } Set the path for the database. databasePath = "~/HemaScopeR/database/" Set the parameters for cell type identification. Step4_Use_Which_Labels = 'clustering' Step4_Cluster_Labels = NULL Step4_Changed_Labels = NULL Org = 'hsa' ncores = 10 Run the cell type identification process.
sc_object <- run_cell_annotation(object = sc_object, assay = 'RNA', species = Org, output.dir = paste0(output.dir,'/Step4.Identify_Cell_Types/')) if(Org == 'hsa'){ load(paste0(databasePath,"/HematoMap.reference.rdata")) #the data can be downloaded via the link https://cloud.tsinghua.edu.cn/d/759fd04333274d3f9946 if(length(intersect(rownames(HematoMap.reference), rownames(sc_object))) < 1000){ HematoMap.reference <- RenameGenesSeurat(obj = HematoMap.reference, newnames = toupper(rownames(HematoMap.reference)), gene.use = rownames(HematoMap.reference), de.assay = "RNA", lassays = "RNA") } if(sc_object@active.assay == 'integrated'){ DefaultAssay(sc_object) <- 'RNA' sc_object <- mapDataToRef(ref_object = HematoMap.reference, ref_labels = HematoMap.reference@meta.data$CellType, query_object = sc_object, PCs = PCs, output.dir = paste0(output.dir, '/Step4.Identify_Cell_Types/')) DefaultAssay(sc_object) <- 'integrated' }else{ sc_object <- mapDataToRef(ref_object = HematoMap.reference, ref_labels = HematoMap.reference@meta.data$CellType, query_object = sc_object, PCs = PCs, output.dir = paste0(output.dir, '/Step4.Identify_Cell_Types/')) } } Set the cell labels. 
# set the cell labels if(Step4_Use_Which_Labels == 'clustering'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$seurat_clusters Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.1'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$Seurat.RNACluster Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.2'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$scmap.RNACluster Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.3'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$Seurat.Immunophenotype Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.4'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$scmap.Immunophenotype Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'HematoMap'){ if(Org == 'hsa'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$predicted.id Idents(sc_object) <- sc_object@meta.data$selectLabels }else{print("'HematoMap' is only applicable to human data ('Org' = 'hsa').")} }else if(Step4_Use_Which_Labels == 'changeLabels'){ if (!is.null(Step4_Cluster_Labels) && !is.null(Step4_Changed_Labels) && length(Step4_Cluster_Labels) == length(Step4_Changed_Labels)){ sc_object@meta.data$selectLabels <- plyr::mapvalues(sc_object@meta.data$seurat_clusters, from = as.character(Step4_Cluster_Labels), to = as.character(Step4_Changed_Labels), warn_missing = FALSE) Idents(sc_object) <- sc_object@meta.data$selectLabels }else{ print("Please input the 'Step4_Cluster_Labels' parameter as Seurat clustering labels, and the 'Step4_Changed_Labels' parameter as new labels. 
Please note that these two parameters should be of equal length.") } }else{ print('Please set the "Step4_Use_Which_Labels" parameter as "clustering", "abcCellmap.1", "abcCellmap.2", "HematoMap" or "changeLabels".') } Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.7: UMAP plots showing cell type annotation results Figure 4.8: Immunophenotype and RNACluster label predicted by scmap Figure 4.9: Immunophenotype and RNACluster label predicted by Seurat 4.5.2 codes for running the CNV analysis sc_CNV(sc_object=sc_object, save_path=paste0(output.dir,'/Step4.Identify_Cell_Types/'), assay = 'RNA', LOW.DR = 0.05, #refer to the Copykat documentation for detailed explanations of the parameters UP.DR = 0.1, win.size = 25, distance = "euclidean", genome = NULL, n.cores = ncores, #note: this step will take a long time, using more ncores could shorten the running time species = Org) Figure 4.10: copykat heatmap Figure 4.11: UMAP plot showing CNV state predicted by copykat 4.6 Step 5. Visualization In this step, users can obtain statistics on the numbers and proportions of cell groups, and use three dimensionality reduction methods (TSNE, UMAP, phateR) to visualize the results. 4.6.1 codes for performing three dimensionality reduction methods Create a folder for saving the visualization results. print('Step5. Visualization.') if (!file.exists(paste0(output.dir, '/Step5.Visualization/'))) { dir.create(paste0(output.dir, '/Step5.Visualization/')) } Perform visualization using UMAP and TSNE.
# plot cell types pdf(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','tsne cell types.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "tsne", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() pdf(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','umap cell types.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "umap", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','tsne cell types.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "tsne", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','umap cell types.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "umap", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() Figure 4.12: UMAP and TSNE visualization Set the parameters for phateR. phate.knn = 50 #The number of nearest neighbors to consider in the phateR algorithm. Default 50. phate.npca = 20 #The number of principal components to use in the phateR algorithm. Default 20. phate.t = 10 #The t-value for the phateR algorithm, which controls the level of exploration. Default 10. phate.ndim = 2 #The number of dimensions for the output embedding in the phateR algorithm. Default 2. Run phateR for dimensional reduction and visualization. 
# run phateR if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){ DefaultAssay(sc_object) <- 'integrated' }else{ DefaultAssay(sc_object) <- 'RNA'} if(!is.null(pythonPath)){ run_phateR(sc_object = sc_object, output.dir = paste0(output.dir,'/Step5.Visualization/'), pythonPath = pythonPath, phate.knn = phate.knn, phate.npca = phate.npca, phate.t = phate.t, phate.ndim = phate.ndim) } Figure 4.13: phateR result 4.6.2 codes for calculating the proportions The statistical results for the numbers and proportions of cell groups. # statistical results cells_labels <- as.data.frame(cbind(rownames(sc_object@meta.data), as.character(sc_object@meta.data$selectLabels))) colnames(cells_labels) <- c('cell_id', 'cluster_id') cluster_counts <- cells_labels %>% group_by(cluster_id) %>% summarise(count = n()) total_cells <- nrow(cells_labels) cluster_counts <- cluster_counts %>% mutate(proportion = count / total_cells) cluster_counts <- as.data.frame(cluster_counts) cluster_counts$percentages <- scales::percent(cluster_counts$proportion, accuracy = 0.1) cluster_counts <- cluster_counts[,-which(colnames(cluster_counts)=='proportion')] cluster_counts$cluster_id_count_percentages <- paste(cluster_counts$cluster_id, " (", cluster_counts$count, ' cells; ', cluster_counts$percentages, ")", sep='') cluster_counts <- cluster_counts[order(cluster_counts$count, decreasing = TRUE),] cluster_counts <- rbind(cluster_counts, c('Total', sum(cluster_counts$count), '100%', 'all cells')) sc_object@meta.data$cluster_id_count_percentages <- mapvalues(sc_object@meta.data$selectLabels, from=cluster_counts$cluster_id, to=cluster_counts$cluster_id_count_percentages, warn_missing=FALSE) colnames(sc_object@meta.data)[which(colnames(sc_object@meta.data) == 'cluster_id_count_percentages')] <- paste('Total ', nrow(sc_object@meta.data), ' cells', sep='') cluster_counts <- cluster_counts[,-which(colnames(cluster_counts)=='cluster_id_count_percentages')] colnames(cluster_counts) <- c('Cell 
types', 'Cell counts', 'Percentages') # names(colorvector) <- mapvalues(names(colorvector), # from=cluster_counts$cluster_id, # to=cluster_counts$cluster_id_count_percentages, # warn_missing=FALSE) write.csv(cluster_counts, file=paste(paste0(output.dir, '/Step5.Visualization/'), '/cell types_cell counts_percentages.csv', sep=''), quote=FALSE, row.names=FALSE) The UMAP visualization. pdf(paste(paste0(output.dir, '/Step5.Visualization'), '/cell types_cell counts_percentages_umap.pdf', sep=''), width = 14, height = 6) print(DimPlot(sc_object, reduction = "umap", group.by = paste('Total ', nrow(sc_object@meta.data), ' cells', sep=''), label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.14: UMAP plot showing cell type and corresponding proportion 4.7 Step 6. Find DEGs In this step, users can find DEGs (differentially expressed genes) across different cell type groups using FindAllMarkers, use GPTCelltype to predict cell labels, perform GO and KEGG enrichment analysis, and perform subnetwork analysis for each cell type group. 4.7.1 codes for finding DEGs Set the parameters for identifying differentially expressed genes. min.pct = 0.25 logfc.threshold = 0.25 Create a folder for the DEGs analysis. print('Step6. Find DEGs.') if (!file.exists(paste0(output.dir, '/Step6.Find_DEGs/'))) { dir.create(paste0(output.dir, '/Step6.Find_DEGs/')) } Identify DEGs using Wilcoxon Rank-Sum Test.
sc_object.markers <- FindAllMarkers(sc_object, only.pos = TRUE, min.pct = min.pct, logfc.threshold = logfc.threshold) write.csv(sc_object.markers, file = paste0(paste0(output.dir, '/Step6.Find_DEGs/'),'sc_object.markerGenes.csv'), quote=FALSE) # visualization sc_object.markers.top5 <- sc_object.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC) pdf(paste0(paste0(output.dir, '/Step6.Find_DEGs/'), 'sc_object_markerGenesTop5.pdf'), width = 0.5*length(unique(sc_object.markers.top5$gene)), height = 0.5*length(unique(Idents(sc_object)))) print(DotPlot(sc_object, features = unique(sc_object.markers.top5$gene), cols=c("lightgrey",'red'))+theme(axis.text.x =element_text(angle = 45, vjust = 1, hjust = 1))) dev.off() png(paste0(paste0(output.dir, '/Step6.Find_DEGs/'), 'sc_object_markerGenesTop5.png'), width = 20*length(unique(sc_object.markers.top5$gene)), height = 30*length(unique(Idents(sc_object)))) print(DotPlot(sc_object, features = unique(sc_object.markers.top5$gene), cols=c("lightgrey",'red'))+theme(axis.text.x =element_text(angle = 45, vjust = 1, hjust = 1))) dev.off() Figure 4.15: Dotplot showing marker genes of each cell type group 4.7.2 codes for using GPTCelltype Set the parameters for GPTCelltype. your_openai_API_key = '' tissuename = 'human bone marrow' gptmodel = 'gpt-3.5' Use GPTCelltype to assist cell type annotation. GPT_annotation( marker.genes = sc_object.markers, your_openai_API_key = your_openai_API_key, tissuename = tissuename, gptmodel = gptmodel, output.dir = paste0(output.dir, '/Step6.Find_DEGs/')) 4.7.3 Perform GO and KEGG enrichment. 
# GO enrichment if(Org=='mmu'){ OrgDb <- 'org.Mm.eg.db' }else if(Org=='hsa'){ OrgDb <- 'org.Hs.eg.db' }else{ stop("Org should be 'mmu' or 'hsa'.") } HemaScopeREnrichment(DEGs=sc_object.markers, OrgDb=OrgDb, output.dir=paste0(output.dir, '/Step6.Find_DEGs/')) Figure 4.16: Barplot showing GO(BP)and KEGG enrichment results of each cell type group 4.7.4 Perform subnetwork analysis Create a folder for saving the results of gene network analysis. if (!file.exists(paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/'))) { dir.create(paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/')) } Perform gene network analysis. OpenXGR_SAG(sc_object.markers = sc_object.markers, output.dir = paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/'), subnet.size = 10) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.17: Figure showing subnetwork of each cell type group identified by OpenXGR 4.8 Step 7. Assign Cell Cycles This step assigns cell cycle phases by analyzing cell cycle-related genes and generates plots of the cell cycle analysis results. 4.8.1 Function arguments: sc_object: A Seurat object containing single-cell RNA sequencing data. counts_matrix: The ‘counts’ slot in the Seurat object. data_matrix: The ‘data’ slot in the Seurat object. cellcycleCutoff: The cutoff value for distinguishing between cycling and quiescent cells. Cells with a G1G2Score below this cutoff are considered quiescent. cellTypeOrders: The order of cell types for visualization. If not provided, the function will use the unique cell types in the input Seurat object. databasePath: The path to the database required for the analysis. 
Org: A character vector specifying the species of cell cycle genes, can be ‘mmu’ (mouse) or ‘hsa’ (human). 4.8.2 codes for step7 Create a folder for saving the results of cell cycle analysis. print('Step7. Assign cell cycles.') if (!file.exists(paste0(output.dir, '/Step7.Assign_cell_cycles/'))) { dir.create(paste0(output.dir, '/Step7.Assign_cell_cycles/')) } Set the parameters for the cell cycle analysis. cellcycleCutoff = NULL Run the cell cycle analysis. datasets.before.batch.removal <- readRDS(paste0(paste0(output.dir, '/RDSfiles/'),'datasets.before.batch.removal.rds')) sc_object <- cellCycle(sc_object=sc_object, counts_matrix = GetAssayData(object = datasets.before.batch.removal, slot = "counts")%>%as.matrix(), data_matrix = GetAssayData(object = datasets.before.batch.removal, slot = "data")%>%as.matrix(), cellcycleCutoff = cellcycleCutoff, cellTypeOrders = unique(sc_object@meta.data$selectLabels), output.dir=paste0(output.dir, '/Step7.Assign_cell_cycles/'), databasePath = databasePath, Org = Org) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } 4.8.3 Outputs Figure 4.18: Barplot showing the proportion of different cell cycle within each cell type group Figure 4.19: Density plot showing the distribution of cell cycle scores 4.9 Step 8. Calculate Heterogeneity This step quantifies cell heterogeneity by computing Spearman correlation coefficients between cells within the same cell type groups. 4.9.1 Function arguments: expression_matrix: A numeric matrix representing the expression data, where rows are genes and columns are cells. The matrix should be appropriately preprocessed and filtered before using this function. 
cell_types_groups: A data frame specifying cell type annotations for each cell, including cell type labels and group information. cellTypeOrders: The order of cell types for visualization. If not provided, the function will use the unique cell types in the input cell_types_groups. 4.9.2 codes for step8 Create a folder for saving the results of heterogeneity calculation. print('Step8. Calculate heterogeneity.') if (!file.exists(paste0(output.dir, '/Step8.Calculate_heterogeneity/'))) { dir.create(paste0(output.dir, '/Step8.Calculate_heterogeneity/')) } Run heterogeneity calculation process. expression_matrix <- GetAssayData(object = datasets.before.batch.removal, slot = "data")%>%as.matrix() expression_matrix <- expression_matrix[,rownames(sc_object@meta.data)] cell_types_groups <- as.data.frame(cbind(sc_object@meta.data$selectLabels, sc_object@meta.data$datasetID)) colnames(cell_types_groups) <- c('clusters', 'datasetID') if(is.null(ViolinPlot.cellTypeOrders)){ cellTypes_orders <- unique(sc_object@meta.data$selectLabels) }else{ cellTypes_orders <- ViolinPlot.cellTypeOrders } heterogeneity(expression_matrix = expression_matrix, cell_types_groups = cell_types_groups, cellTypeOrders = cellTypes_orders, output.dir = paste0(output.dir, '/Step8.Calculate_heterogeneity/')) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.20: Box plot showing the Spearman correlation coefficients between cells within the same cell type groups(here we take data including more samples as an example) 4.10 Step 9. Violin Plot for Marker Genes This step generates violin plots for marker genes across different cell types. 
4.10.1 Function arguments: dataMatrix: A data frame or matrix representing the expression data, where rows are cells and columns are genes. features: A character vector specifying the marker genes to plot in the violin plots. CellTypes: A factor vector containing cell type annotations for each cell. cellTypeOrders: A character vector specifying the order of cell types for plotting. Defaults to unique values in CellTypes. cellTypeColors: A character vector specifying the colors to use for cell type groups. Defaults to a color palette. 4.10.2 codes for step9 Create a folder for saving the violin plots of marker genes. print('Step9. Violin plot for marker genes.') if (!file.exists(paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/'))) { dir.create(paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/')) } Run violin plot visualization. if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){ DefaultAssay(sc_object) <- 'integrated' }else{ DefaultAssay(sc_object) <- 'RNA'} dataMatrix <- GetAssayData(object = sc_object, slot = "scale.data") if(is.null(marker.genes)&(Org == 'mmu')){ # mpp genes are from 'The bone marrow microenvironment at single cell resolution' # the other genes are from 'single cell characterization of haematopoietic progenitors and their trajectories in homeostasis and perturbed haematopoiesis' # the aliases of these genes were changed in gecodeM16:Gpr64 -> Adgrg2, Sdpr -> Cavin2, Hbb-b1 -> Hbb-bs, Sfpi1 -> Spi1 HSC_lineage_signatures <- c('Slamf1', 'Itga2b', 'Kit', 'Ly6a', 'Bmi1', 'Gata2', 'Hlf', 'Meis1', 'Mpl', 'Mcl1', 'Gfi1', 'Gfi1b', 'Hoxb5') Mpp_genes <- c('Mki67', 'Mpo', 'Elane', 'Ctsg', 'Calr') Erythroid_lineage_signatures <- c('Klf1', 'Gata1', 'Mpl', 'Epor', 'Vwf', 'Zfpm1', 'Fhl1', 'Adgrg2', 'Cavin2','Gypa', 'Tfrc', 'Hbb-bs', 'Hbb-y') Lymphoid_lineage_signatures <- c('Tcf3', 'Ikzf1', 'Notch1', 'Flt3', 'Dntt', 'Btg2', 'Tcf7', 'Rag1', 'Ptprc', 'Ly6a', 'Blnk') Myeloid_lineage_signatures <- c('Gfi1', 'Spi1', 'Mpo', 
'Csf2rb', 'Csf1r', 'Gfi1b', 'Hk3', 'Csf2ra', 'Csf3r', 'Sp1', 'Fcgr3') marker.genes <- c(HSC_lineage_signatures, Mpp_genes, Erythroid_lineage_signatures, Lymphoid_lineage_signatures, Myeloid_lineage_signatures) }else if(is.null(marker.genes)&(Org == 'hsa')){ HSPCs_lineage_signatures <- c('CD34','KIT','AVP','FLT3','MME','CD7','CD38','CSF1R','FCGR1A','MPO','ELANE','IL3RA') Myeloids_lineage_signatures <- c('LYZ','CD36','MPO','FCGR1A','CD4','CD14','CD300E','ITGAX','FCGR3A','FLT3','AXL', 'SIGLEC6','CLEC4C','IRF4','LILRA4','IL3RA','IRF8','IRF7','XCR1','CD1C','THBD', 'MRC1','CD34','KIT','ITGA2B','PF4','CD9','ENG','KLF','TFRC') B_cells_lineage_signatures <- c('CD79A','IGLL1','RAG1','RAG2','VPREB1','MME','IL7R','DNTT','MKI67','PCNA','TCL1A','MS4A1','IGHD','CD27','IGHG3') T_NK_cells_lineage_signatures <- c('CD3D','CD3E','CD8A','CCR7','IL7R','SELL','KLRG1','CD27','GNLY', 'NKG7','PDCD1','TNFRSF9','LAG3','CD160','CD4','CD40LG','IL2RA', 'FOXP3','DUSP4','IL2RB','KLRF1','FCGR3A','NCAM1','XCL1','MKI67','PCNA','KLRF') marker.genes <- c(HSPCs_lineage_signatures, Myeloids_lineage_signatures, B_cells_lineage_signatures, T_NK_cells_lineage_signatures) } if(is.null(ViolinPlot.cellTypeOrders)){ ViolinPlot.cellTypeOrders <- unique(sc_object@meta.data$selectLabels) } if(is.null(ViolinPlot.cellTypeColors)){ ViolinPlot.cellTypeColors <- viridis::viridis(length(unique(sc_object@meta.data$selectLabels))) } combinedViolinPlot(dataMatrix = dataMatrix, features = marker.genes, CellTypes = sc_object@meta.data$selectLabels, cellTypeOrders = ViolinPlot.cellTypeOrders, cellTypeColors = ViolinPlot.cellTypeColors, Org = Org, output.dir = paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/'), databasePath = databasePath) Save the variables. 
# Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.21: Violin plot showing the expression of marker genes between cell type groups 4.11 Step 10. Calculate Lineage Scores This step calculates lineage scores for specified gene sets based on the provided expression data. It then generates a heatmap of lineage scores and a heatmap of gene expression patterns. 4.11.1 Function arguments: expression_matrix: A data frame or matrix representing the expression data, where rows are cells and columns are genes. cellTypes: A character vector specifying cell type annotations for each cell. e.g. c(“HSC”,“HSC”,“HSC”,“MPP1”,“MPP2”,“MPP2”,“MPP2” …) cellTypes_orders: A character vector specifying the order of cell types for plotting. e.g. c(“HSC”,“MPP1”,“MPP2”) cellTypes_colors: A character vector specifying the colors to use for cell type groups. e.g. c(“HSC” = ‘#006d2c’,“MPP1” = ‘#4292c6’,“MPP2”= ‘#810f7c’). groups: A character vector specifying groups or clusters within each cell type. groups_orders: A character vector specifying the order of groups or clusters for plotting. groups_colors: A character vector specifying the colors to use for group or cluster annotations. e.g. c(‘group1’=‘#d73027’,‘group2’=‘#2171b5’) lineage.genelist: A list of gene sets representing lineage markers. lineage.names: A character vector specifying the names of the lineages. 4.11.2 codes for step10 Create a folder for saving the results of lineage score calculation. print('Step10. 
Calculate lineage scores.') # we use normalized data here if (!file.exists(paste0(output.dir, '/Step10.Calculate_lineage_scores/'))) { dir.create(paste0(output.dir, '/Step10.Calculate_lineage_scores/')) } Run lineage score calculation. if(is.null(lineage.genelist)&is.null(lineage.names)&(Org == 'mmu')){ lineage.genelist <- c(list(HSC_lineage_signatures), list(Mpp_genes), list(Erythroid_lineage_signatures), list(Lymphoid_lineage_signatures), list(Myeloid_lineage_signatures)) lineage.names <- c('HSC_lineage_signatures', 'Mpp_genes', 'Erythroid_lineage_signatures', 'Lymphoid_lineage_signatures', 'Myeloid_lineage_signatures') }else if(is.null(lineage.genelist)&is.null(lineage.names)&(Org == 'hsa')){ lineage.genelist <- c(list(HSPCs_lineage_signatures), list(Myeloids_lineage_signatures), list(B_cells_lineage_signatures), list(T_NK_cells_lineage_signatures)) lineage.names <- c('HSPCs_lineage_signatures', 'Myeloids_lineage_signatures', 'B_cells_lineage_signatures', 'T_NK_cells_lineage_signatures') } if(is.null(ViolinPlot.cellTypeOrders)){ cellTypes_orders <- unique(sc_object@meta.data$selectLabels) }else{ cellTypes_orders <- ViolinPlot.cellTypeOrders } lineageScores(expression_matrix = expression_matrix, cellTypes = sc_object@meta.data$selectLabels, cellTypes_orders = cellTypes_orders, cellTypes_colors = ViolinPlot.cellTypeColors, groups = sc_object@meta.data$datasetID, groups_orders = unique(sc_object@meta.data$datasetID), groups_colors = groups_colors, lineage.genelist = lineage.genelist, lineage.names = lineage.names, Org = Org, output.dir = paste0(output.dir, '/Step10.Calculate_lineage_scores/'), databasePath = databasePath) Save the variables. 
# Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.22: Heatmap showing the expression of lineage genes for each cell Figure 4.23: Heatmap showing the score of lineage signatures for each cell 4.12 Step 11. GSVA This step runs GSVA analysis, which calculates enrichment scores for gene sets in each cell using the provided gene list. It also performs differential GSVA analysis between specified cell groups and generates heatmaps of the results. 4.12.1 Function arguments: sc_object: A Seurat object containing the single-cell RNA-seq data. GSVA.genelist: A list of gene sets for GSVA analysis. GSVA.cellTypes: A character vector specifying the cell types or labels for each cell. GSVA.cellTypes.orders: A character vector specifying the order of cell types for visualization. GSVA.cellGroups: A character vector specifying the cell groups or conditions for each cell. GSVA.identify.cellType.features: Logical. If TRUE, identify cell type-specific features. GSVA.identify.diff.features: Logical. If TRUE, identify differentially expressed features between cell groups. GSVA.comparison.design: A list specifying the experimental design for differential GSVA analysis. OrgDB: An organism-specific annotation database (OrgDb) for gene symbol conversion. e.g. org.Mm.eg.db or org.Hs.eg.db. 4.12.2 codes for running step11 Create a folder for saving the results of GSVA. print('Step11. GSVA.') if (!file.exists(paste0(output.dir, '/Step11.GSVA/'))) { dir.create(paste0(output.dir, '/Step11.GSVA/')) } Run GSVA. 
setwd(wdir) if(Org=='mmu'){ load(paste0(databasePath,"/mouse_c2_v5p2.rdata")) GSVA.genelist <- Mm.c2 assign('OrgDB', org.Mm.eg.db) }else if(Org=='hsa'){ load(paste0(databasePath,"/human_c2_v5p2.rdata")) GSVA.genelist <- Hs.c2 assign('OrgDB', org.Hs.eg.db) }else{ stop("Org should be 'mmu' or 'hsa'.") } if(is.null(ViolinPlot.cellTypeOrders)){ cellTypes_orders <- unique(sc_object@meta.data$selectLabels) }else{ cellTypes_orders <- ViolinPlot.cellTypeOrders } run_GSVA(sc_object = sc_object, GSVA.genelist = GSVA.genelist, GSVA.cellTypes = sc_object@meta.data$selectLabels, GSVA.cellTypes.orders = cellTypes_orders, GSVA.cellGroups = sc_object@meta.data$datasetID, GSVA.identify.cellType.features = Step11_GSVA.identify.cellType.features, GSVA.identify.diff.features = Step11_GSVA.identify.diff.features, GSVA.comparison.design = Step11_GSVA.comparison.design, OrgDB = OrgDB, output.dir = paste0(output.dir, '/Step11.GSVA/')) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.24: GSVA Heatmap showing the enriched pathways of each cell type group 4.13 Step 12. Construct Trajectories In this step, users can construct trajectories using three methods: Monocle2, Slingshot and scVelo. 4.13.1 data preparation Load gene symbols and Ensembl IDs.
DefaultAssay(sc_object) <- 'RNA' countsSlot <- GetAssayData(object = sc_object, slot = "counts") gene_metadata <- as.data.frame(rownames(countsSlot)) rownames(gene_metadata) <- gene_metadata[,1] if(Org == 'mmu'){ load(paste0(databasePath,"/mouseGeneSymbolandEnsembleID.rdata")) gene_metadata $ ensembleID <- mapvalues(x = gene_metadata[,1], from = mouseGeneSymbolandEnsembleID$geneName, to = mouseGeneSymbolandEnsembleID$ensemblIDNoDot, warn_missing = FALSE) }else if(Org == 'hsa'){ load(paste0(databasePath,"/humanGeneSymbolandEnsembleID.rdata")) gene_metadata $ ensembleID <- mapvalues(x = gene_metadata[,1], from = humanGeneSymbolandEnsembleID$geneName, to = humanGeneSymbolandEnsembleID$ensemblIDNoDot, warn_missing = FALSE) } colnames(gene_metadata) <- c('gene_short_name','ensembleID') Create folders for saving the results of trajectory construction. print('Step12. Construct trajectories.') if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/')) } if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/monocle2/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/monocle2/')) } if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/slingshot/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/slingshot/')) } if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/scVelo/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/scVelo/')) } Prepare the input data. 
if(is.null(Step12_Construct_Trajectories.clusters)){ sc_object.subset <- sc_object countsSlot.subset <- GetAssayData(object = sc_object.subset, slot = "counts") }else{ sc_object.subset <- subset(sc_object, subset = selectLabels %in% Step12_Construct_Trajectories.clusters) countsSlot.subset <- GetAssayData(object = sc_object.subset, slot = "counts") } 4.13.2 monocle2 Running monocle2 involves several steps: Creating a Monocle cellDataSet using the provided cellData, phenoData, and featureData. Estimating size factors, dispersions, and detecting highly variable genes. Performing differential gene expression analysis to identify genes associated with cell state changes. Ordering cells along the inferred trajectories and reducing dimensionality. Generating and saving trajectory plots, including cell trajectory by “State” and by “Cell Types.” 4.13.2.1 Function arguments: cellData: A matrix of gene expression values, where columns represent cells and rows represent genes. phenoData: A data frame containing cell metadata, such as cell labels or other relevant information. featureData: A data frame containing information about features (genes) in the dataset. lowerDetectionLimit: The lower detection limit for gene expression. Genes with expression values below this limit will be treated as non-detected. expressionFamily: The family of the expression distribution used in Monocle analysis. cellTypes: A character vector specifying cell types or labels used for coloring in trajectory plots. monocle.orders: A character vector specifying the order of cell types in the Monocle analysis. monocle.colors: A character vector specifying colors for cell types in trajectory plots. 
4.13.2.2 codes for running monocle2 phenoData <- sc_object.subset@meta.data featureData <- gene_metadata run_monocle(cellData = countsSlot.subset, phenoData = phenoData, featureData = featureData, lowerDetectionLimit = 0.5, expressionFamily = VGAM::negbinomial.size(), cellTypes='selectLabels', monocle.orders=Step12_Construct_Trajectories.clusters, monocle.colors = ViolinPlot.cellTypeColors, output.dir = paste0(output.dir, '/Step12.Construct_trajectories/monocle2/')) Figure 4.25: Figures showing cells in different trajectory states (left) and corresponding cell type groups (right) 4.13.3 Slingshot Running Slingshot to infer cell trajectories and lineage relationships involves several steps: Constructs a Slingshot object using PCA embeddings, cell types, start clusters, and end clusters. Computes and plots the trajectory curves. Computes and plots pseudotime values along the trajectory. 4.13.3.1 Function arguments: slingshot.PCAembeddings: A matrix containing the PCA embeddings of the single-cell data, typically obtained from dimensionality reduction techniques like PCA. slingshot.cellTypes: A character vector specifying cell types or labels for each cell. slingshot.start.clus: A character vector specifying the initial cluster(s) from which cell trajectories should start. slingshot.end.clus: A character vector specifying the target cluster(s) where cell trajectories should end. slingshot.colors: A vector of colors corresponding to cell types for plotting. If not provided, default colors will be used. 
4.13.3.2 codes for running Slingshot if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){ DefaultAssay(sc_object.subset) <- 'integrated' }else{ DefaultAssay(sc_object.subset) <- 'RNA'} run_slingshot(slingshot.PCAembeddings = Embeddings(sc_object.subset, reduction = "pca")[, PCs], slingshot.cellTypes = sc_object.subset@meta.data$selectLabels, slingshot.start.clus = slingshot.start.clus, slingshot.end.clus = slingshot.end.clus, slingshot.colors = slingshot.colors, output.dir = paste0(output.dir, '/Step12.Construct_trajectories/slingshot/')) Figure 4.26: Figures showing slingshot curve and inferred pseudotime value 4.13.4 scVelo scVelo is implemented in Python, and it takes a Seurat object, cell embeddings, and cell type information as input. The process of data preparation includes the following steps: Format the Seurat object metadata, including cell types and sample names. Extract the spliced, unspliced, and ambiguous count matrices from the Seurat object. Combine the metadata and cell embeddings. Write the necessary input files for scVelo analysis, including cell embeddings, count matrices, and metadata. 4.13.4.1 Function arguments: sc_object: A Seurat object containing the single-cell RNA-seq data. loom.files.path: A character vector specifying the path(s) to the loom files for scVelo analysis. scvelo.reduction: A character specifying the reduction method used for scVelo analysis (default is ‘pca’). scvelo.column: A character specifying the column in the Seurat object metadata containing cell types. 
4.13.4.2 codes for running Scvelo if((!is.null(loom.files.path))&(!is.null(pythonPath))){ prepareDataForScvelo(sc_object = sc_object.subset, loom.files.path = loom.files.path, scvelo.reduction = 'pca', scvelo.column = 'selectLabels', output.dir = paste0(output.dir, '/Step12.Construct_trajectories/scVelo/')) reticulate::py_run_string(paste0("import os\\noutputDir = '", output.dir, "'")) reticulate::py_run_file(file.path(system.file(package = "HemaScopeR"), "python/sc_run_scvelo.py"), convert = FALSE) } Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.27: Figure showing trajectory predicted by scvelo 4.14 Step 13. TF Analysis This step runs SCENIC (Single-Cell Regulatory Network Inference and Clustering) analysis, including the construction of a co-expression network, gene filtering, correlation, and the GENIE3 algorithm to infer regulatory networks. 4.14.1 Function arguments: countMatrix: A matrix containing the raw counts of the single-cell RNA-seq data. cellTypes: A character vector specifying the cell types or labels for each cell. datasetID: A character vector specifying the dataset IDs for each cell. cellTypes_colors: A named vector of colors for cell type visualization. cellTypes_orders: A character vector specifying the desired order of cell types. groups_colors: A named vector of colors for grouping visualization. groups_orders: A character vector specifying the desired order of groups. Org: A character vector specifying the organism (‘mmu’ for mouse or ‘hsa’ for human). 4.14.2 codes for running step13 Create folders for saving the results of TF analysis. print('Step13. 
TF analysis.') if (!file.exists(paste0(output.dir, '/Step13.TF_analysis/'))) { dir.create(paste0(output.dir, '/Step13.TF_analysis/')) } Run SCENIC to perform TF analysis. run_SCENIC(countMatrix = countsSlot, cellTypes = sc_object@meta.data$selectLabels, datasetID = sc_object@meta.data$datasetID, cellTypes_colors = Step13_TF_Analysis.cellTypes_colors, cellTypes_orders = unique(sc_object@meta.data$selectLabels), groups_colors = Step13_TF_Analysis.groups_colors, groups_orders = unique(sc_object@meta.data$datasetID), Org = Org, output.dir = paste0(output.dir, '/Step13.TF_analysis/'), pythonPath = pythonPath, databasePath = databasePath) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.28: Heatmap showing predicted regulon activity for each cell Figure 4.29: Heatmap showing statistics of regulons 4.15 Step 14. Cell-Cell Interaction The step takes expression data, cluster labels, and other parameters to perform cell-cell communication analysis using the CellChat package. It includes the following steps: Data input and preprocessing. Initialization of a CellChat object. Set the ligand-receptor interaction database based on the specified organism. Preprocess the expression data for cell-cell communication analysis. Identify overexpressed genes and interactions. Project data based on protein-protein interaction networks. Inference of cell-cell communication network. Visualization of the communication network. Systems analysis of cell-cell communication network. 4.15.1 Function arguments: data.input: A matrix of expression data, where rows represent genes and columns represent cells. Row names should be in the format of gene symbols. 
labels: A vector of cluster labels for each cell, corresponding to the columns of data.input. cell.orders: A character vector specifying the order of cell types or clusters in the analysis. cell.colors: A character vector specifying colors for cell types or clusters in the analysis. sample.names: A vector of sample or cell names, corresponding to the columns of data.input. Org: A string indicating the organism used in the analysis. It should be either “mmu” (mouse) or “hsa” (human). sorting: A logical value indicating whether to consider cell population size in communication analysis. 4.15.2 codes for running step14 Create folders for saving the results of cell-cell interaction analysis. print('Step14. Cell-cell interaction.') if (!file.exists(paste0(output.dir, '/Step14.Cell_cell_interection/'))) { dir.create(paste0(output.dir, '/Step14.Cell_cell_interection/')) } Run CellChat to perform cell-cell interaction analysis. tempwd <- getwd() run_CellChat(data.input=countsSlot, labels = sc_object@meta.data$selectLabels, cell.orders = ViolinPlot.cellTypeOrders, cell.colors = ViolinPlot.cellTypeColors, sample.names = rownames(sc_object@meta.data), Org = Org, sorting = sorting, output.dir = paste0(output.dir, '/Step14.Cell_cell_interection/')) setwd(tempwd) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.30: Figures showing the interaction number and strength between each cell group Figure 4.31: Heatmap showing the strength of incoming and outgoing signals for each cell type group across various pathways. 
Figure 4.32: Figure showing LRs interaction between each cell type group "],["integrated-st-pipeline.html", "5 Integrated ST pipeline 5.1 For 10X Visium data 5.2 For MERFISH data 5.3 For stereo-seq data", " 5 Integrated ST pipeline Load the R packages. # sc libraries library(Seurat) library(phateR) library(DoubletFinder) library(monocle) library(slingshot) library(URD) library(GSVA) library(limma) library(plyr) library(dplyr) library(org.Mm.eg.db) library(org.Hs.eg.db) library(CellChat) library(velocyto.R) library(SeuratWrappers) library(stringr) library(scran) library(ggpubr) library(viridis) library(pheatmap) library(parallel) library(reticulate) library(SCENIC) library(feather) library(AUCell) library(RcisTarget) library(Matrix) library(foreach) library(doParallel) library(clusterProfiler) library(OpenXGR) # st libraries library(RColorBrewer) library(Rfast2) library(SeuratDisk) library(abcCellmap) library(biomaRt) library(copykat) library(gelnet) library(ggplot2) library(parallelDist) library(patchwork) library(markdown) # getopt library(getopt) library(tools) # HemaScopeR library(HemaScopeR) 5.1 For 10X Visium data Run the integrated 10X Visium pipeline. 
st_10x_visium_pipeline( input.data.dir = 'path/to/data', output.dir = '.', sampleName = 'Hema_ST', # For Step1 Loading rds.file = FALSE, filename = "filtered_feature_bc_matrix.h5", assay = "Spatial", slice = "slice1", filter.matrix = TRUE, to.upper = FALSE, # For Step2 QC Step2_QC = TRUE, min.gene = 200, min.nUMI = 500, max.gene = Inf, max.nUMI = Inf, min.spot = 0, bool.remove.mito = FALSE, species = 'mouse', # 'human' or 'mouse' # For Step3 Clustering Step3_Clustering = TRUE, normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.8, # For Step4 Find DEGs Step4_Find_DEGs = TRUE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', # For Step5 SVF Step5_SVFs = TRUE, selection.method = 'moransi', n.top.show = 10, n.col.show = 5, # For Step6 Interaction Step6_Interaction = TRUE, commot.signaling_type = 'Secreted Signaling', commot.database = 'CellChat', commot.min_cell_pct = 0.05, commot.dis_thr = 500, commot.n_permutations = 100, # For Step7 CNV analysis Step7_CNV = TRUE, copykat.genome = NULL, copykat.LOW.DR = 0.05, copykat.UP.DR = 0.1, copykat.win.size = 25, copykat.distance = "euclidean", copykat.n.cores = 1, # For Step8 Deconvolution Step8_Deconvolution = TRUE, cell2loc.sc.h5ad.dir = NULL, cell2loc.sc.max.epoch = 1000, cell2loc.st.max.epoch = 10000, cell2loc.use.gpu = TRUE, cell2loc.use.dataset = 'LymphNode', # For Step9 Cellcycle Step9_Cellcycle = TRUE, s.features = NULL, g2m.features = NULL, # For Step10 Niche Step10_Niche = TRUE, coexistence.method = 'correlation', Niche.cluster.n = 4, # settings pythonPath = 'path/to/python', verbose = FALSE, genReport = TRUE ) 5.2 For MERFISH data Run the integrated MERFISH pipeline. 
st_MERFISH_pipeline( input.data.dir, output.dir, sampleName = 'Hema_MERFISH', fov = 'fov', tech = 'Vizgen', # For Step1 Loading rds.file = FALSE, assay = NULL, Vizgen.z = 3L, Akoya.type = 'inform', # For Step2 QC Step2_QC = TRUE, min.gene = 20, min.nUMI = 50, max.gene = Inf, max.nUMI = Inf, min.spot = 0, bool.remove.mito = FALSE, species = 'mouse', # 'human' or 'mouse' # For Step3 Clustering Step3_Clustering = TRUE, normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.4, # For Step4 Find DEGs Step4_Find_DEGs = TRUE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', # For Step5 SVF Step5_SVFs = TRUE, selection.method = 'moransi', n.top.show = 10, n.col.show = 5, # For Step6 Interaction Step6_Interaction = TRUE, h5ad_path = NULL, counts_path = NULL, coordinates_path = NULL, coordinates_index_col = 0, counts_transpose = TRUE, commot.signaling_type = 'Secreted Signaling', commot.database = 'CellChat', commot.min_cell_pct = 0.05, commot.dis_thr = 500, commot.n_permutations = 100, # For Step7 Cellcycle Step7_Cellcycle = TRUE, s.features = NULL, g2m.features = NULL, verbose = FALSE, pythonPath = NULL ) 5.3 For stereo-seq data Run the integrated stereo-seq pipeline. 
st_stereo_pipeline( input.data.dir, output.dir, sampleName = 'Hema_stereo', # For Step1 Loading data_type = 'gem', sep = '\\t', bin_type = 'bins', bin_size = 100, spot_diameter = 80, is_sparse = TRUE, gene_list = NULL, region = NULL, assay = 'Spatial', # For Step2 QC Step2_QC = TRUE, min.gene = 20, min.nUMI = 50, max.gene = Inf, max.nUMI = Inf, min.spot = 0, bool.remove.mito = FALSE, species = 'mouse', # 'human' or 'mouse' # For Step3 Clustering Step3_Clustering = TRUE, normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.1, max.n.cluster = 30, # For Step4 Find DEGs Step4_Find_DEGs = TRUE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', # For Step5 SVF Step5_SVFs = TRUE, selection.method = 'moransi', n.top.show = 10, n.col.show = 5, # For Step6 Interaction Step6_Interaction = TRUE, h5ad_path = NULL, counts_path = NULL, coordinates_path = NULL, coordinates_index_col = 0, counts_transpose = TRUE, commot.signaling_type = 'Secreted Signaling', commot.database = 'CellChat', commot.min_cell_pct = 0.05, commot.dis_thr = 500, commot.n_permutations = 100, # For Step7 Cellcycle Step7_Cellcycle = TRUE, s.features = NULL, g2m.features = NULL, verbose = FALSE, pythonPath = NULL ) "],["stey-by-step-st-seq-pipeline.html", "6 Step-by-step st-seq pipeline 6.1 Step 1. Data loading 6.2 Step 2. Quality Control 6.3 Step 3. Clustering 6.4 Step 4. DEGs 6.5 Step 5. Spatially variable features 6.6 Step 6. Spatial interaction 6.7 Step 7. CNV analysis 6.8 Step 8. Deconvolution 6.9 Step 9. Cell cycle 6.10 Step 10. Niche analysis", " 6 Step-by-step st-seq pipeline 6.1 Step 1. Data loading The st_Loading_Data function is designed for loading 10X Visium spatial transcriptomics data from Space Ranger. It will load data from input.data.dir and output it in the SeuratObject format. 6.1.1 Function arguments: input.data.dir: The directory where the input data is stored. output.dir: The directory where the processed output will be saved. 
If not specified, the output is saved in the current working directory. Default is ‘.’. sampleName: A string naming the sample. Default is ‘Hema_ST’. rds.file: A boolean indicating if the input data is in RDS file format rather than a typical results of Space Ranger. Default is FALSE. filename: The name of the file to be loaded if the data is not in RDS format. Default is “filtered_feature_bc_matrix.h5”. assay: The specific assay to apply to the data. Default is ‘Spatial’. slice: The image slice identifier for the spatial data. Default is ‘slice1’. filter.matrix: A boolean indicating whether to load filtered matrix. Default is TRUE. to.upper: A boolean indicating whether to convert feature names to upper form. Default is FALSE. 6.1.2 Function behavior: Directory Creation: The function first checks if the output.dir exists; if not, it creates it. RDS File Handling: If rds.file is TRUE, it reads the RDS file, ensuring the specified assay and slice are present in the Seurat object. Non-RDS File Handling: If rds.file is FALSE, it loads the data using Load10X_Spatial from Seurat. Saving the Object: Uses SaveH5Seurat and Convert to save the Seurat object in rds and h5ad formats. File Copying: Copies any necessary files (filter matrix, spatial image) to the output.dir. Return Value: Returns the processed Seurat object. 6.1.3 An example: st_obj <- st_Loading_Data( input.data.dir = 'path/to/data', output.dir = '.', sampleName = 'Hema_ST', rds.file = FALSE, filename = 'filtered_feature_bc_matrix.h5', assay = 'Spatial', slice = 'slice1', filter.matrix = TRUE, to.upper = FALSE ) 6.1.4 Outputs: Spatial transcriptome data in rds and h5ad formats 6.2 Step 2. Quality Control The QC_Spatial function performs basic quality control on a SeuratObject containing 10X visium data and returns the filtered SeuratObject. It provides options to set thresholds for the number of genes, nUMI (unique molecular identifiers), and spots expressing each gene. 
It also allows for the removal of mitochondrial genes based on species. 6.2.1 Function arguments: st_obj: A SeuratObject of 10X visium data. output.dir: A character string specifying the path to store the results and figures. Default is the current working directory. min.gene: An integer representing the minimum number of genes detected in a spot. Default is 200. max.gene: An integer representing the maximum number of genes detected in a spot. Default is Inf (no upper limit). min.nUMI: An integer representing the minimum number of nUMI detected in a spot. Default is 500. max.nUMI: An integer representing the maximum number of nUMI detected in a spot. Default is Inf (no upper limit). min.spot: An integer representing the minimum number of spots expressing each gene. Default is 3. species: A character string representing the species of sample, either ‘human’ or ‘mouse’. bool.remove.mito: A boolean value indicating whether to remove mitochondrial genes. Default is TRUE. SpatialColors: A function that interpolates a set of given colors to create new color palettes and color ramps. Default is a color palette with reversed Spectral colors from RColorBrewer. 6.2.2 Function behavior: Plots and saves the spatial distribution of nUMI and nGene. Plots and saves violin plots for nUMI and nGene. Identifies and marks low-quality spots based on nUMI and nGene thresholds. Plots the spatial distribution of quality. Plots and saves a histogram for the number of spots expressing each gene. Plots the spatial distribution of mitochondrial genes. Saves the raw SeuratObject before filtering. Removes low-quality spots and genes with fewer occurrences. Optionally removes mitochondrial genes. Saves the filtered SeuratObject. Returns the filtered st_obj. 
6.2.3 An example: st_obj <- QC_Spatial( st_obj = st_obj, output.dir = '.', min.gene = 200, min.nUMI = 500, max.gene = Inf, max.nUMI = Inf, min.spot = 3, species = 'human', bool.remove.mito = TRUE, SpatialColors = colorRampPalette(colors = rev(x = brewer.pal(n = 11, name = "Spectral"))) ) 6.2.4 Outputs: Figures showing the spatial distribution of nUMI and nGene. Violin plots of nUMI and nGene. Figures showing the quality. Histograms for the number of spots expressing each gene. Figures showing the spatial distribution of mitochondrial genes. Raw and filtered SeuratObject. 6.3 Step 3. Clustering The st_Clustering function is designed to perform clustering analysis on spatial transcriptomics data. It integrates several key steps including data normalization, dimensionality reduction, clustering, and visualization. The function saves the results and visualizations to output.dir. 6.3.1 Function arguments: st_obj: The input spatial transcriptomics seurat object that contains the data to be clustered. output.dir: The directory where the output files will be saved. Default is the current directory (‘.’). normalization.method: The method used for data normalization. Default is ‘SCTransform’. npcs: The number of principal components to use in PCA. Default is 50. pcs.used: The principal components to use for clustering. Default is the first 10 PCs (1:10). resolution: The resolution parameter for the clustering algorithm. Default is 0.8. verbose: A logical flag to print progress messages. Default is FALSE. 6.3.2 Function behavior: Data Normalization and PCA: Depending on the normalization.method, the function either uses SCTransform or a standard normalization method followed by scaling and variable feature detection. Performs PCA on the normalized data. Clustering and Dimensionality Reduction: Finds nearest neighbors using the specified principal components (pcs.used). Identifies clusters using the specified resolution. 
Performs UMAP and t-SNE for visualization of the clusters. Visualization: Generates spatial, UMAP, and t-SNE plots of the clusters with customized color schemes. Saves these plots as images in the specified directory. Saving Results: Saves the updated st_obj as an RDS file. Exports the metadata of st_obj to a CSV file. Return Value: Returns the updated st_obj containing the clustering results. 6.3.3 An example: st_obj <- st_Clustering( st_obj = st_obj, output.dir = '.', normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.8, verbose = FALSE ) 6.3.4 Outputs: Figures showing the results of clustering. SeuratObject in rds format. 6.4 Step 4. DEGs The st_Find_DEGs function is designed to identify differentially expressed genes (DEGs) in spatial transcriptomics data. It performs differential expression analysis based on clustering results, visualizes the top markers, and saves the results to output.dir. 6.4.1 Function arguments: st_obj: The input spatial transcriptomics object containing the data for DEG analysis. output.dir: The directory where output files will be saved. Default is the current directory (‘.’). ident.label: The metadata label used for identifying clusters. Default is 'seurat_clusters'. only.pos: A logical flag to include only positive markers. Default is TRUE. min.pct: The minimum fraction of cells expressing the gene in either cluster. Default is 0.25. logfc.threshold: The log fold change threshold for considering a gene differentially expressed. Default is 0.25. test.use: The statistical test to use for differential expression analysis. Default is 'wilcox'. verbose: A logical flag to print progress messages. Default is FALSE. 6.4.2 Function behavior: Set Identifiers: Sets the cluster identifiers in the spatial transcriptomics object (st_obj) based on the specified ident.label. 
Find Differentially Expressed Genes (DEGs): Performs differential expression analysis using the specified parameters (only.pos, min.pct, logfc.threshold, test.use). Top Marker Genes: Selects the top 5 marker genes for each cluster based on the highest average log fold change. Visualization: Generates a dot plot for the top DEGs and saves the plot as an image in the specified directory. Saving Results: Saves the DEG results as a CSV file. Return Value: Returns the data frame containing the identified DEGs. 6.4.3 An example: st.markers <- st_Find_DEGs( st_obj = st_obj, output.dir = '.', ident.label = 'seurat_clusters', only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', verbose = FALSE ) 6.4.4 Outputs: Dot plots showing markers. CSV file containing the information of markers. 6.5 Step 5. Spatially variable features The st_SpatiallyVariableFeatures function identifies and visualizes spatially variable features (SVFs) in spatial transcriptomics data. It integrates the identification of spatially variable features using a specified method, saves the results to a directory, and creates visualizations of the top spatially variable features. 6.5.1 Function arguments: st_obj: The input spatial transcriptomics object containing the data for analysis. output.dir: The directory where output files will be saved. Default is the current directory. assay: The assay to be used for finding spatially variable features. Default is 'SCT'. selection.method: The method used for selecting spatially variable features. Default is 'moransi'. n.top.show: The number of top spatially variable features to visualize. Default is 10. n.col: The number of columns for the visualization grid. Default is 5. verbose: A logical flag to print progress messages. Default is FALSE. 6.5.2 Function behavior: Identify Spatially Variable Features: Identifies spatially variable features using the specified method and assay. Suppresses warnings during the process. 
Save Metadata: Extracts metadata features and saves them as a CSV file in output.dir. Visualization: Selects the top n.top.show spatially variable features. Generates and saves a spatial feature plot of these features in the specified directory. Return Value: Returns the updated st_obj containing the identified spatially variable features. 6.5.3 An example: st_obj <- st_SpatiallyVariableFeatures( st_obj = st_obj, output.dir = '.', assay = st_obj@active.assay, selection.method = 'moransi', n.top.show = 10, n.col = 5, verbose = FALSE ) 6.5.4 Outputs: Figures showing SVFs. CSV file containing the information of SVFs. 6.6 Step 6. Spatial interaction The st_Interaction function is used to identify and visualize interactions between clusters based on spatial transcriptomics data. It utilizes Commot to analyze spatial interactions, identify pathway activities, and assess the strength and significance of interactions. 6.6.1 Function arguments: st_data_path: Path to the spatial transcriptomics data. metadata_path: Path to the metadata associated with the spatial transcriptomics data. library_id: Identifier for the spatial transcriptomics library. Default is 'Hema_ST'. label_key: Key in the metadata to identify cell clusters. Default is 'seurat_clusters'. save_path: The directory where output files will be saved. Default is the current directory. species: The species of the spatial transcriptomics data. Default is 'human'. signaling_type: Type of signaling interactions to consider. Default is 'Secreted Signaling'. database: Database to be used for the analysis. Default is 'CellChat'. min_cell_pct: Minimum percentage of cells to consider for interaction analysis. Default is 0.05. dis_thr: Distance threshold for defining interactions. Default is 500. n_permutations: Number of permutations for assessing significance. Default is 100. pythonPath: The path to the Python environment containing Commot to use for the analysis. Default is ‘.’. 
6.6.2 Function behavior: Commot Analysis: Uses Commot to perform interaction analysis, identifying interactions within and between clusters. Visualization: Generates visualizations of pathway interactions and interactions between ligand-receptors (LRs) within and between clusters, and saves them in save_path. 6.6.3 An example: st_Interaction( st_data_path = 'path/to/data', metadata_path = 'path/to/metadata', library_id = 'Hema_ST', label_key = 'seurat_clusters', save_path = '.', species = 'human', signaling_type = 'Secreted Signaling', database = 'CellChat', min_cell_pct = 0.05, dis_thr = 500, n_permutations = 100, pythonPath = 'path/to/python' ) 6.6.4 Outputs: Dot plot showing pathway interaction between and within clusters. Dot plot showing LRs interaction between and within clusters. The information of each LR and pathway. 6.7 Step 7. CNV analysis The st_CNV function identifies and visualizes copy number variations (CNVs) in spatial transcriptomics data. It uses CopyKAT to perform the CNV analysis, saves the results, and generates visual representations of CNV states. 6.7.1 Function arguments: st_obj: The input spatial transcriptomics object containing the data for analysis. save_path: The directory where output files will be saved. assay: The assay to be used for CNV analysis. Default is 'Spatial'. LOW.DR: The lower threshold for the dropout rate in CopyKAT. Default is 0.05. UP.DR: The upper threshold for the dropout rate in CopyKAT. Default is 0.1. win.size: The window size for the CNV analysis. Default is 25. distance: The distance metric to be used for the analysis. Default is \"euclidean\". genome: The genome version to be used, ‘hg20’ or ‘mm10’. Default is \"hg20\". n.cores: The number of cores to be used for parallel processing. Default is 1. species: The species of the spatial transcriptomics data. Default is 'human'. 6.7.2 Function behavior: CopyKAT Analysis: Runs CopyKAT pipeline to perform CNV analysis using the provided parameters. 
Saving Results: Saves the CopyKAT results as an RDS file. Plotting: Generates plots of the CNV states and saves them in save_path. Updating Metadata: Updates the spatial transcriptomics object with CNV state metadata. Return Value: Returns the updated st_obj containing the CNV state information. 6.7.3 An example: st_obj <- st_CNV( st_obj = st_obj, save_path = '.', assay = 'Spatial', LOW.DR = 0.05, UP.DR = 0.1, win.size = 25, distance = "euclidean", genome = 'hg20', n.cores = 1, species = 'human' ) 6.7.4 Outputs: Figures showing the predicted CNV states. Figures showing the CNV heatmap. rds files of results of copykat. 6.8 Step 8. Deconvolution The st_Deconvolution function aims to perform spatial deconvolution analysis on spatial transcriptomics data to estimate the cell-type composition and abundance in different regions. The function utilizes cell2location to infer cell-type abundance and spatial distributions, allowing for the visualization and interpretation of spatially resolved cell populations within the tissue. 6.8.1 Function arguments: st.data.dir: Path to the spatial transcriptomics data. sc.h5ad.dir: Path to the single-cell RNA-seq data in h5ad format. Default is NULL. library_id: Identifier for the spatial transcriptomics library. Default is 'Hema_ST'. st_obj: Spatial transcriptomics object containing the data for analysis. Default is NULL. save_path: The directory where output files will be saved. Default is NULL. sc.labels.key: Key in the single-cell metadata to identify cell clusters. Default is 'seurat_clusters'. species: The species of the spatial transcriptomics data. Default is 'mouse'. sc.max.epoch: Maximum number of epochs used for single-cell deconvolution. Default is 1000. st.max.epoch: Maximum number of epochs used for spatial deconvolution. Default is 10000. use.gpu: Logical value indicating whether to use GPU for computation. Default is FALSE. use.Dataset: The dataset to be used for analysis, such as 'HematoMap' or 'LymphNode'. 
pythonPath: The path to the Python environment containing cell2location to use for the analysis. Default is ‘.’. 6.8.2 Function behavior: Deconvolution Analysis: Performs the spatial deconvolution analysis using the provided spatial transcriptomics and single-cell RNA-seq data. Post-Analysis Processing: Processes the deconvolution results and visualizes the spatial distribution of inferred cell types within the tissue. Returning Results: If a Seurat object is provided, the updated Seurat object with cell type information is returned. 6.8.3 An example: st_obj <- st_Deconvolution( st.data.dir = 'path/to/data', library_id = 'Hema_ST', sc.h5ad.dir = NULL, st_obj = st_obj, save_path = '.', sc.labels.key = 'seurat_clusters', species = 'human', sc.max.epoch = 1000, st.max.epoch = 10000, use.gpu = FALSE, use.Dataset = 'LymphNode', pythonPath = 'path/to/python' ) 6.8.4 Outputs: Figures showing the predicted abundance of each cell-type. The parameters of trained cell2location model. 6.9 Step 9. Cell cycle The st_Cell_cycle function is used to assess the cell cycle phase scores in spatial transcriptomics data. It calculates S phase and G2M phase scores based on the expression of designated cell cycle-related genes and visualizes these scores in spatial and dimensionality-reduced plots. 6.9.1 Function arguments: st_obj: The input Seurat object containing the data for analysis. save_path: The directory where the output images will be saved. Default is the current directory. s.features: A list of genes associated with the S phase. Default is NULL (using genes from Seurat). g2m.features: A list of genes associated with the G2M phase. Default is NULL (using genes from Seurat). species: The species of the spatial transcriptomics data. Default is 'human'. FeatureColors.bi: A color palette for visualization. Default is a two-color ramp palette. 6.9.2 Function behavior: Gene Feature Assignment: Assigns S phase and G2M phase gene lists based on the specified species or provided input. 
Cell Cycle Scoring: Calculates the S phase and G2M phase scores in the data. Spatial Visualization: Generates spatial feature plots to visualize the S phase and G2M phase scores using the specified color palette and saves the plots as images. Dimensionality-Reduced Plot Visualization: If UMAP or tSNE dimensionality reduction is available in the st_obj, feature plots of the S phase and G2M phase scores are generated in the reduced space and saved as images. Return Value: Returns the updated st_obj containing the cell cycle phase scores. 6.9.3 An example: st_obj <- st_Cell_cycle( st_obj = st_obj, save_path = '.', s.features = NULL, g2m.features = NULL, species = 'human', FeatureColors.bi = colorRampPalette(colors = rev(x = brewer.pal(n = 11, name = 'RdYlBu'))) ) 6.9.4 Outputs: Figures showing S scores. Figures showing G2M scores. 6.10 Step 10. Niche analysis The st_NicheAnalysis function is designed to perform niche analysis on spatial transcriptomics data, enabling the exploration of spatial niches or microenvironments within the tissue. The function encompasses co-occurrence analysis, niche clustering, and niche interaction analysis to uncover the spatial relationships and characteristics of different cell populations or features. 6.10.1 Function arguments: st_obj: The input SeuratObject containing the spatial transcriptomics data for analysis. features: A vector of features representing features (for example, cell types from deconvolution) for niche analysis. save_path: The directory where the analysis results and visualizations will be saved. Default is the current directory. coexistence.method: The method for co-occurrence analysis, accepting 'correlation' or 'Wasserstein'. Default is 'correlation'. kmeans.n: The number of clusters for niche clustering. Default is 4. st_data_path: A path containing the ‘spatial’ file and ‘filtered_feature_bc_matrix.h5’ file, required for niche interaction visualization. slice: The slice to be used for analysis. 
Default is 'slice1'. species: The species of the sample data. Default is 'mouse'. pythonPath: The path to the Python environment containing Commot to use for the analysis. Default is ‘.’. 6.10.2 Function behavior: Co-occurrence Score Calculation: Calculates the co-occurrence scores between the specified features using the chosen coexistence method (‘correlation’ or ‘Wasserstein’). Niche Clustering: Utilizes k-means clustering to identify distinct spatial niches based on the expression profiles of the selected features and visualizes the clustering results. Niche Interaction Visualization: If the st_data_path is provided, performs niche interaction visualization using Commot, which is based on the provided spatial transcriptomics data and generates visualizations of niche interactions within the tissue. Return Value: Returns the updated st_obj with niche analysis results and visualizations. 6.10.3 An example: tmp <- read.csv('path/to/cell2loc_res.csv', row.names = 1) features <- colnames(tmp) if(!all(features %in% names(st_obj@meta.data))){ common.barcodes <- intersect(colnames(st_obj), rownames(tmp)) tmp <- tmp[common.barcodes, ] st_obj <- st_obj[, common.barcodes] st_obj <- AddMetaData(st_obj, metadata = tmp) } st_obj <- st_NicheAnalysis( st_obj, features = features, save_path = '.', coexistence.method = 'correlation', kmeans.n = 4, st_data_path = 'path/to/data', slice = 'slice1', species = 'human', pythonPath = 'path/to/python' ) 6.10.4 Outputs: Figures showing the co-existence results. Figures showing the spatial distribution of each niche. Figures showing the composition of each niche. Figures showing the results of interactions using Commot. "],["step-by-step-shiny.html", "7 Step-by-step shiny 7.1 Step 1. Enter R and get the path of the installed R packages 7.2 Step 2. Run shiny code 7.3 Step 3. 
Use HemaScopeShiny via the GUI", " 7 Step-by-step shiny #You can run shiny on Linux or on the Rstudio web page Choice 1: Run shiny on Linux - Enter Linux, activate the HemaScope environment, and install the radian package; then you can enter the R environment on Linux and run shiny code radian -You can see “r$>”. It means you have entered the R environment on Linux. app_path <- system.file("shinyapp/shiny_sc_st_all.R", package = "HemaScopeR") #The path where shiny_sc_st_all.R is located #Run shiny code shiny::runApp(app_path,launch.browser = FALSE,host = "xx.xx.xx.xx") #host parameter:Your server IP address You’ll see a page like the one below; copy the link. Open the link with a browser, and you will see the HemaScopeR shiny home page. Choice 2: Run shiny on Rstudio web page 7.1 Step 1. Enter R and get the path of the installed R packages Enter the R environment in the Linux command line. R Get the path of the installed R packages in the R command line. .libPaths() For example, “/An/example/of/the/path/to/installed/R/packages” 7.2 Step 2. Run shiny code .libPaths("/An/example/of/the/path/to/installed/R/packages") app_path <- system.file("shinyapp/shiny_sc_st_all.R", package = "HemaScopeR") #The path where shiny_sc_st_all.R is located #Run shiny code shiny::runApp(app_path,launch.browser = FALSE,host = "xx.xx.xx.xx") #host parameter:Your server IP address 7.3 Step 3. Use HemaScopeShiny via the GUI Start interface. A UI page appears with two buttons: “Start scRNA-seq Analysis” and “Start st-seq Analysis.” Users can click the corresponding button based on their needs to enter the respective analysis page. * The figure showing the start interface. Begin a new analysis, continue the previous analysis, or return to the start interface When clicking the “Start scRNA-seq pipeline” or “Start ST-seq pipeline” button, you will be directed to a second page. This page contains three buttons: “Begin New Analysis,” “Continue Previous Analysis”, and “Back to Home”. 
If you need to begin a new analysis of scRNA-seq or st-seq data from the first step, click “Begin New Analysis”. If you have already used Shiny to complete several steps (e.g., steps 1, 2, and 3), but the analysis was interrupted during step 4 because Shiny closed unexpectedly, click “Continue Previous Analysis” to resume from step 4. Please note: users should follow the analysis steps sequentially and not skip steps. For example, analyzing steps 1, 2, and 3 and then jumping directly to step 6 is incorrect. The proper analysis sequence should be step 1, 2, 3, 4, 5, 6, … N. The figure showing the interface for beginning a new analysis, continuing the previous analysis, or returning to the start interface. 7.3.1 scRNA-seq pipeline When the user clicks the “Start scRNA-seq pipeline – Begin New Analysis” button, they will enter the single-cell analysis page. The sidebar of this page includes the following buttons: Step 1. Input Data Step 2. Quality Control Step 3. Clustering Step 4. Identify Cell Types Step 5. Visualization Step 6. Find Differential Genes Step 7. Assign Cell Cycles Step 8. Calculate Heterogeneity Step 9. Violin Plot for Marker Genes Step 10. Calculate Lineage Scores Step 11. GSVA Step 12. Construct Trajectories Step 13. Transcription Factors Analysis Step 14. Cell-Cell Interaction Step 15. Generate the Report Back to Prior Page The figure showing the scRNA-seq pipeline. Please start the analysis from step 1 and do not skip any steps. The correct analysis sequence is steps 1 through 15: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15. To return to the previous page, click “Back to Prior Page”. If Shiny unexpectedly exits during data analysis in the Begin New Analysis process (for example, while analyzing Step 5), and the analysis of Step 5 is interrupted, the user will need to restart Shiny by running shinyApp(ui, server). This will bring up the Home page. 
The user should click the “Start scRNA-seq pipeline–Continue Previous Analysis” button, enter the Job ID displayed on the UI page during the Step 1.Input data step, and then select the step that did not complete successfully (e.g., Step 5). After entering the necessary parameters for Step 5, click “Run Step 5” to resume the analysis. Once Step 5 is completed, the user should proceed by selecting Step 6, entering the required parameters, and clicking “Run Step 6” to analyze Step 6, and so on, until all scRNA-seq steps are completed. Note that the default parameters for each step are the same as those in Begin New Analysis. After clicking “Run Step,” do not perform any other operations on the parameter page. Wait until the current step’s analysis is complete, and the results for that step will appear on the UI page. The “Start scRNA-seq pipeline–Continue Previous Analysis” page contains the following buttons: Back to Prior Page: Click to return to the previous page. Enter your Job ID: Enter the Job ID displayed on the page during the Begin New Analysis–Step1.Input data step. Choose a step you want to analyze: Select the step you want to continue analyzing. 7.3.1.1 Step 1 (scRNA-seq pipeline). Input Data The figure showing the step 1 of scRNA-seq pipeline. Enter data path: Input multiple file paths separated by semicolons, for example: /path1/file1/data1;/path2/file2/data2;/path2/file2/data3. For a single file, use: /path2/file2/data2. Enter project name: When entering multiple files, you must also input multiple project names, separated by semicolons. The number of project names must match the number of input files. Example: projectname1;projectname2;projectname3. For a single file, use: projectname1. Enter output path: Specify the path where the results will be output. You can view the results of each step in this path. Example: /home/username/output. Enter the path of database: The path where the database is stored and it varies for each user. 
Example: /home/username/database. Select Data Type: There are three options: “cellranger-count”, “Seurat”, “Matrix”. Choose according to the type of input data. Gene Column (default: 2): The column where gene names are located; the default is column 2. Minimum Cells (default: 10): The minimum number of cells for filtering; the default is 10. Minimum Features (default: 200): The minimum number of genes that must be detected in each cell; the default is 200. Mt Pattern (default: ‘^MT-’): Mitochondrial pattern; for humans use ^MT-, for mice use ^mt-. After entering the above parameters, click the “LoadData” button to load the data. Once the data is successfully loaded, you will see “OK! Data dimensions” indicating that the data loading is complete, and you will be provided with a JobID. Make sure to note this JobID, as it is crucial. If HemaScopeShiny unexpectedly exits, you can click “Continue Previous Analysis”, enter the JobID, and continue loading the previous analysis results without starting from step 1 again. The JobID is very important! Please note: After clicking the “LoadData” button, do not modify any other parameters on the page. The Step 2-14 pages will consist of three sections: 1) parameter input, 2) result output file names, and 3) generated result figures. If the respective step produces result figures, they will be displayed. Users can switch between images by clicking the arrows on the left or right of the figure. If no figures are generated for the current step, a message stating “NO Figure!” will be displayed. All output files generated at each step are stored in the output directory specified by the user. The UI page will display only the file names, which can be downloaded by clicking on the file name links. 7.3.1.2 Step 2 (scRNA-seq pipeline). Quality Control The figure showing the step 2 of scRNA-seq pipeline. nFeature_RNA.limit: Minimum number of genes detected per cell. 
Default value: 200 percent.mt.limit: Threshold for filtering mitochondrial genes. Default value: 20 scale.factor: Normalization factor. Default value: 10,000 nfeatures: Number of highly variable genes. Default value: 3,000 ndims: Number of dimensions used. Default value: 50 vars.to.regress: Variables to regress. Default value: NULL PCs: Number of principal components used for clustering. Default value: 1:35 resolution: Resolution parameter for clustering. Default value: 0.4 n.neighbors: k.param parameter in the FindNeighbors function. Default value: 50 doublet.percentage: Doublet rate. Default value: 0.04 doubletFinderWrapper.PCs: Number of principal components used for doublet removal. Default value: 1:20 doubletFinderWrapper.pN: Number of artificial doublets defined for removal. Default value: 0.25 doubletFinderWrapper.pK: Represents the fraction of merged real artificial data. Default value: 0.1 (pK should be adjusted according to each scRNA-seq dataset) Step2_Quality_Control.RemoveBatches: Whether to remove detected batches. Default value: TRUE Step2_Quality_Control.RemoveDoublets: Whether to remove detected doublets. Default value: TRUE Click the “Run Step 2” button to start the process. After clicking the “Run Step 2” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 2 completed” message will appear. After a short while, the result files generated by Step 2 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.3 Step 3 (scRNA-seq pipeline). Clustering The figure showing the step 3 of scRNA-seq pipeline. PCs for clustering (default: 1:20): Principal components used for clustering. Default value: 1:20 n.neighbors for clustering (default: 50): k.param parameter in the FindNeighbors function. Default value: 50 resolution for clustering (default: 0.4): Resolution used for clustering. 
Default value: 0.4 Click the “Run Step 3” button to start the process. After clicking the “Run Step 3” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 3 completed” message will appear. After a short while, the result files generated by Step 3 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.4 Step 4 (scRNA-seq pipeline). Identify Cell Types The figure showing the step 4 of scRNA-seq pipeline. Choose organism: ‘hsa’ for human, ‘mmu’ for mouse Choose Labels: Cell labels, default value: clustering Run CNV: TRUE if copy number variation (CNV) analysis is to be performed CPU cores for parallel processing: Number of CPU cores for parallel processing, default value: 10 Click the “Run Step 4” button to start the process. After clicking the “Run Step 4” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 4 completed” message will appear. After a short while, the result files generated by Step 4 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.5 Step 5 (scRNA-seq pipeline). Visualization The figure showing the step 5 of scRNA-seq pipeline. Nearest neighbors for PhateR analysis (default: 50): phate.knn parameter, the number of nearest neighbors to consider in the PhateR algorithm. Default value: 50 Principal components for PhateR (default: 20): phate.npca parameter, the number of principal components to use in the PhateR algorithm. Default value: 20 t parameter for PhateR (default: 10): phate.t parameter, the t value for the PhateR algorithm. Default value: 10 Dimensions for PhateR (default: 2): phate.ndim parameter, the number of dimensions for embedding output in the PhateR algorithm. Default value: 2 Click the “Run Step 5” button to start the process. 
After clicking the “Run Step 5” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 5 completed” message will appear. After a short while, the result files generated by Step 5 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.6 Step 6 (scRNA-seq pipeline). Find Differential Genes The figure showing the step 6 of scRNA-seq pipeline. Minimum gene percentage for differential detection (default: 0.25): The minimum fraction of cells expressing a gene in any cluster. Default value: 0.25 Log-fold threshold for gene analysis (default: 0.25): The log-fold change threshold for differential gene expression analysis. Default value: 0.25 Click the “Run Step 6” button to start the process. After clicking the “Run Step 6” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 6 completed” message will appear. After a short while, the result files generated by Step 6 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.7 Step 7 (scRNA-seq pipeline). Assign Cell Cycles The figure showing the step 7 of scRNA-seq pipeline. Define cell cycle cutoff (default: NULL): The cutoff value used to distinguish between cycling and non-cycling cells. Default value: NULL Click the “Run Step 7” button to start the process. After clicking the “Run Step 7” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 7 completed” message will appear. After a short while, the result files generated by Step 7 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.8 Step 8 (scRNA-seq pipeline). Calculate Heterogeneity The figure showing the step 8 of scRNA-seq pipeline. 
Order cell types: The order of cell types for visualization. If not provided, the function will use the unique cell types from the input cell_types_groups. Default value: NULL Click the “Run Step 8” button to start the process. After clicking the “Run Step 8” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 8 completed” message will appear. After a short while, the result files generated by Step 8 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.9 Step 9 (scRNA-seq pipeline). Violin Plot for Marker Genes The figure showing the step 9 of scRNA-seq pipeline. Enter marker genes for violin plot (separate by ‘,’): The marker genes for the violin plot. Default value is the built-in marker genes: NULL. Set the hexadecimal codes of colors for cell types (separate by ‘,’): Specify the colors for cell types. The default is the color palette: NULL. Click the “Run Step 9” button to start the process. After clicking the “Run Step 9” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 9 completed” message will appear. After a short while, the result files generated by Step 9 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.10 Step 10 (scRNA-seq pipeline). Calculate Lineage Scores The figure showing the step 10 of scRNA-seq pipeline. The gene sets for calculating lineage scores: The gene sets used for calculating lineage scores. Default value: NULL. The names for the lineages: The names of the lineages. Default value: NULL. The hexadecimal codes of colors for groups: Specify the colors to be used for different group annotations. The default is the color palette: NULL. Click the “Run Step 10” button to start the process. 
After clicking the “Run Step 10” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 10 completed” message will appear. After a short while, the result files generated by Step 10 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.11 Step 11 (scRNA-seq pipeline). GSVA The figure showing the step 11 of scRNA-seq pipeline. Option to identify cell type-specific GSVA terms: Whether to identify cell type-specific GSVA terms. Default value: TRUE. Option to identify differential GSVA terms: Whether to identify differential GSVA terms. Default value: TRUE. Click the “Run Step 11” button to start the process. After clicking the “Run Step 11” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 11 completed” message will appear. After a short while, the result files generated by Step 11 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.12 Step 12 (scRNA-seq pipeline). Construct Trajectories The figure showing the step 12 of scRNA-seq pipeline. Set the cell types for constructing trajectories: The cell types to be used for trajectory analysis. Different cell types should be separated by commas. Default value: “all.” Option to run monocle2: Whether to perform Monocle2 trajectory analysis. Default value: TRUE. Option to run slingshot: Whether to perform Slingshot trajectory analysis. Default value: TRUE. Option to run scVelo: Whether to perform scVelo trajectory analysis. Default value: TRUE. Enter the paths of loom files: Specify the paths to the loom files for scVelo analysis. Default value: NULL. Click the “Run Step 12” button to start the process. After clicking the “Run Step 12” button, please do not interact with other parameters or buttons on the page. 
Once the process is complete, a “Step 12 completed” message will appear. After a short while, the result files generated by Step 12 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.13 Step 13 (scRNA-seq pipeline). Transcription Factors Analysis The figure showing the step 13 of scRNA-seq pipeline. Set the hexadecimal codes of colors for cell types: Colors used for visualizing cell types. Default value: NULL (color palette). Set the hexadecimal codes of colors for groups: Colors used for visualizing groups. Default value: NULL (color palette). Click the “Run Step 13” button to start the process. After clicking the “Run Step 13” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 13 completed” message will appear. After a short while, the result files generated by Step 13 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.14 Step 14 (scRNA-seq pipeline). Cell-Cell Interaction The figure showing the step 14 of scRNA-seq pipeline. The cell groups were sorted: Whether to consider the size (number) of cell groups in the cell communication analysis. Default value: TRUE. Click the “Run Step 14” button to start the process. After clicking the “Run Step 14” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 14 completed” message will appear. After a short while, the result files generated by Step 14 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.15 Step 15 (scRNA-seq pipeline). Generate the Report The figure showing the step 15 of scRNA-seq pipeline. Click “Run Step 15” to generate the analysis report. 
7.3.2 ST-pipeline When the user clicks the button “Start ST-seq pipeline–Begin New Analysis,” they will be taken to the empty analysis page. The page sidebar includes the following buttons: Please start the analysis from Step 1 and do not skip any steps. The correct analysis sequence is Step 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, and 11. To return to the previous page, please click “Back to Prior Page.” Step 1. Input Data Step 2. Quality Control Step 3. Clustering Step 4. Find Differential Genes Step 5. Spatially Variable Features Step 6. Spatial Interaction Step 7. CNV Analysis Step 8. Deconvolution Step 9. Cell Cycle Analysis Step 10. Niche Analysis Step 11. Generate the Report Back to Prior Page In “Begin New Analysis,” users start analyzing data from Step1. If Shiny unexpectedly exits during the analysis process (for example, if you are analyzing Step5 and Shiny crashes, causing Step5 to fail), users need to restart Shiny by running shinyApp(ui, server). This will bring up the Home page. Users should click the “Start ST-seq pipeline–Continue Previous Analysis” button. They need to enter the JobID displayed in the UI page during the Step1.Input data step and then select the step that did not complete successfully to continue the analysis. For example, if Step5 failed, select Step5, enter the necessary parameters, and click “Run Step5” to continue the analysis. After Step5 finishes, select Step6, enter the parameters for Step6, and click “Run Step6” to analyze Step6, and so on for all subsequent steps. Please note that the default parameters for each step are the same as those in “Begin New Analysis.” After clicking “Run Step,” do not make any other changes to the parameter page. Wait until the current step completes, and the results file for the current step will appear on the UI page. The “Start ST-seq pipeline–Continue Previous Analysis” page includes the following buttons: Back to Prior Page: Click to return to the previous page. 
Enter your Job ID: Enter the JobID displayed in the “Begin New Analysis–Step1.Input data” step. Choose a step you want to analyze: Select the step you want to continue analyzing. 7.3.2.1 Step 1 (st-seq pipeline). Input Data The figure showing the step 1 of st-seq pipeline. Enter data path: The directory where the input data is stored. The input data should be 10X Visium spatial transcriptomics data. Only one dataset can be input at a time; unlike single-cell data, multiple datasets cannot be entered simultaneously. Enter sample name: A string for naming the sample. The default value is ‘Hema_ST’. Enter output path: The directory where processed outputs will be saved. For example: /home/username/output. Enter the path of Python: The path to the Python executable, as that in scRNA-seq pipeline. After entering the parameters above, click the “LoadData” button to load the data. Once the data is loaded, the system will provide a JobID, which should be noted. If Shiny unexpectedly exits, you can click “Continue Previous Analysis” and enter the JobID to resume loading the previous analysis results, avoiding the need to restart from Step 1. The JobID is very important! Please note: After clicking the “LoadData” button, do not make further changes to other parameters on the page. The Step 2-10 pages will have three sections: Parameter input Result output file names Generated result plots If a step generates result plots, they will be displayed. Users can switch between images by clicking the arrows on either side of the plot. If no result plots are generated for the current step, users will be informed with “NO Figure!” The result files generated for each step are stored in the output path specified by the user. The UI page will only display the file names, and clicking on the file name links will allow downloading the files. 7.3.2.2 Step 2 (st-seq pipeline). Quality Control The figure showing the step 2 of st-seq pipeline. 
min.gene (default: 200): Specifies the minimum number of genes detected in a spot. The default value is 200. min.nUMI (default: 500): Specifies the minimum number of nUMIs detected in a spot. The default value is 500. max.gene (default: Inf): Specifies the maximum number of genes detected in a spot. The default value is Inf (no upper limit). max.nUMI (default: Inf): Specifies the maximum number of nUMIs detected in a spot. The default value is Inf (no upper limit). min.spot (default: 0): Specifies the minimum number of spots where each gene is expressed. bool.remove.mito: Whether to remove mitochondrial genes. The default value is TRUE. species: Specifies the species: human/mouse. Click “Run Step2” to proceed. After clicking the “Run Step2” button, please do not modify any other parameters on the page. Once Step 2 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.3 Step 3 (st-seq pipeline). Clustering The figure showing the step 3 of st-seq pipeline. normalization.method (default: ‘SCTransform’): The method for data normalization. The default value is ‘SCTransform’. npcs (default: 50): The number of principal components (PCs) to use in PCA. The default value is 50. pcs.used (default: 1:10): The number of PCs used for clustering analysis. The default value is the first 10 PCs (1:10). resolution (default: 0.8): The resolution parameter for the clustering algorithm. The default value is 0.8. Click “Run Step3” to proceed. After clicking the “Run Step3” button, please do not modify any other parameters on the page. Once Step 3 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.4 Step 4 (st-seq pipeline). Find Differential Genes The figure showing the step 4 of st-seq pipeline. only.pos: A logical flag to include only positive markers. The default value is TRUE. 
min.pct (default: 0.25): The minimum fraction of cells expressing the gene in any cluster. The default value is 0.25. logfc.threshold (default: 0.25): The log-fold change threshold for considering differentially expressed genes. The default value is 0.25. test.use (default: ‘wilcox’): The statistical test used for differential expression analysis. The default value is ‘wilcox’. Click “Run Step4” to proceed. After clicking the “Run Step4” button, please do not modify any other parameters on the page. Once Step 4 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.5 Step 5 (st-seq pipeline). Spatially variable features The figure showing the step 5 of st-seq pipeline. selection.method (default: ‘moransi’): The method used for selecting spatially variable features. The default value is ‘moransi’. n.top.show (default: 10): The number of top spatially variable features to visualize. The default value is 10. n.col.show (default: 5): The number of columns in the visualization grid. The default value is 5. Click “Run Step5” to proceed. After clicking the “Run Step5” button, please do not modify any other parameters on the page. Once Step 5 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.6 Step 6 (st-seq pipeline). Spatial interaction The figure showing the step 6 of st-seq pipeline. commot.signaling_type (default: ‘Secreted Signaling’): The type of signaling interaction to consider. The default value is ‘Secreted Signaling’. commot.database (default: ‘CellChat’): The database used for the analysis. The default value is ‘CellChat’. commot.min_cell_pct (default: 0.05): The minimum cell percentage to consider in interaction analysis. The default value is 0.05. commot.dis_thr (default: 500): The distance threshold used to define interactions. The default value is 500. 
commot.n_permutations (default: 100): The number of permutations used to assess significance. The default value is 100. Click “Run Step6” to proceed. After clicking the “Run Step6” button, please do not modify any other parameters on the page. Once Step 6 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.7 Step 7 (st-seq pipeline). CNV analysis The figure showing the step 7 of st-seq pipeline. copykat.genome (default: ‘NULL’): The genome version used, either ‘hg20’ or ‘mm10’. The default value is “hg20”. copykat.LOW.DR (default: 0.05): The lower dropout rate threshold in CopyKAT. The default value is 0.05. copykat.UP.DR (default: 0.1): The upper dropout rate threshold in CopyKAT. The default value is 0.1. copykat.win.size (default: 25): The window size for CNV analysis. The default value is 25. copykat.distance (default: ‘euclidean’): The distance metric used for analysis. The default value is “euclidean”. copykat.n.cores (default: 1): The number of cores used for parallel processing. The default value is 1. Click “Run Step7” to proceed. After clicking the “Run Step7” button, please do not modify any other parameters on the page. Once Step 7 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.8 Step 8 (st-seq pipeline). Deconvolution The figure showing the step 8 of st-seq pipeline. cell2loc.sc.h5ad.dir (default: ‘NULL’): The path to the h5ad format single-cell RNA-seq data. The default value is NULL. cell2loc.sc.max.epoch (default: 1000): The maximum number of epochs for single-cell deconvolution. The default value is 1000. cell2loc.st.max.epoch (default: 10000): The maximum number of epochs for spatial deconvolution. The default value is 10000. cell2loc.use.gpu (default: FALSE): A logical value indicating whether to use GPU for computation. The default value is FALSE. 
Click “Run Step8” to proceed. After clicking the “Run Step8” button, please do not modify any other parameters on the page. Once Step 8 is completed, the result files will appear in the UI and will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.9 Step 9 (st-seq pipeline). Cell cycle analysis The figure showing the step 9 of st-seq pipeline. The gene sets for calculating S phase scores (e.g. “gene1,gene2,gene3”): A list of genes associated with the S phase. The default value is NULL (uses genes from Seurat). The gene sets for calculating G2M phase scores (e.g. “gene1,gene2,gene3”): A list of genes associated with the G2M phase. The default value is NULL (uses genes from Seurat). Click “Run Step9” to proceed. After clicking the “Run Step9” button, please do not modify any other parameters on the page. Once Step 9 is completed, the result files will appear in the UI and will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.10 Step 10 (st-seq pipeline). Niche analysis The figure showing the step 10 of st-seq pipeline. Nich.cluster.n (default: 4): The number of clusters for niche clustering. The default value is 4. Click “Run Step10” to proceed. After clicking the “Run Step10” button, please do not modify any other parameters on the page. Once Step 10 is completed, the result files will appear in the UI and will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.11 Step 11 (st-seq pipeline). Generate the Report The figure showing the step 11 of st-seq pipeline. Click “Run Step11” to generate the analysis report. "],["operation-manual-for-the-hemascopecloud.html", "8 Operation Manual for the HemaScopeCloud 8.1 User Login 8.2 Homepage 8.3 Data Page 8.4 Analysis Page 8.5 Projects page", " 8 Operation Manual for the HemaScopeCloud 8.1 User Login 8.1.1 Enter the URL in a web browser: https://hemascope.hiplot.cn/?home=hemascope and click to access the login page. 
Figure 8.1: Login Page 8.1.2 To obtain free computational resources: Enter your login email, click “Get Code,” input the verification code received in your email, and then click “Login” to complete the login and access the system homepage. 8.1.3 To browse HemaScopeCloud without needing computational resources: Click the “View without Login” button to access the system homepage. You can view demo analysis projects. If you click the button to initiate an analysis, the platform will prompt: “Please log in for analysis!” 8.2 Homepage Figure 8.2: Homepage The left side features a menu bar containing Home, Data, Analysis, Project, and Help. And the upper right section includes statistics on analysis project status, usage of analysis projects, a quick entry for creating new analysis projects, and statistics on allocated storage capacity usage. Statistics on Analysis Project Status Pending Analysis:Waiting for analysis, not yet submitted for analysis. Pending Resources:Waiting for resources, analysis submitted and awaiting resource allocation. Analyzing:Currently analyzing. Completed:Analysis completed. Error:An error occurred during analysis. Total:Total of all analysis statuses. Usage Statistics for Analysis Projects: Number of used analysis projects / Total number of allocated analysis projects. The current allocation for the system is 50 projects. For additional free computational resources, please contact the developer. Quick Entry for Creating New Analysis Projects: Supports quick access to the new analysis project pages corresponding to two pipelines. Storage Capacity Usage: Used Storage Resources / Allocated Storage Resources. The lower section displays the most recently run analysis projects. By default, it shows demo projects upon initial entry. Clicking the “View” option on an entry in the Projects section allows you to access and analyze that specific analysis project. 
8.3 Data Page The Data page includes storage for Demo sample project data as well as Personal project data. Data under the Demo tab can be downloaded, while the Personal tab allows users to create new folders and upload files. 8.4 Analysis Page It lists two analysis pipelines: sc_HemaScopeCloud and st_HemaScopeCloud, serving as entries for creating new analysis projects. Click the Analysis button to access the new project and execution page for that pipeline. Figure 8.3: Select Analysis Pipeline Page Figure 8.4: Enter the Analysis Pipeline Page Create New Analysis Project Click the Analysis button under the sc_HemaScopeCloud to enter the new project page for that pipeline. Project Name:Enter the name of the analysis project for identification purposes. Input Data:Click Upload to upload local analysis files. Single and multiple file uploads are supported. Uploaded files must comply with the pipeline’s input file requirements; otherwise, an error will occur during execution. Sample Name:Click Add to enter the sample names, which should correspond to the uploaded analysis files. Items marked with * are required fields. Click the Run button to initiate the analysis:For the scRNA-seq pipeline,this will trigger step1-4; for the st-seq pipeline, it will trigger step1-5. Each subsequent analysis step requires clicking Run on the relevant step page to submit. Before submission, ensure that the previous step has generated result files; otherwise, a notification will indicate that the analysis cannot proceed. Load Demo Data HemaScopeCloud supports loading pre-configured analysis demo files and default parameters to quickly initiate analysis projects. On the new project page, click Load Demo Data to load files from the demo project and fill the required fields. Then, click the Run button to execute the analysis for the demo project. Figure 8.5: Load Demo Data After clicking Run, you will be redirected to the detailed page of the analysis project. 
Analysis Project Detail Page Notifications Waiting for resources…Do not submit repeatedly: This indicates that the submission is waiting for resources. Do not click the Run button again. Analyzing…Do not submit repeatedly: This indicates that the project is currently analyzing. Do not click the Run button again. Analysis Steps, Current Analysis Step: Displays all stepwise analysis processes and the current step. Click on different steps to navigate to the corresponding analysis step page. For the initial analysis, you must complete the previous step before proceeding to the next one. Refresh Button: Used to refresh the current page. Results: This tab stores the results of the completed step. Visualization: For steps that involve visualizations, the results will be found under the visualization tab. History:Click on Run History to view all historical runs of that step. Status:Corresponds to the analysis status of the project. Log:Click this button to view the run log. Parameter Settings:Used for entering parameter values. Figure 8.6: Analysis Project Page Figure 8.7: Analysis Project Result Page Figure 8.8: This step of the analysis project displays ‘Waiting for resources…Do not submit repeatedlly’ Figure 8.9: This step of the analysis project displays ‘Analyzing…Do not submit repeatedly’ Figure 8.10: History Page Note: For steps that have already been completed (except for the first step), you can adjust the parameters and click Run to perform multiple analyses. The results page will retain only the latest analysis results. 8.5 Projects page The homege includes analysis projects created by the user as well as pre-configured demo analysis projects provided by the system. Figure 8.11: Demo projects and user’s personal projects Clicking “View” allows you to navigate to the analysis project for review and step-by-step analysis. 
Figure 8.12: Click ‘View’ to access the analysis project Figure 8.13: Enter the detailed analysis project page "]] +[["index.html", "HemaScope Tutorial 1 Introduction", " HemaScope Tutorial HemaScope team 2024-10-22 1 Introduction HemaScope is a specialized bioinformatics toolkit designed for analyzing both single-cell and spatial transcriptome sequencing data from hematopoietic cells, including myeloid and lymphoid lineages. We have developed an R package named HemaScopeR, a Shiny interface named HemaScopeShiny, and a cloud platform named HemaScopeCloud. This tutorial introduces how to install and use the R package and Shiny interface, as well as how to access and operate the cloud platform. "],["installation.html", "2 Installation 2.1 Create a new conda environment and activate it 2.2 Set the channels in conda 2.3 Install R 2.4 Install required R-packages 2.5 Create the required python (v.3.9.12) virtual environment 2.6 The installed packages with versions", " 2 Installation 2.1 Create a new conda environment and activate it conda create --name HemaScope_env conda activate HemaScope_env 2.2 Set the channels in conda # Add the default channel conda config --add channels defaults # Add default channel URLs conda config --add default_channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main conda config --add default_channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r conda config --add default_channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2 # Add custom channels conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2 conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/menpo conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch conda config --add channels 
https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch-lts conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/simpleitk conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/deepmodeling # Set to show channel URLs conda config --set show_channel_urls true 2.3 Install R - R 4.3.3 conda install R-base=4.3.3 2.4 Install required R-packages From conda conda install -c conda-forge r-devtools=2.4.5 -y conda install -c conda-forge r-Seurat=4.3.0.1 -y conda install -c conda-forge r-Rfast2=0.1.5.1 -y conda install -c conda-forge r-hdf5r=1.3.10 -y conda install -c conda-forge r-ggpubr=0.6.0 -y conda install pwwang::r-seuratwrappers -y conda install -c bioconda bioconductor-monocle=2.28.0 -y conda install -c bioconda bioconductor-slingshot=2.8.0 -y conda install -c bioconda bioconductor-GSVA=1.48.2 -y conda install -c bioconda bioconductor-org.Mm.eg.db=3.17.0 -y conda install -c bioconda bioconductor-org.Hs.eg.db=3.17.0 -y conda install -c bioconda bioconductor-scran=1.28.1 -y conda install -c bioconda bioconductor-AUCell=1.22.0 -y conda install -c bioconda bioconductor-RcisTarget=1.20.0 -y conda install -c bioconda bioconductor-GENIE3=1.24.0 -y conda install -c bioconda bioconductor-biomaRt=2.56.1 -y conda install -c bioconda r-velocyto.r=0.6 -y #conda install -c bioconda bioconductor-limma=3.56.2 -y Enter the R language environment We suggest users do not manually update any already installed R packages during the installation of the following R packages. R From BiocManager # BiocManager(version = "1.30.23") should already be installed as a dependency of r-seuratwrappers. # If it is not installed, please run the following code to install it.
# install.packages("BiocManager",version="1.30.23") BiocManager::install("ComplexHeatmap") BiocManager::install("scmap") BiocManager::install("clusterProfiler") BiocManager::install("BiocNeighbors") From CRAN install.packages("doMC") install.packages("doRNG") install.packages("shinyjs") install.packages("shiny") install.packages("shinyWidgets") install.packages("shinydashboard") install.packages("slickR") install.packages("phateR") install.packages("gelnet") install.packages("parallelDist") install.packages("kableExtra") install.packages("transport") install.packages("feather") install.packages("markdown") install.packages("ggalluvial") install.packages("forcats") install.packages("mcmc") install.packages("MCMCpack") install.packages("fields") install.packages("getopt") install.packages("osfr") From GitHub tips: Sometimes network connection issues may occur, resulting in an error message indicating that GitHub cannot be connected. Please try installing again when the network conditions improve. Usage limitations: Sometimes an API rate limit error occurs, and a GitHub token is needed to provide the GitHub API rate limit. The steps to resolve this are as follows: Register for an account or log in to an existing account on the GitHub website. Then click on your profile picture in the top right corner, go to the dropdown menu and select “Settings.” Next, find “Developer settings” and click on it, then find “Personal access tokens (classic).” Click on it, then click “Create new token (classic).” Create a new token by first naming it anything you like. Then choose the expiration time for the token. Finally, check the “repo” box; the token will be used to download code repositories from GitHub. Click “Generate token.” Copy the generated token password. After that, set the token in the environment variable in R. Since we are using conda, enter R by typing R in the terminal. Then, enter the command: usethis::edit_r_environ(). This will open a file. Press the i key to edit. 
Paste the token you copied into the code area as follows: GITHUB_TOKEN=“your_token”. Then press Esc, type :wq! (force save). After that, you need to exit Linux and re-enter R. Close and reopen the terminal to apply the environment variable. Reopen Linux, activate the conda environment, and enter R again. devtools::install_github("sqjin/CellChat") devtools::install_github("immunogenomics/presto") devtools::install_github("aertslab/SCENIC@140ad6b") devtools::install_github("pzhulab/abcCellmap@f44c14b") devtools::install_github("navinlabcode/copykat@d7d6569") devtools::install_github('chris-mcginnis-ucsf/DoubletFinder@8c7f76e') devtools::install_github("mojaveazure/seurat-disk@877d4e1") devtools::install_github(c("hfang-bristol/dnet")) Install HemaScopeR from github devtools::install_github(repo="ZhenyiWangTHU/HemaScopeR", dep = FALSE) 2.5 Create the required python (v.3.9.12) virtual environment Run the init_miniconda function to create the miniconda virtual environments for the scRNA-seq pipeline and ST pipeline of 10X Visium data and MERFISH data. library(HemaScopeR) init_miniconda() (Optional) Run the init_miniconda_stereo function to create the miniconda virtual environment for the stereo-seq data. init_miniconda_stereo() 2.6 The installed packages with versions R packages with versions Package Version ------- ------- Python packages with versions Package Version ------------------------ -------------- "],["integrated-scrna-seq-pipeline.html", "3 Integrated scRNA-seq pipeline", " 3 Integrated scRNA-seq pipeline Load the R packages. 
# sc libraries library(Seurat) library(phateR) library(DoubletFinder) library(monocle) library(slingshot) library(GSVA) library(limma) library(plyr) library(dplyr) library(org.Mm.eg.db) library(org.Hs.eg.db) library(CellChat) library(velocyto.R) library(SeuratWrappers) library(stringr) library(scran) library(ggpubr) library(viridis) library(pheatmap) library(parallel) library(reticulate) library(SCENIC) library(feather) library(AUCell) library(RcisTarget) library(Matrix) library(foreach) library(doParallel) library(clusterProfiler) # st libraries library(RColorBrewer) library(Rfast2) library(SeuratDisk) library(abcCellmap) library(biomaRt) library(copykat) library(gelnet) library(ggplot2) library(parallelDist) library(patchwork) library(markdown) # getpot library(getopt) library(tools) # HemaScopeR library(HemaScopeR) Run the integrated scRNA-seq pipeline. scRNASeq_10x_pipeline( # input and output input.data.dirs = c('./SRR7881399/outs/filtered_feature_bc_matrix', './SRR7881400/outs/filtered_feature_bc_matrix', './SRR7881401/outs/filtered_feature_bc_matrix', './SRR7881402/outs/filtered_feature_bc_matrix', './SRR7881403/outs/filtered_feature_bc_matrix', './SRR7881404/outs/filtered_feature_bc_matrix', './SRR7881405/outs/filtered_feature_bc_matrix', './SRR7881406/outs/filtered_feature_bc_matrix', './SRR7881407/outs/filtered_feature_bc_matrix', './SRR7881408/outs/filtered_feature_bc_matrix', './SRR7881409/outs/filtered_feature_bc_matrix', './SRR7881410/outs/filtered_feature_bc_matrix', './SRR7881411/outs/filtered_feature_bc_matrix', './SRR7881412/outs/filtered_feature_bc_matrix', './SRR7881413/outs/filtered_feature_bc_matrix', './SRR7881414/outs/filtered_feature_bc_matrix', './SRR7881415/outs/filtered_feature_bc_matrix', './SRR7881416/outs/filtered_feature_bc_matrix', './SRR7881417/outs/filtered_feature_bc_matrix', './SRR7881418/outs/filtered_feature_bc_matrix', './SRR7881419/outs/filtered_feature_bc_matrix', './SRR7881420/outs/filtered_feature_bc_matrix', 
'./SRR7881421/outs/filtered_feature_bc_matrix', './SRR7881422/outs/filtered_feature_bc_matrix', './SRR7881423/outs/filtered_feature_bc_matrix'), project.names = c( 'SRR7881399', 'SRR7881400', 'SRR7881401', 'SRR7881402', 'SRR7881403', 'SRR7881404', 'SRR7881405', 'SRR7881406', 'SRR7881407', 'SRR7881408', 'SRR7881409', 'SRR7881410', 'SRR7881411', 'SRR7881412', 'SRR7881413', 'SRR7881414', 'SRR7881415', 'SRR7881416', 'SRR7881417', 'SRR7881418', 'SRR7881419', 'SRR7881420', 'SRR7881421', 'SRR7881422', 'SRR7881423'), output.dir = './output/', pythonPath = '/home/anaconda3/envs/HemaScopeR/bin/python', # quality control and preprocessing gene.column = 2, min.cells = 10, min.feature = 200, mt.pattern = '^MT-', nFeature_RNA.limit = 200, percent.mt.limit = 20, scale.factor = 10000, nfeatures = 3000, ndims = 50, vars.to.regress = NULL, PCs = 1:35, resolution = 0.4, n.neighbors = 50, # remove doublets doublet.percentage = 0.04, doublerFinderwraper.PCs = 1:20, doublerFinderwraper.pN = 0.25, doublerFinderwraper.pK = 0.1, # phateR phate.knn = 50, phate.npca = 20, phate.t = 10, phate.ndim = 2, min.pct = 0.25, logfc.threshold = 0.25, # visualization ViolinPlot.cellTypeOrders = as.character(1:22), ViolinPlot.cellTypeColors = NULL, Org = 'hsa', loom.files.path = c( './SRR7881399/velocyto/SRR7881399.loom', './SRR7881400/velocyto/SRR7881400.loom', './SRR7881401/velocyto/SRR7881401.loom', './SRR7881402/velocyto/SRR7881402.loom', './SRR7881403/velocyto/SRR7881403.loom', './SRR7881404/velocyto/SRR7881404.loom', './SRR7881405/velocyto/SRR7881405.loom', './SRR7881406/velocyto/SRR7881406.loom', './SRR7881407/velocyto/SRR7881407.loom', './SRR7881408/velocyto/SRR7881408.loom', './SRR7881409/velocyto/SRR7881409.loom', './SRR7881410/velocyto/SRR7881410.loom', './SRR7881411/velocyto/SRR7881411.loom', './SRR7881412/velocyto/SRR7881412.loom', './SRR7881413/velocyto/SRR7881413.loom', './SRR7881414/velocyto/SRR7881414.loom', './SRR7881415/velocyto/SRR7881415.loom', 
'./SRR7881416/velocyto/SRR7881416.loom', './SRR7881417/velocyto/SRR7881417.loom', './SRR7881418/velocyto/SRR7881418.loom', './SRR7881419/velocyto/SRR7881419.loom', './SRR7881420/velocyto/SRR7881420.loom', './SRR7881421/velocyto/SRR7881421.loom', './SRR7881422/velocyto/SRR7881422.loom', './SRR7881423/velocyto/SRR7881423.loom'), # cell cycle cellcycleCutoff = NULL, # cell chat sorting = FALSE, ncores = 10, # Verbose = FALSE, # activeEachStep Whether_load_previous_results = FALSE, Step1_Input_Data = TRUE, Step1_Input_Data.type = 'cellranger-count', Step2_Quality_Control = TRUE, Step2_Quality_Control.RemoveBatches = TRUE, Step2_Quality_Control.RemoveDoublets = TRUE, Step3_Clustering = TRUE, Step4_Identify_Cell_Types = TRUE, Step4_Use_Which_Labels = 'clustering', Step4_Cluster_Labels = NULL, Step4_Changed_Labels = NULL, Step4_run_sc_CNV = TRUE, Step5_Visualization = TRUE, Step6_Find_DEGs = TRUE, Step7_Assign_Cell_Cycle = TRUE, Step8_Calculate_Heterogeneity = TRUE, Step9_Violin_Plot_for_Marker_Genes = TRUE, Step10_Calculate_Lineage_Scores = TRUE, Step11_GSVA = TRUE, Step11_GSVA.identify.cellType.features=TRUE, Step11_GSVA.identify.diff.features=FALSE, Step11_GSVA.comparison.design=NULL, Step12_Construct_Trajectories = TRUE, Step12_Construct_Trajectories.clusters = c('3','6','9','10','11','14','15','19'), Step12_Construct_Trajectories.monocle = TRUE, Step12_Construct_Trajectories.slingshot = TRUE, Step12_Construct_Trajectories.scVelo = TRUE, Step13_TF_Analysis = TRUE, Step14_Cell_Cell_Interaction = TRUE, Step15_Generate_the_Report = TRUE ) "],["step-by-step-scrna-seq-pipeline.html", "4 Step-by-step scRNA-seq Pipeline 4.1 Before you begin 4.2 Step 1. Load the input data 4.3 Step 2. Quality Control 4.4 Step 3. Clustering 4.5 Step 4. Identify Cell Types 4.6 Step 5. Visualization 4.7 Step 6. Find DEGs 4.8 Step 7. Assign Cell Cycles 4.9 Step 8. Calculate Heterogeneity 4.10 Step 9. Violin Plot for Marker Genes 4.11 Step 10. Calculate Lineage Scores 4.12 Step 11. 
GSVA 4.13 Step 12. Construct Trajectories 4.14 Step 13. TF Analysis 4.15 Step 14. Cell-Cell Interaction", " 4 Step-by-step scRNA-seq Pipeline 4.1 Before you begin Load the R packages. library(Seurat) library(phateR) library(DoubletFinder) library(monocle) library(slingshot) library(GSVA) library(limma) library(plyr) library(dplyr) library(org.Mm.eg.db) library(org.Hs.eg.db) library(CellChat) library(velocyto.R) library(SeuratWrappers) library(stringr) library(scran) library(ggpubr) library(viridis) library(pheatmap) library(parallel) library(reticulate) library(SCENIC) library(feather) library(AUCell) library(RcisTarget) library(Matrix) library(foreach) library(doParallel) library(clusterProfiler) # st libraries library(RColorBrewer) library(Rfast2) library(SeuratDisk) library(abcCellmap) library(biomaRt) library(copykat) library(gelnet) library(ggplot2) library(parallelDist) library(patchwork) library(markdown) library(getopt) library(tools) library(HemaScopeR) Set the paths for the output results, and the Python installation. output.dir = './output' pythonPath = '/home/anaconda3/envs/HemaScopeR/bin/python' Create folders for saving the results of HemaScopeR analysis. wdir <- getwd() if(is.null(pythonPath)==FALSE){ reticulate::use_python(pythonPath) }else{print('Please set the path of Python.')} if (!file.exists(paste0(output.dir, '/HemaScopeR_results'))) { dir.create(paste0(output.dir, '/HemaScopeR_results'),recursive =T) } output.dir <- paste0(output.dir,'/HemaScopeR_results') if (!file.exists(paste0(output.dir, '/RDSfiles/'))) { dir.create(paste0(output.dir, '/RDSfiles/')) } #set the path for loading previous results, if necessary previous_results_path <- paste0(output.dir, '/RDSfiles/') # if (Whether_load_previous_results) { # print('Loading the previous results...') # Load_previous_results(previous_results_path = previous_results_path) # } 4.2 Step 1. Load the input data Create a folder for step1 print('Step1. 
Input data.') if (!file.exists(paste0(output.dir, '/Step1.Input_data/'))) { dir.create(paste0(output.dir, '/Step1.Input_data/')) } Set the parameters for loading the data sets. input.data.dirs = c('./SRR7881399/outs/filtered_feature_bc_matrix')#, #'./SRR7881400/outs/filtered_feature_bc_matrix', #'./SRR7881401/outs/filtered_feature_bc_matrix', #'./SRR7881402/outs/filtered_feature_bc_matrix', #'./SRR7881403/outs/filtered_feature_bc_matrix' project.names = c('SRR7881399')#, #'SRR7881400', #'SRR7881401', #'SRR7881402', #'SRR7881403' gene.column = 2 min.cells = 10 min.feature = 200 mt.pattern = '^MT-' # set '^mt-' for mouse data Step1_Input_Data.type = 'cellranger-count' loom.files.path ="./SRR7881399/loom" Load the data sets file.copy(from = input.data.dirs, to = paste0(output.dir,'/Step1.Input_data/'), recursive = TRUE) if(Step1_Input_Data.type == 'cellranger-count'){ if(length(input.data.dirs) > 1){ input.data.list <- c() for (i in 1:length(input.data.dirs)) { sc_data.temp <- Read10X(data.dir = input.data.dirs[i], gene.column = gene.column) sc_object.temp <- CreateSeuratObject(counts = sc_data.temp, project = project.names[i], min.cells = min.cells, min.feature = min.feature) sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern) input.data.list <- c(input.data.list, sc_object.temp)} }else{ sc_data <- Read10X(data.dir = input.data.dirs, gene.column = gene.column) sc_object <- CreateSeuratObject(counts = sc_data, project = project.names, min.cells = min.cells, min.feature = min.feature) sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern) } }else if(Step1_Input_Data.type == 'Seurat'){ if(length(input.data.dirs) > 1){ input.data.list <- c() for (i in 1:length(input.data.dirs)) { sc_object.temp <- readRDS(input.data.dirs[i]) sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern) input.data.list <- c(input.data.list, sc_object.temp) } }else{ sc_object <- 
readRDS(input.data.dirs) sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern) } }else if(Step1_Input_Data.type == 'Matrix'){ if(length(input.data.dirs) > 1){ input.data.list <- c() for (i in 1:length(input.data.dirs)) { sc_data.temp <- readRDS(input.data.dirs[i]) sc_object.temp <- CreateSeuratObject(counts = sc_data.temp, project = project.names[i], min.cells = min.cells, min.feature = min.feature) sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern) input.data.list <- c(input.data.list, sc_object.temp)} }else{ sc_data <- readRDS(input.data.dirs) sc_object <- CreateSeuratObject(counts = sc_data, project = project.names, min.cells = min.cells, min.feature = min.feature) sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern) } }else{ stop('Please input data generated by the cellranger-count software, or a Seurat object, or a gene expression matrix. HemaScopeR does not support other formats of input data.') } Save the variables after executing each step, if necessary. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } 4.3 Step 2. Quality Control In this step, the following quality control steps will be performed: Normalize data using the LogNormalize method. Find variable features using the vst method. Scale data using the identified variable features and specified variables to regress out. Perform principal component analysis (PCA) on the scaled data. Find K nearest neighbors based on PCA dimensions. Perform clustering analysis based on the found neighbors. Optionally, remove doublets using doubletFinder. Optionally, integrate multiple datasets by removing batch effects. 
4.3.1 Function arguments: nFeature_RNA.limit: The cutoff of the minimum number of detected genes in each cell. percent.mt.limit: The cutoff of the maximum percentage of mitochondrial genes in each cell. scale.factor: The scale factor for the ‘data’ slot in the Seurat object. nfeatures: The number of selected highly variable features for downstream analysis. ndims: The number of principal components in PCA. vars.to.regress: Variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito. (ScaleData in Seurat) PCs: Which dimensions to use as input features. (RunTSNE and RunUMAP in Seurat) resolution: Value of the resolution parameter, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities. (FindClusters in Seurat) n.neighbors: Defines k for the k-nearest neighbor algorithm. (FindNeighbors in Seurat) percentage: Assuming ‘percentage’ doublet formation rate - tailor for your dataset. The default value is 0.05. doublerFinderwraper.PCs: Which dimensions to use as input features for doubletFinder. doublerFinderwraper.pN: The percentage of real-artificial data for doubletFinder. doublerFinderwraper.pK: The pK parameter controls the doublet cell detection by determining the number of nearest neighbors and influencing the calculation of pANN scores and the final cell classification results. Adjusting the pK value allows optimization of the doublet cell detection process based on specific data and analysis requirements. 4.3.2 codes for running step2 Create a folder for saving the results of quality control. print('Step2. Quality control.') if (!file.exists(paste0(output.dir, '/Step2.Quality_control/'))) { dir.create(paste0(output.dir, '/Step2.Quality_control/')) } Set the parameters for quality control.
# quality control nFeature_RNA.limit = 200 percent.mt.limit = 20 # preprocessing nfeatures = 3000 scale.factor = 10000 ndims = 50 vars.to.regress = NULL PCs = 1:35 resolution = 0.4 n.neighbors = 50 # removing doublets Step2_Quality_Control.RemoveDoublets = TRUE doublet.percentage = 0.04 doublerFinderwraper.PCs = 1:20 doublerFinderwraper.pN = 0.25 doublerFinderwraper.pK = 0.1 # removing batch effect Step2_Quality_Control.RemoveBatches = TRUE Run the quality control process. if(length(input.data.dirs) > 1){ # preprocess and quality control for multiple scRNA-Seq data sets sc_object <- QC_multiple_scRNASeq(seuratObjects = input.data.list, datasetID = project.names, output.dir = paste0(output.dir,'/Step2.Quality_control/'), Step2_Quality_Control.RemoveBatches = Step2_Quality_Control.RemoveBatches, Step2_Quality_Control.RemoveDoublets = Step2_Quality_Control.RemoveDoublets, nFeature_RNA.limit = nFeature_RNA.limit, percent.mt.limit = percent.mt.limit, scale.factor = scale.factor, nfeatures = nfeatures, ndims = ndims, vars.to.regress = vars.to.regress, PCs = PCs, resolution = resolution, n.neighbors = n.neighbors, percentage = doublet.percentage, doublerFinderwraper.PCs = doublerFinderwraper.PCs, doublerFinderwraper.pN = doublerFinderwraper.pN, doublerFinderwraper.pK = doublerFinderwraper.pK ) }else{ # preprocess and quality control for single scRNA-Seq data set sc_object <- QC_single_scRNASeq(sc_object = sc_object, datasetID = project.names, output.dir = paste0(output.dir,'/Step2.Quality_control/'), Step2_Quality_Control.RemoveDoublets = Step2_Quality_Control.RemoveDoublets, nFeature_RNA.limit = nFeature_RNA.limit, percent.mt.limit = percent.mt.limit, scale.factor = scale.factor, nfeatures = nfeatures, vars.to.regress = vars.to.regress, ndims = ndims, PCs = PCs, resolution = resolution, n.neighbors = n.neighbors, percentage = doublet.percentage, doublerFinderwraper.PCs = doublerFinderwraper.PCs, doublerFinderwraper.pN = doublerFinderwraper.pN, doublerFinderwraper.pK = 
doublerFinderwraper.pK) } Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } 4.3.3 Outputs Figure 4.1: Violin plots showing the nFeature, nCount and percent.mt for each sample Figure 4.2: Figures showing the correlation between nFeature and nCount, as well as between nCount and percent.mt Figure 4.3: Figures showing the variable features used for downstream analysis Figure 4.4: ElbowPlot showing suitable number of PCs used for further analysis Figure 4.5: UMAP plot showing doublets found by DoubletFinder 4.4 Step 3. Clustering Create a folder for saving the results of Louvain clustering. print('Step3. Clustering.') if (!file.exists(paste0(output.dir, '/Step3.Clustering/'))) { dir.create(paste0(output.dir, '/Step3.Clustering/')) } Set the parameters for clustering. PCs = 1:35 resolution = 0.4 n.neighbors = 50 Run Louvain clustering.
if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){graph.name <- 'integrated_snn'}else{graph.name <- 'RNA_snn'} sc_object <- FindNeighbors(sc_object, dims = PCs, k.param = n.neighbors, force.recalc = TRUE) sc_object <- FindClusters(sc_object, resolution = resolution, graph.name = graph.name) sc_object@meta.data$seurat_clusters <- as.character(as.numeric(sc_object@meta.data$seurat_clusters)) # plot clustering pdf(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','tsne_cluster.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "tsne", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() pdf(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','umap_cluster.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "umap", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','tsne_cluster.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "tsne", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','umap_cluster.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "umap", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.6: UMAP plot showing clustering results 4.5 Step 4. 
Identify Cell Types In this step, users can predict the cell types of hematopoietic cells by implementing two approaches (Scmap and Seurat) through the abcCellmap package. Cells are labeled with 43 different RNA clusters according to unsupervised clustering of single-cell transcriptional profiles, and also with 32 immunophenotypic cell types. In addition, users can use Copykat to measure copy number variation (CNV) and determine the ploidy of each cell. 4.5.1 codes for running abcCellmap Create a folder for saving the results of cell type identification. print('Step4. Identify cell types automatically.') if (!file.exists(paste0(output.dir, '/Step4.Identify_Cell_Types/'))) { dir.create(paste0(output.dir, '/Step4.Identify_Cell_Types/')) } Set the path for the database. databasePath = "~/HemaScopeR/database/" Set the parameters for cell type identification. Step4_Use_Which_Labels = 'clustering' Step4_Cluster_Labels = NULL Step4_Changed_Labels = NULL Org = 'hsa' ncores = 10 Run the cell type identification process. 
sc_object <- run_cell_annotation(object = sc_object, assay = 'RNA', species = Org, output.dir = paste0(output.dir,'/Step4.Identify_Cell_Types/')) if(Org == 'hsa'){ load(paste0(databasePath,"/HematoMap.reference.rdata")) #the data can be downloaded via the link https://cloud.tsinghua.edu.cn/d/759fd04333274d3f9946 if(length(intersect(rownames(HematoMap.reference), rownames(sc_object))) < 1000){ HematoMap.reference <- RenameGenesSeurat(obj = HematoMap.reference, newnames = toupper(rownames(HematoMap.reference)), gene.use = rownames(HematoMap.reference), de.assay = "RNA", lassays = "RNA") } if(sc_object@active.assay == 'integrated'){ DefaultAssay(sc_object) <- 'RNA' sc_object <- mapDataToRef(ref_object = HematoMap.reference, ref_labels = HematoMap.reference@meta.data$CellType, query_object = sc_object, PCs = PCs, output.dir = paste0(output.dir, '/Step4.Identify_Cell_Types/')) DefaultAssay(sc_object) <- 'integrated' }else{ sc_object <- mapDataToRef(ref_object = HematoMap.reference, ref_labels = HematoMap.reference@meta.data$CellType, query_object = sc_object, PCs = PCs, output.dir = paste0(output.dir, '/Step4.Identify_Cell_Types/')) } } Set the cell labels. 
# set the cell labels if(Step4_Use_Which_Labels == 'clustering'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$seurat_clusters Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.1'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$Seurat.RNACluster Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.2'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$scmap.RNACluster Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.3'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$Seurat.Immunophenotype Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'abcCellmap.4'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$scmap.Immunophenotype Idents(sc_object) <- sc_object@meta.data$selectLabels }else if(Step4_Use_Which_Labels == 'HematoMap'){ if(Org == 'hsa'){ sc_object@meta.data$selectLabels <- sc_object@meta.data$predicted.id Idents(sc_object) <- sc_object@meta.data$selectLabels }else{print("'HematoMap' is only applicable to human data ('Org' = 'hsa').")} }else if(Step4_Use_Which_Labels == 'changeLabels'){ if (!is.null(Step4_Cluster_Labels) && !is.null(Step4_Changed_Labels) && length(Step4_Cluster_Labels) == length(Step4_Changed_Labels)){ sc_object@meta.data$selectLabels <- plyr::mapvalues(sc_object@meta.data$seurat_clusters, from = as.character(Step4_Cluster_Labels), to = as.character(Step4_Changed_Labels), warn_missing = FALSE) Idents(sc_object) <- sc_object@meta.data$selectLabels }else{ print("Please input the 'Step4_Cluster_Labels' parameter as Seurat clustering labels, and the 'Step4_Changed_Labels' parameter as new labels. 
Please note that these two parameters should be of equal length.") } }else{ print('Please set the "Step4_Use_Which_Labels" parameter as "clustering", "abcCellmap.1", "abcCellmap.2", "HematoMap" or "changeLabels".') } Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.7: UMAP plots showing cell type annotation results Figure 4.8: Immunophenotype and RNACluster label predicted by scmap Figure 4.9: Immunophenotype and RNACluster label predicted by Seurat 4.5.2 codes for running the CNV analysis sc_CNV(sc_object=sc_object, save_path=paste0(output.dir,'/Step4.Identify_Cell_Types/'), assay = 'RNA', LOW.DR = 0.05, #refer to the Copykat documentation for detailed explanations of the parameters UP.DR = 0.1, win.size = 25, distance = "euclidean", genome = NULL, n.cores = ncores, #note: this step will take a long time, using more ncores could shorten the running time species = Org) Figure 4.10: copykat heatmap Figure 4.11: UMAP plot showing CNV state predicted by copykat 4.6 Step 5. Visualization In this step, users can obtain statistics on the numbers and proportions of cell groups, and also use three dimensionality reduction methods (TSNE, UMAP, phateR) to visualize the results. 4.6.1 codes for performing three dimensionality reduction methods Create a folder for saving the visualization results. print('Step5. Visualization.') if (!file.exists(paste0(output.dir, '/Step5.Visualization/'))) { dir.create(paste0(output.dir, '/Step5.Visualization/')) } Perform visualization using UMAP and TSNE. 
# plot cell types pdf(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','tsne cell types.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "tsne", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() pdf(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','umap cell types.pdf'), width = 6, height = 6) print(DimPlot(sc_object, reduction = "umap", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','tsne cell types.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "tsne", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() png(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','umap cell types.png'), width = 600, height = 600) print(DimPlot(sc_object, reduction = "umap", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() Figure 4.12: UMAP and TSNE visualization Set the parameters for phateR. phate.knn = 50 #The number of nearest neighbors to consider in the phateR algorithm. Default 50. phate.npca = 20 #The number of principal components to use in the phateR algorithm. Default 20. phate.t = 10 #The t-value for the phateR algorithm, which controls the level of exploration. Default 10. phate.ndim = 2 #The number of dimensions for the output embedding in the phateR algorithm. Default 2. Run phateR for dimensional reduction and visualization. 
# run phateR if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){ DefaultAssay(sc_object) <- 'integrated' }else{ DefaultAssay(sc_object) <- 'RNA'} if(!is.null(pythonPath)){ run_phateR(sc_object = sc_object, output.dir = paste0(output.dir,'/Step5.Visualization/'), pythonPath = pythonPath, phate.knn = phate.knn, phate.npca = phate.npca, phate.t = phate.t, phate.ndim = phate.ndim) } Figure 4.13: phateR result 4.6.2 codes for calculating the proportions The statistical results for the numbers and proportions of cell groups. # statistical results cells_labels <- as.data.frame(cbind(rownames(sc_object@meta.data), as.character(sc_object@meta.data$selectLabels))) colnames(cells_labels) <- c('cell_id', 'cluster_id') cluster_counts <- cells_labels %>% group_by(cluster_id) %>% summarise(count = n()) total_cells <- nrow(cells_labels) cluster_counts <- cluster_counts %>% mutate(proportion = count / total_cells) cluster_counts <- as.data.frame(cluster_counts) cluster_counts$percentages <- scales::percent(cluster_counts$proportion, accuracy = 0.1) cluster_counts <- cluster_counts[,-which(colnames(cluster_counts)=='proportion')] cluster_counts$cluster_id_count_percentages <- paste(cluster_counts$cluster_id, " (", cluster_counts$count, ' cells; ', cluster_counts$percentages, ")", sep='') cluster_counts <- cluster_counts[order(cluster_counts$count, decreasing = TRUE),] cluster_counts <- rbind(cluster_counts, c('Total', sum(cluster_counts$count), '100%', 'all cells')) sc_object@meta.data$cluster_id_count_percentages <- mapvalues(sc_object@meta.data$selectLabels, from=cluster_counts$cluster_id, to=cluster_counts$cluster_id_count_percentages, warn_missing=FALSE) colnames(sc_object@meta.data)[which(colnames(sc_object@meta.data) == 'cluster_id_count_percentages')] <- paste('Total ', nrow(sc_object@meta.data), ' cells', sep='') cluster_counts <- cluster_counts[,-which(colnames(cluster_counts)=='cluster_id_count_percentages')] colnames(cluster_counts) <- c('Cell 
types', 'Cell counts', 'Percentages') # names(colorvector) <- mapvalues(names(colorvector), # from=cluster_counts$cluster_id, # to=cluster_counts$cluster_id_count_percentages, # warn_missing=FALSE) write.csv(cluster_counts, file=paste(paste0(output.dir, '/Step5.Visualization/'), '/cell types_cell counts_percentages.csv', sep=''), quote=FALSE, row.names=FALSE) The UMAP visualization. pdf(paste(paste0(output.dir, '/Step5.Visualization'), '/cell types_cell counts_percentages_umap.pdf', sep=''), width = 14, height = 6) print(DimPlot(sc_object, reduction = "umap", group.by = paste('Total ', nrow(sc_object@meta.data), ' cells', sep=''), label = FALSE, pt.size = 0.1, raster = FALSE)) dev.off() Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.14: UMAP plot showing cell type and corresponding proportion 4.7 Step 6. Find DEGs In this step, users can find DEGs (differentially expressed genes) across different cell type group using FindAllMarkers, use GPTCelltype to predict cell label, perform GO and KEGG enrichment analysis, and perform subnetwork analysis for each cell type group. 4.7.1 codes for finding DEGs Set the parameters for identifying differentially expressed genes. min.pct = 0.25 logfc.threshold = 0.25 Create a folder for the DEGs analysis. print('Step6. Find DEGs.') if (!file.exists(paste0(output.dir, '/Step6.Find_DEGs/'))) { dir.create(paste0(output.dir, '/Step6.Find_DEGs/')) } Identify DEGs using Wilcoxon Rank-Sum Test. 
sc_object.markers <- FindAllMarkers(sc_object, only.pos = TRUE, min.pct = min.pct, logfc.threshold = logfc.threshold) write.csv(sc_object.markers, file = paste0(paste0(output.dir, '/Step6.Find_DEGs/'),'sc_object.markerGenes.csv'), quote=FALSE) # visualization sc_object.markers.top5 <- sc_object.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC) pdf(paste0(paste0(output.dir, '/Step6.Find_DEGs/'), 'sc_object_markerGenesTop5.pdf'), width = 0.5*length(unique(sc_object.markers.top5$gene)), height = 0.5*length(unique(Idents(sc_object)))) print(DotPlot(sc_object, features = unique(sc_object.markers.top5$gene), cols=c("lightgrey",'red'))+theme(axis.text.x =element_text(angle = 45, vjust = 1, hjust = 1))) dev.off() png(paste0(paste0(output.dir, '/Step6.Find_DEGs/'), 'sc_object_markerGenesTop5.png'), width = 20*length(unique(sc_object.markers.top5$gene)), height = 30*length(unique(Idents(sc_object)))) print(DotPlot(sc_object, features = unique(sc_object.markers.top5$gene), cols=c("lightgrey",'red'))+theme(axis.text.x =element_text(angle = 45, vjust = 1, hjust = 1))) dev.off() Figure 4.15: Dotplot showing marker genes of each cell type group 4.7.2 codes for using GPTCelltype Set the parameters for GPTCelltype. your_openai_API_key = '' tissuename = 'human bone marrow' gptmodel = 'gpt-3.5' Use GPTCelltype to assist cell type annotation. GPT_annotation( marker.genes = sc_object.markers, your_openai_API_key = your_openai_API_key, tissuename = tissuename, gptmodel = gptmodel, output.dir = paste0(output.dir, '/Step6.Find_DEGs/')) 4.7.3 Perform GO and KEGG enrichment. 
# GO enrichment if(Org=='mmu'){ OrgDb <- 'org.Mm.eg.db' }else if(Org=='hsa'){ OrgDb <- 'org.Hs.eg.db' }else{ stop("Org should be 'mmu' or 'hsa'.") } HemaScopeREnrichment(DEGs=sc_object.markers, OrgDb=OrgDb, output.dir=paste0(output.dir, '/Step6.Find_DEGs/')) Figure 4.16: Barplot showing GO(BP)and KEGG enrichment results of each cell type group 4.7.4 Perform subnetwork analysis Create a folder for saving the results of gene network analysis. if (!file.exists(paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/'))) { dir.create(paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/')) } Perform gene network analysis. OpenXGR_SAG(sc_object.markers = sc_object.markers, output.dir = paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/'), subnet.size = 10) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.17: Figure showing subnetwork of each cell type group identified by OpenXGR 4.8 Step 7. Assign Cell Cycles This step assigns cell cycle phases by analyzing cell cycle-related genes and generates plots of the cell cycle analysis results. 4.8.1 Function arguments: sc_object: A Seurat object containing single-cell RNA sequencing data. counts_matrix: The ‘counts’ slot in the Seurat object. data_matrix: The ‘data’ slot in the Seurat object. cellcycleCutoff: The cutoff value for distinguishing between cycling and quiescent cells. Cells with a G1G2Score below this cutoff are considered quiescent. cellTypeOrders: The order of cell types for visualization. If not provided, the function will use the unique cell types in the input Seurat object. databasePath: The path to the database required for the analysis. 
Org: A character vector specifying the species of cell cycle genes, can be ‘mmu’ (mouse) or ‘hsa’ (human). 4.8.2 codes for step7 Create a folder for saving the results of cell cycle analysis. print('Step7. Assign cell cycles.') if (!file.exists(paste0(output.dir, '/Step7.Assign_cell_cycles/'))) { dir.create(paste0(output.dir, '/Step7.Assign_cell_cycles/')) } Set the parameters for the cell cycle analysis. cellcycleCutoff = NULL Run the cell cycle analysis. datasets.before.batch.removal <- readRDS(paste0(paste0(output.dir, '/RDSfiles/'),'datasets.before.batch.removal.rds')) sc_object <- cellCycle(sc_object=sc_object, counts_matrix = GetAssayData(object = datasets.before.batch.removal, slot = "counts")%>%as.matrix(), data_matrix = GetAssayData(object = datasets.before.batch.removal, slot = "data")%>%as.matrix(), cellcycleCutoff = cellcycleCutoff, cellTypeOrders = unique(sc_object@meta.data$selectLabels), output.dir=paste0(output.dir, '/Step7.Assign_cell_cycles/'), databasePath = databasePath, Org = Org) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } 4.8.3 Outputs Figure 4.18: Barplot showing the proportion of different cell cycle within each cell type group Figure 4.19: Density plot showing the distribution of cell cycle scores 4.9 Step 8. Calculate Heterogeneity This step quantifies cell heterogeneity by computing Spearman correlation coefficients between cells within the same cell type groups. 4.9.1 Function arguments: expression_matrix: A numeric matrix representing the expression data, where rows are genes and columns are cells. The matrix should be appropriately preprocessed and filtered before using this function. 
cell_types_groups: A data frame specifying cell type annotations for each cell, including cell type labels and group information. cellTypeOrders: The order of cell types for visualization. If not provided, the function will use the unique cell types in the input cell_types_groups. 4.9.2 codes for step8 Create a folder for saving the results of heterogeneity calculation. print('Step8. Calculate heterogeneity.') if (!file.exists(paste0(output.dir, '/Step8.Calculate_heterogeneity/'))) { dir.create(paste0(output.dir, '/Step8.Calculate_heterogeneity/')) } Run heterogeneity calculation process. expression_matrix <- GetAssayData(object = datasets.before.batch.removal, slot = "data")%>%as.matrix() expression_matrix <- expression_matrix[,rownames(sc_object@meta.data)] cell_types_groups <- as.data.frame(cbind(sc_object@meta.data$selectLabels, sc_object@meta.data$datasetID)) colnames(cell_types_groups) <- c('clusters', 'datasetID') if(is.null(ViolinPlot.cellTypeOrders)){ cellTypes_orders <- unique(sc_object@meta.data$selectLabels) }else{ cellTypes_orders <- ViolinPlot.cellTypeOrders } heterogeneity(expression_matrix = expression_matrix, cell_types_groups = cell_types_groups, cellTypeOrders = cellTypes_orders, output.dir = paste0(output.dir, '/Step8.Calculate_heterogeneity/')) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.20: Box plot showing the Spearman correlation coefficients between cells within the same cell type groups (here we use a dataset with more samples as an example) 4.10 Step 9. Violin Plot for Marker Genes This step generates violin plots for marker genes across different cell types. 
4.10.1 Function arguments: dataMatrix: A data frame or matrix representing the expression data, where rows are cells and columns are genes. features: A character vector specifying the marker genes to plot in the violin plots. CellTypes: A factor vector containing cell type annotations for each cell. cellTypeOrders: A character vector specifying the order of cell types for plotting. Defaults to unique values in CellTypes. cellTypeColors: A character vector specifying the colors to use for cell type groups. Defaults to a color palette. 4.10.2 codes for step9 Create a folder for saving the violin plots of marker genes. print('Step9. Violin plot for marker genes.') if (!file.exists(paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/'))) { dir.create(paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/')) } Run violin plot visualization. if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){ DefaultAssay(sc_object) <- 'integrated' }else{ DefaultAssay(sc_object) <- 'RNA'} dataMatrix <- GetAssayData(object = sc_object, slot = "scale.data") if(is.null(marker.genes)&(Org == 'mmu')){ # mpp genes are from 'The bone marrow microenvironment at single cell resolution' # the other genes are from 'single cell characterization of haematopoietic progenitors and their trajectories in homeostasis and perturbed haematopoiesis' # the aliases of these genes were changed in gecodeM16:Gpr64 -> Adgrg2, Sdpr -> Cavin2, Hbb-b1 -> Hbb-bs, Sfpi1 -> Spi1 HSC_lineage_signatures <- c('Slamf1', 'Itga2b', 'Kit', 'Ly6a', 'Bmi1', 'Gata2', 'Hlf', 'Meis1', 'Mpl', 'Mcl1', 'Gfi1', 'Gfi1b', 'Hoxb5') Mpp_genes <- c('Mki67', 'Mpo', 'Elane', 'Ctsg', 'Calr') Erythroid_lineage_signatures <- c('Klf1', 'Gata1', 'Mpl', 'Epor', 'Vwf', 'Zfpm1', 'Fhl1', 'Adgrg2', 'Cavin2','Gypa', 'Tfrc', 'Hbb-bs', 'Hbb-y') Lymphoid_lineage_signatures <- c('Tcf3', 'Ikzf1', 'Notch1', 'Flt3', 'Dntt', 'Btg2', 'Tcf7', 'Rag1', 'Ptprc', 'Ly6a', 'Blnk') Myeloid_lineage_signatures <- c('Gfi1', 'Spi1', 'Mpo', 
'Csf2rb', 'Csf1r', 'Gfi1b', 'Hk3', 'Csf2ra', 'Csf3r', 'Sp1', 'Fcgr3') marker.genes <- c(HSC_lineage_signatures, Mpp_genes, Erythroid_lineage_signatures, Lymphoid_lineage_signatures, Myeloid_lineage_signatures) }else if(is.null(marker.genes)&(Org == 'hsa')){ HSPCs_lineage_signatures <- c('CD34','KIT','AVP','FLT3','MME','CD7','CD38','CSF1R','FCGR1A','MPO','ELANE','IL3RA') Myeloids_lineage_signatures <- c('LYZ','CD36','MPO','FCGR1A','CD4','CD14','CD300E','ITGAX','FCGR3A','FLT3','AXL', 'SIGLEC6','CLEC4C','IRF4','LILRA4','IL3RA','IRF8','IRF7','XCR1','CD1C','THBD', 'MRC1','CD34','KIT','ITGA2B','PF4','CD9','ENG','KLF','TFRC') B_cells_lineage_signatures <- c('CD79A','IGLL1','RAG1','RAG2','VPREB1','MME','IL7R','DNTT','MKI67','PCNA','TCL1A','MS4A1','IGHD','CD27','IGHG3') T_NK_cells_lineage_signatures <- c('CD3D','CD3E','CD8A','CCR7','IL7R','SELL','KLRG1','CD27','GNLY', 'NKG7','PDCD1','TNFRSF9','LAG3','CD160','CD4','CD40LG','IL2RA', 'FOXP3','DUSP4','IL2RB','KLRF1','FCGR3A','NCAM1','XCL1','MKI67','PCNA','KLRF') marker.genes <- c(HSPCs_lineage_signatures, Myeloids_lineage_signatures, B_cells_lineage_signatures, T_NK_cells_lineage_signatures) } if(is.null(ViolinPlot.cellTypeOrders)){ ViolinPlot.cellTypeOrders <- unique(sc_object@meta.data$selectLabels) } if(is.null(ViolinPlot.cellTypeColors)){ ViolinPlot.cellTypeColors <- viridis::viridis(length(unique(sc_object@meta.data$selectLabels))) } combinedViolinPlot(dataMatrix = dataMatrix, features = marker.genes, CellTypes = sc_object@meta.data$selectLabels, cellTypeOrders = ViolinPlot.cellTypeOrders, cellTypeColors = ViolinPlot.cellTypeColors, Org = Org, output.dir = paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/'), databasePath = databasePath) Save the variables. 
# Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.21: Violin plot showing the expression of marker genes between cell type groups 4.11 Step 10. Calculate Lineage Scores This step calculates lineage scores for specified gene sets based on the provided expression data. It then generates a heatmap of lineage scores and a heatmap of gene expression patterns. 4.11.1 Function arguments: expression_matrix: A data frame or matrix representing the expression data, where rows are cells and columns are genes. cellTypes: A character vector specifying cell type annotations for each cell. e.g. c(“HSC”,“HSC”,“HSC”,“MPP1”,“MPP2”,“MPP2”,“MPP2” …) cellTypes_orders: A character vector specifying the order of cell types for plotting. e.g. c(“HSC”,“MPP1”,“MPP2”) cellTypes_colors: A character vector specifying the colors to use for cell type groups. e.g. c(“HSC” = ‘#006d2c’,“MPP1” = ‘#4292c6’,“MPP2”= ‘#810f7c’). groups: A character vector specifying groups or clusters within each cell type. groups_orders: A character vector specifying the order of groups or clusters for plotting. groups_colors: A character vector specifying the colors to use for group or cluster annotations. e.g. c(‘group1’=‘#d73027’,‘group2’=‘#2171b5’) lineage.genelist: A list of gene sets representing lineage markers. lineage.names: A character vector specifying the names of the lineages. 4.11.2 codes for step10 Create a folder for saving the results of lineage score calculation. print('Step10. 
Calculate lineage scores.') # we use normalized data here if (!file.exists(paste0(output.dir, '/Step10.Calculate_lineage_scores/'))) { dir.create(paste0(output.dir, '/Step10.Calculate_lineage_scores/')) } Run lineage score calculation. if(is.null(lineage.genelist)&is.null(lineage.names)&(Org == 'mmu')){ lineage.genelist <- c(list(HSC_lineage_signatures), list(Mpp_genes), list(Erythroid_lineage_signatures), list(Lymphoid_lineage_signatures), list(Myeloid_lineage_signatures)) lineage.names <- c('HSC_lineage_signatures', 'Mpp_genes', 'Erythroid_lineage_signatures', 'Lymphoid_lineage_signatures', 'Myeloid_lineage_signatures') }else if(is.null(lineage.genelist)&is.null(lineage.names)&(Org == 'hsa')){ lineage.genelist <- c(list(HSPCs_lineage_signatures), list(Myeloids_lineage_signatures), list(B_cells_lineage_signatures), list(T_NK_cells_lineage_signatures)) lineage.names <- c('HSPCs_lineage_signatures', 'Myeloids_lineage_signatures', 'B_cells_lineage_signatures', 'T_NK_cells_lineage_signatures') } if(is.null(ViolinPlot.cellTypeOrders)){ cellTypes_orders <- unique(sc_object@meta.data$selectLabels) }else{ cellTypes_orders <- ViolinPlot.cellTypeOrders } lineageScores(expression_matrix = expression_matrix, cellTypes = sc_object@meta.data$selectLabels, cellTypes_orders = cellTypes_orders, cellTypes_colors = ViolinPlot.cellTypeColors, groups = sc_object@meta.data$datasetID, groups_orders = unique(sc_object@meta.data$datasetID), groups_colors = groups_colors, lineage.genelist = lineage.genelist, lineage.names = lineage.names, Org = Org, output.dir = paste0(output.dir, '/Step10.Calculate_lineage_scores/'), databasePath = databasePath) Save the variables. 
# Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.22: Heatmap showing the expression of lineage genes for each cell Figure 4.23: Heatmap showing the score of lineage signatures for each cell 4.12 Step 11. GSVA This step runs GSVA analysis, which calculates enrichment scores for gene sets in each cell using the provided gene list. It also performs differential GSVA analysis between specified cell groups and generates heatmaps of the results. 4.12.1 Function arguments: sc_object: A Seurat object containing the single-cell RNA-seq data. GSVA.genelist: A list of gene sets for GSVA analysis. GSVA.cellTypes: A character vector specifying the cell types or labels for each cell. GSVA.cellTypes.orders: A character vector specifying the order of cell types for visualization. GSVA.cellGroups: A character vector specifying the cell groups or conditions for each cell. GSVA.identify.cellType.features: Logical. If TRUE, identify cell type-specific features. GSVA.identify.diff.features: Logical. If TRUE, identify differentially expressed features between cell groups. GSVA.comparison.design: A list specifying the experimental design for differential GSVA analysis. OrgDB: An organism-specific annotation database (OrgDb) for gene symbol conversion. e.g. org.Mm.eg.db or org.Hs.eg.db. 4.12.2 codes for running step11 Create a folder for saving the results of GSVA. print('Step11. GSVA.') if (!file.exists(paste0(output.dir, '/Step11.GSVA/'))) { dir.create(paste0(output.dir, '/Step11.GSVA/')) } Run GSVA. 
setwd(wdir) if(Org=='mmu'){ load(paste0(databasePath,"/mouse_c2_v5p2.rdata")) GSVA.genelist <- Mm.c2 assign('OrgDB', org.Mm.eg.db) }else if(Org=='hsa'){ load(paste0(databasePath,"/human_c2_v5p2.rdata")) GSVA.genelist <- Hs.c2 assign('OrgDB', org.Hs.eg.db) }else{ stop("Org should be 'mmu' or 'hsa'.") } if(is.null(ViolinPlot.cellTypeOrders)){ cellTypes_orders <- unique(sc_object@meta.data$selectLabels) }else{ cellTypes_orders <- ViolinPlot.cellTypeOrders } run_GSVA(sc_object = sc_object, GSVA.genelist = GSVA.genelist, GSVA.cellTypes = sc_object@meta.data$selectLabels, GSVA.cellTypes.orders = cellTypes_orders, GSVA.cellGroups = sc_object@meta.data$datasetID, GSVA.identify.cellType.features = Step11_GSVA.identify.cellType.features, GSVA.identify.diff.features = Step11_GSVA.identify.diff.features, GSVA.comparison.design = Step11_GSVA.comparison.design, OrgDB = OrgDB, output.dir = paste0(output.dir, '/Step11.GSVA/')) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.24: GSVA Heatmap showing the enriched pathways of each cell type group 4.13 Step 12. Construct Trajectories In this step, users are allowed to construct trajectories using three methods including Monocle2, slingshot and scVelo. 4.13.1 data preparation Load gene symbols and ensemble IDs. 
DefaultAssay(sc_object) <- 'RNA' countsSlot <- GetAssayData(object = sc_object, slot = "counts") gene_metadata <- as.data.frame(rownames(countsSlot)) rownames(gene_metadata) <- gene_metadata[,1] if(Org == 'mmu'){ load(paste0(databasePath,"/mouseGeneSymbolandEnsembleID.rdata")) gene_metadata $ ensembleID <- mapvalues(x = gene_metadata[,1], from = mouseGeneSymbolandEnsembleID$geneName, to = mouseGeneSymbolandEnsembleID$ensemblIDNoDot, warn_missing = FALSE) }else if(Org == 'hsa'){ load(paste0(databasePath,"/humanGeneSymbolandEnsembleID.rdata")) gene_metadata $ ensembleID <- mapvalues(x = gene_metadata[,1], from = humanGeneSymbolandEnsembleID$geneName, to = humanGeneSymbolandEnsembleID$ensemblIDNoDot, warn_missing = FALSE) } colnames(gene_metadata) <- c('gene_short_name','ensembleID') Create folders for saving the results of trajectory construction. print('Step12. Construct trajectories.') if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/')) } if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/monocle2/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/monocle2/')) } if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/slingshot/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/slingshot/')) } if (!file.exists(paste0(output.dir, '/Step12.Construct_trajectories/scVelo/'))) { dir.create(paste0(output.dir, '/Step12.Construct_trajectories/scVelo/')) } Prepare the input data. 
if(is.null(Step12_Construct_Trajectories.clusters)){ sc_object.subset <- sc_object countsSlot.subset <- GetAssayData(object = sc_object.subset, slot = "counts") }else{ sc_object.subset <- subset(sc_object, subset = selectLabels %in% Step12_Construct_Trajectories.clusters) countsSlot.subset <- GetAssayData(object = sc_object.subset, slot = "counts") } 4.13.2 monocle2 Running monocle2 involves several steps: Creating a Monocle cellDataSet using the provided cellData, phenoData, and featureData. Estimating size factors, dispersions, and detecting highly variable genes. Performing differential gene expression analysis to identify genes associated with cell state changes. Ordering cells along the inferred trajectories and reducing dimensionality. Generating and saving trajectory plots, including cell trajectory by “State” and by “Cell Types.” 4.13.2.1 Function arguments: cellData: A matrix of gene expression values, where columns represent cells and rows represent genes. phenoData: A data frame containing cell metadata, such as cell labels or other relevant information. featureData: A data frame containing information about features (genes) in the dataset. lowerDetectionLimit: The lower detection limit for gene expression. Genes with expression values below this limit will be treated as non-detected. expressionFamily: The family of the expression distribution used in Monocle analysis. cellTypes: A character vector specifying cell types or labels used for coloring in trajectory plots. monocle.orders: A character vector specifying the order of cell types in the Monocle analysis. monocle.colors: A character vector specifying colors for cell types in trajectory plots. 
4.13.2.2 codes for running monocle2 phenoData <- sc_object.subset@meta.data featureData <- gene_metadata run_monocle(cellData = countsSlot.subset, phenoData = phenoData, featureData = featureData, lowerDetectionLimit = 0.5, expressionFamily = VGAM::negbinomial.size(), cellTypes='selectLabels', monocle.orders=Step12_Construct_Trajectories.clusters, monocle.colors = ViolinPlot.cellTypeColors, output.dir = paste0(output.dir, '/Step12.Construct_trajectories/monocle2/')) Figure 4.25: Figures showing cells in different trajectory states (left) and corresponding cell type groups (right) 4.13.3 Slingshot Running Slingshot to infer cell trajectories and lineage relationships involves several steps: Constructs a Slingshot object using PCA embeddings, cell types, start clusters, and end clusters. Computes and plots the trajectory curves. Computes and plots pseudotime values along the trajectory. 4.13.3.1 Function arguments: slingshot.PCAembeddings: A matrix containing the PCA embeddings of the single-cell data, typically obtained from dimensionality reduction techniques like PCA. slingshot.cellTypes: A character vector specifying cell types or labels for each cell. slingshot.start.clus: A character vector specifying the initial cluster(s) from which cell trajectories should start. slingshot.end.clus: A character vector specifying the target cluster(s) where cell trajectories should end. slingshot.colors: A vector of colors corresponding to cell types for plotting. If not provided, default colors will be used. 
4.13.3.2 codes for running Slingshot if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){ DefaultAssay(sc_object.subset) <- 'integrated' }else{ DefaultAssay(sc_object.subset) <- 'RNA'} run_slingshot(slingshot.PCAembeddings = Embeddings(sc_object.subset, reduction = "pca")[, PCs], slingshot.cellTypes = sc_object.subset@meta.data$selectLabels, slingshot.start.clus = slingshot.start.clus, slingshot.end.clus = slingshot.end.clus, slingshot.colors = slingshot.colors, output.dir = paste0(output.dir, '/Step12.Construct_trajectories/slingshot/')) Figure 4.26: Figures showing slingshot curve and inferred pseudotime value 4.13.4 scVelo scVelo is implemented in Python, and it takes a Seurat object, cell embeddings, and cell type information as input. The process of data preparation includes the following steps: Format the Seurat object metadata, including cell types and sample names. Extract the spliced, unspliced, and ambiguous count matrices from the Seurat object. Combine the metadata and cell embeddings. Write the necessary input files for scVelo analysis, including cell embeddings, count matrices, and metadata. 4.13.4.1 Function arguments: sc_object: A Seurat object containing the single-cell RNA-seq data. loom.files.path: A character vector specifying the path(s) to the loom files for scVelo analysis. scvelo.reduction: A character specifying the reduction method used for scVelo analysis (default is ‘pca’). scvelo.column: A character specifying the column in the Seurat object metadata containing cell types.
4.13.4.2 codes for running Scvelo if((!is.null(loom.files.path))&(!is.null(pythonPath))){ prepareDataForScvelo(sc_object = sc_object.subset, loom.files.path = loom.files.path, scvelo.reduction = 'pca', scvelo.column = 'selectLabels', output.dir = paste0(output.dir, '/Step12.Construct_trajectories/scVelo/')) reticulate::py_run_string(paste0("import os\\noutputDir = '", output.dir, "'")) reticulate::py_run_file(file.path(system.file(package = "HemaScopeR"), "python/sc_run_scvelo.py"), convert = FALSE) } Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.27: Figure showing trajectory predicted by scvelo 4.14 Step 13. TF Analysis This step runs SCENIC (Single-Cell Regulatory Network Inference and Clustering) analysis, including the construction of a co-expression network, gene filtering, correlation, and the GENIE3 algorithm to infer regulatory networks. 4.14.1 Function arguments: countMatrix: A matrix containing the raw counts of the single-cell RNA-seq data. cellTypes: A character vector specifying the cell types or labels for each cell. datasetID: A character vector specifying the dataset IDs for each cell. cellTypes_colors: A named vector of colors for cell type visualization. cellTypes_orders: A character vector specifying the desired order of cell types. groups_colors: A named vector of colors for grouping visualization. groups_orders: A character vector specifying the desired order of groups. Org: A character vector specifying the organism (‘mmu’ for mouse or ‘hsa’ for human). 4.14.2 codes for running step13 Create folders for saving the results of TF analysis. print('Step13. 
TF analysis.') if (!file.exists(paste0(output.dir, '/Step13.TF_analysis/'))) { dir.create(paste0(output.dir, '/Step13.TF_analysis/')) } Run SCENIC to perform TF analysis. run_SCENIC(countMatrix = countsSlot, cellTypes = sc_object@meta.data$selectLabels, datasetID = sc_object@meta.data$datasetID, cellTypes_colors = Step13_TF_Analysis.cellTypes_colors, cellTypes_orders = unique(sc_object@meta.data$selectLabels), groups_colors = Step13_TF_Analysis.groups_colors, groups_orders = unique(sc_object@meta.data$datasetID), Org = Org, output.dir = paste0(output.dir, '/Step13.TF_analysis/'), pythonPath = pythonPath, databasePath = databasePath) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.28: Heatmap showing predicted regulon activity for each cell Figure 4.29: Heatmap showing statistics of regulons 4.15 Step 14. Cell-Cell Interaction The step takes expression data, cluster labels, and other parameters to perform cell-cell communication analysis using the CellChat package. It includes the following steps: Data input and preprocessing. Initialization of a CellChat object. Set the ligand-receptor interaction database based on the specified organism. Preprocess the expression data for cell-cell communication analysis. Identify overexpressed genes and interactions. Project data based on protein-protein interaction networks. Inference of cell-cell communication network. Visualization of the communication network. Systems analysis of cell-cell communication network. 4.15.1 Function arguments: data.input: A matrix of expression data, where rows represent genes and columns represent cells. Row names should be in the format of gene symbols. 
labels: A vector of cluster labels for each cell, corresponding to the columns of data.input. cell.orders: A character vector specifying the order of cell types or clusters in the analysis. cell.colors: A character vector specifying colors for cell types or clusters in the analysis. sample.names: A vector of sample or cell names, corresponding to the columns of data.input. Org: A string indicating the organism used in the analysis. It should be either “mmu” (mouse) or “hsa” (human). sorting: A logical value indicating whether to consider cell population size in communication analysis. 4.15.2 codes for running step14 Create folders for saving the results of cell-cell interaction analysis. print('Step14. Cell-cell interaction.') if (!file.exists(paste0(output.dir, '/Step14.Cell_cell_interection/'))) { dir.create(paste0(output.dir, '/Step14.Cell_cell_interection/')) } Run CellChat to perform cell-cell interaction analysis. tempwd <- getwd() run_CellChat(data.input=countsSlot, labels = sc_object@meta.data$selectLabels, cell.orders = ViolinPlot.cellTypeOrders, cell.colors = ViolinPlot.cellTypeColors, sample.names = rownames(sc_object@meta.data), Org = Org, sorting = sorting, output.dir = paste0(output.dir, '/Step14.Cell_cell_interection/')) setwd(tempwd) Save the variables. # Get the names of all variables in the current environment variable_names <- ls() # Loop through the variable names and save them as RDS files for (var_name in variable_names) { var <- get(var_name) # Get the variable by its name saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name } Figure 4.30: Figures showing the interaction number and strength between each cell group Figure 4.31: Heatmap showing the strength of incoming and outgoing signals for each cell type group across various pathways. 
Figure 4.32: Figure showing LRs interaction between each cell type group "],["integrated-st-pipeline.html", "5 Integrated ST pipeline 5.1 For 10X Visium data 5.2 For MERFISH data 5.3 For stereo-seq data", " 5 Integrated ST pipeline Load the R packages. # sc libraries library(Seurat) library(phateR) library(DoubletFinder) library(monocle) library(slingshot) library(URD) library(GSVA) library(limma) library(plyr) library(dplyr) library(org.Mm.eg.db) library(org.Hs.eg.db) library(CellChat) library(velocyto.R) library(SeuratWrappers) library(stringr) library(scran) library(ggpubr) library(viridis) library(pheatmap) library(parallel) library(reticulate) library(SCENIC) library(feather) library(AUCell) library(RcisTarget) library(Matrix) library(foreach) library(doParallel) library(clusterProfiler) library(OpenXGR) # st libraries library(RColorBrewer) library(Rfast2) library(SeuratDisk) library(abcCellmap) library(biomaRt) library(copykat) library(gelnet) library(ggplot2) library(parallelDist) library(patchwork) library(markdown) # getopt library(getopt) library(tools) # HemaScopeR library(HemaScopeR) 5.1 For 10X Visium data Run the integrated 10X Visium pipeline.
st_10x_visium_pipeline( input.data.dir = 'path/to/data', output.dir = '.', sampleName = 'Hema_ST', # For Step1 Loading rds.file = FALSE, filename = "filtered_feature_bc_matrix.h5", assay = "Spatial", slice = "slice1", filter.matrix = TRUE, to.upper = FALSE, # For Step2 QC Step2_QC = TRUE, min.gene = 200, min.nUMI = 500, max.gene = Inf, max.nUMI = Inf, min.spot = 0, bool.remove.mito = FALSE, species = 'mouse', # 'human' or 'mouse' # For Step3 Clustering Step3_Clustering = TRUE, normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.8, # For Step4 Find DEGs Step4_Find_DEGs = TRUE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', # For Step5 SVF Step5_SVFs = TRUE, selection.method = 'moransi', n.top.show = 10, n.col.show = 5, # For Step6 Interaction Step6_Interaction = TRUE, commot.signaling_type = 'Secreted Signaling', commot.database = 'CellChat', commot.min_cell_pct = 0.05, commot.dis_thr = 500, commot.n_permutations = 100, # For Step7 CNV analysis Step7_CNV = TRUE, copykat.genome = NULL, copykat.LOW.DR = 0.05, copykat.UP.DR = 0.1, copykat.win.size = 25, copykat.distance = "euclidean", copykat.n.cores = 1, # For Step8 Deconvolution Step8_Deconvolution = TRUE, cell2loc.sc.h5ad.dir = NULL, cell2loc.sc.max.epoch = 1000, cell2loc.st.max.epoch = 10000, cell2loc.use.gpu = TRUE, cell2loc.use.dataset = 'LymphNode', # For Step9 Cellcycle Step9_Cellcycle = TRUE, s.features = NULL, g2m.features = NULL, # For Step10 Niche Step10_Niche = TRUE, coexistence.method = 'correlation', Niche.cluster.n = 4, # settings pythonPath = 'path/to/python', verbose = FALSE, genReport = TRUE ) 5.2 For MERFISH data Run the integrated MERFISH pipeline.
st_MERFISH_pipeline( input.data.dir, output.dir, sampleName = 'Hema_MERFISH', fov = 'fov', tech = 'Vizgen', # For Step1 Loading rds.file = FALSE, assay = NULL, Vizgen.z = 3L, Akoya.type = 'inform', # For Step2 QC Step2_QC = TRUE, min.gene = 20, min.nUMI = 50, max.gene = Inf, max.nUMI = Inf, min.spot = 0, bool.remove.mito = FALSE, species = 'mouse', # 'human' or 'mouse' # For Step3 Clustering Step3_Clustering = TRUE, normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.4, # For Step4 Find DEGs Step4_Find_DEGs = TRUE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', # For Step5 SVF Step5_SVFs = TRUE, selection.method = 'moransi', n.top.show = 10, n.col.show = 5, # For Step6 Interaction Step6_Interaction = TRUE, h5ad_path = NULL, counts_path = NULL, coordinates_path = NULL, coordinates_index_col = 0, counts_transpose = TRUE, commot.signaling_type = 'Secreted Signaling', commot.database = 'CellChat', commot.min_cell_pct = 0.05, commot.dis_thr = 500, commot.n_permutations = 100, # For Step7 Cellcycle Step7_Cellcycle = TRUE, s.features = NULL, g2m.features = NULL, verbose = FALSE, pythonPath = NULL ) 5.3 For stereo-seq data Run the integrated stereo-seq pipeline.
st_stereo_pipeline( input.data.dir, output.dir, sampleName = 'Hema_stereo', # For Step1 Loading data_type = 'gem', sep = '\\t', bin_type = 'bins', bin_size = 100, spot_diameter = 80, is_sparse = TRUE, gene_list = NULL, region = NULL, assay = 'Spatial', # For Step2 QC Step2_QC = TRUE, min.gene = 20, min.nUMI = 50, max.gene = Inf, max.nUMI = Inf, min.spot = 0, bool.remove.mito = FALSE, species = 'mouse', # 'human' or 'mouse' # For Step3 Clustering Step3_Clustering = TRUE, normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.1, max.n.cluster = 30, # For Step4 Find DEGs Step4_Find_DEGs = TRUE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', # For Step5 SVF Step5_SVFs = TRUE, selection.method = 'moransi', n.top.show = 10, n.col.show = 5, # For Step6 Interaction Step6_Interaction = TRUE, h5ad_path = NULL, counts_path = NULL, coordinates_path = NULL, coordinates_index_col = 0, counts_transpose = TRUE, commot.signaling_type = 'Secreted Signaling', commot.database = 'CellChat', commot.min_cell_pct = 0.05, commot.dis_thr = 500, commot.n_permutations = 100, # For Step7 Cellcycle Step7_Cellcycle = TRUE, s.features = NULL, g2m.features = NULL, verbose = FALSE, pythonPath = NULL ) "],["stey-by-step-st-seq-pipeline.html", "6 Step-by-step st-seq pipeline 6.1 Step 1. Data loading 6.2 Step 2. Quality Control 6.3 Step 3. Clustering 6.4 Step 4. DEGs 6.5 Step 5. Spatially variable features 6.6 Step 6. Spatial interaction 6.7 Step 7. CNV analysis 6.8 Step 8. Deconvolution 6.9 Step 9. Cell cycle 6.10 Step 10. Niche analysis", " 6 Step-by-step st-seq pipeline 6.1 Step 1. Data loading The st_Loading_Data function is designed for loading 10X Visium spatial transcriptomics data from Space Ranger. It will load data from input.data.dir and output it in the SeuratObject format. 6.1.1 Function arguments: input.data.dir: The directory where the input data is stored. output.dir: The directory where the processed output will be saved.
If not specified, the output is saved in the current working directory. Default is ‘.’. sampleName: A string naming the sample. Default is ‘Hema_ST’. rds.file: A boolean indicating if the input data is in RDS file format rather than the typical output of Space Ranger. Default is FALSE. filename: The name of the file to be loaded if the data is not in RDS format. Default is “filtered_feature_bc_matrix.h5”. assay: The specific assay to apply to the data. Default is ‘Spatial’. slice: The image slice identifier for the spatial data. Default is ‘slice1’. filter.matrix: A boolean indicating whether to load the filtered matrix. Default is TRUE. to.upper: A boolean indicating whether to convert feature names to upper case. Default is FALSE. 6.1.2 Function behavior: Directory Creation: The function first checks if the output.dir exists; if not, it creates it. RDS File Handling: If rds.file is TRUE, it reads the RDS file, ensuring the specified assay and slice are present in the Seurat object. Non-RDS File Handling: If rds.file is FALSE, it loads the data using Load10X_Spatial from Seurat. Saving the Object: Uses SaveH5Seurat and Convert to save the Seurat object in rds and h5ad formats. File Copying: Copies any necessary files (filter matrix, spatial image) to the output.dir. Return Value: Returns the processed Seurat object. 6.1.3 An example: st_obj <- st_Loading_Data( input.data.dir = 'path/to/data', output.dir = '.', sampleName = 'Hema_ST', rds.file = FALSE, filename = 'filtered_feature_bc_matrix.h5', assay = 'Spatial', slice = 'slice1', filter.matrix = TRUE, to.upper = FALSE ) 6.1.4 Outputs: Spatial transcriptome data in rds and h5ad formats 6.2 Step 2. Quality Control The QC_Spatial function performs basic quality control on a SeuratObject containing 10X visium data and returns the filtered SeuratObject. It provides options to set thresholds for the number of genes, nUMI (unique molecular identifiers), and spots expressing each gene.
It also allows for the removal of mitochondrial genes based on species. 6.2.1 Function arguments: st_obj: A SeuratObject of 10X visium data. output.dir: A character string specifying the path to store the results and figures. Default is the current working directory. min.gene: An integer representing the minimum number of genes detected in a spot. Default is 200. max.gene: An integer representing the maximum number of genes detected in a spot. Default is Inf (no upper limit). min.nUMI: An integer representing the minimum number of nUMI detected in a spot. Default is 500. max.nUMI: An integer representing the maximum number of nUMI detected in a spot. Default is Inf (no upper limit). min.spot: An integer representing the minimum number of spots expressing each gene. Default is 3. species: A character string representing the species of sample, either ‘human’ or ‘mouse’. bool.remove.mito: A boolean value indicating whether to remove mitochondrial genes. Default is TRUE. SpatialColors: A function that interpolates a set of given colors to create new color palettes and color ramps. Default is a color palette with reversed Spectral colors from RColorBrewer. 6.2.2 Function behavior: Plots and saves the spatial distribution of nUMI and nGene. Plots and saves violin plots for nUMI and nGene. Identifies and marks low-quality spots based on nUMI and nGene thresholds. Plots the spatial distribution of quality. Plots and saves a histogram for the number of spots expressing each gene. Plots the spatial distribution of mitochondrial genes. Saves the raw SeuratObject before filtering. Removes low-quality spots and genes with fewer occurrences. Optionally removes mitochondrial genes. Saves the filtered SeuratObject. Returns the filtered st_obj. 
6.2.3 An example: st_obj <- QC_Spatial( st_obj = st_obj, output.dir = '.', min.gene = 200, min.nUMI = 500, max.gene = Inf, max.nUMI = Inf, min.spot = 3, species = 'human', bool.remove.mito = TRUE, SpatialColors = colorRampPalette(colors = rev(x = brewer.pal(n = 11, name = "Spectral"))) ) 6.2.4 Outputs: Figures showing the spatial distribution of nUMI and nGene. Violin plots of nUMI and nGene. Figures showing the quality. Histograms for the number of spots expressing each gene. Figures showing the spatial distribution of mitochondrial genes. Raw and filtered SeuratObject. 6.3 Step 3. Clustering The st_Clustering function is designed to perform clustering analysis on spatial transcriptomics data. It integrates several key steps including data normalization, dimensionality reduction, clustering, and visualization. The function saves the results and visualizations to output.dir. 6.3.1 Function arguments: st_obj: The input spatial transcriptomics seurat object that contains the data to be clustered. output.dir: The directory where the output files will be saved. Default is the current directory (‘.’). normalization.method: The method used for data normalization. Default is ‘SCTransform’. npcs: The number of principal components to use in PCA. Default is 50. pcs.used: The principal components to use for clustering. Default is the first 10 PCs (1:10). resolution: The resolution parameter for the clustering algorithm. Default is 0.8. verbose: A logical flag to print progress messages. Default is FALSE. 6.3.2 Function behavior: Data Normalization and PCA: Depending on the normalization.method, the function either uses SCTransform or a standard normalization method followed by scaling and variable feature detection. Performs PCA on the normalized data. Clustering and Dimensionality Reduction: Finds nearest neighbors using the specified principal components (pcs.used). Identifies clusters using the specified resolution.
Performs UMAP and t-SNE for visualization of the clusters. Visualization: Generates spatial, UMAP, and t-SNE plots of the clusters with customized color schemes. Saves these plots as images in the specified directory. Saving Results: Saves the updated st_obj as an RDS file. Exports the metadata of st_obj to a CSV file. Return Value: Returns the updated st_obj containing the clustering results. 6.3.3 An example: st_obj <- st_Clustering( st_obj = st_obj, output.dir = '.', normalization.method = 'SCTransform', npcs = 50, pcs.used = 1:10, resolution = 0.8, verbose = FALSE ) 6.3.4 Outputs: Figures showing the results of clustering. SeuratObject in rds format. 6.4 Step 4. DEGs The st_Find_DEGs function is designed to identify differentially expressed genes (DEGs) in spatial transcriptomics data. It performs differential expression analysis based on clustering results, visualizes the top markers, and saves the results to output.dir. 6.4.1 Function arguments: st_obj: The input spatial transcriptomics object containing the data for DEG analysis. output.dir: The directory where output files will be saved. Default is the current directory (‘.’). ident.label: The metadata label used for identifying clusters. Default is 'seurat_clusters'. only.pos: A logical flag to include only positive markers. Default is TRUE. min.pct: The minimum fraction of cells expressing the gene in either cluster. Default is 0.25. logfc.threshold: The log fold change threshold for considering a gene differentially expressed. Default is 0.25. test.use: The statistical test to use for differential expression analysis. Default is 'wilcox'. verbose: A logical flag to print progress messages. Default is FALSE. 6.4.2 Function behavior: Set Identifiers: Sets the cluster identifiers in the spatial transcriptomics object (st_obj) based on the specified ident.label. 
Find Differentially Expressed Genes (DEGs): Performs differential expression analysis using the specified parameters (only.pos, min.pct, logfc.threshold, test.use). Top Marker Genes: Selects the top 5 marker genes for each cluster based on the highest average log fold change. Visualization: Generates a dot plot for the top DEGs and saves the plot as an image in the specified directory. Saving Results: Saves the DEG results as a CSV file. Return Value: Returns the data frame containing the identified DEGs. 6.4.3 An example: st.markers <- st_Find_DEGs( st_obj = st_obj, output.dir = '.', ident.label = 'seurat_clusters', only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = 'wilcox', verbose = FALSE ) 6.4.4 Outputs: Dot plots showing markers. CSV file containing the information of markers. 6.5 Step 5. Spatially variable features The st_SpatiallyVariableFeatures function identifies and visualizes spatially variable features (SVFs) in spatial transcriptomics data. It integrates the identification of spatially variable features using a specified method, saves the results to a directory, and creates visualizations of the top spatially variable features. 6.5.1 Function arguments: st_obj: The input spatial transcriptomics object containing the data for analysis. output.dir: The directory where output files will be saved. Default is the current directory. assay: The assay to be used for finding spatially variable features. Default is 'SCT'. selection.method: The method used for selecting spatially variable features. Default is 'moransi'. n.top.show: The number of top spatially variable features to visualize. Default is 10. n.col: The number of columns for the visualization grid. Default is 5. verbose: A logical flag to print progress messages. Default is FALSE. 6.5.2 Function behavior: Identify Spatially Variable Features: Identifies spatially variable features using the specified method and assay. Suppresses warnings during the process. 
Save Metadata: Extracts metadata features and saves them as a CSV file in output.dir. Visualization: Selects the top n.top.show spatially variable features. Generates and saves a spatial feature plot of these features in the specified directory. Return Value: Returns the updated st_obj containing the identified spatially variable features. 6.5.3 An example: st_obj <- st_SpatiallyVariableFeatures( st_obj = st_obj, output.dir = '.', assay = st_obj@active.assay, selection.method = 'moransi', n.top.show = 10, n.col = 5, verbose = FALSE ) 6.5.4 Outputs: Figures showing SVFs. CSV file containing the information of SVFs. 6.6 Step 6. Spatial interaction The st_Interaction function is used to identify and visualize interactions between clusters based on spatial transcriptomics data. It utilizes Commot to analyze spatial interactions, identify pathway activities, and assess the strength and significance of interactions. 6.6.1 Function arguments: st_data_path: Path to the spatial transcriptomics data. metadata_path: Path to the metadata associated with the spatial transcriptomics data. library_id: Identifier for the spatial transcriptomics library. Default is 'Hema_ST'. label_key: Key in the metadata to identify cell clusters. Default is 'seurat_clusters'. save_path: The directory where output files will be saved. Default is the current directory. species: The species of the spatial transcriptomics data. Default is 'human'. signaling_type: Type of signaling interactions to consider. Default is 'Secreted Signaling'. database: Database to be used for the analysis. Default is 'CellChat'. min_cell_pct: Minimum percentage of cells to consider for interaction analysis. Default is 0.05. dis_thr: Distance threshold for defining interactions. Default is 500. n_permutations: Number of permutations for assessing significance. Default is 100. pythonPath: The path to the Python environment containing Commot to use for the analysis. Default is ‘.’. 
6.6.2 Function behavior: Commot Analysis: Uses Commot to perform interaction analysis, identifying interactions within and between clusters. Visualization: Generates visualizations of pathway interactions and interactions between ligand-receptors (LRs) within and between clusters, and saves them in save_path. 6.6.3 An example: st_Interaction( st_data_path = 'path/to/data', metadata_path = 'path/to/metadata', library_id = 'Hema_ST', label_key = 'seurat_clusters', save_path = '.', species = 'human', signaling_type = 'Secreted Signaling', database = 'CellChat', min_cell_pct = 0.05, dis_thr = 500, n_permutations = 100, pythonPath = 'path/to/python' ) 6.6.4 Outputs: Dot plot showing pathway interaction between and within clusters. Dot plot showing LRs interaction between and within clusters. The information of each LR and pathway. 6.7 Step 7. CNV analysis The st_CNV function identifies and visualizes copy number variations (CNVs) in spatial transcriptomics data. It uses CopyKAT to perform the CNV analysis, saves the results, and generates visual representations of CNV states. 6.7.1 Function arguments: st_obj: The input spatial transcriptomics object containing the data for analysis. save_path: The directory where output files will be saved. assay: The assay to be used for CNV analysis. Default is 'Spatial'. LOW.DR: The lower threshold for the dropout rate in CopyKAT. Default is 0.05. UP.DR: The upper threshold for the dropout rate in CopyKAT. Default is 0.1. win.size: The window size for the CNV analysis. Default is 25. distance: The distance metric to be used for the analysis. Default is \"euclidean\". genome: The genome version to be used, ‘hg20’ or ‘mm10’. Default is \"hg20\". n.cores: The number of cores to be used for parallel processing. Default is 1. species: The species of the spatial transcriptomics data. Default is 'human'. 6.7.2 Function behavior: CopyKAT Analysis: Runs CopyKAT pipeline to perform CNV analysis using the provided parameters. 
Saving Results: Saves the CopyKAT results as an RDS file. Plotting: Generates plots of the CNV states and saves them in save_path. Updating Metadata: Updates the spatial transcriptomics object with CNV state metadata. Return Value: Returns the updated st_obj containing the CNV state information. 6.7.3 An example: st_obj <- st_CNV( st_obj = st_obj, save_path = '.', assay = 'Spatial', LOW.DR = 0.05, UP.DR = 0.1, win.size = 25, distance = "euclidean", genome = 'hg20', n.cores = 1, species = 'human' ) 6.7.4 Outputs: Figures showing the predicted CNV states. Figures showing the CNV heatmap. rds files of results of copykat. 6.8 Step 8. Deconvolution The st_Deconvolution function aims to perform spatial deconvolution analysis on spatial transcriptomics data to estimate the cell-type composition and abundance in different regions. The function utilizes cell2location to infer cell-type abundance and spatial distributions, allowing for the visualization and interpretation of spatially resolved cell populations within the tissue. 6.8.1 Function arguments: st.data.dir: Path to the spatial transcriptomics data. sc.h5ad.dir: Path to the single-cell RNA-seq data in h5ad format. Default is NULL. library_id: Identifier for the spatial transcriptomics library. Default is 'Hema_ST'. st_obj: Spatial transcriptomics object containing the data for analysis. Default is NULL. save_path: The directory where output files will be saved. Default is NULL. sc.labels.key: Key in the single-cell metadata to identify cell clusters. Default is 'seurat_clusters'. species: The species of the spatial transcriptomics data. Default is 'mouse'. sc.max.epoch: Maximum number of epochs used for single-cell deconvolution. Default is 1000. st.max.epoch: Maximum number of epochs used for spatial deconvolution. Default is 10000. use.gpu: Logical value indicating whether to use GPU for computation. Default is FALSE. use.Dataset: The dataset to be used for analysis, such as 'HematoMap' or 'LymphNode'. 
pythonPath: The path to the Python environment containing cell2location to use for the analysis. Default is ‘.’. 6.8.2 Function behavior: Deconvolution Analysis: Performs the spatial deconvolution analysis using the provided spatial transcriptomics and single-cell RNA-seq data. Post-Analysis Processing: Processes the deconvolution results and visualizes the spatial distribution of inferred cell types within the tissue. Returning Results: If a Seurat object is provided, the updated Seurat object with cell type information is returned. 6.8.3 An example: st_obj <- st_Deconvolution( st.data.dir = 'path/to/data', library_id = 'Hema_ST', sc.h5ad.dir = NULL, st_obj = st_obj, save_path = '.', sc.labels.key = 'seurat_clusters', species = 'human', sc.max.epoch = 1000, st.max.epoch = 10000, use.gpu = FALSE, use.Dataset = 'LymphNode', pythonPath = 'path/to/python' ) 6.8.4 Outputs: Figures showing the predicted abundance of each cell-type. The parameters of trained cell2location model. 6.9 Step 9. Cell cycle The st_Cell_cycle function is used to assess the cell cycle phase scores in spatial transcriptomics data. It calculates S phase and G2M phase scores based on the expression of designated cell cycle-related genes and visualizes these scores in spatial and dimensionality-reduced plots. 6.9.1 Function arguments: st_obj: The input Seurat object containing the data for analysis. save_path: The directory where the output images will be saved. Default is the current directory. s.features: A list of genes associated with the S phase. Default is NULL (using genes from Seurat). g2m.features: A list of genes associated with the G2M phase. Default is NULL (using genes from Seurat). species: The species of the spatial transcriptomics data. Default is 'human'. FeatureColors.bi: A color palette for visualization. Default is a two-color ramp palette. 6.9.2 Function behavior: Gene Feature Assignment: Assigns S phase and G2M phase gene lists based on the specified species or provided input. 
Cell Cycle Scoring: Calculates the S phase and G2M phase scores in the data. Spatial Visualization: Generates spatial feature plots to visualize the S phase and G2M phase scores using the specified color palette and saves the plots as images. Dimensionality-Reduced Plot Visualization: If UMAP or tSNE dimensionality reduction is available in the st_obj, feature plots of the S phase and G2M phase scores are generated in the reduced space and saved as images. Return Value: Returns the updated st_obj containing the cell cycle phase scores. 6.9.3 An example: st_obj <- st_Cell_cycle( st_obj = st_obj, save_path = '.', s.features = NULL, g2m.features = NULL, species = 'human', FeatureColors.bi = colorRampPalette(colors = rev(x = brewer.pal(n = 11, name = 'RdYlBu'))) ) 6.9.4 Outputs: Figures showing S scores. Figures showing G2M scores. 6.10 Step 10. Niche analysis The st_NicheAnalysis function is designed to perform niche analysis on spatial transcriptomics data, enabling the exploration of spatial niches or microenvironments within the tissue. The function encompasses co-occurrence analysis, niche clustering, and niche interaction analysis to uncover the spatial relationships and characteristics of different cell populations or features. 6.10.1 Function arguments: st_obj: The input SeuratObject containing the spatial transcriptomics data for analysis. features: A vector of feature names (for example, cell types from deconvolution) to use for niche analysis. save_path: The directory where the analysis results and visualizations will be saved. Default is the current directory. coexistence.method: The method for co-occurrence analysis, accepting 'correlation' or 'Wasserstein'. Default is 'correlation'. kmeans.n: The number of clusters for niche clustering. Default is 4. st_data_path: A path containing the ‘spatial’ file and ‘filtered_feature_bc_matrix.h5’ file, required for niche interaction visualization. slice: The slice to be used for analysis.
Default is 'slice1'. species: The species of the sample data. Default is 'mouse'. pythonPath: The path to the Python environment containing Commot to use for the analysis. Default is ‘.’. 6.10.2 Function behavior: Co-occurrence Score Calculation: Calculates the co-occurrence scores between the specified features using the chosen coexistence method (‘correlation’ or ‘Wasserstein’). Niche Clustering: Utilizes k-means clustering to identify distinct spatial niches based on the expression profiles of the selected features and visualizes the clustering results. Niche Interaction Visualization: If the st_data_path is provided, performs niche interaction visualization using Commot, which is based on the provided spatial transcriptomics data and generates visualizations of niche interactions within the tissue. Return Value: Returns the updated st_obj with niche analysis results and visualizations. 6.10.3 An example: tmp <- read.csv('path/to/cell2loc_res.csv', row.names = 1) features <- colnames(tmp) if(!all(features %in% names(st_obj@meta.data))){ common.barcodes <- intersect(colnames(st_obj), rownames(tmp)) tmp <- tmp[common.barcodes, ] st_obj <- st_obj[, common.barcodes] st_obj <- AddMetaData(st_obj, metadata = tmp) } st_obj <- st_NicheAnalysis( st_obj, features = features, save_path = '.', coexistence.method = 'correlation', kmeans.n = 4, st_data_path = 'path/to/data', slice = 'slice1', species = 'human', pythonPath = 'path/to/python' ) 6.10.4 Outputs: Figures showing the co-existence results. Figures showing the spatial distribution of each niche. Figures showing the composition of each niche. Figures showing the results of interactions using Commot. "],["step-by-step-shiny.html", "7 Step-by-step shiny 7.1 Step 1. Enter R and get the path of the installed R packages 7.2 Step 2. Run shiny code 7.3 Step 3.
Use HemaScopeShiny via the GUI", " 7 Step-by-step shiny #You can run shiny on Linux or on the RStudio web page Choice 1: Run shiny on Linux - Enter Linux, activate the HemaScope environment, and install the radian package; then you can enter the R environment on Linux and run the shiny code radian - You can see “r$>”. It means you have entered the R environment on Linux. app_path <- system.file("shinyapp/shiny_sc_st_all.R", package = "HemaScopeR") #The path where shiny_sc_st_all.R is located #Run shiny code shiny::runApp(app_path,launch.browser = FALSE,host = "xx.xx.xx.xx") #host parameter:Your server IP address You’ll see a page like the one below; copy the link. Open the link in a browser, and you will see the HemaScopeR shiny home page. Choice 2: Run shiny on the RStudio web page 7.1 Step 1. Enter R and get the path of the installed R packages Enter the R environment in the Linux command line. R Get the path of the installed R packages in the R command line. .libPaths() For example, “/An/example/of/the/path/to/installed/R/packages” 7.2 Step 2. Run shiny code .libPaths("/An/example/of/the/path/to/installed/R/packages") app_path <- system.file("shinyapp/shiny_sc_st_all.R", package = "HemaScopeR") #The path where shiny_sc_st_all.R is located #Run shiny code shiny::runApp(app_path,launch.browser = FALSE,host = "xx.xx.xx.xx") #host parameter:Your server IP address 7.3 Step 3. Use HemaScopeShiny via the GUI Start interface. A UI page appears with two buttons: “Start scRNA-seq Analysis” and “Start st-seq Analysis.” Users can click the corresponding button based on their needs to enter the respective analysis page. * The figure showing the start interface. Begin a new analysis, continue the previous analysis, or return to the start interface When clicking the “Start scRNA-seq pipeline” or “Start ST-seq pipeline” button, you will be directed to a second page. This page contains three buttons: “Begin New Analysis,” “Continue Previous Analysis”, and “Back to Home”.
If you need to begin a new analysis of scRNA-seq or st-seq data from the first step, click “Begin New Analysis”. If you have already used Shiny to complete several steps (e.g., steps 1, 2, and 3), but the analysis was interrupted during step 4 because Shiny closed unexpectedly, click “Continue Previous Analysis” to resume from step 4. Please note: users should follow the analysis steps sequentially and not skip steps. For example, analyzing steps 1, 2, and 3 and then jumping directly to step 6 is incorrect. The proper analysis sequence should be step 1, 2, 3, 4, 5, 6, … N. The figure showing the interface for beginning a new analysis, continuing the previous analysis, or returning to the start interface. 7.3.1 scRNA-seq pipeline When the user clicks the “Start scRNA-seq pipeline – Begin New Analysis” button, they will enter the single-cell analysis page. The sidebar of this page includes the following buttons: Step 1. Input Data Step 2. Quality Control Step 3. Clustering Step 4. Identify Cell Types Step 5. Visualization Step 6. Find Differential Genes Step 7. Assign Cell Cycles Step 8. Calculate Heterogeneity Step 9. Violin Plot for Marker Genes Step 10. Calculate Lineage Scores Step 11. GSVA Step 12. Construct Trajectories Step 13. Transcription Factors Analysis Step 14. Cell-Cell Interaction Step 15. Generate the Report Back to Prior Page The figure showing the scRNA-seq pipeline. Please start the analysis from step 1 and do not skip any steps. The correct analysis sequence is steps 1 through 15: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15. To return to the previous page, click “Back to Prior Page”. If Shiny unexpectedly exits during data analysis in the Begin New Analysis process (for example, while analyzing Step 5), and the analysis of Step 5 is interrupted, the user will need to restart Shiny by running shinyApp(ui, server). This will bring up the Home page.
The user should click the “Start scRNA-seq pipeline–Continue Previous Analysis” button, enter the Job ID displayed on the UI page during the Step 1.Input data step, and then select the step that did not complete successfully (e.g., Step 5). After entering the necessary parameters for Step 5, click “Run Step 5” to resume the analysis. Once Step 5 is completed, the user should proceed by selecting Step 6, entering the required parameters, and clicking “Run Step 6” to analyze Step 6, and so on, until all scRNA-seq steps are completed. Note that the default parameters for each step are the same as those in Begin New Analysis. After clicking “Run Step,” do not perform any other operations on the parameter page. Wait until the current step’s analysis is complete, and the results for that step will appear on the UI page. The “Start scRNA-seq pipeline–Continue Previous Analysis” page contains the following buttons: Back to Prior Page: Click to return to the previous page. Enter your Job ID: Enter the Job ID displayed on the page during the Begin New Analysis–Step1.Input data step. Choose a step you want to analyze: Select the step you want to continue analyzing. 7.3.1.1 Step 1 (scRNA-seq pipeline). Input Data The figure showing the step 1 of scRNA-seq pipeline. Enter data path: Input multiple file paths separated by semicolons, for example: /path1/file1/data1;/path2/file2/data2;/path2/file2/data3. For a single file, use: /path2/file2/data2. Enter project name: When entering multiple files, you must also input multiple project names, separated by semicolons. The number of project names must match the number of input files. Example: projectname1;projectname2;projectname3. For a single file, use: projectname1. Enter output path: Specify the path where the results will be output. You can view the results of each step in this path. Example: /home/username/output. Enter the path of database: The path where the database is stored and it varies for each user. 
Example: /home/username/database. Select Data Type: There are three options: “cellranger-count”, “Seurat”, “Matrix”. Choose according to the type of input data. Gene Column (default: 2): The column where gene names are located; the default is column 2. Minimum Cells (default: 10): The minimum number of cells for filtering; the default is 10. Minimum Features (default: 200): The minimum number of genes that must be detected in each cell; the default is 200. Mt Pattern (default: ‘^MT-’): Mitochondrial pattern; for humans use ^MT-, for mice use ^mt-. After entering the above parameters, click the “LoadData” button to load the data. Once the data is successfully loaded, you will see “OK! Data dimensions” indicating that the data loading is complete, and you will be provided with a JobID. Make sure to note this JobID, as it is crucial. If HemaScopeShiny unexpectedly exits, you can click “Continue Previous Analysis”, enter the JobID, and continue loading the previous analysis results without starting from step 1 again. The JobID is very important! Please note: After clicking the “LoadData” button, do not modify any other parameters on the page. The Step 2-14 pages will consist of three sections: 1) parameter input, 2) result output file names, and 3) generated result figures. If the respective step produces result figures, they will be displayed. Users can switch between images by clicking the arrows on the left or right of the figure. If no figures are generated for the current step, a message stating “NO Figure!” will be displayed. All output files generated at each step are stored in the output directory specified by the user. The UI page will display only the file names, which can be downloaded by clicking on the file name links. 7.3.1.2 Step 2 (scRNA-seq pipeline). Quality Control The figure showing the step 2 of scRNA-seq pipeline. nFeature_RNA.limit: Minimum number of genes detected per cell. 
Default value: 200 percent.mt.limit: Threshold for filtering mitochondrial genes. Default value: 20 scale.factor: Normalization factor. Default value: 10,000 nfeatures: Number of highly variable genes. Default value: 3,000 ndims: Number of dimensions used. Default value: 50 vars.to.regress: Variables to regress. Default value: NULL PCs: Number of principal components used for clustering. Default value: 1:35 resolution: Resolution parameter for clustering. Default value: 0.4 n.neighbors: k.param parameter in the FindNeighbors function. Default value: 50 doublet.percentage: Doublet rate. Default value: 0.04 doubletFinderWrapper.PCs: Number of principal components used for doublet removal. Default value: 1:20 doubletFinderWrapper.pN: Number of artificial doublets defined for removal. Default value: 0.25 doubletFinderWrapper.pK: Represents the fraction of merged real artificial data. Default value: 0.1 (pK should be adjusted according to each scRNA-seq dataset) Step2_Quality_Control.RemoveBatches: Whether to remove detected batches. Default value: TRUE Step2_Quality_Control.RemoveDoublets: Whether to remove detected doublets. Default value: TRUE Click the “Run Step 2” button to start the process. After clicking the “Run Step 2” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 2 completed” message will appear. After a short while, the result files generated by Step 2 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.3 Step 3 (scRNA-seq pipeline). Clustering The figure showing the step 3 of scRNA-seq pipeline. PCs for clustering (default: 1:20): Principal components used for clustering. Default value: 1:20 n.neighbors for clustering (default: 50): k.param parameter in the FindNeighbors function. Default value: 50 resolution for clustering (default: 0.4): Resolution used for clustering. 
Default value: 0.4 Click the “Run Step 3” button to start the process. After clicking the “Run Step 3” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 3 completed” message will appear. After a short while, the result files generated by Step 3 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.4 Step 4 (scRNA-seq pipeline). Identify Cell Types The figure showing the step 4 of scRNA-seq pipeline. Choose organism: ‘hsa’ for human, ‘mmu’ for mouse Choose Labels: Cell labels, default value: clustering Run CNV: TRUE if copy number variation (CNV) analysis is to be performed CPU cores for parallel processing: Number of CPU cores for parallel processing, default value: 10 Click the “Run Step 4” button to start the process. After clicking the “Run Step 4” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 4 completed” message will appear. After a short while, the result files generated by Step 4 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.5 Step 5 (scRNA-seq pipeline). Visualization The figure showing the step 5 of scRNA-seq pipeline. Nearest neighbors for PhateR analysis (default: 50): phate.knn parameter, the number of nearest neighbors to consider in the PhateR algorithm. Default value: 50 Principal components for PhateR (default: 20): phate.npca parameter, the number of principal components to use in the PhateR algorithm. Default value: 20 t parameter for PhateR (default: 10): phate.t parameter, the t value for the PhateR algorithm. Default value: 10 Dimensions for PhateR (default: 2): phate.ndim parameter, the number of dimensions for embedding output in the PhateR algorithm. Default value: 2 Click the “Run Step 5” button to start the process. 
After clicking the “Run Step 5” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 5 completed” message will appear. After a short while, the result files generated by Step 5 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.6 Step 6 (scRNA-seq pipeline). Find Differential Genes The figure showing the step 6 of scRNA-seq pipeline. Minimum gene percentage for differential detection (default: 0.25): The minimum fraction of cells expressing a gene in any cluster. Default value: 0.25 Log-fold threshold for gene analysis (default: 0.25): The log-fold change threshold for differential gene expression analysis. Default value: 0.25 Click the “Run Step 6” button to start the process. After clicking the “Run Step 6” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 6 completed” message will appear. After a short while, the result files generated by Step 6 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.7 Step 7 (scRNA-seq pipeline). Assign Cell Cycles The figure showing the step 7 of scRNA-seq pipeline. Define cell cycle cutoff (default: NULL): The cutoff value used to distinguish between cycling and non-cycling cells. Default value: NULL Click the “Run Step 7” button to start the process. After clicking the “Run Step 7” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 7 completed” message will appear. After a short while, the result files generated by Step 7 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.8 Step 8 (scRNA-seq pipeline). Calculate Heterogeneity The figure showing the step 8 of scRNA-seq pipeline. 
Order cell types: The order of cell types for visualization. If not provided, the function will use the unique cell types from the input cell_types_groups. Default value: NULL Click the “Run Step 8” button to start the process. After clicking the “Run Step 8” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 8 completed” message will appear. After a short while, the result files generated by Step 8 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.9 Step 9 (scRNA-seq pipeline). Violin Plot for Marker Genes The figure showing the step 9 of scRNA-seq pipeline. Enter marker genes for violin plot (separate by ‘,’): The marker genes for the violin plot. Default value is the built-in marker genes: NULL. Set the hexadecimal codes of colors for cell types (separate by ‘,’): Specify the colors for cell types. The default is the color palette: NULL. Click the “Run Step 9” button to start the process. After clicking the “Run Step 9” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 9 completed” message will appear. After a short while, the result files generated by Step 9 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.10 Step 10 (scRNA-seq pipeline). Calculate Lineage Scores The figure showing the step 10 of scRNA-seq pipeline. The gene sets for calculating lineage scores: The gene sets used for calculating lineage scores. Default value: NULL. The names for the lineages: The names of the lineages. Default value: NULL. The hexadecimal codes of colors for groups: Specify the colors to be used for different group annotations. The default is the color palette: NULL. Click the “Run Step 10” button to start the process.
After clicking the “Run Step 10” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 10 completed” message will appear. After a short while, the result files generated by Step 10 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.11 Step 11 (scRNA-seq pipeline). GSVA The figure showing the step 11 of scRNA-seq pipeline. Option to identify cell type-specific GSVA terms: Whether to identify cell type-specific GSVA terms. Default value: TRUE. Option to identify differential GSVA terms: Whether to identify differential GSVA terms. Default value: TRUE. Click the “Run Step 11” button to start the process. After clicking the “Run Step 11” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 11 completed” message will appear. After a short while, the result files generated by Step 11 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.12 Step 12 (scRNA-seq pipeline). Construct Trajectories The figure showing the step 12 of scRNA-seq pipeline. Set the cell types for constructing trajectories: The cell types to be used for trajectory analysis. Different cell types should be separated by commas. Default value: “all.” Option to run monocle2: Whether to perform Monocle2 trajectory analysis. Default value: TRUE. Option to run slingshot: Whether to perform Slingshot trajectory analysis. Default value: TRUE. Option to run scVelo: Whether to perform scVelo trajectory analysis. Default value: TRUE. Enter the paths of loom files: Specify the paths to the loom files for scVelo analysis. Default value: NULL. Click the “Run Step 12” button to start the process. After clicking the “Run Step 12” button, please do not interact with other parameters or buttons on the page. 
Once the process is complete, a “Step 12 completed” message will appear. After a short while, the result files generated by Step 12 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.13 Step 13 (scRNA-seq pipeline). Transcription Factors Analysis The figure showing the step 13 of scRNA-seq pipeline. Set the hexadecimal codes of colors for cell types: Colors used for visualizing cell types. Default value: NULL (color palette). Set the hexadecimal codes of colors for groups: Colors used for visualizing groups. Default value: NULL (color palette). Click the “Run Step 13” button to start the process. After clicking the “Run Step 13” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 13 completed” message will appear. After a short while, the result files generated by Step 13 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.14 Step 14 (scRNA-seq pipeline). Cell-Cell Interaction The figure showing the step 14 of scRNA-seq pipeline. The cell groups were sorted: Whether to consider the size (number) of cell groups in the cell communication analysis. Default value: TRUE. Click the “Run Step 14” button to start the process. After clicking the “Run Step 14” button, please do not interact with other parameters or buttons on the page. Once the process is complete, a “Step 14 completed” message will appear. After a short while, the result files generated by Step 14 will be displayed on the UI page. The result files are stored in the folder specified by the user’s output.dir parameter. 7.3.1.15 Step 15 (scRNA-seq pipeline). Generate the Report The figure showing the step 15 of scRNA-seq pipeline. Click “Run Step 15” to generate the analysis report. 
7.3.2 ST-pipeline When the user clicks the button “Start ST-seq pipeline–Begin New Analysis,” they will be taken to the empty analysis page. The page sidebar includes the following buttons: Please start the analysis from Step 1 and do not skip any steps. The correct analysis sequence is Step 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, and 11. To return to the previous page, please click “Back to Prior Page.” Step 1. Input Data Step 2. Quality Control Step 3. Clustering Step 4. Find Differential Genes Step 5. Spatially Variable Features Step 6. Spatial Interaction Step 7. CNV Analysis Step 8. Deconvolution Step 9. Cell Cycle Analysis Step 10. Niche Analysis Step 11. Generate the Report Back to Prior Page In “Begin New Analysis,” users start analyzing data from Step1. If Shiny unexpectedly exits during the analysis process (for example, if you are analyzing Step5 and Shiny crashes, causing Step5 to fail), users need to restart Shiny by running shinyApp(ui, server). This will bring up the Home page. Users should click the “Start ST-seq pipeline–Continue Previous Analysis” button. They need to enter the JobID displayed in the UI page during the Step1.Input data step and then select the step that did not complete successfully to continue the analysis. For example, if Step5 failed, select Step5, enter the necessary parameters, and click “Run Step5” to continue the analysis. After Step5 finishes, select Step6, enter the parameters for Step6, and click “Run Step6” to analyze Step6, and so on for all subsequent steps. Please note that the default parameters for each step are the same as those in “Begin New Analysis.” After clicking “Run Step,” do not make any other changes to the parameter page. Wait until the current step completes, and the results file for the current step will appear on the UI page. The “Start ST-seq pipeline–Continue Previous Analysis” page includes the following buttons: Back to Prior Page: Click to return to the previous page. 
Enter your Job ID: Enter the JobID displayed in the “Begin New Analysis–Step1.Input data” step. Choose a step you want to analyze: Select the step you want to continue analyzing. 7.3.2.1 Step 1 (st-seq pipeline). Input Data The figure showing the step 1 of st-seq pipeline. Enter data path: The directory where the input data is stored. The input data should be 10X Visium spatial transcriptomics data. Only one dataset can be input at a time; unlike single-cell data, multiple datasets cannot be entered simultaneously. Enter sample name: A string for naming the sample. The default value is ‘Hema_ST’. Enter output path: The directory where processed outputs will be saved. For example: /home/username/output. Enter the path of Python: The path to the Python executable, as that in scRNA-seq pipeline. After entering the parameters above, click the “LoadData” button to load the data. Once the data is loaded, the system will provide a JobID, which should be noted. If Shiny unexpectedly exits, you can click “Continue Previous Analysis” and enter the JobID to resume loading the previous analysis results, avoiding the need to restart from Step 1. The JobID is very important! Please note: After clicking the “LoadData” button, do not make further changes to other parameters on the page. The Step 2-10 pages will have three sections: Parameter input Result output file names Generated result plots If a step generates result plots, they will be displayed. Users can switch between images by clicking the arrows on either side of the plot. If no result plots are generated for the current step, users will be informed with “NO Figure!” The result files generated for each step are stored in the output path specified by the user. The UI page will only display the file names, and clicking on the file name links will allow downloading the files. 7.3.2.2 Step 2 (st-seq pipeline). Quality Control The figure showing the step 2 of st-seq pipeline. 
min.gene (default: 200): Specifies the minimum number of genes detected in a spot. The default value is 200. min.nUMI (default: 500): Specifies the minimum number of nUMIs detected in a spot. The default value is 500. max.gene (default: Inf): Specifies the maximum number of genes detected in a spot. The default value is Inf (no upper limit). max.nUMI (default: Inf): Specifies the maximum number of nUMIs detected in a spot. The default value is Inf (no upper limit). min.spot (default: 0): Specifies the minimum number of spots where each gene is expressed. bool.remove.mito: Whether to remove mitochondrial genes. The default value is TRUE. species: Specifies the species: human/mouse. Click “Run Step2” to proceed. After clicking the “Run Step2” button, please do not modify any other parameters on the page. Once Step 2 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.3 Step 3 (st-seq pipeline). Clustering The figure showing the step 3 of st-seq pipeline. normalization.method (default: ‘SCTransform’): The method for data normalization. The default value is ‘SCTransform’. npcs (default: 50): The number of principal components (PCs) to use in PCA. The default value is 50. pcs.used (default: 1:10): The number of PCs used for clustering analysis. The default value is the first 10 PCs (1:10). resolution (default: 0.8): The resolution parameter for the clustering algorithm. The default value is 0.8. Click “Run Step3” to proceed. After clicking the “Run Step3” button, please do not modify any other parameters on the page. Once Step 3 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.4 Step 4 (st-seq pipeline). Find Differential Genes The figure showing the step 4 of st-seq pipeline. only.pos: A logical flag to include only positive markers. The default value is TRUE. 
min.pct (default: 0.25): The minimum fraction of cells expressing the gene in any cluster. The default value is 0.25. logfc.threshold (default: 0.25): The log-fold change threshold for considering differentially expressed genes. The default value is 0.25. test.use (default: ‘wilcox’): The statistical test used for differential expression analysis. The default value is ‘wilcox’. Click “Run Step4” to proceed. After clicking the “Run Step4” button, please do not modify any other parameters on the page. Once Step 4 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.5 Step 5 (st-seq pipeline). Spatially variable features The figure showing the step 5 of st-seq pipeline. selection.method (default: ‘moransi’): The method used for selecting spatially variable features. The default value is ‘moransi’. n.top.show (default: 10): The number of top spatially variable features to visualize. The default value is 10. n.col.show (default: 5): The number of columns in the visualization grid. The default value is 5. Click “Run Step5” to proceed. After clicking the “Run Step5” button, please do not modify any other parameters on the page. Once Step 5 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.6 Step 6 (st-seq pipeline). Spatial interaction The figure showing the step 6 of st-seq pipeline. commot.signaling_type (default: ‘Secreted Signaling’): The type of signaling interaction to consider. The default value is ‘Secreted Signaling’. commot.database (default: ‘CellChat’): The database used for the analysis. The default value is ‘CellChat’. commot.min_cell_pct (default: 0.05): The minimum cell percentage to consider in interaction analysis. The default value is 0.05. commot.dis_thr (default: 500): The distance threshold used to define interactions. The default value is 500. 
commot.n_permutations (default: 100): The number of permutations used to assess significance. The default value is 100. Click “Run Step6” to proceed. After clicking the “Run Step6” button, please do not modify any other parameters on the page. Once Step 6 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.7 Step 7 (st-seq pipeline). CNV analysis The figure showing the step 7 of st-seq pipeline. copykat.genome (default: ‘NULL’): The genome version used, either ‘hg20’ or ‘mm10’. The default value is “hg20”. copykat.LOW.DR (default: 0.05): The lower dropout rate threshold in CopyKAT. The default value is 0.05. copykat.UP.DR (default: 0.1): The upper dropout rate threshold in CopyKAT. The default value is 0.1. copykat.win.size (default: 25): The window size for CNV analysis. The default value is 25. copykat.distance (default: ‘euclidean’): The distance metric used for analysis. The default value is “euclidean”. copykat.n.cores (default: 1): The number of cores used for parallel processing. The default value is 1. Click “Run Step7” to proceed. After clicking the “Run Step7” button, please do not modify any other parameters on the page. Once Step 7 is completed, the result files will appear in the UI, and they will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.8 Step 8 (st-seq pipeline). Deconvolution The figure showing the step 8 of st-seq pipeline. cell2loc.sc.h5ad.dir (default: ‘NULL’): The path to the h5ad format single-cell RNA-seq data. The default value is NULL. cell2loc.sc.max.epoch (default: 1000): The maximum number of epochs for single-cell deconvolution. The default value is 1000. cell2loc.st.max.epoch (default: 10000): The maximum number of epochs for spatial deconvolution. The default value is 10000. cell2loc.use.gpu (default: FALSE): A logical value indicating whether to use GPU for computation. The default value is FALSE. 
Click “Run Step8” to proceed. After clicking the “Run Step8” button, please do not modify any other parameters on the page. Once Step 8 is completed, the result files will appear in the UI and will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.9 Step 9 (st-seq pipeline). Cell cycle analysis The figure showing the step 9 of st-seq pipeline. The gene sets for calculating S phase scores (e.g. “gene1,gene2,gene3”): A list of genes associated with the S phase. The default value is NULL (uses genes from Seurat). The gene sets for calculating G2M phase scores (e.g. “gene1,gene2,gene3”): A list of genes associated with the G2M phase. The default value is NULL (uses genes from Seurat). Click “Run Step9” to proceed. After clicking the “Run Step9” button, please do not modify any other parameters on the page. Once Step 9 is completed, the result files will appear in the UI and will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.10 Step 10 (st-seq pipeline). Niche analysis The figure showing the step 10 of st-seq pipeline. Nich.cluster.n (default: 4): The number of clusters for niche clustering. The default value is 4. Click “Run Step10” to proceed. After clicking the “Run Step10” button, please do not modify any other parameters on the page. Once Step 10 is completed, the result files will appear in the UI and will be stored in the folder specified by the user in the output.dir parameter. 7.3.2.11 Step 11 (st-seq pipeline). Generate the Report The figure showing the step 11 of st-seq pipeline. Click “Run Step11” to generate the analysis report. "],["operation-manual-for-the-hemascopecloud.html", "8 Operation Manual for the HemaScopeCloud 8.1 User Login 8.2 Homepage 8.3 Data Page 8.4 Analysis Page 8.5 Projects page", " 8 Operation Manual for the HemaScopeCloud 8.1 User Login 8.1.1 Enter the URL in a web browser: https://hemascope.hiplot.cn/?home=hemascope and click to access the login page. 
Figure 8.1: Login Page 8.1.2 To obtain free computational resources: Enter your login email, click “Get Code,” input the verification code received in your email, and then click “Login” to complete the login and access the system homepage. 8.1.3 To browse HemaScopeCloud without needing computational resources: Click the “View without Login” button to access the system homepage. You can view demo analysis projects. If you click the button to initiate an analysis, the platform will prompt: “Please log in for analysis!” 8.2 Homepage Figure 8.2: Homepage The left side features a menu bar containing Home, Data, Analysis, Project, and Help. The upper right section includes statistics on analysis project status, usage of analysis projects, a quick entry for creating new analysis projects, and statistics on allocated storage capacity usage. Statistics on Analysis Project Status Pending Analysis: Waiting for analysis, not yet submitted for analysis. Pending Resources: Waiting for resources, analysis submitted and awaiting resource allocation. Analyzing: Currently analyzing. Completed: Analysis completed. Error: An error occurred during analysis. Total: Total of all analysis statuses. Usage Statistics for Analysis Projects: Number of used analysis projects / Total number of allocated analysis projects. The current allocation for the system is 50 projects. For additional free computational resources, please contact the developer. Quick Entry for Creating New Analysis Projects: Supports quick access to the new analysis project pages corresponding to two pipelines. Storage Capacity Usage: Used Storage Resources / Allocated Storage Resources. The lower section displays the most recently run analysis projects. By default, it shows demo projects upon initial entry. Clicking the “View” option on an entry in the Projects section allows you to access and analyze that specific analysis project.
8.3 Data Page The Data page includes storage for Demo sample project data as well as Personal project data. Data under the Demo tab can be downloaded, while the Personal tab allows users to create new folders and upload files. 8.4 Analysis Page It lists two analysis pipelines: sc_HemaScopeCloud and st_HemaScopeCloud, serving as entries for creating new analysis projects. Click the Analysis button to access the new project and execution page for that pipeline. Figure 8.3: Select Analysis Pipeline Page Figure 8.4: Enter the Analysis Pipeline Page Create New Analysis Project Click the Analysis button under the sc_HemaScopeCloud to enter the new project page for that pipeline. Project Name: Enter the name of the analysis project for identification purposes. Input Data: Click Upload to upload local analysis files. Single and multiple file uploads are supported. Uploaded files must comply with the pipeline’s input file requirements; otherwise, an error will occur during execution. Sample Name: Click Add to enter the sample names, which should correspond to the uploaded analysis files. Items marked with * are required fields. Click the Run button to initiate the analysis: For the scRNA-seq pipeline, this will trigger steps 1-4; for the st-seq pipeline, it will trigger steps 1-5. Each subsequent analysis step requires clicking Run on the relevant step page to submit. Before submission, ensure that the previous step has generated result files; otherwise, a notification will indicate that the analysis cannot proceed. Load Demo Data HemaScopeCloud supports loading pre-configured analysis demo files and default parameters to quickly initiate analysis projects. On the new project page, click Load Demo Data to load files from the demo project and fill the required fields. Then, click the Run button to execute the analysis for the demo project. Figure 8.5: Load Demo Data After clicking Run, you will be redirected to the detailed page of the analysis project.
Analysis Project Detail Page Notifications Waiting for resources…Do not submit repeatedly: This indicates that the submission is waiting for resources. Do not click the Run button again. Analyzing…Do not submit repeatedly: This indicates that the project is currently analyzing. Do not click the Run button again. Analysis Steps, Current Analysis Step: Displays all stepwise analysis processes and the current step. Click on different steps to navigate to the corresponding analysis step page. For the initial analysis, you must complete the previous step before proceeding to the next one. Refresh Button: Used to refresh the current page. Results: This tab stores the results of the completed step. Visualization: For steps that involve visualizations, the results will be found under the visualization tab. History: Click on Run History to view all historical runs of that step. Status: Corresponds to the analysis status of the project. Log: Click this button to view the run log. Parameter Settings: Used for entering parameter values. Figure 8.6: Analysis Project Page Figure 8.7: Analysis Project Result Page Figure 8.8: This step of the analysis project displays ‘Waiting for resources…Do not submit repeatedly’ Figure 8.9: This step of the analysis project displays ‘Analyzing…Do not submit repeatedly’ Figure 8.10: History Page Note: For steps that have already been completed (except for the first step), you can adjust the parameters and click Run to perform multiple analyses. The results page will retain only the latest analysis results. 8.5 Projects page The Projects page includes analysis projects created by the user as well as pre-configured demo analysis projects provided by the system. Figure 8.11: Demo projects and user’s personal projects Clicking “View” allows you to navigate to the analysis project for review and step-by-step analysis. Figure 8.12: Click ‘View’ to access the analysis project Figure 8.13: Enter the detailed analysis project page "]]