Merge pull request #19 from sinanugur/development

readme update
sinanugur · Aug 14, 2023 · 3ef587f · 3ef587f
2 parents 5d10c6d + 1d85fb3
commit 3ef587f
Show file tree

Hide file tree

Showing 7 changed files with 81 additions and 57 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -24,7 +24,7 @@ RUN mamba env create --name cellsnake --file /tmp/environment.yml && \
 # Activate Conda environment and install a package from PyPI
 SHELL ["bash", "-c"]
 RUN source activate cellsnake && \
-    pip install -i https://test.pypi.org/simple/ cellsnake==0.2.0.11rc3
+    pip install -i https://test.pypi.org/simple/ cellsnake==0.2.0.11rc4
 
 #RUN source activate cellsnake && cellsnake --install-packages
 COPY workflow/scripts/scrna-install-packages.R /tmp/scrna-install-packages.R 

diff --git a/README.md b/README.md
@@ -4,21 +4,34 @@
 Introduction
 ------------
 
-This is a scRNA-seq analysis pipeline. The pipeline is built in Snakemake and can be run on different platforms and high performance computing (HPC) systems.
+Cellsnake can be run directly using the snakemake workflow. We recommend the wrapper, but the snakemake workflow gives more control in some use cases.
+
+The main cellsnake repo is here: https://github.com/sinanugur/cellsnake
+
 
 Installation
 ------------
 
+You may pull the workflow from the GitHub repo and create a clean environment. Mamba installation is highly recommended.
+
 ```
+conda install mamba -c conda-forge # to install Mamba
+
 git clone https://github.com/sinanugur/scrna-workflow.git
 cd scrna-workflow
-conda env create --name scrna-workflow --file environment.yml
+mamba env create --name scrna-workflow --file environment.yml
 conda activate scrna-workflow
 
 mamba env create --name cellsnake_testing --file environment.yml
 ```
 
-To install R packages:
+For Apple Silicon architecture (e.g. M1, M2), you have to prefix the command with CONDA_SUBDIR=osx-64 when creating the environment.
+```
+CONDA_SUBDIR=osx-64 mamba env create --name scrna-workflow --file environment.yml
+```
+
+
+After the environment is created and activated, install the required R packages:
 ```
 ./install_r_packages.sh
 ```
@@ -27,18 +40,24 @@ To install R packages:
 Quick Start Example
 -------------------
 
-The workflow expects 10x samples under data folder in this format:
-
-__"data/{sample}/outs/filtered_feature_bc_matrix.h5"__
+You can start a minimal run with the command below; sample runs are expected in the data folder.
 
-This will register the directory name as sample name for later processing.
+```shell
+snakemake -j 10 --config datafolder=data option=minimal
+```
 
-You can start the pipeline by calling,
+Then we can run integration.
+```shell
+snakemake -j 10 --config option=integration
 ```
-snakemake -j 5
 
+Now it is time to work on the integrated sample. We can run a full advanced run on the integrated object, which is always generated at the same location.
+```shell
+snakemake -j 10 --config  datafolder=analyses_integrated/seurat/integrated.rds resolution=0.3 option=advanced is_integrated_sample=True --rerun-incomplete
 ```
-which will create a 5 threads job.
+
+
+
 
 
 Do a dry run:

diff --git a/workflow/scripts/scrna-cellchat.R b/workflow/scripts/scrna-cellchat.R
@@ -35,6 +35,9 @@ if (is.null(opt$rds)) {
 }
 
 if (!requireNamespace("CellChat", quietly = TRUE)) {
+  remotes::install_version("NMF", "0.26")
+  remotes::install_version("circlize", "0.4.15")
+  remotes::install_version("igraph", "1.4.3")
   remotes::install_github("sqjin/CellChat", upgrade = "never")
 }
 

diff --git a/workflow/scripts/scrna-clusteringtree.R b/workflow/scripts/scrna-clusteringtree.R
@@ -10,8 +10,7 @@ option_list <- list(
     type = "integer", default = 2000,
     help = "Highly variable features [default= %default]", metavar = "integer"
   ),
-
-        optparse::make_option(c("--variable.selection.method"),
+  optparse::make_option(c("--variable.selection.method"),
     type = "character", default = "vst",
     help = "Find variable features selection method [default= %default]", metavar = "character"
   ),
@@ -23,7 +22,6 @@ option_list <- list(
     type = "character", default = "LogNormalize",
     help = "Normalization method[default= %default]", metavar = "character"
   ),
-
   optparse::make_option(c("--integration"), action = "store_true", default = FALSE),
   optparse::make_option(c("--clplot"),
     type = "character", default = "clustree.pdf",
@@ -55,70 +53,73 @@ require(tidyverse)
 require(optparse)
 require(Seurat)
 require(clustree)
-try({source("workflow/scripts/scrna-functions.R")},silent=TRUE)
-try({source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE),"/scrna/workflow/scripts/scrna-functions.R"))},silent=TRUE)
+try(
+  {
+    source("workflow/scripts/scrna-functions.R")
+  },
+  silent = TRUE
+)
+try(
+  {
+    source(paste0(system("python -c 'import os; import cellsnake; print(os.path.dirname(cellsnake.__file__))'", intern = TRUE), "/scrna/workflow/scripts/scrna-functions.R"))
+  },
+  silent = TRUE
+)
 
 scrna <- readRDS(file = opt$rds)
 
-if(isFALSE(opt$integration)) {
-
-
-scrna <- NormalizeData(scrna, normalization.method = opt$normalization.method, scale.factor = opt$scale.factor)
-scrna <- FindVariableFeatures(scrna, selection.method = opt$variable.selection.method, nfeatures = opt$nfeatures)
+if (isFALSE(opt$integration)) {
+  scrna <- NormalizeData(scrna, normalization.method = opt$normalization.method, scale.factor = opt$scale.factor)
+  scrna <- FindVariableFeatures(scrna, selection.method = opt$variable.selection.method, nfeatures = opt$nfeatures)
 } else {
-
-try({DefaultAssay(scrna) <- "integrated"}) #for now only for Seurat, Harmony will come
-
+  try({
+    DefaultAssay(scrna) <- "integrated"
+  }) # for now only for Seurat, Harmony will come
 }
 
 
-#all.genes <- rownames(scrna) memory requirements can be large if using all genes
-not.all.genes <- VariableFeatures(scrna) #only variable features
+# all.genes <- rownames(scrna) memory requirements can be large if using all genes
+not.all.genes <- VariableFeatures(scrna) # only variable features
 
 scrna <- ScaleData(scrna, features = not.all.genes)
 scrna <- RunPCA(scrna, features = not.all.genes)
 
 
-if(isFALSE(opt$integration)) {
-# output.dir=paste0("results/",opt$sampleid,"/technicals/")
-# dir.create(output.dir,recursive = T)
-
-# Identify the 10 most highly variable genes
-top10 <- head(not.all.genes, 10)
+if (isFALSE(opt$integration)) {
+  # output.dir=paste0("results/",opt$sampleid,"/technicals/")
+  # dir.create(output.dir,recursive = T)
 
-# plot variable features with and without labels
-plot1 <- VariableFeaturePlot(scrna)
-plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
+  # Identify the 10 most highly variable genes
+  top10 <- head(not.all.genes, 10)
 
+  # plot variable features with and without labels
+  plot1 <- VariableFeaturePlot(scrna)
+  plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
 
 
-# ggsave(paste0(output.dir,"highly-variable-features.pdf"), plot2 ,width = 8,height = 9)
-ggsave(opt$hvfplot, plot2, width = 8, height = 9)
 
-DimHeatmap(scrna, dims = 1:15, cells = 500, balanced = TRUE, fast = FALSE)
-# ggsave(paste0(output.dir,"DimHeatMap_plot.pdf") ,width = 8,height = 15)
-ggsave(opt$heplot, width = 8, height = 15)
+  # ggsave(paste0(output.dir,"highly-variable-features.pdf"), plot2 ,width = 8,height = 9)
+  ggsave(opt$hvfplot, plot2, width = 8, height = 9)
 
+  DimHeatmap(scrna, dims = 1:15, cells = 500, balanced = TRUE, fast = FALSE)
+  # ggsave(paste0(output.dir,"DimHeatMap_plot.pdf") ,width = 8,height = 15)
+  ggsave(opt$heplot, width = 8, height = 15)
 
-scrna <- JackStraw(scrna, num.replicate = 20, dims = 50,verbose=FALSE)
-scrna <- ScoreJackStraw(scrna, dims = 1:50)
-plot1 <- JackStrawPlot(scrna, dims = 1:50)
-plot2 <- ElbowPlot(scrna, ndims = 50)
-# ggsave(paste0(output.dir,"JackandElbow_plot.pdf"), plot1 + plot2,width = 13,height = 5)
-ggsave(opt$jeplot, plot1 + plot2, width = 13, height = 5)
 
+  scrna <- JackStraw(scrna, num.replicate = 20, dims = 50, verbose = FALSE)
+  scrna <- ScoreJackStraw(scrna, dims = 1:50)
+  plot1 <- JackStrawPlot(scrna, dims = 1:50)
+  plot2 <- ElbowPlot(scrna, ndims = 50)
+  # ggsave(paste0(output.dir,"JackandElbow_plot.pdf"), plot1 + plot2,width = 13,height = 5)
+  ggsave(opt$jeplot, plot1 + plot2, width = 13, height = 5)
 }
 
-if(isFALSE(opt$integration)) {
-
- resolution=seq(0.1, 2.5, 0.1)
-
+if (isFALSE(opt$integration)) {
+  resolution <- seq(0.1, 2.5, 0.1)
 } else {
+  resolution <- seq(0.1, 1.5, 0.1)
+}
 
-resolution=seq(0.1, 1.5, 0.1)
-
-  }
-
 
 
 dimensionReduction <- function_pca_dimensions(scrna)
@@ -131,4 +132,4 @@ clustree(scrna) -> p1
 # dir.create(output.dir,recursive = T)
 
 # ggsave(paste0(output.dir,"/clusteringTree-",opt$sampleid,".pdf"),p1,width=8,height=15)
-ggsave(opt$clplot, p1, width = 8, height = 15)
+ggsave(opt$clplot, p1, width = 8, height = 15)
diff --git a/workflow/scripts/scrna-marker-plots.R b/workflow/scripts/scrna-marker-plots.R
@@ -84,7 +84,7 @@ suppressMessages(for (i in 1:nrow(Positive_Features)) {
     gene <- Positive_Features[i, ]$gene
     cluster <- Positive_Features[i, ]$cluster
 
-    p1 <- FeaturePlot(scrna, reduction = opt$reduction.type, features = gene) & scale_color_continuous(type = "viridis") & labs(color = "Expression") & theme(axis.text = element_text(size = 12))
+    p1 <- FeaturePlot(scrna, reduction = opt$reduction.type, features = gene, raster = FALSE) & scale_color_continuous(type = "viridis") & labs(color = "Expression") & theme(axis.text = element_text(size = 12))
     p2 <- DotPlot(scrna, features = gene) & scale_color_continuous(type = "viridis") & labs(color = "Average Expression", size = "Percent Expressed") & ylab("Identity") & theme(axis.title.x = element_blank(), axis.text = element_text(size = 12)) & theme(legend.position = "right")
     p3 <- VlnPlot(scrna, features = gene) & ggthemes::theme_hc() & scale_fill_manual(values = palette) & theme(legend.position = "right", axis.text = element_text(size = 12)) & labs(fill = "") & xlab("Identity") & ylab("Expression Level")
 

diff --git a/workflow/scripts/scrna-monocle3.R b/workflow/scripts/scrna-monocle3.R
@@ -24,6 +24,7 @@ if (!requireNamespace("SeuratWrappers", quietly = TRUE)) {
 }
 
 if (!requireNamespace("monocle3", quietly = TRUE)) {
+    remotes::install_version("igraph", "1.4.3")
     remotes::install_github("cole-trapnell-lab/monocle3", upgrade = "never")
 }
 

diff --git a/workflow/scripts/scrna-selected-marker-plots.R b/workflow/scripts/scrna-selected-marker-plots.R
@@ -90,7 +90,7 @@ suppressMessages(for (i in markers) {
 
   tryCatch(
     {
-      p1 <- FeaturePlot(scrna, reduction = opt$reduction.type, features = i) & scale_color_continuous(type = "viridis") & labs(color = "Expression") & theme(axis.text = element_text(size = 10))
+      p1 <- FeaturePlot(scrna, reduction = opt$reduction.type, features = i, raster = FALSE) & scale_color_continuous(type = "viridis") & labs(color = "Expression") & theme(axis.text = element_text(size = 10))
       p2 <- DotPlot(scrna, features = i) & scale_color_continuous(type = "viridis") & labs(color = "Average Expression", size = "Percent Expressed") & ylab("Identity") & theme(axis.title.x = element_blank(), axis.text = element_text(size = 10)) & theme(legend.position = "right")
       p3 <- VlnPlot(scrna, features = i) & ggthemes::theme_hc() & scale_fill_manual(values = palette) & theme(legend.position = "right", axis.text = element_text(size = 10)) & labs(fill = "") & xlab("Identity") & ylab("Expression Level")
-Original file line number
+Diff line change
@@ Expand Up / @@ -24,6 +24,7 @@ if (!requireNamespace("SeuratWrappers", quietly = TRUE)) { @@
     }
     if (!requireNamespace("monocle3", quietly = TRUE)) {
+        remotes::install_version("igraph", "1.4.3")
         remotes::install_github("cole-trapnell-lab/monocle3", upgrade = "never")
     }
@@ Expand Down @@