fix vignettes

semraulab · Dec 16, 2021 · 8daf9de · 8daf9de
1 parent 43ca450
commit 8daf9de
Show file tree

Hide file tree

Showing 16 changed files with 213 additions and 78 deletions.
diff --git a/.Rproj.user/6C69BF22/sources/prop/4350BDF6 b/.Rproj.user/6C69BF22/sources/prop/4350BDF6
@@ -1,4 +1,4 @@
 {
-    "cursorPosition" : "4,28",
-    "scrollLine" : "0"
+    "cursorPosition" : "52,59",
+    "scrollLine" : "50"
 }
diff --git a/.Rproj.user/6C69BF22/sources/prop/A3D314A5 b/.Rproj.user/6C69BF22/sources/prop/A3D314A5
@@ -1,4 +1,4 @@
 {
-    "cursorPosition" : "4,28",
-    "scrollLine" : "0"
+    "cursorPosition" : "114,11",
+    "scrollLine" : "97"
 }
diff --git a/.Rproj.user/6C69BF22/sources/s-1EF4D921/58CF214A b/.Rproj.user/6C69BF22/sources/s-1EF4D921/58CF214A
@@ -5,15 +5,15 @@
     "dirty" : false,
     "encoding" : "UTF-8",
     "folds" : "",
-    "hash" : "0",
+    "hash" : "2146242614",
     "id" : "58CF214A",
-    "lastKnownWriteTime" : 1639585670,
-    "last_content_update" : 1639585670,
+    "lastKnownWriteTime" : 1639649704,
+    "last_content_update" : 1639649704963,
     "path" : "~/Documents/R/NACC/phiclust/vignettes/Guide_to_phiclust.Rmd",
     "project_path" : "vignettes/Guide_to_phiclust.Rmd",
     "properties" : {
-        "cursorPosition" : "4,28",
-        "scrollLine" : "0"
+        "cursorPosition" : "52,59",
+        "scrollLine" : "50"
     },
     "relative_order" : 1,
     "source_on_save" : false,

diff --git a/.Rproj.user/6C69BF22/sources/s-1EF4D921/58CF214A-contents b/.Rproj.user/6C69BF22/sources/s-1EF4D921/58CF214A-contents
@@ -23,7 +23,7 @@ library(splatter)
 library(ggplot2)
 ```
 
-Here, we import splatter data from the package, called "splatO". And undergo the necessary processings steps for the measure. 
+We import the splatter data set "splatO", which is saved in the phiclust package, and run all the processing steps required for the measure.
 ```{r}
 #Load sample data simulated with splatter
 data("splatO")
@@ -50,23 +50,23 @@ out <- phiclust(expr = expr.norm.log, clusters = test.cluster,
 
 We can have a look at the main output of this function. For each cluster, the corresponding clusterability measure is shown. 
 ```{r, fig.align = "center", fig.height = 5, fig.width = 8}
-#Evaluate the output of the measure
+#Evaluate the output of the measure [saved in out$phiclust]
 
-#plot all values for sigma
+#plot all values for phiclust
 plot_phiclust(out)
 ```
 
-If you would like to go into more detail, then you can have a look at all sigmas and g-sigmas that are available per cluster. 
+If you would like to go into more detail, then you can have a look at all phiclusts and g-phiclusts that are available per cluster. 
 ```{r, fig.align = "center", fig.height = 5, fig.width = 8}
-#Plot all values for sigma and g_sigma
+#Plot all values for phiclust and g_phiclust
 plot_all_phiclusts(out)
 plot_all_g_phiclusts(out)
 
 ```
 
-If you are interested in the values of all sigmas, g-sigmas and singular values of the signal matrix, then this information can be obtained with the help of this function. 
+If you are interested in the values of all phiclusts, g-phiclusts and singular values of the signal matrix, then this information can be obtained with the help of this function. 
 ```{r}
-#obtain the values for sigma and additional information
+#obtain the values for phiclust and additional information
 get_info(out, "Group2")
 ```
 

diff --git a/.Rproj.user/6C69BF22/sources/s-1EF4D921/C841848D b/.Rproj.user/6C69BF22/sources/s-1EF4D921/C841848D
@@ -0,0 +1,22 @@
+{
+    "collab_server" : "",
+    "contents" : "",
+    "created" : 1639649240605.000,
+    "dirty" : false,
+    "encoding" : "UTF-8",
+    "folds" : "",
+    "hash" : "1899148262",
+    "id" : "C841848D",
+    "lastKnownWriteTime" : 1639649593,
+    "last_content_update" : 1639649593110,
+    "path" : "~/Documents/R/NACC/phiclust/vignettes/Analysis_kidney.Rmd",
+    "project_path" : "vignettes/Analysis_kidney.Rmd",
+    "properties" : {
+        "cursorPosition" : "114,11",
+        "scrollLine" : "97"
+    },
+    "relative_order" : 2,
+    "source_on_save" : false,
+    "source_window" : "",
+    "type" : "r_markdown"
+}
diff --git a/.Rproj.user/6C69BF22/sources/s-1EF4D921/C841848D-contents b/.Rproj.user/6C69BF22/sources/s-1EF4D921/C841848D-contents
@@ -0,0 +1,123 @@
+---
+title: "Analysis_kidney"
+output: 
+  md_document:
+    variant: markdown_github
+vignette: >
+  %\VignetteIndexEntry{Analysis_kidney}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  cache.lazy = FALSE
+)
+```
+
+```{r setup}
+library(phiclust)
+library(ggplot2)
+library(Seurat)
+```
+The authors who analyzed this data had already normalized the data set with the R package "scran" and determined clusters by hierarchical clustering. In total, they found 22 clusters.
+```{r, fig.align = "center", fig.height = 5, fig.width = 8, cache = TRUE}
+data("force_gr_kidney")
+data("sce_kidney")
+
+paga.coord$Group <- sce_kidney$cell.type
+
+ggplot(paga.coord, aes(x = V1, y = V2, colour = Group)) +
+  geom_point(shape = 16)
+```
+With phiclust, we are now able to assess the variability for each cluster and see if possible sub-clusters can be found. First, we load the preprocessed SingleCellObject of the kidney data.  
+```{r}
+#Load kidney data from package
+
+#Extract scran normalized counts and log-transform
+expr.norm.log <- as.matrix(log(assay(sce_kidney, "scran")+1))
+
+#Change the name of the rows to readable gene names
+rownames(expr.norm.log) <- as.character(rowData(sce_kidney)$HUGO)
+rownames(sce_kidney) <- as.character(rowData(sce_kidney)$HUGO)
+```
+In the next step, we would like to exclude certain variances from appearing in the measure. For example, in this fetal kidney data set, several factors would not be of interest to cluster on: cell-cycle-related variances, ribosomal and mitochondrial gene expression, as well as stress-related genes, which arise during dissociation. Here, we determine cycling genes with the Seurat package, so we first need to create a Seurat object and normalize it. Another important factor is technical variability, for example the varying number of transcripts. It's important to *always* include the number of transcripts in the data frame.
+```{r}
+#Creating Seurat object
+cnts <- counts(sce_kidney)
+colnames(cnts) <- 1:ncol(cnts)
+rownames(cnts) <- as.character(rowData(sce_kidney)$HUGO)
+
+fetalkidney <- CreateSeuratObject(cnts)
+fetalkidney <- NormalizeData(fetalkidney)
+
+#Cell cycle analysis
+s.genes <- cc.genes$s.genes
+g2m.genes <- cc.genes$g2m.genes
+
+fetalkidney <- CellCycleScoring(fetalkidney, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
+
+#Determining the expression of MT-genes, Rb-genes and stress genes:
+data("ribosomal_genes")
+data("stress_genes")
+
+rb <- rownames(fetalkidney) %in% rb.genes 
+stress.genes <- intersect(stress.genes, rownames(expr.norm.log))
+
+#Creating the final data frame with all the factors to be excluded from considering while calculating the clusterability measure:
+exclude <- data.frame(clsm = log(colSums(cnts) + 1), cellcycle = fetalkidney$G2M.Score, 
+                      mt = colMeans(expr.norm.log[grep("^MT-", rownames(expr.norm.log)),]), 
+                      ribosomal = colMeans(expr.norm.log[rb,]), stress = colMeans(expr.norm.log[stress.genes,]))
+```
+
+Now we are ready to apply the main function to determine clusterability:
+```{r}
+#Main function phiclust
+out_kidney <- phiclust(expr.norm.log, clusters = sce_kidney$cell.type, exclude = exclude)
+```
+
+We can have a look at the main output of this function. For each cluster, the corresponding clusterability measure is shown. 
+```{r, fig.align = "center", fig.height = 5, fig.width = 8}
+#Evaluate the output of the measure
+
+#plot all values for phiclust
+plot_phiclust(out_kidney)
+```
+
+If you would like to go into more detail, then you can have a look at all phiclusts and g-phiclusts that are available per cluster. 
+```{r, fig.align = "center", fig.height = 5, fig.width = 8}
+#Plot all values for phiclust and g_phiclust
+plot_all_phiclusts(out_kidney)
+plot_all_g_phiclusts(out_kidney)
+
+```
+
+If you are interested in the values of all phiclusts, g-phiclusts and singular values of the signal matrix, then this information can be obtained with the help of this function. 
+```{r}
+#obtain the values for phiclust and additional information
+get_info(out_kidney, "UBCD")
+```
+
+To decide if the clusters with a high clusterability measure have variances that are meaningful for you to sub-cluster, have a look at the variance driving genes.  These will tell you which genes cause the signal to appear. For example, if genes are only related to differentiation, then sub-clustering might not be necessary but could be of interest.
+```{r}
+#See which genes cause variances in the data
+get_var_genes(out_kidney, "UBCD")[,1:3]
+```
+
+You can also check out the fit of the MP distribution for each cluster. 
+```{r, fig.align = "center", fig.height = 5, fig.width = 8}
+#Check if the MP distribution fits to the data
+plot_MP(out_kidney, "UBCD")
+```
+
+For further validation, check whether the singular vectors of the significant singular values look meaningful by plotting them colored by either clusters or genes.
+```{r, fig.align = "center", fig.height = 5, fig.width = 8}
+#Plot clusters
+plot_singular_vectors(out_kidney, "UBCD", colour = sce_kidney@metadata$ubcd.cluster)
+
+#Plot variance driving genes
+plot_singular_vectors(out_kidney, "UBCD", colour = "UPK1A", scaled = FALSE)
+
+```
diff --git a/.Rproj.user/shared/notebooks/3C2DDC9A-Analysis_kidney/1/6C69BF221EF4D921/chunks.json b/.Rproj.user/shared/notebooks/3C2DDC9A-Analysis_kidney/1/6C69BF221EF4D921/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1639649250}
diff --git a/.Rproj.user/shared/notebooks/3C2DDC9A-Analysis_kidney/1/s/chunks.json b/.Rproj.user/shared/notebooks/3C2DDC9A-Analysis_kidney/1/s/chunks.json
@@ -1 +1 @@
-{"chunk_definitions":[],"doc_write_time":1639578103}
+{"chunk_definitions":[],"doc_write_time":1639649250}
diff --git a/.Rproj.user/shared/notebooks/DAB6398B-Guide_to_phiclust/1/6C69BF221EF4D921/chunks.json b/.Rproj.user/shared/notebooks/DAB6398B-Guide_to_phiclust/1/6C69BF221EF4D921/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1639649186}
diff --git a/.Rproj.user/shared/notebooks/DAB6398B-Guide_to_phiclust/1/s/chunks.json b/.Rproj.user/shared/notebooks/DAB6398B-Guide_to_phiclust/1/s/chunks.json
@@ -1 +1 @@
-{"chunk_definitions":[],"doc_write_time":1639585669}
+{"chunk_definitions":[],"doc_write_time":1639649186}
diff --git a/vignettes/Analysis_kidney.Rmd b/vignettes/Analysis_kidney.Rmd
@@ -32,7 +32,7 @@ paga.coord$Group <- sce_kidney$cell.type
 ggplot(paga.coord, aes(x = V1, y = V2, colour = Group)) +
   geom_point(shape = 16)
 ```
-With SIGMA, we are now able to assess the variability for each cluster and see if possible sub-clusters can be found. First, we load the preprocessed SingleCellObject of the kidney data.  
+With phiclust, we are now able to assess the variability for each cluster and see if possible sub-clusters can be found. First, we load the preprocessed SingleCellObject of the kidney data.  
 ```{r}
 #Load kidney data from package
 
@@ -43,7 +43,7 @@ expr.norm.log <- as.matrix(log(assay(sce_kidney, "scran")+1))
 rownames(expr.norm.log) <- as.character(rowData(sce_kidney)$HUGO)
 rownames(sce_kidney) <- as.character(rowData(sce_kidney)$HUGO)
 ```
-In the next step we would like to exclude certain variances from appearing in the measure. For example, in this fetal kidney data set, several factors would not be of interest to cluster on: cell cycle related variances, ribosomal and mitochondrial gene expression. As, well as stress related genes, which arise during dissociation. Cycling genes, we determine here with the Seurat package, so for that we first need to create a Seurat object and normalize it. Another important factor is technical variability, for example the varying number of transcripts. It's important to also include that in the data frame.
+In the next step, we would like to exclude certain variances from appearing in the measure. For example, in this fetal kidney data set, several factors would not be of interest to cluster on: cell-cycle-related variances, ribosomal and mitochondrial gene expression, as well as stress-related genes, which arise during dissociation. Here, we determine cycling genes with the Seurat package, so we first need to create a Seurat object and normalize it. Another important factor is technical variability, for example the varying number of transcripts. It's important to *always* include the number of transcripts in the data frame.
 ```{r}
 #Creating Seurat object
 cnts <- counts(sce_kidney)
@@ -74,33 +74,33 @@ exclude <- data.frame(clsm = log(colSums(cnts) + 1), cellcycle = fetalkidney$G2M
 
 Now we are ready to apply the main function to determine clusterability:
 ```{r}
-#Main funcion SIGMA
+#Main function phiclust
 out_kidney <- phiclust(expr.norm.log, clusters = sce_kidney$cell.type, exclude = exclude)
 ```
 
 We can have a look at the main output of this function. For each cluster, the corresponding clusterability measure is shown. 
 ```{r, fig.align = "center", fig.height = 5, fig.width = 8}
 #Evaluate the output of the measure
 
-#plot all values for sigma
+#plot all values for phiclust
 plot_phiclust(out_kidney)
 ```
 
-If you would like to go into more detail, then you can have a look at all sigmas and g-sigmas that are available per cluster. 
+If you would like to go into more detail, then you can have a look at all phiclusts and g-phiclusts that are available per cluster. 
 ```{r, fig.align = "center", fig.height = 5, fig.width = 8}
-#Plot all values for sigma and g_sigma
+#Plot all values for phiclust and g_phiclust
 plot_all_phiclusts(out_kidney)
 plot_all_g_phiclusts(out_kidney)
 
 ```
 
-If you are interested in the values of all sigmas, g-sigmas and singular values of the signal matrix, then this information can be obtained with the help of this function. 
+If you are interested in the values of all phiclusts, g-phiclusts and singular values of the signal matrix, then this information can be obtained with the help of this function. 
 ```{r}
-#obtain the values for sigma and additional information
+#obtain the values for phiclust and additional information
 get_info(out_kidney, "UBCD")
 ```
 
-Now, to determine if the clustrs with a high clusterability measure have variances that are meaningful for you to sub-cluster, have a look at the variance driving genes, which will tell you which genes cause the signal to appear. For example, if genes are only related to differentiation, then sub-clustering might not be necessary but could be of interest.
+To decide if the clusters with a high clusterability measure have variances that are meaningful for you to sub-cluster, have a look at the variance driving genes.  These will tell you which genes cause the signal to appear. For example, if genes are only related to differentiation, then sub-clustering might not be necessary but could be of interest.
 ```{r}
 #See which genes cause variances in the data
 get_var_genes(out_kidney, "UBCD")[,1:3]
@@ -112,7 +112,7 @@ You can also check out the fit of the MP distribution for each cluster.
 plot_MP(out_kidney, "UBCD")
 ```
 
-And for fruther validation, see if the singular vectors of the significant singular values look meaningful. By plotting either clusters or genes with the singular vectors. 
+For further validation, check whether the singular vectors of the significant singular values look meaningful by plotting them colored by either clusters or genes.
 ```{r, fig.align = "center", fig.height = 5, fig.width = 8}
 #Plot clusters
 plot_singular_vectors(out_kidney, "UBCD", colour = sce_kidney@metadata$ubcd.cluster)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"chunk_definitions":[],"doc_write_time":1639649250}
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"chunk_definitions":[],"doc_write_time":1639578103}
		{"chunk_definitions":[],"doc_write_time":1639649250}