``` r
-counts_SE.norm = counts_SE.aggr %>% identify_abundant(factor_of_interest = condition) %>% scale_abundance()
+counts_SE.norm = counts_SE.aggr |> identify_abundant(factor_of_interest = condition) |> scale_abundance()
+ ## tidybulk says: the sample with largest library size SRR1740080 was chosen as reference for scaling
@@ -229,8 +227,8 @@ the x axis we have the log scaled counts, on the y axis we have the
density, data is grouped by sample and coloured by cell type.
``` r
-counts_SE.norm %>%
- ggplot(aes(count_scaled + 1, group=sample, color=`Cell.type`)) +
+counts_SE.norm |>
+ ggplot(aes(count_scaled + 1, group=.sample, color=`Cell.type`)) +
geom_density() +
scale_x_log10() +
@@ -247,9 +245,11 @@ We may want to identify and filter variable transcripts.
``` r
-counts_SE.norm.variable = counts_SE.norm %>% keep_variable()
+counts_SE.norm.variable = counts_SE.norm |> keep_variable()
+ ## Getting the 500 most variable genes
@@ -298,10 +298,14 @@ TidyTranscriptomics
``` r
counts_SE.norm.MDS =
- counts_SE.norm %>%
+ counts_SE.norm |>
reduce_dimensions(method="MDS", .dims = 6)
+ ## Getting the 500 most variable genes
+ ## tidybulk says: to access the raw results do `attr(..., "internals")$MDS`
@@ -315,7 +319,7 @@ count_m_log = log(count_m + 1)
cmds = limma::plotMDS(ndim = .dims, plot = FALSE)
cmds = cmds %$%
- cmdscale.out %>%
+ cmdscale.out |>
setNames(sprintf("Dim%s", 1:6))
cmds$cell_type = tibble_counts[
@@ -334,33 +338,42 @@ On the x and y axes we have the reduced dimensions 1 to 3, data is
coloured by cell type.
``` r
-counts_SE.norm.MDS %>% pivot_sample() %>% select(contains("Dim"), everything())
+counts_SE.norm.MDS |> pivot_sample() |> select(contains("Dim"), everything())
## # A tibble: 48 × 14
- ## Dim1 Dim2 Dim3 Dim4 Dim5 Dim6 .sample Cell.…¹ time condi…²
- ##
- ## 1 -1.46 0.220 -1.68 0.0553 0.0658 -0.126 SRR17400… b_cell 0 d TRUE
- ## 2 -1.46 0.226 -1.71 0.0300 0.0454 -0.137 SRR17400… b_cell 1 d TRUE
- ## 3 -1.44 0.193 -1.60 0.0890 0.0503 -0.121 SRR17400… b_cell 3 d TRUE
- ## 4 -1.44 0.198 -1.67 0.0891 0.0543 -0.110 SRR17400… b_cell 7 d TRUE
- ## 5 0.243 -1.42 0.182 0.00642 -0.503 -0.131 SRR17400… dendri… 0 d FALSE
- ## 6 0.191 -1.42 0.195 0.0180 -0.457 -0.130 SRR17400… dendri… 1 d FALSE
- ## 7 0.257 -1.42 0.152 0.0130 -0.582 -0.0927 SRR17400… dendri… 3 d FALSE
- ## 8 0.162 -1.43 0.189 0.0232 -0.452 -0.109 SRR17400… dendri… 7 d FALSE
- ## 9 0.516 -1.47 0.240 -0.251 0.457 -0.119 SRR17400… monocy… 0 d FALSE
- ## 10 0.514 -1.41 0.231 -0.219 0.458 -0.131 SRR17400… monocy… 1 d FALSE
- ## # … with 38 more rows, 4 more variables: batch , factor_of_interest ,
- ## # TMM , multiplier , and abbreviated variable names ¹Cell.type,
- ## # ²condition
- ## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
+ ## Dim1 Dim2 Dim3 Dim4 Dim5 Dim6 .sample Cell.type time
+ ##
+ ## 1 -1.46 0.220 -1.68 0.0553 0.0658 -0.126 SRR1740034 b_cell 0 d
+ ## 2 -1.46 0.226 -1.71 0.0300 0.0454 -0.137 SRR1740035 b_cell 1 d
+ ## 3 -1.44 0.193 -1.60 0.0890 0.0503 -0.121 SRR1740036 b_cell 3 d
+ ## 4 -1.44 0.198 -1.67 0.0891 0.0543 -0.110 SRR1740037 b_cell 7 d
+ ## 5 0.243 -1.42 0.182 0.00642 -0.503 -0.131 SRR1740038 dendritic_mye… 0 d
+ ## 6 0.191 -1.42 0.195 0.0180 -0.457 -0.130 SRR1740039 dendritic_mye… 1 d
+ ## 7 0.257 -1.42 0.152 0.0130 -0.582 -0.0927 SRR1740040 dendritic_mye… 3 d
+ ## 8 0.162 -1.43 0.189 0.0232 -0.452 -0.109 SRR1740041 dendritic_mye… 7 d
+ ## 9 0.516 -1.47 0.240 -0.251 0.457 -0.119 SRR1740042 monocyte 0 d
+ ## 10 0.514 -1.41 0.231 -0.219 0.458 -0.131 SRR1740043 monocyte 1 d
+ ## # ℹ 38 more rows
+ ## # ℹ 5 more variables: condition , batch , factor_of_interest ,
+ ## # TMM , multiplier
``` r
-counts_SE.norm.MDS %>%
- pivot_sample() %>%
+counts_SE.norm.MDS |>
+ pivot_sample() |>
GGally::ggpairs(columns = 6:(6+5), ggplot2::aes(colour=`Cell.type`))
+ ## Registered S3 method overwritten by 'GGally':
+ ## method from
+ ## +.gg ggplot2
+ ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+ ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+ ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+ ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+ ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
@@ -371,7 +384,7 @@ TidyTranscriptomics
``` r
counts_SE.norm.PCA =
- counts_SE.norm %>%
+ counts_SE.norm |>
reduce_dimensions(method="PCA", .dims = 6)
@@ -383,7 +396,7 @@ Standard procedure (comparative purpose)
``` r
count_m_log = log(count_m + 1)
-pc = count_m_log %>% prcomp(scale = TRUE)
+pc = count_m_log |> prcomp(scale = TRUE)
variance = pc$sdev^2
variance = (variance / sum(variance))[1:6]
pc$cell_type = counts[
@@ -402,29 +415,29 @@ On the x and y axes we have the reduced dimensions 1 to 3, data is
coloured by cell type.
``` r
-counts_SE.norm.PCA %>% pivot_sample() %>% select(contains("PC"), everything())
+counts_SE.norm.PCA |> pivot_sample() |> select(contains("PC"), everything())
## # A tibble: 48 × 14
- ## PC1 PC2 PC3 PC4 PC5 PC6 .sample Cell.…¹ time condi…² batch
- ##
- ## 1 -12.6 -2.52 -14.9 -0.424 -0.592 -1.22 SRR174… b_cell 0 d TRUE 0
- ## 2 -12.6 -2.57 -15.2 -0.140 -0.388 -1.30 SRR174… b_cell 1 d TRUE 1
- ## 3 -12.6 -2.41 -14.5 -0.714 -0.344 -1.10 SRR174… b_cell 3 d TRUE 1
- ## 4 -12.5 -2.34 -14.9 -0.816 -0.427 -1.00 SRR174… b_cell 7 d TRUE 1
- ## 5 0.189 13.0 1.66 -0.0269 4.64 -1.35 SRR174… dendri… 0 d FALSE 0
- ## 6 -0.293 12.9 1.76 -0.0727 4.21 -1.28 SRR174… dendri… 1 d FALSE 0
- ## 7 0.407 13.0 1.42 -0.0529 5.37 -1.01 SRR174… dendri… 3 d FALSE 1
- ## 8 -0.620 13.0 1.73 -0.201 4.17 -1.07 SRR174… dendri… 7 d FALSE 0
- ## 9 2.56 13.5 2.32 2.03 -4.32 -1.22 SRR174… monocy… 0 d FALSE 1
- ## 10 2.65 13.1 2.21 1.80 -4.29 -1.30 SRR174… monocy… 1 d FALSE 1
- ## # … with 38 more rows, 3 more variables: factor_of_interest , TMM ,
- ## # multiplier , and abbreviated variable names ¹Cell.type, ²condition
- ## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
+ ## PC1 PC2 PC3 PC4 PC5 PC6 .sample Cell.type time condition
+ ##
+ ## 1 -12.6 -2.52 -14.9 -0.424 -0.592 -1.22 SRR17400… b_cell 0 d TRUE
+ ## 2 -12.6 -2.57 -15.2 -0.140 -0.388 -1.30 SRR17400… b_cell 1 d TRUE
+ ## 3 -12.6 -2.41 -14.5 -0.714 -0.344 -1.10 SRR17400… b_cell 3 d TRUE
+ ## 4 -12.5 -2.34 -14.9 -0.816 -0.427 -1.00 SRR17400… b_cell 7 d TRUE
+ ## 5 0.189 13.0 1.66 -0.0269 4.64 -1.35 SRR17400… dendriti… 0 d FALSE
+ ## 6 -0.293 12.9 1.76 -0.0727 4.21 -1.28 SRR17400… dendriti… 1 d FALSE
+ ## 7 0.407 13.0 1.42 -0.0529 5.37 -1.01 SRR17400… dendriti… 3 d FALSE
+ ## 8 -0.620 13.0 1.73 -0.201 4.17 -1.07 SRR17400… dendriti… 7 d FALSE
+ ## 9 2.56 13.5 2.32 2.03 -4.32 -1.22 SRR17400… monocyte 0 d FALSE
+ ## 10 2.65 13.1 2.21 1.80 -4.29 -1.30 SRR17400… monocyte 1 d FALSE
+ ## # ℹ 38 more rows
+ ## # ℹ 4 more variables: batch , factor_of_interest , TMM ,
+ ## # multiplier
``` r
-counts_SE.norm.PCA %>%
- pivot_sample() %>%
+counts_SE.norm.PCA |>
+ pivot_sample() |>
GGally::ggpairs(columns = 11:13, ggplot2::aes(colour=`Cell.type`))
@@ -438,8 +451,8 @@ TidyTranscriptomics
``` r
counts_SE.norm.tSNE =
- breast_tcga_mini_SE %>%
- identify_abundant() %>%
+ breast_tcga_mini_SE |>
+ identify_abundant() |>
method = "tSNE",
@@ -476,30 +489,29 @@ tsne$cell_type = tibble_counts[
``` r
-counts_SE.norm.tSNE %>%
- pivot_sample() %>%
+counts_SE.norm.tSNE |>
+ pivot_sample() |>
select(contains("tSNE"), everything())
## # A tibble: 251 × 4
- ## tSNE1 tSNE2 .sample Call
- ##
- ## 1 -5.25 10.2 TCGA-A1-A0SD-01A-11R-A115-07 LumA
- ## 2 6.41 2.79 TCGA-A1-A0SF-01A-11R-A144-07 LumA
- ## 3 -9.28 6.63 TCGA-A1-A0SG-01A-11R-A144-07 LumA
- ## 4 -1.76 4.82 TCGA-A1-A0SH-01A-11R-A084-07 LumA
- ## 5 -1.41 12.2 TCGA-A1-A0SI-01A-11R-A144-07 LumB
- ## 6 -1.89 -3.60 TCGA-A1-A0SJ-01A-11R-A084-07 LumA
- ## 7 18.5 -13.4 TCGA-A1-A0SK-01A-12R-A084-07 Basal
- ## 8 -4.03 -10.4 TCGA-A1-A0SM-01A-11R-A084-07 LumA
- ## 9 -2.84 -10.8 TCGA-A1-A0SN-01A-11R-A144-07 LumB
- ## 10 -19.3 5.03 TCGA-A1-A0SQ-01A-21R-A144-07 LumA
- ## # … with 241 more rows
- ## # ℹ Use `print(n = ...)` to see more rows
+ ## tSNE1 tSNE2 .sample Call
+ ##
+ ## 1 -4.29 5.40 TCGA-A1-A0SD-01A-11R-A115-07 LumA
+ ## 2 4.48 -2.82 TCGA-A1-A0SF-01A-11R-A144-07 LumA
+ ## 3 -9.06 0.637 TCGA-A1-A0SG-01A-11R-A144-07 LumA
+ ## 4 7.05 7.28 TCGA-A1-A0SH-01A-11R-A084-07 LumA
+ ## 5 -2.38 2.77 TCGA-A1-A0SI-01A-11R-A144-07 LumB
+ ## 6 -1.63 -5.67 TCGA-A1-A0SJ-01A-11R-A084-07 LumA
+ ## 7 18.2 -18.3 TCGA-A1-A0SK-01A-12R-A084-07 Basal
+ ## 8 -9.06 -11.7 TCGA-A1-A0SM-01A-11R-A084-07 LumA
+ ## 9 -8.88 -10.3 TCGA-A1-A0SN-01A-11R-A144-07 LumB
+ ## 10 -8.30 23.4 TCGA-A1-A0SQ-01A-21R-A144-07 LumA
+ ## # ℹ 241 more rows
``` r
-counts_SE.norm.tSNE %>%
- pivot_sample() %>%
+counts_SE.norm.tSNE |>
+ pivot_sample() |>
ggplot(aes(x = `tSNE1`, y = `tSNE2`, color=Call)) + geom_point() + my_theme
@@ -521,7 +533,7 @@ TidyTranscriptomics
``` r
counts_SE.norm.MDS.rotated =
- counts_SE.norm.MDS %>%
+ counts_SE.norm.MDS |>
rotate_dimensions(`Dim1`, `Dim2`, rotation_degrees = 45, action="get")
@@ -537,9 +549,9 @@ rotation = function(m, d) {
c(`1` = cos(r), `2` = -sin(r)),
c(`1` = sin(r), `2` = cos(r))
- ) %>% as_matrix) %*% m)
+ ) |> as_matrix()) %*% m)
-mds_r = pca %>% rotation(rotation_degrees)
+mds_r = pca |> rotation(rotation_degrees)
mds_r$cell_type = counts[
match(counts$sample, rownames(mds_r)),
@@ -556,7 +568,7 @@ mds_r$cell_type = counts[
dimensions, data is coloured by cell type.
``` r
-counts_SE.norm.MDS.rotated %>%
+counts_SE.norm.MDS.rotated |>
ggplot(aes(x=`Dim1`, y=`Dim2`, color=`Cell.type` )) +
geom_point() +
@@ -568,8 +580,8 @@ counts_SE.norm.MDS.rotated %>%
dimensions rotated by 45 degrees, data is coloured by cell type.
``` r
-counts_SE.norm.MDS.rotated %>%
- pivot_sample() %>%
+counts_SE.norm.MDS.rotated |>
+ pivot_sample() |>
ggplot(aes(x=`Dim1_rotated_45`, y=`Dim2_rotated_45`, color=`Cell.type` )) +
geom_point() +
@@ -593,7 +605,7 @@ TidyTranscriptomics
``` r
counts_SE.de =
- counts_SE %>%
+ counts_SE |>
test_differential_abundance( ~ condition, action="get")
@@ -630,8 +642,8 @@ The contrasts have the name of the design matrix (generally
``` r
counts_SE.de =
- counts_SE %>%
- identify_abundant(factor_of_interest = condition) %>%
+ counts_SE |>
+ identify_abundant(factor_of_interest = condition) |>
~ 0 + condition,
.contrasts = c( "conditionTRUE - conditionFALSE"),
@@ -656,7 +668,7 @@ TidyTranscriptomics
``` r
counts_SE.norm.adj =
- counts_SE.norm %>% adjust_abundance( ~ factor_of_interest + batch)
+ counts_SE.norm |> adjust_abundance( .factor_unwanted = batch, .factor_of_interest = factor_of_interest)
@@ -710,14 +722,10 @@ TidyTranscriptomics
``` r
counts_SE.cibersort =
- counts_SE %>%
+ counts_SE |>
deconvolve_cellularity(action="get", cores=1, prefix = "cibersort__")
- ##
- ## The downloaded binary packages are in
- ## /var/folders/zn/m_qvr9zd7tq0wdtsbq255f8xypj_zg/T//RtmpIi5KN6/downloaded_packages
@@ -726,7 +734,7 @@ Standard procedure (comparative purpose)
``` r
-count_m %>% write.table("mixture_file.txt")
+count_m |> write.table("mixture_file.txt")
results <- CIBERSORT(
@@ -752,13 +760,13 @@ proportions. The data is facetted and coloured by nominal cell types
(annotation given by the researcher after FACS sorting).
``` r
-counts_SE.cibersort %>%
+counts_SE.cibersort |>
names_to= "Cell_type_inferred",
values_to = "proportion",
names_prefix ="cibersort__",
- ) %>%
+ ) |>
ggplot(aes(x=`Cell_type_inferred`, y=proportion, fill=`Cell.type`)) +
geom_boxplot() +
facet_wrap(~`Cell.type`) +
@@ -766,6 +774,8 @@ counts_SE.cibersort %>%
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), aspect.ratio=1/5)
+ ## tidySummarizedExperiment says: A data frame is returned for independent data analysis.
## Test differential cell-type abundance
@@ -774,31 +784,27 @@ We can also perform a statistical test on the differential cell-type
abundance across conditions
``` r
- counts_SE %>%
+ counts_SE |>
test_differential_cellularity(. ~ condition )
- ##
- ## The downloaded binary packages are in
- ## /var/folders/zn/m_qvr9zd7tq0wdtsbq255f8xypj_zg/T//RtmpIi5KN6/downloaded_packages
## # A tibble: 22 × 7
- ## .cell_type cell_t…¹ estim…² estim…³ std.e…⁴ stati…⁵ p.valu…⁶
- ##
- ## 1 cibersort.B.cells.naive -2.94 2.25 0.367 6.13 8.77e-10
- ## 2 cibersort.B.cells.memory -4.86 1.48 0.436 3.40 6.77e- 4
- ## 3 cibersort.Plasma.cells -5.33 -0.487 0.507 -0.960 3.37e- 1
- ## 4 cibersort.T.cells.CD8 -2.33 0.924 0.475 1.94 5.18e- 2
- ## 5 cibersort.T.cells.CD4.naive -2.83 -0.620 0.531 -1.17 2.43e- 1
- ## 6 cibersort.T.cells.CD4.memo… -2.46 0.190 0.500 0.380 7.04e- 1
- ## 7 cibersort.T.cells.CD4.memo… -3.67 2.23 0.427 5.22 1.80e- 7
- ## 8 cibersort.T.cells.follicul… -5.68 -0.217 0.507 -0.427 6.69e- 1
- ## 9 cibersort.T.cells.regulato… -5.04 1.94 0.360 5.39 6.86e- 8
- ## 10 cibersort.T.cells.gamma.de… -4.78 -0.250 0.514 -0.486 6.27e- 1
- ## # … with 12 more rows, and abbreviated variable names ¹cell_type_proportions,
- ## # ²`estimate_(Intercept)`, ³estimate_conditionTRUE, ⁴std.error_conditionTRUE,
- ## # ⁵statistic_conditionTRUE, ⁶p.value_conditionTRUE
- ## # ℹ Use `print(n = ...)` to see more rows
+ ## .cell_type cell_type_proportions `estimate_(Intercept)`
+ ##
+ ## 1 cibersort.B.cells.naive -2.94
+ ## 2 cibersort.B.cells.memory -4.86
+ ## 3 cibersort.Plasma.cells -5.33
+ ## 4 cibersort.T.cells.CD8 -2.33
+ ## 5 cibersort.T.cells.CD4.naive -2.83
+ ## 6 cibersort.T.cells.CD4.memory.re… -2.46
+ ## 7 cibersort.T.cells.CD4.memory.ac… -3.67
+ ## 8 cibersort.T.cells.follicular.he… -5.68
+ ## 9 cibersort.T.cells.regulatory..T… -5.04
+ ## 10 cibersort.T.cells.gamma.delta -4.78
+ ## # ℹ 12 more rows
+ ## # ℹ 4 more variables: estimate_conditionTRUE ,
+ ## # std.error_conditionTRUE , statistic_conditionTRUE ,
+ ## # p.value_conditionTRUE
We can also perform regression analysis with censored data (coxph).
@@ -806,41 +812,55 @@ We can also perform regression analysis with censored data (coxph).
# Add survival data
counts_SE_survival =
- counts_SE %>%
- nest(data = -sample) %>%
+ counts_SE |>
+ nest(data = -sample) |>
days = sample(1:1000, size = n()),
dead = sample(c(0,1), size = n(), replace = TRUE)
- ) %>%
+ ) |>
+ ## Warning in is_sample_feature_deprecated_used(.data, .cols):
+ ## tidySummarizedExperiment says: from version 1.3.1, the special columns
+ ## including sample/feature id (colnames(se), rownames(se)) has changed to
+ ## ".sample" and ".feature". This dataset is returned with the old-style
+ ## vocabulary (feature and sample), however we suggest to update your workflow to
+ ## reflect the new vocabulary (.feature, .sample)
+ ## Warning in is_sample_feature_deprecated_used(.data, .cols):
+ ## tidySummarizedExperiment says: from version 1.3.1, the special columns
+ ## including sample/feature id (colnames(se), rownames(se)) has changed to
+ ## ".sample" and ".feature". This dataset is returned with the old-style
+ ## vocabulary (feature and sample), however we suggest to update your workflow to
+ ## reflect the new vocabulary (.feature, .sample)
+``` r
# Test
-counts_SE_survival %>%
+counts_SE_survival |>
test_differential_cellularity(survival::Surv(days, dead) ~ .)
## # A tibble: 22 × 6
- ## .cell_type cell_t…¹ estim…² std.e…³ stati…⁴ p.value
- ##
- ## 1 cibersort.B.cells.naive -0.224 0.415 -0.540 0.589
- ## 2 cibersort.B.cells.memory 0.510 0.346 1.48 0.140
- ## 3 cibersort.Plasma.cells 0.892 0.449 1.99 0.0467
- ## 4 cibersort.T.cells.CD8 0.531 0.639 0.831 0.406
- ## 5 cibersort.T.cells.CD4.naive 0.112 0.386 0.290 0.772
- ## 6 cibersort.T.cells.CD4.memory.resting 0.498 0.540 0.921 0.357
- ## 7 cibersort.T.cells.CD4.memory.activa… 2.37 0.939 2.52 0.0117
- ## 8 cibersort.T.cells.follicular.helper -0.544 0.421 -1.29 0.197
- ## 9 cibersort.T.cells.regulatory..Tregs. 1.59 0.656 2.42 0.0157
- ## 10 cibersort.T.cells.gamma.delta 0.510 0.688 0.741 0.459
- ## # … with 12 more rows, and abbreviated variable names ¹cell_type_proportions,
- ## # ²estimate, ³std.error, ⁴statistic
- ## # ℹ Use `print(n = ...)` to see more rows
+ ## .cell_type cell_type_proportions estimate std.error statistic p.value
+ ##
+ ## 1 cibersort.B.cells… 5.15 1.58 3.27 0.00108
+ ## 2 cibersort.B.cells… 2.12 1.48 1.43 0.153
+ ## 3 cibersort.Plasma.… 2.96 1.35 2.20 0.0279
+ ## 4 cibersort.T.cells… 3.94 1.71 2.30 0.0215
+ ## 5 cibersort.T.cells… 3.34 1.75 1.91 0.0560
+ ## 6 cibersort.T.cells… -0.785 0.868 -0.904 0.366
+ ## 7 cibersort.T.cells… -3.15 1.65 -1.91 0.0568
+ ## 8 cibersort.T.cells… -0.435 0.421 -1.03 0.301
+ ## 9 cibersort.T.cells… 0.795 0.757 1.05 0.294
+ ## 10 cibersort.T.cells… -0.0292 0.641 -0.0456 0.964
+ ## # ℹ 12 more rows
We can also perform test of Kaplan-Meier curves.
``` r
counts_stratified =
- counts_SE_survival %>%
+ counts_SE_survival |>
# Test
@@ -852,21 +872,22 @@ counts_stratified
## # A tibble: 22 × 6
- ## .cell_type cell_t…¹ .low_…² .high…³ pvalue plot
- ##
- ## 1 cibersort.B.cells.naive 14.4 7.56 0.506
- ## 2 cibersort.B.cells.memory 17.2 4.77 0.500
- ## 3 cibersort.Plasma.cells 13.3 8.73 0.903
- ## 4 cibersort.T.cells.CD8 13.9 8.06 0.369
- ## 5 cibersort.T.cells.CD4.naive 12.8 9.15 0.407
- ## 6 cibersort.T.cells.CD4.memory.rest… 7.65 14.4 0.105
- ## 7 cibersort.T.cells.CD4.memory.acti… 15.7 6.26 0.392
- ## 8 cibersort.T.cells.follicular.help… 17.1 4.88 0.949
- ## 9 cibersort.T.cells.regulatory..Tre… 13.7 8.35 0.771
- ## 10 cibersort.T.cells.gamma.delta 16.2 5.76 0.379
- ## # … with 12 more rows, and abbreviated variable names ¹cell_type_proportions,
- ## # ².low_cellularity_expected, ³.high_cellularity_expected
- ## # ℹ Use `print(n = ...)` to see more rows
+ ## .cell_type cell_type_proportions .low_cellularity_exp…¹
+ ##
+ ## 1 cibersort.B.cells.naive 9.41
+ ## 2 cibersort.B.cells.memory 10.5
+ ## 3 cibersort.Plasma.cells 12.5
+ ## 4 cibersort.T.cells.CD8 11.0
+ ## 5 cibersort.T.cells.CD4.naive 8.40
+ ## 6 cibersort.T.cells.CD4.memory.re… 9.09
+ ## 7 cibersort.T.cells.CD4.memory.ac… 11.2
+ ## 8 cibersort.T.cells.follicular.he… 13.7
+ ## 9 cibersort.T.cells.regulatory..T… 8.16
+ ## 10 cibersort.T.cells.gamma.delta 14.8
+ ## # ℹ 12 more rows
+ ## # ℹ abbreviated name: ¹.low_cellularity_expected
+ ## # ℹ 3 more variables: .high_cellularity_expected , pvalue ,
+ ## # plot
Plot Kaplan-Meier curves
@@ -892,7 +913,7 @@ clustering methods.
``` r
-counts_SE.norm.cluster = counts_SE.norm.MDS %>%
+counts_SE.norm.cluster = counts_SE.norm.MDS |>
cluster_elements(method="kmeans", centers = 2, action="get" )
@@ -924,7 +945,7 @@ We can add cluster annotation to the MDS dimension reduced data set and
``` r
- counts_SE.norm.cluster %>%
+ counts_SE.norm.cluster |>
ggplot(aes(x=`Dim1`, y=`Dim2`, color=`cluster_kmeans`)) +
geom_point() +
@@ -944,7 +965,7 @@ TidyTranscriptomics
``` r
counts_SE.norm.SNN =
- counts_SE.norm.tSNE %>%
+ counts_SE.norm.tSNE |>
cluster_elements(method = "SNN")
@@ -982,20 +1003,20 @@ snn$cell_type = tibble_counts[
``` r
-counts_SE.norm.SNN %>%
- pivot_sample() %>%
+counts_SE.norm.SNN |>
+ pivot_sample() |>
select(contains("tSNE"), everything())
-counts_SE.norm.SNN %>%
- pivot_sample() %>%
- gather(source, Call, c("cluster_SNN", "Call")) %>%
- distinct() %>%
+counts_SE.norm.SNN |>
+ pivot_sample() |>
+ gather(source, Call, c("cluster_SNN", "Call")) |>
+ distinct() |>
ggplot(aes(x = `tSNE1`, y = `tSNE2`, color=Call)) + geom_point() + facet_grid(~source) + my_theme
# Do differential transcription between clusters
-counts_SE.norm.SNN %>%
- mutate(factor_of_interest = `cluster_SNN` == 3) %>%
+counts_SE.norm.SNN |>
+ mutate(factor_of_interest = `cluster_SNN` == 3) |>
~ factor_of_interest,
@@ -1012,10 +1033,9 @@ symbols; for `sample`, `transcript` and `count`) and returns a tibble
with redundant elements removed (e.g., samples). Two redundancy
estimation approaches are supported:
-- removal of highly correlated clusters of elements (keeping a
- representative) with method=“correlation”
-- removal of most proximal element pairs in a reduced dimensional
- space.
+- removal of highly correlated clusters of elements (keeping a
+ representative) with method=“correlation”
+- removal of most proximal element pairs in a reduced dimensional space.
**Approach 1**
@@ -1025,13 +1045,11 @@ TidyTranscriptomics
``` r
counts_SE.norm.non_redundant =
- counts_SE.norm.MDS %>%
+ counts_SE.norm.MDS |>
remove_redundancy( method = "correlation" )
- ##
- ## The downloaded binary packages are in
- ## /var/folders/zn/m_qvr9zd7tq0wdtsbq255f8xypj_zg/T//RtmpIi5KN6/downloaded_packages
+ ## Getting the 8513 most variable genes
@@ -1051,14 +1069,14 @@ library(widyr)
sort = TRUE,
diag = FALSE,
upper = FALSE
- ) %>%
- filter(correlation > correlation_threshold) %>%
- distinct(item1) %>%
+ ) |>
+ filter(correlation > correlation_threshold) |>
+ distinct(item1) |>
rename(!!.element := item1)
# Return non redundant data frame
-counts %>% anti_join(.data.correlated) %>%
- spread(sample, rc, - transcript) %>%
+counts |> anti_join(.data.correlated) |>
+ spread(sample, rc, - transcript) |>
@@ -1072,8 +1090,8 @@ We can visualise how the reduced redundancy with the reduced dimensions
look like
``` r
-counts_SE.norm.non_redundant %>%
- pivot_sample() %>%
+counts_SE.norm.non_redundant |>
+ pivot_sample() |>
ggplot(aes(x=`Dim1`, y=`Dim2`, color=`Cell.type`)) +
geom_point() +
@@ -1085,7 +1103,7 @@ counts_SE.norm.non_redundant %>%
``` r
counts_SE.norm.non_redundant =
- counts_SE.norm.MDS %>%
+ counts_SE.norm.MDS |>
method = "reduced_dimensions",
Dim_a_column = `Dim1`,
@@ -1097,8 +1115,8 @@ We can visualise MDS reduced dimensions of the samples with the closest
pair removed.
``` r
-counts_SE.norm.non_redundant %>%
- pivot_sample() %>%
+counts_SE.norm.non_redundant |>
+ pivot_sample() |>
ggplot(aes(x=`Dim1`, y=`Dim2`, color=`Cell.type`)) +
geom_point() +
@@ -1135,26 +1153,26 @@ different resources use ensembl IDs while others use gene symbol IDs.
This currently works for human and mouse.
``` r
-counts_ensembl %>% ensembl_to_symbol(ens)
+counts_ensembl |> ensembl_to_symbol(ens)
## # A tibble: 119 × 8
- ## ens iso `read count` sample cases…¹ cases…² trans…³ ref_g…⁴
- ##
- ## 1 ENSG00000000003 13 144 TARGET-20… Acute … Primar… TSPAN6 hg38
- ## 2 ENSG00000000003 13 72 TARGET-20… Acute … Primar… TSPAN6 hg38
- ## 3 ENSG00000000003 13 0 TARGET-20… Acute … Primar… TSPAN6 hg38
- ## 4 ENSG00000000003 13 1099 TARGET-20… Acute … Primar… TSPAN6 hg38
- ## 5 ENSG00000000003 13 11 TARGET-20… Acute … Primar… TSPAN6 hg38
- ## 6 ENSG00000000003 13 2 TARGET-20… Acute … Primar… TSPAN6 hg38
- ## 7 ENSG00000000003 13 3 TARGET-20… Acute … Primar… TSPAN6 hg38
- ## 8 ENSG00000000003 13 2678 TARGET-20… Acute … Primar… TSPAN6 hg38
- ## 9 ENSG00000000003 13 751 TARGET-20… Acute … Primar… TSPAN6 hg38
- ## 10 ENSG00000000003 13 1 TARGET-20… Acute … Primar… TSPAN6 hg38
- ## # … with 109 more rows, and abbreviated variable names
- ## # ¹cases_0_project_disease_type, ²cases_0_samples_0_sample_type, ³transcript,
- ## # ⁴ref_genome
- ## # ℹ Use `print(n = ...)` to see more rows
+ ## ens iso `read count` sample cases_0_project_dise…¹ cases_0_samples_0_sa…²
+ ##
+ ## 1 ENSG… 13 144 TARGE… Acute Myeloid Leukemia Primary Blood Derived…
+ ## 2 ENSG… 13 72 TARGE… Acute Myeloid Leukemia Primary Blood Derived…
+ ## 3 ENSG… 13 0 TARGE… Acute Myeloid Leukemia Primary Blood Derived…
+ ## 4 ENSG… 13 1099 TARGE… Acute Myeloid Leukemia Primary Blood Derived…
+ ## 5 ENSG… 13 11 TARGE… Acute Myeloid Leukemia Primary Blood Derived…
+ ## 6 ENSG… 13 2 TARGE… Acute Myeloid Leukemia Primary Blood Derived…
+ ## 7 ENSG… 13 3 TARGE… Acute Myeloid Leukemia Primary Blood Derived…
+ ## 8 ENSG… 13 2678 TARGE… Acute Myeloid Leukemia Primary Blood Derived…
+ ## 9 ENSG… 13 751 TARGE… Acute Myeloid Leukemia Primary Blood Derived…
+ ## 10 ENSG… 13 1 TARGE… Acute Myeloid Leukemia Primary Blood Derived…
+ ## # ℹ 109 more rows
+ ## # ℹ abbreviated names: ¹cases_0_project_disease_type,
+ ## # ²cases_0_samples_0_sample_type
+ ## # ℹ 2 more variables: transcript , ref_genome
## From gene symbol to gene description (gene name in full)
@@ -1162,25 +1180,31 @@ We can add gene full name (and in future description) from symbol
identifiers. This currently works for human and mouse.
``` r
-counts_SE %>%
- describe_transcript() %>%
+counts_SE |>
+ describe_transcript() |>
select(feature, description, everything())
- ## # A SummarizedExperiment-tibble abstraction: 408,624 × 48
+ ## Warning in is_sample_feature_deprecated_used(.data, .cols):
+ ## tidySummarizedExperiment says: from version 1.3.1, the special columns
+ ## including sample/feature id (colnames(se), rownames(se)) has changed to
+ ## ".sample" and ".feature". This dataset is returned with the old-style
+ ## vocabulary (feature and sample), however we suggest to update your workflow to
+ ## reflect the new vocabulary (.feature, .sample)
+ ## # A SummarizedExperiment-tibble abstraction: 408,624 × 10
## # [90mFeatures=8513 | Samples=48 | Assays=count[0m
- ## feature sample count Cell.…¹ time condi…² batch facto…³ descr…⁴ gene_…⁵
- ##
- ## 1 A1BG SRR1740034 153 b_cell 0 d TRUE 0 TRUE alpha-… A1BG
- ## 2 A1BG-AS1 SRR1740034 83 b_cell 0 d TRUE 0 TRUE A1BG a… A1BG-A…
- ## 3 AAAS SRR1740034 868 b_cell 0 d TRUE 0 TRUE aladin… AAAS
- ## 4 AACS SRR1740034 222 b_cell 0 d TRUE 0 TRUE acetoa… AACS
- ## 5 AAGAB SRR1740034 590 b_cell 0 d TRUE 0 TRUE alpha … AAGAB
- ## 6 AAMDC SRR1740034 48 b_cell 0 d TRUE 0 TRUE adipog… AAMDC
- ## 7 AAMP SRR1740034 1257 b_cell 0 d TRUE 0 TRUE angio … AAMP
- ## 8 AANAT SRR1740034 284 b_cell 0 d TRUE 0 TRUE aralky… AANAT
- ## 9 AAR2 SRR1740034 379 b_cell 0 d TRUE 0 TRUE AAR2 s… AAR2
- ## 10 AARS2 SRR1740034 685 b_cell 0 d TRUE 0 TRUE alanyl… AARS2
- ## # … with 40 more rows, and abbreviated variable names ¹Cell.type, ²condition,
- ## # ³factor_of_interest, ⁴description, ⁵gene_name
- ## # ℹ Use `print(n = ...)` to see more rows
+ ## feature sample count Cell.type time condition batch factor_of_interest
+ ##
+ ## 1 A1BG SRR1740034 153 b_cell 0 d TRUE 0 TRUE
+ ## 2 A1BG-AS1 SRR1740034 83 b_cell 0 d TRUE 0 TRUE
+ ## 3 AAAS SRR1740034 868 b_cell 0 d TRUE 0 TRUE
+ ## 4 AACS SRR1740034 222 b_cell 0 d TRUE 0 TRUE
+ ## 5 AAGAB SRR1740034 590 b_cell 0 d TRUE 0 TRUE
+ ## 6 AAMDC SRR1740034 48 b_cell 0 d TRUE 0 TRUE
+ ## 7 AAMP SRR1740034 1257 b_cell 0 d TRUE 0 TRUE
+ ## 8 AANAT SRR1740034 284 b_cell 0 d TRUE 0 TRUE
+ ## 9 AAR2 SRR1740034 379 b_cell 0 d TRUE 0 TRUE
+ ## 10 AARS2 SRR1740034 685 b_cell 0 d TRUE 0 TRUE
+ ## # ℹ 40 more rows
+ ## # ℹ 2 more variables: description , gene_name
diff --git a/man/figures/plot_cibersort-1.png b/man/figures/plot_cibersort-1.png
index ed786285..88be62f5 100644
Binary files a/man/figures/plot_cibersort-1.png and b/man/figures/plot_cibersort-1.png differ
diff --git a/man/figures/plot_cluster-1.png b/man/figures/plot_cluster-1.png
index bbaeaaed..50773f4b 100644
Binary files a/man/figures/plot_cluster-1.png and b/man/figures/plot_cluster-1.png differ
diff --git a/man/figures/plot_drop-1.png b/man/figures/plot_drop-1.png
index fdaaf789..67e73cdb 100644
Binary files a/man/figures/plot_drop-1.png and b/man/figures/plot_drop-1.png differ
diff --git a/man/figures/plot_drop2-1.png b/man/figures/plot_drop2-1.png
index 12f943fc..b2bea833 100644
Binary files a/man/figures/plot_drop2-1.png and b/man/figures/plot_drop2-1.png differ
diff --git a/man/figures/plot_mds-1.png b/man/figures/plot_mds-1.png
index 3be8cef6..78c69a32 100644
Binary files a/man/figures/plot_mds-1.png and b/man/figures/plot_mds-1.png differ
diff --git a/man/figures/plot_normalise-1.png b/man/figures/plot_normalise-1.png
index 19092d3e..7a882512 100644
Binary files a/man/figures/plot_normalise-1.png and b/man/figures/plot_normalise-1.png differ
diff --git a/man/figures/plot_pca-1.png b/man/figures/plot_pca-1.png
index 208dbfec..a5b66ab9 100644
Binary files a/man/figures/plot_pca-1.png and b/man/figures/plot_pca-1.png differ
diff --git a/man/figures/plot_rotate_1-1.png b/man/figures/plot_rotate_1-1.png
index 3d50c395..348b056f 100644
Binary files a/man/figures/plot_rotate_1-1.png and b/man/figures/plot_rotate_1-1.png differ
diff --git a/man/figures/plot_rotate_2-1.png b/man/figures/plot_rotate_2-1.png
index a9aba09d..012019dd 100644
Binary files a/man/figures/plot_rotate_2-1.png and b/man/figures/plot_rotate_2-1.png differ
diff --git a/man/figures/unnamed-chunk-14-1.png b/man/figures/unnamed-chunk-14-1.png
index bfa30c18..ae313d7f 100644
Binary files a/man/figures/unnamed-chunk-14-1.png and b/man/figures/unnamed-chunk-14-1.png differ
diff --git a/man/figures/unnamed-chunk-19-1.png b/man/figures/unnamed-chunk-19-1.png
index 24447bbe..e6072ba1 100644
Binary files a/man/figures/unnamed-chunk-19-1.png and b/man/figures/unnamed-chunk-19-1.png differ