From c7cb65865a5f336cd993138b0b75d7b50d1f4c85 Mon Sep 17 00:00:00 2001 From: Gene233 Date: Wed, 13 Nov 2024 13:21:54 +1100 Subject: [PATCH] update batch param in top_markers function. --- R/AllGenerics.R | 1 + R/top_markers-methods.R | 13 ++++++++++++- R/top_markers.R | 4 ++++ man/top_markers.Rd | 5 +++++ man/top_markers_init.Rd | 3 +++ tests/testthat/test-top_markers.R | 13 ++++++++++++- 6 files changed, 37 insertions(+), 2 deletions(-) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index c94af85..97ee8c1 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -73,6 +73,7 @@ setGeneric( label, n = 10, use.glm = TRUE, + batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, diff --git a/R/top_markers-methods.R b/R/top_markers-methods.R index bb02816..e4dca7c 100644 --- a/R/top_markers-methods.R +++ b/R/top_markers-methods.R @@ -10,6 +10,7 @@ setMethod( label, n = 10, use.glm = TRUE, + batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, @@ -20,6 +21,8 @@ setMethod( "label must be atomic and the same length as data column number" = is.atomic(label) & length(label) == ncol(data), "use.glm must be logical" = is.logical(use.glm), + "batch must be atomic and the same length as data column number" = + (is.atomic(batch) & length(batch) == ncol(data)) | is.null(batch), "scale must be logical" = is.logical(scale), "use.mgm must be logical" = is.logical(use.mgm), "softmax must be logical" = is.logical(softmax) @@ -30,6 +33,7 @@ setMethod( label = label, n = n, use.glm = use.glm, + batch = batch, scale = scale, use.mgm = use.mgm, softmax = softmax, @@ -49,6 +53,7 @@ setMethod( label, n = 10, use.glm = TRUE, + batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, @@ -57,19 +62,25 @@ setMethod( ## check stopifnot( "label must be a single character for se object" = - is.character(label) & length(label) == 1 + is.character(label) & length(label) == 1, + "batch must be NA or a single character for se object" = + is.null(batch) | (is.character(batch) & length(batch) 
== 1) ) ## get expr expr <- SummarizedExperiment::assay(data, i = slot) ## get label label <- SummarizedExperiment::colData(data)[[label]] + ## get batch + if(!is.null(batch)) + batch <- SummarizedExperiment::colData(data)[[batch]] top_m <- top_markers( data = expr, label = label, n = n, use.glm = use.glm, + batch = batch, scale = scale, use.mgm = use.mgm, softmax = softmax, diff --git a/R/top_markers.R b/R/top_markers.R index cc4cb1f..38020b7 100644 --- a/R/top_markers.R +++ b/R/top_markers.R @@ -14,6 +14,7 @@ #' top_markers_init(data, label = rep(c("A", "B"), 5)) top_markers_init <- function(data, label, n = 10, use.glm = TRUE, + batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, @@ -22,6 +23,7 @@ top_markers_init <- function(data, label, n = 10, data <- top_markers_glm( data = data, label = label, + batch = batch, n = n, scale = scale, use.mgm = use.mgm, @@ -151,6 +153,8 @@ top_markers_glm <- function(data, label, n = 10, ## model with group label only betas <- apply(data, 1, \(s) glm(s ~ 0 + label, family = family)$coef) } else { + ## factorize batch label + batch <- factor(batch) ## model with both group and batch label betas <- apply(data, 1, \(s) glm(s ~ 0 + label + batch, family = family)$coef) ## only extract betas of group label diff --git a/man/top_markers.Rd b/man/top_markers.Rd index d70e0b5..bfccb24 100644 --- a/man/top_markers.Rd +++ b/man/top_markers.Rd @@ -11,6 +11,7 @@ top_markers( label, n = 10, use.glm = TRUE, + batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, @@ -23,6 +24,7 @@ top_markers( label, n = 10, use.glm = TRUE, + batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, @@ -35,6 +37,7 @@ top_markers( label, n = 10, use.glm = TRUE, + batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, @@ -52,6 +55,8 @@ top_markers( \item{use.glm}{logical, if to use \code{\link[stats:glm]{stats::glm()}} to compute group mean score, if TRUE, also compute mean score difference as output} +\item{batch}{a vector of batch labels, 
default NULL; for a SummarizedExperiment, a single column name in colData} + \item{scale}{logical, if to scale data by row} \item{use.mgm}{logical, if to scale data using \code{\link[=scale_mgm]{scale_mgm()}}} diff --git a/man/top_markers_init.Rd b/man/top_markers_init.Rd index 251b0aa..14cbcca 100644 --- a/man/top_markers_init.Rd +++ b/man/top_markers_init.Rd @@ -9,6 +9,7 @@ top_markers_init( label, n = 10, use.glm = TRUE, + batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, @@ -25,6 +26,8 @@ top_markers_init( \item{use.glm}{logical, if to use \code{\link[stats:glm]{stats::glm()}} to compute group mean score, if TRUE, also compute mean score difference as output} +\item{batch}{a vector of batch labels, one per column of data, default NULL} + \item{scale}{logical, if to scale data by row} \item{use.mgm}{logical, if to scale data using \code{\link[=scale_mgm]{scale_mgm()}}} diff --git a/tests/testthat/test-top_markers.R b/tests/testthat/test-top_markers.R index 93f4a08..81bd910 100644 --- a/tests/testthat/test-top_markers.R +++ b/tests/testthat/test-top_markers.R @@ -2,10 +2,11 @@ test_that("top markers works", { ## sim data expr <- matrix(rgamma(100, 2), 10, dimnames = list(1:10)) label <- rep(c("A", "B"), 5) + batch <- sample(seq_len(2), 10, replace = TRUE) se <- SummarizedExperiment::SummarizedExperiment( assays = list(counts = expr), - colData = data.frame(group = label) + colData = data.frame(group = label, batch = batch) ) ## test matrix @@ -50,4 +51,14 @@ test_that("top markers works", { slot = "counts" ) expect_s3_class(res, "data.frame") + + ## test batch + res <- top_markers( + data = se, + label = "group", + batch = "batch", + n = 3, + slot = "counts" + ) + expect_s3_class(res, "data.frame") })