From c7cb65865a5f336cd993138b0b75d7b50d1f4c85 Mon Sep 17 00:00:00 2001
From: Gene233 <chen_jj97@163.com>
Date: Wed, 13 Nov 2024 13:21:54 +1100
Subject: [PATCH] add batch param to top_markers generic, methods and top_markers_init.

---
 R/AllGenerics.R                   |  1 +
 R/top_markers-methods.R           | 13 ++++++++++++-
 R/top_markers.R                   |  4 ++++
 man/top_markers.Rd                |  5 +++++
 man/top_markers_init.Rd           |  3 +++
 tests/testthat/test-top_markers.R | 13 ++++++++++++-
 6 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/R/AllGenerics.R b/R/AllGenerics.R
index c94af85..97ee8c1 100644
--- a/R/AllGenerics.R
+++ b/R/AllGenerics.R
@@ -73,6 +73,7 @@ setGeneric(
            label,
            n = 10,
            use.glm = TRUE,
+           batch = NULL,
            scale = TRUE,
            use.mgm = TRUE,
            softmax = TRUE,
diff --git a/R/top_markers-methods.R b/R/top_markers-methods.R
index bb02816..e4dca7c 100644
--- a/R/top_markers-methods.R
+++ b/R/top_markers-methods.R
@@ -10,6 +10,7 @@ setMethod(
            label,
            n = 10,
            use.glm = TRUE,
+           batch = NULL,
            scale = TRUE,
            use.mgm = TRUE,
            softmax = TRUE,
@@ -20,6 +21,8 @@ setMethod(
       "label must be atomic and the same length as data column number" =
         is.atomic(label) & length(label) == ncol(data),
       "use.glm must be logical" = is.logical(use.glm),
+      "batch must be atomic and the same length as data column number" =
+        (is.atomic(batch) & length(batch) == ncol(data)) | is.null(batch),
       "scale must be logical" = is.logical(scale),
       "use.mgm must be logical" = is.logical(use.mgm),
       "softmax must be logical" = is.logical(softmax)
@@ -30,6 +33,7 @@ setMethod(
       label = label,
       n = n,
       use.glm = use.glm,
+      batch = batch,
       scale = scale,
       use.mgm = use.mgm,
       softmax = softmax,
@@ -49,6 +53,7 @@ setMethod(
            label,
            n = 10,
            use.glm = TRUE,
+           batch = NULL,
            scale = TRUE,
            use.mgm = TRUE,
            softmax = TRUE,
@@ -57,19 +62,25 @@ setMethod(
     ## check
     stopifnot(
       "label must be a single character for se object" =
-        is.character(label) & length(label) == 1
+        is.character(label) & length(label) == 1,
+      "batch must be NA or a single character for se object" =
+        is.null(batch) | (is.character(batch) & length(batch) == 1)
     )
 
     ## get expr
     expr <- SummarizedExperiment::assay(data, i = slot)
     ## get label
     label <- SummarizedExperiment::colData(data)[[label]]
+    ## get batch
+    if(!is.null(batch))
+      batch <- SummarizedExperiment::colData(data)[[batch]]
 
     top_m <- top_markers(
       data = expr,
       label = label,
       n = n,
       use.glm = use.glm,
+      batch = batch,
       scale = scale,
       use.mgm = use.mgm,
       softmax = softmax,
diff --git a/R/top_markers.R b/R/top_markers.R
index cc4cb1f..38020b7 100644
--- a/R/top_markers.R
+++ b/R/top_markers.R
@@ -14,6 +14,7 @@
 #' top_markers_init(data, label = rep(c("A", "B"), 5))
 top_markers_init <- function(data, label, n = 10,
                              use.glm = TRUE,
+                             batch = NULL,
                              scale = TRUE,
                              use.mgm = TRUE,
                              softmax = TRUE,
@@ -22,6 +23,7 @@ top_markers_init <- function(data, label, n = 10,
     data <- top_markers_glm(
       data = data,
       label = label,
+      batch = batch,
       n = n,
       scale = scale,
       use.mgm = use.mgm,
@@ -151,6 +153,8 @@ top_markers_glm <- function(data, label, n = 10,
     ## model with group label only
     betas <- apply(data, 1, \(s) glm(s ~ 0 + label, family = family)$coef)
   } else {
+    ## factorize batch label
+    batch <- factor(batch)
     ## model with both group and batch label
     betas <- apply(data, 1, \(s) glm(s ~ 0 + label + batch, family = family)$coef)
     ## only extract betas of group label
diff --git a/man/top_markers.Rd b/man/top_markers.Rd
index d70e0b5..bfccb24 100644
--- a/man/top_markers.Rd
+++ b/man/top_markers.Rd
@@ -11,6 +11,7 @@ top_markers(
   label,
   n = 10,
   use.glm = TRUE,
+  batch = NULL,
   scale = TRUE,
   use.mgm = TRUE,
   softmax = TRUE,
@@ -23,6 +24,7 @@ top_markers(
   label,
   n = 10,
   use.glm = TRUE,
+  batch = NULL,
   scale = TRUE,
   use.mgm = TRUE,
   softmax = TRUE,
@@ -35,6 +37,7 @@ top_markers(
   label,
   n = 10,
   use.glm = TRUE,
+  batch = NULL,
   scale = TRUE,
   use.mgm = TRUE,
   softmax = TRUE,
@@ -52,6 +55,8 @@ top_markers(
 \item{use.glm}{logical, if to use \code{\link[stats:glm]{stats::glm()}} to compute group mean score,
 if TRUE, also compute mean score difference as output}
 
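+\item{batch}{a vector of batch labels, atomic and the same length as data column number, or a single character naming the batch column in colData for se object, default NULL}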
+
 \item{scale}{logical, if to scale data by row}
 
 \item{use.mgm}{logical, if to scale data using \code{\link[=scale_mgm]{scale_mgm()}}}
diff --git a/man/top_markers_init.Rd b/man/top_markers_init.Rd
index 251b0aa..14cbcca 100644
--- a/man/top_markers_init.Rd
+++ b/man/top_markers_init.Rd
@@ -9,6 +9,7 @@ top_markers_init(
   label,
   n = 10,
   use.glm = TRUE,
+  batch = NULL,
   scale = TRUE,
   use.mgm = TRUE,
   softmax = TRUE,
@@ -25,6 +26,8 @@ top_markers_init(
 \item{use.glm}{logical, if to use \code{\link[stats:glm]{stats::glm()}} to compute group mean score,
 if TRUE, also compute mean score difference as output}
 
+\item{batch}{a vector of batch labels, one for each column of data, default NULL}
+
 \item{scale}{logical, if to scale data by row}
 
 \item{use.mgm}{logical, if to scale data using \code{\link[=scale_mgm]{scale_mgm()}}}
diff --git a/tests/testthat/test-top_markers.R b/tests/testthat/test-top_markers.R
index 93f4a08..81bd910 100644
--- a/tests/testthat/test-top_markers.R
+++ b/tests/testthat/test-top_markers.R
@@ -2,10 +2,11 @@ test_that("top markers works", {
   ## sim data
   expr <- matrix(rgamma(100, 2), 10, dimnames = list(1:10))
   label <- rep(c("A", "B"), 5)
+  batch <- sample(seq_len(2), 10, replace = TRUE)
 
   se <- SummarizedExperiment::SummarizedExperiment(
     assays = list(counts = expr),
-    colData = data.frame(group = label)
+    colData = data.frame(group = label, batch = batch)
   )
 
   ## test matrix
@@ -50,4 +51,14 @@ test_that("top markers works", {
     slot = "counts"
   )
   expect_s3_class(res, "data.frame")
+
+  ## test batch
+  res <- top_markers(
+    data = se,
+    label = "group",
+    batch = "batch",
+    n = 3,
+    slot = "counts"
+  )
+  expect_s3_class(res, "data.frame")
 })