Skip to content

Commit

Permalink
voom object now passable to run_limma_splines(), updated documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas-Rauter committed Sep 20, 2024
1 parent 5b08292 commit 450e4f7
Show file tree
Hide file tree
Showing 29 changed files with 1,410 additions and 1,093 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@
^CODE_OF_CONDUCT\.md$
^inst/CITATION\.cff$
^pkgdown$
_pkgdown.yml$

1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(explore_data)
export(extract_data)
export(open_template)
export(open_tutorial)
export(preprocess_rna_seq_data)
export(run_gsea)
export(run_limma_splines)
export(screen_limma_hyperparams)
Expand Down
7 changes: 5 additions & 2 deletions R/cluster_hits.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ cluster_hits <- function(
analysis_type = "time_effect",
report = TRUE
) {

report_dir <- normalizePath(
report_dir,
mustWork = FALSE
Expand Down Expand Up @@ -226,6 +226,9 @@ cluster_hits <- function(

# Add gene column for the run_gsea() function.
clustered_hits_levels <- lapply(clustered_hits_levels, function(df) {
if (is.character(df)) {
return(df)
}
df$gene <- genes[df$feature]
return(df)
})
Expand Down Expand Up @@ -490,7 +493,7 @@ make_clustering_report <- function(
analysis_type,
feature_name_columns
) {

# Optionally remove the batch-effect with the batch column and design matrix
# For mode == "integrated", the batch-effect is removed from the whole data
# For mode == "isolated", the batch-effect is removed for every level
Expand Down
124 changes: 124 additions & 0 deletions R/preprocess_rna_seq_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Exported function: preprocess_rna_seq_data() ---------------------------------


#' Perform default preprocessing of raw RNA-seq counts
#'
#' @description
#' The `preprocess_rna_seq_data()` function performs essential preprocessing
#' steps for raw RNA-seq counts. This includes creating a `DGEList` object,
#' normalizing the counts using the default TMM (Trimmed Mean of M-values)
#' normalization via the `edgeR::calcNormFactors` function, and applying the
#' `voom` transformation from the `limma` package to obtain log-transformed
#' counts per million (logCPM) with associated precision weights. If you
#' require a different normalization method, you can supply your own
#' custom normalization function.
#'
#' The `edgeR` package is needed for this function. If it is not installed,
#' the user is asked (in interactive sessions only) whether it should be
#' installed automatically from Bioconductor. In non-interactive sessions the
#' function stops with an error instead of prompting.
#'
#' @param raw_counts A matrix of raw RNA-seq counts (genes as rows, samples as
#' columns).
#' @param meta A dataframe containing the metadata for the data.
#' @param spline_params Parameters for the spline functions. Must contain
#' the named element spline_type, which must contain either the string "n" for
#' natural cubic splines, or "b" for B-splines; the named element degree in the
#' case of B-splines, which must contain only an integer; and the named element
#' dof, specifying the degree of freedom, containing an integer and required
#' both for natural and B-splines. This argument has no default and is
#' forwarded to the internal design matrix construction.
#' @param design A design formula for the limma analysis, such as
#' '~ 1 + Phase*X + Reactor'.
#' @param normalize_func An optional normalization function. If provided, this
#' function will be used to normalize the `DGEList` object. If not provided,
#' TMM normalization (via `edgeR::calcNormFactors`) will be used by default.
#' Must take as
#' input the y of: y <- edgeR::DGEList(counts = raw_counts) and output the y
#' with the normalized counts. If a value that is neither `NULL` nor a function
#' is supplied, a warning is issued and the default TMM normalization is used.
#' @return A `voom` object, which includes the log2-counts per million (logCPM)
#' matrix and observation-specific weights.
#'
#' @importFrom limma voom
#'
#' @export
#'
preprocess_rna_seq_data <- function(
    raw_counts,
    meta,
    spline_params,
    design,
    normalize_func = NULL
) {

  message("Preprocessing RNA-seq data (normalization + voom)...")

  # edgeR is accessed via `edgeR::` below, so make sure it is available
  # before doing any work (may prompt the user in interactive sessions).
  ensure_edger_installed()

  # Build the design matrix that voom uses to model the mean-variance trend.
  # NOTE(review): level_index = 1 is hard-coded here; its exact meaning is
  # defined by design2design_matrix() -- confirm it is correct for all designs.
  design_matrix <- design2design_matrix(
    meta = meta,
    spline_params = spline_params,
    level_index = 1,
    design = design
  )

  # Step 1: Create DGEList object from raw counts
  y <- edgeR::DGEList(counts = raw_counts)

  # Step 2: Apply the normalization function (either user-provided or default)
  if (is.function(normalize_func)) {
    y <- normalize_func(y)   # user provided normalisation function
  } else {
    if (!is.null(normalize_func)) {
      # Previously a non-function value was ignored without any notice, which
      # could hide a user mistake. Keep the fallback, but make it visible.
      warning(
        "'normalize_func' is not a function and will be ignored. ",
        "Falling back to the default TMM normalization.",
        call. = FALSE
      )
    }
    # Default: Normalize the counts using TMM normalization
    y <- edgeR::calcNormFactors(y)
  }

  # Step 3: Apply voom transformation to get logCPM values and weights
  voom_obj <- limma::voom(
    y,
    design_matrix
  )

  return(voom_obj)
}


# Internal helper: ensure_edger_installed() ------------------------------------


#' Ensure that the 'edgeR' package is available
#'
#' @description
#' Returns immediately when 'edgeR' can be loaded. Otherwise, in an interactive
#' session, the user is asked whether 'edgeR' should be installed automatically
#' via BiocManager, installed manually, or whether the operation should be
#' cancelled. In a non-interactive session no prompt is possible, so the
#' function stops with an error that explains how to install 'edgeR'.
#'
#' @return `TRUE`, invisibly, when 'edgeR' is available. Otherwise an error is
#' raised.
#'
#' @noRd
#'
ensure_edger_installed <- function() {

  if (requireNamespace("edgeR", quietly = TRUE)) {
    return(invisible(TRUE))
  }

  message("The 'edgeR' package is not installed.")

  # readline() returns "" immediately in non-interactive sessions, which would
  # make the prompt loop below spin forever. Fail fast instead.
  if (!interactive()) {
    stop(
      "The 'edgeR' package is required but not installed. ",
      "Please install 'edgeR' manually using ",
      "BiocManager::install('edgeR') and then re-run the function.",
      call. = FALSE
    )
  }

  install_failed_msg <- paste0(
    "Automatic installation of 'edgeR' failed. ",
    "Please install it manually and try again."
  )

  prompt_text <- paste0(
    "What would you like to do?\n",
    "1: Automatically install edgeR\n",
    "2: Manually install edgeR\n",
    "3: Cancel\n",
    "Please enter 1, 2, or 3: "
  )

  # Prompt user for action until a valid choice is entered
  repeat {
    user_input <- trimws(readline(prompt = prompt_text))

    if (user_input == "1") {
      # Try to install edgeR automatically from Bioconductor
      message(
        "Attempting to install 'edgeR' automatically from Bioconductor..."
      )
      if (!requireNamespace("BiocManager", quietly = TRUE)) {
        utils::install.packages("BiocManager")
      }
      tryCatch(
        {
          BiocManager::install("edgeR", update = FALSE)
        },
        error = function(e) {
          stop(install_failed_msg, call. = FALSE)
        }
      )
      # A failed installation is often reported only as a warning, so verify
      # that the package can actually be loaded before continuing.
      if (!requireNamespace("edgeR", quietly = TRUE)) {
        stop(install_failed_msg, call. = FALSE)
      }
      return(invisible(TRUE))
    } else if (user_input == "2") {
      stop(
        "Please install 'edgeR' manually using ",
        "BiocManager::install('edgeR') and then re-run the function.",
        call. = FALSE
      )
    } else if (user_input == "3") {
      stop("Operation canceled by the user.", call. = FALSE)
    } else {
      message("Invalid input. Please enter 1, 2, or 3.")
    }
  }
}
Loading

0 comments on commit 450e4f7

Please sign in to comment.