Skip to content

Commit

Permalink
docuemented functions
Browse files Browse the repository at this point in the history
  • Loading branch information
wverastegui committed Aug 29, 2024
1 parent 6ac2542 commit 50ffc18
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 5 deletions.
30 changes: 26 additions & 4 deletions R/adaptive.bin.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
#' @import tibble dplyr
NULL
#> NULL

#' Compute densities for given masses.
#' @description
#' Compute the density estimation for a set of masses using kernel density estimation.
#' It allows for intensity-weighted densities and custom bandwidth functions.
#' @param masses A numeric vector of mass values.
#' @param mz_tol The mass-to-charge ratio tolerance.
#' @param intensity_weighted A logical value indicating whether to weight the densities by intensities.
#' @param intensities A numeric vector of intensity values corresponding to the masses.
#' @param bw_func A function to compute the bandwidth based on the masses.
#' @param n The number of equally spaced points at which the density is to be estimated. Default is 512.
#' @return A density object representing the estimated density of the masses.
#' @export
compute_densities <- function(masses, mz_tol, intensity_weighted, intensities, bw_func, n = 512) {
bandwidth <- 0.5 * mz_tol * bw_func(masses)
Expand All @@ -14,6 +21,15 @@ compute_densities <- function(masses, mz_tol, intensity_weighted, intensities, b
return(all.mass.den)
}

#' Compute mass values based on kernel density estimation.
#' @description
#' This function computes the mass values by performing kernel density estimation on the given masses.
#' It identifies the valleys in the density plot to determine the mass values.
#' @param mz_tol The mass-to-charge ratio tolerance.
#' @param masses A numeric vector of mass values.
#' @param intensity_binned A numeric vector of binned intensity values corresponding to the masses.
#' @param intensity_weighted A logical value indicating whether to weight the densities by intensities.
#' @return A numeric vector of mass values corresponding to the valleys in the density plot.
#' @export
compute_mass_values <- function(mz_tol, masses, intensity_binned, intensity_weighted) {
n <- 2^min(15, floor(log2(length(masses))) - 2)
Expand All @@ -33,6 +49,12 @@ compute_breaks <- function(mz_tol, masses, intensity_binned, intensity_weighted)
}


#' Increment pointers in a list.
#' @description
#' This function increments the values of `prof.pointer`, `height.pointer`, and `curr.label` in the provided list of pointers.
#' @param pointers A list containing the pointers to be incremented.
#' @param that.n An integer value to increment the `prof.pointer`.
#' @return The updated list of pointers.
#' @export
increment_counter <- function(pointers, that.n){
pointers$prof.pointer <- pointers$prof.pointer + that.n
Expand Down
52 changes: 52 additions & 0 deletions R/plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,33 @@ tolerance_plot <- function(x, y, exp_y, selected, main) {
abline(v = x[selected], col = "blue")
}

#' Draw retention time normal peaks.
#' @description
#' This function draws the normal peaks for retention time data based on the provided parameters.
#' @param x A numeric vector of retention time values.
#' @param truth A numeric vector containing the parameters for the normal peaks:
#' \itemize{
#' \item mean of the peak
#' \item standard deviation for the left side of the peak
#' \item standard deviation for the right side of the peak
#' \item scaling factor for the peak height
#' }
#' @export
draw_rt_normal_peaks <- function(x, truth) {
true.y1 <- dnorm(x[x < truth[1]], mean = truth[1], sd = truth[2]) * truth[2] * truth[4]
true.y2 <- dnorm(x[x >= truth[1]], mean = truth[1], sd = truth[3]) * truth[3] * truth[4]
lines(x, c(true.y1, true.y2), col = "green")
}

#' Plot raw profile histogram.
#' @description
#' This function plots various histograms and density plots for a given raw profile, including noise groups, selected groups, retention time range distribution, and signal presence distribution.
#' @param raw.prof A list containing raw profile data, including height records and minimum count run.
#' @param min_pres A numeric value indicating the minimum presence threshold.
#' @param baseline.correct A numeric value for baseline correction. If NA, it will be computed automatically.
#' @param baseline.correct.noise.percentile A numeric value indicating the percentile of noise group heights for baseline correction.
#' @param mz_tol A numeric value for the mass-to-charge ratio tolerance.
#' @param new.prof A list containing new profile data, including height records, time range records, and m/z presence records.
#' @export
plot_raw_profile_histogram <- function(raw.prof,
min_pres,
Expand Down Expand Up @@ -80,6 +100,11 @@ plot_raw_profile_histogram <- function(raw.prof,
)
}

#' Plot peak summary.
#' @description
#' This function plots a summary of peak characteristics, including m/z standard deviation, retention time standard deviation, and peak strength.
#' @param feature_groups A list of data frames, where each data frame represents a group of features with m/z values.
#' @param processed_features A data frame containing processed feature information with columns "sd1", "sd2", and "area".
#' @export
plot_peak_summary <- function(feature_groups, processed_features) {
mz_sd <- compute_mz_sd(feature_groups)
Expand All @@ -92,6 +117,13 @@ plot_peak_summary <- function(feature_groups, processed_features) {
hist(log10(processed_features[, "area"]), xlab = "peak strength (log scale)", ylab = "Frequency", main = "Peak strength distribution")
}

#' Plot retention time profile.
#' @description
#' This function plots the retention time profile, including the base curve, intensity, and fitted components.
#' @param rt_profile A data frame containing the retention time profile with columns "base_curve" and "intensity".
#' @param bw The bandwidth used for the kernel density estimation.
#' @param fit A matrix containing the fitted components for the retention time profile.
#' @param m A numeric vector of positions where vertical lines should be drawn.
#' @export
plot_rt_profile <- function(rt_profile, bw, fit, m) {
plot(rt_profile[, "base_curve"], rt_profile[, "intensity"], cex = .1, main = paste("bw=", bw))
Expand All @@ -105,6 +137,18 @@ plot_rt_profile <- function(rt_profile, bw, fit, m) {
}
}

#' Plot normalized mixture model with BIC.
#' @description
#' This function plots the data points and the fitted Gaussian mixture model components with different bandwidths.
#' @param x A numeric vector of data points on the x-axis.
#' @param y A numeric vector of data points on the y-axis.
#' @param bw The bandwidth used for the kernel density estimation.
#' @param aaa A matrix containing the parameters of the Gaussian mixture model components:
#' \itemize{
#' \item mean of the Gaussian component
#' \item standard deviation of the Gaussian component
#' \item scaling factor for the Gaussian component
#' }
#' @export
plot_normix_bic <- function(x, y, bw, aaa) {
plot(x, y, cex = .1, main = paste("bw=", bw))
Expand All @@ -116,6 +160,14 @@ plot_normix_bic <- function(x, y, bw, aaa) {
}
}

#' Draw retention time correction plot.
#' @description
#' This function draws a plot showing the retention time correction for extracted features.
#' It plots the deviation of corrected retention times from the original retention times.
#' @param colors A character vector of colors to use for plotting each sample. If NA, default colors are used.
#' @param extracted_features A list of data frames, where each data frame contains the original retention times and m/z values of features.
#' @param corrected_features A list of data frames, where each data frame contains the corrected retention times of features.
#' @param rt_tol_relative A numeric value representing the relative retention time tolerance.
#' @export
draw_rt_correction_plot <- function(colors,
extracted_features,
Expand Down
6 changes: 6 additions & 0 deletions R/two.step.hybrid.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ wide_to_long_feature_table <- function(wide_table, sample_names) {
return(long_features)
}

#' Extract column names matching a pattern.
#' @description
#' This function extracts the column names from a dataframe that contain a specified pattern.
#' @param dataframe A dataframe from which to extract column names.
#' @param pattern A character string containing the pattern to match in the column names.
#' @return A character vector of column names that match the specified pattern.
extract_pattern_colnames <- function(dataframe, pattern) {
dataframe <- dplyr::select(dataframe, contains(pattern))
return(colnames(dataframe))
Expand Down
14 changes: 13 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,21 @@ load_aligned_features <- function(metadata_file, intensities_file, rt_file, tol_
return(result)
}

#' Calculate the span of a numeric vector.
#' @description
#' This function calculates the span (range) of a numeric vector, ignoring NA values.
#' @param x A numeric vector.
#' @return A numeric value representing the span of the vector.
#' @export
span <- function(x) {
diff(range(x, na.rm = TRUE))
}

#' Compute standard deviation of m/z values for feature groups.
#' @description
#' Compute standard deviation of m/z values groupwise
#' This function computes the standard deviation of m/z values for each group of features.
#' @param feature_groups A list of data frames, where each data frame represents a group of features with m/z values.
#' @return A numeric vector of standard deviations of m/z values for each feature group.
#' @export
compute_mz_sd <- function(feature_groups) {
mz_sd <- c()
Expand All @@ -146,6 +154,10 @@ compute_mz_sd <- function(feature_groups) {
return(mz_sd)
}

#' Get the number of available worker cores.
#' @description
#' This function determines the number of available worker cores, taking into account CRAN's limit on the number of cores.
#' @return An integer representing the number of available worker cores.
#' @export
get_num_workers <- function() {
# CRAN limits the number of cores available to packages to 2
Expand Down

0 comments on commit 50ffc18

Please sign in to comment.