diff --git a/R/adjust.time.R b/R/adjust.time.R index 31882237..0687211f 100644 --- a/R/adjust.time.R +++ b/R/adjust.time.R @@ -79,7 +79,7 @@ compute_template <- function(extracted_features) { template <- which.max(num.ftrs) message(paste("the template is sample", template)) - candi <- extracted_features[[template]] |> dplyr::select(c(mz, rt)) + candi <- tibble::as_tibble(extracted_features[[template]]) |> dplyr::select(c(mz, rt)) template_features <- dplyr::bind_cols(candi, sample_id = rep(template, nrow(candi))) return(tibble::as_tibble(template_features)) } @@ -120,28 +120,22 @@ correct_time <- function(this.feature, template_features, mz_tol_relative, rt_to #' This function adjusts the retention time in each LC/MS profile to achieve better between-profile agreement. #' #' @param extracted_features A list object. Each component is a matrix which is the output from compute_clusters -#' @param mz_tol_relative The m/z tolerance level for peak alignment. The default is NA, which allows the -#' program to search for the tolerance level based on the data. This value is expressed as the +#' @param mz_tol_relative The m/z tolerance level for peak alignment. This value is expressed as the #' percentage of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level. -#' @param rt_tol_relative The retention time tolerance level for peak alignment. The default is NA, which -#' allows the program to search for the tolerance level based on the data. +#' @param rt_tol_relative The retention time tolerance level for peak alignment. #' @param colors The vector of colors to be used for the line plots of time adjustments. The default is NA, #' in which case the program uses a set of default color set. -#' @param mz_max_diff Argument passed to find.tol(). Consider only m/z diffs smaller than this value. -#' This is only used when the mz_tol_relative is NA. -#' @param mz_tol_absolute As the m/z tolerance is expressed in relative terms (ppm), it may not be suitable -#' when the m/z range is wide. This parameter limits the tolerance in absolute terms. It mostly -#' influences feature matching in higher m/z range. #' @param do.plot Indicates whether plot should be drawn. #' @return A list object with the exact same structure as the input object features, i.e. one matrix per profile #' being processed. The only difference this output object has with the input object is that the retention time #' column in each of the matrices is changed to new adjusted values. #' @export #' @examples -#' adjust.time(extracted_features, mz_max_diff = 10 * 1e-05, do.plot = FALSE) +#' data(extracted) +#' adjust.time(extracted, 10e-06, 5, do.plot = FALSE) adjust.time <- function(extracted_features, - mz_tol_relative = NA, - rt_tol_relative = NA, + mz_tol_relative, + rt_tol_relative, colors = NA, do.plot = TRUE) { number_of_samples <- length(extracted_features) diff --git a/R/feature.align.R b/R/feature.align.R index 676346b2..81991f48 100644 --- a/R/feature.align.R +++ b/R/feature.align.R @@ -2,9 +2,9 @@ create_empty_tibble <- function(number_of_samples, metadata_colnames, intensity_colnames, rt_colnames) { features <- new("list") - features$metadata <- as_tibble(matrix(nrow = 0, ncol = length(metadata_colnames)), .name_repair = ~ metadata_colnames) - features$intensity <- as_tibble(matrix(nrow = 0, ncol = length(intensity_colnames)), .name_repair = ~ intensity_colnames) - features$rt <- as_tibble(matrix(nrow = 0, ncol = length(rt_colnames)), .name_repair = ~ rt_colnames) + features$metadata <- tibble::as_tibble(matrix(nrow = 0, ncol = length(metadata_colnames)), .name_repair = ~ metadata_colnames) + features$intensity <- tibble::as_tibble(matrix(nrow = 0, ncol = length(intensity_colnames)), .name_repair = ~ intensity_colnames) + features$rt <- tibble::as_tibble(matrix(nrow = 0, ncol = length(rt_colnames)), .name_repair = ~ rt_colnames) return(features) } @@ -12,7 +12,7 @@ create_empty_tibble <- function(number_of_samples, metadata_colnames, intensity_ add_row <- function(df, data, i, column_names) { row <- matrix(c(i, data), nrow=1) colnames(row) <- column_names - return(bind_rows(df, as_tibble(row))) + return(dplyr::bind_rows(df, tibble::as_tibble(row))) } @@ -101,7 +101,7 @@ create_rows <- function(features, gc() } # call Garbage Collection for performance improvement? - sample <- filter(features, cluster == sel.labels[i]) + sample <- dplyr::filter(features, cluster == sel.labels[i]) if (nrow(sample) > 1) { if (validate_contents(sample, min_occurrence)) { return(select_mz(sample, mz_tol_relative, rt_tol_relative, min_occurrence, number_of_samples)) @@ -177,7 +177,8 @@ create_aligned_feature_table <- function(all_table, #' } #' @export #' @examples -#' feature.align(features, mz_max_diff = 10 * 1e-05, do.plot = FALSE) +#' data(extracted) +#' feature.align(extracted, mz_max_diff = 10 * 1e-05, do.plot = FALSE) feature.align <- function(features, min_occurrence = 2, mz_tol_relative = NA, diff --git a/data/datalist b/data/datalist index 9a11cbe9..daa8bbec 100644 --- a/data/datalist +++ b/data/datalist @@ -1,5 +1,6 @@ adduct.table aligned +extracted features.learn features features2.learn diff --git a/data/extracted.rda b/data/extracted.rda new file mode 100644 index 00000000..37779b61 Binary files /dev/null and b/data/extracted.rda differ diff --git a/man/adjust.time.Rd b/man/adjust.time.Rd index f19ff29e..814bc4b1 100644 --- a/man/adjust.time.Rd +++ b/man/adjust.time.Rd @@ -1,37 +1,39 @@ -\encoding{latin1} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/adjust.time.R \name{adjust.time} \alias{adjust.time} -%- Also NEED an '\alias' for EACH other topic documented here. -\title{ Adjust retention time across spectra. } -\description{ - This function adjusts the retention time in each LC/MS profile to achieve better between-profile agreement. -} +\title{Adjust retention time across spectra.} \usage{ -adjust.time(features, mz_tol_relative = NA, rt_tol_relative = NA, colors=NA, - mz_max_diff=1e-4, mz_tol_absolute=0.01, do.plot=TRUE) +adjust.time( + extracted_features, + mz_tol_relative, + rt_tol_relative, + colors = NA, + do.plot = TRUE +) } \arguments{ - \item{features}{ A list object. Each component is a matrix which is the output from proc.to.feature(). } - \item{mz_tol_relative}{ The m/z tolerance level for peak alignment. The default is NA, which allows the program to search for the tolerance level based on the data. This value is expressed as the percentage of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level.} - \item{rt_tol_relative}{ The retention time tolerance level for peak alignment. The default is NA, which allows the program to search for the tolerance level based on the data. } - \item{colors}{ The vector of colors to be used for the line plots of time adjustments. The default is NA, in which case the program uses a set of default color set. } - \item{mz_max_diff}{Argument passed to find.tol(). Consider only m/z diffs smaller than this value. This is only used when the mz_tol_relative is NA. } - \item{mz_tol_absolute}{As the m/z tolerance is expressed in relative terms (ppm), it may not be suitable when the m/z range is wide. This parameter limits the tolerance in absolute terms. It mostly influences feature matching in higher m/z range.} - \item{do.plot}{Indicates whether plot should be drawn.} - \item{rt_colname}{contains the retention time information} -} -\details{ - The function first searches for the m/z tolerance level using a mixture model. After the mz.tol is obtained, the peaks are grouped based on it. The function then searches for the retention time tolerance level. Because the peaks are grouped using m/z, only metabolites that share m/z require this parameter. A rather lenient retention time tolerance level is found using a mixture model. - - The profile with the highest number of peaks is selected as the template and every other spetrum is adjusted to it one at a time. At every m/z value, if each of the two spetra has just one peak, and the peaks are within the retention time tolerance range, the pair of retention time values are used in the curve fitting. A kernel smoother is fitted using the difference in retention time against the retention time in the profile to be adjusted. +\item{extracted_features}{A list object. Each component is a matrix which is the output from compute_clusters} + +\item{mz_tol_relative}{The m/z tolerance level for peak alignment. This value is expressed as the +percentage of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level.} + +\item{rt_tol_relative}{The retention time tolerance level for peak alignment.} + +\item{colors}{The vector of colors to be used for the line plots of time adjustments. The default is NA, +in which case the program uses a set of default color set.} + +\item{do.plot}{Indicates whether plot should be drawn.} } \value{ -A list object with the exact same structure as the input object features, i.e. one matrix per profile being processed. The only difference this output object has with the input object is that the retention time column in each of the matrices is changed to new adjusted values. +A list object with the exact same structure as the input object features, i.e. one matrix per profile + being processed. The only difference this output object has with the input object is that the retention time + column in each of the matrices is changed to new adjusted values. +} +\description{ +This function adjusts the retention time in each LC/MS profile to achieve better between-profile agreement. } -\author{ Tianwei Yu } -\seealso{ feature.align } \examples{ -data(features) -adjusted<-adjust.time(features, colors=c("red","blue","green","cyan")) +data(extracted) +adjust.time(extracted, 10e-06, 5, do.plot = FALSE) } -\keyword{ models } diff --git a/man/feature.align.Rd b/man/feature.align.Rd index 29fcd83f..e081193c 100644 --- a/man/feature.align.Rd +++ b/man/feature.align.Rd @@ -1,43 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/feature.align.R \name{feature.align} \alias{feature.align} -%- Also NEED an '\alias' for EACH other topic documented here. -\title{ Align peaks from spectra into a feature table. } -\description{ - Identifies which of the peaks from the profiles correspond to the same feature. -} +\title{Align peaks from spectra into a feature table.} \usage{ -feature.align(features, min_occurrence = 2, mz_tol_relative = NA, rt_tol_relative = NA, - mz_max_diff=1e-4, mz_tol_absolute=0.01, do.plot=TRUE) +feature.align( + features, + min_occurrence = 2, + mz_tol_relative = NA, + rt_tol_relative = NA, + mz_max_diff = 1e-04, + mz_tol_absolute = 0.01, + do.plot = TRUE +) } \arguments{ - \item{features}{ A list object. Each component is a matrix which is the output from proc.to.feature(). } - \item{min_occurrence}{ A feature has to show up in at least this number of profiles to be included in the final result. } - \item{mz_tol_relative}{ The m/z tolerance level for peak alignment. The default is NA, which allows the program to search for the tolerance level based on the data. This value is expressed as the percentage of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level.} - \item{rt_tol_relative}{ The retention time tolerance level for peak alignment. The default is NA, which allows the program to search for the tolerance level based on the data. } - \item{mz_max_diff}{Argument passed to find.tol(). Consider only m/z diffs smaller than this value.This is only used when the mz_tol_relative is NA. } - \item{mz_tol_absolute}{As the m/z tolerance is expressed in relative terms (ppm), it may not be suitable when the m/z range is wide. This parameter limits the tolerance in absolute terms. It mostly influences feature matching in higher m/z range.} - \item{do.plot}{Indicates whether plot should be drawn.} - \item{rt_colname}{contains the retention time information} -} -\details{ - The function first searches for the m/z tolerance level using a mixture model. After the mz_tol_relative is obtained, the peaks are grouped based on it. Consecutive peaks with m/z value difference smaller than the tolerance level are considered to belong to the same peak group. Non-parametric density estimation within each peak group is used to further split peak groups. - The function then searches for the retention time tolerance level. Because the peaks are grouped using m/z, only metabolites that share m/z require this parameter. A rather lenient retention time tolerance level is found using a mixture model. After splitting the peak groups by this value, non-parametric density estimation is used to further split peak groups. Peaks belonging to one group are considered to correspond to the same feature. +\item{features}{A list object. Each component is a matrix which is the output from proc.to.feature().} + +\item{min_occurrence}{A feature has to show up in at least this number of profiles to be included in the final result.} + +\item{mz_tol_relative}{The m/z tolerance level for peak alignment. The default is NA, which allows the +program to search for the tolerance level based on the data. This value is expressed as the +percentage of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level.} + +\item{rt_tol_relative}{The retention time tolerance level for peak alignment. The default is NA, which +allows the program to search for the tolerance level based on the data.} + +\item{mz_max_diff}{Argument passed to find.tol(). Consider only m/z diffs smaller than this value. +This is only used when the mz_tol_relative is NA.} + +\item{mz_tol_absolute}{As the m/z tolerance is expressed in relative terms (ppm), it may not be suitable +when the m/z range is wide. This parameter limits the tolerance in absolute terms. It mostly +influences feature matching in higher m/z range.} + +\item{do.plot}{Indicates whether plot should be drawn.} } \value{ - Returns a list object with the following objects in it: - \item{aligned.ftrs}{A matrix, with columns of m/z values, elution times, signal strengths in each spectrum.} - \item{pk.times}{A matrix, with columns of m/z, median elution time, and elution times in each spectrum.} - \item{mz.tol}{The m/z tolerance used in the alignment.} - \item{chr.tol}{The elution time tolerance in the alignment.} +Returns a list object with the following objects in it: +\itemize{ + \item aligned.ftrs - A matrix, with columns of m/z values, elution times, signal strengths in each spectrum. + \item pk.times - A matrix, with columns of m/z, median elution time, and elution times in each spectrum. + \item mz.tol - The m/z tolerance used in the alignment. + \item rt.tol - The elution time tolerance in the alignment. +} +} +\description{ +Identifies which of the peaks from the profiles correspond to the same feature. } -\author{ Tianwei Yu } -\seealso{ proc.to.feature } \examples{ -data(features) -features.2<-adjust.time(features) -this.aligned<-feature.align(features,min_occurrence=2) -summary(this.aligned) -this.aligned$aligned.ftrs[1:5,] -this.aligned$pk.times[1:5,] +data(extracted) +feature.align(extracted, mz_max_diff = 10 * 1e-05, do.plot = FALSE) } -\keyword{ models } \ No newline at end of file