const-ae
diff --git a/‎R/AllGenerics.R
+1-1 b/‎R/AllGenerics.R
+1-1
diff --git a/‎R/calculate_distance.R
+5-1 b/‎R/calculate_distance.R
+5-1
diff --git a/‎R/completion.R
+14-1 b/‎R/completion.R
+14-1
diff --git a/‎R/generate_synthetic_data.R
+10-6 b/‎R/generate_synthetic_data.R
+10-6
diff --git a/‎R/pd_lm.R
+3-1 b/‎R/pd_lm.R
+3-1
diff --git a/‎R/predict.R
+2-2 b/‎R/predict.R
+2-2
diff --git a/‎R/proDA.R
+11-9 b/‎R/proDA.R
+11-9
diff --git a/‎R/simple_tests.R
+1-1 b/‎R/simple_tests.R
+1-1
diff --git a/‎R/test_diff.R
+20-15 b/‎R/test_diff.R
+20-15
diff --git a/‎R/util.R
+17 b/‎R/util.R
+17
diff --git a/‎man/cash-proDAFit-method.Rd
+15-1 b/‎man/cash-proDAFit-method.Rd
+15-1
diff --git a/‎man/dist_approx_impl.Rd
+7-1 b/‎man/dist_approx_impl.Rd
+7-1
@@ -92,7 +92,7 @@ setGeneric("coefficient_variance_matrices", function(object, ...) standardGeneri
 #'
 #' @examples
 #'   syn_data <- generate_synthetic_data(n_proteins = 10)
-#'   fit <- proDA(syn_data$Y, design = syn_data$groups)
+#'   fit <- proDA(syn_data$Y, design = syn_data$groups, reference_level = "Condition_1")
 #'   reference_level(fit)
 #'
 #' @seealso \link{accessor_methods} for the implementation for a 'proDAFit' object
 
@@ -16,13 +16,17 @@
 #' @param blind fit an intercept model for the missing values to make
 #'   sure that the results are not biased for the expected result.
 #'   Default: `TRUE`
-#' @param ... additional argument to \code{proDA()} in case object is a
+#' @param ... additional arguments to \code{proDA()} in case object is a
 #'   \code{SummarizedExperiment} or a \code{matrix}
 #'
 #'
 #' @return a list with two elements: `mean` and `sd` both are formally
 #'   of class "dist"
 #'
+#' @examples
+#'   syn_data <- generate_synthetic_data(n_proteins = 10)
+#'   fit <- proDA(syn_data$Y, design = syn_data$groups)
+#'   dist_approx(fit)
 #'
 #' @name dist_approx_impl
 #' @aliases dist_approx,proDAFit-method
 
@@ -31,7 +31,7 @@
 #'   \item result_names
 #'   \item coefficient_variance_matrices
 #'   \item colData
-#'   \item rowData.
+#'   \item rowData
 #' }
 #'
 #' @param x an object of class 'proDAFit' produced by \code{proDA()}
@@ -42,6 +42,19 @@
 #'
 #' @return whatever the function called \code{name} returns.
 #'
+#' @examples
+#'   syn_data <- generate_synthetic_data(n_proteins = 10)
+#'   fit <- proDA(syn_data$Y, design = syn_data$groups)
+#'
+#'   # The two styles are identical
+#'   design(fit)
+#'   fit$design
+#'
+#'   # More functions
+#'   fit$abundances
+#'
+#'
+#'
 #' @seealso \link{accessor_methods} for more documentation on the
 #'   accessor functions.
 #' @aliases dollar_methods
 
@@ -10,7 +10,7 @@
 #' @param frac_changed the fraction of proteins that actually
 #'   differ between the conditions. Default: 0.1
 #' @param dropout_curve_position the point where the chance
-#'   to observe a value is 50%. Can be a single number or
+#'   to observe a value is 50\%. Can be a single number or
 #'   a vector of \code{length(dropout_curve_position) == n_conditions * n_replicates}.
 #'   Default: 18.5
 #' @param dropout_curve_scale The width of the dropout curve.
@@ -26,7 +26,7 @@
 #'    the scale and the degrees of freedom of the inverse
 #'    Chi-squared distribution used as a prior for the
 #'    variances. Default: 0.05 and 2
-#' @param effect_size the standard deviation that used to draw
+#' @param effect_size the standard deviation that is used to draw
 #'   different values for the \code{frac_changed} part of the
 #'   proteins. Default: 2
 #' @param return_summarized_experiment a boolean indicator if
@@ -39,11 +39,11 @@
 #'     \item{Z}{the intensity matrix before dropping out values}
 #'     \item{t_mu}{a matrix with \code{n_proteins} rows and
 #'        \code{n_conditions} columns that contains the underlying
-#'        means for each protein.}
-#'     \item{t_sigma2}{a vector with the true variance for each
-#'        protein.}
+#'        means for each protein}
+#'     \item{t_sigma2}{a vector with the true variances for each
+#'        protein}
 #'     \item{changed}{a vector with boolean values if the
-#'        protein is actually changed.}
+#'        protein is actually changed}
 #'     \item{group}{the group structure mapping samples to conditions}
 #'   }
 #'   if \code{return_summarized_experiment} is \code{FALSE}. Otherwise
@@ -52,8 +52,12 @@
 #' @examples
 #'   syn_data <- generate_synthetic_data(n_proteins = 10)
 #'   names(syn_data)
+#'   head(syn_data$Y)
+#'
+#'   # Returning a SummarizedExperiment
 #'   se <- generate_synthetic_data(n_proteins = 10, return_summarized_experiment = TRUE)
 #'   se
+#'   head(assay(se))
 #'
 #' @export
 generate_synthetic_data <- function(n_proteins, n_conditions = 2,
 
@@ -16,7 +16,7 @@
 #'   specify the \code{formula}
 #' @param subset an optional selection vector for data to subset it
 #' @param dropout_curve_position the value where the chance to
-#'   observe a value is 50%. Can either be a single value that is
+#'   observe a value is 50\%. Can either be a single value that is
 #'   repeated for each row or a vector with one element for each
 #'   row. Not optional.
 #' @param dropout_curve_scale the width of the dropout curve. Smaller
@@ -47,6 +47,8 @@
 #' @return a list with the following entries
 #'   \describe{
 #'     \item{coefficients}{a named vector with the fitted values}
+#'     \item{coef_variance_matrix}{a \code{p*p} matrix with the variance associated
+#'       with each coefficient estimate}
 #'     \item{n_approx}{the estimated "size" of the data set (n_hat - variance_prior_df)}
 #'     \item{df}{the estimated degrees of freedom (n_hat - p)}
 #'     \item{s2}{the estimated unbiased variance}
 
@@ -5,8 +5,8 @@
 #' (\code{type = "response"}) without missing values according to the
 #' linear probabilistic dropout model, fitted with \code{proDA()}. Or, it
 #' can predict the feature parameters for additional proteins given their
-#' abundances including missing values after estimating the hyper-
-#' parameters on a dataset with the same sample structure
+#' abundances including missing values after estimating the hyper-parameters
+#' on a dataset with the same sample structure
 #' (\code{type = "feature_parameters"}).
 #'
 #' \strong{Note:} this method behaves a little differently from what one might
 
@@ -32,7 +32,7 @@ if(getRversion() >= "2.15.1")  utils::globalVariables(c("Condition1", "Condition
 
 
 
-#' Main function to determine the hyper and protein parameters
+#' Main function to fit the probabilistic dropout model
 #'
 #' The function fits a linear probabilistic dropout model and infers
 #' the hyper-parameters for the location prior, the variance prior,
@@ -44,7 +44,7 @@ if(getRversion() >= "2.15.1")  utils::globalVariables(c("Condition1", "Condition
 #' of each protein estimate. The variance moderation is fairly standard
 #' in high-throughput experiments and can boost the power to detect
 #' differentially abundant proteins. The location moderation is important
-#' to handle extreme cases where in one conditio a protein is not observed
+#' to handle the edge case where in one condition a protein is not observed
 #' in any sample. In addition it can help to get more precise estimates
 #' of the difference between conditions. Unlike 'DESeq2', which moderates
 #' the coefficient estimates (ie. the "betas") to be centered around zero,
@@ -53,10 +53,10 @@ if(getRversion() >= "2.15.1")  utils::globalVariables(c("Condition1", "Condition
 #'
 #' @param data a matrix like object (\code{matrix()},
 #'   \code{SummarizedExperiment()}, or anything that can be cast to
-#'   \code{SummarizedExperiment()} (eg. MSnSet, eSet, ...)) with the
+#'   \code{SummarizedExperiment()} (eg. `MSnSet`, `eSet`, ...)) with
 #'   one column per
 #'   sample and one row per protein. Missing values should be
-#'   coded \code{NA}.
+#'   coded as \code{NA}.
 #' @param design a specification of the experimental design that
 #'   is used to fit the linear model. It can be a \code{model.matrix()}
 #'   with one row for each sample and one column for each
@@ -77,9 +77,9 @@ if(getRversion() >= "2.15.1")  utils::globalVariables(c("Condition1", "Condition
 #'   This is undesirable and can be removed by working on the log
 #'   scale. The easiest way to find out if the data is already log-
 #'   transformed is to see if the intensities are in the range of
-#'   0 to 100 in which case they are transformed or if they rather
-#'   are between 1e5 to 1e12, in which case they need to be
-#'   transformed. Default: \code{TRUE}
+#'   `0` to `100`, in which case they are transformed, or if they are rather
+#'   between `1e5` and `1e12`, in which case they are not.
+#'   Default: \code{TRUE}
 #' @param moderate_location,moderate_variance boolean values
 #'   to indicate if the location and the variances are
 #'   moderated. Default: \code{TRUE}
@@ -95,8 +95,8 @@ if(getRversion() >= "2.15.1")  utils::globalVariables(c("Condition1", "Condition
 #'   \code{20}
 #' @param epsilon if the remaining error is smaller than \code{epsilon}
 #'   the model has converged. Default: \code{1e-3}
-#' @param verbose boolean that signals if the method prints informative
-#'   messages. Default: \code{FALSE}
+#' @param verbose boolean that signals if the method prints messages
+#'   during the fitting. Default: \code{FALSE}
 #' @param ... additional parameters for the construction of the
 #'   'proDAFit' object
 #'
@@ -105,6 +105,8 @@ if(getRversion() >= "2.15.1")  utils::globalVariables(c("Condition1", "Condition
 #'   on the hyper-parameters and feature parameters, the convergence,
 #'   the experimental design etc. Internally, it is a sub-class of
 #'   \code{SummarizedExperiment} which means the object is subsettable.
+#'   The `$`-operator is overloaded for this object to make it easy to
+#'   discover applicable functions.
 #'
 #'
 #' @examples
 
@@ -64,7 +64,7 @@
 #'   t.test(x, y)
 #'   summary(lm(c(x, y) ~ cond,
 #'              data = data.frame(cond = c(rep("x", 5),
-#'                                         rep("y", 5)))))$coef[2,]
+#'                                         rep("y", 5)))))$coefficients[2,]
 #'   pd_row_t_test(matrix(x, nrow=1), matrix(y, nrow=1),
 #'                 moderate_location = FALSE,
 #'                 moderate_variance = FALSE)
 
@@ -1,9 +1,9 @@
 
 
 
-#' Identify significant coefficients
+#' Identify differentially abundant proteins
 #'
-#' The function is used to test coefficients of a 'proDAFit'
+#' The `test_diff()` function is used to test coefficients of a 'proDAFit'
 #' object. It provides a Wald test to test individual
 #' coefficients and a likelihood ratio F-test to compare the
 #' original model with a reduced model. The \code{result_names}
@@ -30,16 +30,18 @@
 #'   linear combination of them. The contrast is always compared
 #'   against zero. Thus, to find out if two coefficients differ
 #'   use \code{coef1 - coef2}.
-#' @param reduced_model The fit with an alternative model (nested
-#'   in the original model) is compared with the original model
-#'   using an F-test. This is useful if not just an individual
-#'   coefficient should be tested, but the effect of including
-#'   a combination of covariates. If neither a \code{contrast}, nor
+#' @param reduced_model If you don't want to test an individual
+#'   coefficient, you can specify a reduced model and compare
+#'   it with the original model using an F-test. This is useful
+#'   to find out how a set of parameters affects the goodness of
+#'   the fit. If neither a \code{contrast}, nor
 #'   a \code{reduced_model} is specified, by default a comparison
-#'   with an intercept model is done. Default: \code{~ 1}.
+#'   with an intercept model (ie. just the average across conditions)
+#'   is done. Default: \code{~ 1}.
 #' @param alternative a string that decides how the
 #'   hypothesis test is done. This parameter is only relevant for
-#'   the t-test / contrast test. Default: \code{"two.sided"}
+#'   the Wald-test specified using the `contrast` argument.
+#'   Default: \code{"two.sided"}
 #' @param pval_adjust_method a string that indicates the method
 #'   that is used to adjust the p-value for multiple testing.
 #'   It must match the options in \code{\link[stats]{p.adjust}}.
@@ -55,11 +57,14 @@
 #'   messages. Default: \code{FALSE}.
 #'
 #' @return
-#'   Both functions return a \code{data.frame} with one row per protein
-#'   with the key parameters of the statistical test.
+#'   The `result_names()` function returns a character vector.
+#'
+#'   The `test_diff()` function returns a \code{data.frame} with one row per protein
+#'   with the key parameters of the statistical test. Depending on the kind of test
+#'   (Wald or F test), the content of the `data.frame` differs.
 #'
 #'   The Wald test, which can be considered equivalent to a t-test, returns
-#'   a data.frame with the following columns:
+#'   a `data.frame` with the following columns:
 #'   \describe{
 #'     \item{name}{the name of the protein, extracted from the rowname of
 #'       the input matrix}
@@ -76,7 +81,7 @@
 #'       of available information for estimating the \code{se}. They
 #'       are the sum of the number of samples the protein was observed
 #'       in, the amount of information contained in the missing values,
-#'       and the estimated df from the variance prior.}
+#'       and the degrees of freedom of the variance prior.}
 #'     \item{avg_abundance}{the estimate of the average abundance of
 #'       the protein across all samples.}
 #'     \item{n_approx}{the approximated information available for estimating
@@ -86,7 +91,7 @@
 #'   }
 #'
 #'
-#'   The F-test returns a data.frame with the following columns
+#'   The F-test returns a `data.frame` with the following columns
 #'   \describe{
 #'     \item{name}{the name of the protein, extracted from the rowname of
 #'       the input matrix}
@@ -102,7 +107,7 @@
 #'       of available information for estimating the \code{se}. They
 #'       are the sum of the number of samples the protein was observed
 #'       in, the amount of information contained in the missing values,
-#'       and the estimated df from the variance prior.}
+#'       and the degrees of freedom of the variance prior.}
 #'     \item{avg_abundance}{the estimate of the average abundance of
 #'       the protein across all samples.}
 #'     \item{n_approx}{the information available for estimating
 
@@ -16,6 +16,23 @@
 #' @param ... additional arguments to FUN
 #'
 #' @return a matrix of size \code{length(x) x ncol}
+#'
+#'
+#' @examples
+#'   # Behaves similarly to sapply(), but it always returns a matrix
+#'   t(sapply(1:5, function(i) c(i - i/3, i, i + i/3)))
+#'   mply_dbl(1:5, function(i) c(i - i/3, i, i + i/3), ncol=3)
+#'
+#'   # Which can avoid some bad surprises
+#'   t(sapply(1:5, identity))
+#'   mply_dbl(1:5, identity)
+#'
+#'
+#'   # Works also with matrix input
+#'   mat <- matrix(1:20, ncol=4)
+#'   mat
+#'   msply_dbl(mat, function(i) rep(i, each=2))
+#'
 mply_dbl <- function(x, FUN, ncol=1, ...){
   if(is.vector(x)){
     res <- vapply(x, FUN, FUN.VALUE=rep(0.0, times=ncol), ...)