Skip to content

Commit fc3cc95

Browse files
committed
Lots of small improvements to documentation
1 parent ea18c54 commit fc3cc95

20 files changed

+174
-77
lines changed

R/AllGenerics.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ setGeneric("coefficient_variance_matrices", function(object, ...) standardGeneri
9292
#'
9393
#' @examples
9494
#' syn_data <- generate_synthetic_data(n_proteins = 10)
95-
#' fit <- proDA(syn_data$Y, design = syn_data$groups)
95+
#' fit <- proDA(syn_data$Y, design = syn_data$groups, reference_level = "Condition_1")
9696
#' reference_level(fit)
9797
#'
9898
#' @seealso \link{accessor_methods} for the implementation for a 'proDAFit' object

R/calculate_distance.R

+5-1
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,17 @@
1616
#' @param blind fit an intercept model for the missing values to make
1717
#' sure that the results are not biased for the expected result.
1818
#' Default: `TRUE`
19-
#' @param ... additional argument to \code{proDA()} in case object is a
19+
#' @param ... additional arguments to \code{proDA()} in case object is a
2020
#' \code{SummarizedExperiment} or a \code{matrix}
2121
#'
2222
#'
2323
#' @return a list with two elements: `mean` and `sd` both are formally
2424
#' of class "dist"
2525
#'
26+
#' @examples
27+
#' syn_data <- generate_synthetic_data(n_proteins = 10)
28+
#' fit <- proDA(syn_data$Y, design = syn_data$groups)
29+
#' dist_approx(fit)
2630
#'
2731
#' @name dist_approx_impl
2832
#' @aliases dist_approx,proDAFit-method

R/completion.R

+14-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
#' \item result_names
3232
#' \item coefficient_variance_matrices
3333
#' \item colData
34-
#' \item rowData.
34+
#' \item rowData
3535
#' }
3636
#'
3737
#' @param x an object of class 'proDAFit' produced by \code{proDA()}
@@ -42,6 +42,19 @@
4242
#'
4343
#' @return whatever the function called \code{name} returns.
4444
#'
45+
#' @examples
46+
#' syn_data <- generate_synthetic_data(n_proteins = 10)
47+
#' fit <- proDA(syn_data$Y, design = syn_data$groups)
48+
#'
49+
#' # The two styles are identical
50+
#' design(fit)
51+
#' fit$design
52+
#'
53+
#' # More functions
54+
#' fit$abundances
55+
#'
56+
#'
57+
#'
4558
#' @seealso \link{accessor_methods} for more documentation on the
4659
#' accessor functions.
4760
#' @aliases dollar_methods

R/generate_synthetic_data.R

+10-6
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#' @param frac_changed the fraction of proteins that actually
1111
#' differ between the conditions. Default: 0.1
1212
#' @param dropout_curve_position the point where the chance
13-
#' to observe a value is 50%. Can be a single number or
13+
#' to observe a value is 50\%. Can be a single number or
1414
#' a vector of \code{length(dropout_curve_position) == n_conditions * n_replicates}.
1515
#' Default: 18.5
1616
#' @param dropout_curve_scale The width of the dropout curve.
@@ -26,7 +26,7 @@
2626
#' the scale and the degrees of freedom of the inverse
2727
#' Chi-squared distribution used as a prior for the
2828
#' variances. Default: 0.05 and 2
29-
#' @param effect_size the standard deviation that used to draw
29+
#' @param effect_size the standard deviation that is used to draw
3030
#' different values for the \code{frac_changed} part of the
3131
#' proteins. Default: 2
3232
#' @param return_summarized_experiment a boolean indicator if
@@ -39,11 +39,11 @@
3939
#' \item{Z}{the intensity matrix before dropping out values}
4040
#' \item{t_mu}{a matrix with \code{n_proteins} rows and
4141
#' \code{n_conditions} columns that contains the underlying
42-
#' means for each protein.}
43-
#' \item{t_sigma2}{a vector with the true variance for each
44-
#' protein.}
42+
#' means for each protein}
43+
#' \item{t_sigma2}{a vector with the true variances for each
44+
#' protein}
4545
#' \item{changed}{a vector with boolean values if the
46-
#' protein is actually changed.}
46+
#' protein is actually changed}
4747
#' \item{group}{the group structure mapping samples to conditions}
4848
#' }
4949
#' if \code{return_summarized_experiment} is \code{FALSE}. Otherwise
@@ -52,8 +52,12 @@
5252
#' @examples
5353
#' syn_data <- generate_synthetic_data(n_proteins = 10)
5454
#' names(syn_data)
55+
#' head(syn_data$Y)
56+
#'
57+
#' # Returning a SummarizedExperiment
5558
#' se <- generate_synthetic_data(n_proteins = 10, return_summarized_experiment = TRUE)
5659
#' se
60+
#' head(assay(se))
5761
#'
5862
#' @export
5963
generate_synthetic_data <- function(n_proteins, n_conditions = 2,

R/pd_lm.R

+3-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#' specify the \code{formula}
1717
#' @param subset an optional selection vector for data to subset it
1818
#' @param dropout_curve_position the value where the chance to
19-
#' observe a value is 50%. Can either be a single value that is
19+
#' observe a value is 50\%. Can either be a single value that is
2020
#' repeated for each row or a vector with one element for each
2121
#' row. Not optional.
2222
#' @param dropout_curve_scale the width of the dropout curve. Smaller
@@ -47,6 +47,8 @@
4747
#' @return a list with the following entries
4848
#' \describe{
4949
#' \item{coefficients}{a named vector with the fitted values}
50+
#' \item{coef_variance_matrix}{a \code{p*p} matrix with the variance associated
51+
#' with each coefficient estimate}
5052
#' \item{n_approx}{the estimated "size" of the data set (n_hat - variance_prior_df)}
5153
#' \item{df}{the estimated degrees of freedom (n_hat - p)}
5254
#' \item{s2}{the estimated unbiased variance}

R/predict.R

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
#' (\code{type = "response"}) without missing values according to the
66
#' linear probabilistic dropout model, fitted with \code{proDA()}. Or, it
77
#' can predict the feature parameters for additional proteins given their
8-
#' abundances including missing values after estimating the hyper-
9-
#' parameters on a dataset with the same sample structure
8+
#' abundances including missing values after estimating the hyper-parameters
9+
#' on a dataset with the same sample structure
1010
#' (\code{type = "feature_parameters"}).
1111
#'
1212
#' \strong{Note:} this method behaves a little differently from what one might

R/proDA.R

+11-9
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c("Condition1", "Condition
3232

3333

3434

35-
#' Main function to determine the hyper and protein parameters
35+
#' Main function to fit the probabilistic dropout model
3636
#'
3737
#' The function fits a linear probabilistic dropout model and infers
3838
#' the hyper-parameters for the location prior, the variance prior,
@@ -44,7 +44,7 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c("Condition1", "Condition
4444
#' of each protein estimate. The variance moderation is fairly standard
4545
#' in high-throughput experiments and can boost the power to detect
4646
#' differentially abundant proteins. The location moderation is important
47-
#' to handle extreme cases where in one conditio a protein is not observed
47+
#' to handle the edge case where in one condition a protein is not observed
4848
#' in any sample. In addition it can help to get more precise estimates
4949
#' of the difference between conditions. Unlike 'DESeq2', which moderates
5050
#' the coefficient estimates (ie. the "betas") to be centered around zero,
@@ -53,10 +53,10 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c("Condition1", "Condition
5353
#'
5454
#' @param data a matrix like object (\code{matrix()},
5555
#' \code{SummarizedExperiment()}, or anything that can be cast to
56-
#' \code{SummarizedExperiment()} (eg. MSnSet, eSet, ...)) with the
56+
#' \code{SummarizedExperiment()} (eg. `MSnSet`, `eSet`, ...)) with
5757
#' one column per
5858
#' sample and one row per protein. Missing values should be
59-
#' coded \code{NA}.
59+
#' coded as \code{NA}.
6060
#' @param design a specification of the experimental design that
6161
#' is used to fit the linear model. It can be a \code{model.matrix()}
6262
#' with one row for each sample and one column for each
@@ -77,9 +77,9 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c("Condition1", "Condition
7777
#' This is undesirable and can be removed by working on the log
7878
#' scale. The easiest way to find out if the data is already log-
7979
#' transformed is to see if the intensities are in the range of
80-
#' 0 to 100 in which case they are transformed or if they rather
81-
#' are between 1e5 to 1e12, in which case they need to be
82-
#' transformed. Default: \code{TRUE}
80+
#' `0` to `100` in which case they are transformed or if they rather
81+
#' are between `1e5` and `1e12`, in which case they are not.
82+
#' Default: \code{TRUE}
8383
#' @param moderate_location,moderate_variance boolean values
8484
#' to indicate if the location and the variances are
8585
#' moderated. Default: \code{TRUE}
@@ -95,8 +95,8 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c("Condition1", "Condition
9595
#' \code{20}
9696
#' @param epsilon if the remaining error is smaller than \code{epsilon}
9797
#' the model has converged. Default: \code{1e-3}
98-
#' @param verbose boolean that signals if the method prints informative
99-
#' messages. Default: \code{FALSE}
98+
#' @param verbose boolean that signals if the method prints messages
99+
#' during the fitting. Default: \code{FALSE}
100100
#' @param ... additional parameters for the construction of the
101101
#' 'proDAFit' object
102102
#'
@@ -105,6 +105,8 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c("Condition1", "Condition
105105
#' on the hyper-parameters and feature parameters, the convergence,
106106
#' the experimental design etc. Internally, it is a sub-class of
107107
#' \code{SummarizedExperiment} which means the object is subsettable.
108+
#' The `$`-operator is overloaded for this object to make it easy to
109+
#' discover applicable functions.
108110
#'
109111
#'
110112
#' @examples

R/simple_tests.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
#' t.test(x, y)
6565
#' summary(lm(c(x, y) ~ cond,
6666
#' data = data.frame(cond = c(rep("x", 5),
67-
#' rep("y", 5)))))$coef[2,]
67+
#' rep("y", 5)))))$coefficients[2,]
6868
#' pd_row_t_test(matrix(x, nrow=1), matrix(y, nrow=1),
6969
#' moderate_location = FALSE,
7070
#' moderate_variance = FALSE)

R/test_diff.R

+20-15
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11

22

33

4-
#' Identify significant coefficients
4+
#' Identify differentially abundant proteins
55
#'
6-
#' The function is used to test coefficients of a 'proDAFit'
6+
#' The `test_diff()` function is used to test coefficients of a 'proDAFit'
77
#' object. It provides a Wald test to test individual
88
#' coefficients and a likelihood ratio F-test to compare the
99
#' original model with a reduced model. The \code{result_names}
@@ -30,16 +30,18 @@
3030
#' linear combination of them. The contrast is always compared
3131
#' against zero. Thus, to find out if two coefficients differ
3232
#' use \code{coef1 - coef2}.
33-
#' @param reduced_model The fit with an alternative model (nested
34-
#' in the original model) is compared with the original model
35-
#' using an F-test. This is useful if not just an individual
36-
#' coefficient should be tested, but the effect of including
37-
#' a combination of covariates. If neither a \code{contrast}, nor
33+
#' @param reduced_model If you don't want to test an individual
34+
#' coefficient, you can specify a reduced model and compare
35+
#' it with the original model using an F-test. This is useful
36+
#' to find out how a set of parameters affect the goodness of
37+
#' the fit. If neither a \code{contrast}, nor
3838
#' a \code{reduced_model} is specified, by default a comparison
39-
#' with an intercept model is done. Default: \code{~ 1}.
39+
#' with an intercept model (ie. just the average across conditions)
40+
#' is done. Default: \code{~ 1}.
4041
#' @param alternative a string that decides how the
4142
#' hypothesis test is done. This parameter is only relevant for
42-
#' the t-test / contrast test. Default: \code{"two.sided"}
43+
#' the Wald-test specified using the `contrast` argument.
44+
#' Default: \code{"two.sided"}
4345
#' @param pval_adjust_method a string the indicates the method
4446
#' that is used to adjust the p-value for the multiple testing.
4547
#' It must match the options in \code{\link[stats]{p.adjust}}.
@@ -55,11 +57,14 @@
5557
#' messages. Default: \code{FALSE}.
5658
#'
5759
#' @return
58-
#' Both functions return a \code{data.frame} with one row per protein
59-
#' with the key parameters of the statistical test.
60+
#' The `result_names()` function returns a character vector.
61+
#'
62+
#' The `test_diff()` function returns a \code{data.frame} with one row per protein
63+
#' with the key parameters of the statistical test. Depending on the kind of test
64+
#' (Wald or F test) the content of the `data.frame` differs.
6065
#'
6166
#' The Wald test, which can be considered equivalent to a t-test, returns
62-
#' a data.frame with the following columns:
67+
#' a `data.frame` with the following columns:
6368
#' \describe{
6469
#' \item{name}{the name of the protein, extracted from the rowname of
6570
#' the input matrix}
@@ -76,7 +81,7 @@
7681
#' of available information for estimating the \code{se}. They
7782
#' are the sum of the number of samples the protein was observed
7883
#' in, the amount of information contained in the missing values,
79-
#' and the estimated df from the variance prior.}
84+
#' and the degrees of freedom of the variance prior.}
8085
#' \item{avg_abundance}{the estimate of the average abundance of
8186
#' the protein across all samples.}
8287
#' \item{n_approx}{the approximated information available for estimating
@@ -86,7 +91,7 @@
8691
#' }
8792
#'
8893
#'
89-
#' The F-test returns a data.frame with the following columns
94+
#' The F-test returns a `data.frame` with the following columns
9095
#' \describe{
9196
#' \item{name}{the name of the protein, extracted from the rowname of
9297
#' the input matrix}
@@ -102,7 +107,7 @@
102107
#' of available information for estimating the \code{se}. They
103108
#' are the sum of the number of samples the protein was observed
104109
#' in, the amount of information contained in the missing values,
105-
#' and the estimated df from the variance prior.}
110+
#' and the degrees of freedom of the variance prior.}
106111
#' \item{avg_abundance}{the estimate of the average abundance of
107112
#' the protein across all samples.}
108113
#' \item{n_approx}{the information available for estimating

R/util.R

+17
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,23 @@
1616
#' @param ... additional arguments to FUN
1717
#'
1818
#' @return a matrix of size \code{length(x) x ncol}
19+
#'
20+
#'
21+
#' @examples
22+
#' # Behaves similarly to sapply(), but it always returns a matrix
23+
#' t(sapply(1:5, function(i) c(i - i/3, i, i + i/3)))
24+
#' mply_dbl(1:5, function(i) c(i - i/3, i, i + i/3), ncol=3)
25+
#'
26+
#' # Which can avoid some bad surprises
27+
#' t(sapply(1:5, identity))
28+
#' mply_dbl(1:5, identity)
29+
#'
30+
#'
31+
#' # Works also with matrix input
32+
#' mat <- matrix(1:20, ncol=4)
33+
#' mat
34+
#' msply_dbl(mat, function(i) rep(i, each=2))
35+
#'
1936
mply_dbl <- function(x, FUN, ncol=1, ...){
2037
if(is.vector(x)){
2138
res <- vapply(x, FUN, FUN.VALUE=rep(0.0, times=ncol), ...)

man/cash-proDAFit-method.Rd

+15-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/dist_approx_impl.Rd

+7-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)