From 9c60c3b2064dc0583a610f4045fc4b7e7e264cb0 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Mon, 28 Oct 2024 11:53:21 +0100 Subject: [PATCH] Lint (#68) --- .github/workflows/check-bioc.yml | 6 +- .lintr | 7 + .vscode/c_cpp_properties.json | 17 +++ .vscode/settings.json | 34 +++++ R/accessor-generics.r | 11 +- R/censoring.r | 42 +++--- R/colorlegend.r | 75 +++++------ R/cube_helix.r | 26 ++-- R/dataset-helpers.r | 10 +- R/destiny-package.r | 10 +- R/diffusionmap-methods-accession.r | 12 +- R/diffusionmap-methods.r | 26 ++-- R/diffusionmap-plotting.r | 78 +++++------ R/diffusionmap.r | 134 ++++++++++--------- R/dist-matrix-coerce.r | 2 +- R/dpt-branching.r | 50 +++---- R/dpt-helpers.r | 20 +-- R/dpt-methods-matrix.r | 20 +-- R/dpt-methods.r | 22 +-- R/dpt-plotting.r | 46 ++++--- R/dpt.r | 22 +-- R/eig_decomp.r | 20 +-- R/expressionset-helpers.r | 30 ++--- R/find_dm_k.r | 20 +-- R/gene-relevance-methods.r | 18 +-- R/gene-relevance-plotting-differential-map.r | 23 ++-- R/gene-relevance-plotting-gr-map.r | 20 +-- R/gene-relevance-plotting-rank.r | 12 +- R/gene-relevance-plotting.r | 14 +- R/gene-relevance.r | 58 ++++---- R/guo-data.r | 8 +- R/knn.r | 16 +-- R/l_which.r | 22 +-- R/methods-coercion.r | 30 ++--- R/methods-extraction.r | 22 +-- R/methods-update.r | 52 +++---- R/plothelpers.r | 14 +- R/predict.r | 28 ++-- R/projection-dist.r | 17 ++- R/sigmas-plotting.r | 48 +++---- R/sigmas.r | 100 +++++++------- R/utils.r | 39 +++--- demo/destiny.r | 10 +- src/censoring.cpp | 2 +- tests/testthat/test_dataset_types.r | 58 ++++---- tests/testthat/test_distances.r | 24 ++-- tests/testthat/test_ggplot.r | 10 +- tests/testthat/test_gr.r | 12 +- tests/testthat/test_knn.r | 20 +-- tests/testthat/test_utils.r | 4 +- vignettes/DPT.Rmd | 8 +- vignettes/Diffusion-Maps.Rmd | 19 +-- vignettes/Gene-Relevance.Rmd | 24 ++-- vignettes/Global-Sigma.Rmd | 13 +- vignettes/tidyverse.Rmd | 3 +- 55 files changed, 760 insertions(+), 728 deletions(-) create mode 100644 .lintr create mode 100644 .vscode/c_cpp_properties.json create mode 100644 .vscode/settings.json diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index d46e628..3a07c4a 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -57,9 +57,9 @@ jobs: fail-fast: false matrix: config: - - { os: ubuntu-latest, r: 'release', bioc: '3.18', cont: "bioconductor/bioconductor_docker:RELEASE_3_18", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } - - { os: macOS-latest, r: 'release', bioc: '3.18' } - - { os: windows-latest, r: 'release', bioc: '3.18' } + - { os: ubuntu-latest, r: 'release', bioc: '3.19', cont: "bioconductor/bioconductor_docker:RELEASE_3_19", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } + - { os: macOS-latest, r: 'release', bioc: '3.19' } + - { os: windows-latest, r: 'release', bioc: '3.19' } ## Check https://github.com/r-lib/actions/tree/master/examples ## for examples using the http-user-agent diff --git a/.lintr b/.lintr new file mode 100644 index 0000000..0bebca5 --- /dev/null +++ b/.lintr @@ -0,0 +1,7 @@ +linters: linters_with_defaults( # see vignette("lintr") + whitespace_linter = NULL, + indentation_linter = NULL, + line_length_linter = NULL, + quotes_linter = quotes_linter("'") + ) +encoding: "UTF-8" diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..ddfe2c6 --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,17 @@ +{ + "configurations": [ + { + "name": "Linux", + "includePath": [ + "/usr/include/**", + "/usr/lib/R/library/*/include", + "${workspaceFolder}/**", + ], + "defines": [], + "cStandard": "c11", + "cppStandard": "c++11", + "intelliSenseMode": "linux-clang-x64", + }, + ], + "version": 4, +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..0b8bf44 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,34 @@ +{ + "cSpell.words": [ + "axtype", + "bbox", + "bgplot", + "Biobase", + "colorlegend", + "Colour", + "colourbar", + "consec", + "diffusionmap", + "dists", + "Expl", + "ggplot", + "ggthemes", + "ggtitle", + "gradientn", + "idxs", + "igraph", + "lvls", + "nomap", + "nticks", + "pheno", + "plothelpers", + "rangeframe", + "rdname", + "rlang", + "scatterplot", + "vals", + "xlen", + "ylen", + "zlen" + ], +} \ No newline at end of file diff --git a/R/accessor-generics.r b/R/accessor-generics.r index 817691d..354a008 100644 --- a/R/accessor-generics.r +++ b/R/accessor-generics.r @@ -1,10 +1,10 @@ #' destiny generics -#' +#' #' destiny provides several generic methods and implements them for the \code{\link{DiffusionMap}} and \code{\link{Sigmas}} classes. -#' +#' #' @param object Object from which to extract or to which to assign a value #' @param value Value to assign within an object -#' +#' #' @examples #' data(guo_norm) #' dm <- DiffusionMap(guo_norm) @@ -14,9 +14,9 @@ #' optimal_sigma(dm) #' dataset(dm) #' distance(dm) -#' +#' #' @seealso \link{DiffusionMap methods} and \link{Sigmas} class for implementations -#' +#' #' @importFrom methods setGeneric #' @name destiny generics #' @rdname destiny-generics @@ -77,4 +77,3 @@ setGeneric('distance<-', function(object, value) standardGeneric('distance<-')) #' @rdname destiny-generics #' @export setGeneric('optimal_sigma', function(object) standardGeneric('optimal_sigma')) - diff --git a/R/censoring.r b/R/censoring.r index 7d5b94b..bc8a62f 100644 --- a/R/censoring.r +++ b/R/censoring.r @@ -1,14 +1,14 @@ censoring <- function(data, sigma, dists, censor_val = NULL, censor_range = NULL, missing_range = NULL, callback = invisible) { if (!is.null(censor_range)) censor_range <- matrix(censor_range, ncol = 2) - + if (!is.null(missing_range)) missing_range <- matrix(missing_range, ncol = 2) - + validate_censoring(data, sigma, dists, censor_val, censor_range, missing_range) - + data <- as.matrix(data) - + # dists is a sparse symmetrix matrix, which eigen cannot handle # so we just pass its filled triangle as sparse matrix and convert it back afterwards uplo <- dists@uplo @@ -17,52 +17,52 @@ censoring <- function(data, sigma, dists, censor_val = NULL, censor_range = NULL } predict_censoring <- function(data, data2, censor_val = NULL, censor_range = NULL, missing_range = NULL, sigma) { - if (is.null(censor_val )) censor_val <- NA # this works since this will be NaN in C++ and comparison with NaN is always false - if (is.null(censor_range )) censor_range <- c(NA, NA) + if (is.null(censor_val)) censor_val <- NA # this works since this will be NaN in C++ and comparison with NaN is always false + if (is.null(censor_range)) censor_range <- c(NA, NA) if (is.null(missing_range)) missing_range <- c(NA, NA) - + predict_censoring_impl(data, data2, censor_val, censor_range, missing_range, sigma) } #' @importFrom Matrix sparseMatrix -validate_censoring <- function(data, sigma, dists, censor_val, censor_range, missing_range) { - G <- ncol(data) +validate_censoring <- function(data, sigma, dists, censor_val, censor_range, missing_range) { # nolint: cyclocomp_linter. + g <- ncol(data) n <- nrow(data) - + if (any(is.na(data)) && is.null(missing_range)) stop('Your data contains missing values (NA). You have to provide a the `missing_range` parameter.') - + if (is.null(censor_val) != is.null(censor_range)) stop('You have to provide both a censoring value and a censor_range or none') - + if (!is.null(censor_range)) { - if (!is.numeric(censor_val) || (length(censor_val) != 1L && length(censor_val) != G)) + if (!is.numeric(censor_val) || (length(censor_val) != 1L && length(censor_val) != g)) stop('censor_val has to be a single numeric value, or length(censor_val) == ncol(data) must be TRUE') - - if (!is.numeric(censor_range) || !(nrow(censor_range) %in% c(G, 1L)) || ncol(censor_range) != 2L || any(diff(t(censor_range)) <= 0L)) + + if (!is.numeric(censor_range) || !(nrow(censor_range) %in% c(g, 1L)) || ncol(censor_range) != 2L || any(diff(t(censor_range)) <= 0L)) stop('censor_range has to be a numeric vector of length 2, the second of which being larger, or a matrix with nrow(censor_range) == ncol(data) where each row is such a vector') } - + if (!is.null(missing_range)) { - if (!is.numeric(missing_range) || !(nrow(missing_range) %in% c(G, 1L)) || ncol(missing_range) != 2L || any(diff(t(missing_range)) <= 0L)) + if (!is.numeric(missing_range) || !(nrow(missing_range) %in% c(g, 1L)) || ncol(missing_range) != 2L || any(diff(t(missing_range)) <= 0L)) stop('missing_range has to be a numeric vector of length 2, the second of which being larger, or a matrix with nrow(missing_range) == ncol(data) where each row is such a vector') } - + if (!is.numeric(sigma) || !length(sigma) %in% c(n, 1L)) stop('sigma has to be a single numeric value or of length nrow(data)') - + if (!is(dists, 'dsCMatrix')) stop('dists has to be a dsCMatrix, not ', class(dists)) } test_censoring <- function(censor_val, censor_range, data, missing_range) { if (is.null(data)) return(FALSE) - + has_censor_range <- !(missing(censor_range) || is.null(censor_range)) has_missing_range <- !(missing(missing_range) || is.null(missing_range)) if (has_censor_range || has_missing_range) return(TRUE) - + any(is.na(data)) || any(data == censor_val) } diff --git a/R/colorlegend.r b/R/colorlegend.r index 3c45d7a..14884ae 100644 --- a/R/colorlegend.r +++ b/R/colorlegend.r @@ -1,9 +1,9 @@ #' Color legend -#' +#' #' Creates a color legend for a vector used to color a plot. It will use the current \code{\link[grDevices]{palette}()} or the specified \code{pal} as reference. -#' +#' #' When passed a factor or integer vector, it will create a discrete legend, whereas a double vector will result in a continuous bar. -#' +#' #' @param col Vector of factor, integer, or double used to determine the ticks. #' @param pal If \code{col} is double, pal is used as a continuous palette, else as categorical one #' @param log Use logarithmic scale? @@ -20,19 +20,19 @@ #' @param left logical. If TRUE, invert posx #' @param ... Additional parameters for the \link[graphics]{text} call used for labels #' @param cex.main,cex.axis,col.main,col.lab For compatibility with \code{\link{par}} -#' +#' #' @return This function is called for the side effect of adding a colorbar to a plot and returns nothing/NULL. -#' +#' #' @examples #' color_data <- 1:6 #' par(mar = par('mar') + c(0, 0, 0, 3)) #' plot(sample(6), col = color_data) #' colorlegend(color_data) -#' +#' #' @importFrom graphics par rect segments text #' @importFrom grDevices colorRampPalette palette #' @export -colorlegend <- function( +colorlegend <- function( # nolint: cyclocomp_linter. col, pal = palette(), log = FALSE, posx = c(.9, .93), posy = c(.05, .9), main = NULL, cex_main = par('cex.sub'), @@ -41,39 +41,39 @@ colorlegend <- function( steps = 5, steps_color = 100, digit = 2, left = FALSE, ..., - cex.main = NULL, - cex.axis = NULL, - col.main = NULL, - col.lab = NULL) { + cex.main = NULL, # nolint: object_name_linter. + cex.axis = NULL, # nolint: object_name_linter. + col.main = NULL, # nolint: object_name_linter. + col.lab = NULL) { # nolint: object_name_linter. draw_ticks <- as.logical(steps) if (!draw_ticks) steps <- 2L if (!is.null(cex.main)) cex_main <- cex.main if (!is.null(cex.axis)) cex_axis <- cex.axis if (!is.null(col.main)) col_main <- col.main if (!is.null(col.lab)) col_lab <- col.lab - + zval <- if (is.double(col)) seq(min(col, na.rm = TRUE), max(col, na.rm = TRUE), length.out = steps) else if (is.factor(col)) factor(levels(col)) else sort(unique(col)) - + zval_num <- if (is.integer(zval)) seq_along(zval) else if (is.numeric(zval)) zval else if (is.factor(zval) || is.character(zval)) seq_along(zval) else as.integer(zval) - + zlim <- if (is.double(col)) range(zval_num) else c(min(zval_num) - .5, max(zval_num) + .5) - + par(new = TRUE) omar <- nmar <- par('mar') nmar[c(2, 4)] <- 0 par(mar = nmar) - + emptyplot() - + pars <- par('usr') dx <- pars[[2]] - pars[[1]] xmin <- pars[[1]] + posx[[1]] * dx @@ -81,53 +81,52 @@ colorlegend <- function( dy <- pars[[4]] - pars[[3]] ymin <- pars[[3]] + posy[[1]] * dy ymax <- pars[[3]] + posy[[2]] * dy - + if (log) { zlim <- log10(zlim) zval <- log10(zval) } zmin <- zlim[[1]] zmax <- zlim[[2]] - + if (is.double(col)) { pal_fun <- if (is.function(pal)) pal else colorRampPalette(pal) batches <- pal_fun(steps_color) - Y <- seq(ymin, ymax, length.out = length(batches) + 1) + y <- seq(ymin, ymax, length.out = length(batches) + 1) } else { idx_c <- seq(min(zval_num), max(zval_num)) idx_c[!(idx_c %in% zval_num)] <- NA - + batches <- pal[idx_c] - Y <- seq(ymin, ymax, length.out = length(idx_c) + 1) + y <- seq(ymin, ymax, length.out = length(idx_c) + 1) } - - rect(xmin, Y[-length(Y)], xmax, Y[-1], col = batches, border = NA) + + rect(xmin, y[-length(y)], xmax, y[-1], col = batches, border = NA) rect(xmin, ymin, xmax, ymax, border = col_lab) - + dx <- xmax - xmin dy <- ymax - ymin if (left) { - Dx <- -dx + dx <- -dx pos <- 2 - xpos <- xmin + Dx * .5 - } - else { - Dx <- +dx + xpos <- xmin + dx * .5 + } else { + dx <- +dx pos <- 4 - xpos <- xmax + Dx * .5 + xpos <- xmax + dx * .5 } - + zval_txt <- if (is.double(col)) formatC(zval, digits = digit, format = 'fg') else zval - - Ypos <- ymin + (zval_num - zmin)/(zmax - zmin) * dy + + y_pos <- ymin + (zval_num - zmin) / (zmax - zmin) * dy if (draw_ticks) { if (is.double(col)) - segments(xmax, Ypos, xpos + Dx * .25, Ypos, col = col_lab) - text(xpos, Ypos, zval_txt, pos = pos, col = col_lab, cex = cex_axis, ...) + segments(xmax, y_pos, xpos + dx * .25, y_pos, col = col_lab) + text(xpos, y_pos, zval_txt, pos = pos, col = col_lab, cex = cex_axis, ...) } - + if (!is.null(main)) { - for (i in length(main):1) + for (i in rev(seq_along(main))) text(x = mean(c(xmin, xmax)), y = ymax + .05 * (length(main) - i + 1), labels = main[i], diff --git a/R/cube_helix.r b/R/cube_helix.r index f30bfd0..b8970db 100644 --- a/R/cube_helix.r +++ b/R/cube_helix.r @@ -1,8 +1,8 @@ #' Sequential color palette using the cube helix system -#' +#' #' Creates a perceptually monotonously decreasing (or increasing) lightness color palette with different tones. #' This was necessary in pre-viridis times, by now you can probably just use \code{\link[grDevices]{hcl.colors}} -#' +#' #' @param n Number of colors to return (default: 6) #' @param start Hue to start helix at (\eqn{\textrm{start} \in [0,3]}, default: 0) #' @param r Number of rotations of the helix. Can be negative. (default: 0.4) @@ -14,28 +14,30 @@ #' @param discrete If TRUE, return a discrete scale, if FALSE a continuous one (default: TRUE) #' @param guide Type of scale guide to use. See \code{\link[ggplot2]{guides}} #' @param ... parameters passed to \code{\link[ggplot2]{discrete_scale}} or \code{\link[ggplot2]{continuous_scale}} -#' +#' #' @return A \code{character} vector of hex colors with length \code{n} -#' +#' #' @examples #' palette(cube_helix()) #' image(matrix(1:6), col = 1:6, pch = 19, axes = FALSE) -#' +#' #' cr <- scales::colour_ramp(cube_helix(12, r = 3)) #' r <- runif(100) #' plot(1:100, r, col = cr(r), type = 'b', pch = 20) -#' +#' #' @importFrom grDevices rgb #' @export cube_helix <- function(n = 6, start = 0, r = .4, hue = .8, gamma = 1, light = .85, dark = .15, reverse = FALSE) { - M <- matrix(c(-.14861, -.29227, 1.97294, - 1.78277, -.90649, 0), ncol = 2) + m <- matrix(c( + -.14861, -.29227, 1.97294, + 1.78277, -.90649, 0 + ), ncol = 2) lambda <- seq(light, dark, length.out = n) if (reverse) lambda <- rev(lambda) l <- rep(lambda ^ gamma, each = 3) - phi <- 2 * pi * (start/3 + r * lambda) + phi <- 2 * pi * (start / 3 + r * lambda) t <- rbind(cos(phi), sin(phi)) - out <- l + hue * l * (1 - l)/2 * (M %*% t) + out <- l + hue * l * (1 - l) / 2 * (m %*% t) out <- pmin(pmax(out, 0), 1) out <- apply(out, 2, function(x) rgb(x[[1]], x[[2]], x[[3]])) out @@ -44,9 +46,9 @@ cube_helix <- function(n = 6, start = 0, r = .4, hue = .8, gamma = 1, light = .8 scale_cube_helix <- function(aesthetics, start, r, hue, gamma, light, dark, reverse, discrete, guide, ...) { if (!requireNamespace('ggplot2', quietly = TRUE)) stop('scale_', aesthetics, '_cube_helix needs (and is only useful for) the ggplot2 package') - + f <- function(n) cube_helix(n, start, r, hue, gamma, light, dark, reverse) - + if (discrete) { ggplot2::discrete_scale(aesthetics, 'cube_helix', f, ..., guide = guide) } else { diff --git a/R/dataset-helpers.r b/R/dataset-helpers.r index 3e0f7bc..87f41a5 100644 --- a/R/dataset-helpers.r +++ b/R/dataset-helpers.r @@ -23,7 +23,7 @@ dataset_extract_doublematrix <- function(data, vars = NULL) { data <- data[!dupes, ] warning('Duplicate rows removed from data. Consider explicitly using `df[!duplicated(df), ]`') } - + if (!is.null(vars)) data <- data[, vars] data @@ -52,13 +52,13 @@ dataset_n_features <- function(data, distances = NULL, vars = NULL) { #' @importFrom methods canCoerce #' @importFrom utils getS3method -dataset_to_df <- function(dta, row.names = NULL, optional = FALSE, ...) { +dataset_to_df <- function(dta, row.names = NULL, optional = FALSE, ...) { # nolint: object_name_linter. # The ExpressionSet as.data.frame sucks if (is(dta, 'ExpressionSet')) { cbind(as.data.frame(t(exprs(dta)), row.names, optional, ...), pData(dta)) } else if (is(dta, 'SingleCellExperiment')) { smp_meta <- as.data.frame(colData(dta), row.names, optional, ...) - + #TODO: allow other name? mat <- assay(dta, 'logcounts') if (is(mat, 'sparseMatrix')) { @@ -73,11 +73,11 @@ dataset_to_df <- function(dta, row.names = NULL, optional = FALSE, ...) { mat <- as.matrix(mat) } } - + cbind(as.data.frame(t(mat), row.names, optional, ...), smp_meta) } else if (canCoerce(dta, 'data.frame')) { as(dta, 'data.frame') - } else if (!is.null(getS3method('as.data.frame', class(dta)[[1L]], optional = TRUE))) { + } else if (!is.null(getS3method('as.data.frame', class(dta)[[1L]], optional = TRUE))) { # nolint: brace_linter. as.data.frame(dta, row.names, optional, ...) } else NULL } diff --git a/R/destiny-package.r b/R/destiny-package.r index 63b81da..3cac01c 100644 --- a/R/destiny-package.r +++ b/R/destiny-package.r @@ -1,13 +1,13 @@ #' Create and plot diffusion maps -#' +#' #' The main function is \code{\link{DiffusionMap}}, which returns an object you can \code{\link{plot}} (\code{\link{plot.DiffusionMap}} is then called). -#' +#' #' @examples #' demo(destiny, ask = FALSE) -#' +#' #' @name destiny -#' +#' ## Make sure Rcpp and RcppEigen are loaded #' @importFrom Rcpp evalCpp #' @importFrom RcppEigen RcppEigen.package.skeleton -"_PACKAGE" +'_PACKAGE' diff --git a/R/diffusionmap-methods-accession.r b/R/diffusionmap-methods-accession.r index 2c504d6..73a49fd 100644 --- a/R/diffusionmap-methods-accession.r +++ b/R/diffusionmap-methods-accession.r @@ -2,16 +2,16 @@ NULL #' DiffusionMap accession methods -#' +#' #' Get and set eigenvalues, eigenvectors, and sigma(s) of a \link{DiffusionMap} object. -#' +#' #' @param object A DiffusionMap #' @param value Vector of eigenvalues or matrix of eigenvectors to get/set -#' +#' #' @return The assigned or retrieved value -#' +#' #' @seealso \link{Extraction methods}, \link{DiffusionMap methods}, \link{Coercion methods} for more -#' +#' #' @examples #' data(guo) #' dm <- DiffusionMap(guo) @@ -20,7 +20,7 @@ NULL #' sigmas(dm) #' dataset(dm) #' optimal_sigma(dm) -#' +#' #' @importFrom methods is setGeneric #' @name DiffusionMap accession methods #' @rdname DiffusionMap-accessors diff --git a/R/diffusionmap-methods.r b/R/diffusionmap-methods.r index 961f0d8..fa02ab3 100644 --- a/R/diffusionmap-methods.r +++ b/R/diffusionmap-methods.r @@ -1,40 +1,40 @@ #' DiffusionMap methods -#' +#' #' Methods for external operations on diffusion maps -#' +#' #' @param x,object A \code{\link{DiffusionMap}} -#' +#' #' @return The \code{DiffusionMap} object (\code{print}), or NULL (\code{show}), invisibly -#' +#' #' @seealso \link{DiffusionMap accession methods}, \link{Extraction methods}, \link{Coercion methods} for more -#' +#' #' @examples #' data(guo) #' dm <- DiffusionMap(guo) #' print(dm) #' show(dm) -#' +#' #' @aliases print.DiffusionMap show.DiffusionMap #' @name DiffusionMap methods #' @rdname DiffusionMap-methods NULL #' @importFrom utils str -#' +#' #' @rdname DiffusionMap-methods #' @export setMethod('print', 'DiffusionMap', function(x) { cat(sprintf('DiffusionMap (%s Diffusion components and %s observations)\n', length(eigenvalues(x)), nrow(eigenvectors(x)))) - cat('eigenvalues: '); str(eigenvalues(x)) - cat('eigenvectors: '); str(structure(eigenvectors(x), dimnames = NULL)) - cat(' ..colnames: '); str(colnames(eigenvectors(x)), vec.len = 4) - cat('optimal_sigma: '); str(optimal_sigma(x)) - cat('distance: '); str(distance(x)) + cat('eigenvalues: '); str(eigenvalues(x)) # nolint: semicolon_linter. + cat('eigenvectors: '); str(structure(eigenvectors(x), dimnames = NULL)) # nolint: semicolon_linter. + cat(' ..colnames: '); str(colnames(eigenvectors(x)), vec.len = 4) # nolint: semicolon_linter. + cat('optimal_sigma: '); str(optimal_sigma(x)) # nolint: semicolon_linter. + cat('distance: '); str(distance(x)) # nolint: semicolon_linter. invisible(x) }) #' @importFrom methods show -#' +#' #' @rdname DiffusionMap-methods #' @export setMethod('show', 'DiffusionMap', function(object) { diff --git a/R/diffusionmap-plotting.r b/R/diffusionmap-plotting.r index 0813288..0cc8734 100644 --- a/R/diffusionmap-plotting.r +++ b/R/diffusionmap-plotting.r @@ -3,11 +3,11 @@ NULL #' 3D or 2D plot of diffusion map -#' +#' #' If you want to plot the eigenvalues, simply \code{plot(eigenvalues(dm)[start:end], ...)} -#' +#' #' If you specify negative numbers as diffusion components (e.g. \code{plot(dm, c(-1,2))}), then the corresponding components will be flipped. -#' +#' #' @param x A \link{DiffusionMap} #' @param dims,y Diffusion components (eigenvectors) to plot (default: first three components; 1:3) #' @param new_dcs An optional matrix also containing the rows specified with \code{y} and plotted. (default: no more points) @@ -34,15 +34,15 @@ NULL #' its \code{rescale} argument is \code{NULL}, a \code{list(from = c(a, b), to = c(c, d))}), #' or an array of shape \eqn{from|to \times dims \times min|max}, i.e. \eqn{2 \times length(dims) \times 2}. #' In case of 2d plotting, it should take and return a ggplot2 object. -#' +#' #' @return The return value of the underlying call is returned, i.e. a scatterplot3d or rgl object. -#' +#' #' @examples #' data(guo) #' plot(DiffusionMap(guo)) -#' +#' #' @aliases plot.DiffusionMap -#' +#' #' @importFrom graphics par axis plot plot.new #' @importFrom stats setNames #' @importFrom grDevices palette hcl.colors @@ -55,10 +55,10 @@ NULL #' @importFrom ggplot2 scale_x_continuous scale_y_continuous #' @importFrom ggplot2 guide_colourbar guide_legend #' @importFrom ggthemes geom_rangeframe extended_range_breaks -#' +#' #' @rdname plot.DiffusionMap #' @export -plot.DiffusionMap <- function( +plot.DiffusionMap <- function( # nolint: cyclocomp_linter. x, dims = 1:3, new_dcs = if (!is.null(new_data)) dm_predict(x, new_data), new_data = NULL, @@ -77,14 +77,14 @@ plot.DiffusionMap <- function( ) { dif <- x is_projection <- !is.null(new_dcs) && is.character(col_new) && length(col_new) == 1L - + if (interactive) { if (!requireNamespace('rgl', quietly = TRUE)) stop(sprintf('The package %s is required for interactive plots', sQuote('rgl'))) if (length(dims) != 3L) stop('Only 3d plots can be made interactive') } - + if (!is.null(col) && !is.null(col_by)) stop('Only specify one of col or col_by') if (!is.null(col_by)) { col <- dataset_get_feature(dataset(dif), col_by) @@ -102,26 +102,26 @@ plot.DiffusionMap <- function( legend_main <- 'Projection' } col_legend <- if (continuous && !is.null(col_limits)) col_limits else col - + # use a fitting default palette if (is.null(pal)) { pal <- if (is.double(col)) hcl.colors else palette() } - + # make consecutive the colors for the color legend if (is.integer(col) && consec_col) { - # c(5,0,0,3) -> c(3,1,1,2) + # For example: `c(5,0,0,3) -> c(3,1,1,2)` col <- factor(col) } - + point_data <- cbind( as.data.frame(flipped_dcs(eigenvectors(dif), dims)), Colour = col, ColourExpl = get_explicit_col(col, pal, col_na, col_limits), Projection = factor(rep('old', nrow(eigenvectors(dif))), c('old', 'new'))) rm(col) - + if (!is.null(new_dcs)) { point_data <- rbind(point_data, cbind( as.data.frame(flipped_dcs(new_dcs, dims)), @@ -131,16 +131,16 @@ plot.DiffusionMap <- function( )) col_legend } - + lvl_fn <- if (is.factor(point_data$Colour)) levels else unique col_lvls <- na.omit(as.character(lvl_fn(point_data$Colour))) col_breaks <- point_data$ColourExpl[match(col_lvls, point_data$Colour)] is_one_colour <- length(col_lvls) == 1L - + if (length(dims) == 2) { d1 <- names(point_data)[[1L]] d2 <- names(point_data)[[2L]] - + use_mapping <- continuous || is_projection || !is.null(col_by) p <- ggplot(point_data, aes(.data[[d1]], .data[[d2]])) + @@ -149,26 +149,26 @@ plot.DiffusionMap <- function( aes(fill = if (use_mapping) .data$Colour else .data$ColourExpl), colour = I('#00000000'), shape = I(21)) - + nomap_guide <- if (is_one_colour) 'none' else 'legend' p <- p + - if (is_projection) scale_fill_identity (name = legend_main, guide = 'legend', labels = names(projection_guide), breaks = projection_guide, na.value = col_na) - else if (!use_mapping) scale_fill_identity (name = legend_main, guide = nomap_guide, labels = col_lvls, breaks = col_breaks, na.value = col_na) + if (is_projection) scale_fill_identity(name = legend_main, guide = 'legend', labels = names(projection_guide), breaks = projection_guide, na.value = col_na) + else if (!use_mapping) scale_fill_identity(name = legend_main, guide = nomap_guide, labels = col_lvls, breaks = col_breaks, na.value = col_na) else if (continuous) scale_fill_gradientn(name = legend_main, colours = if (is.function(pal)) pal(100) else pal, na.value = col_na) - else scale_fill_manual (name = legend_main, values = if (is.function(pal)) pal(length(col_lvls)) else pal[seq_along(col_lvls)], breaks = col_lvls, labels = col_lvls, na.value = col_na) + else scale_fill_manual(name = legend_main, values = if (is.function(pal)) pal(length(col_lvls)) else pal[seq_along(col_lvls)], breaks = col_lvls, labels = col_lvls, na.value = col_na) if (box) p <- p + theme(panel.border = element_rect(fill = NA), axis.title.x = element_text(), axis.title.y = element_text()) if (ticks) p <- p + theme(axis.ticks = element_line(), axis.text.x = element_text(), axis.text.y = element_text()) if (axes) p <- p + geom_rangeframe(colour = par('col')) - if (ticks && axes && !box) p <- p + + if (ticks && axes && !box) p <- p + scale_x_continuous(breaks = extended_range_breaks()(point_data[[1L]])) + scale_y_continuous(breaks = extended_range_breaks()(point_data[[2L]])) p <- plot_more(p, rescale = NULL) - } else if (length(dims) == 3L) { + } else if (length(dims) == 3L) { # nolint: brace_linter. if (interactive) { p <- rgl::plot3d(point_data, ..., col = point_data$ColourExpl, axes = FALSE, box = FALSE) if (axes || ticks) { - axtype = if (axes) 'lines' else 'cull' - nticks = if (ticks) 5 else 0 + axtype <- if (axes) 'lines' else 'cull' + nticks <- if (ticks) 5 else 0 rgl::bbox3d(xlen = nticks, ylen = nticks, zlen = nticks, front = axtype, back = axtype) } if (box) rgl::box3d() @@ -184,10 +184,11 @@ plot.DiffusionMap <- function( rescale['to', d, ] <- c(0, 1) } } - + mar <- list(...)$mar if (is.null(mar)) mar <- par('mar') - old_mar <- mar; on.exit(par(mar = old_mar)) + old_mar <- mar + on.exit(par(mar = old_mar)) if (draw_legend) mar[[4]] <- mar[[4]] + 5 p <- scatterplot3d( point_data[, 1:3], ..., color = point_data$ColourExpl, mar = mar, @@ -195,7 +196,7 @@ plot.DiffusionMap <- function( box = box, tick.marks = ticks) rm(mar) plot_more(p, rescale = rescale) - + if (draw_legend) { args <- c(list(col_legend, pal = pal, main = legend_main), legend_opts) if (interactive) { @@ -209,7 +210,7 @@ plot.DiffusionMap <- function( } } } else stop(sprintf('dims is of wrong length (%s): Can only handle 2 or 3 dimensions', dims)) - + if (length(dims) == 2) p else invisible(p) } @@ -217,11 +218,11 @@ plot.DiffusionMap <- function( get_explicit_col <- function(col, pal, col_na, col_limits) { # if nothing is given, return one colour if (is.null(col)) return(par('col')) - + # if we have continuous colour, we are done. if (is.double(col)) return(continuous_colors(col, pal, col_limits)) - + # get palette length and convert col to consecutive integers length_pal <- if (is.factor(col)) @@ -231,7 +232,7 @@ get_explicit_col <- function(col, pal, col_na, col_limits) { else stopifnot(is.character(col)) if (is.factor(col)) col <- as.integer(col) - + # map integers to strings if necessary if (is.integer(col)) { if (is.function(pal)) { @@ -242,23 +243,18 @@ get_explicit_col <- function(col, pal, col_na, col_limits) { length_pal <- min(length(pal), length_pal) pal <- pal[seq_len(length_pal)] } - + idx_wrapped <- ((col - 1L) %% length_pal) + 1L col <- pal[idx_wrapped] col[is.na(col)] <- col_na } - + # if the color wasn’t numeric, use as is col } theme_really_minimal <- function(...) theme_minimal() + theme(axis.text.x = element_blank(), axis.text.y = element_blank(), ...) -# test: -# layout(matrix(1:8, 2)) -# mapply(function(t, a, b) plot(dif, ticks = t, axes = a, box = b, main = sprintf('t=%s a=%s b=%s', t, a, b)), -# c(T,T,T,T,F,F,F,F), c(T,F,T,F,T,F,T,F), c(T,T,F,F,T,T,F,F)) - #' @rdname plot.DiffusionMap #' @export setMethod('plot', c(x = 'DiffusionMap', y = 'numeric'), function(x, y, ...) plot.DiffusionMap(x, y, ...)) diff --git a/R/diffusionmap.r b/R/diffusionmap.r index 19be642..1628e20 100644 --- a/R/diffusionmap.r +++ b/R/diffusionmap.r @@ -3,15 +3,17 @@ #' @useDynLib destiny NULL -sigma_msg <- function(sigma) sprintf( - "The sigma parameter needs to be NULL, 'local', 'global', numeric or a %s object, not a %s.", - sQuote('Sigmas'), sQuote(class(sigma))) +sigma_msg <- function(sigma) { + sprintf( + "The sigma parameter needs to be NULL, 'local', 'global', numeric or a %s object, not a %s.", + sQuote('Sigmas'), sQuote(class(sigma))) +} #' Create a diffusion map of cells -#' +#' #' The provided data can be a double \link[base]{matrix} of expression data or a \link[base]{data.frame} with all non-integer (double) columns #' being treated as expression data features (and the others ignored), an \link[Biobase:class.ExpressionSet]{ExpressionSet}, or a \link[SingleCellExperiment]{SingleCellExperiment}. -#' +#' #' @param data Expression data to be analyzed and covariates. Provide \code{vars} to select specific columns other than the default: all double value columns. #' If \code{distance} is a distance matrix, \code{data} has to be a \code{\link{data.frame}} with covariates only. #' @param sigma Diffusion scale parameter of the Gaussian kernel. One of \code{'local'}, \code{'global'}, a (\link[base]{numeric}) global sigma or a \link{Sigmas} object. @@ -35,9 +37,9 @@ sigma_msg <- function(sigma) sprintf( #' @param knn_params Parameters passed to \code{\link{find_knn}} #' @param verbose Show a progressbar and other progress information (default: do it if censoring is enabled) #' @param suppress_dpt Specify TRUE to skip calculation of necessary (but spacious) information for \code{\link{DPT}} in the returned object (default: FALSE) -#' +#' #' @return A DiffusionMap object: -#' +#' #' @slot eigenvalues Eigenvalues ranking the eigenvectors #' @slot eigenvectors Eigenvectors mapping the datapoints to \code{n_eigs} dimensions #' @slot sigmas \link{Sigmas} object with either information about the \link{find_sigmas} heuristic run or just local or \link{optimal_sigma}. @@ -57,22 +59,22 @@ sigma_msg <- function(sigma) sprintf( #' @slot missing_range Whole data range for missing value model #' @slot vars Vars parameter used to extract the part of the data used for diffusion map creation #' @slot knn_params Parameters passed to \code{\link{find_knn}} -#' +#' #' @seealso \link{DiffusionMap methods} to get and set the slots. \code{\link{find_sigmas}} to pre-calculate a fitting global \code{sigma} parameter -#' +#' #' @examples #' data(guo) #' DiffusionMap(guo) #' DiffusionMap(guo, 13, censor_val = 15, censor_range = c(15, 40), verbose = TRUE) -#' +#' #' covars <- data.frame(covar1 = letters[1:100]) #' dists <- dist(matrix(rnorm(100*10), 100)) #' DiffusionMap(covars, distance = dists) -#' +#' #' @importFrom methods setClass validObject #' @rdname DiffusionMap-class #' @export -setClass( +setClass( # nolint: cyclocomp_linter. 'DiffusionMap', slots = c( eigenvalues = 'numeric', @@ -133,7 +135,7 @@ setClass( #' @importFrom SingleCellExperiment reducedDimNames reducedDim<- #' @rdname DiffusionMap-class #' @export -DiffusionMap <- function( +DiffusionMap <- function( # nolint: object_name_linter, cyclocomp_linter. data = stopifnot_distmatrix(distance), sigma = 'local', k = find_dm_k(dataset_n_observations(data, distance) - 1L), @@ -154,24 +156,24 @@ DiffusionMap <- function( # make sure those promises are resolved before we mess with `data` force(k) force(n_eigs) - + chkDots(...) - + if (is.null(sigma) || !is(sigma, 'Sigmas') && isTRUE(is.na(sigma))) sigma <- 'local' if (!is(sigma, 'Sigmas') && !(length(sigma) == 1L && sigma %in% c('local', 'global')) && !is.numeric(sigma)) stop(sigma_msg(sigma)) - + if (identical(sigma, 'local') && any(n_local > k)) stop('For local sigma, All entries of n_local (', paste(n_local, collapse = ','), ') have to be \u2264 k (', k, ')') - + # store away data and continue using imputed, unified version data_env <- new.env(parent = .GlobalEnv) - + if (is_distmatrix(distance)) { if (!(is.data.frame(data) || is.null(data))) stop('If you provide a matrix for `distance`, `data` has to be NULL or a covariate `data.frame` is of class', class(data)) if (!is.null(n_pcs)) stop('If you provide a matrix for `distance`, `n_pcs` has to be NULL') - + data_env$data <- if (is.null(data)) distance else data # put covariates or distance dists <- as(distance, 'symmetricMatrix') if (!is.null(rownames(data_env$data))) rownames(dists) <- colnames(dists) <- rownames(data) @@ -181,14 +183,14 @@ DiffusionMap <- function( } else { dists <- NULL distance <- match.arg(distance) - + data_env$data <- data data <- dataset_extract_doublematrix(data, vars) imputed_data <- data if (anyNA(imputed_data)) imputed_data <- as.matrix(hotdeck(data, imp_var = FALSE)) n <- nrow(imputed_data) - + # PCA pca <- get_pca(imputed_data, data_env$data, n_pcs, verbose) if (is.null(pca) && ncol(imputed_data) > 500L) { @@ -205,44 +207,43 @@ DiffusionMap <- function( reducedDim(data_env$data, 'pca') <- pca } } - + # arg validation - - if (n <= n_eigs + 1L) stop('Eigen decomposition not possible if n \u2264 n_eigs+1 (And ', n,' \u2264 ', n_eigs + 1L, ')') + + if (n <= n_eigs + 1L) stop('Eigen decomposition not possible if n \u2264 n_eigs+1 (And ', n, ' \u2264 ', n_eigs + 1L, ')') if (is.null(k) || is.na(k)) k <- n - 1L - #TODO: optimize case - #dense <- k == n - 1L - + #TODO: optimize case `dense <- k == n - 1L` + if (k >= n) stop(sprintf('k has to be < nrow(data) (And %s \u2265 nrow(data))', k)) - + censor <- test_censoring(censor_val, censor_range, imputed_data, missing_range) - + if (censor && !identical(distance, 'euclidean')) stop('censoring model only valid with euclidean distance') - + knn <- get_knn(data_or_pca, dists, k, distance, knn_params, verbose) # use dists if given, else compute from pca if available, else from data - + sigmas <- get_sigmas(imputed_data, knn$dist, sigma, n_local, distance, censor_val, censor_range, missing_range, vars, verbose) sigma <- optimal_sigma(sigmas) # single number = global, multiple = local - + trans_p <- transition_probabilities(imputed_data, sigma, knn$dist_mat, censor, censor_val, censor_range, missing_range, verbose) rm(knn) # free memory - + d <- rowSums(trans_p, na.rm = TRUE) + 1 # diagonal set to 1 - + # normalize by density if requested norm_p <- get_norm_p(trans_p, d, d, density_norm) rm(trans_p) # free memory - + d_norm <- rowSums(norm_p) - + # calculate the inverse of a diagonal matrix by inverting the diagonal d_rot <- Diagonal(x = d_norm ^ -.5) transitions <- as(d_rot %*% norm_p %*% d_rot, 'symmetricMatrix') rm(norm_p) # free memory - + eig_transitions <- decomp_transitions(transitions, n_eigs + 1L, verbose) - + eig_vec <- eig_transitions$vectors eig_val <- eig_transitions$values if (rotate) eig_vec <- as.matrix(t(t(eig_vec) %*% d_rot)) @@ -253,7 +254,7 @@ DiffusionMap <- function( colnames(eig_vec) <- names(eig_val) <- paste0('DC', seq(0, n_eigs)) - + new( 'DiffusionMap', eigenvalues = eig_val[-1], @@ -303,7 +304,7 @@ get_sigmas <- function(imputed_data, nn_dists, sigma, n_local, distance = 'eucli sig_mat <- nn_dists[, n_local, drop = FALSE] sigma <- rowSums(sig_mat) / length(n_local) / 2 } - new('Sigmas', + new('Sigmas', log_sigmas = NULL, dim_norms = NULL, optimal_sigma = sigma, @@ -312,7 +313,7 @@ get_sigmas <- function(imputed_data, nn_dists, sigma, n_local, distance = 'eucli } else if (identical(sigma, 'global')) { if (!identical(distance, 'euclidean')) stop(sprintf('You have to use euclidean distances with sigma estimation, not %s.', sQuote(distance))) - + find_sigmas( imputed_data, distance = distance, @@ -331,11 +332,11 @@ get_sigmas <- function(imputed_data, nn_dists, sigma, n_local, distance = 'eucli #' @importFrom methods hasMethod #' @importFrom SingleCellExperiment reducedDimNames reducedDim -get_pca <- function(data_mat, data_raw, n_pcs, verbose = FALSE) { +get_pca <- function(data_mat, data_raw, n_pcs, verbose = FALSE) { # nolint: cyclocomp_linter. stopifnot(is.null(n_pcs) || length(n_pcs) == 1L) # If we suppress PCA computation, return NULL if (isTRUE(is.na(n_pcs))) return(NULL) - + # get PCs from SingleCellExperiment if possible existing_pca <- if (hasMethod('reducedDim', class(data_raw))) reducedDim(data_raw, 'pca') if (!is.null(existing_pca)) { @@ -345,7 +346,7 @@ get_pca <- function(data_mat, data_raw, n_pcs, verbose = FALSE) { } else if (n_pcs < ncol(existing_pca)) { warning('Specified n_pcs < ncol(reducedDim(data, "pca")), using subset') return(existing_pca[, seq_len(n_pcs), drop = FALSE]) - } else {# n_pcs > ncol(pcs) + } else { #: n_pcs > ncol(pcs) warning('Specified n_pcs > ncol(reducedDim(data, "pca")), recalculating PCA') } } else if (is.null(n_pcs)) { @@ -361,13 +362,15 @@ get_pca <- function(data_mat, data_raw, n_pcs, verbose = FALSE) { get_knn <- function(imputed_data, dists, k, distance = 'euclidean', knn_params = list(), verbose = FALSE) { stopifnot(is.null(imputed_data) != is.null(dists)) - + if (!is.null(dists)) { nn_dist <- t(apply(dists, 1, function(row) sort(row)[2:k])) list(dist = nn_dist, dist_mat = dists) - } else tryCatch({ - verbose_timing(verbose, 'finding knns', do.call(find_knn, c(list(imputed_data, k, distance = distance), knn_params))) - }, error = function(e) stop('Could not call find_knn. Consider specifying `knn_params = list(M = )`. Original error:\n', e$message, call. = FALSE)) + } else { + tryCatch({ + verbose_timing(verbose, 'finding knns', do.call(find_knn, c(list(imputed_data, k, distance = distance), knn_params))) + }, error = function(e) stop('Could not call find_knn. Consider specifying `knn_params = list(M = )`. Original error:\n', e$message, call. = FALSE)) + } } @@ -375,14 +378,16 @@ get_knn <- function(imputed_data, dists, k, distance = 'euclidean', knn_params = #' @importFrom utils txtProgressBar setTxtProgressBar transition_probabilities <- function(imputed_data, sigma, dists, censor, censor_val, censor_range, missing_range, verbose) { n <- nrow(dists) - + # create markovian transition probability matrix (trans_p) - + cb <- if (verbose) { pb <- txtProgressBar(1, n, style = 3) function(i) setTxtProgressBar(pb, i) - } else invisible - + } else { + invisible + } + # initialize trans_p trans_p <- verbose_timing(verbose, 'Calculating transition probabilities', { if (censor) @@ -390,15 +395,15 @@ transition_probabilities <- function(imputed_data, sigma, dists, censor, censor_ else no_censoring(dists, sigma, cb) }) - + if (verbose) close(pb) - + #nnzero - + # normalize trans_p and only retain intra-cell transitions diag(trans_p) <- 0 trans_p <- drop0(trans_p) - + stopifnot(is(trans_p, 'symmetricMatrix')) trans_p } @@ -407,7 +412,7 @@ transition_probabilities <- function(imputed_data, sigma, dists, censor, censor_ no_censoring <- function(dists, sigma, cb = invisible) { d2 <- dists ^ 2 stopifnot(isSymmetric(d2)) - + t_p <- if (length(sigma) == 1L) { exp(-d2@x / (2 * sigma ^ 2)) } else { @@ -416,12 +421,12 @@ no_censoring <- function(dists, sigma, cb = invisible) { i <- coords@i + 1L j <- coords@j + 1L sig2 <- sigma^2 - - S1 <- sigma[i] * sigma[j] - S2 <- sig2[i] + sig2[j] - sqrt(2 * S1 / S2) * exp(-d2@x / S2) + + s1 <- sigma[i] * sigma[j] + s2 <- sig2[i] + sig2[j] + sqrt(2 * s1 / s2) * exp(-d2@x / s2) } - + sparseMatrix(d2@i, p = d2@p, x = t_p, dims = dim(d2), symmetric = TRUE, index1 = FALSE) } @@ -432,7 +437,7 @@ get_norm_p <- function(trans_p, d, d_new, density_norm) { if (density_norm) { trans_p <- as(trans_p, 'dgTMatrix') # use non-symmetric triples to operate on all values stopifsmall(max(trans_p@x, na.rm = TRUE)) - + #creates a dgCMatrix sparseMatrix(trans_p@i, trans_p@j, x = trans_p@x / (d_new[trans_p@i + 1] * d[trans_p@j + 1]), dims = dim(trans_p), index1 = FALSE) } else { @@ -441,5 +446,6 @@ get_norm_p <- function(trans_p, d, d_new, density_norm) { } -decomp_transitions <- function(transitions, n_eigs, verbose) +decomp_transitions <- function(transitions, n_eigs, verbose) { verbose_timing(verbose, 'performing eigen decomposition', eig_decomp(transitions, n_eigs)) +} diff --git a/R/dist-matrix-coerce.r b/R/dist-matrix-coerce.r index 6ce530a..0ccdc55 100644 --- a/R/dist-matrix-coerce.r +++ b/R/dist-matrix-coerce.r @@ -2,7 +2,7 @@ setOldClass('dist') #' @importFrom Matrix sparseMatrix -as.Matrix.dist <- function(from) { +as.Matrix.dist <- function(from) { # nolint: object_name_linter. s <- attr(from, 'Size') i <- rep.int(seq_len(s - 1L), rev(seq_len(s - 1L))) j <- rev(abs(sequence(seq.int(s - 1L)) - s) + 1L) diff --git a/R/dpt-branching.r b/R/dpt-branching.r index 0473bbf..69d8f03 100644 --- a/R/dpt-branching.r +++ b/R/dpt-branching.r @@ -1,47 +1,47 @@ auto_branch <- function(dpt, cells, stats, w_width, nmin = 10L, gmin = 1.1) { n <- length(cells) - + stopifnot(n >= nmin) stopifnot(stats$g >= gmin) - + # initialize one level (branch, tips) and three branches (dpt) branches <- cut_branches(dpt[cells, stats$tips], cells, w_width) # list of vectors of numeric indices branch <- matrix(idx_list_to_vec(branches, cells, n), n, 1L) tips <- matrix(logical(n), n, 1L) tips[match(stats$tips, cells), 1L] <- TRUE - + subs <- mapply(function(idx_sub, i) { if (length(idx_sub) < nmin || !i %in% idx_sub) # Tip cells can end up undecided! return(NULL) - + sub_stats <- tipstats(dpt, idx_sub, i) if (sub_stats$g < gmin) return(NULL) - + auto_branch(dpt, idx_sub, sub_stats, w_width, nmin, gmin) }, branches, stats$tips, SIMPLIFY = FALSE) - + # add dpt columns to dpt and a level column to branch/tips if any branch was subdivided nonnull_subs <- vapply(subs, Negate(is.null), logical(1L)) if (any(nonnull_subs)) { n_sublevels <- do.call(max, lapply(subs[nonnull_subs], function(s) ncol(s$branch))) - + branch <- cbind(branch, matrix(NA_integer_, n, n_sublevels)) tips <- cbind(tips, matrix(NA, n, n_sublevels)) - + for (s in which(nonnull_subs)) { sub <- subs[[s]] idx_sub <- branches[[s]] - + idx_newcol <- seq.int(ncol(branch) - n_sublevels + 1L, length.out = ncol(sub$branch)) stopifnot(ncol(sub$branch) == ncol(sub$tips)) - + branch_offset <- max(branch, na.rm = TRUE) branch[match(idx_sub, cells), idx_newcol] <- sub$branch + branch_offset - tips[ match(idx_sub, cells), idx_newcol] <- sub$tips + tips[match(idx_sub, cells), idx_newcol] <- sub$tips } } - + stopifnot(ncol(branch) == ncol(tips)) list(branch = branch, tips = tips) } @@ -66,37 +66,37 @@ cut_branches <- function(dpt_mat, cells, w_width) { branchcut <- function(dpt_mat, bid, b, w_width) { n <- nrow(bid) all_branches <- seq_len(3L) - + # sanity checks stopifnot(b %in% all_branches) stopifnot(ncol(dpt_mat) == 3L, ncol(bid) == 3L) stopifnot(nrow(dpt_mat) == n) stopifnot(is.double(dpt_mat), is.integer(bid)) - - # find cell indexes per branch + + # find cell indexes per branch other <- all_branches[all_branches != b] b1 <- other[[1L]] b2 <- other[[2L]] - + # DPT for other branches, sorted by b3 b3_idxs <- bid[, b] dpt1 <- dpt_mat[b3_idxs, b1] dpt2 <- dpt_mat[b3_idxs, b2] - + kcor <- vapply(seq_len(n - 1L), function(s1) { s2 <- s1 + 1L l <- seq_len(s1) r <- seq(s2, n) - + k_l <- kendall_finite_cor(dpt1[l], dpt2[l], dpt1[[s2]], dpt2[[s2]]) k_r <- kendall_finite_cor(dpt1[r], dpt2[r], dpt1[[s1]], dpt2[[s1]]) - - k_l/s1 - k_r/(n - s1) + + k_l / s1 - k_r / (n - s1) }, double(1L)) - + kcor <- smth.gaussian(kcor, w_width) cut <- which.max(kcor) - + b3_idxs[seq_len(cut)] } @@ -110,7 +110,7 @@ organize_branches <- function(branches) { intersect_branches <- function(bs) intersect(branches[[bs[[1L]]]], branches[[bs[[2L]]]]) branch_intersections <- lapply(combn(3L, 2L, simplify = FALSE), intersect_branches) inters <- Reduce(union, branch_intersections, integer()) - + lapply(branches, function(b) union(setdiff(b, inters), b[[1]])) } @@ -121,10 +121,10 @@ kendall_finite_cor <- function(b1, b2, new1, new2) { b11 <- numeric(length(b1)) b11[b1 >= new1] <- 1 b11[b1 < new1] <- -1 - + b22 <- numeric(length(b2)) b22[b2 >= new2] <- 1 b22[b2 < new2] <- -1 - + as.double(b11 %*% b22) # strip dims } diff --git a/R/dpt-helpers.r b/R/dpt-helpers.r index 84e4eb9..e29d834 100644 --- a/R/dpt-helpers.r +++ b/R/dpt-helpers.r @@ -1,16 +1,16 @@ #' Find a random root cell index -#' +#' #' Finds a cell that has the maximum DPT distance from a randomly selected one. -#' +#' #' @param dm_or_dpt A \code{\link{DiffusionMap}} or \code{\link{DPT}} object -#' +#' #' @return A cell index -#' +#' #' @examples #' data(guo) #' dm <- DiffusionMap(guo) #' random_root(dm) -#' +#' #' @export random_root <- function(dm_or_dpt) { dpt <- dummy_dpt(dm_or_dpt) @@ -20,18 +20,18 @@ random_root <- function(dm_or_dpt) { #' Find tips in a DiffusionMap object -#' +#' #' @param dm_or_dpt A \code{\link{DiffusionMap}} or \code{\link{DPT}} object #' @param root Root cell index from which to find tips. (default: random) -#' +#' #' @return An integer vector of length 3 -#' +#' #' @examples #' data(guo) #' dm <- DiffusionMap(guo) #' is_tip <- l_which(find_tips(dm), len = ncol(guo)) #' plot(dm, col = factor(is_tip)) -#' +#' #' @export find_tips <- function(dm_or_dpt, root = random_root(dm_or_dpt)) { dpt <- dummy_dpt(dm_or_dpt) @@ -44,7 +44,7 @@ tipstats <- function(dpt, cells, tips) { y <- if (length(tips) >= 2L) tips[[2L]] else cells[[which.max(dx)]] dy <- dpt[y, cells] z <- if (length(tips) == 3L) tips[[3L]] else cells[[which.max(dx + dy)]] - + list( tips = c(x, y, z), dx = dx, dy = dy, diff --git a/R/dpt-methods-matrix.r b/R/dpt-methods-matrix.r index 3594d7b..7de23c3 100644 --- a/R/dpt-methods-matrix.r +++ b/R/dpt-methods-matrix.r @@ -2,21 +2,21 @@ NULL #' DPT Matrix methods -#' +#' #' Treat DPT object as a matrix of cell-by-cell DPT distances. -#' +#' #' @param x \code{\link{DPT}} object. #' @param i,j \link[=numeric]{Numeric} or \link{logical} index. #' @param ... ignored #' @param drop If \code{\link{TRUE}}, coerce result to a vector if it would otherwise have \code{1 \%in\% dim(result)}. -#' +#' #' @examples #' data(guo_norm) #' dm <- DiffusionMap(guo_norm) #' dpt <- DPT(dm) #' set.seed(1) #' plot(dpt[random_root(dpt), ], Biobase::exprs(guo_norm)['DppaI', ]) -#' +#' #' @seealso \code{\link{as.matrix.DPT}} ## Not [[.DPT, that is in the extraction methods #' @aliases [.DPT nrow.DPT ncol.DPT dim.DPT @@ -31,21 +31,21 @@ setMethod('[', c('DPT', 'index', 'index', 'logicalOrMissing'), function(x, i, j, stopifnot(length(list(...)) == 0L) evas <- eigenvalues(x@dm) eves <- eigenvectors(x@dm) - + # get numeric from negative or logical indices i <- seq_len(nrow(x))[i] j <- seq_len(nrow(x))[j] - + norm <- array(NA, c(length(i), length(j), length(evas))) ev_dist <- norm - + for (e in seq_along(evas)) norm[, , e] <- evas[[e]] - norm <- norm / (1-norm) - + norm <- norm / (1 - norm) + do.call(mapply, c(list(function(ii, jj) { ev_dist[ii, jj, ] <<- eves[i[[ii]], ] - eves[j[[jj]], ] }), expand.grid(ii = seq_along(i), jj = seq_along(j)))) - + r <- sqrt(apply(norm^2 * ev_dist^2, 1:2, sum)) if (drop && 1L %in% dim(r)) dim(r) <- NULL r diff --git a/R/dpt-methods.r b/R/dpt-methods.r index fedfe15..13351a7 100644 --- a/R/dpt-methods.r +++ b/R/dpt-methods.r @@ -2,23 +2,23 @@ NULL #' DPT methods -#' +#' #' Methods for the \link{DPT} class. \code{branch_divide} subdivides branches for plotting (see the examples). -#' +#' #' @param dpt,object DPT object #' @param divide Vector of branch numbers to use for division #' @param value Value of slot to set -#' +#' #' @return \code{branch_divide} and \code{dataset<-} return the changed object, \code{dataset} the extracted data, and \code{tips} the tip indices. -#' +#' #' @examples #' data(guo_norm) #' dpt <- DPT(DiffusionMap(guo_norm)) #' dpt_9_branches <- branch_divide(dpt, 1:3) #' plot(dpt_9_branches, col_by = 'branch') -#' +#' #' @seealso \link{plot.DPT} uses \code{branch_divide} for its \code{divide} argument. -#' +#' #' @aliases dataset.DPT #' @name DPT methods #' @rdname DPT-methods @@ -30,25 +30,25 @@ NULL branch_divide <- function(dpt, divide = integer(0L)) { check_dpt(dpt) if (length(divide) == 0L) return(dpt) - + for (b in divide) { super_rows <- dpt@branch[, 1] == b & !is.na(dpt@branch[, 1]) if (!any(super_rows)) { available <- na.omit(unique(dpt@branch[, 1])) stop('invalid branch to divide ', b, ' not in ', available) } - + # shift sub branches/tips to the left dpt@branch[super_rows, ] <- cbind(dpt@branch[super_rows, -1], NA) dpt@tips [super_rows, ] <- cbind(dpt@tips [super_rows, -1], NA) - + # TODO: maybe also modify DPT? } - + vacant_levels <- apply(dpt@branch, 2L, function(col) all(is.na(col))) dpt@branch <- dpt@branch[, !vacant_levels] dpt@tips <- dpt@tips [, !vacant_levels] - + dpt } diff --git a/R/dpt-plotting.r b/R/dpt-plotting.r index f358442..56740ec 100644 --- a/R/dpt-plotting.r +++ b/R/dpt-plotting.r @@ -2,9 +2,9 @@ NULL #' Plot DPT -#' +#' #' Plots diffusion components from a Diffusion Map and the accompanying Diffusion Pseudo Time (\code{\link{DPT}}) -#' +#' #' @param x A \code{\link{DPT}} object. #' @param y,root Root branch ID. Will be used as the start of the DPT. (default: lowest branch ID) #' (If longer than size 1, will be interpreted as \code{c(root, branches)}) @@ -18,11 +18,11 @@ NULL #' @param ... Graphical parameters supplied to \code{\link{plot.DiffusionMap}} #' @param col See \code{\link{plot.DiffusionMap}}. This overrides \code{col_by} #' @param legend_main See \code{\link{plot.DiffusionMap}}. -#' +#' #' @return The return value of the underlying call is returned, i.e. a scatterplot3d or rgl object for 3D plots. -#' +#' #' @aliases plot.DPT -#' +#' #' @examples #' data(guo_norm) #' dm <- DiffusionMap(guo_norm) @@ -31,7 +31,7 @@ NULL #' plot(dpt, 2L, col_by = 'branch') #' plot(dpt, 1L, 2:3, col_by = 'num_cells') #' plot(dpt, col_by = 'DPT3') -#' +#' #' @importFrom graphics plot points #' @importFrom methods is setMethod #' @importFrom scales colour_ramp rescale @@ -39,7 +39,7 @@ NULL #' @importFrom rlang .data #' @importFrom ggplot2 geom_path geom_point scale_colour_identity #' @export -plot.DPT <- function( +plot.DPT <- function( # nolint: cyclocomp_linter. x, root = NULL, paths_to = integer(0L), dcs = 1:2, @@ -54,48 +54,48 @@ plot.DPT <- function( ) { dpt <- x dpt_flat <- branch_divide(dpt, divide) - + if (!is.null(root) && length(root) < 1L) stop('root needs to be specified') root <- if (is.null(root)) min(dpt_flat@branch[, 1], na.rm = TRUE) else as.integer(root) paths_to <- as.integer(paths_to) - + if (length(root) > 1L && length(paths_to) > 0L) stop('(length(root), length(paths_to)) needs to be (1, 0-n) or (2-n, 0), but is (', length(root), ', ', length(paths_to), ')') stopifnot(length(dcs) %in% 2:3) - + if (length(root) > 1L && length(paths_to) == 0L) { paths_to <- root[-1] root <- root[[1]] } - + pt_vec <- dpt_for_branch(dpt_flat, root) - + evs <- flipped_dcs(dpt@dm, dcs) - + plot_paths <- function(p, ..., rescale) { plot_points <- get_plot_fn(p) rescale_fun <- if (is.null(rescale)) identity else function(x) rescale_mat(x, rescale) - + for (b in seq_along(paths_to)) { idx <- dpt@branch[, 1] %in% c(root, paths_to[[b]]) path <- average_path(pt_vec[idx], evs[idx, ], w_width) p <- plot_points(p, rescale_fun(path), type = 'l', col = col_path[[b]], ...) } - + tips <- evs[dpt_flat@tips[, 1], ] p <- plot_points(p, rescale_fun(tips), col = col_tip, ...) - + if (!is(p, 'ggplot')) p else p + scale_colour_identity( name = 'Path and Tips', guide = 'legend', breaks = c(col_path[seq_along(paths_to)], col_tip), labels = c(sprintf('Path to %s', paths_to), 'Tips')) } - + col <- if (!is.null(col)) col else switch(col_by, @@ -103,16 +103,16 @@ plot.DPT <- function( branch = , Branch = dpt_flat@branch[, 1], dpt[[col_by]]) - + legend_main <- switch(legend_main, dpt = 'DPT', branch = 'Branch', legend_main) - + args <- list( dpt@dm, dcs, plot_more = plot_paths, legend_main = legend_main, col = col, ...) - + if (!identical(Sys.getenv('LOG_LEVEL'), '')) message('Args:\n', paste(capture.output(print(args)), collapse = '\n')) do.call(plot, args) } @@ -127,7 +127,7 @@ setMethod('plot', c('DPT', 'missing'), function(x, y, ...) { args <- list(...) root <- args$root # may be NULL args$root <- NULL - + do.call(plot.DPT, c(list(x, root), args)) }) @@ -157,5 +157,7 @@ get_plot_fn <- function(p) { switch(type, p = rgl::points3d, l = rgl::lines3d, stop)(x, y, z, ...) p2 } - } else stop('unknown p passed to plot_more (class(es): ', paste(class(p), collapse = ', '), ')') + } else { + stop('unknown p passed to plot_more (class(es): ', paste(class(p), collapse = ', '), ')') + } } diff --git a/R/dpt.r b/R/dpt.r index 3586de5..a05b805 100644 --- a/R/dpt.r +++ b/R/dpt.r @@ -1,26 +1,26 @@ #' Diffusion Pseudo Time #' #' Create pseudotime ordering and assigns cell to one of three branches -#' +#' #' Treat it as a matrix of pseudotime by subsetting (\code{\link[=dim.DPT]{[ dim nrow ncol}} \code{\link[=as.matrix.DPT]{as.matrix}}), and as a list of pseudodime, and expression vectors (\code{\link[=names.DPT]{$ [[ names}} \code{\link[=as.data.frame.DPT]{as.data.frame}}). -#' +#' #' @param dm A \code{\link{DiffusionMap}} object. Its transition probabilities will be used to calculate the DPT #' @param tips The cell index/indices from which to calculate the DPT(s) (integer of length 1-3) #' @param ... Unused. All parameters to the right of the \code{...} have to be specified by name (e.g. \code{DPT(dm, w_width = 0.2)}) #' @param w_width Window width to use for deciding the branch cutoff -#' +#' #' @return A \code{DPT} object: -#' +#' #' @slot branch \code{\link[base]{matrix}} (of \code{\link[base]{integer}}) recursive branch labels for each cell (row); \code{NA} for undeceided. Use \code{\link{branch_divide}} to modify this. #' @slot tips \code{\link[base]{matrix}} (of \code{\link[base]{logical}}) indicating if a cell (row) is a tip of the corresponding banch level (col) #' @slot dm \code{\link{DiffusionMap}} used to create this DPT object -#' +#' #' @examples #' data(guo_norm) #' dm <- DiffusionMap(guo_norm) #' dpt <- DPT(dm) #' str(dpt) -#' +#' #' @importFrom methods setClass #' @rdname DPT #' @export @@ -36,20 +36,20 @@ setClass( #' @rdname DPT #' @export -DPT <- function(dm, tips = random_root(dm), ..., w_width = .1) { +DPT <- function(dm, tips = random_root(dm), ..., w_width = .1) { # nolint: object_name_linter. chkDots(...) if (!is(dm, 'DiffusionMap')) stop('dm needs to be of class DiffusionMap, not ', class(dm)) if (!length(tips) %in% 1:3) stop('you need to specify 1-3 tips, got ', length(tips)) - + dpt <- dummy_dpt(dm) all_cells <- seq_len(nrow(dpt)) - + stats <- tipstats(dpt, all_cells, tips) branches <- auto_branch(dpt, all_cells, stats, w_width) - + colnames(branches$branch) <- paste0('Branch', seq_len(ncol(branches$branch))) colnames(branches$tips) <- paste0('Tips', seq_len(ncol(branches$tips))) - + rownames(branches$branch) <- rownames(branches$tips) <- rownames(dm@eigenvectors) dpt@branch <- branches$branch dpt@tips <- branches$tips diff --git a/R/eig_decomp.r b/R/eig_decomp.r index 728e8a4..8a557a0 100644 --- a/R/eig_decomp.r +++ b/R/eig_decomp.r @@ -1,30 +1,30 @@ #' Fast eigen decomposition using \code{\link[RSpectra]{eigs}} -#' +#' #' By default uses a random initialization vector that you can make deterministic using #' \code{\link[base]{set.seed}} or override by specifying \code{opts = list(initvec = ...)}. #' -#' @param M A matrix (e.g. from the Matrix package) or +#' @param m A matrix (e.g. from the Matrix package) or #' a function (see \code{\link[RSpectra]{eigs}}). #' @param n_eigs Number of eigenvectors to return. #' @param sym defunct and ignored. #' @param ... Passed to \code{\link[RSpectra]{eigs}}. #' @param opts Passed to \code{\link[RSpectra]{eigs}}. -#' +#' #' @return see \code{\link[RSpectra]{eigs}}. -#' +#' #' @examples #' eig_decomp(cbind(c(1,0,-1), c(0,1,0), c(-1,0,1)), 2) -#' +#' #' @importFrom Matrix isSymmetric #' @importFrom RSpectra eigs eigs_sym #' @importFrom stats runif #' @export -eig_decomp <- function(M, n_eigs, sym, ..., opts = list()) { +eig_decomp <- function(m, n_eigs, sym, ..., opts = list()) { if (!('initvec' %in% names(opts))) - opts$initvec <- runif(nrow(M)) - .5 + opts$initvec <- runif(nrow(m)) - .5 # eigs cannot handle symmetricMatrix & sparseMatrix yet # TODO: low-effort. We can copy the memory and use the `lower = T/F` arg instead - if (is(M, 'dsCMatrix')) M <- as(M, 'dgCMatrix') - if (is(M, 'dsRMatrix')) M <- as(M, 'dgRMatrix') - eigs(M, n_eigs, ..., opts = opts) + if (is(m, 'dsCMatrix')) m <- as(m, 'dgCMatrix') + if (is(m, 'dsRMatrix')) m <- as(m, 'dgRMatrix') + eigs(m, n_eigs, ..., opts = opts) } diff --git a/R/expressionset-helpers.r b/R/expressionset-helpers.r index f1910b6..84f77ef 100644 --- a/R/expressionset-helpers.r +++ b/R/expressionset-helpers.r @@ -2,15 +2,15 @@ NULL #' Convert object to \link[Biobase:class.ExpressionSet]{ExpressionSet} or read it from a file -#' +#' #' These functions present quick way to create \link[Biobase:class.ExpressionSet]{ExpressionSet} objects. -#' +#' #' They work by using all continuous (double) columns as expression data, and all others as observation annotations. -#' +#' #' @param x \link[base]{data.frame} to convert to an \link[Biobase:class.ExpressionSet]{ExpressionSet}. -#' +#' #' @return an \link[Biobase:class.ExpressionSet]{ExpressionSet} object -#' +#' #' @examples #' library(Biobase) #' df <- data.frame(Time = seq_len(3), #integer column @@ -19,9 +19,9 @@ NULL #' set <- as.ExpressionSet(df) #' rownames(exprs(set)) == c('Actb', 'Gapdh') #' phenoData(set)$Time == 1:3 -#' +#' #' @seealso \link[utils]{read.table} on which \code{read.ExpressionSet} is based, and \link[Biobase:class.ExpressionSet]{ExpressionSet}. -#' +#' #' @name ExpressionSet helper methods #' @rdname ExpressionSet-helpers NULL @@ -32,27 +32,27 @@ NULL setGeneric('as.ExpressionSet', function(x, ...) standardGeneric('as.ExpressionSet'), valueClass = 'ExpressionSet') #' @param annotation_cols The data.frame columns used as annotations. All others are used as expressions. (Logical, character or numerical index array) -#' +#' #' @importFrom Biobase ExpressionSet AnnotatedDataFrame phenoData #' @rdname ExpressionSet-helpers #' @export setMethod('as.ExpressionSet', 'data.frame', function(x, annotation_cols = !sapply(x, is.double)) { if (!is.logical(annotation_cols)) annotation_cols <- l_which(annotation_cols, names(x)) - - assayData <- t(as.matrix(x[!annotation_cols])) - phenoData <- AnnotatedDataFrame(x[annotation_cols]) - - ExpressionSet(assayData, phenoData) + + assay_data <- t(as.matrix(x[!annotation_cols])) + pheno_data <- AnnotatedDataFrame(x[annotation_cols]) + + ExpressionSet(assay_data, pheno_data) }) #' @param file File path to read ASCII data from #' @param header Specifies if the file has a header row. #' @param ... Additional parameters to \link[utils]{read.table} -#' +#' #' @importFrom utils read.table #' @rdname ExpressionSet-helpers #' @export -read.ExpressionSet <- function(file, header = TRUE, ...) { +read.ExpressionSet <- function(file, header = TRUE, ...) { # nolint: object_name_linter. as.ExpressionSet(read.table(file, header, ...)) } diff --git a/R/find_dm_k.r b/R/find_dm_k.r index f154cc3..4e422cf 100644 --- a/R/find_dm_k.r +++ b/R/find_dm_k.r @@ -1,15 +1,15 @@ #' Find a suitable k -#' +#' #' The \code{k} parameter for the k nearest neighbors used in \link{DiffusionMap} should be as big as possible while #' still being computationally feasible. This function approximates it depending on the size of the dataset \code{n}. -#' +#' #' @param n Number of possible neighbors (nrow(dataset) - 1) #' @param min_k Minimum number of neighbors. Will be chosen for \eqn{n \ge big} #' @param small Number of neighbors considered small. If/where \eqn{n \le small}, n itself will be returned. #' @param big Number of neighbors considered big. If/where \eqn{n \ge big}, \code{min_k} will be returned. -#' +#' #' @return A vector of the same length as \code{n} that contains suitable \code{k} values for the respective \code{n} -#' +#' #' @examples #' curve(find_dm_k(n), 0, 13000, xname = 'n') #' curve(find_dm_k(n) / n, 0, 13000, xname = 'n') @@ -17,18 +17,18 @@ find_dm_k <- function(n, min_k = 100L, small = 1000L, big = 10000L) { stopifnot(small < big) if (is.null(n)) return(NULL) - + k <- rep(NA_integer_, length(n)) k[small >= n] <- n[small >= n] k[n >= big] <- min_k - + rest <- !is.na(n) & small < n & n < big - + n_shifted <- (n[rest] - small) / (big - small) # linear transf [small, big] -> [0, 1] k_shifted <- (cos(n_shifted * pi) + 1) / 2 # ease function [0, 1] -> [1, 0] k_rest <- min_k + k_shifted * (n[rest] - min_k) # linear transf [0, 1] -> [min_k, n] - + k[rest] <- as.integer(round(k_rest)) - + k -} \ No newline at end of file +} diff --git a/R/gene-relevance-methods.r b/R/gene-relevance-methods.r index f1f4f5b..448fd2d 100644 --- a/R/gene-relevance-methods.r +++ b/R/gene-relevance-methods.r @@ -3,22 +3,22 @@ NULL #' Gene Relevance methods -#' +#' #' \code{featureNames <- ...} can be used to set the gene names used for plotting #' (e.g. if the data contains hardly readably gene or transcript IDs). #' \code{dataset} gets the expressions used for the gene relevance calculations, #' and \code{distance} the distance measure. -#' +#' #' @param x,object \code{GeneRelevance} object #' @param value A text vector (\code{\link{character}} or \code{\link{factor}}) -#' +#' #' @return #' \code{dataset}, \code{distance}, and \code{featureNames} return the stored properties. #' The other methods return a \code{GeneRelevance} object (\code{print}, \code{... <- ...}), #' or NULL (\code{show}), invisibly -#' +#' #' @seealso \code{\link{gene_relevance}}, \link{Gene Relevance plotting} -#' +#' #' @examples #' data(guo_norm) #' dm <- DiffusionMap(guo_norm) @@ -26,7 +26,7 @@ NULL #' stopifnot(distance(gr) == distance(dm)) #' featureNames(gr)[[37]] <- 'Id2 (suppresses differentiation)' #' # now plot it with the changed gene name(s) -#' +#' #' @aliases featureNames.GeneRelevance dataset.GeneRelevance #' @name Gene Relevance methods #' @rdname Gene-Relevance-methods @@ -35,7 +35,7 @@ NULL #' @importFrom methods is #' @importFrom utils str -#' +#' #' @rdname Gene-Relevance-methods #' @export setMethod('print', 'GeneRelevance', function(x) { @@ -44,12 +44,12 @@ setMethod('print', 'GeneRelevance', function(x) { cat('is: ') if (is(d, 'Matrix')) cat(sprintf('%s%s%s %s (%s)\n', nrow(d), sym_times, ncol(d), class(d)[[1L]], mode(d@x))) else str(structure(d, dimnames = NULL)) - cat('featureNames: '); str(featureNames(x)) + cat('featureNames: '); str(featureNames(x)) # nolint: semicolon_linter. invisible(x) }) #' @importFrom methods show -#' +#' #' @rdname Gene-Relevance-methods #' @export setMethod('show', 'GeneRelevance', function(object) { diff --git a/R/gene-relevance-plotting-differential-map.r b/R/gene-relevance-plotting-differential-map.r index 2d3e6dd..67ad7bc 100644 --- a/R/gene-relevance-plotting-differential-map.r +++ b/R/gene-relevance-plotting-differential-map.r @@ -25,24 +25,25 @@ setMethod('plot_differential_map', c('GeneRelevance', 'missing'), function(coord differential_map <- function(relevance_map, genes = NULL, dims = 1:2, all = FALSE) { relevance_map <- updateObject(relevance_map) - + all_dims <- get_dim_range(relevance_map@partials, 3L, dims) if (!all(dims %in% all_dims)) stop( 'The relevance map contains only the dimensions ', paste(all_dims, collapse = ', '), ', not ', paste(setdiff(dims, all_dims), collapse = ', ')) - + genes_existing <- colnames(relevance_map@partials_norm) - if (is.null(genes)) genes <- genes_existing - else { + if (is.null(genes)) { + genes <- genes_existing + } else { genes_missing <- is.na(match(genes, genes_existing)) if (any(genes_missing)) stop( 'The dataset used for the relevance map does not contain gene(s) ', paste(genes[genes_missing], collapse = ', '), '. Did you mean ', paste(agrep(genes[genes_missing], genes_existing, value = TRUE), collapse = ', '), '?') } - + exprs <- relevance_map@exprs coords <- get_coords(relevance_map, dims) - + partials_norms <- relevance_map@partials_norm[, genes, drop = FALSE] do.call(rbind, lapply(genes, function(g) { @@ -54,14 +55,14 @@ differential_map <- function(relevance_map, genes = NULL, dims = 1:2, all = FALS delta <- diff(rev(range(dc, na.rm = TRUE))) partials / max(abs(partials), na.rm = TRUE) * d_var * delta }) - + cbind( as.data.frame(coords), Expression = exprs[, g], PartialsNorm = partials_norms[, g], Cell = if (!is.null(rownames(exprs))) rownames(exprs) else seq_len(nrow(exprs)), Gene = factor(g, levels = genes), - Angle = atan(partials[[2]] / partials[[1]] ), + Angle = atan(partials[[2]] / partials[[1]]), Magnitude = sqrt(partials[[1]]^2 + partials[[2]]^2)) })) } @@ -78,7 +79,7 @@ plot_differential_map_impl <- function(relevance_map, ..., genes, dims, pal, fac dtm <- differential_map(relevance_map, genes, dims) coords <- get_coords(relevance_map, dims) gene_names <- if (is.character(genes)) genes else colnames(relevance_map@exprs)[genes] - + d1 <- colnames(coords)[[1]] d2 <- colnames(coords)[[2]] gg <- ggplot(dtm, aes(.data[[d1]], .data[[d2]])) + @@ -90,9 +91,9 @@ plot_differential_map_impl <- function(relevance_map, ..., genes, dims, pal, fac ), arrow = arrow(length = unit(.01, 'npc')) ) + - scale_colour_gradientn(colours = pal) + + scale_colour_gradientn(colours = pal) + geom_rangeframe(colour = par('col')) + theme_really_minimal() - + if (length(genes) > 1) gg + faceter else gg + ggtitle(gene_names) } diff --git a/R/gene-relevance-plotting-gr-map.r b/R/gene-relevance-plotting-gr-map.r index b90e82d..9386565 100644 --- a/R/gene-relevance-plotting-gr-map.r +++ b/R/gene-relevance-plotting-gr-map.r @@ -29,14 +29,14 @@ setMethod('plot_gene_relevance', c('GeneRelevance', 'missing'), function(coords, #' @importFrom ggplot2 ggtitle #' @importFrom Biobase featureNames #' @importFrom utils head -plot_gene_relevance_impl <- function(relevance_map, ..., iter_smooth, n_top, genes, dims, pal, col_na, limit) { +plot_gene_relevance_impl <- function(relevance_map, ..., iter_smooth, n_top, genes, dims, pal, col_na, limit) { # nolint: cyclocomp_linter. chkDots(...) relevance_map <- updateObject(relevance_map) partials_norm <- relevance_map@partials_norm coords <- get_coords(relevance_map, dims) if (!is.numeric(iter_smooth) || length(iter_smooth) != 1L) stop('iter_smooth needs to be an integer(1)') - if (!is.numeric(n_top) || length(n_top) != 1L ) stop( 'n_top needs to be an integer(1)') - + if (!is.numeric(n_top) || length(n_top) != 1L) stop('n_top needs to be an integer(1)') + all_genes <- featureNames(relevance_map) if (is.null(genes)) { genes <- all_genes @@ -63,7 +63,7 @@ plot_gene_relevance_impl <- function(relevance_map, ..., iter_smooth, n_top, gen gene_ids <- counts_valid$genes_max scores <- counts_valid$Freq / sum(counts_valid$Freq) names(scores) <- gene_ids - + num_top <- min(5L, length(gene_ids)) top_n_cell_text <- apply(partials_norm, 1L, function(cell) { idxs <- head(order(cell, decreasing = TRUE), num_top) @@ -71,10 +71,10 @@ plot_gene_relevance_impl <- function(relevance_map, ..., iter_smooth, n_top, gen txt <- sprintf('%s. %s (%.3f)', seq_len(num_top), names, cell[idxs]) paste(txt, collapse = '\n') }) - - # Plot a single map with cells coloured by gene which has + + # Plot a single map with cells coloured by gene which has # the highest differential norm of all genes considered. - + # matrix cells by n_top. might contain NAs later max_genes <- if (n_top > 1L) genes_ord @@ -107,7 +107,7 @@ plot_gene_relevance_impl <- function(relevance_map, ..., iter_smooth, n_top, gen # Add more than two DC and return data frame so that user # can easily rebuild relevance map on other DC combination than 1 and 2. rel_map_data <- cbind(as.data.frame(coords), Gene = gene_labels, TopN = top_n_cell_text) - + d1 <- colnames(coords)[[1]] d2 <- colnames(coords)[[2]] rel_map <- ggplot(rel_map_data, aes(x = .data[[d1]], y = .data[[d2]], colour = .data$Gene, text = .data$TopN)) + @@ -116,10 +116,10 @@ plot_gene_relevance_impl <- function(relevance_map, ..., iter_smooth, n_top, gen scale_color_manual(values = pal, na.value = col_na) + ggtitle(sprintf('Gene relevance map')) + theme_really_minimal() - + rel_map$ids <- gene_ids rel_map$scores <- scores - + rel_map } diff --git a/R/gene-relevance-plotting-rank.r b/R/gene-relevance-plotting-rank.r index 18d442a..8c4277e 100644 --- a/R/gene-relevance-plotting-rank.r +++ b/R/gene-relevance-plotting-rank.r @@ -1,6 +1,6 @@ #' @rdname Gene-Relevance-plotting #' @export -setGeneric('plot_gene_relevance_rank', function(coords, exprs, ..., genes, dims = 1:2, n_top = 10L, pal = c("#3B99B1", "#F5191C"), bins = 10L, faceter = facet_wrap(~ Gene)) { +setGeneric('plot_gene_relevance_rank', function(coords, exprs, ..., genes, dims = 1:2, n_top = 10L, pal = c('#3B99B1', '#F5191C'), bins = 10L, faceter = facet_wrap(~ Gene)) { standardGeneric('plot_gene_relevance_rank') }) @@ -34,20 +34,20 @@ plot_gene_relevance_rank_impl <- function(relevance_map, ..., genes, dims, n_top if (is.function(pal)) pal <- pal(12) coords <- get_coords(relevance_map, dims) gene_names <- if (is.character(genes)) genes else colnames(relevance_map@exprs)[genes] - + genes_missing <- setdiff(genes, colnames(relevance_map@partials_norm)) if (length(genes_missing) > 0) { genes_close <- lapply(genes_missing, agrep, colnames(relevance_map@partials_norm), value = TRUE) stop('Missing genes: ', paste(genes_missing, collapse = ', '), '. ', 'Closest available: ', paste(unlist(genes_close), collapse = ', ')) } - + top10 <- function(x) sum(x <= 10) / length(x) - + partials <- as.data.frame(t(apply(-relevance_map@partials_norm, 1, rank)[genes, , drop = FALSE])) d <- gather(cbind(partials, as.data.frame(coords)), 'Gene', 'Rank', one_of(gene_names)) d$Gene <- factor(d$Gene, gene_names) - + d1 <- colnames(coords)[[1]] d2 <- colnames(coords)[[2]] gg <- ggplot(d, aes(.data[[d1]], .data[[d2]], z = .data$Rank)) + @@ -63,6 +63,6 @@ plot_gene_relevance_rank_impl <- function(relevance_map, ..., genes, dims, n_top ) + scale_alpha_continuous(name = '#Cells', trans = 'log10') + theme_really_minimal() - + if (length(genes) > 1) gg + faceter else gg + ggtitle(gene_names) } diff --git a/R/gene-relevance-plotting.r b/R/gene-relevance-plotting.r index 3de7865..0c64e3b 100644 --- a/R/gene-relevance-plotting.r +++ b/R/gene-relevance-plotting.r @@ -2,11 +2,11 @@ NULL #' Plot gene relevance or differential map -#' +#' #' \code{plot(gene_relevance, 'Gene')} plots the differential map of this/these gene(s), #' \code{plot(gene_relevance)} a relevance map of a selection of genes. #' Alternatively, you can use \code{plot_differential_map} or \code{plot_gene_relevance} on a \code{\link[=gene_relevance]{GeneRelevance}} or \code{\link{DiffusionMap}} object, or with two matrices. -#' +#' #' @param x \code{\link[=gene_relevance]{GeneRelevance}} object. #' @param y Gene name(s) or index/indices to create differential map for. (integer or character) #' @param coords A \code{\link{DiffusionMap}}/\code{\link[=gene_relevance]{GeneRelevance}} object or a cells \eqn{\times} dims \code{\link{matrix}}. @@ -24,23 +24,23 @@ NULL #' @param bins Number of hexagonal bins for \code{plot_gene_relevance_rank}. #' @param limit Limit the amount of displayed gene labels to the amount of available colors in \code{pal}? #' @param faceter A ggplot faceter like \code{\link[ggplot2]{facet_wrap}(~ Gene)}. -#' +#' #' @return ggplot2 plot, when plotting a relevance map with a list member \code{$ids} containing the gene IDs used. -#' +#' #' @seealso \code{\link{gene_relevance}}, \link{Gene Relevance methods} -#' +#' #' @examples #' data(guo_norm) #' dm <- DiffusionMap(guo_norm) #' gr <- gene_relevance(dm) #' plot(gr) # or plot_gene_relevance(dm) #' plot(gr, 'Fgf4') # or plot_differential_map(dm, 'Fgf4') -#' +#' #' guo_norm_mat <- t(Biobase::exprs(guo_norm)) #' pca <- prcomp(guo_norm_mat)$x #' plot_gene_relevance(pca, guo_norm_mat, dims = 2:3) #' plot_differential_map(pca, guo_norm_mat, genes = c('Fgf4', 'Nanog')) -#' +#' #' @name Gene Relevance plotting #' @rdname Gene-Relevance-plotting NULL diff --git a/R/gene-relevance.r b/R/gene-relevance.r index ad50780..f62d24c 100644 --- a/R/gene-relevance.r +++ b/R/gene-relevance.r @@ -2,9 +2,9 @@ NULL #' Gene relevances for entire data set -#' +#' #' The relevance map is cached insided of the \code{\link{DiffusionMap}}. -#' +#' #' @param coords A \code{\link{DiffusionMap}} object or a cells \eqn{\times} dims \code{\link{matrix}}. #' @param exprs An cells \eqn{\times} genes \code{\link{matrix}}. Only provide if \code{coords} is no \code{\link{DiffusionMap}}. #' @param ... Unused. All parameters to the right of the \code{...} have to be specified by name. @@ -19,9 +19,9 @@ NULL #' @param knn_params A \code{\link{list}} of parameters for \code{\link{find_knn}}. #' @param weights Weights for the partial derivatives. A vector of the same length as \code{dims}. #' @param verbose If TRUE, log additional info to the console -#' +#' #' @return A \code{GeneRelevance} object: -#' +#' #' @slot coords A cells \eqn{\times} dims \code{\link{matrix}} or \code{\link[Matrix:sparseMatrix-class]{sparseMatrix}} #' of coordinates (e.g. diffusion components), reduced to the dimensions passed as \code{dims} #' @slot exprs A cells \eqn{\times} genes matrix of expressions @@ -33,18 +33,18 @@ NULL #' @slot distance Distance measure used in the nearest neighbor search. See \code{\link{find_knn}} #' @slot smooth_window Smoothing window used (see \code{\link[smoother]{smth.gaussian}}) #' @slot smooth_alpha Smoothing kernel width used (see \code{\link[smoother]{smth.gaussian}}) -#' +#' #' @seealso \link{Gene Relevance methods}, \link{Gene Relevance plotting}: \code{plot_differential_map}/\code{plot_gene_relevance} -#' +#' #' @examples #' data(guo_norm) #' dm <- DiffusionMap(guo_norm) #' gr <- gene_relevance(dm) -#' +#' #' m <- t(Biobase::exprs(guo_norm)) #' gr_pca <- gene_relevance(prcomp(m)$x, m) #' # now plot them! -#' +#' #' @rdname Gene-Relevance #' @export setClass('GeneRelevance', slots = c( @@ -52,7 +52,7 @@ setClass('GeneRelevance', slots = c( exprs = 'dMatrixOrMatrix', partials = 'array', partials_norm = 'matrix', - nn_index = 'matrix', # k = ncol(nn_index) + nn_index = 'matrix', # Derived `k = ncol(nn_index)` dims = 'ColIndex', distance = 'character', smooth_window = 'numeric', @@ -60,7 +60,7 @@ setClass('GeneRelevance', slots = c( #' @rdname Gene-Relevance #' @export -setGeneric('gene_relevance', function( +setGeneric('gene_relevance', function( # nolint: brace_linter. coords, exprs, ..., k = 20L, dims = 1:2, distance = NULL, smooth = TRUE, remove_outliers = FALSE, verbose = FALSE ) standardGeneric('gene_relevance')) @@ -68,7 +68,7 @@ setGeneric('gene_relevance', function( #' @importFrom Biobase updateObject #' @rdname Gene-Relevance #' @export -setMethod('gene_relevance', c('DiffusionMap', 'missing'), function( +setMethod('gene_relevance', c('DiffusionMap', 'missing'), function( # nolint: cyclocomp_linter. coords, exprs, ..., k, dims, distance, smooth, remove_outliers, verbose ) { @@ -87,14 +87,16 @@ setMethod('gene_relevance', c('DiffusionMap', 'missing'), function( pcs <- get_pca(exprs, dataset(dm), dm@n_pcs, verbose) weights <- eigenvalues(dm)[dims] if (is.null(distance)) distance <- dm@distance - else if (!identical(distance, dm@distance)) stop('the specified distance ', distance,' is not the same as the one used for the diffusion map: ', dm@distance) + else if (!identical(distance, dm@distance)) stop('the specified distance ', distance, ' is not the same as the one used for the diffusion map: ', dm@distance) relevance_map <- gene_relevance( coords, exprs, ..., k = k, dims = dims, distance = distance, smooth = smooth, remove_outliers = remove_outliers, verbose = verbose, pcs = pcs, knn_params = dm@knn_params, weights = weights ) dm@data_env$relevance_map <- relevance_map - } else chkDots(...) + } else { + chkDots(...) + } relevance_map }) @@ -114,12 +116,12 @@ setMethod('gene_relevance', c('matrix', 'dMatrixOrMatrix'), function( n_dims <- ncol(coords_used) if (length(weights) == 1L) weights <- rep(weights, n_dims) smooth <- get_smoothing(smooth) - + if (is.null(colnames(exprs))) stop('The expression matrix columns need to be named but are NULL') if (n_dims != length(weights)) stop(n_dims, ' dimensions, but ', length(weights), ' weights were provided') - + nn_index <- do.call(find_knn, c(list(if (is.null(pcs)) exprs else pcs, k, distance = distance), knn_params))$index - + k <- ncol(nn_index) n_cells <- nrow(coords_used) n_genes <- ncol(exprs) @@ -127,7 +129,7 @@ setMethod('gene_relevance', c('matrix', 'dMatrixOrMatrix'), function( NA, dim = c(n_cells, n_genes, n_dims), dimnames = list(rownames(exprs), colnames(exprs), if (is.character(dims)) dims else colnames(coords_used))) - + # a very small value to subtract from the differential values <- if (is(exprs, 'Matrix')) exprs@x else exprs small <- min(values[values != 0]) / (length(exprs) - nnzero(exprs)) @@ -139,23 +141,23 @@ setMethod('gene_relevance', c('matrix', 'dMatrixOrMatrix'), function( expr_masked[expr_masked == 0] <- small differential_expr <- apply(nn_index, 2, function(nn) expr_gene[nn] - expr_masked) differential_expr[differential_expr == 0] <- NA # Cannot evaluate partial - #stopifnot(identical(dim(differential_expr), c(n_cells, k))) + #assert stopifnot(identical(dim(differential_expr), c(n_cells, k))) differential_expr } differential_exprs <- apply(exprs, 2L, gene_differential) - #stopifnot(identical(dim(differential_exprs), c(n_cells * k, n_genes))) + #assert stopifnot(identical(dim(differential_exprs), c(n_cells * k, n_genes))) # apply only handles returning vectors, so we have to reshape the return value dim(differential_exprs) <- c(n_cells, k, n_genes) dimnames(differential_exprs)[[3L]] <- if (length(colnames(exprs)) > 1L) colnames(exprs) else list(colnames(exprs)) - #stopifnot(identical(gene_differential(exprs[, 1L]), differential_exprs[, , 1L])) - + #assert stopifnot(identical(gene_differential(exprs[, 1L]), differential_exprs[, , 1L])) + for (d in seq_len(n_dims)) { # Compute partial derivatives in direction of current dimension - + if (verbose) cat('Calculating partial derivatives of dimension ', d, '/', n_dims, '\n') # We could optionaly add normalization by max(coords_used[, d]) - min(coords_used[, d]) differential_coord <- apply(nn_index, 2L, function(nn) coords_used[nn, d] - coords_used[, d]) - + partials_unweighted <- apply(differential_exprs, 3L, function(grad_gene_exprs) { # Compute median of difference quotients to NN difference_quotients <- grad_gene_exprs / differential_coord @@ -164,7 +166,7 @@ setMethod('gene_relevance', c('matrix', 'dMatrixOrMatrix'), function( ifelse(stable_cells, rowMedians(difference_quotients, na.rm = TRUE), NA) }) colnames(partials_unweighted) <- colnames(exprs) - + if (!any(is.na(smooth))) { order_coor <- order(coords_used[, d]) order_orig <- order(order_coor) @@ -176,21 +178,21 @@ setMethod('gene_relevance', c('matrix', 'dMatrixOrMatrix'), function( }) colnames(partials_unweighted) <- colnames(exprs) } - + partials[, , d] <- weights[[d]] * partials_unweighted } - + # Compute norm over partial derivates: Frobenius partials_norm <- apply(partials, c(1, 2), function(z) sqrt(sum(z^2, na.rm = TRUE))) colnames(partials_norm) <- colnames(partials) - + # Find outlier cells: Not in NN of more than 1 other cell # Remove these as they tend to receive very large norms if (remove_outliers) { outliers <- sapply(seq_len(n_cells), function(cell) sum(nn_index == cell) > 1) partials_norm[, outliers] <- NA } - + # Prepare output rownames(partials_norm) <- rownames(partials) colnames(partials_norm) <- colnames(partials) diff --git a/R/guo-data.r b/R/guo-data.r index f6fc0d4..f0329d2 100644 --- a/R/guo-data.r +++ b/R/guo-data.r @@ -1,12 +1,12 @@ #' Guo at al. mouse embryonic stem cell qPCR data -#' +#' #' Gene expression data of 48 genes and an annotation column \code{$num_cells} containing the cell stage at which the embryos were harvested. -#' +#' #' The data is normalized using the mean of two housekeeping genes. #' The difference between \code{guo} and \code{guo_norm} is the LoD being set to 10 in the former, making it usable with the \code{censor_val} parameter of \link{DiffusionMap}. -#' +#' #' @return an \link[Biobase:class.ExpressionSet]{ExpressionSet} with 48 features and 428 observations containing qPCR Ct values and a "num.cells" observation annotation. -#' +#' #' @aliases data:guo data:guo_norm guo guo_norm #' @name guo #' @docType data diff --git a/R/knn.r b/R/knn.r index 08fa2a7..d20f086 100644 --- a/R/knn.r +++ b/R/knn.r @@ -1,7 +1,7 @@ #' kNN search -#' +#' #' Approximate k nearest neighbor search with flexible distance function. -#' +#' #' @param data Data matrix #' @param query Query matrix. Leave it out to use \code{data} as query #' @param k Number of nearest neighbors @@ -10,7 +10,7 @@ #' @param method Method to use. \code{'hnsw'} is tunable with \code{...} but generally less exact than \code{'covertree'} (default: 'covertree') #' @param sym Return a symmetric matrix (as long as query is NULL)? #' @param verbose Show a progressbar? (default: FALSE) -#' +#' #' @return A \code{\link{list}} with the entries: #' \describe{ #' \item{\code{index}}{A \eqn{nrow(data) \times k} \link{integer} \link{matrix} containing the indices of the k nearest neighbors for each cell.} @@ -21,7 +21,7 @@ #' Any zero in the matrix (except for the diagonal) indicates that the cells in the corresponding pair are close neighbors. #' } #' } -#' +#' #' @rdname knn #' @importFrom RcppHNSW hnsw_build hnsw_knn hnsw_search #' @export @@ -44,23 +44,23 @@ find_knn <- function( if (method == 'covertree') { return(knn.covertree::find_knn(data, k, query = query, distance = distance, sym = sym)) } - + if (distance == 'rankcor') { # TODO: rank_mat only works on dense matrices distance <- 'cosine' data <- rank_mat(data) if (!is.null(query)) query <- rank_mat(query) } - + if (is.null(query)) { knn <- hnsw_knn(data, k + 1L, distance, M = p$M, ef_construction = p$ef_construction, ef = p$ef, verbose = verbose) - knn$idx <- knn$idx[ , -1, drop = FALSE] + knn$idx <- knn$idx[, -1, drop = FALSE] knn$dist <- knn$dist[, -1, drop = FALSE] } else { index <- hnsw_build(data, distance, M = p$M, ef = p$ef_construction, verbose = verbose) knn <- hnsw_search(query, index, k, ef = p$ef, verbose = verbose) } - names(knn)[[1L]] <- 'index' # idx -> index + names(knn)[[1L]] <- 'index' # idx → index # R matrices are column-major, so as.vector(m) == c(m[, 1], m[, 2], ...) knn$dist_mat <- sparseMatrix( rep(seq_len(nrow(knn$index)), k), diff --git a/R/l_which.r b/R/l_which.r index 43da3f0..1d6df17 100644 --- a/R/l_which.r +++ b/R/l_which.r @@ -1,33 +1,33 @@ #' Logical which -#' +#' #' Inverse of \link[base]{which}. Converts an array of numeric or character indices to a logical index array. #' This function is useful if you need to perform logical operation on an index array but are only given numeric indices. -#' +#' #' Either \code{nms} or \code{len} has to be specified. -#' +#' #' @param idx Numeric or character indices. #' @param nms Array of names or a sequence. Required if \code{idx} is a character array #' @param len Length of output array. Alternative to \code{nms} if \code{idx} is numeric #' @param useNames Use the names of nms or idx -#' +#' #' @return Logical vector of length \code{len} or the same length as \code{nms} -#' +#' #' @examples #' all(l_which(2, len = 3L) == c(FALSE, TRUE, FALSE)) #' all(l_which(c('a', 'c'), letters[1:3]) == c(TRUE, FALSE, TRUE)) -#' +#' #' @export -l_which <- function(idx, nms = seq_len(len), len = length(nms), useNames = TRUE) { +l_which <- function(idx, nms = seq_len(len), len = length(nms), useNames = TRUE) { # nolint: object_name_linter. rv <- logical(len) if (is.character(nms)) # we need names here so that rv[idx] works names(rv) <- nms - + if (useNames && !is.null(names(idx))) names(rv)[idx] <- names(idx) - + rv[idx] <- TRUE - + if (!useNames) # if we don't want names, we'll remove them if we added them before names(rv) <- NULL rv -} \ No newline at end of file +} diff --git a/R/methods-coercion.r b/R/methods-coercion.r index 09492a6..a082e40 100644 --- a/R/methods-coercion.r +++ b/R/methods-coercion.r @@ -2,22 +2,22 @@ NULL #' Coercion methods -#' +#' #' Convert a \code{\link{DiffusionMap}} or \code{\link{DPT}} object to other classes -#' -#' \link[ggplot2]{fortify} is a ggplot2 generic allowing a diffusion map to be used as \code{data} parameter in \link[ggplot2]{ggplot} or \link[ggplot2]{qplot}. -#' +#' +#' \link[ggplot2]{fortify} is a ggplot2 generic allowing a diffusion map to be used as \code{data} parameter in \link[ggplot2]{ggplot} or \link[ggplot2]{qplot}. +#' #' @param x,model A \code{\link{DiffusionMap}} or \code{\link{DPT}} object #' @param row.names NULL or a character vector giving the row names for the data frame. Missing values are not allowed. #' @param optional logical. If TRUE, setting row names and converting column names (to syntactic names: see make.names) is optional. #' @param ... Passed to \code{\link[base]{as.data.frame}} #' @param data ignored -#' +#' #' @return An object of the desired class -#' +#' #' @seealso \link{DiffusionMap accession methods}, \link{Extraction methods}, \link{DiffusionMap methods} for more -#' +#' #' @examples #' library(Biobase) #' data(guo) @@ -26,12 +26,12 @@ NULL #' stopifnot(all(classes[paste0('DC', 1:20)] == 'numeric')) #' stopifnot(all(classes[featureNames(guo) ] == 'numeric')) #' stopifnot(all(classes[ varLabels(guo) ] == c('factor', 'integer'))) -#' +#' #' @aliases #' as.data.frame.DiffusionMap fortify.DiffusionMap #' as.data.frame.DPT fortify.DPT #' as.matrix.DPT -#' +#' #' @importFrom methods setAs #' @importFrom BiocGenerics as.data.frame #' @name Coercion methods @@ -43,10 +43,10 @@ NULL #' @importFrom Biobase pData #' @rdname coercions #' @export -setMethod('as.data.frame', 'DiffusionMap', function(x, row.names = NULL, optional = FALSE, ...) { +setMethod('as.data.frame', 'DiffusionMap', function(x, row.names = NULL, optional = FALSE, ...) { # nolint: object_name_linter. df_evec <- as.data.frame(eigenvectors(x), row.names, optional, ...) - df_data <- dataset_to_df( dataset(x), row.names, optional, ...) - + df_data <- dataset_to_df(dataset(x), row.names, optional, ...) + if (is.null(df_data)) df_evec else @@ -55,7 +55,7 @@ setMethod('as.data.frame', 'DiffusionMap', function(x, row.names = NULL, optiona #' @usage fortify.DiffusionMap(model, data, ...) -#' +#' #' @importFrom BiocGenerics as.data.frame #' @importFrom Biobase as.data.frame.ExpressionSet #' @importFrom ggplot2 fortify @@ -68,7 +68,7 @@ setAs('DiffusionMap', 'data.frame', function(from) as.data.frame(from)) #' @rdname coercions #' @export -setMethod('as.data.frame', 'DPT', function(x, row.names = NULL, optional = FALSE, ...) { +setMethod('as.data.frame', 'DPT', function(x, row.names = NULL, optional = FALSE, ...) { # nolint: object_name_linter. dpt <- as.matrix(x) colnames(dpt) <- paste0('DPT', seq_len(ncol(dpt))) cbind( @@ -78,7 +78,7 @@ setMethod('as.data.frame', 'DPT', function(x, row.names = NULL, optional = FALSE #' @usage fortify.DPT(model, data, ...) -#' +#' #' @importFrom BiocGenerics as.data.frame #' @importFrom Biobase as.data.frame.ExpressionSet #' @importFrom ggplot2 fortify diff --git a/R/methods-extraction.r b/R/methods-extraction.r index 916c565..7c31607 100644 --- a/R/methods-extraction.r +++ b/R/methods-extraction.r @@ -1,31 +1,31 @@ #' Extraction methods -#' +#' #' Extract common information from objects. #' Apart from the input data's branches, #' you can extract diffusion components via \code{$DCx}. #' From \code{\link{DPT}} objects, you can also extract the branch label via \code{$Branch}, #' or the diffusion pseudo time for a numbered cell via \code{$DPTx}. -#' +#' #' @param x \code{\link{DiffusionMap}} or \code{\link{DPT}} object #' @param i,name Name of a diffusion component \code{'DCx'}, \code{'DPTx'}, \code{'Branch'} or column from the data #' @param j N/A #' @param ... ignored -#' +#' #' @return The names or data row, see respective generics. -#' +#' #' @seealso \link[base]{Extract}, \code{\link[base]{names}} for the generics. \link{DiffusionMap accession methods}, \link{DiffusionMap methods}, \link{Coercion methods} for more -#' +#' #' @examples #' data(guo) #' dm <- DiffusionMap(guo) #' dm$DC1 # A diffusion component #' dm$Actb # A gene expression vector #' dm$num_cells # Phenotype metadata -#' +#' #' dpt <- DPT(dm) #' dm$Branch #' dm$DPT1 -#' +#' #' @name Extraction methods #' @rdname extractions #' @aliases @@ -62,14 +62,16 @@ setMethod('[[', c('DiffusionMap', 'character', 'missing'), function(x, i, j, ... #' @export setMethod('[[', c('DPT', 'character', 'missing'), function(x, i, j, ...) { if (identical(i, 'dpt')) return(dpt_for_branch(x, 1L)) #TODO - + num_i <- if (grepl('DPT\\d+', i)) as.integer(sub('DPT(\\d+)', '\\1', i)) - + if (!is.null(num_i) && 1L <= num_i && num_i <= nrow(x)) { x[num_i, ] } else if (identical(i, 'Branch') || identical(i, 'branch')) { x@branch[, 1L] - } else x@dm[[i]] + } else { + x@dm[[i]] + } }) diff --git a/R/methods-update.r b/R/methods-update.r index ae9099e..1bf98bc 100644 --- a/R/methods-update.r +++ b/R/methods-update.r @@ -2,54 +2,54 @@ NULL #' Update old destiny objects to a newer version. -#' +#' #' Handles \link{DiffusionMap}, \link{Sigmas}, and \link[=gene_relevance]{GeneRelevance}. -#' +#' #' @param object An object created with an older destiny release #' @param ... ignored #' @param verbose tells what is being updated -#' +#' #' @return A \link{DiffusionMap} or \link{Sigmas} object that is valid when used with the current destiny release -#' +#' #' @aliases updateObject.DiffusionMap updateObject.Sigmas updateObject.GeneRelevance #' @name updateObject methods #' @rdname updateObject-methods NULL -#' @importFrom methods setMethod validObject .hasSlot slot slot<- +#' @importFrom methods setMethod validObject .hasSlot slot slot<- #' @importFrom Matrix Matrix #' @importFrom BiocGenerics updateObject #' @rdname updateObject-methods #' @export setMethod('updateObject', 'DiffusionMap', function(object, ..., verbose = FALSE) { - if (verbose) + if (verbose) message("updateObject(object = 'DiffusionMap')") - + if (!hasattr(object, 'distance')) slot(object, 'distance', check = FALSE) <- 'euclidean' - + if (!hasattr(object, 'transitions')) slot(object, 'transitions', check = FALSE) <- NULL - + if (!hasattr(object, 'd.norm')) # upgrade only nonexistence, name later slot(object, 'd.norm', check = FALSE) <- rep(NA_real_, length(object@d)) - + if (!hasattr(object, 'n_pcs')) slot(object, 'n_pcs', check = FALSE) <- NA_integer_ - + if (!hasattr(object, 'n_local')) slot(object, 'n_local', check = FALSE) <- 5L - + if (!hasattr(object, 'rotate')) slot(object, 'rotate', check = FALSE) <- TRUE # old ones were rotated by default - + if (!hasattr(object, 'knn_params')) slot(object, 'knn_params', check = FALSE) <- list() - + object <- update_slot_names(object, c('data.env', 'd.norm', 'density.norm', 'censor.val', 'censor.range', 'missing.range')) - + slot(object, 'sigmas', check = FALSE) <- updateObject(object@sigmas) - + validObject(object) object }) @@ -58,11 +58,11 @@ setMethod('updateObject', 'DiffusionMap', function(object, ..., verbose = FALSE) #' @rdname updateObject-methods #' @export setMethod('updateObject', 'Sigmas', function(object, ..., verbose = FALSE) { - if (verbose) + if (verbose) message("updateObject(object = 'Sigmas')") - + object <- update_slot_names(object, c('log.sigmas', 'dim.norms', 'optimal.sigma', 'optimal.idx', 'avrd.norms')) - + object }) @@ -70,12 +70,12 @@ setMethod('updateObject', 'Sigmas', function(object, ..., verbose = FALSE) { #' @rdname updateObject-methods #' @export setMethod('updateObject', 'GeneRelevance', function(object, ..., verbose = FALSE) { - if (verbose) + if (verbose) message("updateObject(object = 'GeneRelevance')") - + if (!hasattr(object, 'distance')) slot(object, 'distance', check = FALSE) <- 'euclidean' - + # the dimensions were switched to fit the convention elsewhere in the package. if (!hasattr(object, 'smooth_window')) { object@partials <- aperm(object@partials, c(2, 1, 3)) @@ -83,7 +83,7 @@ setMethod('updateObject', 'GeneRelevance', function(object, ..., verbose = FALSE slot(object, 'smooth_window', check = FALSE) <- NA_real_ slot(object, 'smooth_alpha', check = FALSE) <- NA_real_ } - + object }) @@ -93,11 +93,11 @@ update_slot_names <- function(object, old_slots, new_slots = sub('.', '_', old_s atts <- attributes(object) update_idx <- old_slots %in% names(atts) if (!length(update_idx)) return(object) - + slot_idx <- na.omit(match(old_slots[update_idx], names(atts))) - + names(atts)[slot_idx] <- new_slots[update_idx] attributes(object) <- atts - + object } diff --git a/R/plothelpers.r b/R/plothelpers.r index 73869a7..1d6ba06 100644 --- a/R/plothelpers.r +++ b/R/plothelpers.r @@ -1,8 +1,8 @@ -emptyplot <- function(xlim = c(0, 1), ylim = xlim, asp = 1, frame.plot = FALSE, col = NULL, ...) { - plot(0, type = 'n', xlab = '', ylab = '', asp = asp, axes = FALSE, - frame.plot = frame.plot, xlim = xlim, ylim = ylim, xaxs = 'i', +emptyplot <- function(xlim = c(0, 1), ylim = xlim, asp = 1, frame.plot = FALSE, col = NULL, ...) { # nolint: object_name_linter. + plot(0, type = 'n', xlab = '', ylab = '', asp = asp, axes = FALSE, + frame.plot = frame.plot, xlim = xlim, ylim = ylim, xaxs = 'i', yaxs = 'i', ...) - if (!is.null(col)) + if (!is.null(col)) rect(xlim[1], ylim[1], xlim[2], ylim[2], col = col) } @@ -11,11 +11,11 @@ emptyplot <- function(xlim = c(0, 1), ylim = xlim, asp = 1, frame.plot = FALSE, continuous_colors <- function(vals, pal = palette(), limits = NULL, levels = 100) { if (is.function(pal)) pal <- pal(levels) - + if (is.null(limits)) limits <- range(vals, na.rm = TRUE) - + ramp <- colour_ramp(pal, alpha = TRUE) - + ramp(rescale(vals, from = limits)) } diff --git a/R/predict.r b/R/predict.r index 4582c82..a548784 100644 --- a/R/predict.r +++ b/R/predict.r @@ -2,7 +2,7 @@ NULL #' Predict new data points using an existing DiffusionMap. The resulting matrix can be used in \link[=plot.DiffusionMap]{the plot method for the DiffusionMap} -#' +#' #' @param dm A \code{\link{DiffusionMap}} object. #' @param new_data New data points to project into the diffusion map. #' Can be a \link[base]{matrix}, \link[base]{data.frame}, @@ -10,9 +10,9 @@ NULL #' or \link[SingleCellExperiment]{SingleCellExperiment}. #' @param verbose Show progress messages? #' @param ... Passed to \code{\link[proxy:dist]{proxy::dist}(new_data, data, dm@distance, ...)}. -#' +#' #' @return A \eqn{nrow(new\_data) \times ncol(eigenvectors(dif))} matrix of projected diffusion components for the new data. -#' +#' #' @examples #' data(guo) #' g1 <- guo[, guo$num_cells != 32L] @@ -20,22 +20,22 @@ NULL #' dm <- DiffusionMap(g1) #' dc2 <- dm_predict(dm, g2) #' plot(dm, new_dcs = dc2) -#' +#' #' @importFrom methods is #' @importFrom Matrix Diagonal colSums rowSums #' @importFrom proxy dist #' @export dm_predict <- function(dm, new_data, ..., verbose = FALSE) { if (!is(dm, 'DiffusionMap')) stop('dm needs to be a DiffusionMap') - + data <- dataset_extract_doublematrix(dataset(dm), dm@vars) new_data <- dataset_extract_doublematrix(new_data, dm@vars) if (!dataset_n_features(data) == dataset_n_features(new_data)) stop('new data needs to have the same features as the one used to create the diffusion map') - + censor <- !is.null(dm@censor_val) || any(is.na(data)) || any(is.na(new_data)) #censor imples euclidean distance - + #TODO: local sigma sigma <- optimal_sigma(dm) if (!censor) { @@ -47,7 +47,7 @@ dm_predict <- function(dm, new_data, ..., verbose = FALSE) { d2 <- unclass(d ^ 2) # matrix (dense) stopifnot(nrow(d2) == nrow(new_data)) stopifnot(ncol(d2) == nrow(data)) - + #TODO: zeros not on diag trans_p <- exp(-d2 / (2 * sigma ^ 2)) trans_p[d2 == 0] <- 0 @@ -55,18 +55,18 @@ dm_predict <- function(dm, new_data, ..., verbose = FALSE) { if (verbose) cat('Creating distance matrix with censoring\n') trans_p <- predict_censoring(data, new_data, dm@censor_val, dm@censor_range, dm@missing_range, sigma) } - + #trans_p: columns: old data, rows: new data d_new <- rowSums(trans_p, na.rm = TRUE) norm_p <- get_norm_p(trans_p, dm@d, d_new, dm@density_norm) rm(trans_p) # free memory - + d_norm_new <- rowSums(norm_p) - + d_rot <- Diagonal(x = dm@d_norm ^ -.5) d_rot_new <- Diagonal(x = d_norm_new ^ -.5) - M_new <- d_rot_new %*% norm_p %*% d_rot - + m_new <- d_rot_new %*% norm_p %*% d_rot + if (verbose) cat('Transforming data\n') - t(t(M_new %*% eigenvectors(dm)) / eigenvalues(dm)) + t(t(m_new %*% eigenvectors(dm)) / eigenvalues(dm)) } diff --git a/R/projection-dist.r b/R/projection-dist.r index 7cbc7e1..a4bd3d5 100644 --- a/R/projection-dist.r +++ b/R/projection-dist.r @@ -1,5 +1,5 @@ #' Projection distance -#' +#' #' @param dm A \code{\link{DiffusionMap}} object. #' @param new_dcs Diffusion component matrix of which to calculate the distance to the data. #' @param ... Passed to \code{\link[proxy:dist]{proxy::dist}} if \code{new_data} was passed. @@ -8,29 +8,28 @@ #' \link[Biobase:class.ExpressionSet]{ExpressionSet}, #' or \link[SingleCellExperiment]{SingleCellExperiment}. #' @param verbose If \code{\link{TRUE}}, log additional info to the console. -#' +#' #' @return A vector of distances each new data point has to the existing data. -#' +#' #' @examples #' data(guo_norm) #' g2_32 <- guo_norm[, guo_norm$num_cells < 64] #' g64 <- guo_norm[, guo_norm$num_cells == 64] #' dm <- DiffusionMap(g2_32) #' d <- projection_dist(dm, new_data = g64) -#' +#' #' @export projection_dist <- function(dm, new_dcs = NULL, ..., new_data, verbose = FALSE) { if (is.null(new_dcs)) new_dcs <- dm_predict(dm, new_data, ..., verbose = verbose) else if (!missing(new_data)) stop('only pass one of new_dcs and new_data') - + evs <- eigenvectors(dm) - + nns <- find_knn(evs, 1, query = as.matrix(new_dcs)) - - #nn_idx <- nns$index[, 1] + nn_dist <- nns$dist[, 1] - + nn_dist } diff --git a/R/sigmas-plotting.r b/R/sigmas-plotting.r index f8ddb44..347f58c 100644 --- a/R/sigmas-plotting.r +++ b/R/sigmas-plotting.r @@ -6,7 +6,7 @@ rangle <- '\u27E9' angles <- list(langle = langle, rangle = rangle) #' Plot \link{Sigmas} object -#' +#' #' @param x Sigmas object to plot #' @param col Vector of bar colors or single color for all bars #' @param col_highlight Color for highest bar. Overrides col @@ -18,14 +18,14 @@ angles <- list(langle = langle, rangle = rangle) #' @param xlab X label. NULL to use default #' @param ylab Either one y label or y labels for both plots. NULL to use both defauts, a NULL in a list of length 2 to use one default. #' @param main Title of the plot -#' +#' #' @return This method plots a Sigma object to the current device and returns nothing/NULL -#' +#' #' @examples #' data(guo) #' sigs <- find_sigmas(guo) #' plot(sigs) -#' +#' #' @importFrom graphics plot plot.window par text dotchart layout #' @name plot.Sigmas #' @rdname plot.Sigmas @@ -34,7 +34,7 @@ NULL #' @importFrom graphics plot plot.window par text dotchart layout #' @rdname plot.Sigmas #' @export -setMethod('plot', c(x = 'Sigmas', y = 'missing'), function( +setMethod('plot', c(x = 'Sigmas', y = 'missing'), function( # nolint: cyclocomp_linter. x, col = par('fg'), col_highlight = '#E41A1C', # brewer Set1[[1]] @@ -50,51 +50,51 @@ setMethod('plot', c(x = 'Sigmas', y = 'missing'), function( sigmas <- x if (is.null(optimal_sigma(sigmas))) { plot.new() - plot.window(c(-1,1), c(-1,1)) + plot.window(c(-1, 1), c(-1, 1)) text(0, 0, expression('local ' * sigma), .5) return() } if (is.null(sigmas@log_sigmas)) { return(dotchart(optimal_sigma(sigmas), labels = 'optimal_sigma', color = col_highlight, pch = pch, main = 'optimal sigma given directly')) } - + steps <- length(sigmas@log_sigmas) - + colors <- if (length(col) == 1L) rep(col, steps - 1L) else col colors[[sigmas@optimal_idx]] <- col_highlight - + #prepare parameters and reset them at the end - + mar <- par('mar') - + if (!only_dim) { layout(matrix(1:2, 2L)) par_old <- par(mar = c(0, mar[2:4]), ..., no.readonly = TRUE) - + on.exit({ layout(matrix(1)) par(par_old) }) } - + if (is.null(xlab)) xlab <- expression(log[10](sigma)) - + if (is.null(ylab)) ylab <- list(NULL, NULL) if (length(ylab) == 1L) ylab <- list(ylab, NULL) if (is.null(ylab[[1L]])) ylab[[1L]] <- substitute(paste(langle, d, rangle), angles) if (is.null(ylab[[2L]])) ylab[[2L]] <- substitute(paste(langle, log[10](Z(x)), rangle), angles) - + #first plot - + x <- sigmas@log_sigmas[-1L] - diff(sigmas@log_sigmas) / 2 xlim <- range(sigmas@log_sigmas) - + ysteps <- pretty(sigmas@dim_norms) ymin <- min(ysteps) - ymax <- max(ysteps) + diff(ysteps[1:2])/4 - + ymax <- max(ysteps) + diff(ysteps[1:2]) / 4 + plot(x, sigmas@dim_norms, xlim = xlim, ylim = c(ymin, ymax), xaxt = if (only_dim) 's' else 'n', xlab = if (only_dim) xlab else '', @@ -102,18 +102,18 @@ setMethod('plot', c(x = 'Sigmas', y = 'missing'), function( col = colors, type = type[[1L]], pch = pch[[1L]], main = main, ...) - + text(log10(optimal_sigma(sigmas)), sigmas@dim_norms[[sigmas@optimal_idx]], substitute(sigma == rsig, list(rsig = round(optimal_sigma(sigmas), 1))), pos = 3) - + #second (overlay) plot - + if (!only_dim) { par(mar = c(mar[1:2], 0, mar[[4L]]), ...) - + col_lines <- rep(col_line, length(sigmas@log_sigmas)) col_lines[c(sigmas@optimal_idx, sigmas@optimal_idx + 1L)] <- col_highlight - + t2 <- if (length(type) > 1L) type[[2L]] else type p2 <- if (length(pch) > 1L) pch[[2L]] else pch plot(sigmas@log_sigmas, sigmas@avrd_norms, diff --git a/R/sigmas.r b/R/sigmas.r index c588d12..a977836 100644 --- a/R/sigmas.r +++ b/R/sigmas.r @@ -2,44 +2,44 @@ NULL #' Sigmas Object -#' +#' #' Holds the information about how the \code{sigma} parameter for a \link{DiffusionMap} was obtained, #' and in this way provides a plotting function for the \link{find_sigmas} heuristic. #' You should not need to create a Sigmas object yourself. Provide \code{sigma} to \link{DiffusionMap} instead or use \link{find_sigmas}. -#' +#' #' A Sigmas object is either created by \link{find_sigmas} or by specifying the \code{sigma} parameter to \link{DiffusionMap}. -#' +#' #' In the second case, if the \code{sigma} parameter is just a number, #' the resulting \code{Sigmas} object has all slots except of \code{optimal_sigma} set to \code{NULL}. -#' +#' #' @usage Sigmas(...) -#' +#' #' @param ... See \dQuote{\strong{Slots}} below #' @param object,x \link{Sigmas} object -#' +#' #' @return \code{Sigmas} creates an object of the same class -#' +#' #' @slot log_sigmas Vector of length \eqn{m} containing the \eqn{\log_{10}} of the \eqn{\sigma}s #' @slot dim_norms Vector of length \eqn{m-1} containing the average dimensionality \eqn{\langle p \rangle} for the respective kernel widths #' @slot optimal_sigma Multiple local sigmas or the mean of the two global \eqn{\sigma}s around the highest \eqn{\langle p \rangle} (\code{c(optimal_idx, optimal_idx+1L)}) #' @slot optimal_idx The index of the highest \eqn{\langle p \rangle}. #' @slot avrd_norms Vector of length \eqn{m} containing the average dimensionality for the corresponding sigma. -#' +#' #' @examples #' data(guo) #' sigs <- find_sigmas(guo, verbose = FALSE) #' optimal_sigma(sigs) #' print(sigs) -#' +#' #' @seealso \code{\link{find_sigmas}}, the function to determine a locally optimal sigma and returning this class -#' +#' #' @aliases Sigmas-methods -#' +#' #' @importFrom methods setClass #' @rdname Sigmas-class #' @export Sigmas #' @exportClass Sigmas -Sigmas <- setClass('Sigmas', slots = c( +Sigmas <- setClass('Sigmas', slots = c( # nolint: object_name_linter. log_sigmas = 'numericOrNULL', dim_norms = 'numericOrNULL', optimal_sigma = 'numericOrNULL', @@ -68,10 +68,10 @@ setMethod('show', 'Sigmas', function(object) { }) #' Calculate the average dimensionality for m different gaussian kernel widths (\eqn{\sigma}). -#' +#' #' The sigma with the maximum value in average dimensionality is close to the ideal one. #' Increasing step number gets this nearer to the ideal one. -#' +#' #' @param data Data set with \eqn{n} observations. Can be a \link[base]{data.frame}, #' \link[base]{matrix}, \link[Biobase:class.ExpressionSet]{ExpressionSet} #' or \link[SingleCellExperiment]{SingleCellExperiment}. @@ -87,23 +87,23 @@ setMethod('show', 'Sigmas', function(object) { #' @param missing_range Whole data range for missing value model. Has to be specified if NAs are in the data #' @param vars Variables (columns) of the data to use. Specifying TRUE will select all columns (default: All floating point value columns) #' @param verbose logical. If TRUE, show a progress bar and plot the output -#' +#' #' @return Object of class \link{Sigmas} -#' +#' #' @seealso \code{\link{Sigmas}}, the class returned by this; \code{\link{DiffusionMap}}, the class this is used for -#' +#' #' @examples #' data(guo) #' sigs <- find_sigmas(guo, verbose = TRUE) #' DiffusionMap(guo, sigs) -#' +#' #' @importFrom methods as #' @importFrom graphics plot #' @importFrom utils txtProgressBar setTxtProgressBar #' @importFrom Matrix colSums rowSums #' @importFrom proxy dist #' @export -find_sigmas <- function( +find_sigmas <- function( # nolint: cyclocomp_linter. data, step_size = .1, steps = 10L, start = NULL, @@ -117,35 +117,33 @@ find_sigmas <- function( ) { chkDots(...) data <- dataset_extract_doublematrix(data, vars) - + if (any(is.na(data))) data <- as.matrix(hotdeck(data, imp_var = FALSE)) - + stopifnot(steps >= 3L) - + if (length(sample_rows) > 1L) { data <- data[sample_rows, ] } else if (nrow(data) > sample_rows) { sample_idx <- sample(nrow(data), sample_rows) data <- data[sample_idx, ] } - + n <- nrow(data) - + dists <- dist(data) - + min_dist <- min(dists) if (min_dist == 0) stop('Minimum distance in the data may not be 0') dists <- as(dists, 'symmetricMatrix') - + if (is.null(start)) start <- log10(min_dist) - #if (missing(step_size)) - # step_size = min_dist / steps - + if (verbose) print(c(min_dist = min_dist, start = start, step_size = step_size)) - + get_trans_p <- if (test_censoring(censor_val, censor_range, data, missing_range)) { function(sigma) censoring(data, sigma, dists, censor_val, censor_range, missing_range) @@ -153,56 +151,56 @@ find_sigmas <- function( msqd <- -(dists ^ 2) function(sigma) exp(msqd / (2 * sigma ^ 2)) } - + do_step <- function(i) { # i can be negative! - log_sigma <- start + i*step_size + log_sigma <- start + i * step_size trans_p <- get_trans_p(10 ^ log_sigma) - + diag_d <- colSums(trans_p, na.rm = TRUE) - - list(avrd_norm = (sum(log10(diag_d/n) / diag_d)) / sum(1 / diag_d), + + list(avrd_norm = (sum(log10(diag_d / n) / diag_d)) / sum(1 / diag_d), log_sigma = log_sigma) } - + avrd_norms <- numeric(steps) log_sigmas <- numeric(steps) dim_norms <- numeric(steps - 1) - + step_diff <- function(step) { idxs <- c(step, step - 1) diff(avrd_norms[idxs]) / diff(log_sigmas[idxs]) } - + a0 <- do_step(0L) avrd_norms[[1L]] <- a0$avrd_norm log_sigmas[[1L]] <- a0$log_sigma - + a1 <- do_step(1) dir <- 1L avrd_norms[[2L]] <- a1$avrd_norm log_sigmas[[2L]] <- a1$log_sigma - + if (step_diff(2L) < 0) { a1 <- do_step(-1L) dir <- -1L avrd_norms[[2L]] <- a1$avrd_norm log_sigmas[[2L]] <- a1$log_sigma } - + dim_norms[[1L]] <- step_diff(2L) - + if (verbose) pb <- txtProgressBar(2L, steps, 1L, style = 3) for (step in seq(2L, steps)) { - a_i = do_step(dir * (step - 1L)) + a_i <- do_step(dir * (step - 1L)) avrd_norms[[step]] <- a_i$avrd_norm log_sigmas[[step]] <- a_i$log_sigma - + dif_step <- step - 1 dim_norms[[dif_step]] <- step_diff(step) - + if (verbose) setTxtProgressBar(pb, step) - + if (early_exit && step > 2 && dim_norms[[dif_step]] < dim_norms[[dif_step - 1L]]) { avrd_norms <- avrd_norms[seq_len(step)] log_sigmas <- log_sigmas[seq_len(step)] @@ -214,19 +212,19 @@ find_sigmas <- function( setTxtProgressBar(pb, steps) close(pb) } - + if (early_exit && step == steps) warning('All steps were exhausted without finding a maximum. Using last encountered sigma') - + optimal_idx <- which.max(dim_norms) - + ret <- Sigmas( log_sigmas = log_sigmas, dim_norms = dim_norms, optimal_sigma = 10 ^ mean(log_sigmas[c(optimal_idx, optimal_idx + 1L)]), optimal_idx = optimal_idx, avrd_norms = avrd_norms) - + if (verbose) plot(ret) - + ret } diff --git a/R/utils.r b/R/utils.r index 12a56ef..2d875dd 100644 --- a/R/utils.r +++ b/R/utils.r @@ -30,7 +30,9 @@ verbose_timing <- function(verbose, msg, expr) { cat(sprintf('...done. Time: %.2fs\n', dif[['elapsed']])) flush.console() r - } else expr + } else { + expr + } } @@ -41,18 +43,18 @@ accumulated_transitions <- function(dm) { dm@data_env$accumulated_transitions <- dm@data_env$propagations rm('propagations', envir = dm@data_env) } - + if (is.null(dm@data_env$accumulated_transitions)) { if (is.null(dm@transitions)) stop('DiffusionMap was created with suppress_dpt = TRUE') - + n <- length(dm@d_norm) - + phi0 <- dm@d_norm / sqrt(sum(dm@d_norm ^ 2)) inv <- solve(Diagonal(n) - dm@transitions + phi0 %*% t(phi0)) dm@data_env$accumulated_transitions <- inv - Diagonal(n) } - + dm@data_env$accumulated_transitions } @@ -62,7 +64,7 @@ hasattr <- function(x, which) !is.null(attr(x, which, exact = TRUE)) flipped_dcs <- function(d, dcs) { if (is(d, 'DiffusionMap')) d <- eigenvectors(d) - + evs <- as.matrix(d[, abs(dcs)]) evs[, dcs < 0] <- -evs[, dcs < 0] evs @@ -78,7 +80,7 @@ rescale_mat <- function(mat, rescale) { stopifnot(ncol(mat) == ncol(rescale)) stopifnot(dim(rescale)[[1L]] == 2L) stopifnot(dim(rescale)[[3L]] == 2L) - + col_type <- get(typeof(mat)) rv <- vapply(seq_len(ncol(mat)), function(d) { scales::rescale(mat[, d], rescale['to', d, ], rescale['from', d, ]) @@ -113,14 +115,6 @@ runs <- function(vec) { } -upper.tri.sparse <- function(x, diag = FALSE) { - # Works just like upper.tri() but doesn't forcibly coerce large 'sparseMatrix' back to 'matrix' - if (diag) - row(x) <= col(x) - else row(x) < col(x) -} - - get_louvain_clusters <- function(transitions) { graph <- igraph::graph_from_adjacency_matrix(transitions, 'undirected', weighted = TRUE) as.integer(unclass(igraph::membership(igraph::cluster_louvain(graph)))) @@ -128,14 +122,14 @@ get_louvain_clusters <- function(transitions) { #' @importFrom BiocGenerics duplicated -setMethod('duplicated', 'dgCMatrix', function(x, incomparables = FALSE, MARGIN = 1L, ...) { - MARGIN <- as.integer(MARGIN) +setMethod('duplicated', 'dgCMatrix', function(x, incomparables = FALSE, MARGIN = 1L, ...) { # nolint: object_name_linter. + MARGIN <- as.integer(MARGIN) # nolint: object_name_linter. n <- nrow(x) p <- ncol(x) j <- rep(seq_len(p), diff(x@p)) i <- x@i + 1 v <- x@x - + if (MARGIN == 1L) { # rows names(v) <- j splits <- split(v, i) @@ -144,15 +138,16 @@ setMethod('duplicated', 'dgCMatrix', function(x, incomparables = FALSE, MARGIN = names(v) <- i splits <- split(v, j) is_empty <- setdiff(seq_len(p), j) - } else stop('Invalid MARGIN ', MARGIN, ', matrices only have rows (1) and columns (2).') - + } else { + stop('Invalid MARGIN ', MARGIN, ', matrices only have rows (1) and columns (2).') + } + result <- duplicated.default(splits) if (!any(is_empty)) return(result) - + out <- logical(if (MARGIN == 1L) n else p) out[-is_empty] <- result if (length(is_empty) > 1) out[is_empty[-1]] <- TRUE out }) - diff --git a/demo/destiny.r b/demo/destiny.r index 7453251..f954166 100644 --- a/demo/destiny.r +++ b/demo/destiny.r @@ -2,8 +2,8 @@ library(destiny) data(guo) -Dark2 <- scales::brewer_pal(palette = 'Dark2') -palette(Dark2(8L)) +dark2 <- scales::brewer_pal(palette = 'Dark2') +palette(dark2(8L)) @@ -31,9 +31,3 @@ dm_guo_global <- DiffusionMap(guo, sigmas, verbose = FALSE, censor_val = 10, censor_range = c(10, 40)) plot(dm_guo_global, col = guo$num_cells, pch = 20) - - - - -#library(rgl) -#plot3d(eigenvectors(dm_guo)[, 1:3], col = guo$num_cells) diff --git a/src/censoring.cpp b/src/censoring.cpp index e50b639..71e0f11 100644 --- a/src/censoring.cpp +++ b/src/censoring.cpp @@ -66,7 +66,7 @@ inline double censor_pair( const double v = use_d ? d : c; return - pow(M_PI*kt/2, -1./4) + pow(M_PI*kt/2, -1./4) * sqrt(M_PI*kt/4) * ( std::erfc((m0-v) / sigma) - std::erfc((m1-v) / sigma) ) / sqrt(m1-m0); diff --git a/tests/testthat/test_dataset_types.r b/tests/testthat/test_dataset_types.r index 65c422a..6ec1154 100644 --- a/tests/testthat/test_dataset_types.r +++ b/tests/testthat/test_dataset_types.r @@ -37,53 +37,53 @@ assay(test_se_sparse) <- as(assay(test_se_sparse), 'sparseMatrix') test_that('The helpers work with matrix data', { expect_identical(dataset_extract_doublematrix(test_matrix), test_matrix) - expect_identical(dataset_n_observations (test_matrix), test_nobss) - expect_identical(dataset_n_features (test_matrix), test_nfeat) - expect_identical(dataset_to_df (test_matrix), test_df[, 1:3]) - expect_identical(dataset_names (test_matrix), test_feat) - expect_identical(dataset_get_feature (test_matrix, 'g2'), test_matrix[, 'g2']) + expect_identical(dataset_n_observations(test_matrix), test_nobss) + expect_identical(dataset_n_features(test_matrix), test_nfeat) + expect_identical(dataset_to_df(test_matrix), test_df[, 1:3]) + expect_identical(dataset_names(test_matrix), test_feat) + expect_identical(dataset_get_feature(test_matrix, 'g2'), test_matrix[, 'g2']) }) test_that('The helpers work with data.frame data', { expect_identical(dataset_extract_doublematrix(test_df), test_matrix) - expect_identical(dataset_n_observations (test_df), test_nobss) - expect_identical(dataset_n_features (test_df), test_nfeat) - expect_identical(dataset_to_df (test_df), test_df) - expect_identical(dataset_names (test_df), c(test_feat, 'cm1')) - expect_identical(dataset_get_feature (test_df, 'g3'), test_df$g3) - expect_identical(dataset_get_feature (test_df, 'cm1'), test_df$cm1) + expect_identical(dataset_n_observations(test_df), test_nobss) + expect_identical(dataset_n_features(test_df), test_nfeat) + expect_identical(dataset_to_df(test_df), test_df) + expect_identical(dataset_names(test_df), c(test_feat, 'cm1')) + expect_identical(dataset_get_feature(test_df, 'g3'), test_df$g3) + expect_identical(dataset_get_feature(test_df, 'cm1'), test_df$cm1) }) test_that('The helpers work with ExpressionSet data', { expect_identical(dataset_extract_doublematrix(test_es), test_matrix) - expect_identical(dataset_n_observations (test_es), test_nobss) - expect_identical(dataset_n_features (test_es), test_nfeat) - expect_identical(dataset_to_df (test_es), test_df) - expect_identical(dataset_names (test_es), c(test_feat, 'cm1')) - expect_identical(dataset_get_feature (test_es, 'g1'), exprs(test_es)['g1', ]) - expect_identical(dataset_get_feature (test_es, 'cm1'), test_es$cm1) + expect_identical(dataset_n_observations(test_es), test_nobss) + expect_identical(dataset_n_features(test_es), test_nfeat) + expect_identical(dataset_to_df(test_es), test_df) + expect_identical(dataset_names(test_es), c(test_feat, 'cm1')) + expect_identical(dataset_get_feature(test_es, 'g1'), exprs(test_es)['g1', ]) + expect_identical(dataset_get_feature(test_es, 'cm1'), test_es$cm1) }) test_that('The helpers work with SingleCellExperiment data', { expect_identical(dataset_extract_doublematrix(test_se), test_matrix) - expect_identical(dataset_n_observations (test_se), test_nobss) - expect_identical(dataset_n_features (test_se), test_nfeat) - expect_identical(dataset_to_df (test_se), test_df) - expect_identical(dataset_names (test_se), c(test_feat, 'cm1')) - expect_identical(dataset_get_feature (test_se, 'g1'), assay(test_se, 'logcounts')['g1', ]) - expect_identical(dataset_get_feature (test_se, 'cm1'), test_se$cm1) + expect_identical(dataset_n_observations(test_se), test_nobss) + expect_identical(dataset_n_features(test_se), test_nfeat) + expect_identical(dataset_to_df(test_se), test_df) + expect_identical(dataset_names(test_se), c(test_feat, 'cm1')) + expect_identical(dataset_get_feature(test_se, 'g1'), assay(test_se, 'logcounts')['g1', ]) + expect_identical(dataset_get_feature(test_se, 'cm1'), test_se$cm1) }) test_that('The helpers work with sparse SingleCellExperiment data', { expect_identical(dataset_extract_doublematrix(test_se_sparse), as(test_matrix, 'sparseMatrix')) - expect_identical(dataset_n_observations (test_se_sparse), test_nobss) - expect_identical(dataset_n_features (test_se_sparse), test_nfeat) - expect_identical(dataset_to_df (test_se_sparse), test_df) - expect_identical(dataset_names (test_se_sparse), c(test_feat, 'cm1')) - expect_identical(dataset_get_feature (test_se_sparse, 'g1'), assay(test_se_sparse, 'logcounts')['g1', ]) - expect_identical(dataset_get_feature (test_se_sparse, 'cm1'), test_se_sparse$cm1) + expect_identical(dataset_n_observations(test_se_sparse), test_nobss) + expect_identical(dataset_n_features(test_se_sparse), test_nfeat) + expect_identical(dataset_to_df(test_se_sparse), test_df) + expect_identical(dataset_names(test_se_sparse), c(test_feat, 'cm1')) + expect_identical(dataset_get_feature(test_se_sparse, 'g1'), assay(test_se_sparse, 'logcounts')['g1', ]) + expect_identical(dataset_get_feature(test_se_sparse, 'cm1'), test_se_sparse$cm1) }) diff --git a/tests/testthat/test_distances.r b/tests/testthat/test_distances.r index 005053a..5084f84 100644 --- a/tests/testthat/test_distances.r +++ b/tests/testthat/test_distances.r @@ -4,36 +4,30 @@ library(Matrix) full_t_p_local <- function(dat, sigma, dists) { d2 <- as.matrix(dists ^ 2) - - S1 <- sigma %*% t(sigma) - S2 <- outer(sigma ^ 2, sigma ^ 2, '+') - - rv <- sqrt(2 * S1 / S2) * exp(-d2 / S2) + + s1 <- sigma %*% t(sigma) + s2 <- outer(sigma ^ 2, sigma ^ 2, '+') + + rv <- sqrt(2 * s1 / s2) * exp(-d2 / s2) rv[d2 == 0] <- 0 rv } test_that('no_censoring produces the correct output for local sigma', { - test_data <- matrix(rnorm(4L*5L), 4L, 5L) + test_data <- matrix(rnorm(4L * 5L), 4L, 5L) k <- 3L n_local <- 2:3 - + knn <- find_knn(test_data, k) expect_identical(dim(knn$dist), c(nrow(test_data), k)) expect_identical(dim(knn$dist_mat), rep(nrow(test_data), 2L)) expect_identical(sum(!is.finite(knn$dist_mat)), 0L) - + sigma <- optimal_sigma(get_sigmas(test_data, knn$dist, 'local', n_local)) expect_identical(length(sigma), nrow(test_data)) - + dists_expected <- full_t_p_local(test_data, sigma, knn$dist_mat) dists <- no_censoring(knn$dist_mat, sigma) expect_equal(dim(dists), dim(dists_expected)) expect_equal(as.matrix(dists), dists_expected) }) - -matidx_apply <- function(nrow, ncol, FUN) { - mat <- matrix(NA, nrow, ncol) - vals <- mapply(FUN, row(mat), col(mat)) - matrix(vals, nrow, ncol) -} diff --git a/tests/testthat/test_ggplot.r b/tests/testthat/test_ggplot.r index 0ff14d6..50c3af0 100644 --- a/tests/testthat/test_ggplot.r +++ b/tests/testthat/test_ggplot.r @@ -2,10 +2,6 @@ context('ggplot') data(guo_norm) -get_geom <- function(p, name) { - -} - guo_df <- as(guo_norm, 'data.frame') guo_32 <- guo_df[guo_df$num_cells == 32, ] guo_no_32 <- guo_df[guo_df$num_cells != 32, ] @@ -24,19 +20,19 @@ test_that('ggplot plots have the ticks/boxes they should have', { p2 <- plot.DiffusionMap(dm_no_32, 1:2, col_by = 'num_cells', ticks = TRUE) p3 <- plot.DiffusionMap(dm_no_32, 1:2, col_by = 'num_cells', axes = FALSE) p4 <- plot.DiffusionMap(dm_no_32, 1:2, col_by = 'num_cells', box = TRUE) - + # check range_frame expect_identical(length(p1$layers), 2L) expect_identical(length(p2$layers), 2L) expect_identical(length(p3$layers), 1L) expect_identical(length(p4$layers), 2L) - + # check ticks expect_identical(class(p1$theme$axis.ticks)[[1L]], 'element_blank') expect_identical(class(p2$theme$axis.ticks)[[1L]], 'element_line') expect_identical(class(p3$theme$axis.ticks)[[1L]], 'element_blank') expect_identical(class(p4$theme$axis.ticks)[[1L]], 'element_blank') - + # check box expect_identical(class(p1$theme$panel.border)[[1L]], 'element_blank') expect_identical(class(p2$theme$panel.border)[[1L]], 'element_blank') diff --git a/tests/testthat/test_gr.r b/tests/testthat/test_gr.r index 54a6ea8..a7ce21d 100644 --- a/tests/testthat/test_gr.r +++ b/tests/testthat/test_gr.r @@ -1,17 +1,17 @@ context('gene relevance') smps <- 120L -part <- 1/4 +part <- 1 / 4 test_data <- local({ d <- data.frame( - A = c( seq(1, 0, length = smps * part), rep(0, smps*3*part)), - B = c( seq(0, 1, length = smps*2*part), rep(1, smps*2*part)), - C = c(rep(0, smps*part), seq(0, 1, length = smps*2*part), rep(0, smps * part)), - D = c(rep(0, smps*part), seq(0, .5, length = smps * part), rep(0, smps * part), seq(.5, 1, length = smps*part)), # + rnorm(smps, 0, .05) + A = c(seq(1, 0, length = smps * part), rep(0, smps * 3 * part)), + B = c(seq(0, 1, length = smps * 2 * part), rep(1, smps * 2 * part)), + C = c(rep(0, smps * part), seq(0, 1, length = smps * 2 * part), rep(0, smps * part)), + D = c(rep(0, smps * part), seq(0, .5, length = smps * part), rep(0, smps * part), seq(.5, 1, length = smps * part)), #: + rnorm(smps, 0, .05) stringsAsFactors = FALSE ) d$Cell <- seq_len(nrow(d)) - d$Type <- as.integer(ceiling(d$Cell / (smps/4))) + d$Type <- as.integer(ceiling(d$Cell / (smps / 4))) d }) diff --git a/tests/testthat/test_knn.r b/tests/testthat/test_knn.r index 49171a0..a90b003 100644 --- a/tests/testthat/test_knn.r +++ b/tests/testthat/test_knn.r @@ -2,15 +2,15 @@ context('kNN integrity') test_that('knn works similarly to FNN', { skip_if_not_installed('FNN') - + data(guo_norm, package = 'destiny') e <- t(Biobase::exprs(guo_norm)) - + r_destiny <- destiny::find_knn(e, 5L) r_fnn <- FNN::get.knn(e, 5L) - + dimnames(r_destiny$index) <- dimnames(r_destiny$dist) <- NULL - + rows_eq <- sapply(seq_len(nrow(e)), function(r) isTRUE(all.equal(r_destiny$index[r, ], r_fnn$nn.index[r, ]))) expect_lte(sum(!rows_eq), 5L) expect_equal(r_destiny$dist[rows_eq, ], r_fnn$nn.dist[rows_eq, ], tolerance = 1e-5) @@ -18,16 +18,16 @@ test_that('knn works similarly to FNN', { test_that('knnx works similarly to FNN', { skip_if_not_installed('FNN') - + data(guo_norm, package = 'destiny') e <- t(Biobase::exprs(guo_norm)) nc <- guo_norm$num_cells - - r_destiny <- destiny::find_knn(e[nc != 32L,], 5L, query = e[nc == 32L,]) - r_fnn <- FNN::get.knnx(e[nc != 32L,], e[nc == 32L,], 5L) - + + r_destiny <- destiny::find_knn(e[nc != 32L, ], 5L, query = e[nc == 32L, ]) + r_fnn <- FNN::get.knnx(e[nc != 32L, ], e[nc == 32L, ], 5L) + dimnames(r_destiny$index) <- dimnames(r_destiny$dist) <- NULL - + rows_eq <- sapply(seq_len(sum(nc == 32L)), function(r) isTRUE(all.equal(r_destiny$index[r, ], r_fnn$nn.index[r, ]))) expect_lte(sum(!rows_eq), 30L) expect_equal(r_destiny$dist[rows_eq, ], r_fnn$nn.dist[rows_eq, ], tolerance = 1e-5) diff --git a/tests/testthat/test_utils.r b/tests/testthat/test_utils.r index f1c65a7..27877a4 100644 --- a/tests/testthat/test_utils.r +++ b/tests/testthat/test_utils.r @@ -11,7 +11,7 @@ test_that('duplicated.dgCMatrix works', { 0, 0, 0 ), 5L, 3L, TRUE, sparse = TRUE) expect_is(m, 'dgCMatrix') - + expect_identical(duplicated(m), c(FALSE, FALSE, FALSE, TRUE, TRUE)) expect_identical(duplicated(m, MARGIN = 2), c(FALSE, TRUE, FALSE)) expect_error(duplicated(m, MARGIN = 3), 'Invalid MARGIN 3') @@ -23,6 +23,6 @@ test_that('duplicated.dgCMatrix works with zero columns', { c(1, 2, 4, 4, 6), x = c(1, 1, 2, 3, 1)) expect_is(m, 'dgCMatrix') - + expect_identical(which(duplicated(m, MARGIN = 2)), c(2L, 5L, 6L)) }) diff --git a/vignettes/DPT.Rmd b/vignettes/DPT.Rmd index f5b67dc..f98c4c4 100644 --- a/vignettes/DPT.Rmd +++ b/vignettes/DPT.Rmd @@ -27,8 +27,8 @@ library(gridExtra) # Also we need grid.arrange ```{r} par(mar = rep(0, 4)) graph <- igraph::graph_from_literal( - data -+ 'transition probabilities' -+ DiffusionMap, - 'transition probabilities' -+ DPT) + data - + 'transition probabilities' - + DiffusionMap, + 'transition probabilities' - + DPT) plot( graph, layout = igraph::layout_as_tree, vertex.size = 50, @@ -75,13 +75,13 @@ grid.arrange( The `DPT` object also contains a clustering based on the tip cells and DPT, and you can specify where to draw paths from and to: ```{r} -plot(dpt, root = 2, paths_to = c(1,3), col_by = 'branch') +plot(dpt, root = 2, paths_to = c(1, 3), col_by = 'branch') ``` You can further divide branches. First simply plot branch colors like we did above, then identify the number of the branch you intend to plot, and then specify it in a subsequent `plot` call. In order to see the new branches best, we specify a `dcs` argument that visually spreads out out all four branches. ```{r} -plot(dpt, col_by = 'branch', divide = 3, dcs = c(-1,-3,2), pch = 20) +plot(dpt, col_by = 'branch', divide = 3, dcs = c(-1, -3, 2), pch = 20) ``` References diff --git a/vignettes/Diffusion-Maps.Rmd b/vignettes/Diffusion-Maps.Rmd index 0afe3f0..ad0b580 100644 --- a/vignettes/Diffusion-Maps.Rmd +++ b/vignettes/Diffusion-Maps.Rmd @@ -17,20 +17,6 @@ suppressPackageStartupMessages({ library(destiny) library(Biobase) }) - -#setHook('on.rgl.close', function(...) { -# name <- tempfile() -# par3d(windowRect = c(0, 0, 1200, 1200)) -# Sys.sleep(1) -# -# rgl.snapshot( filename = paste0(name, '.png')) -# #rgl.postscript(filename = paste0(name, '.pdf'), fmt='pdf') # doesn’t work with spheres -# -# publish_mimebundle(list( -# 'image/png' = base64encode(paste0(name, '.png')) -# #, 'application/pdf' = base64encode(paste0(name, '.pdf')) -# )) -#}, 'replace') ``` Diffusion maps are spectral method for non-linear dimension reduction introduced by @coifman_geometric_2005. Diffusion maps are based on a distance metric (diffusion distance) which is conceptually relevant to how differentiating cells follow noisy diffusion-like dynamics, moving from a pluripotent state towards more differentiated states. @@ -134,7 +120,7 @@ If you started reading here, execute `data(guo.norm)` to load the dataset that w ```{r} library(destiny) -#data(guo_norm) +#If you start here, run: data(guo_norm) dm <- DiffusionMap(guo_norm) ``` @@ -191,8 +177,7 @@ For the popular `ggplot2` package, there is built in support in the form of a `f library(ggplot2) qplot(DC1, DC2, data = dm, colour = factor(num_cells)) + scale_color_cube_helix() -# or alternatively: -#ggplot(dif, aes(DC1, DC2, colour = factor(num.cells))) + ... +# or alternatively: ggplot(dif, aes(DC1, DC2, colour = factor(num.cells))) + ... ``` As aesthetics, all diffusion components, gene expressions, and annotations are available. If you plan to make many plots, create a `data.frame` first by using `as.data.frame(dif)` or `fortify(dif)`, assign it to a variable name, and use it for plotting. diff --git a/vignettes/Gene-Relevance.Rmd b/vignettes/Gene-Relevance.Rmd index 5f8e351..14dcd6c 100644 --- a/vignettes/Gene-Relevance.Rmd +++ b/vignettes/Gene-Relevance.Rmd @@ -33,9 +33,12 @@ Let’s use data from the `scRNAseq`[1] package. If necessary, install it via `B ```{r} # The parts of the help we’re interested in -help('scRNAseq-package', package = 'scRNAseq') %>% repr::repr_html() %>% - stringr::str_extract_all(stringr::regex('

The dataset.*?

', dotall = TRUE)) %>% unlist() %>% - paste(collapse = '') %>% knitr::raw_html() +help('scRNAseq-package', package = 'scRNAseq') %>% + repr::repr_html() %>% + stringr::str_extract_all(stringr::regex('

The dataset.*?

', dotall = TRUE)) %>% + unlist() %>% + paste(collapse = '') %>% + knitr::raw_html() ``` 379 cells seems sufficient to see something! @@ -89,7 +92,7 @@ Let’s create a Diffusion map. For rapid results, people often create a PCA fir However, even with many more principal components than necessary to get a nicely resolved Diffusion Map, the close spatial correspondence between diffusion components and genes are lost. ```{r} -#reducedDim(allen_hvg, 'pca') <- irlba::prcomp_irlba(t(assay(allen, 'logcounts')), 50)$x +#To go from PCA: reducedDim(allen_hvg, 'pca') <- irlba::prcomp_irlba(t(assay(allen, 'logcounts')), 50)$x ``` The chosen distance metric has big implications on your results, you should try at least cosine and rankcor. @@ -101,9 +104,7 @@ dms <- c('euclidean', 'cosine', 'rankcor') %>% #, 'l2' map(~ DiffusionMap(allen_hvg, distance = ., knn_params = list(method = 'covertree'))) ``` -TODO: wide plot - -```{r} +```{r, fig.asp = 1/4, fig.width = 10} dms %>% imap(function(dm, dist) plot(dm, 1:2, col_by = 'driver_1_s') + ggtitle(dist)) %>% cowplot::plot_grid(plotlist = ., nrow = 1) @@ -113,9 +114,7 @@ dms %>% grs <- map(dms, gene_relevance) ``` -TODO: wide plot - -```{r} +```{r, fig.asp = 1/4, fig.width = 10} gms <- imap(grs, function(gr, dist) plot(gr, iter_smooth = 0) + ggtitle(dist)) cowplot::plot_grid(plotlist = gms, nrow = 1) ``` @@ -128,8 +127,9 @@ gms[-1] %>% map(~ .$ids[1:10]) %>% purrr::reduce(intersect) %>% cat(sep = ' ') ```{r} httr::GET('https://rest.uniprot.org/uniprotkb/search', query = list( - columns = 'id,genes,comment(TISSUE SPECIFICITY)', + fields = 'accession,gene_names,cc_tissue_specificity', format = 'tsv', query = rowData(allen)$Uniprot[gms$cosine$ids[1:6]] %>% unlist() %>% paste(collapse = ' OR ') -)) %>% httr::content(type = 'text/tab-separated-values', encoding = 'utf-8') +)) %>% + httr::content(type = 'text/tab-separated-values', encoding = 'utf-8') ``` diff --git a/vignettes/Global-Sigma.Rmd b/vignettes/Global-Sigma.Rmd index 3d56e68..35324b4 100644 --- a/vignettes/Global-Sigma.Rmd +++ b/vignettes/Global-Sigma.Rmd @@ -38,11 +38,14 @@ The resulting diffusion map’s approximation depends on the chosen sigma. Note palette(cube_helix(6)) plots <- lapply( - list('local', 5, round(optimal_sigma(sigmas), 2), 100), - function(sigma) plot( - DiffusionMap(guo_norm, sigma), 1:2, - main = paste('σ =', sigma), - col_by = 'num_cells', draw_legend = FALSE)) + list('local', 5, round(optimal_sigma(sigmas), 2), 100), + function(sigma) { + plot( + DiffusionMap(guo_norm, sigma), 1:2, + main = paste('σ =', sigma), + col_by = 'num_cells', draw_legend = FALSE) + } +) do.call(gridExtra::grid.arrange, c(plots, ncol = 2)) ``` diff --git a/vignettes/tidyverse.Rmd b/vignettes/tidyverse.Rmd index 4092284..1d803c8 100644 --- a/vignettes/tidyverse.Rmd +++ b/vignettes/tidyverse.Rmd @@ -74,7 +74,8 @@ fortify(dm) %>% mutate( EmbryoState = factor(num_cells) %>% lvls_revalue(paste(levels(.), 'cell state')) - ) %>% ggplot(aes(DC1, DC2, colour = EmbryoState)) + + ) %>% + ggplot(aes(DC1, DC2, colour = EmbryoState)) + geom_point() ```