Skip to content
This repository was archived by the owner on Sep 13, 2024. It is now read-only.

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
mrohban committed Feb 14, 2018
0 parents commit ef5e160
Show file tree
Hide file tree
Showing 38 changed files with 1,992 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
^.*\.Rproj$
^\.Rproj\.user$
^docs$
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
src/*.o
src/*.so
src/*.dll
20 changes: 20 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Package: cytostats
Type: Package
Title: Statistical Functions for Single-Cell Analysis
Version: 0.0.0.9000
Date: 2018-02-13
Authors@R: c(person("Mohammad", "Rohban", email = "[email protected]", role = c("aut", "cre")),
person("Shantanu", "Singh", email = "[email protected]", role = c("aut"))
)
Description: Single-cell analysis requires efficient implementation of
statistical functions to handle the high-dimensionality of the data.
License: BSD_3_clause + file LICENSE
Imports:
doParallel (>= 1.0.10),
foreach (>= 1.4.3),
magrittr (>= 1.5),
Rcpp (>= 0.12.15)
Suggests:
testthat (>= 1.0.2)
LinkingTo: Rcpp, RcppArmadillo
RoxygenNote: 6.0.1
3 changes: 3 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
YEAR: 2018
COPYRIGHT HOLDER: Broad Institute, Inc.
ORGANIZATION: Broad Institute, Inc.
12 changes: 12 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Generated by roxygen2: do not edit by hand

export(combine_cov_estimates)
export(online_covar)
export(parallel_cor)
export(parallel_cov)
export(two_pass_multi_covar)
importFrom(Rcpp,sourceCpp)
importFrom(foreach,"%dopar%")
importFrom(magrittr,"%<>%")
importFrom(magrittr,"%>%")
useDynLib(cytostats)
38 changes: 38 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' online_covar
#'
#' Thin R wrapper that forwards \code{x1} and \code{x2} unchanged to the
#' compiled routine \code{_cytostats_online_covar}, looked up in the
#' \code{cytostats} shared library.
#'
#' @param x1 a numeric vector containing samples of the first random variable
#' @param x2 a numeric vector containing corresponding samples of the second random variable
#'
#' @return Whatever the compiled routine returns; presumably the covariance
#'   estimate of \code{x1} and \code{x2} (NOTE(review): confirm against the
#'   C++ source, which is not visible from this file).
#'
#' @export
#'
online_covar <- function(x1, x2) {
# No argument checking happens on the R side; inputs go straight to native code.
.Call('_cytostats_online_covar', PACKAGE = 'cytostats', x1, x2)
}

#' two_pass_multi_covar
#'
#' Thin R wrapper that forwards \code{s} unchanged to the compiled routine
#' \code{_cytostats_two_pass_multi_covar}, looked up in the \code{cytostats}
#' shared library.
#'
#' @param s a data matrix whose column covariances are sought
#'
#' @return Whatever the compiled routine returns; presumably the matrix of
#'   covariances between the columns of \code{s} (NOTE(review): confirm
#'   against the C++ source, which is not visible from this file).
#'
#' @export
#'
two_pass_multi_covar <- function(s) {
# No argument checking happens on the R side; the input goes straight to native code.
.Call('_cytostats_two_pass_multi_covar', PACKAGE = 'cytostats', s)
}

#' combine_cov_estimates
#'
#' Thin R wrapper that forwards its arguments unchanged to the compiled routine
#' \code{_cytostats_combine_cov_estimates}, looked up in the \code{cytostats}
#' shared library.
#'
#' @param batch_mean_cov the matrix which contains the estimated means and covariance for each batch of data.
#' For n variables and k batches it has (n + 1) rows and (n * k) columns: the first row holds the means and
#' the remaining rows hold the covariance matrices. The per-batch covariance matrices are concatenated
#' column-wise, which gives the n * k columns.
#' @param b a vector containing the number of samples in each batch of data
#'
#' @return Whatever the compiled routine returns; presumably the combined means
#'   (first row) stacked on the combined covariance matrix (NOTE(review):
#'   confirm against the C++ source, which is not visible from this file).
#'
#' @export
#'
combine_cov_estimates <- function(batch_mean_cov, b) {
# No argument checking happens on the R side; inputs go straight to native code.
.Call('_cytostats_combine_cov_estimates', PACKAGE = 'cytostats', batch_mean_cov, b)
}

9 changes: 9 additions & 0 deletions R/cytostats-package.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#' cytostats
#'
#' Statistical functions for single-cell analysis.
#'
#' @name cytostats
#' @docType package
# roxygen2 needs an object to attach the block above to; NULL is a placeholder.
NULL

# Namespace directives only: load the package's compiled code and import
# Rcpp::sourceCpp. NULL is again just a placeholder for roxygen2.
#' @useDynLib cytostats
#' @importFrom Rcpp sourceCpp
NULL
29 changes: 29 additions & 0 deletions R/parallel_cor.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#' Compute correlation function in batches, in parallel
#'
#' Obtains the covariance matrix of the columns of \code{x} from
#' \code{\link{parallel_cov}} and rescales it to a correlation matrix.
#'
#' @param x data matrix
#' @param splits shows the number of splits of data that we want to combine (should be of 2^n form)
#' @param cores number of cores that will be used for the parallel computation
#' @param cov_fun covariance function used on each split
#'
#' @return A square matrix of correlations between the columns of \code{x},
#'   with row and column names taken from \code{colnames(x)}. Entries that
#'   involve a zero-variance column are not finite.
#'
#' @export
#'
#' @importFrom magrittr %>%
#' @importFrom magrittr %<>%
#' @importFrom foreach %dopar%
#'
parallel_cor <- function(x, splits = 2, cores = 2, cov_fun = "two_pass_multi_covar") {
  cov_mat <- as.matrix(
    parallel_cov(x, splits = splits, cores = cores, cov_fun = cov_fun)
  )

  # cor(X, Y) = cov(X, Y) / (sd(X) * sd(Y))
  inv_sd <- diag(cov_mat)^-0.5

  # Scale rows, then columns, element-wise. Building diag(inv_sd) and using
  # %*% breaks for a 1 x 1 covariance (diag() of a length-one numeric creates
  # an identity matrix of that size) and lets a single zero-variance column
  # turn every entry into NaN through 0 * Inf in the dense product.
  # `inv_sd * cov_mat` recycles down the columns, i.e. scales row i by
  # inv_sd[i]; the rep() term scales column j by inv_sd[j].
  result <- inv_sd * cov_mat * rep(inv_sd, each = length(inv_sd))

  dimnames(result) <- list(colnames(x), colnames(x))

  result
}
57 changes: 57 additions & 0 deletions R/parallel_cov.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#' Compute covariance function in batches, in parallel
#'
#' Splits the rows of \code{x} into \code{splits} consecutive batches, computes
#' the column means and the covariance matrix of every batch in parallel, and
#' merges the per-batch estimates with \code{\link{combine_cov_estimates}}.
#'
#' @param x data matrix (converted with \code{as.matrix}); the covariances are
#'   computed between its columns
#' @param splits shows the number of splits of data that we want to combine (should be of 2^n form).
#'   Must be a single whole number between 1 and the number of rows of \code{x}.
#' @param cores number of cores that will be used for the parallel computation
#' @param cov_fun covariance function used on each split, given as a function
#'   or as the name of one; it is called with the batch matrix as its only
#'   argument
#'
#' @return A square matrix of covariances between the columns of \code{x}, with
#'   row and column names taken from \code{colnames(x)}.
#'
#' @export
#'
#' @importFrom magrittr %>%
#' @importFrom magrittr %<>%
#' @importFrom foreach %dopar%
#'
parallel_cov <- function(x, splits = 2, cores = 2, cov_fun = "two_pass_multi_covar") {
  x <- as.matrix(x)
  n <- NROW(x)

  # Fail early with a clear message instead of indexing rows that do not exist.
  if (!is.numeric(splits) || length(splits) != 1L || is.na(splits) ||
      splits < 1 || splits != round(splits)) {
    stop("`splits` must be a single positive whole number", call. = FALSE)
  }
  if (splits > n) {
    stop(
      "`splits` (", splits, ") must not exceed the number of rows of `x` (",
      n, ")",
      call. = FALSE
    )
  }

  doParallel::registerDoParallel(cores = cores)
  # Release any worker processes that registerDoParallel() started implicitly,
  # so repeated calls do not accumulate idle workers.
  on.exit(doParallel::stopImplicitCluster(), add = TRUE)

  # Spread the rows as evenly as possible: every batch gets n %/% splits rows
  # and the first n %% splits batches get one extra. Sizing the batches with
  # round(n / splits) could leave the last batch with zero or a negative
  # number of rows (e.g. n = 9, splits = 6).
  batches <- as.numeric(
    rep(n %/% splits, splits) + (seq_len(splits) <= n %% splits)
  )
  last_row <- cumsum(batches)
  first_row <- last_row - batches + 1

  j <- 0 # to avoid warning no visible binding for global variable j

  result <- foreach::foreach(j = seq_along(batches)) %dopar% {
    # drop = FALSE keeps a matrix even for a one-row batch or one-column data.
    batch <- x[first_row[j]:last_row[j], , drop = FALSE]

    batch_mean <- apply(batch, 2, mean)

    batch_cov <- do.call(cov_fun, list(batch))

    # Per-batch layout: means in the first row, covariance matrix below.
    rbind(batch_mean, batch_cov)
  }

  # Concatenate the per-batch blocks column-wise before combining them.
  batch_mean_cov <- do.call(cbind, result)

  mean_cov <- combine_cov_estimates(batch_mean_cov, batches)

  # Discard the first row and keep the rest as the covariance matrix;
  # drop = FALSE keeps the matrix shape for one-column data.
  result <- mean_cov[-1, , drop = FALSE]

  rownames(result) <- colnames(x)

  colnames(result) <- colnames(x)

  result
}
18 changes: 18 additions & 0 deletions cytostats.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
104 changes: 104 additions & 0 deletions docs/LICENSE.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ef5e160

Please sign in to comment.