Skip to content

Commit

Permalink
Merge pull request #9 from large-scale-gxe-methods/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
pancong419 authored Mar 24, 2023
2 parents c34c434 + 1faf942 commit 1996f18
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 23 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Description: Use a 'glmmkin' class object (GMMAT package) from the null model to
License: GPL-3
Copyright: See COPYRIGHTS for details.
Imports: Rcpp, Matrix, parallel, MASS, SeqArray, SeqVarTools, foreach,
GMMAT, CompQuadForm
GMMAT, CompQuadForm, data.table
Suggests: doMC, testthat
LinkingTo: Rcpp, RcppArmadillo
Encoding: UTF-8
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ importFrom("stats", "as.formula", "binomial", "dbeta", "glm", "model.frame",
"pnorm", "uniroot", "integrate", "weights", "vcov")
importFrom("utils", "read.table", "write.table")
importFrom("CompQuadForm", "davies", "liu")
importFrom("data.table", "fread")
importFrom("SeqArray", "seqOpen", "seqGetData", "seqClose", "seqSetFilter")
importFrom("SeqVarTools", "missingGenotypeRate", "alleleFrequency", "altDosage")
importFrom("GMMAT", "glmmkin", "glmm.score", "SMMAT")
Expand Down
24 changes: 12 additions & 12 deletions R/MAGEE.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
MAGEE <- function(null.obj, interaction, geno.file, group.file, group.file.sep = "\t", bgen.samplefile = NULL, interaction.covariates = NULL, meta.file.prefix = NULL, MAF.range = c(1e-7, 0.5), MAF.weights.beta = c(1, 25), miss.cutoff = 1, missing.method = "impute2mean", method = "davies", tests = "JF", use.minor.allele = FALSE, auto.flip = FALSE, Garbage.Collection = FALSE, is.dosage = FALSE, ncores = 1){
MAGEE <- function(null.obj, interaction, geno.file, group.file, group.file.sep = "\t", bgen.samplefile = NULL, interaction.covariates = NULL, meta.file.prefix = NULL, MAF.range = c(1e-7, 0.5), AF.strata.range = c(0, 1), MAF.weights.beta = c(1, 25), miss.cutoff = 1, missing.method = "impute2mean", method = "davies", tests = "JF", use.minor.allele = FALSE, auto.flip = FALSE, Garbage.Collection = FALSE, is.dosage = FALSE, ncores = 1){
if(Sys.info()["sysname"] == "Windows" && ncores > 1) {
warning("The package doMC is not available on Windows... Switching to single thread...")
ncores <- 1
Expand Down Expand Up @@ -86,7 +86,7 @@ MAGEE <- function(null.obj, interaction, geno.file, group.file, group.file.sep =
}
variant.id <- paste(chr, pos, ref, alt, sep = ":")
rm(chr, pos, ref, alt); gc()
group.info <- try(read.table(group.file, header = FALSE, col.names = c("group", "chr", "pos", "ref", "alt", "weight"), colClasses = c("character","character","integer","character","character","numeric"), sep = group.file.sep), silent = TRUE)
group.info <- try(fread(group.file, header = FALSE, data.table = FALSE, col.names = c("group", "chr", "pos", "ref", "alt", "weight"), colClasses = c("character","character","integer","character","character","numeric"), sep = group.file.sep), silent = TRUE)
if (inherits(group.info, "try-error")) {
stop("Error: cannot read group.file!")
}
Expand Down Expand Up @@ -176,7 +176,7 @@ MAGEE <- function(null.obj, interaction, geno.file, group.file, group.file.sep =
if(!is.null(strata)) { # E is not continuous
freq.tmp <- sapply(strata.list, function(x) colMeans(geno[x, , drop = FALSE], na.rm = TRUE)/2) # freq.tmp is a matrix, each column is a stratum, and each row is a variant
if (length(dim(freq.tmp)) == 2) freq_strata <- apply(freq.tmp, 1, range) else freq_strata <- as.matrix(range(freq.tmp)) # freq_strata is the range of allele freq across strata.list
include <- include & !is.na(freq_strata[1,]) & !is.na(freq_strata[2,]) & freq_strata[1,] >= MAF.range[1] & freq_strata[2,] <= 1-MAF.range[1]
include <- include & !is.na(freq_strata[1,]) & !is.na(freq_strata[2,]) & freq_strata[1,] >= AF.strata.range[1] & freq_strata[2,] <= AF.strata.range[2]
rm(freq.tmp)
}
n.p <- sum(include)
Expand Down Expand Up @@ -385,7 +385,7 @@ MAGEE <- function(null.obj, interaction, geno.file, group.file, group.file.sep =
if(!is.null(strata)) { # E is not continuous
freq.tmp <- sapply(strata.list, function(x) colMeans(geno[x, , drop = FALSE], na.rm = TRUE)/2) # freq.tmp is a matrix, each column is a stratum, and each row is a variant
if (length(dim(freq.tmp)) == 2) freq_strata <- apply(freq.tmp, 1, range) else freq_strata <- as.matrix(range(freq.tmp)) # freq_strata is the range of allele freq across strata.list
include <- include & !is.na(freq_strata[1,]) & !is.na(freq_strata[2,]) & freq_strata[1,] >= MAF.range[1] & freq_strata[2,] <= 1-MAF.range[1]
include <- include & !is.na(freq_strata[1,]) & !is.na(freq_strata[2,]) & freq_strata[1,] >= AF.strata.range[1] & freq_strata[2,] <= AF.strata.range[2]
rm(freq.tmp)
}
n.p <- sum(include)
Expand Down Expand Up @@ -551,7 +551,7 @@ MAGEE <- function(null.obj, interaction, geno.file, group.file, group.file.sep =
if (is.null(bgen.samplefile)) {
stop("Error: bgen file does not contain sample identifiers. A .sample file (bgen.samplefile) is needed.")
}
sample.id <- read.table(bgen.samplefile, header = TRUE, sep = " ")
sample.id <- fread(bgen.samplefile, header = TRUE, data.table = FALSE)
if ((nrow(sample.id)-1) != bgenInfo$N){
stop(paste0("Error: Number of sample identifiers in BGEN sample file (", nrow(sample.id)-1, ") does not match number of samples in BGEN file (", bgenInfo$N,")."))
}
Expand Down Expand Up @@ -594,7 +594,7 @@ MAGEE <- function(null.obj, interaction, geno.file, group.file, group.file.sep =
variant.id <- paste(bgenVariant$VariantInfo$CHR, bgenVariant$VariantInfo$POS, bgenVariant$VariantInfo$A1, bgenVariant$VariantInfo$A2, sep = ":")
gc()
variant.idx <- 1:length(variant.id)
group.info <- try(read.table(group.file, header = FALSE, col.names = c("group", "chr", "pos", "ref", "alt", "weight"), colClasses = c("character","character","integer","character","character","numeric"), sep = group.file.sep), silent = TRUE)
group.info <- try(fread(group.file, header = FALSE, data.table = FALSE, col.names = c("group", "chr", "pos", "ref", "alt", "weight"), colClasses = c("character","character","integer","character","character","numeric"), sep = group.file.sep), silent = TRUE)
if (inherits(group.info, "try-error")) {
stop("Error: cannot read group.file!")
}
Expand Down Expand Up @@ -683,7 +683,7 @@ MAGEE <- function(null.obj, interaction, geno.file, group.file, group.file.sep =
if(!is.null(strata)) { # E is not continuous
freq.tmp <- sapply(strata.list, function(x) colMeans(geno[x, , drop = FALSE], na.rm = TRUE)/2) # freq.tmp is a matrix, each column is a stratum, and each row is a variant
if (length(dim(freq.tmp)) == 2) freq_strata <- apply(freq.tmp, 1, range) else freq_strata <- as.matrix(range(freq.tmp)) # freq_strata is the range of allele freq across strata.list
include <- include & !is.na(freq_strata[1,]) & !is.na(freq_strata[2,]) & freq_strata[1,] >= MAF.range[1] & freq_strata[2,] <= 1-MAF.range[1]
include <- include & !is.na(freq_strata[1,]) & !is.na(freq_strata[2,]) & freq_strata[1,] >= AF.strata.range[1] & freq_strata[2,] <= AF.strata.range[2]
rm(freq.tmp)
}
n.p <- sum(include)
Expand Down Expand Up @@ -886,7 +886,7 @@ MAGEE <- function(null.obj, interaction, geno.file, group.file, group.file.sep =
if(!is.null(strata)) { # E is not continuous
freq.tmp <- sapply(strata.list, function(x) colMeans(geno[x, , drop = FALSE], na.rm = TRUE)/2) # freq.tmp is a matrix, each column is a stratum, and each row is a variant
if (length(dim(freq.tmp)) == 2) freq_strata <- apply(freq.tmp, 1, range) else freq_strata <- as.matrix(range(freq.tmp)) # freq_strata is the range of allele freq across strata.list
include <- include & !is.na(freq_strata[1,]) & !is.na(freq_strata[2,]) & freq_strata[1,] >= MAF.range[1] & freq_strata[2,] <= 1-MAF.range[1]
include <- include & !is.na(freq_strata[1,]) & !is.na(freq_strata[2,]) & freq_strata[1,] >= AF.strata.range[1] & freq_strata[2,] <= AF.strata.range[2]
rm(freq.tmp)
}
n.p <- sum(include)
Expand Down Expand Up @@ -1217,7 +1217,7 @@ MAGEE.prep <- function(null.obj, interaction, geno.file, group.file, interaction
}
variant.id <- paste(chr, pos, ref, alt, sep = ":")
rm(chr, pos, ref, alt); gc()
group.info <- try(read.table(group.file, header = FALSE, col.names = c("group", "chr", "pos", "ref", "alt", "weight"), colClasses = c("character","character","integer","character","character","numeric"), sep = group.file.sep), silent = TRUE)
group.info <- try(fread(group.file, header = FALSE, data.table = FALSE, col.names = c("group", "chr", "pos", "ref", "alt", "weight"), colClasses = c("character","character","integer","character","character","numeric"), sep = group.file.sep), silent = TRUE)
if (inherits(group.info, "try-error")) {
stop("Error: cannot read group.file!")
}
Expand Down Expand Up @@ -1258,7 +1258,7 @@ MAGEE.prep <- function(null.obj, interaction, geno.file, group.file, interaction
return(out)
}

MAGEE.lowmem <- function(MAGEE.prep.obj, geno.file = NULL, meta.file.prefix = NULL, MAF.range = c(1e-7, 0.5), MAF.weights.beta = c(1, 25), miss.cutoff = 1, missing.method = "impute2mean", method = "davies", tests = "JF", use.minor.allele = FALSE, Garbage.Collection = FALSE, is.dosage = FALSE, ncores = 1)
MAGEE.lowmem <- function(MAGEE.prep.obj, geno.file = NULL, meta.file.prefix = NULL, MAF.range = c(1e-7, 0.5), AF.strata.range = c(0, 1), MAF.weights.beta = c(1, 25), miss.cutoff = 1, missing.method = "impute2mean", method = "davies", tests = "JF", use.minor.allele = FALSE, Garbage.Collection = FALSE, is.dosage = FALSE, ncores = 1)
{
if(!inherits(MAGEE.prep.obj, "MAGEE.prep")) stop("Error: MAGEE.prep.obj must be a class MAGEE.prep object!")
is.Windows <- Sys.info()["sysname"] == "Windows"
Expand Down Expand Up @@ -1354,7 +1354,7 @@ MAGEE.lowmem <- function(MAGEE.prep.obj, geno.file = NULL, meta.file.prefix = NU
if(!is.null(strata)) { # E is not continuous
freq.tmp <- sapply(strata.list, function(x) colMeans(geno[x, , drop = FALSE], na.rm = TRUE)/2) # freq.tmp is a matrix, each column is a stratum, and each row is a variant
if (length(dim(freq.tmp)) == 2) freq_strata <- apply(freq.tmp, 1, range) else freq_strata <- as.matrix(range(freq.tmp)) # freq_strata is the range of allele freq across strata.list
include <- include & !is.na(freq_strata[1,]) & !is.na(freq_strata[2,]) & freq_strata[1,] >= MAF.range[1] & freq_strata[2,] <= 1-MAF.range[1]
include <- include & !is.na(freq_strata[1,]) & !is.na(freq_strata[2,]) & freq_strata[1,] >= AF.strata.range[1] & freq_strata[2,] <= AF.strata.range[2]
rm(freq.tmp)
}
n.p <- sum(include)
Expand Down Expand Up @@ -1557,7 +1557,7 @@ MAGEE.lowmem <- function(MAGEE.prep.obj, geno.file = NULL, meta.file.prefix = NU
if(!is.null(strata)) { # E is not continuous
freq.tmp <- sapply(strata.list, function(x) colMeans(geno[x, , drop = FALSE], na.rm = TRUE)/2) # freq.tmp is a matrix, each column is a stratum, and each row is a variant
if (length(dim(freq.tmp)) == 2) freq_strata <- apply(freq.tmp, 1, range) else freq_strata <- as.matrix(range(freq.tmp)) # freq_strata is the range of allele freq across strata.list
include <- include & !is.na(freq_strata[1,]) & !is.na(freq_strata[2,]) & freq_strata[1,] >= MAF.range[1] & freq_strata[2,] <= 1-MAF.range[1]
include <- include & !is.na(freq_strata[1,]) & !is.na(freq_strata[2,]) & freq_strata[1,] >= AF.strata.range[1] & freq_strata[2,] <= AF.strata.range[2]
rm(freq.tmp)
}
n.p <- sum(include)
Expand Down
4 changes: 2 additions & 2 deletions R/MAGEE.meta.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ MAGEE.meta <- function(meta.files.prefix, n.files = rep(1, length(meta.files.pre
JV <- "JV" %in% tests
JF <- "JF" %in% tests
JD <- "JD" %in% tests
group.info <- try(read.table(group.file, header = FALSE, col.names = c("group", "chr", "pos", "ref", "alt", "weight"), colClasses = c("character","character","integer","character","character","numeric"), sep = group.file.sep), silent = TRUE)
group.info <- try(fread(group.file, header = FALSE, data.table = FALSE, col.names = c("group", "chr", "pos", "ref", "alt", "weight"), colClasses = c("character","character","integer","character","character","numeric"), sep = group.file.sep), silent = TRUE)
if (inherits(group.info, "try-error")) {
stop("Error: cannot read group.file!")
}
Expand All @@ -32,7 +32,7 @@ MAGEE.meta <- function(meta.files.prefix, n.files = rep(1, length(meta.files.pre
for(i in 1:n.cohort) { # Read the scores for each study from each core
tmp.scores <- NULL
for(j in 1:n.files[i]) { # n.files[i] is the number of cores for the i-th study
tmp <- try(read.table(paste0(meta.files.prefix[i], ".score.", j), header = TRUE, as.is = TRUE))
tmp <- try(fread(paste0(meta.files.prefix[i], ".score.", j), header = TRUE, data.table = FALSE))
if (inherits(tmp,"try-error")) {
stop(paste0("Error: cannot read ", meta.files.prefix[i], ".score.", j, "!"))
}
Expand Down
2 changes: 1 addition & 1 deletion R/glmm.gei.R
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,7 @@ glmm.gei <- function(null.obj, interaction, geno.file, outfile, bgen.samplefile=
if (is.null(bgen.samplefile)) {
stop("Error: bgen file does not contain sample identifiers. A .sample file (bgen.samplefile) is needed.")
}
sample.id <- read.table(bgen.samplefile, header = TRUE, sep = " ")
sample.id <- fread(bgen.samplefile, header = TRUE, data.table = FALSE)
if ((nrow(sample.id)-1) != bgenInfo$N){
stop(paste0("Error: Number of sample identifiers in BGEN sample file (", nrow(sample.id)-1, ") does not match number of samples in BGEN file (", bgenInfo$N,")."))
}
Expand Down
4 changes: 2 additions & 2 deletions R/glmm.gei.meta.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ glmm.gei.meta <- function(files, outfile, interaction, SNPID = rep("SNPID", leng
if(length(Non_Effect_Allele) != k) stop("Error: \"Non_Effect_Allele\" must have the same length as \"files\"!")
if(length(Effect_Allele) != k) stop("Error: \"Effect_Allele\" must have the same length as \"files\"!")
col.include <- c("Beta_Marginal", "SE_Beta_Marginal", "P_Value_Marginal", "Beta_G", paste0("Beta_G.",interaction), "SE_Beta_G", paste0("SE_Beta_G.", interaction), paste0("Cov_Beta_G_G.", interaction), "P_Value_Interaction", "P_Value_Joint")
master <- read.table(files[1], header=T, as.is=T)[, c(SNPID[1], CHR[1], POS[1],Non_Effect_Allele[1], Effect_Allele[1], "N_Samples", "AF", col.include)]
master <- fread(files[1], header=T, data.table = FALSE)[, c(SNPID[1], CHR[1], POS[1],Non_Effect_Allele[1], Effect_Allele[1], "N_Samples", "AF", col.include)]
names(master)[1:5] <- c("SNPID", "CHR", "POS", "Non_Effect_Allele", "Effect_Allele")
master <- master[apply(!is.na(master[, col.include]), 1, all),]
master$SNPID <- paste(master$CHR, master$POS, master$Non_Effect_Allele, master$Effect_Allele, sep = ":")
Expand All @@ -23,7 +23,7 @@ glmm.gei.meta <- function(files, outfile, interaction, SNPID = rep("SNPID", leng
flag <- rep(0, nrow(master))
if(k > 1) {
for(i in 2:k) {
tmp <- read.table(files[i], header=T, as.is=T)[, c(SNPID[i], CHR[i],POS[i],Non_Effect_Allele[i], Effect_Allele[i], "N_Samples", "AF", col.include)]
tmp <- fread(files[i], header=T, data.table = FALSE)[, c(SNPID[i], CHR[i],POS[i],Non_Effect_Allele[i], Effect_Allele[i], "N_Samples", "AF", col.include)]
names(tmp)[1:5] <- c("SNPID", "CHR", "POS", "Non_Effect_Allele", "Effect_Allele")
tmp <- tmp[apply(!is.na(tmp[, col.include]), 1, all),]
tmp$SNPID <- paste(tmp$CHR, tmp$POS, tmp$Non_Effect_Allele, tmp$Effect_Allele, sep = ":")
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ See Section 3.2 of the <a href="https://github.com/large-scale-gxe-methods/MAGEE
For optimal computational performance, it is recommended to use an R version configured with the Intel Math Kernel Library (or other fast BLAS/LAPACK libraries). See the <a href="https://www.intel.com/content/www/us/en/developer/articles/technical/using-onemkl-with-r.html">instructions</a> on building R with Intel MKL.

## Version
The current version is 1.2.0 (June 2, 2022).
The current version is 1.2.1 (March 23, 2023).

## License
This software is licensed under GPL-3.
Expand Down
11 changes: 7 additions & 4 deletions man/MAGEE.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Use a glmmkin class object from the null GLMM to perform variant set-based main
\usage{
MAGEE(null.obj, interaction, geno.file, group.file, group.file.sep = "\t",
bgen.samplefile = NULL, interaction.covariates = NULL, meta.file.prefix = NULL,
MAF.range = c(1e-7, 0.5), MAF.weights.beta = c(1, 25), miss.cutoff = 1,
MAF.range = c(1e-7, 0.5), AF.strata.range = c(0, 1), MAF.weights.beta = c(1, 25), miss.cutoff = 1,
missing.method = "impute2mean", method = "davies", tests = "JF",
use.minor.allele = FALSE, auto.flip = FALSE,
Garbage.Collection = FALSE, is.dosage = FALSE, ncores = 1)
Expand All @@ -21,7 +21,7 @@ MAGEE.prep(null.obj, interaction, geno.file, group.file, interaction.covariates
group.file.sep = "\t", auto.flip = FALSE)

MAGEE.lowmem(MAGEE.prep.obj, geno.file = NULL, meta.file.prefix = NULL,
MAF.range = c(1e-7, 0.5), MAF.weights.beta = c(1, 25), miss.cutoff = 1,
MAF.range = c(1e-7, 0.5), AF.strata.range = c(0, 1), MAF.weights.beta = c(1, 25), miss.cutoff = 1,
missing.method = "impute2mean", method = "davies", tests = "JF",
use.minor.allele = FALSE, Garbage.Collection = FALSE, is.dosage = FALSE,
ncores = 1)
Expand Down Expand Up @@ -54,6 +54,9 @@ the prefix for meta-analysis (default = \code{"NULL"}).
}
\item{MAF.range}{
a numeric vector of length 2 defining the minimum and maximum minor allele frequencies of variants that should be included in the analysis (default = c(1e-7, 0.5)).
}
\item{AF.strata.range}{
a numeric vector of length 2 defining the minimum and maximum coding allele frequencies allowed within each stratum, if the environmental factor is categorical. A variant is included in the analysis only if its coding allele frequency in every stratum lies within this range (default = c(0, 1)).
}
\item{MAF.weights.beta}{
a numeric vector of length 2 defining the beta probability density function parameters on the minor allele frequencies. This internal minor allele frequency weight is multiplied by the external weight given by the group.file. To turn off internal minor allele frequency weight and only use the external weight given by the group.file, use c(1, 1) to assign flat weights (default = c(1, 25)).
Expand Down Expand Up @@ -116,10 +119,10 @@ mean coding allele frequency for variants in the test unit group.
maximum coding allele frequency for variants in the test unit group.
}
\item{freq.strata.min}{
minimum coding allele frequency of each strata if the environmental factor is categorical.
minimum coding allele frequency of each stratum if the environmental factor is categorical.
}
\item{freq.strata.max}{
maximum coding allele frequency of each strata if the environmental factor is categorical.
maximum coding allele frequency of each stratum if the environmental factor is categorical.
}
\item{MV.pval}{
MV test p-value.
Expand Down

0 comments on commit 1996f18

Please sign in to comment.