Skip to content

Commit

Permalink
Merge pull request #474 from MRCIEU/442-empty-exposure_dat-for-multi-…
Browse files Browse the repository at this point in the history
…variable-anlalysis

442 empty exposure dat for multi variable anlalysis
  • Loading branch information
explodecomputer authored Feb 20, 2024
2 parents f247a68 + 97ed057 commit 97cff7e
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 32 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: TwoSampleMR
Title: Two Sample MR Functions and Interface to MR Base Database
Version: 0.5.9
Version: 0.5.10
Authors@R: c(
person("Gibran", "Hemani", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-0920-1055")),
Expand Down
6 changes: 4 additions & 2 deletions R/ld.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
#' @param clump_p1 Clumping sig level for index SNPs, default is `1`.
#' @param clump_p2 Clumping sig level for secondary SNPs, default is `1`.
#' @param pop Super-population to use as reference panel. Default = "EUR". Options are EUR, SAS, EAS, AFR, AMR. 'legacy' also available - which is a previously used version of the EUR panel with a slightly different set of markers
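#' @param bfile Path to a local LD reference panel in PLINK bfile format (given without the file extension). If this is provided then clumping is performed locally with plink instead of through the API. Default = `NULL`
#' @param plink_bin If `NULL` and `bfile` is not `NULL` then will detect packaged plink binary for specific OS. Otherwise specify path to plink binary. Default = `NULL`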
#'
#' @export
#' @return Data frame
clump_data <- function(dat, clump_kb=10000, clump_r2=0.001, clump_p1=1, clump_p2=1, pop="EUR")
clump_data <- function(dat, clump_kb=10000, clump_r2=0.001, clump_p1=1, clump_p2=1, pop="EUR", bfile=NULL, plink_bin=NULL)
{
# .Deprecated("ieugwasr::ld_clump()")

Expand Down Expand Up @@ -53,7 +55,7 @@ clump_data <- function(dat, clump_kb=10000, clump_r2=0.001, clump_p1=1, clump_p2
}

d <- data.frame(rsid=dat$SNP, pval=dat[[pval_column]], id=dat$id.exposure)
out <- ieugwasr::ld_clump(d, clump_kb=clump_kb, clump_r2=clump_r2, clump_p=clump_p1, pop=pop)
out <- ieugwasr::ld_clump(d, clump_kb=clump_kb, clump_r2=clump_r2, clump_p=clump_p1, pop=pop, bfile=bfile, plink_bin=plink_bin)
keep <- paste(dat$SNP, dat$id.exposure) %in% paste(out$rsid, out$id)
return(dat[keep, ])
}
Expand Down
98 changes: 75 additions & 23 deletions R/multivariable_mr.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,12 @@ mv_extract_exposures <- function(id_exposure, clump_r2=0.001, clump_kb=10000, ha

#' Attempt to perform MVMR using local data
#'
#' Under construction
#' Allows you to read in summary data from text files to format the multivariable exposure dataset.
#'
#' Note that each column-name argument can be given as a vector with one entry per exposure dataset, i.e. of the same length as `filenames_exposure`.
#'
#' @param filenames_exposure Filenames for each exposure dataset. Must have header with at least SNP column present. Following arguments are used for determining how to read the filename and clumping etc.
#' @param sep Specify delimiter in file. The default is space, i.e. `sep=" "`.
#' @param sep Specify delimiter in file. The default is space, i.e. `sep=" "`. If length is 1 it will use the same `sep` value for each exposure dataset. You can provide a vector of values, one for each exposure dataset, if the values are different across datasets. The same applies to all dataset-formatting options listed below.
#' @param phenotype_col Optional column name for the column with phenotype name corresponding to the SNP. If not present then will be created with the value `"Outcome"`. Default is `"Phenotype"`.
#' @param snp_col Required name of column with SNP rs IDs. The default is `"SNP"`.
#' @param beta_col Required for MR. Name of column with effect sizes. The default is `"beta"`.
Expand All @@ -83,36 +85,84 @@ mv_extract_exposures <- function(id_exposure, clump_r2=0.001, clump_kb=10000, ha
#'
#' @export
#' @return List
mv_extract_exposures_local <- function(filenames_exposure, sep = " ", phenotype_col = "Phenotype", snp_col = "SNP", beta_col = "beta", se_col = "se", eaf_col = "eaf", effect_allele_col = "effect_allele", other_allele_col = "other_allele", pval_col = "pval", units_col = "units", ncase_col = "ncase", ncontrol_col = "ncontrol", samplesize_col = "samplesize", gene_col = "gene", id_col = "id", min_pval = 1e-200, log_pval = FALSE, pval_threshold=5e-8, clump_r2=0.001, clump_kb=10000, harmonise_strictness=2)
{
mv_extract_exposures_local <- function(
filenames_exposure,
sep = " ",
phenotype_col = "Phenotype",
snp_col = "SNP",
beta_col = "beta",
se_col = "se",
eaf_col = "eaf",
effect_allele_col = "effect_allele",
other_allele_col = "other_allele",
pval_col = "pval",
units_col = "units",
ncase_col = "ncase",
ncontrol_col = "ncontrol",
samplesize_col = "samplesize",
gene_col = "gene",
id_col = "id",
min_pval = 1e-200,
log_pval = FALSE,
pval_threshold=5e-8,
plink_bin=NULL,
bfile=NULL,
clump_r2=0.001,
clump_kb=10000,
pop="EUR",
harmonise_strictness=2
) {
message("WARNING: Experimental function")

n <- length(filenames_exposure)
if(length(sep) == 1) {sep <- rep(sep, n)}
if(length(phenotype_col) == 1) {phenotype_col <- rep(phenotype_col, n)}
if(length(snp_col) == 1) {snp_col <- rep(snp_col, n)}
if(length(beta_col) == 1) {beta_col <- rep(beta_col, n)}
if(length(se_col) == 1) {se_col <- rep(se_col, n)}
if(length(eaf_col) == 1) {eaf_col <- rep(eaf_col, n)}
if(length(effect_allele_col) == 1) {effect_allele_col <- rep(effect_allele_col, n)}
if(length(other_allele_col) == 1) {other_allele_col <- rep(other_allele_col, n)}
if(length(pval_col) == 1) {pval_col <- rep(pval_col, n)}
if(length(units_col) == 1) {units_col <- rep(units_col, n)}
if(length(ncase_col) == 1) {ncase_col <- rep(ncase_col, n)}
if(length(ncontrol_col) == 1) {ncontrol_col <- rep(ncontrol_col, n)}
if(length(samplesize_col) == 1) {samplesize_col <- rep(samplesize_col, n)}
if(length(gene_col) == 1) {gene_col <- rep(gene_col, n)}
if(length(id_col) == 1) {id_col <- rep(id_col, n)}
if(length(min_pval) == 1) {min_pval <- rep(min_pval, n)}
if(length(log_pval) == 1) {log_pval <- rep(log_pval, n)}

l_full <- list()
l_inst <- list()
for(i in 1:length(filenames_exposure))
{
l_full[[i]] <- read_outcome_data(filenames_exposure[i],
sep = sep,
phenotype_col = phenotype_col,
snp_col = snp_col,
beta_col = beta_col,
se_col = se_col,
eaf_col = eaf_col,
effect_allele_col = effect_allele_col,
other_allele_col = other_allele_col,
pval_col = pval_col,
units_col = units_col,
ncase_col = ncase_col,
ncontrol_col = ncontrol_col,
samplesize_col = samplesize_col,
gene_col = gene_col,
id_col = id_col,
min_pval = min_pval,
log_pval = log_pval
sep = sep[i],
phenotype_col = phenotype_col[i],
snp_col = snp_col[i],
beta_col = beta_col[i],
se_col = se_col[i],
eaf_col = eaf_col[i],
effect_allele_col = effect_allele_col[i],
other_allele_col = other_allele_col[i],
pval_col = pval_col[i],
units_col = units_col[i],
ncase_col = ncase_col[i],
ncontrol_col = ncontrol_col[i],
samplesize_col = samplesize_col[i],
gene_col = gene_col[i],
id_col = id_col[i],
min_pval = min_pval[i],
log_pval = log_pval[i]
)
if(l_full[[i]]$outcome[1] == "outcome") l_full[[i]]$outcome <- paste0("exposure", i)
l_inst[[i]] <- subset(l_full[[i]], pval.outcome < pval_threshold)
l_inst[[i]] <- subset(l_inst[[i]], !duplicated(SNP))
l_inst[[i]] <- convert_outcome_to_exposure(l_inst[[i]])
l_inst[[i]] <- subset(l_inst[[i]], pval.exposure < pval_threshold)
l_inst[[i]] <- clump_data(l_inst[[i]], clump_p1=pval_threshold, clump_r2=clump_r2, clump_kb=clump_kb)
l_inst[[i]] <- clump_data(l_inst[[i]], clump_p1=pval_threshold, clump_r2=clump_r2, clump_kb=clump_kb, bfile=bfile, plink_bin=plink_bin, pop=pop)
message("Identified ", nrow(l_inst[[i]]), " hits for trait ", l_inst[[i]]$exposure[1])
}

exposure_dat <- dplyr::bind_rows(l_inst)
Expand All @@ -121,9 +171,11 @@ mv_extract_exposures_local <- function(filenames_exposure, sep = " ", phenotype_
temp$id.exposure <- 1
temp <- temp[order(temp$pval.exposure, decreasing=FALSE), ]
temp <- subset(temp, !duplicated(SNP))
temp <- clump_data(temp, clump_p1=pval_threshold, clump_r2=clump_r2, clump_kb=clump_kb)
temp <- clump_data(temp, clump_p1=pval_threshold, clump_r2=clump_r2, clump_kb=clump_kb, bfile=bfile, plink_bin=plink_bin, pop=pop)
exposure_dat <- subset(exposure_dat, SNP %in% temp$SNP)

message("Identified ", length(unique(temp$SNP)), " variants to include")

d1 <- lapply(l_full, function(x) {
subset(x, SNP %in% exposure_dat$SNP)
}) %>% dplyr::bind_rows()
Expand Down
2 changes: 1 addition & 1 deletion R/read_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ format_data <- function(dat, type="exposure", snps=NULL, header=TRUE,
}
}

if ( log_pval )
if(log_pval)
{
dat$pval <- 10^-dat[[pval_col]]
}
Expand Down
8 changes: 7 additions & 1 deletion man/clump_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 8 additions & 2 deletions man/mv_extract_exposures_local.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 8 additions & 2 deletions tests/testthat/test_ld.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,17 @@ test_that("matrix", {
expect_equal(ncol(b), nrow(out))
})



# Shared fixtures for the tests below: instruments for two exposure ids,
# extracted without clumping (`clump=FALSE`), and the result of running
# clump_data() on them with its default arguments.
a <- extract_instruments(c("ieu-a-2", "ieu-a-1001"), clump=FALSE)
out <- clump_data(a)

# Clumping a multi-exposure dataset must keep every exposure: the number of
# distinct `id.exposure` values is the same before (`a`) and after (`out`).
test_that("clump multiple", {
  n_ids_before <- length(unique(a$id.exposure))
  n_ids_after <- length(unique(out$id.exposure))
  expect_equal(n_ids_before, n_ids_after)
})

# Exercises clump_data() with a local reference panel (`bfile`) and a plink
# binary instead of the default remote path. Skipped on CI and CRAN, and
# whenever the developer-specific reference panel is not present on disk.
test_that("clump local", {
  skip_on_ci()
  skip_on_cran()

  # Reference-panel prefix; the matching .bim file must exist for the test to run.
  bfile <- "/Users/gh13047/repo/opengwas-api-internal/opengwas-api/app/ld_files/EUR"
  skip_if_not(file.exists(paste0(bfile, ".bim")))

  aclump <- clump_data(a, bfile = bfile, plink_bin = "plink")

  # Without an expectation testthat reports the test as empty. clump_data()
  # returns a row subset of its input, so assert exactly that.
  expect_true(is.data.frame(aclump))
  expect_lte(nrow(aclump), nrow(a))
})

31 changes: 31 additions & 0 deletions tests/testthat/test_mvmr_local.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
context("mvmr local")

# Checks that mv_extract_exposures_local() can build a multivariable exposure
# dataset from tab-delimited summary-statistics files on disk.
# Skipped on CI and CRAN (presumably because the ieugwasr calls need network
# access -- confirm).
test_that("mv exposure local", {
  skip_on_ci()
  skip_on_cran()

  # Union of the variants returned by tophits() for each trait, then the
  # associations of that full variant set in both traits.
  a <- ieugwasr::tophits("ieu-a-2")
  b <- ieugwasr::tophits("ieu-a-1001")
  rsid <- unique(c(a$rsid, b$rsid))
  a1 <- ieugwasr::associations(rsid, "ieu-a-2")
  a2 <- ieugwasr::associations(rsid, "ieu-a-1001")

  # Write each trait to its own tab-delimited file with a header row.
  # Argument names are spelled out in full and TRUE/FALSE are used instead of
  # T/F, so the calls do not rely on partial matching or reassignable aliases.
  f1 <- tempfile()
  f2 <- tempfile()
  write.table(a1, file = f1, row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")
  write.table(a2, file = f2, row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")

  # Length-one column-name arguments are reused for every input file.
  exposure_dat <- mv_extract_exposures_local(
    c(f1, f2),
    sep = "\t",
    snp_col = c("rsid"),
    beta_col = c("beta"),
    se_col = c("se"),
    effect_allele_col = c("ea"),
    other_allele_col = c("nea"),
    pval_col = c("p")
  )

  expect_true(nrow(exposure_dat) > 100)
})

0 comments on commit 97cff7e

Please sign in to comment.