diff --git a/.Rbuildignore b/.Rbuildignore index ed8b4b66..bcab4d60 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -17,3 +17,4 @@ cache$ ^README\.Rmd$ ^CITATION\.cff$ ^README\.html$ +^\.lintr$ diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml index 196eceba..216b930d 100644 --- a/.github/workflows/check-full.yaml +++ b/.github/workflows/check-full.yaml @@ -1,3 +1,9 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +# +# NOTE: This workflow is overkill for most R packages and +# check-standard.yaml is likely a better choice. +# usethis::use_github_action("check-standard") will install it. on: push: branches: [main, master] @@ -9,6 +15,8 @@ on: name: R-CMD-check +permissions: read-all + jobs: R-CMD-check: runs-on: ${{ matrix.config.os }} @@ -57,5 +65,5 @@ jobs: - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true - args: 'c("--no-manual", "--as-cran")' + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' error-on: '"error"' diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 3ff58464..51be99dc 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -11,6 +11,8 @@ on: name: pkgdown +permissions: read-all + jobs: pkgdown: runs-on: ubuntu-latest diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index ca096091..bc18334f 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -1,3 +1,5 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: branches: [main, master] @@ -7,6 +9,8 @@ on: name: test-coverage +permissions: read-all + jobs: test-coverage: runs-on: macOS-latest diff --git a/.lintr b/.lintr new file mode 100644 index 00000000..5e1afb31 --- /dev/null +++ b/.lintr @@ -0,0 +1,16 @@ +linters: linters_with_defaults( + line_length_linter = NULL, + commented_code_linter = NULL, + indentation_linter = NULL, + trailing_whitespace_linter = NULL, + infix_spaces_linter = NULL, + quotes_linter = NULL, + trailing_blank_lines_linter = NULL, + brace_linter = NULL, + commas_linter = NULL, + whitespace_linter = NULL, + object_name_linter = NULL, + assignment_linter = NULL, + cyclocomp_linter = NULL + ) +encoding: "UTF-8" diff --git a/DESCRIPTION b/DESCRIPTION index efe14bee..b63603b5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: TwoSampleMR Title: Two Sample MR Functions and Interface to MR Base Database -Version: 0.6.5 +Version: 0.6.6 Authors@R: c( person("Gibran", "Hemani", , "g.hemani@bristol.ac.uk", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-0920-1055")), @@ -36,7 +36,7 @@ Imports: glmnet, gridExtra, gtable, - ieugwasr (>= 1.0.0), + ieugwasr (>= 1.0.1), jsonlite, knitr, lattice, @@ -59,7 +59,8 @@ Suggests: MendelianRandomization, MRInstruments, randomForest, - testthat + testthat, + tidyr VignetteBuilder: knitr Remotes: diff --git a/NEWS.md b/NEWS.md index eb289b4f..3db2091f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,13 @@ +# TwoSampleMR v0.6.6 + +(Release date: 2024-07-06) + +* Improve a test of `dat_to_MRInput()` so that it skips cleanly on OpenGWAS server issues +* Restrict the GitHub Actions workflows to read-only (`read-all`) permissions +* Bump minimum required version of **ieugwasr** to 1.0.1 +* Amend the code to bring it more in line with **lintr** recommendations +* Add the omitted **tidyr**
soft dependency + # TwoSampleMR v0.6.5 (Release date: 2024-06-30) diff --git a/R/add_metadata.r b/R/add_metadata.r index 324f702b..cc857e0e 100644 --- a/R/add_metadata.r +++ b/R/add_metadata.r @@ -15,14 +15,14 @@ add_metadata <- function(dat, cols = c("sample_size", "ncase", "ncontrol", "unit get_info <- function(id, what="exposure", cols) { info <- ieugwasr::gwasinfo(id) - if(nrow(info) == 0) + if (nrow(info) == 0) { message(what, ": none of the IDs found in database") return(NULL) } - for(col in cols) + for (col in cols) { - if(!col %in% names(info)) + if (!col %in% names(info)) { info[[col]] <- NA } @@ -30,7 +30,7 @@ add_metadata <- function(dat, cols = c("sample_size", "ncase", "ncontrol", "unit info <- subset(info, select=c("id", cols)) names(info) <- paste0(names(info), ".", what) names(info)[names(info) == paste0("sample_size.", what)] <- paste0("samplesize.", what) - if("sample_size" %in% cols) + if ("sample_size" %in% cols) { index <- grepl("ukb-d", info$id) & is.na(info[[paste0("samplesize.", what)]]) info[[paste0("samplesize.", what)]][index] <- 300000 @@ -39,21 +39,21 @@ add_metadata <- function(dat, cols = c("sample_size", "ncase", "ncontrol", "unit } order_col <- random_string() - dat[[order_col]] <- 1:nrow(dat) - if("id.exposure" %in% names(dat)) + dat[[order_col]] <- seq_len(nrow(dat)) + if ("id.exposure" %in% names(dat)) { exposure_id <- unique(dat[["id.exposure"]]) info <- get_info(id=exposure_id, what="exposure", cols=cols) - if(!is.null(info)) + if (!is.null(info)) { - for(x in names(info)) + for (x in names(info)) { - if(! x %in% names(dat)) + if (! x %in% names(dat)) { dat[[x]] <- NA } - for(id in unique(info[["id.exposure"]])) + for (id in unique(info[["id.exposure"]])) { dat[[x]][is.na(dat[[x]]) & dat[["id.exposure"]] == id] <- info[[x]][info[["id.exposure"]] == id] } @@ -61,20 +61,20 @@ add_metadata <- function(dat, cols = c("sample_size", "ncase", "ncontrol", "unit } } - if("id.outcome" %in% names(dat)) + if ("id.outcome" %in% names(dat)) { outcome_id <- unique(dat[["id.outcome"]]) info <- get_info(id=outcome_id, what="outcome", cols=cols) - if(!is.null(info)) + if (!is.null(info)) { - for(x in names(info)) + for (x in names(info)) { - if(! x %in% names(dat)) + if (! x %in% names(dat)) { dat[[x]] <- NA } - for(id in unique(info[["id.outcome"]])) + for (id in unique(info[["id.outcome"]])) { dat[[x]][is.na(dat[[x]]) & dat[["id.outcome"]] == id] <- info[[x]][info[["id.outcome"]] == id] } diff --git a/R/add_rsq.r b/R/add_rsq.r index 86f563d5..79676d3e 100644 --- a/R/add_rsq.r +++ b/R/add_rsq.r @@ -68,7 +68,7 @@ add_rsq_one <- function(dat, what="exposure") } else { message("Try adding metadata with add_metadata()") } - } else if(all(grepl("SD", dat[[paste0("units.", what)]])) & all(!is.na(dat[[paste0("eaf.", what)]]))) { + } else if(all(grepl("SD", dat[[paste0("units.", what)]])) && all(!is.na(dat[[paste0("eaf.", what)]]))) { dat[[paste0("rsq.", what)]] <- NA dat[[paste0("rsq.", what)]] <- 2 * dat[[paste0("beta.", what)]]^2 * dat[[paste0("eaf.", what)]] * (1-dat[[paste0("eaf.", what)]]) dat[[paste0("effective_n.", what)]] <- dat[[paste0("samplesize.", what)]] @@ -109,12 +109,19 @@ get_r_from_pn_less_accurate <- function(p, n) test_r_from_pn <- function() { + if (!requireNamespace("tidyr", quietly = TRUE)) { + stop( + "Package \"tidyr\" must be installed to use this function.", + call. 
= FALSE + ) + } + param <- expand.grid( n = c(10, 100, 1000, 10000, 100000), rsq = 10^seq(-4,-0.5, length.out=30) ) - for(i in 1:nrow(param)) + for(i in seq_len(nrow(param))) { message(i) x <- scale(stats::rnorm(param$n[i])) @@ -125,7 +132,7 @@ test_r_from_pn <- function() param$rsq2[i] <- get_r_from_pn(param$pval[i], param$n[i])^2 } - param <- gather(param, key=out, value=value, rsq1, rsq2) + param <- tidyr::gather(param, key=out, value=value, rsq1, rsq2) p <- ggplot2::ggplot(param, ggplot2::aes(x=rsq_emp, value)) + ggplot2::geom_abline(slope=1, linetype="dotted") + @@ -170,7 +177,7 @@ get_r_from_pn <- function(p, n) abs(-log10(suppressWarnings(get_p_from_r2n(x, sample_size))) - -log10(pvalue)) } - if(length(p) > 1 & length(n) == 1) + if(length(p) > 1 && length(n) == 1) { message("Assuming n the same for all p values") n <- rep(n, length(p)) @@ -182,7 +189,7 @@ get_r_from_pn <- function(p, n) if(any(index)) { index <- which(index) - for(i in 1:length(index)) + for(i in seq_along(index)) { if(p[index[i]] == 0) { @@ -240,15 +247,15 @@ get_r_from_lor <- function(lor, af, ncase, ncontrol, prevalence, model="logit", stopifnot(length(ncase) == 1 | length(ncase) == length(lor)) stopifnot(length(ncontrol) == 1 | length(ncontrol) == length(lor)) stopifnot(length(prevalence) == 1 | length(prevalence) == length(lor)) - if(length(prevalence) == 1 & length(lor) != 1) + if(length(prevalence) == 1 && length(lor) != 1) { prevalence <- rep(prevalence, length(lor)) } - if(length(ncase) == 1 & length(lor) != 1) + if(length(ncase) == 1 && length(lor) != 1) { ncase <- rep(ncase, length(lor)) } - if(length(ncontrol) == 1 & length(lor) != 1) + if(length(ncontrol) == 1 && length(lor) != 1) { ncontrol <- rep(ncontrol, length(lor)) } @@ -340,7 +347,7 @@ get_population_allele_frequency <- function(af, prop, odds_ratio, prevalence) { stopifnot(length(af) == length(odds_ratio)) stopifnot(length(prop) == length(odds_ratio)) - for(i in 1:length(odds_ratio)) + for(i in seq_along(odds_ratio)) { co <- contingency(af[i], prop[i], odds_ratio[i]) af_controls <- co[1,2] / (co[1,2] + co[2,2]) diff --git a/R/forest_plot.R b/R/forest_plot.R index 400b7ca3..e57eb018 100644 --- a/R/forest_plot.R +++ b/R/forest_plot.R @@ -162,7 +162,7 @@ mr_forest_plot_grouped <- data_Fm$eff_col <- log(as.numeric(data_Fm[,eff_col])) } # ggplot code to generate the forest plot using geom_segments and geom_points and to make a relatively minimal theme - raw_forest <- ggplot(data = data_Fm, aes( y = space_col, yend = space_col, x = as.numeric(lb_col), xend = as.numeric(ub_col) )) + geom_segment() + geom_point(aes( y = space_col, x = as.numeric(eff_col), size = 4 )) + theme_bw() + theme( axis.text.y = element_blank(), axis.ticks.y = element_blank(), axis.title = element_blank(), panel.grid = element_blank(), rect = element_blank(), title = element_text(size = 23), legend.position = 'none' ) + expand_limits(y = c(data_Fm[,space_col] - 1, data_Fm[,space_col] + 2)) + labs(title = title_text) # returns ggplot2 object with the (un-annotated) forest plot + raw_forest <- ggplot2::ggplot(data = data_Fm, ggplot2::aes( y = space_col, yend = space_col, x = as.numeric(lb_col), xend = as.numeric(ub_col) )) + ggplot2::geom_segment() + ggplot2::geom_point(ggplot2::aes( y = space_col, x = as.numeric(eff_col), size = 4 )) + ggplot2::theme_bw() + ggplot2::theme( axis.text.y = ggplot2::element_blank(), axis.ticks.y = ggplot2::element_blank(), axis.title = ggplot2::element_blank(), panel.grid = ggplot2::element_blank(), rect = ggplot2::element_blank(), title = 
ggplot2::element_text(size = 23), legend.position = 'none') + ggplot2::expand_limits(y = c(data_Fm[,space_col] - 1, data_Fm[,space_col] + 2)) + ggplot2::labs(title = title_text) # returns ggplot2 object with the (un-annotated) forest plot return(raw_forest) } @@ -183,7 +183,7 @@ mr_forest_plot_grouped <- text_widths <- c(-1, max(10,0.5 * max(sapply( as.character(data_Fm[,text_col]),nchar )))) # GGplot rendering of the annotation column - lefttext <- ggplot(data = data_Fm, aes( y = space_col, x = 0, label = text_col, fontface = attr_list )) + geom_text(hjust = 0) + theme_bw() + theme( axis.text.y = element_blank(), axis.ticks.y = element_blank(),axis.text.x = element_text(colour = "white"),axis.ticks.x = element_line(colour = "white"), axis.title = element_blank(), rect = element_blank(), panel.grid = element_blank(), title = element_text(size = 23) ) + expand_limits(x = text_widths, y = c(data_Fm[,space_col] - 1, data_Fm[,space_col] + 2)) + labs(title = title_text, size = 40) # returns two-item list with left_text, the GGplot annotations, and text_widths, the x-axis limits of the plot + lefttext <- ggplot2::ggplot(data = data_Fm, ggplot2::aes( y = space_col, x = 0, label = text_col, fontface = attr_list )) + ggplot2::geom_text(hjust = 0) + ggplot2::theme_bw() + ggplot2::theme( axis.text.y = ggplot2::element_blank(), axis.ticks.y = ggplot2::element_blank(),axis.text.x = ggplot2::element_text(colour = "white"),axis.ticks.x = ggplot2::element_line(colour = "white"), axis.title = ggplot2::element_blank(), rect = ggplot2::element_blank(), panel.grid = ggplot2::element_blank(), title = ggplot2::element_text(size = 23) ) + ggplot2::expand_limits(x = text_widths, y = c(data_Fm[,space_col] - 1, data_Fm[,space_col] + 2)) + ggplot2::labs(title = title_text, size = 40) # returns two-item list with left_text, the GGplot annotations, and text_widths, the x-axis limits of the plot return(list(left_text = lefttext, text_widths = text_widths)) } @@ -201,7 +201,7 @@ mr_forest_plot_grouped <- title_list <- col_names } - for (i in 1:length(col_names)) { + for (i in seq_along(col_names)) { # loop to get the widths of each annotation column and to group the annotation objects together col <- anot_col( data_Fm = data_Fm, text_col = col_names[i], space_col = space_col, title_text = title_list[[i]] ) relative_widths[i] <- col$text_widths[2] - col$text_widths[1] @@ -230,7 +230,7 @@ mr_forest_plot_grouped <- left_Grobs <- left_Hs left_Grobs$relative_widths <- NULL - for (i in 1:length(left_Grobs)) { + for (i in seq_along(left_Grobs)) { grob_Bag[paste('l',names(left_Grobs)[i],sep = '')] <- left_Grobs[i] } @@ -239,7 +239,7 @@ mr_forest_plot_grouped <- right_RW <- right_Hs$relative_widths right_Grobs <- right_Hs right_Grobs$relative_widths <- NULL - for (i in 1:length(right_Grobs)) { + for (i in seq_along(right_Grobs)) { grob_Bag[paste('r',names(right_Grobs)[i], sep = '')] <- right_Grobs[i] } diff --git a/R/forest_plot2.R b/R/forest_plot2.R index a91d2eeb..93846c6c 100644 --- a/R/forest_plot2.R +++ b/R/forest_plot2.R @@ -37,7 +37,7 @@ format_mr_results <- function(mr_res, exponentiate=FALSE, single_snp_method="Wal } dat <- subset(mr_res, (nsnp==1 & method==single_snp_method) | (nsnp > 1 & method == multi_snp_method)) - dat$index <- 1:nrow(dat) + dat$index <- seq_len(nrow(dat)) if(ao_slc) { @@ -173,7 +173,7 @@ simple_cap <- function(x) { #' #' @export #' @return Character or array of character -trim <- function( x ) { +trim <- function(x) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } @@ -263,7 +263,7 @@ 
forest_plot_basic <- function(dat, section=NULL, colour_group=NULL, colour_group dat <- subset(dat, exposure == colour_group) if(!is.null(threshold)) { - point_plot <- ggplot2::geom_point(size=2, aes(colour = pval < threshold)) + point_plot <- ggplot2::geom_point(size=2, ggplot2::aes(colour = pval < threshold)) } else { point_plot <- ggplot2::geom_point(size=2) } @@ -276,7 +276,7 @@ forest_plot_basic <- function(dat, section=NULL, colour_group=NULL, colour_group } } - if((!is.null(colour_group) & colour_group_first) | is.null(colour_group)) + if((!is.null(colour_group) && colour_group_first) || is.null(colour_group)) { outcome_labels <- ggplot2::geom_text(ggplot2::aes(label=outcome), x=lo, y=mean(c(1, length(unique(dat$exposure)))), hjust=0, vjust=0.5, size=2.5) main_title <- ifelse(is.null(section), "", section) @@ -296,10 +296,10 @@ forest_plot_basic <- function(dat, section=NULL, colour_group=NULL, colour_group } l <- data.frame(lab=sort(unique(dat$lab)), col="a", stringsAsFactors=FALSE) - l$col[1:nrow(l) %% 2 == 0] <- "b" + l$col[seq_len(nrow(l)) %% 2 == 0] <- "b" dat <- merge(dat, l, by="lab", all.x=TRUE) - dat <- dat[nrow(dat):1, ] + dat <- dat[rev(seq_len(nrow(dat))), ] p <- ggplot2::ggplot(dat, ggplot2::aes(x=effect, y=exposure)) + ggplot2::geom_rect(ggplot2::aes(fill=col), xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) + @@ -392,7 +392,7 @@ forest_plot_names <- function(dat, section=NULL, bottom=TRUE) } l <- data.frame(lab=sort(unique(dat$lab)), col="a", stringsAsFactors=FALSE) - l$col[1:nrow(l) %% 2 == 0] <- "b" + l$col[seq_len(nrow(l)) %% 2 == 0] <- "b" dat <- merge(dat, l, by="lab", all.x=TRUE) @@ -506,7 +506,7 @@ forest_plot <- function(mr_res, exponentiate=FALSE, single_snp_method="Wald rati ) count <- 2 columns <- unique(dat$exposure) - for(i in 1:length(columns)) + for(i in seq_along(columns)) { l[[count]] <- forest_plot_basic( dat, @@ -539,7 +539,7 @@ forest_plot <- function(mr_res, exponentiate=FALSE, single_snp_method="Wald rati sec <- unique(as.character(dat$category)) h <- rep(0, length(sec)) l <- list() - for(i in 1:length(sec)) + for(i in seq_along(sec)) { l[[i]] <- forest_plot_basic( dat, @@ -570,7 +570,7 @@ forest_plot <- function(mr_res, exponentiate=FALSE, single_snp_method="Wald rati l <- list() h <- rep(0, length(sec)) count <- 1 - for(i in 1:length(sec)) + for(i in seq_along(sec)) { h[i] <- length(unique(subset(dat, category==sec[i])$outcome)) l[[count]] <- forest_plot_names( @@ -579,7 +579,7 @@ forest_plot <- function(mr_res, exponentiate=FALSE, single_snp_method="Wald rati bottom = i==length(sec) ) count <- count + 1 - for(j in 1:length(columns)) + for(j in seq_along(columns)) { l[[count]] <- forest_plot_basic( dat, @@ -611,4 +611,3 @@ forest_plot <- function(mr_res, exponentiate=FALSE, single_snp_method="Wald rati ) } } - diff --git a/R/forest_plot_1-to-many.R b/R/forest_plot_1-to-many.R index 22b524e2..8512a258 100644 --- a/R/forest_plot_1-to-many.R +++ b/R/forest_plot_1-to-many.R @@ -44,7 +44,7 @@ format_1_to_many <- function(mr_res, b="b",se="se",exponentiate=FALSE, ao_slc=FA Letters<-c("A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z") Letters<-sort(c(paste0("A",Letters),paste0("B",Letters),paste0("C",Letters),paste0("D",Letters))) mr_res$outcome2<-mr_res[,TraitM] - mr_res[,TraitM]<-paste(Letters[1:length(mr_res[,TraitM])],mr_res[,TraitM]) + mr_res[,TraitM]<-paste(Letters[seq_along(mr_res[,TraitM])],mr_res[,TraitM]) mr_res$subcategory<-trim(mr_res$subcategory) mr_res$exposure<-"" @@ -59,7 +59,7 @@ 
format_1_to_many <- function(mr_res, b="b",se="se",exponentiate=FALSE, ao_slc=FA } dat<-mr_res - dat$index <- 1:nrow(dat) + dat$index <- seq_len(nrow(dat)) if(ao_slc) { @@ -166,9 +166,9 @@ sort_1_to_many <- function(mr_res,b="b",trait_m="outcome",sort_action=4,group=NU Letters<-c("A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z") Letters<-sort(c(paste0("A",Letters),paste0("B",Letters),paste0("C",Letters))) groups<-unique(mr_res[,group]) - mr_res$Index<-unlist(lapply(1:length(unique(mr_res[,group])),FUN=function(x) rep(Letters[Letters==Letters[x]],length(which(mr_res[,group]==groups[x]))))) + mr_res$Index<-unlist(lapply(seq_along(unique(mr_res[,group])),FUN=function(x) rep(Letters[Letters==Letters[x]],length(which(mr_res[,group]==groups[x]))))) mr_res<-mr_res[order(mr_res[,b],decreasing=TRUE),] - mr_res$Index2<-Letters[1:nrow(mr_res)] + mr_res$Index2 <- Letters[seq_len(nrow(mr_res))] mr_res$Index3<-paste(mr_res$Index,mr_res$Index2,sep="") mr_res<-mr_res[order(mr_res$Index3),] mr_res<-mr_res[,!names(mr_res) %in% c("Index","Index2","Index3")] @@ -201,7 +201,7 @@ sort_1_to_many <- function(mr_res,b="b",trait_m="outcome",sort_action=4,group=NU mr_res<-mr_res[order(mr_res$b.sort,decreasing=TRUE),] groups<-unique(mr_res[,group]) List<-NULL - for(i in 1:length(groups)){ + for(i in seq_along(groups)){ Test<-mr_res[mr_res[,group]==groups[i],] Test1<-Test[Test[,trait_m] != priority,] Test2<-Test[Test[,trait_m] == priority,] @@ -270,7 +270,7 @@ forest_plot_basic2 <- function(dat, section=NULL, colour_group=NULL, colour_grou dat$up_ci <- pmin(dat$up_ci, xlim[2], na.rm=TRUE) } - if(is.null(up) | is.null(lo) ){ + if(is.null(up) || is.null(lo) ){ up <- max(dat$up_ci, na.rm=TRUE) lo <- min(dat$lo_ci, na.rm=TRUE) } @@ -294,7 +294,7 @@ forest_plot_basic2 <- function(dat, section=NULL, colour_group=NULL, colour_grou point_plot <- ggplot2::geom_point(ggplot2::aes(colour=colour_scheme), size=dat$weight,fill=colour_scheme) } - if((!is.null(colour_group) & colour_group_first) | is.null(colour_group)) + if((!is.null(colour_group) && colour_group_first) || is.null(colour_group)) { outcome_labels <- ggplot2::geom_text(ggplot2::aes(label=outcome2,colour=colour_scheme), x=lo, y=mean(c(1, length(unique(dat$exposure)))), hjust=0, vjust=0.5, size=2.5) main_title <- ifelse(is.null(section), "", section) @@ -311,10 +311,10 @@ forest_plot_basic2 <- function(dat, section=NULL, colour_group=NULL, colour_grou dat$lab<-dat$outcome l <- data.frame(lab=sort(unique(dat$lab)), col="a", stringsAsFactors=FALSE) - l$col[1:nrow(l) %% 2 == 0] <- "b" + l$col[seq_len(nrow(l)) %% 2 == 0] <- "b" dat <- merge(dat, l, by="lab", all.x=TRUE) - dat <- dat[nrow(dat):1, ] + dat <- dat[rev(seq_len(nrow(dat))), ] p <-ggplot2::ggplot(dat, ggplot2::aes(x=effect, y=exposure)) + ggplot2::geom_rect(ggplot2::aes(fill=col), colour=colour_scheme,xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) + @@ -495,7 +495,7 @@ forest_plot_addcol <- function(dat, section=NULL, addcol=NULL,bottom=TRUE,addcol dat$lab<-dat$outcome l <- data.frame(lab=sort(unique(dat$lab)), col="a", stringsAsFactors=FALSE) - l$col[1:nrow(l) %% 2 == 0] <- "b" + l$col[seq_len(nrow(l)) %% 2 == 0] <- "b" dat <- merge(dat, l, by="lab", all.x=TRUE) @@ -602,7 +602,7 @@ forest_plot_1_to_many <- function(mr_res="mr_res", b="b",se="se",TraitM="outcome l <- list() h <- rep(0, length(sec)) count <- 1 - for(i in 1:length(sec)) + for(i in seq_along(sec)) { h[i] <- length(unique(subset(dat, category==sec[i])$outcome)) @@ -621,7 +621,7 @@ forest_plot_1_to_many 
<- function(mr_res="mr_res", b="b",se="se",TraitM="outcome if(!is.null(addcols)){ - for(j in 1:length(addcols)){ + for(j in seq_along(addcols)){ l[[count]]<-forest_plot_addcol( dat, sec[i], @@ -639,7 +639,7 @@ forest_plot_1_to_many <- function(mr_res="mr_res", b="b",se="se",TraitM="outcome } - for(j in 1:length(columns)) + for(j in seq_along(columns)) { l[[count]] <- forest_plot_basic2( dat, diff --git a/R/format_mr_results2.R b/R/format_mr_results2.R index 22abd9fd..43350c9d 100644 --- a/R/format_mr_results2.R +++ b/R/format_mr_results2.R @@ -141,7 +141,7 @@ combine_all_mrresults <- function(res,het,plt,sin,ao_slc=TRUE,Exp=FALSE,split.ex Class<-unlist(lapply(names(res), FUN=function(x) class(res[,x]))) if(any(Class == "factor")) { Pos<-which(unlist(lapply(names(res), FUN=function(x) class(res[,x])))=="factor") - for(i in 1:length(Pos)){ + for(i in seq_along(Pos)){ res[,Pos[i]]<-as.character(res[,Pos[i]]) } } @@ -150,7 +150,7 @@ combine_all_mrresults <- function(res,het,plt,sin,ao_slc=TRUE,Exp=FALSE,split.ex Class<-unlist(lapply(names(het), FUN=function(x) class(het[,x]))) if(any(Class == "factor")) { Pos<-which(unlist(lapply(names(het), FUN=function(x) class(het[,x])))=="factor") - for(i in 1:length(Pos)){ + for(i in seq_along(Pos)){ het[,Pos[i]]<-as.character(het[,Pos[i]]) } } @@ -159,7 +159,7 @@ combine_all_mrresults <- function(res,het,plt,sin,ao_slc=TRUE,Exp=FALSE,split.ex Class<-unlist(lapply(names(sin), FUN=function(x) class(sin[,x]))) if(any(Class == "factor")) { Pos<-which(unlist(lapply(names(sin), FUN=function(x) class(sin[,x])))=="factor") - for(i in 1:length(Pos)){ + for(i in seq_along(Pos)){ sin[,Pos[i]]<-as.character(sin[,Pos[i]]) } } @@ -263,7 +263,7 @@ power_prune <- function(dat,method=1,dist.outcome="binary") id.sets<-paste(split_exposure(dat)$exposure,split_outcome(dat)$outcome) id.set.unique<-unique(id.sets) dat$id.set<-as.numeric(factor(id.sets)) - for(i in 1:length(id.set.unique)){ + for(i in seq_along(id.set.unique)){ # print(i) print(paste("finding summary set for --", id.set.unique[i],"-- with largest sample size", sep="")) dat1<-dat[id.sets == id.set.unique[i],] @@ -310,7 +310,7 @@ power_prune <- function(dat,method=1,dist.outcome="binary") id.sets<-paste(split_exposure(dat)$exposure,split_outcome(dat)$outcome) id.set.unique<-unique(id.sets) dat$id.set<-as.numeric(factor(id.sets)) - for(i in 1:length(id.set.unique)){ + for(i in seq_along(id.set.unique)){ print(i) print(id.set.unique[i]) dat1<-dat[id.sets == id.set.unique[i],] @@ -319,7 +319,7 @@ power_prune <- function(dat,method=1,dist.outcome="binary") id.subset.unique<-unique(id.subset) dat1$id.subset<-as.numeric(factor(id.subset)) L1<-NULL - for(j in 1:length(id.subset.unique)){ + for(j in seq_along(id.subset.unique)){ # print(j) print(paste("identifying best powered summary set: ",id.subset.unique[j],sep="")) dat2<-dat1[id.subset ==id.subset.unique[j], ] @@ -345,7 +345,7 @@ power_prune <- function(dat,method=1,dist.outcome="binary") } if(dist.outcome == "binary"){ iv.se<-1/sqrt(unique(n.cas)*unique(n.con)*r2sum) #standard error of the IV should be proportional to this - if(any(is.na(n.cas)) | any(is.na(n.con))) { + if(any(is.na(n.cas)) || any(is.na(n.con))) { warning("dist.outcome set to binary but number of cases or controls is missing. 
Will try using total sample size instead but power pruning will be less accurate") iv.se<- 1/sqrt(unique(dat2$samplesize.outcome)*r2sum) } @@ -367,7 +367,7 @@ power_prune <- function(dat,method=1,dist.outcome="binary") dat2<-dat2[order(dat2$id.set,dat2$iv.se),] id.sets<-unique(dat2$id.set) id.keep<-NULL - for(i in 1:length(id.sets)){ + for(i in seq_along(id.sets)){ # print(i) # print(id.sets[i]) id.temp<-unique(dat2[dat2$id.set==id.sets[i],c("id.set","id.subset")]) diff --git a/R/harmonise.R b/R/harmonise.R index 86e61980..13852809 100644 --- a/R/harmonise.R +++ b/R/harmonise.R @@ -72,7 +72,7 @@ harmonise_data <- function(exposure_dat, outcome_dat, action=2) fix.tab <- list() mr_cols <- c("beta.exposure", "beta.outcome", "se.exposure", "se.outcome") - for(i in 1:nrow(combs)) + for(i in seq_len(nrow(combs))) { x <- subset(res.tab, id.exposure == combs$id.exposure[i] & id.outcome == combs$id.outcome[i]) message("Harmonising ", x$exposure[1], " (", x$id.exposure[1], ") and ", x$outcome[1], " (", x$id.outcome[1], ")") @@ -550,7 +550,7 @@ harmonise_11 <- function(SNP, A1, B1, betaA, betaB, fA, fB, tolerance, action) harmonise <- function(dat, tolerance, action) { dat$orig_SNP<-dat$SNP - dat <- data.table::data.table(dat)[, SNP_index := 1:.N, by="SNP"] + dat <- data.table::data.table(dat)[, SNP_index := seq_len(.N), by="SNP"] dat$SNP <- paste0(dat$SNP, "_", dat$SNP_index) SNP <- dat$SNP A1 <- dat$effect_allele.exposure diff --git a/R/knit.R b/R/knit.R index faa348c9..d982e051 100644 --- a/R/knit.R +++ b/R/knit.R @@ -98,7 +98,7 @@ mr_report <- function(dat, output_path = ".", output_type = "html", author = "An combinations <- plyr::ddply(dat, c("id.exposure", "id.outcome"), plyr::summarise, n=length(exposure), exposure=exposure[1], outcome=outcome[1]) output_file <- array("", nrow(combinations)) - for(i in 1:nrow(combinations)) + for(i in seq_len(nrow(combinations))) { title <- paste(combinations$exposure[i], "against", combinations$outcome[i]) tablist <- lapply(m[c("mr", "enrichment", "directionality_test", "mr_heterogeneity", "mr_pleiotropy_test")], function(x) diff --git a/R/ld.R b/R/ld.R index e0accc45..aa634a86 100644 --- a/R/ld.R +++ b/R/ld.R @@ -34,10 +34,10 @@ clump_data <- function(dat, clump_kb=10000, clump_r2=0.001, clump_p1=1, clump_p2 stop("Expecting data frame returned from format_data") } - if("pval.exposure" %in% names(dat) & "pval.outcome" %in% names(dat)) + if("pval.exposure" %in% names(dat) && "pval.outcome" %in% names(dat)) { message("pval.exposure and pval.outcome columns present. 
Using pval.exposure for clumping.") - } else if(!"pval.exposure" %in% names(dat) & "pval.outcome" %in% names(dat)) + } else if(!"pval.exposure" %in% names(dat) && "pval.outcome" %in% names(dat)) { message("pval.exposure column not present, using pval.outcome column for clumping.") pval_column <- "pval.outcome" @@ -91,4 +91,3 @@ ld_matrix <- function(snps, with_alleles=TRUE, pop="EUR") # .Deprecated("ieugwasr::ld_matrix()") ieugwasr::ld_matrix(variants=snps, with_alleles=with_alleles, pop=pop) } - diff --git a/R/mr.R b/R/mr.R index e3febd68..6d56a554 100644 --- a/R/mr.R +++ b/R/mr.R @@ -402,7 +402,7 @@ mr_two_sample_ml <- function(b_exp, b_out, se_exp, se_out, parameters) return(list(b=NA, se=NA, pval=NA, nsnp=NA, Q=NA, Q_df=NA, Q_pval=NA)) } loglikelihood <- function(param) { - return(1/2*sum((b_exp-param[1:length(b_exp)])^2/se_exp^2)+1/2*sum((b_out-param[length(b_exp)+1]*param[1:length(b_exp)])^2/se_out^2)) + return(1/2*sum((b_exp-param[seq_along(b_exp)])^2/se_exp^2)+1/2*sum((b_out-param[length(b_exp)+1]*param[seq_along(b_exp)])^2/se_out^2)) } opt <- try(stats::optim( c(b_exp, sum(b_exp*b_out/se_out^2)/sum(b_exp^2/se_out^2)), @@ -717,7 +717,7 @@ weighted_median <- function(b_iv, weights) weighted_median_bootstrap <- function(b_exp, b_out, se_exp, se_out, weights, nboot) { med <- rep(0, nboot) - for(i in 1:nboot){ + for(i in seq_len(nboot)){ b_exp.boot = stats::rnorm(length(b_exp), mean=b_exp, sd=se_exp) b_out.boot = stats::rnorm(length(b_out), mean=b_out, sd=se_out) betaIV.boot = b_out.boot/b_exp.boot diff --git a/R/mr_mode.R b/R/mr_mode.R index 8d6b57bf..04808a87 100644 --- a/R/mr_mode.R +++ b/R/mr_mode.R @@ -69,7 +69,7 @@ mr_mode <- function(dat, parameters=default_parameters(), mode_method="all") BetaIV.boot_NOME <- stats::rnorm(length(BetaIV.in), mean=BetaIV.in, sd=seBetaIV.in[,2]) #Simple mode, not assuming NOME - beta.boot[i,1:length(phi)] <- beta(BetaIV.in=BetaIV.boot, seBetaIV.in=rep(1, length(BetaIV)), phi=phi) + beta.boot[i, seq_along(phi)] <- beta(BetaIV.in=BetaIV.boot, seBetaIV.in=rep(1, length(BetaIV)), phi=phi) #Weighted mode, not assuming NOME beta.boot[i,(length(phi)+1):(2*length(phi))] <- beta(BetaIV.in=BetaIV.boot, seBetaIV.in=seBetaIV.in[,1], phi=phi) #Penalised mode, not assuming NOME diff --git a/R/multivariable_mr.R b/R/multivariable_mr.R index 4416c42d..f095a874 100644 --- a/R/multivariable_mr.R +++ b/R/multivariable_mr.R @@ -148,7 +148,7 @@ mv_extract_exposures_local <- function( l_full <- list() l_inst <- list() - for(i in 1:length(filenames_exposure)) + for(i in seq_along(filenames_exposure)) { if(flag == "character") { l_full[[i]] <- read_outcome_data(filenames_exposure[i], @@ -462,7 +462,7 @@ mv_multiple <- function(mvdat, intercept=FALSE, instrument_specific=FALSE, pval_ } } - if(instrument_specific & sum(index) <= (nexp + as.numeric(intercept))) + if(instrument_specific && sum(index) <= (nexp + as.numeric(intercept))) { effs[i] <- NA se[i] <- NA diff --git a/R/other_formats.R b/R/other_formats.R index 2834cda5..6b49a33f 100644 --- a/R/other_formats.R +++ b/R/other_formats.R @@ -153,7 +153,7 @@ run_mr_presso <- function(dat, NbDistribution = 1000, SignifThreshold = 0.05) attributes(res)$id.outcome <- d$id.outcome attributes(res)$exposure <- d$exposure attributes(res)$outcome <- d$outcome - for(j in 1:nrow(d)) + for(j in seq_len(nrow(d))) { x <- subset(dat, exposure == d$exposure[j] & outcome == d$outcome[j]) message(x$exposure[1], " - ", x$outcome[1]) @@ -205,7 +205,7 @@ mr_ivw_radial <- function(b_exp, b_out, se_exp, se_out, parameters=default_param if 
(sum(!is.na(b_exp) & !is.na(b_out) & !is.na(se_exp) & !is.na(se_out)) < 2) return(list(b = NA, se = NA, pval = NA, nsnp = NA)) - d <- RadialMR::format_radial(BXG=b_exp, BYG=b_out, seBXG=se_exp, seBYG=se_out, RSID=1:length(b_exp)) + d <- RadialMR::format_radial(BXG=b_exp, BYG=b_out, seBXG=se_exp, seBYG=se_out, RSID=seq_along(b_exp)) out <- RadialMR::ivw_radial(d, alpha=0.05, weights=3) b <- out$coef[1,1] se <- out$coef[1,2] diff --git a/R/query.R b/R/query.R index 83c0ae89..0325a1e8 100644 --- a/R/query.R +++ b/R/query.R @@ -40,7 +40,7 @@ extract_outcome_data <- function(snps, outcomes, proxies = TRUE, rsq = 0.8, alig if(proxies) { - for(i in 1:length(outcomes)) + for(i in seq_along(outcomes)) { if(is.null(firstpass)) { @@ -81,7 +81,7 @@ extract_outcome_data_internal <- function(snps, outcomes, proxies = TRUE, rsq = stop("'proxies' argument should be TRUE or FALSE") } - if((length(snps) < splitsize & length(outcomes) < splitsize) | (length(outcomes) < splitsize & length(snps) < splitsize)) + if((length(snps) < splitsize && length(outcomes) < splitsize) || (length(outcomes) < splitsize && length(snps) < splitsize)) { d <- ieugwasr::associations( @@ -102,7 +102,7 @@ extract_outcome_data_internal <- function(snps, outcomes, proxies = TRUE, rsq = n <- length(snps) splits <- data.frame(snps=snps, chunk_id=rep(1:(ceiling(n/splitsize)), each=splitsize)[1:n]) d <- list() - for(i in 1:length(outcomes)) + for(i in seq_along(outcomes)) { message(i, " of ", length(outcomes), " outcomes") @@ -132,7 +132,7 @@ extract_outcome_data_internal <- function(snps, outcomes, proxies = TRUE, rsq = n <- length(outcomes) splits <- data.frame(outcomes=outcomes, chunk_id=rep(1:(ceiling(n/splitsize)), each=splitsize)[1:n]) d <- list() - for(i in 1:length(snps)) + for(i in seq_along(snps)) { message(i, " of ", length(snps), " snps") @@ -160,7 +160,7 @@ extract_outcome_data_internal <- function(snps, outcomes, proxies = TRUE, rsq = d <- plyr::rbind.fill(d) } - if(is.null(nrow(d)) | nrow(d) == 0) + if(is.null(nrow(d)) || nrow(d) == 0) { # message("None of the requested SNPs were available in the specified GWASs.") return(NULL) diff --git a/R/read_data.R b/R/read_data.R index 90157198..be04eb2f 100644 --- a/R/read_data.R +++ b/R/read_data.R @@ -364,7 +364,7 @@ if (inherits(dat, "data.table")) { dat$pval_origin.outcome <- "reported" if(any(is.na(dat$pval.outcome))) { - if("beta.outcome" %in% names(dat) & "se.outcome" %in% names(dat)) + if("beta.outcome" %in% names(dat) && "se.outcome" %in% names(dat)) { index <- is.na(dat$pval.outcome) dat$pval.outcome[index] <- stats::pnorm(abs(dat$beta.outcome[index])/dat$se.outcome[index], lower.tail=FALSE) @@ -374,7 +374,7 @@ if (inherits(dat, "data.table")) { } # If no pval column then create it from beta and se if available - if("beta.outcome" %in% names(dat) & "se.outcome" %in% names(dat) & ! "pval.outcome" %in% names(dat)) + if("beta.outcome" %in% names(dat) && "se.outcome" %in% names(dat) && ! 
"pval.outcome" %in% names(dat)) { message("Inferring p-values") dat$pval.outcome <- stats::pnorm(abs(dat$beta.outcome)/dat$se.outcome, lower.tail=FALSE) * 2 @@ -412,7 +412,7 @@ if (inherits(dat, "data.table")) { dat$samplesize.outcome <- as.numeric(dat$samplesize.outcome) } - if("ncontrol.outcome" %in% names(dat) & "ncase.outcome" %in% names(dat)) + if("ncontrol.outcome" %in% names(dat) && "ncase.outcome" %in% names(dat)) { index <- is.na(dat$samplesize.outcome) & !is.na(dat$ncase.outcome) & !is.na(dat$ncontrol.outcome) if(any(index)) @@ -421,7 +421,7 @@ if (inherits(dat, "data.table")) { dat$samplesize.outcome[index] <- dat$ncase.outcome[index] + dat$ncontrol.outcome[index] } } - } else if("ncontrol.outcome" %in% names(dat) & "ncase.outcome" %in% names(dat)) + } else if("ncontrol.outcome" %in% names(dat) && "ncase.outcome" %in% names(dat)) { message("Generating sample size from ncase and ncontrol") dat$samplesize.outcome <- dat$ncase.outcome + dat$ncontrol.outcome diff --git a/R/rucker.R b/R/rucker.R index 875124cd..3a7b8b1c 100644 --- a/R/rucker.R +++ b/R/rucker.R @@ -84,7 +84,7 @@ mr_rucker <- function(dat, parameters=default_parameters()) attributes(res)$id.outcome <- d$id.outcome attributes(res)$exposure <- d$exposure attributes(res)$outcome <- d$outcome - for(j in 1:nrow(d)) + for(j in seq_len(nrow(d))) { x <- subset(dat, exposure == d$exposure[j] & outcome == d$outcome[j]) message(x$exposure[1], " - ", x$outcome[1]) @@ -369,7 +369,7 @@ mr_rucker_jackknife <- function(dat, parameters=default_parameters()) attributes(res)$id.outcome <- d$id.outcome attributes(res)$exposure <- d$exposure attributes(res)$outcome <- d$outcome - for(j in 1:nrow(d)) + for(j in seq_len(nrow(d))) { x <- subset(dat, exposure == d$exposure[j] & outcome == d$outcome[j]) message(x$exposure[1], " - ", x$outcome[1]) @@ -406,7 +406,7 @@ mr_rucker_jackknife_internal <- function(dat, parameters=default_parameters()) { # dat2$beta.exposure <- rnorm(nsnp, mean=dat$beta.exposure, sd=dat$se.exposure) # dat2$beta.outcome <- rnorm(nsnp, mean=dat$beta.outcome, sd=dat$se.outcome) - dat2 <- dat[sample(1:nrow(dat), nrow(dat), replace=TRUE), ] + dat2 <- dat[sample(seq_len(nrow(dat)), nrow(dat), replace = TRUE), ] l[[i]] <- mr_rucker_internal(dat2, parameters) } @@ -495,7 +495,7 @@ mr_rucker_cooksdistance <- function(dat, parameters=default_parameters()) i <- 1 l <- list() - while(any(index) & sum(!index) > 3) + while(any(index) && sum(!index) > 3) { dat <- dat[!index, ] cooks_threshold <- 4/nrow(dat) diff --git a/R/singlesnp.R b/R/singlesnp.R index 7cbc10d7..21e18aa9 100644 --- a/R/singlesnp.R +++ b/R/singlesnp.R @@ -45,7 +45,7 @@ mr_singlesnp <- function(dat, parameters=default_parameters(), single_method="mr with(x, get(single_method)(beta.exposure[i], beta.outcome[i], se.exposure[i], se.outcome[i], parameters)) }) nom <- c() - for(i in 1:length(all_method)) + for(i in seq_along(all_method)) { l[[nsnp+i]] <- with(x, get(all_method[i])(beta.exposure, beta.outcome, se.exposure, se.outcome, parameters)) diff --git a/README.Rmd b/README.Rmd index 7c151e55..819e0773 100644 --- a/README.Rmd +++ b/README.Rmd @@ -9,4 +9,6 @@ output: github_document +## Documentation + **Full documentation available here:** https://mrcieu.github.io/TwoSampleMR/ diff --git a/README.md b/README.md index e9e4ec84..c58d1aa1 100644 --- a/README.md +++ b/README.md @@ -59,5 +59,7 @@ is available here: +## Documentation + **Full documentation available here:** diff --git a/inst/sandpit/api_comparisons.R b/inst/sandpit/api_comparisons.R index 
5f050dcd..937746d4 100644 --- a/inst/sandpit/api_comparisons.R +++ b/inst/sandpit/api_comparisons.R @@ -16,7 +16,7 @@ options(mrbaseapi="http://api.mrbase.org/") inst2 <- extract_instruments(89, clump=TRUE) table(inst1==inst2) -for(i in 1:ncol(inst1)) +for (i in seq_len(ncol(inst1))) { print(sum(inst1[,i] == inst2[,i])) } diff --git a/inst/sandpit/harmonise.R b/inst/sandpit/harmonise.R index c315a166..ecdd14a4 100644 --- a/inst/sandpit/harmonise.R +++ b/inst/sandpit/harmonise.R @@ -27,7 +27,7 @@ -a <- read.table("inst/extdata/alleles.txt", he=T, stringsAsFactors=FALSE) +a <- read.table("inst/extdata/alleles.txt", he=TRUE, stringsAsFactors=FALSE) SNP <- a$SNP A1 <- a$A1 A2 <- a$A2 @@ -62,7 +62,7 @@ outcome_dat <- data.frame( dat <- TwoSampleMR::harmonise_data(exposure_dat, outcome_dat) -a <- read.table("inst/extdata/alleles.txt", he=T, stringsAsFactors=FALSE) +a <- read.table("inst/extdata/alleles.txt", he=TRUE, stringsAsFactors=FALSE) SNP <- a$SNP A1 <- a$A1 A2 <- a$A2 diff --git a/inst/sandpit/mysql.R b/inst/sandpit/mysql.R index 4e55e4c2..70adfdf6 100644 --- a/inst/sandpit/mysql.R +++ b/inst/sandpit/mysql.R @@ -1,11 +1,11 @@ -mysql -u epxjz -h epi-franklin.epi.bris.ac.uk -p -wv-92n_YjB +# mysql -u epxjz -h epi-franklin.epi.bris.ac.uk -p +# wv-92n_YjB -mysql -u mruser -h epi-franklin.epi.bris.ac.uk -p -TMG_F1WnTL +# mysql -u mruser -h epi-franklin.epi.bris.ac.uk -p +# TMG_F1WnTL -mysql -u gh13047 -h epi-franklin.epi.bris.ac.uk -p -ri.K-2Gbvd +# mysql -u gh13047 -h epi-franklin.epi.bris.ac.uk -p +# ri.K-2Gbvd mydb <- dbConnect(MySQL(), user='epxjz', password='wv-92n_YjB', dbname='mrbase', host='epi-franklin.epi.bris.ac.uk') dbListTables(mydb) @@ -28,42 +28,42 @@ d <- fetch(rs, n=10) dim(d) -ssh -L 3306:localhost:3306 gh13047@epi-franklin.epi.bris.ac.uk -mysql -u gh13047 -h 127.0.0.1 -P 3306 -p -ri.K-2Gbvd +# ssh -L 3306:localhost:3306 gh13047@epi-franklin.epi.bris.ac.uk +# mysql -u gh13047 -h 127.0.0.1 -P 3306 -p +# ri.K-2Gbvd -mysql -u mruser -h 127.0.0.1 -P 3306 -p -TMG_F1WnTL +# mysql -u mruser -h 127.0.0.1 -P 3306 -p +# TMG_F1WnTL -use mrbase; - -describe assoc; -describe snps; -describe study; - -SELECT COUNT(*) FROM study; -SELECT COUNT(*) FROM snps; +# use mrbase; +# +# describe assoc; +# describe snps; +# describe study; +# +# SELECT COUNT(*) FROM study; +# SELECT COUNT(*) FROM snps; # SELECT COUNT(*) FROM assoc; # 1.7 billion rows -SELECT * FROM study limit 10; -SELECT * FROM snps WHERE name='rs13078807'; -SELECT * FROM assoc WHERE snp=207707; - -SELECT * FROM assoc limit 10; -SELECT * FROM assoc WHERE snp=2223704; - -SELECT a.*, b.*, c.* -FROM assoc a, snps b, study c -WHERE a.snp=b.id AND a.study=c.id -AND (b.name='rs10900000' OR b.name='rs10000010' OR b.name='rs10000092') -AND (c.filename='cardiogramplusc4d_180814_update_data.txt.uniform.af.txt' OR c.filename='All_ancestries_SNP_gwas_mc_merge_nogc.tbl.uniq.gz.uniform.af.txt') -ORDER BY filename; - - -SELECT a.*, b.*, c.* -FROM assoc a, snps b, study c -WHERE a.snp=b.id AND a.study=c.id -AND b.name IN ('rs10900000', 'rs10000010', 'rs10000092') -AND c.filename IN ('cardiogramplusc4d_180814_update_data.txt.uniform.af.txt', 'All_ancestries_SNP_gwas_mc_merge_nogc.tbl.uniq.gz.uniform.af.txt', 'MAGIC_INSULIN_SECRETION_DI_for_release_HMrel27.txt.uniform.af.txt') -ORDER BY filename; +# SELECT * FROM study limit 10; +# SELECT * FROM snps WHERE name='rs13078807'; +# SELECT * FROM assoc WHERE snp=207707; +# +# SELECT * FROM assoc limit 10; +# SELECT * FROM assoc WHERE snp=2223704; +# +# SELECT a.*, b.*, c.* +# FROM assoc a, snps b, study c 
+# WHERE a.snp=b.id AND a.study=c.id +# AND (b.name='rs10900000' OR b.name='rs10000010' OR b.name='rs10000092') +# AND (c.filename='cardiogramplusc4d_180814_update_data.txt.uniform.af.txt' OR c.filename='All_ancestries_SNP_gwas_mc_merge_nogc.tbl.uniq.gz.uniform.af.txt') +# ORDER BY filename; +# +# +# SELECT a.*, b.*, c.* +# FROM assoc a, snps b, study c +# WHERE a.snp=b.id AND a.study=c.id +# AND b.name IN ('rs10900000', 'rs10000010', 'rs10000092') +# AND c.filename IN ('cardiogramplusc4d_180814_update_data.txt.uniform.af.txt', 'All_ancestries_SNP_gwas_mc_merge_nogc.tbl.uniq.gz.uniform.af.txt', 'MAGIC_INSULIN_SECRETION_DI_for_release_HMrel27.txt.uniform.af.txt') +# ORDER BY filename; diff --git a/inst/sandpit/sorting_gwas_catalog.R b/inst/sandpit/sorting_gwas_catalog.R index 847c2ba8..cb4f2988 100644 --- a/inst/sandpit/sorting_gwas_catalog.R +++ b/inst/sandpit/sorting_gwas_catalog.R @@ -2,11 +2,11 @@ # In EXCEL substitute missing cells for NA gwascat.file<-paste("~/gwascatalog_",Sys.Date(),".txt",sep="") #for newest version -download.file("https://www.ebi.ac.uk/gwas/api/search/downloads/alternative", gwascat.file, method="curl",quiet = FALSE,cacheOK = F) +download.file("https://www.ebi.ac.uk/gwas/api/search/downloads/alternative", gwascat.file, method="curl",quiet = FALSE,cacheOK = FALSE) # a <- read.table("~/Downloads/gwas_catalog_v1.0-downloaded_2015-09-21_2.txt", he=T, sep="\t", quote='"', comment="", stringsAsFactors=FALSE) -a<-read.table(gwascat.file,header=TRUE,sep='\t',quote="",comment.char="",as.is=TRUE,stringsAsFactors=F) +a<-read.table(gwascat.file,header=TRUE,sep='\t',quote="",comment.char="",as.is=TRUE,stringsAsFactors=FALSE) # a<-read.table("~/Downloads/gwascatalog.txt",header=TRUE,sep='\t',quote="",comment.char="",as.is=TRUE,stringsAsFactors=F) # a<-read.table(gwascat.file,he=T, sep="\t", quote='"', comment="", stringsAsFactors=FALSE) # a<-read.table("~/Downloads/gwascatalog.txt",he=T, sep="\t", quote='"', comment="", stringsAsFactors=FALSE) @@ -17,12 +17,12 @@ b <- subset(a, select=c(DISEASE.TRAIT, PUBMEDID, FIRST.AUTHOR, DATE, SNPS,STRONG b$RISK.ALLELE.FREQUENCY <- as.numeric(b$RISK.ALLELE.FREQUENCY) #exclude SNPs with missing rsids -b<-b[grep("rs",b$SNPS,ignore.case=T,),] #exclude SNPs without an rsid -b[grep("-",b$STRONGEST.SNP.RISK.ALLELE,ignore.case=T,invert=T),] +b<-b[grep("rs",b$SNPS,ignore.case=TRUE,),] #exclude SNPs without an rsid +b[grep("-",b$STRONGEST.SNP.RISK.ALLELE,ignore.case=TRUE,invert=TRUE),] # Get effect allele pos.allele<-gregexpr("[ATGC]",b$STRONGEST.SNP.RISK.ALLELE) -b$effect_allele<-lapply(1:length(b$STRONGEST.SNP.RISK.ALLELE),FUN=function(x) substr(b$STRONGEST.SNP.RISK.ALLELE[x],unlist(pos.allele[x]),unlist(pos.allele[x]))) +b$effect_allele<-lapply(seq_along(b$STRONGEST.SNP.RISK.ALLELE),FUN=function(x) substr(b$STRONGEST.SNP.RISK.ALLELE[x],unlist(pos.allele[x]),unlist(pos.allele[x]))) # Get year b$DATE <- as.Date(b$DATE, format="%d-%b-%y") @@ -31,7 +31,7 @@ b$year <- format(b$DATE, "%Y") # Try to get the units Start<-unlist(lapply(b$X95..CI..TEXT.,FUN=function(x) unlist(gregexpr("] ",x))+2)) Stop<-nchar(b$X95..CI..TEXT.) 
-b$units<-unlist(lapply(1:length(Start) ,FUN=function(x) substr(b$X95..CI..TEXT.[x],Start[x],Stop[x]))) +b$units<-unlist(lapply(seq_along(Start) ,FUN=function(x) substr(b$X95..CI..TEXT.[x],Start[x],Stop[x]))) b$units[which(unlist(regexpr("[:A-Za-z:]",b$units))==-1)]<-NA b$units[which(b$units=="[NR]")]<-NA b$units[which(b$units=="NR")]<-NA @@ -46,7 +46,7 @@ b$type<-NA b$type[unlist(lapply(c("older","higher","taller","increase","better","more", # higher "younger","lower","shorter","decrease","dcrease","decrea","fewer","worse", #lower "SD","unit","kg/m2","cm","msec","variance explained","% variance"), #other - FUN=function(x) grep(x,b$units,ignore.case=T)))]<-"continuous" + FUN=function(x) grep(x,b$units,ignore.case=TRUE)))]<-"continuous" # Assume that anything with OR.or.BETA < 0.5 is not an odds ratio / is a continuous phenotype b$type[which(b$OR.or.BETA<0.5 & is.na(b$units))]<-"continuous?" @@ -57,9 +57,9 @@ b$type[which((b$OR.or.BETA>1.0 | b$OR.or.BETA<2.0) & is.na(b$units))] <-"binary? b$direction<-NA b$direction[unlist(lapply(c("older","higher","taller","increase","better","more"), # higher - FUN=function(x) grep(x,b$units,ignore.case=T)))] <- "higher" + FUN=function(x) grep(x,b$units,ignore.case=TRUE)))] <- "higher" b$direction[unlist(lapply(c("younger","lower","shorter","decrease","dcrease","decrea","fewer","worse"), #lower - FUN=function(x) grep(x,b$units,ignore.case=T)))] <- "lower" + FUN=function(x) grep(x,b$units,ignore.case=TRUE)))] <- "lower" # Try to get standard errors and units from confidence intervals # calculate two sets of standard errors, assuming OR.or.BETA is and isn't an odds ratio @@ -69,7 +69,7 @@ b$direction[unlist(lapply(c("younger","lower","shorter","decrease","dcrease","de pos<-regexpr("-",c("1.174-1,457" , "1.46,2.33", "0.49,0.098", "1.28,2.202","1.1-1.2")) test1<-substr(c("1.174-1,457" , "1.46,2.33", "0.49,0.098", "1.28,2.202","1.1-1.2"),1,pos-1) nums<-gregexpr("[:0-9:]",c("1.174-1,457" , "1.46,2.33", "0.49,0.098", "1.28,2.202","1.1-1.2")) -end<-lapply(1:length(nums),FUN=function(x) unlist(nums[x])[length(unlist(nums[x]))]) # this finds the position of the last number in the sequence +end<-lapply(seq_along(nums),FUN=function(x) unlist(nums[x])[length(unlist(nums[x]))]) # this finds the position of the last number in the sequence test2<-substr(c("1.174-1,457" , "1.46,2.33", "0.49,0.098", "1.28,2.202","1.1-1.2"),pos+1,end) test1 @@ -78,7 +78,7 @@ as.numeric(test2) c<-b -b<-b[grep(",",b$ci95,invert=T),] +b<-b[grep(",",b$ci95,invert=TRUE),] pos.start<-regexpr("\\[",b$X95..CI..TEXT.)+1 @@ -87,9 +87,9 @@ ci95<-substr(b$X95..CI..TEXT.,pos.start,pos.end) ci95 ci95[which(ci95=="")]<-NA pos<-regexpr("-",ci95) -ci95[!is.na(ci95)][grep("-",ci95[!is.na(ci95)],invert=T)] +ci95[!is.na(ci95)][grep("-",ci95[!is.na(ci95)],invert=TRUE)] nums<-gregexpr("[:0-9:]",ci95) -end<-lapply(1:length(nums),FUN=function(x) unlist(nums[x])[length(unlist(nums[x]))]) # this finds the position of the last number in the sequence +end<-lapply(seq_along(nums),FUN=function(x) unlist(nums[x])[length(unlist(nums[x]))]) # this finds the position of the last number in the sequence num1<-substr(ci95,1,pos-1) num2<-substr(ci95,pos+1,end) unique(num2) @@ -197,14 +197,14 @@ Attr<-listAttributes(Mart) ensembl<-getBM(attributes=c("refsnp_id","chr_name","chrom_start","allele", "minor_allele", "minor_allele_freq"),filters="snp_filter",values=b1$SNP[i3],mart=Mart) ensembl <- subset(ensembl, !duplicated(refsnp_id)) temp <- subset(b1, select=c(SNP, Allele)) -temp$index <- 1:nrow(temp) +temp$index <- 
seq_len(nrow(temp)) temp <- merge(temp, ensembl, by.x="SNP", by.y="refsnp_id", all.x=TRUE) temp <- temp[order(temp$index),] alleles <- data.frame(t(sapply(strsplit(temp$allele, split="/"), function(x) x[1:2])), stringsAsFactors=FALSE) alleles$effect_allele <- temp$Allele -i4 <- sapply(1:nrow(alleles), function(i) alleles[i,which(alleles[i,1:2] != alleles[i,3])[1]]) -i4 <- sapply(i4, function(x) if(is.null(x)) NA else x) +i4 <- sapply(seq_len(nrow(alleles)), function(i) alleles[i, which(alleles[i, 1:2] != alleles[i, 3])[1]]) +i4 <- sapply(i4, function(x) if (is.null(x)) NA else x) b1$other_allele <- i4 b1$eaf[b1$eaf >= 1 | b1$eaf <= 0] <- NA diff --git a/inst/sandpit/test_mr_sign.R b/inst/sandpit/test_mr_sign.R index 2b343b9f..3b0bc075 100644 --- a/inst/sandpit/test_mr_sign.R +++ b/inst/sandpit/test_mr_sign.R @@ -7,7 +7,7 @@ param <- expand.grid(n = 1:100, x=0:100) param <- subset(param, x <= n) -for(i in 1:nrow(param)) +for (i in seq_len(nrow(param))) { param$pval[i] <- binom.test(x=param$x[i], n=param$n[i], p=0.5)$p.value } diff --git a/inst/sandpit/test_rucker.R b/inst/sandpit/test_rucker.R index 80d9dddd..bbbb9258 100644 --- a/inst/sandpit/test_rucker.R +++ b/inst/sandpit/test_rucker.R @@ -10,7 +10,7 @@ bp <- read.table(system.file(package="TwoSampleMR", "data/DebbieData_2.txt")) names(bp) <- c("beta.exposure", "se.exposure", "beta.outcome", "se.outcome") bp$mr_keep <- TRUE bp$id.exposure <- bp$id.outcome <- bp$exposure <- bp$outcome <- 1 -bp$SNP <- paste0("SNP", 1:nrow(bp)) +bp$SNP <- paste0("SNP", seq_len(nrow(bp))) a <- mr_all(bp) @@ -40,11 +40,11 @@ dev.off() res1 <- array(0, 100) res2 <- array(0, 100) res3 <- array(0, 100) -for(i in 1:100) +for (i in 1:100) { message(i) bp3 <- bp - index <- sample(1:nrow(bp3), replace=FALSE) + index <- sample(seq_len(nrow(bp3)), replace = FALSE) bp3$beta.outcome <- bp3$beta.outcome[index] bp3$se.outcome <- bp3$se.outcome[index] r1 <- mr_rucker_cooksdistance(bp3) @@ -56,11 +56,11 @@ for(i in 1:100) } res4 <- array(0, 100) -for(i in 1:100) +for (i in 1:100) { message(i) bp3 <- bp - index <- sample(1:nrow(bp3), replace=FALSE) + index <- sample(seq_len(nrow(bp3)), replace = FALSE) bp3$beta.outcome <- bp3$beta.outcome[index] bp3$se.outcome <- bp3$se.outcome[index] res4[i] <- with(bp3, mr_ivw(beta.exposure, beta.outcome, se.exposure, se.outcome))$pval @@ -161,7 +161,7 @@ param <- expand.grid( dim(param) out <- list() -for(i in 1:nrow(param)) +for (i in seq_len(nrow(param))) { effs <- make_effs(ninst1=param$nsnp[i], var_xy=param$var_xy[i], var_g1x=param$var_g1x[i], mu_g1y=param$mu_g1y[i]) pop1 <- make_pop(effs, param$nid1[i]) @@ -192,12 +192,12 @@ signif(cooks.distance(lmI), 3) # ~= Ci in Table 3, p.184 -for(i in 1:BootSim){ +for (i in 1:BootSim) { BXG = rnorm(length(BetaXG),BetaXG,seBetaXG) BYG = rnorm(length(BetaYG),BetaYG,seBetaYG) -if(weights==1){W = BXG^2/seBetaYG^2} -if(weights==2){W = 1/(seBetaYG^2/BXG^2 + (BYG^2)*seBetaXG^2/BXG^4)} +if (weights==1) {W = BXG^2/seBetaYG^2} +if (weights==2) {W = 1/(seBetaYG^2/BXG^2 + (BYG^2)*seBetaXG^2/BXG^4)} BIVw = BIV*sqrt(W) sW = sqrt(W) @@ -213,9 +213,9 @@ QQd[i] = DF2*phi_E Qp = 1-pchisq(Q,DF1) -if(QQ[i] <= qchisq(1-alpha,DF1)){Mod[i]=1} -if(QQ[i] >= qchisq(1-alpha,DF1)){Mod[i]=2} -if(QQ[i] >= qchisq(1-alpha,DF1) & QQ[i] - QQd[i] >= qchisq(1-alpha,1)){Mod[i]=3} -if(QQ[i] >= qchisq(1-alpha,DF1)& QQ[i] - QQd[i] >= qchisq(1-alpha,1)& QQd[i] >=qchisq(1-alpha,DF2)){Mod[i]=4} +if (QQ[i] <= qchisq(1-alpha,DF1)) {Mod[i]=1} +if (QQ[i] >= qchisq(1-alpha,DF1)) {Mod[i]=2} +if (QQ[i] >= qchisq(1-alpha,DF1) && QQ[i] - QQd[i] 
>= qchisq(1-alpha,1)) {Mod[i]=3} +if (QQ[i] >= qchisq(1-alpha,DF1) && QQ[i] - QQd[i] >= qchisq(1-alpha,1) && QQd[i] >=qchisq(1-alpha,DF2)) {Mod[i]=4} } diff --git a/inst/sandpit/test_selection.R b/inst/sandpit/test_selection.R index ac36f57e..ad48c5b9 100644 --- a/inst/sandpit/test_selection.R +++ b/inst/sandpit/test_selection.R @@ -7,7 +7,7 @@ find_invalid_instruments <- function(d1, d2, d3, steiger_thresh=0.05) index <- d1$pval < 5e-8 l0 <- list() - for(i in 1:nrow(d1)) + for (i in seq_len(nrow(d1))) { l0[[i]] <- mr_steiger( d2$pval[i], @@ -17,7 +17,7 @@ find_invalid_instruments <- function(d1, d2, d3, steiger_thresh=0.05) ) } l1 <- list() - for(i in 1:nrow(d1)) + for (i in seq_len(nrow(d1))) { l1[[i]] <- mr_steiger( d3$pval[i], @@ -27,7 +27,7 @@ find_invalid_instruments <- function(d1, d2, d3, steiger_thresh=0.05) ) } l2 <- list() - for(i in 1:nrow(d1)) + for (i in seq_len(nrow(d1))) { l2[[i]] <- mr_steiger( d3$pval[i], @@ -153,7 +153,7 @@ run_sim <- function(nid1, nid2, nidu, ninst1, ninst2, ninstu, var_xy, var_ux, va -for(i in 1:) +# for(i in 1:) # Sizes ninst1 <- 50 diff --git a/inst/sandpit/workflow.R b/inst/sandpit/workflow.R index 67f63a00..56ad6212 100644 --- a/inst/sandpit/workflow.R +++ b/inst/sandpit/workflow.R @@ -1,4 +1,4 @@ -ssh -L 3306:localhost:3306 gh13047@epi-franklin.epi.bris.ac.uk +# ssh -L 3306:localhost:3306 gh13047@epi-franklin.epi.bris.ac.uk # From GWAS catalog and mysql diff --git a/tests/testthat/test_otherformats.R b/tests/testthat/test_otherformats.R index c469bd75..92b3da97 100644 --- a/tests/testthat/test_otherformats.R +++ b/tests/testthat/test_otherformats.R @@ -15,8 +15,9 @@ test_that("MRInput with cor", { skip_on_cran() skip_if_offline() skip_if_offline(host = "api.opengwas.io") - w <- try(dat_to_MRInput(dat, get_correlations=TRUE)[[1]]) - if (inherits(w, "try-error")) skip("Server issues") + w <- tryCatch(dat_to_MRInput(dat, get_correlations=TRUE)[[1]], + error = function(e) skip("Server issues"), + warning = function(w) skip("Server issues")) expect_true(nrow(w@correlation) == length(w@betaX)) })
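
A note on the recurring `1:length(x)` / `1:nrow(x)` to `seq_along(x)` / `seq_len(n)` substitutions throughout this patch: when the vector is empty or the data frame has zero rows, `1:0` evaluates to `c(1, 0)`, so a `for` loop still runs twice with invalid indices, whereas `seq_len(0)` and `seq_along()` on an empty vector return zero-length vectors and the loop body is never entered. lintr's `seq_linter()`, part of the default linter set, flags the old pattern. A minimal sketch of the difference (the object names here are illustrative, not taken from the package):

dat <- data.frame(x = numeric(0))   # zero-row data frame
1:nrow(dat)                         # c(1, 0): a loop over this runs twice with bad indices
seq_len(nrow(dat))                  # integer(0): a loop over this is skipped entirely
for (i in seq_len(nrow(dat))) {
  print(dat$x[i])                   # never executed for zero-row input
}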