Skip to content

Commit

Permalink
NewYear_bugfix
Browse files Browse the repository at this point in the history
  • Loading branch information
Sudaraka88 authored and Sudaraka88 committed Jan 2, 2024
1 parent a5dbfc8 commit 232ad05
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 26 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
#- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}

Expand All @@ -49,3 +49,4 @@ jobs:
- uses: r-lib/actions/check-r-package@v2
with:
upload-snapshots: true
error-on: '"error"'
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: LDWeaver
Type: Package
Title:Genomewide Epistasis Analysis on Bacteria
Version: 1.3.1
Version: 1.3.2
Authors@R: person("Sudaraka", "Mallawaarachchi", email = "[email protected]", role = c("aut", "cre"))
Maintainer: Sudaraka Mallawaarachchi <[email protected]>
Description:Perform genomewide epistasis analysis by evaluating the LD structure in bacteria.
Expand Down
42 changes: 29 additions & 13 deletions R/BacGWES.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#' modelling since all links are used. However, setting a threshold > 2 will generally reduce the memory usage, plotting time (default = 3, i.e. corresponding to p = 0.001),
#' and run time for ARACNE. If all links are required to be returned, set to 0 (i.e. corresponding to p = 1), range 0 - 5
#' @param tanglegram_break_segments specify the number of genome segments to prepare - one tanglegram per segment (default = 5), range 1 - 10. Set NULL to skip tanglegram
#' @param write_gwesExplorer specify whether output for GWESExplorer is required (default = T)
#' @param multicore specify whether to use parallel processing (default = T)
#' @param ncores specify the number of cores to use for parallel processing (default = NULL), will auto detect if NULL
#' @param max_blk_sz specify maximum block size for MI computation (default = 10000), larger sizes require more RAM, range 1000 - 100000
Expand All @@ -48,7 +49,8 @@ LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path
gap_freq = 0.15, maf_freq = 0.01, hdw_threshold = 0.1, perform_SR_analysis_only = F,
SnpEff_Annotate = T, sr_dist = 20000, lr_retain_links = 1e6,
max_tophits = 250, num_clusts_CDS = 3, srp_cutoff = 3, tanglegram_break_segments = 5,
multicore = T, max_blk_sz = 10000, ncores = NULL, save_additional_outputs = F){
write_gwesExplorer = T, multicore = T, max_blk_sz = 10000, ncores = NULL,
save_additional_outputs = F){
# Build blocks
# BLK1: Extract SNPs and create sparse Mx from MSA (fasta)
# BLK2: Parse GBK or GFF+REF
Expand All @@ -65,6 +67,8 @@ LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path
#TODO: Provide the option to skip SNP extraction and use the whole provided alignment (redundant if pre-filtered)
#TODO: Add the option to provide genbank file without reference sequence
#TODO: Count through blocks and automate the displayed BLOCK NUMBER
#TODO: genbankr is being dropped from the newest Bioconductor, add alternative (https://github.com/gmbecker/genbankr)
#TODO: Add Hamming Distance plot, can we have a SNP Tree + Hamming Distance weights to show population structure control?

#NOTE: SnpEff does not parse the GBK and GFF3 file from the same refseq reference genome the same way. There might be differences between annotations/tophits/etc.
# # Welcome message # #
Expand Down Expand Up @@ -164,6 +168,12 @@ LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path

# setup paths
if(!file.exists(dset)) dir.create(dset) # save everything in here

# Save console output as a text file
info_file = file.path(dset, paste("LDW_run_",format(Sys.time(), "%Y%m%d%H%M%S"), ".txt", sep = ""))
suppressWarnings(sink(file= NULL))
sink(info_file, split = T)

add_path = file.path(dset, "Additional_Outputs") # Additional Outputs
if(save_additional_outputs) {
if(!file.exists(add_path)) dir.create(file.path(dset, "Additional_Outputs"))
Expand Down Expand Up @@ -205,7 +215,8 @@ LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path
cat(paste("All outputs will be saved to:", normalizePath(dset), "\n"))
cat(paste("\n *** Input paths *** \n\n"))
cat(paste("* Alignment:", aln_path, "\n"))
cat(paste("* GenBank Annotation:", gbk_path, "\n"))
if(!is.null(gbk_path)) cat(paste("* GenBank Annotation:", gbk_path, "\n"))
if(!is.null(gff3_path)) cat(paste("* GFF3 Annotation:", gff3_path, "\n"))
if(!is.null(snpeff_jar_path)) cat(paste("* SnpEff Annotations will be performed on short-range links. SnpEff path:", snpeff_jar_path, "\n"))

cat(paste("\n *** Parameters *** \n\n"))
Expand Down Expand Up @@ -358,6 +369,7 @@ LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path


if(nrow(sr_links) == 0){
suppressWarnings(sink(file= NULL)) ### output info to text file
stop("No potentially important sr_links were identified! Cannot continue analysis...")
}

Expand Down Expand Up @@ -385,26 +397,30 @@ LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path
tophits = LDWeaver::read_TopHits(top_hits_path = tophits_path)
}

# Additional paths if annotations are requested
# tanglegram
tanglegram_path = file.path(dset, "SR_Tanglegram")
if(!file.exists(tanglegram_path)) dir.create(tanglegram_path)
# GWESExplorer
gwesexplorer_path = file.path(dset, "SR_GWESExplorer")
if(!file.exists(gwesexplorer_path)) dir.create(gwesexplorer_path)
# NetworkPlot
netplot_path = file.path(dset, "SR_network_plot.png")

# BLK8
if(!is.null(tanglegram_break_segments)){
# tanglegram
tanglegram_path = file.path(dset, "SR_Tanglegram")
if(!file.exists(tanglegram_path)) dir.create(tanglegram_path)
cat("\n\n #################### BLOCK 9 #################### \n\n")
LDWeaver::create_tanglegram(tophits = tophits, gbk = gbk, gff = gff, tanglegram_folder = tanglegram_path, break_segments = tanglegram_break_segments)
}
# BLK9
cat("\n\n #################### BLOCK 10 #################### \n\n")
LDWeaver::write_output_for_gwes_explorer(snp.dat = snp.dat, tophits = tophits, gwes_explorer_folder = gwesexplorer_path)
if(write_gwesExplorer){
# GWESExplorer
gwesexplorer_path = file.path(dset, "SR_GWESExplorer")
if(!file.exists(gwesexplorer_path)) dir.create(gwesexplorer_path)
cat("\n\n #################### BLOCK 10 #################### \n\n")
LDWeaver::write_output_for_gwes_explorer(snp.dat = snp.dat, tophits = tophits, gwes_explorer_folder = gwesexplorer_path)
}


# BLK10
# Additional paths if annotations are requested
# NetworkPlot
netplot_path = file.path(dset, "SR_network_plot.png")

cat("\n\n #################### BLOCK 11 #################### \n\n")
LDWeaver::create_network(tophits = tophits, netplot_path = netplot_path, plot_title = paste("Networks in short-range tophits for", dset))

Expand Down
4 changes: 2 additions & 2 deletions R/io_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ cleanup = function(dset, delete_after_moving = F){
mv_success = c(mv_success, idx)
}

idx = c(grep("cds_var.rds", files), grep("hdw.rds", files), grep("parsed_gbk.rds", files), grep("snp_ACGTN.rds", files))
idx = c(grep("cds_var.rds", files), grep("hdw.rds", files), grep("parsed_gbk.rds", files), grep("parsed_gff3.rds", files), grep("snp_ACGTN.rds", files))
if(length(idx) > 0){
fldr = file.path(dset, "Additional_Outputs")
cleanup_support(files = file.path(dset, files[idx]), fldr)
Expand Down Expand Up @@ -298,7 +298,7 @@ cleanup = function(dset, delete_after_moving = F){
}

#### Temp folder ####
idx = c(grep("snpEff", files), grep("*.vcf", files), grep("*annotations.tsv", files), grep("*_links.tsv", files))
idx = c(grep("snpEff", files), grep("*.vcf", files), grep("*annotations.tsv", files), grep("*_links.tsv", files), grep("LDW_run_*", files))
if(length(idx) > 0){
fldr = file.path(dset, "Temp")
cleanup_support(files = file.path(dset, files[idx]), fldr)
Expand Down
16 changes: 7 additions & 9 deletions R/lr_analyser.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,14 @@ analyse_long_range_links = function(dset, lr_links_path, sr_links_path, are_lrli
# NOTE: spydrpick does not add clusters, add them from paint (requires cds_var)

if(SnpEff_Annotate == T) {
if( (is.null(gbk_path) & is.null(gff3_path)) | (!is.null(gbk_path) & !is.null(gff3_path)) ) stop("Either gbk_path or gff3_path must be provided.
To run without annotations, re-run analyse_long_range_links() with SnpEff_Annotate = F") # only one of gbk or gff can be NULL
if(!is.null(gff3_path) & is.null(ref_fasta_path)) stop("Reference fasta file must be provided for gff3 annoations.
To run without annotations, re-run analyse_long_range_links() with SnpEff_Annotate = F") # only one of gbk or gff can be NULL
if(is.null(snpeff_jar_path)) stop("You must specify <snpeff_jar_path> for annotations. To run without annotations, re-run analyse_long_range_links() with SnpEff_Annotate = F")
if(!file.exists(snpeff_jar_path)) stop(paste("<SnpEff.jar> not found at:", snpeff_jar_path, "please check the path provided"))
if(is.null(snpeff_jar_path)) stop("You must specify <snpeff_jar_path> for annotations. To run without annotations, re-run analyse_long_range_links() with SnpEff_Annotate = F")
if( (is.null(gbk_path) & is.null(gff3_path)) | (!is.null(gbk_path) & !is.null(gff3_path)) ) stop("Either gbk_path or gff3_path must be provided. To run without annotations, re-run analyse_long_range_links() with SnpEff_Annotate = F") # only one of gbk or gff can be NULL
if(!is.null(gff3_path) & is.null(ref_fasta_path)) stop("Reference fasta file must be provided for gff3 annoations. To run without annotations, re-run analyse_long_range_links() with SnpEff_Annotate = F") # only one of gbk or gff can be NULL
# if(is.null(snpeff_jar_path)) stop("You must specify <snpeff_jar_path> for annotations. To run without annotations, re-run analyse_long_range_links() with SnpEff_Annotate = F")
# if(!file.exists(snpeff_jar_path)) stop(paste("<SnpEff.jar> not found at:", snpeff_jar_path, "please check the path provided"))
# if(is.null(snpeff_jar_path)) stop("You must specify <snpeff_jar_path> for annotations. To run without annotations, re-run analyse_long_range_links() with SnpEff_Annotate = F")
# if(is.null(gbk_path)) stop("You must specify <gbk_path> for annotations. To run without annotations, re-run analyse_long_range_links() with SnpEff_Annotate = F")
if(is.null(snp.dat)) stop("You must specify <gbk_path> for annotations. To run without annotations, re-run analyse_long_range_links() with SnpEff_Annotate = F")
if(is.null(cds_var)) stop("You must specify <gbk_path> for annotations. To run without annotations, re-run analyse_long_range_links() with SnpEff_Annotate = F")
if(is.null(snp.dat)) stop("You must provide snp.dat to perform annotations.")
if(is.null(cds_var)) stop("You must specify cds_var to perform for annotations.")
}

# lr_links_path = "~/Desktop/LDWeaver_RUN/maela/lr_links.tsv"
Expand Down

0 comments on commit 232ad05

Please sign in to comment.