diff --git a/.version b/.version index e6e6db4c..188bef59 100644 --- a/.version +++ b/.version @@ -1 +1 @@ -v0.9.2 +v0.9.3 diff --git a/README.md b/README.md index 8d682994..b35b054c 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,11 @@ conda deactivate ### Changelog: +**v0.9.3 (January 11th, 2019)** + +* Added 'list_samples' subcommand to list samples within a project. +* Caught a few bugs and worked them out for smoother processing and reports. + **v0.9.2 (January 7th, 2019)** * Modified test dataset to run tests quicker and implemented CirclCI checking. diff --git a/docs/pages/quickstart.rst b/docs/pages/quickstart.rst index a1e20233..48913e03 100644 --- a/docs/pages/quickstart.rst +++ b/docs/pages/quickstart.rst @@ -4,7 +4,7 @@ :depth: 2 ------------------- + Initializing a Run ------------------ @@ -16,7 +16,7 @@ be created using the command below where {ConfigFile} is the path to your config The directory should look like this (RunName is specified in the ConfigFile}:: - >tree analysis/{RunName} + > tree analysis/{RunName} analysis/{RunName}/ ├── config.yml -> {path to ConfigFile} ├── input_data @@ -53,7 +53,28 @@ machine demultiplexing through input of index sequences in the SampleSheet.csv. See SampleSheet example in XXX. If sequence files are demultiplexed, they can be concatenated together into one file for each type of read using 'zcat'. ----------------- + +List Samples for a Run +---------------------- + +As long as the config and sampleInfo files are present and in their respective +locations, you can get a quick view of what samples are related to the project. +Using the 'list_samples' subcommand will produce an overview table on the +console or write the table to a file (specified by the output option). +Additionally, if a supplemental information file is associated with the run, the +data will be combined with the listed table.:: + + > iguide list_samples configs/simulation.config.yml + + Specimen Info for : simulation. 
+ + specimen replicates gRNA nuclease + ---------- ------------ --------------- ---------- + iGXA 1 TRAC Cas9v1 + iGXB 1 TRAC;TRBC;B2M Cas9v1 + iGXD 1 NA NA + + Processing a Run ---------------- @@ -75,7 +96,7 @@ options that can be passed to iGUIDE by appending to the iguide command after '- * [\-\-keep-going] will keep processing if one or more job error out * [-w X, \-\-latency-wait X] wait X seconds for the output files to appear before erroring out --------------- + An Example Run -------------- @@ -103,7 +124,7 @@ to ``dot -Tsvg`` will generate a vector graphic of the directed acyclic graph iguide run configs/simulation.config.yml -- --latency-wait 30 cat analysis/simulation/output/unique_sites.simulation.csv ---------- + Uninstall --------- diff --git a/tools/iguidelib/iguidelib/scripts/command.py b/tools/iguidelib/iguidelib/scripts/command.py index 9b2077f9..55ea4b7d 100644 --- a/tools/iguidelib/iguidelib/scripts/command.py +++ b/tools/iguidelib/iguidelib/scripts/command.py @@ -6,7 +6,7 @@ from iguidelib.scripts.run import main as Run from iguidelib.scripts.setup import main as Setup #from iguidelib.scripts.config import main as Config -#from iguidelib.scripts.list_samples import main as ListSamples +from iguidelib.scripts.list_samples import main as ListSamples def main(): @@ -15,8 +15,8 @@ def main(): "subcommands:\n" " setup \tCreate a new config file for a project using local data.\n" " run \tExecute the iGUIDE pipeline.\n" + " list_samples \tOutput a list of samples from a project.\n" " config \t[inDev] Modify or update iGUIDE config files.\n" - " list_samples \t[inDev] Make a list of samples from a directory.\n" ).format(version=__version__) parser = argparse.ArgumentParser( @@ -62,12 +62,6 @@ def main(): ) #Config(remaining) elif args.command == "list_samples": - raise SystemExit( - print(" 'iguide list_samples' subcommand is currently under \n" - " development. Checkout https://github.com/cnobles/iGUIDE/ \n" - " for updates and announcements. 
def main(argv=sys.argv):
    """List the samples of an iGUIDE project by delegating to list_samples.R.

    The function locates ``tools/rscripts/list_samples.R`` inside the iGUIDE
    install directory, runs it with ``Rscript`` and exits with the return code
    of that process.

    Args:
        argv: Sequence of command-line arguments forwarded verbatim to the R
            script (the config file path followed by any options). The
            ``iguide`` dispatcher passes the arguments that follow the
            subcommand. The default stays ``sys.argv`` so the original
            interface is unchanged.

    Raises:
        SystemExit: With a message when no Conda environment is active, with
            status 1 when the R script cannot be found, and otherwise with
            the return code of the ``Rscript`` process.
    """
    # os.environ.get() returns None for a missing key rather than raising, so
    # the value must be tested explicitly for the check to have any effect.
    if not os.environ.get("CONDA_PREFIX"):
        raise SystemExit(
            "Could not determine Conda prefix. Activate your iGUIDE "
            "environment and try this command again.")

    # Fall back to the working directory when IGUIDE_DIR is unset, the same
    # default run.py uses for --iguide_dir; avoids concatenating None + str.
    root_dir = os.getenv("IGUIDE_DIR", os.getcwd())
    r_script = Path(root_dir) / "tools" / "rscripts" / "list_samples.R"

    if not r_script.is_file():
        sys.stderr.write(
            "Error: Could not find a {0} in directory '{1}'\n".format(
                "list_samples.R", root_dir + "/tools/rscripts/"
            )
        )
        sys.exit(1)

    # list() lets callers hand over any sequence (e.g. a tuple), not only a list.
    r_comps = ["Rscript", str(r_script)] + list(argv)

    cmd = subprocess.run(r_comps)

    sys.exit(cmd.returncode)
#' list_samples.R
#'
#' usage: Rscript list_samples.R CONFIG [-o OUTPUT] [-v] [--install_path NAME]
#'
#' This script lists the samples and their supplementary data (if provided) to
#' the console. This can be a useful feature if you've systematically named run
#' directories and would like to know which of the processed (or unprocessed)
#' directories contains a specific set of samples.
#'
#' For anyone reviewing the code below, the following is a small style guide
#' outlining the various formats for the code.
#'
#' Names with "_": objects, including data.frames, GRanges, vectors, ...
#' Names in caMel format: functions or components of objects (i.e. columns
#' within a data.frame).
#' Names with ".": arguments / options for functions

# Required / highly suggested option parameters and library ----
# NOTE: warn = -1 silences every warning() raised further down, including the
# one emitted when the Supplemental Info file cannot be found.
options(stringsAsFactors = FALSE, scipen = 99, warn = -1)
suppressMessages(library("magrittr"))


# Supporting functions ----
#' Return the first existing regular file among `rel.path` as given and
#' `rel.path` relative to `root.dir`, or NULL when neither exists or when
#' `rel.path` is missing from the config. Directories are rejected because
#' they cannot be read as tables.
findFile <- function(rel.path, root.dir){

  is_usable <- is.character(rel.path) && length(rel.path) == 1 &&
    !is.na(rel.path) && nzchar(rel.path)

  if( !is_usable ) return(NULL)

  candidates <- c(rel.path, file.path(root.dir, rel.path))
  found <- candidates[utils::file_test("-f", candidates)]

  if( length(found) == 0 ) return(NULL)

  found[1]

}


# Set up and gather command line arguments ----
parser <- argparse::ArgumentParser(
  description = "List samples associated with a config file for iGUIDE.",
  usage = "iguide list_samples "
)

parser$add_argument(
  "config", nargs = 1, type = "character",
  help = "Run specific config file in yaml format."
)

parser$add_argument(
  "-o", "--output", nargs = 1, type = "character", default = FALSE,
  help = "Output file name .csv, .tsv, or .rds format."
)

parser$add_argument(
  "-v", "--verbose", action = "store_true",
  help = "Turns on diagnositc-based messages."
)

parser$add_argument(
  "--install_path", nargs = 1, type = "character", default = "IGUIDE_DIR",
  help = "iGUIDE install directory path, do not change for normal applications."
)

## Set arguments with parser
args <- parser$parse_args(commandArgs(trailingOnly = TRUE))

## Resolve the install directory. The option's default is the name of an
## environment variable, so it is looked up as one first; a value that is an
## existing directory is accepted as a literal path; otherwise fall back to
## IGUIDE_DIR, which is what the script always used before.
root_dir <- Sys.getenv(args$install_path)

if( !nzchar(root_dir) ){

  if( dir.exists(args$install_path) ){
    root_dir <- args$install_path
  }else{
    root_dir <- Sys.getenv("IGUIDE_DIR")
  }

}

## Directory holding this script, taken from the --file= argument of Rscript
code_dir <- dirname(sub(
  pattern = "--file=",
  replacement = "",
  x = grep("--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
))


input_table <- data.frame(
  "Variables" = paste0(names(args), " :"),
  "Values" = sapply(
    seq_along(args),
    function(i) paste(args[[i]], collapse = ", ")
  )
)

input_table <- input_table[
  match(
    c("config :", "output :", "verbose :", "install_path :"),
    input_table$Variables
  ),
]

## Log inputs
if( args$verbose ){

  cat("List Sample Inputs\n")
  print(
    x = data.frame(input_table),
    right = FALSE,
    row.names = FALSE
  )

}


# Load files ----
## Config
if( file.exists(args$config) ){
  config <- yaml::yaml.load_file(args$config)
}else{
  stop("\nCannot find config file: ", args$config, ".\n")
}

## Sample Info (required)
sample_info_path <- findFile(config$Sample_Info, root_dir)

if( is.null(sample_info_path) ){
  stop("\nCannot find associated Sample Info file: ", config$Sample_Info, ".\n")
}

sample_info <- data.table::fread(sample_info_path, data.table = FALSE)

## Supplemental Info (optional, `supp_info` is only created when it is found)
supp_info_path <- findFile(config$Supplemental_Info, root_dir)

if( is.null(supp_info_path) ){

  warning(
    "Cannot find Supplemental Info file: ", config$Supplemental_Info, ".\n"
  )

}else{

  supp_info <- data.table::fread(supp_info_path, data.table = FALSE)

}


# Join appropriate tables together and / or format for output ----

sample_col <- match(config$Sample_Name_Column, names(sample_info))

## length check covers a config without a Sample_Name_Column entry, where
## match() returns integer(0) and is.na() alone would give an empty condition
if( length(sample_col) != 1 || is.na(sample_col) ){
  stop("\nCannot isolate sampleName column: ",
       config$Sample_Name_Column, ".\n")
}

names(sample_info)[sample_col] <- "sampleName"

## Specimen = leading run of word characters of the sample name; replicates
## are counted with length() so the script does not depend on dplyr being
## attached (only magrittr is loaded above).
sample_info <- sample_info %>%
  dplyr::mutate(
    specimen = stringr::str_extract(sample_info$sampleName, "[\\w]+")
  ) %>%
  dplyr::group_by(specimen) %>%
  dplyr::summarise(replicates = length(specimen)) %>%
  dplyr::ungroup()

if( exists("supp_info") ){
  sample_info <- dplyr::left_join(sample_info, supp_info, by = "specimen")
}


# Output consolidated information ----
if( args$output != FALSE ){

  source(file.path(code_dir, "supporting_scripts/writeOutputFile.R"))
  writeOutputFile(as.data.frame(sample_info), args$output)

}else{

  run_name <- stringr::str_extract(args$config, "[\\w]+.config.yml$") %>%
    stringr::str_extract("[\\w]+")

  cat(paste0("\nSpecimen Info for : ", run_name, "."))
  pander::pandoc.table(sample_info, style = "simple", split.table = Inf)

}

q()