diff --git a/.gitignore b/.gitignore index 6d27821d..593db3d9 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,7 @@ doc/vtam.wrapper.rst out vtam.egg-info vtam/tests/outdir +<<<<<<< HEAD +======= example +>>>>>>> 2195a5d6bb2972454c238c6fd43a8da854059011 diff --git a/.travis.yml b/.travis.yml index 09818c18..e6bc91b3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,7 +34,7 @@ script: # Your tests script goes here - python --version - pip --version - - coverage run_name -m unittest discover -v -f + - coverage run -m unittest discover -v -f after_success: - bash <(curl -s https://codecov.io/bash) diff --git a/MANIFEST.in b/MANIFEST.in index 962e0d3c..68477a81 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,3 @@ -exclude _build include Makefile environment.yml environment-dev.yml requirements.txt -recursive-include doc *.md Makefile *.tsv *.yml -include doc/data/* -include snakefile.yml -include tools/snake.tuto.data.yml +graft vtam/data +graft vtam/data/example \ No newline at end of file diff --git a/RELEASE.rst b/RELEASE.rst index deb8bf0d..25686e9e 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -1,3 +1,9 @@ +CHANGES IN VERSION 0.1.18 (Sep 23, 2020) +-------------------------------------------------- + +- BUG bugs fixed +- ENH Added a 'vtam example' command to generate a file tree for a quick start + CHANGES IN VERSION 0.1.17 (Sep 19, 2020) -------------------------------------------------- diff --git a/doc/conf.py b/doc/conf.py index bdb11478..22df7e44 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -12,25 +12,25 @@ # add these directories to sys.output here. If the directory is relative to the # documentation root, use os.output.abspath to make it absolute, like shown here. 
# -from configparser import RawConfigParser + +import configparser import os -import vtam -# import sys -# sys.path.insert(0, os.path.abspath('...')) +import sys +sys.path.insert(0, os.path.abspath('..')) +import vtam # -- Project information ----------------------------------------------------- -def read_setup_cfg_metadata(field): - """Reads and gets information from setup.cfg.""" - config = RawConfigParser() - config.read(os.path.join('..', 'setup.cfg')) - return str(config.get('metadata', field)) - # General information about the project. +config = configparser.RawConfigParser() +config.read(os.path.join('..', 'setup.cfg')) +author = config['metadata']['author'] +copyright = config['metadata']['copyright'] + project = 'VTAM' -copyright = read_setup_cfg_metadata(field='copyright') -author = read_setup_cfg_metadata(field='author') +author = author +copyright = copyright # The short X.Y version. # version = '.'.join(read_setup_cfg_metadata(field='version').split('.')[0:2]) @@ -102,7 +102,7 @@ def read_setup_cfg_metadata(field): # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +# html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. 
diff --git a/doc/content/img/tuto_fig2.png b/doc/content/img/tuto_fig2.png new file mode 100644 index 00000000..9dae9c1d Binary files /dev/null and b/doc/content/img/tuto_fig2.png differ diff --git a/doc/content/img/tuto_fig3.png b/doc/content/img/tuto_fig3.png new file mode 100644 index 00000000..ec09dd63 Binary files /dev/null and b/doc/content/img/tuto_fig3.png differ diff --git a/doc/content/tutorial.rst b/doc/content/tutorial.rst index a882cb38..ef0b675d 100644 --- a/doc/content/tutorial.rst +++ b/doc/content/tutorial.rst @@ -1,143 +1,436 @@ Tutorial ============ -Installation ---------------------------------------------------------------------------------- +Important! Before running any command, do not forget to change directory to vtam-X.Y.Z and activate the conda environment. + +.. code-block:: bash + + cd vtam-X.Y.Z + conda activate vtam -The installation instructions can be found in the readme file. +.. note:: + With the exception of BLAST database files and the sqlite database all I/O files of VTAM are text files, that can be opened and edited by a simple text editor (gedit, geany, Notepad++ etc.): + + - TSV: Text files with tab separated values. Can also be opened by spreadsheets such as LibreOffice, Excel + - YML: Text files used to provide parameter names and values Data --------------------------------------------------------------------------------- -To setup a new VTAM workflow, follow these steps: +In this tutorial, we use a small test dataset based on our previous publication: `PMID 28776936 `_. In this dataset, each sample was amplified by two overlapping markers (mfzr and zfzr), targeting the first 175-181 nucleotides of the COI gene (Fig. 2). We had three PCR replicates for each sample-marker combination (Fig. 1). The samples are tagged , so the combination of the forward and reverse tags can be used to identify the origin (sample) of each read. 
set, each sample was amplified by two overlapping markers (mfzr and zfzr), targeting the first 175-181 -Prepare a FASTQ directory with the FASTQ dir. Here we will use a dataset from our previous publication: `PMID 28776936 `_. +To reduce run time, the test dataset contains only one mock sample, one negative control and two real samples. -The dataset can be found as Dryad dataset ` doi:10.5061/dryad.f40v5 `_. +.. figure:: img/tuto_fig2.png + :scale: 50 % + :alt: Figure 2 -1. Set FASTQ file location -2. Define FASTQ file sample information +Figure 2. Positions on the primer on the COI gene used in the test dataset. -Merge the FASTQ files ---------------------------------------------------------------------------------- +You can download these FASTQ files from here with this command: + +.. code-block:: bash -Set FASTQ file location -^^^^^^^^^^^^^^^^^^^^^^^^ + wget -nc http://pedagogix-tagc.univ-mrs.fr/~gonzalez/vtam/fastq.tar.gz -O fastq.tar.gz + tar zxvf fastq.tar.gz + rm fastq.tar.gz -FASTQ files are in this directory +This will create a "FASTQ" directory with 12 FASTQ files: .. code-block:: bash - $ ls /path/to/my/fastqdir - MFZR1_S4_L001_R1_001.fastq MFZR2_S5_L001_R2_001.fastq ZFZR1_S1_L001_R1_001.fastq ZFZR2_S2_L001_R2_001.fastq - MFZR1_S4_L001_R2_001.fastq MFZR3_S6_L001_R1_001.fastq ZFZR1_S1_L001_R2_001.fastq ZFZR3_S3_L001_R1_001.fastq - MFZR2_S5_L001_R1_001.fastq MFZR3_S6_L001_R2_001.fastq ZFZR2_S2_L001_R1_001.fastq ZFZR3_S3_L001_R2_001.fastq + $ ls fastq/ + mfzr_1_fw.fastq + mfzr_1_rv.fastq + ... + +mfzr_1_fw.fastq: Forward reads of replicates of the MFZR marker (all samples) + +merge: Merge FASTQ files +---------------------------- + +The simplest use of vtam is to analyze one sequencing run (run1) and one marker (MFZR). -Define FASTQ file sample information -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The first step is to merge the FASTQ files and transform them into fasta files. 
It can be skipped, if you have single end reads, or your paired sequences have already been merged and transformed into fasta files. -Create a TSV (tab-separated file), with a header and 10 columns with all the information per FASTQ file pair. +Create a TSV (tab-separated file), with a header and 10 columns with all the information per FASTQ file pair. We will call it "fastqinfo_mfzr.tsv" in this tutorial and you will find it in the VTAM "doc/data" directory [Link to file in "doc directory"]. This TSV file will determine, which file pairs should be merged. These files should be all in the "fastq" directory. This directory can contain other files as well, but they will not be analyzed. -These columns are needed +The following columns are required: -- Tag_fwd -- Primer_fwd -- Tag_rev -- Primer_rev +- TagFwd +- PrimerFwd +- TagRev +- PrimerRev - Marker - Sample - Replicate - Run -- Fastq_fw -- Fastq_rv +- FastqFwd +- FastqRev + +Tag and primer sequences are in 5' => 3' orientation. + +Hereafter are the first lines of the "fastqinfo_mfzr.tsv" file: + +.. code-block:: bash + + TagFwd PrimerFwd TagRev PrimerRev Marker Sample Replicate Run FastqFwd FastqRev + tcgatcacgatgt TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC mfzr tpos1_run1 1 run1 mfzr_1_fw.fastq mfzr_1_rv.fastq + agatcgtactagct TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC mfzr tnegtag_run1 1 run1 mfzr_1_fw.fastq mfzr_1_rv.fastq + +We propose to work in a project directory called "asper1" (the dataset comes from a project on Zingel asper) and copy user created input files such as "fastqinfo_mfzr.tsv" to the "asper1/user_input" directory. + +.. code-block:: bash + + asper1 + `-- user_input + `-- fastqinfo_mfzr.tsv + fastq + |-- mfzr_1_fw.fastq + |-- mfzr_1_rv.fastq + |-- ... + +Run merge for all file-pairs in the "fastqinfo_mfzr.tsv" +.. 
code-block:: bash + + vtam merge --fastqinfo asper1/user_input/fastqinfo_mfzr.tsv --fastqdir fastq --fastainfo asper1/run1_mfzr/fastainfo.tsv --fastadir asper1/run1_mfzr/merged -v --log asper1/vtam.log -The first two lines of my *fastqinfo.tsv* look like this: +.. note:: + For info on I/O files see the Reference section + +This command adds a "merged" directory and a new "fastainfo.tsv" file: .. code-block:: bash - Tag_fwd Primer_fwd Tag_rev Primer_rev Marker Sample Replicate Run Fastq_fwd Fastq_rev - cgatcgtcatcacg TCCACTAATCACAARGATATTGGTAC cgcgatctgtagag WACTAATCAATTWCCAAATCCTCC MFZR 14Mon01 repl2 prerun MFZR2_S5_L001_R1_001.fastq MFZR2_S5_L001_R2_001.fastq + asper1 + |-- run1_mfzr + | |-- fastainfo.tsv + | `-- merged + | |-- mfzr_1_fw.fasta + | |-- mfzr_2_fw.fasta + | `-- mfzr_3_fw.fasta + |-- user_input + | |-- fastqinfo_mfzr.tsv + |-- vtam.err + `-- vtam.log + fastq + |-- mfzr_1_fw.fastq + |-- mfzr_1_rv.fastq + |-- ... + +The first lines of the "fastainfo.tsv" file look like this: -Run the VTAM merge command -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + run marker sample replicate tagfwd primerfwd tagrev primerrev mergedfasta + run1 mfzr tpos1_run1 1 tcgatcacgatgt TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC mfzr_1_fw.fasta + run1 mfzr tnegtag_run1 1 agatcgtactagct TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC mfzr_1_fw.fasta -In addition to */path/to/my/fastqdir* and *fastqinfo.tsv*, we need +sortreads: Demultiplex and trim the reads +-------------------------------------------------------- -- Output WopMars DB file -- Output TSV file with FASTA file sample information -- Output directory to write the merged FASTA files +There is a single command "sortreads" to demultiplex the reads according to tags and to trim off tags and primers. + +The sortreads command is designed to deal with dual indexing, where forward and reverse tag combinations are used to determine the origin of the reads.
This is one of the most complex cases of demultiplexing, therefore we implemented "sortreads" to help users. + +For simpler cases, we suggest using "cutadapt" directly, since it is quite straightforward. .. code-block:: bash - mkdir -p out/fastadir - vtam merge --fastqinfo fastqinfo.tsv --fastqdir /path/to/my/fastqdir --fastainfo out/fastainfo.tsv --fastadir out/fastadir --log out/vtam.log -v + vtam sortreads --fastainfo asper1/run1_mfzr/fastainfo.tsv --fastadir asper1/run1_mfzr/merged --sorteddir asper1/run1_mfzr/sorted -v --log asper1/vtam.log +.. note:: + For info on I/O files see the Reference section -Open the *fastainfo.tsv* file and verify its content. A new column should be written with the names of the merged FASTA files. +The FASTA files with the sorted reads are written to the "asper1/run1_mfzr/sorted" directory: + +.. code-block:: bash + + asper1 + |-- run1_mfzr + | |-- fastainfo.tsv + | |-- ... + | `-- sorted + | |-- mfzr_1_fw_000.fasta + | |-- mfzr_1_fw_001.fasta + | |-- ... + | `-- sortedinfo.tsv + |-- ... + ... + +In addition, the TSV file "asper1/run1_mfzr/sorted/sortedinfo.tsv" lists the information, i.e. run, marker, sample and replicate about each sorted FASTA file. The "sortedinfo.tsv" file looks like this: + +.. code-block:: bash + + run marker sample replicate sortedfasta + run1 MFZR tpos1_run1 1 mfzr_1_fw_000.fasta + run1 MFZR tnegtag_run1 1 mfzr_1_fw_001.fasta + +filter: Filter variants and create the ASV table +--------------------------------------------------- + + +The "filter" command is typically first run with default parameters. From the output, users should identify clearly unwanted (‘delete’) and clearly necessary (‘keep’) occurrences (see Manual section for details). These false positive and false negative occurrences will be used as input to the "optimize" command. The "optimize" command will then suggest an optimal parameter combination tailored to your dataset. Then the "filter" command should be run again with the optimized parameters.
+ +Let's first run the "filter" command with default parameters. Verify also the content of the *out/fastadir* with the merged FASTA files. -Demultiplex the reads, filter variants and create the ASV tables -------------------------------------------------------------------------- + vtam filter --db asper1/db.sqlite --sortedinfo asper1/run1_mfzr/sorted/sortedinfo.tsv --sorteddir asper1/run1_mfzr/sorted --asvtable asper1/run1_mfzr/asvtable_default.tsv -v --log asper1/vtam.log + +.. note:: + For info on I/O files see the Reference section -There is a single command *filter* to demultiplex the reads, filter variants and create the ASV tables. This command takes quite long but its progress can be seen in the log file. +This command creates two new files, "db.sqlite" and "run1_mfzr/asvtable_default.tsv": .. code-block:: bash - vtam filter --fastainfo out/fastainfo.tsv --fastadir out/fastadir --db out/db.sqlite --outdir out --log out/vtam.log -v + asper1 + |-- db.sqlite + |-- run1_mfzr + | |-- asvtable_default.tsv + |-- ... + ... -The variants that passed all the filters together with read count in the different samples are found in the *out/asvtable.tsv*. The variants that were removed by the different filters can be found in the *out/db.sqlite* database that can be opened with the *sqlitebrowser* program. +The database "asper1/db.sqlite" contains one table per filter, and in each table occurrences are marked as deleted (filter_delete = 1) or retained (filter_delete = 0). This database can be opened with a sqlite browser program (for example, https://sqlitebrowser.org/ or https://sqlitestudio.pl/index.rvt). + +.. figure:: img/tuto_fig3.png + :scale: 50 % + :alt: Figure 3 + +The "asper1/run1_mfzr/asvtable_default.tsv" contains information about the variants that passed all the filters such as the run, marker, read count over all replicates of a sample and the sequence.
Hereafter are the first lines of the asvtable_default.tsv Pool Markers ---------------- -When variants were amplified with different markers, these variants can be pooled around a variant centroid with the following commands. + run marker variant sequence_length read_count tpos1_run1 tnegtag_run1 14ben01 14ben02 clusterid clustersize chimera_borderline sequence + run1 MFZR 25 181 478 478 0 0 0 25 1 False ACTATACCTTATCTTCGCAGTATTCTCAGGAATGCTAGGAACTGCTTTTAGTGTTCTTATTCGAATGGAACTAACATCTCCAGGTGTACAATACCTACAGGGAAACCACCAACTTTACAATGTAATCATTACAGCTCACGCATTCCTAATGATCTTTTTCATGGTTATGCCAGGACTTGTT + run1 MFZR 51 181 165 0 0 0 165 51 1 False ACTATATTTAATTTTTGCTGCAATTTCTGGTGTAGCAGGAACTACGCTTTCATTGTTTATTAGAGCTACATTAGCGACACCAAATTCTGGTGTTTTAGATTATAATTACCATTTGTATAATGTTATAGTTACGGGTCATGCTTTTTTGATGATCTTTTTTTTAGTAATGCCTGCTTTATTG + + +.. note:: + Filter can be run with the --known_occurrences argument that will add an additional column for each mock sample flagging expected variants. This helps in creating the known_occurrences.tsv input file for the optimization step. + +taxassign: Assign variants of ASV table to taxa +-------------------------------------------------- + +The "taxassign" command assigns ASV sequences in the last column of a TSV file such as the "asvtable_default.tsv" file to taxa. + +The "taxassign" command needs a BLAST database (containing reference sequences of known taxonomic origin) and the taxonomy information file. + +A precomputed taxonomy file in TSV format and the BLAST database with COI sequences can be downloaded with these commands: + +.. code-block:: bash + + vtam taxonomy --output vtam_db/taxonomy.tsv --precomputed + vtam coi_blast_db --blastdbdir vtam_db/coi_blast_db + +These commands result in these new files: + +.. code-block:: bash + + ...
+ vtam_db + |-- coi_blast_db + | |-- coi_blast_db_20200420.nhr + | |-- coi_blast_db_20200420.nin + | |-- coi_blast_db_20200420.nog + | |-- coi_blast_db_20200420.nsd + | |-- coi_blast_db_20200420.nsi + | └-- coi_blast_db_20200420.nsq + `-- taxonomy.tsv + +.. note:: + Alternatively, you can use your own custom database or the NCBI nucleotide database -An input TSV file must be given with the run and marker combinations that must be pooled. Eg, this is the *pool_run_marker.tsv* file: +Then, we can carry out the taxonomic assignation of variants in the "asvtable_default.tsv" with the following command: .. code-block:: bash - run_name marker_name - prerun MFZR - prerun ZFZR + vtam taxassign --db asper1/db.sqlite --asvtable asper1/run1_mfzr/asvtable_default.tsv --output asper1/run1_mfzr/asvtable_default_taxa.tsv --taxonomy vtam_db/taxonomy.tsv --blastdbdir vtam_db/coi_blast_db --blastdbname coi_blast_db_20200420 -v --log asper1/vtam.log -Then the *pool_markers* subcommand can be used: +.. note:: + For info on I/O files see the Reference section +This results in an additional file: .. code-block:: bash - vtam pool_markers --db ${DB} --runmarker pool_run_marker.tsv --pooledmarkers out/pooled_markers.tsv + asper1/ + |-- run1_mfzr + | |-- asvtable_default.tsv + | |-- asvtable_default_taxa.tsv -Taxon Assignation -------------------------------------------------------------------------- +optimize: Compute optimal filter parameters based on mock and negative samples +--------------------------------------------------------------------------------------- -There is the 'taxassign' subcommand that can assign taxa. +The "optimize" command helps users choose optimal parameters for filtering that are specifically adjusted to the dataset. This optimization is based on mock samples and negative controls. -To assign variants to taxa, we need the COI blast DB and the taxonomy information. 
+Users should prepare a TSV file ("known_occurrences_mfzr.tsv") with occurrences to be kept in the results (typically expected variants of the mock samples) and occurrences to be clearly deleted (typically all occurrences in negative controls, and unexpected occurrences in the mock samples). -Precomputed versions of these files can be downloaded in the following way: +The example TSV file for the known occurrences of the MFZR marker can be found in the "doc/data" directory. + +The first lines of this file look like this: + +.. code-block:: bash + + Marker Run Sample Mock Variant Action Sequence + MFZR run1 tpos1_run1 1 keep ACTATATTTTATTTTTGGGGCTTGATCCGGAATGCTGGGCACCTCTCTAAGCCTTCTAATTCGTGCCGAGCTGGGGCACCCGGGTTCTTTAATTGGCGACGATCAAATTTACAATGTAATCGTCACAGCCCATGCTTTTATTATGATTTTTTTCATGGTTATGCCTATTATAATC + MFZR run1 tpos1_run1 1 keep ACTTTATTTTATTTTTGGTGCTTGATCAGGAATAGTAGGAACTTCTTTAAGAATTCTAATTCGAGCTGAATTAGGTCATGCCGGTTCATTAATTGGAGATGATCAAATTTATAATGTAATTGTAACTGCTCATGCTTTTGTAATAATTTTCTTTATAGTTATACCTATTTTAATT + + ... + MFZR run1 tpos1_run1 1 delete TTTATATTTCATTTTTGGTGCATGATCAGGTATGGTGGGTACTTCCCTTAGTTTATTAATTCGAGCAGAACTTGGTAATCCTGGTTCTTTGATTGGCGATGATCAGATTTATAACGTTATTGTCACTGCCCATGCTTTTATTATGATTTTTTTTATAGTGATACCTATTATAATT + MFZR run1 tnegtag_run1 0 delete TTTATATTTTATTTTTGGAGCCTGAGCTGGAATAGTAGGTACTTCCCTTAGTATACTTATTCGAGCCGAATTAGGACACCCAGGCTCTCTAATTGGAGACGACCAAATTTATAATGTAATTGTTACTGCTCATGCTTTTGTAATAATTTTTTTTATAGTTATGCCAATTATAATT + +.. note:: + + It is possible to add extra columns with your notes (for example taxon names) to this file after the “Sequence” column. They will be ignored by VTAM. + +The "optimize" command is run like this: + +.. code-block:: bash + + vtam optimize --db asper1/db.sqlite --sortedinfo asper1/run1_mfzr/sorted/sortedinfo.tsv --sorteddir asper1/run1_mfzr/sorted --known_occurrences asper1/user_input/known_occurrences_mfzr.tsv --outdir asper1/run1_mfzr -v --log asper1/vtam.log + +..
note:: + + For info on I/O files see the Reference section + +This command creates four new files: + +.. code-block:: bash + + asper1/ + |-- db.sqlite + |-- run1_mfzr + | |-- ... + | |-- optimize_lfn_sample_replicate.tsv + | |-- optimize_lfn_read_count_and_lfn_variant.tsv + | |-- optimize_lfn_variant_specific.tsv + | |-- optimize_pcr_error.tsv + +.. note:: + + Running vtam optimize will run three underlying scripts: + + - OptimizePCRerror, to optimize “pcr_error_var_prop” + - OptimizeLFNsampleReplicate, to optimize “lfn_sample_replicate_cutoff” + - OptimizeLFNreadCountAndLFNvariant, to optimize “lfn_read_count_cutoff” and “lfn_variant_cutoff”. + +While OptimizePCRerror and OptimizeLFNsampleReplicate do not depend on the other two parameters to be optimized, OptimizeLFNreadCountAndLFNvariant does. For a finer tuning, it is possible to run the three subscripts one by one, and use the optimized values of “pcr_error_var_prop” and “lfn_sample_replicate_cutoff” instead of their default values, when running OptimizeLFNreadCountAndLFNvariant. This procedure can propose less stringent values for “lfn_read_count_cutoff” and “lfn_variant_cutoff”, but still eliminate as many unexpected occurrences as possible, and keep all expected ones. + +To run just one subscript, the --until flag can be added to the vtam optimize command + +- --until OptimizePCRerror +- --until OptimizeLFNsampleReplicate +- --until OptimizeLFNreadCountAndLFNvariant + +e.g. + +..
code-block:: bash + + vtam optimize --db asper1/db.sqlite --sortedinfo asper1/run1_mfzr/sorted/sortedinfo.tsv --sorteddir asper1/run1_mfzr/sorted --known_occurrences asper1/user_input/known_occurrences_mfzr.tsv --outdir asper1/run1_mfzr -v --log asper1/vtam.log --until OptimizePCRerror + + vtam optimize --db asper1/db.sqlite --sortedinfo asper1/run1_mfzr/sorted/sortedinfo.tsv --sorteddir asper1/run1_mfzr/sorted --known_occurrences asper1/user_input/known_occurrences_mfzr.tsv --outdir asper1/run1_mfzr -v --log asper1/vtam.log --until OptimizeLFNsampleReplicate + +Create a params_optimize_mfzr.yml file that will contain the optimal values suggested for “lfn_sample_replicate_cutoff” and “pcr_error_var_prop” +lfn_sample_replicate_cutoff: 0.003 +pcr_error_var_prop: 0.1 + +Run OptimizeLFNreadCountAndLFNvariant with the optimized parameters for the above two parameters. .. code-block:: bash - vtam taxonomy -o out/taxonomy.tsv --precomputed - vtam coi_blast_db --coi_blast_db_dir out/coi_blast_db + vtam optimize --db asper1/db.sqlite --sortedinfo asper1/run1_mfzr/sorted/sortedinfo.tsv --sorteddir asper1/run1_mfzr/sorted --known_occurrences asper1/user_input/known_occurrences_mfzr.tsv --outdir asper1/run1_mfzr -v --log asper1/vtam.log --until OptimizeLFNreadCountAndLFNvariant --params asper1/user_input/params_optimize_mfzr.yml + +This step will suggest the following parameter values +lfn_variant_cutoff: 0.001 +lfn_read_count_cutoff: 20 +For simplicity, we continue the tutorial with parameters optimized previously, with running all 3 optimize steps in one command. + +filter: Create an ASV table with optimal parameters and assign variants to taxa +--------------------------------------------------------------------------------- + +Once the optimal filtering parameters are chosen, rerun the "filter" command using the existing "db.sqlite" database that already has all the variant counts. -The input file is a TSV file, where the last column are the sequence of the variants. 
Both the *out/asvtable.tsv* and *pool_run_marker.tsv* can be used for the assignation. +Make a "params_mfzr.yml" file that contains the parameter names and values that differ from the default settings. -The command to carry out the taxon assignation is: +The "params_mfzr.yml" can be found in the "doc/data" directory and it looks like this: .. code-block:: bash - vtam taxassign --asvtable out/pooled_markers.tsv --variant_taxa out/pooled_markers_taxa.tsv --db out/db.sqlite --taxonomy out/taxonomy.tsv --blastdbdir out/coi_blast_db --blastdbname coi_blast_db --log out/vtam.log + lfn_variant_cutoff: 0.001 + lfn_sample_replicate_cutoff: 0.003 + lfn_read_count_cutoff: 70 + pcr_error_var_prop: 0.1 + +Run filter with optimized parameters: + +.. code-block:: bash + + vtam filter --db asper1/db.sqlite --sortedinfo asper1/run1_mfzr/sorted/sortedinfo.tsv --sorteddir asper1/run1_mfzr/sorted --params asper1/user_input/params_mfzr.yml --asvtable asper1/run1_mfzr/asvtable_optimized.tsv -v --log asper1/vtam.log + +Running again "taxassign" will complete the "asvtable_optimized.tsv" with the taxonomic information. It will be very quick since most variants in the table have already gone through the taxonomic assignment, and these assignations are extracted from the “db.sqlite”. + +.. code-block:: bash -Parameter Optimization ------------------------------- + vtam taxassign --db asper1/db.sqlite --asvtable asper1/run1_mfzr/asvtable_optimized.tsv --output asper1/run1_mfzr/asvtable_optimized_taxa.tsv --taxonomy vtam_db/taxonomy.tsv --blastdbdir vtam_db/coi_blast_db --blastdbname coi_blast_db_20200420 -v --log asper1/vtam.log -To help the user select the parameters, VTAM has an *optimize* subcommand that will compute different values based on positive and negative variants present in the mock, negative and real samples. The set of known variants are defined in a TSV file like this: :download:`known_occurrences.tsv ` +We finished our first analysis with VTAM! 
The resulting directory structure looks like this: .. code-block:: bash - vtam optimize --fastainfo out/fastainfo.tsv --fastadir out/fastadir --known_occurrences known_occurrences.tsv --db out/db.sqlite --outdir out --log out/vtam.log -v + asper1/ + |-- db.sqlite + |-- run1_mfzr + | |-- asvtable_default.tsv + | |-- asvtable_default_taxa.tsv + | |-- asvtable_optimized.tsv + | |-- asvtable_optimized_taxa.tsv + | |-- fastainfo.tsv + | |-- merged + | | |-- mfzr_1_fw.fasta + | | |-- ... + | |-- optimize_lfn_sample_replicate.tsv + | |-- optimize_lfn_read_count_and_lfn_variant.tsv + | |-- optimize_lfn_variant_specific.tsv + | |-- optimize_pcr_error.tsv + | `-- sorted + | |-- mfzr_1_fw_000.fasta + | |-- ... + | `-- sortedinfo.tsv + +Add new run-marker data to the existing database +----------------------------------------------------- + +The same samples can be amplified by different but strongly overlapping markers. In this case, it makes sense to pool all the data into the same database, and produce just one ASV table, with information of both markers. This is the case in our test dataset. + +It is also frequent to have different sequencing runs (with one or several markers) that are part of the same study. Feeding them to the same database assures coherence in variant IDs, and gives the possibility to easily produce one ASV table with all the runs and avoids re-running the taxassign on variants that have already been assigned to a taxon. + +**We assume that you have gone through the basic pipeline in the previous section** [LINK]. +Let's see an example on how to complete the previous analyses with the dataset obtained for the same samples but for another marker (ZFZR). The principle is the same if you want to complete the analyses with data from a different sequencing run. +First we need to prepare these user inputs: +The directory with the FASTQ files: "fastqinfo_zfzr.tsv" + +This is the "merge" command for the new run-marker: +.. 
code-block:: bash + + vtam merge --fastqinfo asper1/user_input/fastqinfo_zfzr.tsv --fastqdir fastq --fastainfo asper1/run1_zfzr/fastainfo.tsv --fastadir asper1/run1_zfzr/merged -v --log asper1/vtam.log + +This is the "sortreads" command for the new marker ZFZR: + +.. code-block:: bash + + vtam sortreads --fastainfo asper1/run1_zfzr/fastainfo.tsv --fastadir asper1/run1_zfzr/merged --sorteddir asper1/run1_zfzr/sorted -v --log asper1/vtam.log + +The "filter" command for the new marker ZFZR is the same as in the basic pipeline, but we will complete the previous database "asper1/db.sqlite" with the new variants. + +.. code-block:: bash + + vtam filter --db asper1/db.sqlite --sortedinfo asper1/run1_zfzr/sorted/sortedinfo.tsv --sorteddir asper1/run1_zfzr/sorted --asvtable asper1/run1_zfzr/asvtable_default.tsv -v --log asper1/vtam.log + +Next we run the "taxassign" command for the new ASV table "asper1/run1_zfzr/asvtable_default.tsv": + +.. code-block:: bash + vtam taxassign --db asper1/db.sqlite --asvtable asper1/run1_zfzr/asvtable_default.tsv --output asper1/run1_zfzr/asvtable_default_taxa.tsv --taxonomy vtam_db/taxonomy.tsv --blastdbdir vtam_db/coi_blast_db --blastdbname coi_blast_db_20200420 -v --log asper1/vtam.log diff --git a/requirements.txt b/requirements.txt index 7cb5587c..7fd91aeb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,4 @@ pyyaml snakemake sqlalchemy termcolor -wopmars>=0.1.0 +wopmars>=0.1.3 diff --git a/setup.cfg b/setup.cfg index 7ff73dd4..3b585e93 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,9 +1,9 @@ [metadata] description-file = README.rst -author = 'Aitor Gonzalez, Thomas Dechatre, Reda Mekdad, Emese Meglecz' -email = "aitor.gonzalez@univ-amu.fr" -copyright = "Copyright (c) 2018-2020: Aitor Gonzalez, Thomas Dechatre, Reda Mekdad, Emese Meglecz" -license = "MIT" +author = Aitor Gonzalez, Thomas Dechatre, Reda Mekdad, Emese Meglecz +email = aitor.gonzalez@univ-amu.fr +copyright = Copyright (c) 2018-2020: Aitor Gonzalez, Thomas Dechatre,
Reda Mekdad, Emese Meglecz +license = MIT [easy_install] diff --git a/setup.py b/setup.py index 396b9a57..5c57367d 100755 --- a/setup.py +++ b/setup.py @@ -5,30 +5,73 @@ __email__ = "aitor.gonzalez@univ-amu.fr, emese.meglecz@univ-amu.fr" __license__ = "MIT" -from configparser import RawConfigParser +import codecs +import configparser from setuptools import setup from setuptools import find_packages -# from vtam import __version__ import os import sys -import vtam +config = configparser.RawConfigParser() +config.read(os.path.join('.', 'setup.cfg')) +author = config['metadata']['author'] +email = config['metadata']['email'] +license = config['metadata']['license'] -def read_setup_cfg_metadata(field): - """Return package version from setup.cfg.""" - config = RawConfigParser() - config.read(os.path.join('.', 'setup.cfg')) - return str(config.get('metadata', field)) +if sys.version_info < (3, 6): + print("At least Python 3.6 is required.\n", file=sys.stderr) + exit(1) + +try: + from setuptools import setup, find_packages +except ImportError: + print("Please install setuptools before installing VTAM.", + file=sys.stderr) + exit(1) + +here = os.path.abspath(os.path.dirname(__file__)) +with open(os.path.join(here, 'README.rst'), encoding='utf-8') as fin: + long_description = fin.read() + +CLASSIFIERS = """\ +Development Status :: 4 - Beta +Environment :: Console +Intended Audience :: Science/Research +License :: OSI Approved :: MIT License +Programming Language :: Python +Programming Language :: Python :: 3 +Programming Language :: Python :: 3.7 +Programming Language :: Python :: 3 :: Only +Topic :: Scientific/Engineering :: Bio-Informatics +Operating System :: POSIX :: Linux +Operating System :: Microsoft :: Windows :: Windows 10 +""" + + +# Create list of package data files +def data_files_to_list(directory): + paths = [] + for (path, directories, filenames) in os.walk(directory): + for filename in filenames: + paths.append(os.path.join('..', path, filename)) + return 
paths + + +data_file_list = data_files_to_list('vtam/data') +data_example_file_list = data_files_to_list('vtam/data/example') + +def read(rel_path): + here = os.path.abspath(os.path.dirname(__file__)) + with codecs.open(os.path.join(here, rel_path), 'r') as fp: + return fp.read() -install_requires = [ - 'Jinja2>=2.11', - 'PyYAML>=5.3', - 'SQLAlchemy>=1.3', - 'biopython>=1.76', - 'pandas>=1.0', - 'termcolor>=1.1', - 'wopmars>=0.1.0', -] +def get_version(rel_path): + for line in read(rel_path).splitlines(): + if line.startswith('__version__'): + delim = '"' if '"' in line else "'" + return line.split(delim)[1] + else: + raise RuntimeError("Unable to find version string.") if sys.version_info < (3, 7): print("At least Python 3.7 is required.\n", file=sys.stderr) @@ -72,19 +115,18 @@ def data_files_to_list(directory): setup( name='vtam', - # version=read_setup_cfg_metadata(field='version'), - version=vtam.__version__, + version=get_version("vtam/__init__.py"), description="VTAM - Validation and Taxonomic Assignation of Metabarcoding Data", - author=read_setup_cfg_metadata(field='author'), - author_email=read_setup_cfg_metadata(field='email'), - url='https://tagc.univ-amu.fr/en/users/gonzalez-aitor, http://net.imbe.fr/~emeglecz/', - license=read_setup_cfg_metadata(field='license'), + author=author, + author_email=email, + url="https://vtam.readthedocs.io/en/latest/", + license=license, long_description=long_description, classifiers=[_f for _f in CLASSIFIERS.split('\n') if _f], packages=find_packages(), package_dir={'vtam': 'vtam'}, - package_data={'vtam': data_file_list + data_test_list}, - install_requires=install_requires, + package_data={'vtam': data_file_list}, + install_requires=['jinja2', 'pyyaml', 'sqlalchemy', 'biopython', 'pandas', 'progressbar', 'termcolor', 'wopmars'], entry_points={ 'console_scripts': ['vtam=vtam:main'] }, diff --git a/vtam/__init__.py b/vtam/__init__.py index 4c823775..c50d9473 100644 --- a/vtam/__init__.py +++ b/vtam/__init__.py @@ -22,8 
+22,7 @@ from vtam.utils.WopmarsRunner import WopmarsRunner from vtam.utils.constants import FilterLFNreference_records - -__version__ = "0.1.17" +__version__ = "0.1.18" class VTAM(object): diff --git a/vtam/data/example/fastqinfo.tsv b/vtam/data/example/fastqinfo.tsv new file mode 100644 index 00000000..27c25052 --- /dev/null +++ b/vtam/data/example/fastqinfo.tsv @@ -0,0 +1,25 @@ +TagFwd PrimerFwd TagRev PrimerRev Marker Sample Replicate Run FastqFwd FastqRev +tcgatcacgatgt TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC MFZR tpos1_run1 1 run1 mfzr_1_fw.fastq mfzr_1_rv.fastq +agatcgtactagct TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC MFZR tnegtag_run1 1 run1 mfzr_1_fw.fastq mfzr_1_rv.fastq +gtcgatcatgtca TCCACTAATCACAARGATATTGGTAC acatcgacgtacg WACTAATCAATTWCCAAATCCTCC MFZR 14ben01 1 run1 mfzr_1_fw.fastq mfzr_1_rv.fastq +catcgagtagag TCCACTAATCACAARGATATTGGTAC acatcgacgtacg WACTAATCAATTWCCAAATCCTCC MFZR 14ben02 1 run1 mfzr_1_fw.fastq mfzr_1_rv.fastq +tcgatcacgatgt TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC MFZR tpos1_run1 2 run1 mfzr_2_fw.fastq mfzr_2_rv.fastq +agatcgtactagct TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC MFZR tnegtag_run1 2 run1 mfzr_2_fw.fastq mfzr_2_rv.fastq +gtcgatcatgtca TCCACTAATCACAARGATATTGGTAC acatcgacgtacg WACTAATCAATTWCCAAATCCTCC MFZR 14ben01 2 run1 mfzr_2_fw.fastq mfzr_2_rv.fastq +catcgagtagag TCCACTAATCACAARGATATTGGTAC acatcgacgtacg WACTAATCAATTWCCAAATCCTCC MFZR 14ben02 2 run1 mfzr_2_fw.fastq mfzr_2_rv.fastq +tcgatcacgatgt TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC MFZR tpos1_run1 3 run1 mfzr_3_fw.fastq mfzr_3_rv.fastq +agatcgtactagct TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC MFZR tnegtag_run1 3 run1 mfzr_3_fw.fastq mfzr_3_rv.fastq +gtcgatcatgtca TCCACTAATCACAARGATATTGGTAC acatcgacgtacg WACTAATCAATTWCCAAATCCTCC MFZR 14ben01 3 run1 mfzr_3_fw.fastq mfzr_3_rv.fastq +catcgagtagag TCCACTAATCACAARGATATTGGTAC 
acatcgacgtacg WACTAATCAATTWCCAAATCCTCC MFZR 14ben02 3 run1 mfzr_3_fw.fastq mfzr_3_rv.fastq +tcgatcacgatgt AGATATTGGAACWTTATATTTTATTTTTGG gagatcatgtca WACTAATCAATTWCCAAATCCTCC ZFZR tpos1_run1 1 run1 zfzr_1_fw.fastq zfzr_1_rv.fastq +agatcgtactagct AGATATTGGAACWTTATATTTTATTTTTGG gagatcatgtca WACTAATCAATTWCCAAATCCTCC ZFZR tnegtag_run1 1 run1 zfzr_1_fw.fastq zfzr_1_rv.fastq +gtcgatcatgtca AGATATTGGAACWTTATATTTTATTTTTGG tatcgacgatgt WACTAATCAATTWCCAAATCCTCC ZFZR 14ben01 1 run1 zfzr_1_fw.fastq zfzr_1_rv.fastq +catcgagtagag AGATATTGGAACWTTATATTTTATTTTTGG tatcgacgatgt WACTAATCAATTWCCAAATCCTCC ZFZR 14ben02 1 run1 zfzr_1_fw.fastq zfzr_1_rv.fastq +tcgatcacgatgt AGATATTGGAACWTTATATTTTATTTTTGG gagatcatgtca WACTAATCAATTWCCAAATCCTCC ZFZR tpos1_run1 2 run1 zfzr_2_fw.fastq zfzr_2_rv.fastq +agatcgtactagct AGATATTGGAACWTTATATTTTATTTTTGG gagatcatgtca WACTAATCAATTWCCAAATCCTCC ZFZR tnegtag_run1 2 run1 zfzr_2_fw.fastq zfzr_2_rv.fastq +gtcgatcatgtca AGATATTGGAACWTTATATTTTATTTTTGG tatcgacgatgt WACTAATCAATTWCCAAATCCTCC ZFZR 14ben01 2 run1 zfzr_2_fw.fastq zfzr_2_rv.fastq +catcgagtagag AGATATTGGAACWTTATATTTTATTTTTGG tatcgacgatgt WACTAATCAATTWCCAAATCCTCC ZFZR 14ben02 2 run1 zfzr_2_fw.fastq zfzr_2_rv.fastq +tcgatcacgatgt AGATATTGGAACWTTATATTTTATTTTTGG gagatcatgtca WACTAATCAATTWCCAAATCCTCC ZFZR tpos1_run1 3 run1 zfzr_3_fw.fastq zfzr_3_rv.fastq +agatcgtactagct AGATATTGGAACWTTATATTTTATTTTTGG gagatcatgtca WACTAATCAATTWCCAAATCCTCC ZFZR tnegtag_run1 3 run1 zfzr_3_fw.fastq zfzr_3_rv.fastq +gtcgatcatgtca AGATATTGGAACWTTATATTTTATTTTTGG tatcgacgatgt WACTAATCAATTWCCAAATCCTCC ZFZR 14ben01 3 run1 zfzr_3_fw.fastq zfzr_3_rv.fastq +catcgagtagag AGATATTGGAACWTTATATTTTATTTTTGG tatcgacgatgt WACTAATCAATTWCCAAATCCTCC ZFZR 14ben02 3 run1 zfzr_3_fw.fastq zfzr_3_rv.fastq diff --git a/vtam/data/example/fastqinfo_mfzr.tsv b/vtam/data/example/fastqinfo_mfzr.tsv new file mode 100644 index 00000000..d137399b --- /dev/null +++ b/vtam/data/example/fastqinfo_mfzr.tsv @@ -0,0 +1,13 @@ +TagFwd PrimerFwd TagRev PrimerRev Marker 
Sample Replicate Run FastqFwd FastqRev +tcgatcacgatgt TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC MFZR tpos1_run1 1 run1 mfzr_1_fw.fastq mfzr_1_rv.fastq +agatcgtactagct TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC MFZR tnegtag_run1 1 run1 mfzr_1_fw.fastq mfzr_1_rv.fastq +gtcgatcatgtca TCCACTAATCACAARGATATTGGTAC acatcgacgtacg WACTAATCAATTWCCAAATCCTCC MFZR 14ben01 1 run1 mfzr_1_fw.fastq mfzr_1_rv.fastq +catcgagtagag TCCACTAATCACAARGATATTGGTAC acatcgacgtacg WACTAATCAATTWCCAAATCCTCC MFZR 14ben02 1 run1 mfzr_1_fw.fastq mfzr_1_rv.fastq +tcgatcacgatgt TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC MFZR tpos1_run1 2 run1 mfzr_2_fw.fastq mfzr_2_rv.fastq +agatcgtactagct TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC MFZR tnegtag_run1 2 run1 mfzr_2_fw.fastq mfzr_2_rv.fastq +gtcgatcatgtca TCCACTAATCACAARGATATTGGTAC acatcgacgtacg WACTAATCAATTWCCAAATCCTCC MFZR 14ben01 2 run1 mfzr_2_fw.fastq mfzr_2_rv.fastq +catcgagtagag TCCACTAATCACAARGATATTGGTAC acatcgacgtacg WACTAATCAATTWCCAAATCCTCC MFZR 14ben02 2 run1 mfzr_2_fw.fastq mfzr_2_rv.fastq +tcgatcacgatgt TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC MFZR tpos1_run1 3 run1 mfzr_3_fw.fastq mfzr_3_rv.fastq +agatcgtactagct TCCACTAATCACAARGATATTGGTAC tgtcgatctacagc WACTAATCAATTWCCAAATCCTCC MFZR tnegtag_run1 3 run1 mfzr_3_fw.fastq mfzr_3_rv.fastq +gtcgatcatgtca TCCACTAATCACAARGATATTGGTAC acatcgacgtacg WACTAATCAATTWCCAAATCCTCC MFZR 14ben01 3 run1 mfzr_3_fw.fastq mfzr_3_rv.fastq +catcgagtagag TCCACTAATCACAARGATATTGGTAC acatcgacgtacg WACTAATCAATTWCCAAATCCTCC MFZR 14ben02 3 run1 mfzr_3_fw.fastq mfzr_3_rv.fastq diff --git a/vtam/data/example/fastqinfo_zfzr.tsv b/vtam/data/example/fastqinfo_zfzr.tsv new file mode 100644 index 00000000..4b11f533 --- /dev/null +++ b/vtam/data/example/fastqinfo_zfzr.tsv @@ -0,0 +1,13 @@ +TagFwd PrimerFwd TagRev PrimerRev Marker Sample Replicate Run FastqFwd FastqRev +tcgatcacgatgt 
AGATATTGGAACWTTATATTTTATTTTTGG gagatcatgtca WACTAATCAATTWCCAAATCCTCC ZFZR tpos1_run1 1 run1 zfzr_1_fw.fastq zfzr_1_rv.fastq +agatcgtactagct AGATATTGGAACWTTATATTTTATTTTTGG gagatcatgtca WACTAATCAATTWCCAAATCCTCC ZFZR tnegtag_run1 1 run1 zfzr_1_fw.fastq zfzr_1_rv.fastq +gtcgatcatgtca AGATATTGGAACWTTATATTTTATTTTTGG tatcgacgatgt WACTAATCAATTWCCAAATCCTCC ZFZR 14ben01 1 run1 zfzr_1_fw.fastq zfzr_1_rv.fastq +catcgagtagag AGATATTGGAACWTTATATTTTATTTTTGG tatcgacgatgt WACTAATCAATTWCCAAATCCTCC ZFZR 14ben02 1 run1 zfzr_1_fw.fastq zfzr_1_rv.fastq +tcgatcacgatgt AGATATTGGAACWTTATATTTTATTTTTGG gagatcatgtca WACTAATCAATTWCCAAATCCTCC ZFZR tpos1_run1 2 run1 zfzr_2_fw.fastq zfzr_2_rv.fastq +agatcgtactagct AGATATTGGAACWTTATATTTTATTTTTGG gagatcatgtca WACTAATCAATTWCCAAATCCTCC ZFZR tnegtag_run1 2 run1 zfzr_2_fw.fastq zfzr_2_rv.fastq +gtcgatcatgtca AGATATTGGAACWTTATATTTTATTTTTGG tatcgacgatgt WACTAATCAATTWCCAAATCCTCC ZFZR 14ben01 2 run1 zfzr_2_fw.fastq zfzr_2_rv.fastq +catcgagtagag AGATATTGGAACWTTATATTTTATTTTTGG tatcgacgatgt WACTAATCAATTWCCAAATCCTCC ZFZR 14ben02 2 run1 zfzr_2_fw.fastq zfzr_2_rv.fastq +tcgatcacgatgt AGATATTGGAACWTTATATTTTATTTTTGG gagatcatgtca WACTAATCAATTWCCAAATCCTCC ZFZR tpos1_run1 3 run1 zfzr_3_fw.fastq zfzr_3_rv.fastq +agatcgtactagct AGATATTGGAACWTTATATTTTATTTTTGG gagatcatgtca WACTAATCAATTWCCAAATCCTCC ZFZR tnegtag_run1 3 run1 zfzr_3_fw.fastq zfzr_3_rv.fastq +gtcgatcatgtca AGATATTGGAACWTTATATTTTATTTTTGG tatcgacgatgt WACTAATCAATTWCCAAATCCTCC ZFZR 14ben01 3 run1 zfzr_3_fw.fastq zfzr_3_rv.fastq +catcgagtagag AGATATTGGAACWTTATATTTTATTTTTGG tatcgacgatgt WACTAATCAATTWCCAAATCCTCC ZFZR 14ben02 3 run1 zfzr_3_fw.fastq zfzr_3_rv.fastq diff --git a/vtam/data/example/known_occurrences.tsv b/vtam/data/example/known_occurrences.tsv new file mode 100644 index 00000000..52c662f8 --- /dev/null +++ b/vtam/data/example/known_occurrences.tsv @@ -0,0 +1,19 @@ +Marker Run Sample Mock Variant Action Sequence Tax_name +MFZR run1 tpos1_run1 1 keep 
ACTATATTTTATTTTTGGGGCTTGATCCGGAATGCTGGGCACCTCTCTAAGCCTTCTAATTCGTGCCGAGCTGGGGCACCCGGGTTCTTTAATTGGCGACGATCAAATTTACAATGTAATCGTCACAGCCCATGCTTTTATTATGATTTTTTTCATGGTTATGCCTATTATAATC Caenis pusilla +MFZR run1 tpos1_run1 1 keep ACTTTATTTTATTTTTGGTGCTTGATCAGGAATAGTAGGAACTTCTTTAAGAATTCTAATTCGAGCTGAATTAGGTCATGCCGGTTCATTAATTGGAGATGATCAAATTTATAATGTAATTGTAACTGCTCATGCTTTTGTAATAATTTTCTTTATAGTTATACCTATTTTAATT Rheocricotopus +MFZR run1 tpos1_run1 1 keep CCTTTATCTTGTATTTGGTGCCTGGGCCGGAATGGTAGGGACCGCCCTAAGCCTTCTTATTCGGGCCGAACTAAGCCAGCCTGGCTCGCTATTAGGTGATAGCCAAATTTATAATGTTATTGTTACCGCCCACGCCTTCGTAATAATTTTCTTTATAGTCATGCCAATTCTCATT Phoxinus phoxinus +MFZR run1 tpos1_run1 1 keep CCTTTATTTTATTTTCGGTATCTGATCAGGTCTCGTAGGATCATCACTTAGATTTATTATTCGAATAGAATTAAGAACTCCTGGTAGATTTATTGGCAACGACCAAATTTATAACGTAATTGTTACATCTCATGCATTTATTATAATTTTTTTTATAGTTATACCAATCATAATT Hydropsyche pellucidula +MFZR run1 tpos1_run1 1 keep CTTATATTTTATTTTTGGTGCTTGATCAGGGATAGTGGGAACTTCTTTAAGAATTCTTATTCGAGCTGAACTTGGTCATGCGGGATCTTTAATCGGAGACGATCAAATTTACAATGTAATTGTTACTGCACACGCCTTTGTAATAATTTTTTTTATAGTTATACCTATTTTAATT Synorthocladius semivirens +MFZR run1 tpos1_run1 1 keep TCTATATTTCATTTTTGGTGCTTGGGCAGGTATGGTAGGTACCTCATTAAGACTTTTAATTCGAGCCGAGTTGGGTAACCCGGGTTCATTAATTGGGGACGATCAAATTTATAACGTAATCGTAACTGCTCATGCCTTTATTATGATTTTTTTTATAGTGATACCTATTATAATT Baetis rhodani +MFZR run1 tpos1_run1 1 delete ACTATACCTTATCTTCGCAGTATTCTCAGGAATGCTAGGAACTGCTTTTAGTGTTCTTATTCGAATGGAACTAACATCTCCAGGTGTACAATACCTACAGGGAAACCACCAACTTTACAATGTAATCATTACAGCTCACGCATTCCTAATGATCTTTTTCATGGTTATGCCAGGACTTGTT +MFZR run1 tpos1_run1 1 delete ACTCTATTTAATATTTGCTGCATTTTCAGGGGTTATAGGAACAATATTTTCTATAATTATAAGAATGGAACTTGCTTATCCAGGTGATCAAATATTGAATGGTAATCACCAACTTTATAATGTTATTGTAACTGCTCATGCATTTGTAATGATTTTTTTTATGGTTATGCCTGCCTTGATT +MFZR run1 tpos1_run1 1 delete ACTTTATTTCATTTTCGGAACATTTGCAGGAGTTGTAGGAACTTTACTTTCATTATTTATTCGACTAGAATTAGCTTATCCAGGAAATCAATTTTTTTTAGGAAATCACCAACTTTATAATGTGGTTGTGACAGCACATGCTTTTATCATGATTTTTTTCATGGTTATGCCGATTTTAATC +MFZR run1 tpos1_run1 1 delete 
ACTTTATTTCATTTTCGGAACATTTGCAGGAGTTGTAGGAACTTTACTTTCATTATTTATTCGTCTTGAATTAGCTTATCCAGGAAATCAATTTTTTTTAGGAAATCACCAACTTTATAATGTGGTTGTGACAGCACATGCTTTTATCATGATTTTTTTCATGGTTATGCCGATTTTAATC +MFZR run1 tpos1_run1 1 delete ATTGTACCTTATATTTGCCTTATTTTCAGGGCTATTAGGTACTGCTTTTTCTGTTTTAATAAGACTTGAATTATCAGGACCTGGTGTACAATACATAGCTGATAACCAACTTTATAACAGTATAATTACTGCACATGCAATACTTATGATTTTCTTCATGGTTATGCCTGCTATGATA +MFZR run1 tpos1_run1 1 delete TTTATATTTCATTTTTGGTGCATGATCAGGTATGGTGGGTACTTCCCTTAGTTTATTAATTCGAGCAGAACTTGGTAATCCTGGTTCTTTGATTGGCGATGATCAGATTTATAACGTTATTGTCACTGCCCATGCTTTTATTATGATTTTTTTTATAGTGATACCTATTATAATT +MFZR run1 tnegtag_run1 0 delete TTTATATTTTATTTTTGGAGCCTGAGCTGGAATAGTAGGTACTTCCCTTAGTATACTTATTCGAGCCGAATTAGGACACCCAGGCTCTCTAATTGGAGACGACCAAATTTATAATGTAATTGTTACTGCTCATGCTTTTGTAATAATTTTTTTTATAGTTATGCCAATTATAATT +ZFZR run1 tpos1_run1 1 keep GGCTTGATCCGGAATGCTGGGCACCTCTCTAAGCCTTCTAATTCGTGCCGAGCTGGGGCACCCGGGTTCTTTAATTGGCGACGATCAAATTTACAATGTAATCGTCACAGCCCATGCTTTTATTATGATTTTTTTCATGGTTATGCCTATTATAATC Caenis pusilla +ZFZR run1 tpos1_run1 1 keep TGCTTGATCAGGAATAGTAGGAACTTCTTTAAGAATTCTAATTCGAGCTGAATTAGGTCATGCCGGTTCATTAATTGGAGATGATCAAATTTATAATGTAATTGTAACTGCTCATGCTTTTGTAATAATTTTCTTTATAGTTATACCTATTTTAATT Rheocricotopus +ZFZR run1 tpos1_run1 1 keep TGCTTGATCAGGGATAGTGGGAACTTCTTTAAGAATTCTTATTCGAGCTGAACTTGGTCATGCGGGATCTTTAATCGGAGACGATCAAATTTACAATGTAATTGTTACTGCACACGCCTTTGTAATAATTTTTTTTATAGTTATACCTATTTTAATT Chironomidae +ZFZR run1 tpos1_run1 1 keep TGCTTGGGCAGGTATGGTAGGTACCTCATTAAGACTTTTAATTCGAGCCGAGTTGGGTAACCCGGGTTCATTAATTGGGGACGATCAAATTTATAACGTAATCGTAACTGCTCATGCCTTTATTATGATTTTTTTTATAGTGATACCTATTATAATT Baetis rhodani +ZFZR run1 tpos1_run1 1 delete AGCATGATCTGGAATAGTAGGTACTTCCCTTAGTATCTTAATTCGAGCCGAATTAGGCCATGCAGGATCCTTAATTGGAGACGATCAAATTTATAACGTAATTGTTACTGCTCATGCTTTTGTAATAATTTTTTTTATAGTTATACCCATTTTAATT diff --git a/vtam/data/example/known_occurrences_mfzr.tsv b/vtam/data/example/known_occurrences_mfzr.tsv new file mode 100644 index 00000000..e7871c85 --- /dev/null +++ 
b/vtam/data/example/known_occurrences_mfzr.tsv @@ -0,0 +1,14 @@ +Marker Run Sample Mock Variant Action Sequence Tax_name +MFZR run1 tpos1_run1 1 keep ACTATATTTTATTTTTGGGGCTTGATCCGGAATGCTGGGCACCTCTCTAAGCCTTCTAATTCGTGCCGAGCTGGGGCACCCGGGTTCTTTAATTGGCGACGATCAAATTTACAATGTAATCGTCACAGCCCATGCTTTTATTATGATTTTTTTCATGGTTATGCCTATTATAATC Caenis pusilla +MFZR run1 tpos1_run1 1 keep ACTTTATTTTATTTTTGGTGCTTGATCAGGAATAGTAGGAACTTCTTTAAGAATTCTAATTCGAGCTGAATTAGGTCATGCCGGTTCATTAATTGGAGATGATCAAATTTATAATGTAATTGTAACTGCTCATGCTTTTGTAATAATTTTCTTTATAGTTATACCTATTTTAATT Rheocricotopus +MFZR run1 tpos1_run1 1 keep CCTTTATCTTGTATTTGGTGCCTGGGCCGGAATGGTAGGGACCGCCCTAAGCCTTCTTATTCGGGCCGAACTAAGCCAGCCTGGCTCGCTATTAGGTGATAGCCAAATTTATAATGTTATTGTTACCGCCCACGCCTTCGTAATAATTTTCTTTATAGTCATGCCAATTCTCATT Phoxinus phoxinus +MFZR run1 tpos1_run1 1 keep CCTTTATTTTATTTTCGGTATCTGATCAGGTCTCGTAGGATCATCACTTAGATTTATTATTCGAATAGAATTAAGAACTCCTGGTAGATTTATTGGCAACGACCAAATTTATAACGTAATTGTTACATCTCATGCATTTATTATAATTTTTTTTATAGTTATACCAATCATAATT Hydropsyche pellucidula +MFZR run1 tpos1_run1 1 keep CTTATATTTTATTTTTGGTGCTTGATCAGGGATAGTGGGAACTTCTTTAAGAATTCTTATTCGAGCTGAACTTGGTCATGCGGGATCTTTAATCGGAGACGATCAAATTTACAATGTAATTGTTACTGCACACGCCTTTGTAATAATTTTTTTTATAGTTATACCTATTTTAATT Synorthocladius semivirens +MFZR run1 tpos1_run1 1 keep TCTATATTTCATTTTTGGTGCTTGGGCAGGTATGGTAGGTACCTCATTAAGACTTTTAATTCGAGCCGAGTTGGGTAACCCGGGTTCATTAATTGGGGACGATCAAATTTATAACGTAATCGTAACTGCTCATGCCTTTATTATGATTTTTTTTATAGTGATACCTATTATAATT Baetis rhodani +MFZR run1 tpos1_run1 1 delete ACTATACCTTATCTTCGCAGTATTCTCAGGAATGCTAGGAACTGCTTTTAGTGTTCTTATTCGAATGGAACTAACATCTCCAGGTGTACAATACCTACAGGGAAACCACCAACTTTACAATGTAATCATTACAGCTCACGCATTCCTAATGATCTTTTTCATGGTTATGCCAGGACTTGTT +MFZR run1 tpos1_run1 1 delete ACTCTATTTAATATTTGCTGCATTTTCAGGGGTTATAGGAACAATATTTTCTATAATTATAAGAATGGAACTTGCTTATCCAGGTGATCAAATATTGAATGGTAATCACCAACTTTATAATGTTATTGTAACTGCTCATGCATTTGTAATGATTTTTTTTATGGTTATGCCTGCCTTGATT +MFZR run1 tpos1_run1 1 delete 
ACTTTATTTCATTTTCGGAACATTTGCAGGAGTTGTAGGAACTTTACTTTCATTATTTATTCGACTAGAATTAGCTTATCCAGGAAATCAATTTTTTTTAGGAAATCACCAACTTTATAATGTGGTTGTGACAGCACATGCTTTTATCATGATTTTTTTCATGGTTATGCCGATTTTAATC +MFZR run1 tpos1_run1 1 delete ACTTTATTTCATTTTCGGAACATTTGCAGGAGTTGTAGGAACTTTACTTTCATTATTTATTCGTCTTGAATTAGCTTATCCAGGAAATCAATTTTTTTTAGGAAATCACCAACTTTATAATGTGGTTGTGACAGCACATGCTTTTATCATGATTTTTTTCATGGTTATGCCGATTTTAATC +MFZR run1 tpos1_run1 1 delete ATTGTACCTTATATTTGCCTTATTTTCAGGGCTATTAGGTACTGCTTTTTCTGTTTTAATAAGACTTGAATTATCAGGACCTGGTGTACAATACATAGCTGATAACCAACTTTATAACAGTATAATTACTGCACATGCAATACTTATGATTTTCTTCATGGTTATGCCTGCTATGATA +MFZR run1 tpos1_run1 1 delete TTTATATTTCATTTTTGGTGCATGATCAGGTATGGTGGGTACTTCCCTTAGTTTATTAATTCGAGCAGAACTTGGTAATCCTGGTTCTTTGATTGGCGATGATCAGATTTATAACGTTATTGTCACTGCCCATGCTTTTATTATGATTTTTTTTATAGTGATACCTATTATAATT +MFZR run1 tnegtag_run1 0 delete TTTATATTTTATTTTTGGAGCCTGAGCTGGAATAGTAGGTACTTCCCTTAGTATACTTATTCGAGCCGAATTAGGACACCCAGGCTCTCTAATTGGAGACGACCAAATTTATAATGTAATTGTTACTGCTCATGCTTTTGTAATAATTTTTTTTATAGTTATGCCAATTATAATT diff --git a/vtam/data/example/known_occurrences_zfzr.tsv b/vtam/data/example/known_occurrences_zfzr.tsv new file mode 100644 index 00000000..740d7169 --- /dev/null +++ b/vtam/data/example/known_occurrences_zfzr.tsv @@ -0,0 +1,6 @@ +Marker Run Sample Mock Variant Action Sequence Tax_name +ZFZR run1 tpos1_run1 1 keep GGCTTGATCCGGAATGCTGGGCACCTCTCTAAGCCTTCTAATTCGTGCCGAGCTGGGGCACCCGGGTTCTTTAATTGGCGACGATCAAATTTACAATGTAATCGTCACAGCCCATGCTTTTATTATGATTTTTTTCATGGTTATGCCTATTATAATC Caenis pusilla +ZFZR run1 tpos1_run1 1 keep TGCTTGATCAGGAATAGTAGGAACTTCTTTAAGAATTCTAATTCGAGCTGAATTAGGTCATGCCGGTTCATTAATTGGAGATGATCAAATTTATAATGTAATTGTAACTGCTCATGCTTTTGTAATAATTTTCTTTATAGTTATACCTATTTTAATT Rheocricotopus +ZFZR run1 tpos1_run1 1 keep TGCTTGATCAGGGATAGTGGGAACTTCTTTAAGAATTCTTATTCGAGCTGAACTTGGTCATGCGGGATCTTTAATCGGAGACGATCAAATTTACAATGTAATTGTTACTGCACACGCCTTTGTAATAATTTTTTTTATAGTTATACCTATTTTAATT Chironomidae +ZFZR run1 tpos1_run1 1 keep 
TGCTTGGGCAGGTATGGTAGGTACCTCATTAAGACTTTTAATTCGAGCCGAGTTGGGTAACCCGGGTTCATTAATTGGGGACGATCAAATTTATAACGTAATCGTAACTGCTCATGCCTTTATTATGATTTTTTTTATAGTGATACCTATTATAATT Baetis rhodani +ZFZR run1 tpos1_run1 1 delete AGCATGATCTGGAATAGTAGGTACTTCCCTTAGTATCTTAATTCGAGCCGAATTAGGCCATGCAGGATCCTTAATTGGAGACGATCAAATTTATAACGTAATTGTTACTGCTCATGCTTTTGTAATAATTTTTTTTATAGTTATACCCATTTTAATT diff --git a/vtam/data/example/params.yml b/vtam/data/example/params.yml new file mode 100644 index 00000000..e69de29b diff --git a/vtam/data/example/params_mfzr.yml b/vtam/data/example/params_mfzr.yml new file mode 100644 index 00000000..8a9023f5 --- /dev/null +++ b/vtam/data/example/params_mfzr.yml @@ -0,0 +1,4 @@ +lfn_variant_cutoff: 0.001 +lfn_sample_replicate_cutoff: 0.003 +lfn_read_count_cutoff: 70 +pcr_error_var_prop: 0.1 diff --git a/vtam/data/example/params_optimize_mfzr.yml b/vtam/data/example/params_optimize_mfzr.yml new file mode 100644 index 00000000..7c3e1da5 --- /dev/null +++ b/vtam/data/example/params_optimize_mfzr.yml @@ -0,0 +1,2 @@ +lfn_sample_replicate_cutoff: 0.003 +pcr_error_var_prop: 0.1 diff --git a/vtam/data/example/params_zfzr.yml b/vtam/data/example/params_zfzr.yml new file mode 100644 index 00000000..5a1be0d8 --- /dev/null +++ b/vtam/data/example/params_zfzr.yml @@ -0,0 +1,5 @@ +lfn_variant_cutoff: 0.001 +lfn_sample_replicate_cutoff: 0.001 +lfn_read_count_cutoff: 10 +pcr_error_var_prop: 0.1 + diff --git a/vtam/data/example/pool_run_marker.tsv b/vtam/data/example/pool_run_marker.tsv new file mode 100644 index 00000000..2a90d068 --- /dev/null +++ b/vtam/data/example/pool_run_marker.tsv @@ -0,0 +1,3 @@ +run marker +run1 MFZR +run1 ZFZR diff --git a/vtam/data/example/snakeconfig.yml b/vtam/data/example/snakeconfig.yml new file mode 100644 index 00000000..5be5b34b --- /dev/null +++ b/vtam/data/example/snakeconfig.yml @@ -0,0 +1,10 @@ +project: asper2 +subproject: run1 +db: db.sqlite +fastqinfo: asper2/user_input/fastqinfo.tsv +fastqdir: fastq +known_occurrences: 
asper2/user_input/known_occurrences.tsv +params: asper2/user_input/params.yml +blastdbdir: vtam_db/coi_blast_db +blastdbname: coi_blast_db_20200420 +taxonomy: vtam_db/taxonomy.tsv diff --git a/vtam/data/example/snakeconfig_mfzr.yml b/vtam/data/example/snakeconfig_mfzr.yml new file mode 100644 index 00000000..e01a5ef7 --- /dev/null +++ b/vtam/data/example/snakeconfig_mfzr.yml @@ -0,0 +1,10 @@ +project: asper1 +subproject: run1_mfzr +fastqinfo: asper1/user_input/fastqinfo_mfzr.tsv +fastqdir: fastq +known_occurrences: asper1/user_input/known_occurrences_mfzr.tsv +params: asper1/user_input/params_mfzr.yml +blastdbdir: vtam_db/coi_blast_db +blastdbname: coi_blast_db_20200420 +taxonomy: vtam_db/taxonomy.tsv + diff --git a/vtam/data/example/snakeconfig_zfzr.yml b/vtam/data/example/snakeconfig_zfzr.yml new file mode 100644 index 00000000..0d15986a --- /dev/null +++ b/vtam/data/example/snakeconfig_zfzr.yml @@ -0,0 +1,10 @@ +project: asper1 +subproject: run1_zfzr +fastqinfo: asper1/user_input/fastqinfo_zfzr.tsv +fastqdir: fastq +known_occurrences: asper1/user_input/known_occurrences_zfzr.tsv +params: asper1/user_input/params_zfzr.yml +blastdbdir: vtam_db/coi_blast_db +blastdbname: coi_blast_db_20200420 +taxonomy: vtam_db/taxonomy.tsv + diff --git a/vtam/data/example/sortedinfo_mfzr.tsv b/vtam/data/example/sortedinfo_mfzr.tsv new file mode 100644 index 00000000..1b81c0e1 --- /dev/null +++ b/vtam/data/example/sortedinfo_mfzr.tsv @@ -0,0 +1,13 @@ +run marker sample replicate sortedfasta +run1 MFZR tpos1_run1 1 mfzr_1_fw_000.fasta +run1 MFZR tnegtag_run1 1 mfzr_1_fw_001.fasta +run1 MFZR 14ben01 1 mfzr_1_fw_002.fasta +run1 MFZR 14ben02 1 mfzr_1_fw_003.fasta +run1 MFZR tpos1_run1 2 mfzr_2_fw_004.fasta +run1 MFZR tnegtag_run1 2 mfzr_2_fw_005.fasta +run1 MFZR 14ben01 2 mfzr_2_fw_006.fasta +run1 MFZR 14ben02 2 mfzr_2_fw_007.fasta +run1 MFZR tpos1_run1 3 mfzr_3_fw_008.fasta +run1 MFZR tnegtag_run1 3 mfzr_3_fw_009.fasta +run1 MFZR 14ben01 3 mfzr_3_fw_010.fasta +run1 MFZR 14ben02 3 
mfzr_3_fw_011.fasta diff --git a/vtam/data/example/sortedinfo_zfzr.tsv b/vtam/data/example/sortedinfo_zfzr.tsv new file mode 100644 index 00000000..837d6aac --- /dev/null +++ b/vtam/data/example/sortedinfo_zfzr.tsv @@ -0,0 +1,13 @@ +run marker sample replicate sortedfasta +run1 ZFZR tpos1_run1 1 zfzr_1_fw_012.fasta +run1 ZFZR tnegtag_run1 1 zfzr_1_fw_013.fasta +run1 ZFZR 14ben01 1 zfzr_1_fw_014.fasta +run1 ZFZR 14ben02 1 zfzr_1_fw_015.fasta +run1 ZFZR tpos1_run1 2 zfzr_2_fw_016.fasta +run1 ZFZR tnegtag_run1 2 zfzr_2_fw_017.fasta +run1 ZFZR 14ben01 2 zfzr_2_fw_018.fasta +run1 ZFZR 14ben02 2 zfzr_2_fw_019.fasta +run1 ZFZR tpos1_run1 3 zfzr_3_fw_020.fasta +run1 ZFZR tnegtag_run1 3 zfzr_3_fw_021.fasta +run1 ZFZR 14ben01 3 zfzr_3_fw_022.fasta +run1 ZFZR 14ben02 3 zfzr_3_fw_023.fasta diff --git a/vtam/tests/test_argparser.py b/vtam/tests/test_argparser.py index bbafceb4..d02ca099 100644 --- a/vtam/tests/test_argparser.py +++ b/vtam/tests/test_argparser.py @@ -12,7 +12,6 @@ class TestArgParser(unittest.TestCase): def setUp(self): self.parser = ArgParser.get_main_arg_parser() - doc_path = PathManager.get_doc_path() package_path = PathManager.get_package_path() test_path = PathManager.get_test_path() self.test_path = test_path @@ -23,12 +22,13 @@ def setUp(self): self.foopaths['dirdoesnotexist'] = "dirdoesnotexist" self.foopaths['fileisempty'] = os.path.join(test_path, "test_files", "emptyfile") self.foopaths['filenottsv'] = __file__ - self.foopaths['sortedinfo_tsv'] = os.path.join(package_path, "data/example", "sortedinfo_mfzr.tsv") - self.foopaths['params_yml'] = os.path.join(package_path, "data/example", "params_mfzr.yml") - self.foopaths['params_wrong_yml'] = os.path.join(test_path, "test_params_file", "params_wrong.yml") - self.foopaths['known_occurrences'] = os.path.join(package_path, "data/example", "known_occurrences.tsv") + self.foopaths['sortedinfo_tsv'] = os.path.join(package_path, "data/example/sortedinfo_mfzr.tsv") + self.foopaths['params_yml'] = 
os.path.join(package_path, "data/example/params_mfzr.yml") + self.foopaths['params_wrong_yml'] = os.path.join(test_path, "test_params_file/params_wrong.yml") + self.foopaths['known_occurrences'] = os.path.join(package_path, "data/example/known_occurrences.tsv") self.foopaths['asvtable_tsv'] = os.path.join( - test_path, "test_files_dryad.f40v5_small", "run1_mfzr_zfzr", "asvtable_default.tsv") + test_path, "test_files_dryad.f40v5_small", "run1_mfzr_zfzr/asvtable_default.tsv") + self.foopaths['runmarker_tsv'] = os.path.join(package_path, "data/example", "pool_run_marker.tsv") self.foopaths['taxonomy_tsv'] = os.path.join(PathManager.get_test_path(), diff --git a/vtam/utils/ArgParser.py b/vtam/utils/ArgParser.py index fb3f2f9f..fd191eae 100644 --- a/vtam/utils/ArgParser.py +++ b/vtam/utils/ArgParser.py @@ -2,7 +2,6 @@ import multiprocessing import os import pathlib - import pandas import yaml