Skip to content

Commit

Permalink
Merge pull request #95 from UPHL-BioNGS/edits
Browse files Browse the repository at this point in the history
Edits
  • Loading branch information
erinyoung committed Apr 7, 2022
2 parents b370c9f + 9cb05e4 commit a265cb6
Show file tree
Hide file tree
Showing 11 changed files with 1,756 additions and 332 deletions.
222 changes: 186 additions & 36 deletions Cecret.nf

Large diffs are not rendered by default.

204 changes: 139 additions & 65 deletions README.md

Large diffs are not rendered by default.

99 changes: 99 additions & 0 deletions bin/.tests.notower.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/bin/bash
# Manual test runs of the Cecret workflow without Nextflow Tower.
#
# Usage: .tests.notower.sh [small|<anything else>]
#   small (default) : runs every input type through several parameter
#                     combinations, then a combined run and an
#                     "inputs do not exist" run.
#   anything else   : runs the larger SARS-CoV-2 datasets with relatedness.
#
# All paths are hard-coded to the author's sandbox.
# Reference invocation with Tower enabled:
#nextflow ~/sandbox/Cecret/Cecret.nf -profile singularity --reads /home/eriny/sandbox/test_files/cecret/reads --outdir tests -with-tower -resume

# Fall back to the small test set when no argument (or an empty one) is given.
test=${1:-small}

if [ "$test" == "small" ]
then
  # Each entry is both the workflow parameter name and the input directory name.
  options=("reads" "single_reads" "fastas")

  for option in "${options[@]}"
  do
    # defaults
    nextflow ~/sandbox/Cecret/Cecret.nf \
      -profile singularity,artic_V3 \
      "--${option}" "/home/eriny/sandbox/test_files/cecret/${option}" \
      --outdir "singularity_defaults_${option}"

    # removed test for bamsnap and rename because of lack of interest
    # attempted bcftools and filter
    nextflow ~/sandbox/Cecret/Cecret.nf \
      -profile singularity,artic_V3 \
      "--${option}" "/home/eriny/sandbox/test_files/cecret/${option}" \
      --outdir "all_on_${option}" \
      --bcftools_variants true \
      --filter true \
      -resume

    # removing primer trimming
    nextflow ~/sandbox/Cecret/Cecret.nf \
      -profile singularity,artic_V3 \
      "--${option}" "/home/eriny/sandbox/test_files/cecret/${option}" \
      --outdir "nontrimmed_${option}" \
      --trimmer 'none' \
      -resume

    # changing the cleaner, aligner, and trimmer
    nextflow ~/sandbox/Cecret/Cecret.nf \
      -profile singularity,artic_V3 \
      "--${option}" "/home/eriny/sandbox/test_files/cecret/${option}" \
      --outdir "toggled_${option}" \
      --cleaner 'fastp' \
      --trimmer 'samtools' \
      --aligner 'minimap2' \
      -resume

    # with UPHL's config
    nextflow ~/sandbox/Cecret/Cecret.nf \
      -profile uphl,artic_V3 \
      "--${option}" "/home/eriny/sandbox/test_files/cecret/${option}" \
      --outdir "uphl_${option}" \
      -resume
  done

  # multifasta
  # The single-end parameter and directory use an underscore, matching the
  # "single_reads" entry used in the loop above; a hyphenated flag would set a
  # different Nextflow parameter and the single-end input would be ignored.
  nextflow ~/sandbox/Cecret/Cecret.nf \
    -profile singularity,artic_V3 \
    --reads /home/eriny/sandbox/test_files/cecret/reads \
    --single_reads /home/eriny/sandbox/test_files/cecret/single_reads \
    --fastas /home/eriny/sandbox/test_files/cecret/fastas \
    --multifastas /home/eriny/sandbox/test_files/cecret/multifasta \
    --outdir kitchen_sink

  # empty
  # The input values are deliberately paths that do not exist.
  nextflow ~/sandbox/Cecret/Cecret.nf \
    -profile singularity,artic_V3 \
    --reads doesntexit \
    --single_reads willnotexist \
    --fastas shouldntexit \
    --outdir empty

else
  # CDC's test data with relatedness
  nextflow ~/sandbox/Cecret/Cecret.nf \
    -profile singularity,artic_V3 \
    --reads /home/eriny/sandbox/sars-cov-2-datasets/reads \
    --outdir default_datasets \
    --relatedness true

  nextflow ~/sandbox/Cecret/Cecret.nf \
    -profile uphl,artic_V3 \
    --reads /home/eriny/sandbox/sars-cov-2-datasets/reads \
    --outdir uphl_datasets \
    -resume \
    --relatedness true

  # CDC's test data with relatedness using nextalign
  nextflow ~/sandbox/Cecret/Cecret.nf \
    -profile singularity,artic_V3 \
    --reads /home/eriny/sandbox/sars-cov-2-datasets/reads \
    --outdir toggled_datasets \
    --cleaner 'fastp' \
    --trimmer 'samtools' \
    --aligner 'minimap2' \
    --relatedness true \
    --msa 'nextalign' \
    -resume
fi
16 changes: 15 additions & 1 deletion bin/combine_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
pangolin_file='lineage_report.csv'
nextclade_file='nextclade.csv'
vadr_file='vadr.csv'
freyja_file='aggregated-freyja.tsv'
summary_file='combined_summary.csv'

summary_df = pd.read_csv(summary_file, dtype = str)
Expand Down Expand Up @@ -41,7 +42,7 @@
columns = ['nextclade_clade'] + columns + nextclade_columns

if exists(pangolin_file) :
pangolin_df = pd.read_csv(pangolin_file, dtype = str, usecols = ['taxon', 'lineage', 'status', 'scorpio_call', 'version', 'pangolin_version', 'pango_version', 'pangoLEARN_version'])
pangolin_df = pd.read_csv(pangolin_file, dtype = str)
pangolin_df=pangolin_df.add_prefix('pangolin_')
pangolin_columns = list(pangolin_df.columns)
pangolin_columns.remove('pangolin_taxon')
Expand All @@ -53,4 +54,17 @@
summary_df.drop('pangolin_taxon', axis=1, inplace=True)
columns = ['pangolin_lineage'] + columns + pangolin_columns

# Merge the aggregated Freyja results into the summary when the file exists.
if exists(freyja_file) :
    freyja_df = pd.read_table(freyja_file, dtype = str, sep="\t")
    freyja_df = freyja_df.add_prefix('freyja_')

    # pandas labels a header-less first column 'Unnamed: 0'; after prefixing it
    # is the join key. It presumably holds '<sample>_variants.tsv' file names
    # (confirm against the freyja aggregate output).
    freyja_key = 'freyja_Unnamed: 0'

    # regex=False: strip the literal suffix. Treated as a regex, the '.' would
    # match any character rather than only a dot.
    freyja_df[freyja_key] = freyja_df[freyja_key].str.replace("_variants.tsv", "", regex=False)
    freyja_columns = list(freyja_df.columns)
    freyja_columns.remove(freyja_key)

    # Outer join keeps samples that appear only in the Freyja table.
    summary_df = pd.merge(summary_df, freyja_df, left_on = 'sample_id', right_on = freyja_key, how = 'outer')
    # Assign the filled columns back instead of using inplace=True on a column
    # selection, which is not guaranteed to update the parent DataFrame.
    summary_df['sample_id'] = summary_df['sample_id'].fillna(summary_df[freyja_key])
    summary_df['fasta_line'] = summary_df['fasta_line'].fillna(summary_df[freyja_key])
    summary_df.drop(freyja_key, axis=1, inplace=True)
    columns = columns + freyja_columns

summary_df.to_csv('cecret_results.csv', columns = ['sample_id','sample'] + columns, index=False)
38 changes: 33 additions & 5 deletions configs/cecret_config_template.config
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ nextalign_container = 'nextstrain/nextalign:latest'
snp_dists_container = 'staphb/snp-dists:latest'
iqtree2_container = 'staphb/iqtree2:latest'
pandas_container = 'quay.io/biocontainers/pandas:1.1.5'
multiqc_container = 'ewels/multiqc:latest'
freyja_container = 'staphb/freyja:latest'

//# Workflow parameters --------------------------------------

Expand Down Expand Up @@ -120,15 +122,15 @@ pandas_container = 'quay.io/biocontainers/pandas:1.1.5'
//params.filter = true

//# For process ivar_trim
//params.trimmer == 'ivar'
//params.trimmer = 'ivar'
//params.ivar_trim_options = ''

//# For process samtools_ampliconclip
//params.trimmer == 'samtools'
//params.trimmer = 'samtools'
//params.samtools_ampliconclip_options = ''

//# trimming can also be skipped with
//params.trimmer == 'none'
//params.trimmer = 'none'

//# For process ivar_variants
//params.ivar_variants = true
Expand Down Expand Up @@ -197,6 +199,15 @@ pandas_container = 'quay.io/biocontainers/pandas:1.1.5'
//params.pangolin_options = ''
//params.pangolin = true

//# For process freyja
//params.freyja = true
//params.freyja_variants_options=''
//params.freyja_demix_options=''
//params.freyja_boot_options='--nb 1000'
//params.freyja_aggregate_options=''
//params.freyja_plot_options=''
//params.freyja_plot_filetype='png'

//# For process nextclade
//params.nextclade_dataset = 'sars-cov-2'
//params.nextclade_options = ''
Expand All @@ -214,13 +225,17 @@ pandas_container = 'quay.io/biocontainers/pandas:1.1.5'
//# params.minimum_depth is shared with ivar_variants, ivar_consensus, and samtools_depth
//params.minimum_depth = 100

//# For process multiqc
//params.multiqc = true
//params.multiqc_options = ''

//# For process mafft
//params.msa == 'mafft'
//params.msa = 'mafft'
//params.mafft_options = '--maxambiguous 0.5'
//params.relatedness = true

//# For process nextalign
//params.msa == 'nextalign'
//params.msa = 'nextalign'
//params.nextalign_options = '--genes E,M,N,ORF1a,ORF1b,ORF3a,ORF6,ORF7a,ORF7b,ORF8,ORF9b,S --include-reference'
//params.relatedness = true

Expand Down Expand Up @@ -348,8 +363,17 @@ process {
container = pangolin_container
}

withName:freyja{
container = freyja_container
}

withName:freyja_aggregate{
container = freyja_container
}

withName:nextclade{
cpus = params.medcpus
memory = '4 GB'
container = nextclade_container
}

Expand All @@ -367,6 +391,10 @@ process {
container = pandas_container
}

withName:multiqc{
container = multiqc_container
}

withName:mafft{
cpus = params.maxcpus
container = mafft_container
Expand Down
Loading

0 comments on commit a265cb6

Please sign in to comment.