Update 20240702 #347

Merged · 17 commits · Jul 9, 2024
2 changes: 2 additions & 0 deletions .github/workflows/check_versions.yml
@@ -36,6 +36,8 @@ jobs:
echo "New version for $base! Upgrade to $latest_version from $workflow_version." | tee -a versions.txt
issue_text="$issue_text<br>- $base from $workflow_version to $latest_version "
fi

docker rmi $base:latest
done

latest_nextclade_version=$(docker run nextstrain/nextclade:latest nextclade --version | awk '{print $2}')
137 changes: 132 additions & 5 deletions .github/workflows/github_actions.config
@@ -1,7 +1,134 @@
params.vadr = false

process {
withName:ivar_consensus{
memory = '4 GB'
}
errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'}
withName:aci{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:artic{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:artic_read_filtering{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:bbnorm{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:bcftools_variants{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:bwa{
publishDir = [ path: "cecret", mode: 'link', pattern: 'logs/*/*log' ]
}
withName:download{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:fasta_prep{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:summary{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:unzip{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:fastp{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:fastqc{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:freyja_variants{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:freyja_demix{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:freyja_aggregate{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:heatcluster{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:igv_reports{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:iqtree2{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:ivar_consensus{
memory = '4 GB'
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:ivar_variants{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:ivar_trim{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:kraken2{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:mafft{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:minimap2{
publishDir = [ path: "cecret", mode: 'link', pattern: 'logs/*/*log' ]
}
withName:multiqc_combine{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:nextclade_dataset{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:nextclade{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:pango_collapse{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:pangolin{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:phytreeviz{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_stats{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_coverage{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_flagstat{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_depth{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_ampliconstats{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_plot_ampliconstats{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_sort{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_filter{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_ampliconclip{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_markdup{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:seqyclean{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:snpdists{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:vadr{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
}

4 changes: 4 additions & 0 deletions .github/workflows/test_kraken2.yml
@@ -41,5 +41,9 @@ jobs:

cat cecret*/cecret_results.txt

- name: Kraken2 results
run: |
wc -l cecret/kraken2/*_kraken2_report.txt

- name: Clean
run: rm -rf work .nextflow*
37 changes: 37 additions & 0 deletions .github/workflows/test_mpx_yale.yml
@@ -0,0 +1,37 @@
name: Test mpx workflow with yale primers

on: [pull_request, workflow_dispatch]

run-name: mpx_yale

jobs:

test:
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@master

- name: Install Nextflow
run: |
wget -qO- get.nextflow.io | bash
sudo mv nextflow /usr/local/bin/

- name: Download reads
run: |
mkdir reads
cd reads
wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR206/024/SRR20689724/SRR20689724_1.fastq.gz
wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR206/024/SRR20689724/SRR20689724_2.fastq.gz
cd ../

- name: Run Cecret
run: |
nextflow run . -profile docker,mpx_yale -c .github/workflows/github_actions.config --maxcpus 2 --medcpus 2

ls cecret*

head cecret*/cecret_results.txt

- name: Clean
run: rm -rf work .nextflow*
3 changes: 2 additions & 1 deletion .github/workflows/test_primers.yml
@@ -18,7 +18,8 @@ jobs:
'ncov_V3',
'ncov_V4',
'ncov_V4.1',
'ncov_V5.3.2'
'ncov_V5.3.2',
'mpx_yale'
]

steps:
2 changes: 1 addition & 1 deletion .github/workflows/test_profile.yml
@@ -24,7 +24,7 @@ jobs:

- name: Run Cecret
run: |
nextflow run . -profile docker,test -c .github/workflows/github_actions.config --maxcpus 2 --medcpus 2 --cleaner 'fastp' --aligner 'minimap2' --mpileup_depth 200 --vadr false
nextflow run . -profile docker,test -c .github/workflows/github_actions.config --maxcpus 2 --medcpus 2 --cleaner 'fastp' --aligner 'minimap2' --mpileup_depth 200

ls cecret*

2 changes: 1 addition & 1 deletion .github/workflows/test_profile1.yml
@@ -24,7 +24,7 @@ jobs:

- name: Run Cecret
run: |
nextflow run . -profile docker,test1 -c .github/workflows/github_actions.config --maxcpus 2 --medcpus 2 --cleaner 'fastp' --aligner 'minimap2' --mpileup_depth 200 --vadr false
nextflow run . -profile docker,test1 -c .github/workflows/github_actions.config --maxcpus 2 --medcpus 2 --cleaner 'fastp' --aligner 'minimap2' --mpileup_depth 200

ls cecret*

2 changes: 1 addition & 1 deletion .github/workflows/test_profile2.yml
@@ -24,7 +24,7 @@ jobs:

- name: Run Cecret
run: |
nextflow run . -profile docker,test2 -c .github/workflows/github_actions.config --maxcpus 2 --medcpus 2 --cleaner 'fastp' --aligner 'minimap2' --mpileup_depth 200 --vadr false
nextflow run . -profile docker,test2 -c .github/workflows/github_actions.config --maxcpus 2 --medcpus 2 --cleaner 'fastp' --aligner 'minimap2' --mpileup_depth 200

ls cecret*

5 changes: 4 additions & 1 deletion README.md
@@ -230,14 +230,17 @@ params.minimum_depth = 10
The defaults for Cecret continue to be for SARS-CoV-2, but there is growing demand for a Monkeypox Virus workflow. As such, there are a few parameters that might benefit the **End User**.

### Using the Monkeypox profile
There are three profiles for Monkeypox Virus sequencing : `mpx`, `mpx_idt` and `mpx_primalseq`. The `mpx` profile has some defaults for a metagenomic-type sequencing, while `mpx_idt` is for libraries prepped with [IDT](https://www.idtdna.com/)'s primers, and `mpx_primalseq` which has been [validated](https://www.medrxiv.org/content/10.1101/2022.10.14.22280783v1.full-text) with Illumina library prep methods and sequencing platforms.
There are four profiles for Monkeypox Virus sequencing: `mpx`, `mpx_idt`, `mpx_yale`, and `mpx_primalseq`. The `mpx` profile has defaults for metagenomic-style sequencing, `mpx_idt` is for libraries prepped with [IDT](https://www.idtdna.com/)'s primers, `mpx_yale` is for amplicons generated with the PrimalSeq protocol using Yale's reference, and `mpx_primalseq` has been [validated](https://www.medrxiv.org/content/10.1101/2022.10.14.22280783v1.full-text) with Illumina library prep methods and sequencing platforms.
```
# metagenomic
nextflow run UPHL-BioNGS/Cecret -profile singularity,mpx

# using IDT's primers
nextflow run UPHL-BioNGS/Cecret -profile singularity,mpx_idt

# using primalseq with Yale's reference
nextflow run UPHL-BioNGS/Cecret -profile singularity,mpx_yale

# using Illumina library prep methods and sequencing platforms
nextflow run UPHL-BioNGS/Cecret -profile singularity,mpx_primalseq
```
35 changes: 22 additions & 13 deletions bin/combine_results.py
@@ -10,9 +10,9 @@
aci_file = 'aci_coverage_summary.csv'
ampliconstats_file = 'ampliconstats.summary'
samtools_coverage_file = 'samtools_coverage_summary.tsv'
pangolin_file = 'multiqc_data/multiqc_pangolin.txt'
pangolin_file = 'lineage_report.csv'
pango_collapse_file = 'pango_collapse.csv'
nextclade_file = 'multiqc_data/multiqc_nextclade.txt'
nextclade_file = 'nextclade.csv'
vadr_file = 'vadr.csv'
fastp_file = 'multiqc_data/multiqc_general_stats.txt'
fastq_names_file = 'fastq_names.csv'
@@ -277,9 +277,17 @@
summary_df = summary_df.drop('sample', axis=1)
columns = columns + ['samtools_meandepth_after_trimming', 'samtools_per_1X_coverage_after_trimming']

def vadr_sample_name(s):
if s.count('.') >=1:
if len(s.split(".")[-1]) > 2:
return '.'.join(s.split(".")[:-1])
return s

if exists(vadr_file) :
print("Getting results from vadr file " + vadr_file)
vadr_df = pd.read_csv(vadr_file, dtype = str, usecols = ['name', 'p/f', 'model', 'alerts'], index_col= False)
vadr_df = vadr_df[vadr_df['name'] != 'name']
vadr_df = vadr_df[vadr_df['name'] != 'seq']
vadr_df = vadr_df.add_prefix('vadr_')
vadr_columns = list(vadr_df.columns)
vadr_columns.remove('vadr_name')
@@ -291,7 +299,8 @@
summary_df.drop('vadr_name', axis=1, inplace=True)
columns = ['vadr_p/f'] + columns + vadr_columns
else:
vadr_df['sample_match'] = vadr_df['vadr_name'].str.replace('Consensus_', '', regex = False).str.split(".").str[0]
vadr_df['sample_match'] = vadr_df['vadr_name'].str.replace('Consensus_', '', regex = False).apply(vadr_sample_name)

summary_df = pd.merge(summary_df, vadr_df, left_on = 'sample_id', right_on = 'sample_match', how = 'outer')
summary_df['sample_id'].fillna(summary_df['sample_match'], inplace=True)
summary_df.drop('vadr_name', axis=1, inplace=True)
@@ -301,42 +310,42 @@
if exists(nextclade_file) :
print("Getting results from nextclade file " + nextclade_file)

use_cols = ['Sample', 'clade', 'qc_overallstatus', 'qc_overallscore']
use_cols = ['seqName', 'clade', 'qc.overallStatus', 'qc.overallScore']

first = pd.read_table(nextclade_file, sep = '\t' , dtype = str, nrows=1)
first = pd.read_table(nextclade_file, sep = ';' , dtype = str, nrows=1)
if 'clade_who' in first.columns:
use_cols.append('clade_who')
if 'outbreak' in first.columns:
use_cols.append('outbreak')
if 'lineage' in first.columns:
use_cols.append('lineage')

nextclade_df = pd.read_table(nextclade_file, sep = '\t' , dtype = str, usecols = use_cols)
nextclade_df = pd.read_table(nextclade_file, sep = ';' , dtype = str, usecols = use_cols)
nextclade_df=nextclade_df.add_prefix('nextclade_')
nextclade_columns = list(nextclade_df.columns)
nextclade_df['sample_match'] = nextclade_df['nextclade_Sample'].str.replace('Consensus_', '', regex = False)
nextclade_columns.remove('nextclade_Sample')
nextclade_df['sample_match'] = nextclade_df['nextclade_seqName'].str.replace('Consensus_', '', regex = False).str.split(' ').str[0]
nextclade_columns.remove('nextclade_seqName')
nextclade_columns.remove('nextclade_clade')

summary_df = pd.merge(summary_df, nextclade_df, left_on = 'sample_id', right_on = 'sample_match', how = 'outer')
summary_df['sample_id'].fillna(summary_df['sample_match'], inplace=True)
summary_df.drop('nextclade_Sample', axis=1, inplace=True)
summary_df.drop('nextclade_seqName', axis=1, inplace=True)
summary_df.drop('sample_match', axis = 1, inplace = True )
columns = ['nextclade_clade'] + columns + nextclade_columns

if exists(pangolin_file) :
print("Getting results from pangolin file " + pangolin_file)

pangolin_df = pd.read_table(pangolin_file, dtype = str)
pangolin_df = pd.read_csv(pangolin_file, dtype = str)
pangolin_df = pangolin_df.add_prefix('pangolin_')
pangolin_columns = list(pangolin_df.columns)
pangolin_df['sample_match'] = pangolin_df['pangolin_Sample'].str.replace('Consensus_', '', regex= False)
pangolin_columns.remove('pangolin_Sample')
pangolin_df['sample_match'] = pangolin_df['pangolin_taxon'].str.replace('Consensus_', '', regex= False).str.split(' ').str[0]
pangolin_columns.remove('pangolin_taxon')
pangolin_columns.remove('pangolin_lineage')

summary_df = pd.merge(summary_df, pangolin_df, left_on = 'sample_id', right_on = 'sample_match', how = 'outer')
summary_df['sample_id'].fillna(summary_df['sample_match'], inplace=True)
summary_df.drop('pangolin_Sample', axis=1, inplace=True)
summary_df.drop('pangolin_taxon', axis=1, inplace=True)
summary_df.drop('sample_match', axis=1, inplace=True)
columns = ['pangolin_lineage'] + columns + pangolin_columns

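Note on the `vadr_sample_name` helper added above: it replaces the previous `str.split(".").str[0]` match, which truncated sample ids that legitimately contain dots. The last dotted component is treated as a file-type suffix only when it is longer than two characters, so short dotted pieces of the id survive. A minimal sketch restating the helper, with hypothetical sample names:

```
def vadr_sample_name(s):
    # drop a trailing suffix such as '.fasta' or '.consensus',
    # but keep short dotted components that belong to the sample id
    if s.count('.') >= 1 and len(s.split('.')[-1]) > 2:
        return '.'.join(s.split('.')[:-1])
    return s

assert vadr_sample_name('sample01.consensus') == 'sample01'
assert vadr_sample_name('sample01.fa') == 'sample01.fa'            # short suffix kept
assert vadr_sample_name('run1.sample01.fasta') == 'run1.sample01'  # inner dot kept
```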
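The lineage and clade columns also now come straight from the tools' native outputs rather than the MultiQC-normalized tables: pangolin's `lineage_report.csv` is a regular CSV keyed on `taxon`, while nextclade's `nextclade.csv` is semicolon-delimited and keyed on `seqName`. A reduced sketch of the merge, assuming the stock column names those tools write:

```
import pandas as pd

# nextclade writes semicolon-delimited CSV; pangolin writes plain CSV
nextclade_df = pd.read_csv('nextclade.csv', sep=';', dtype=str,
                           usecols=['seqName', 'clade'])
pangolin_df = pd.read_csv('lineage_report.csv', dtype=str,
                          usecols=['taxon', 'lineage'])

# fasta headers can carry a description after the first space,
# so keep only the leading token as the sample id
nextclade_df['sample_id'] = nextclade_df['seqName'].str.split(' ').str[0]
pangolin_df['sample_id'] = pangolin_df['taxon'].str.split(' ').str[0]

summary_df = nextclade_df.merge(pangolin_df, on='sample_id', how='outer')
```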
2 changes: 1 addition & 1 deletion main.nf
@@ -513,7 +513,7 @@ ch_reads.ifEmpty { println("No fastq or fastq.gz files were found at ${param

workflow CECRET {
ch_for_dataset = Channel.empty()
ch_for_version = Channel.from("Cecret version", workflow.manifest.version).first()
ch_for_version = Channel.from("Cecret version", workflow.manifest.version).collect()
ch_prealigned = Channel.empty()
ch_versions = Channel.empty()

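On the `main.nf` change: `.first()` emits only the first item in the channel (the literal string `"Cecret version"`), silently dropping `workflow.manifest.version`, whereas `.collect()` gathers every item into a single list so both values travel downstream together. A rough Python analogy, with a made-up version string:

```
items = ['Cecret version', 'v3.x']  # hypothetical channel contents

first_result = items[0]       # what .first() forwarded: the label only
collect_result = list(items)  # what .collect() forwards: label and version
```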
2 changes: 1 addition & 1 deletion modules/artic.nf
@@ -35,7 +35,7 @@ process artic {
# time stamp + capturing tool versions
date > \$log
artic --version >> \$log
artic_version=\$(artic --version)
artic_version=\$(artic --version | awk '{print \$NF}')

cp ${reference} schema/cecret/V1/cecret.reference.fasta
cp ${bed} schema/cecret/V1/cecret.scheme.bed
2 changes: 1 addition & 1 deletion modules/bwa.nf
@@ -34,7 +34,7 @@ process bwa {
# time stamp + capturing tool versions
date > \$log
echo "bwa \$(bwa 2>&1 | grep Version )" >> \$log
bwa_version="bwa : "\$(bwa 2>&1 | grep Version)
bwa_version=\$(bwa 2>&1 | grep Version | awk '{print \$NF}')

# index the reference fasta file
bwa index ${reference_genome}
4 changes: 2 additions & 2 deletions modules/fastp.nf
@@ -37,7 +37,7 @@ process fastp {
# time stamp + capturing tool versions
date > \$log
fastp --version >> \$log
cleaner_version="\$(fastp --version 2>&1 | head -n 1)"
cleaner_version=\$(fastp --version 2>&1 | awk '{print \$NF}')

fastp ${args} \
-i ${reads[0]} \
@@ -63,7 +63,7 @@ process fastp {
# time stamp + capturing tool versions
date > \$log
fastp --version >> \$log
cleaner_version="\$(fastp --version 2>&1 | head -n 1)"
cleaner_version=\$(fastp --version 2>&1 | awk '{print \$NF}')

fastp ${args} \
-i ${reads} \
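The `artic`, `bwa`, and `fastp` modules all move from capturing the full `--version` line to capturing only its last whitespace-separated field via `awk '{print $NF}'`, keeping the recorded versions uniform across tools. The same trim in Python, with example version lines:

```
# awk's $NF is the last whitespace-separated field of a line
def last_field(line):
    return line.split()[-1]

assert last_field('fastp 0.23.4') == '0.23.4'
assert last_field('Version: 0.7.17-r1188') == '0.7.17-r1188'  # bwa-style line
assert last_field('artic 1.2.4') == '1.2.4'
```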