Skip to content

Commit

Permalink
Merge pull request #5130 from paulzierep/pharokka-wrapper
Browse files Browse the repository at this point in the history
Pharokka wrapper
  • Loading branch information
bgruening authored Mar 1, 2023
2 parents a9bc153 + f3d0382 commit b5b9d62
Show file tree
Hide file tree
Showing 51 changed files with 12,764 additions and 0 deletions.
11 changes: 11 additions & 0 deletions tools/pharokka/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
owner: iuc
name: pharokka
description: rapid standardised annotation tool for bacteriophage genomes and metagenomes
long_description: |
pharokka is a rapid standardised annotation tool for bacteriophage genomes and metagenomes.
If you are looking for rapid standardised annotation of bacterial genomes, please use prokka,
which inspired the creation of pharokka, or bakta.
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/pharokka
categories:
- Genome annotation
homepage_url: https://github.com/gbouras13/pharokka
40 changes: 40 additions & 0 deletions tools/pharokka/macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?xml version="1.0"?>
<macros>
<token name="@TOOL_VERSION@">1.2.0</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@PROFILE@">21.05</token>
<xml name="biotools">
<xrefs>
<xref type="bio.tools">
pharokka
</xref>
</xrefs>
</xml>
<xml name="requirements">
<requirements>
<requirement type="package" version="@TOOL_VERSION@">
pharokka
</requirement>
<requirement type="package" version="3.0">
zip
</requirement>
</requirements>
</xml>
<xml name="version">
<version_command>
pharokka.py --version
</version_command>
</xml>
<xml name="citations">
<citations>
<citation type="doi">
10.1093/bioinformatics/btac776
</citation>
</citations>
</xml>
<xml name="creator">
<creator>
<person givenName="Paul" familyName="Zierep" email="[email protected]" />
</creator>
</xml>
</macros>
165 changes: 165 additions & 0 deletions tools/pharokka/pharokka.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
<tool id="pharokka" name="bacteriophage annotation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
<description>
rapid standardised annotation tool for bacteriophage genomes and metagenomes
</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="biotools" />
<expand macro="requirements" />
<expand macro="version" />
<command detect_errors="exit_code">
<![CDATA[
##run tool
#if str( $terminase.terminase_selector ) == 'no_terminase':
pharokka.py
-i '$fasta'
-o pharokka_output
-d '$db_cached.fields.path'
-t \${GALAXY_SLOTS:-8}
$gene_predictor
$meta
-e '$evalue'
#else:
pharokka.py
-i '$fasta'
-o pharokka_output
-d '$db_cached.fields.path'
-t \${GALAXY_SLOTS:-8}
$gene_predictor
$meta
-e '$evalue'
--terminase
--terminase_strand '$terminase.terminase_strand'
--terminase_start '$terminase.terminase_start'
#end if
## create output
#if $zip_output == 'true':
&& zip -r out.zip pharokka_output
#end if
]]>
</command>
<inputs>
<!-- the genome -->
<param type="data" name="fasta" format="fasta" help="Please upload an genome file of a bacteriophage in fasta format." label="Bacteriophage genome" />
<param name="db_cached" type="select" label="Using built-in pharokka DB" help="Using built-in pharokka DB">
<options from_data_table="pharokka_db">
</options>
<validator type="no_options" message="A built-in pharokka DB is not available. Please ask the galaxy admins to install one on the server." />
</param>
<param name="gene_predictor" type="select" label="User specified gene predictor">
<option value="-g phanotate">
Phanotate
</option>
<option value="-g prodigal">
Prodigal
</option>
</param>
<param name="meta" type="boolean" checked="false" truevalue="--meta" falsevalue="" label="meta mode for metavirome input samples" />
<param name="evalue" type="float" value="1E-5" min="1E-20" max="10" label="E-value threshold for mmseqs2 PHROGs database search. Defaults to 1E-05." />
<!-- optional arguments -->
<conditional name="terminase">
<param name="terminase_selector" type="select" label="Runs - terminase large subunit - re-orientation mode. Single genome input only and requires --terminase_strand and --terminase_start to be specified.">
<option value="no_terminase">
Do not run 'terminase large subunit' re-orientation mode.
</option>
<option value="run_terminase">
Runs 'terminase large subunit' re-orientation mode.
</option>
</param>
<when value="no_terminase">
</when>
<when value="run_terminase">
<param name="terminase_strand" type="select" label="Strand of terminase large subunit.">
<option value="pos">
Positive
</option>
<option value="neg">
Negative
</option>
</param>
<param name="terminase_start" type="integer" value="1" label="Start coordinate of the terminase large subunit." />
</when>
</conditional>
<!-- optional outputs -->
<param name="zip_output" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Create a Zip archive of the complete output for further investigation." />
</inputs>
<outputs>
<data name="pharokka_gbk" format="genbank" from_work_dir="pharokka_output/pharokka.gbk" label="${tool.name} on ${on_string}: Genbank" />
<data name="pharokka_gff" format="gff" from_work_dir="pharokka_output/pharokka.gff" label="${tool.name} on ${on_string}: GFF" />
<data name="archive_output" format="zip" from_work_dir="out.zip" label="${tool.name} on ${on_string}: zip of the complete output" >
<filter>zip_output</filter>
</data>

</outputs>
<tests>
<!-- test input from DB - no zip -->
<test expect_num_outputs="2">
<param name="db_cached" value="pharokka_db" />
<param name="fasta" value="SAOMS1.fasta" />
<param name="zip_output" value="false" />
<!-- check file size and text since output is non-deterministic -->
<output name="pharokka_gbk">
<assert_contents>
<has_size value="353875" delta="10" />
<has_text text="VERSION MW460250_1" />
</assert_contents>
</output>
<output name="pharokka_gff">
<assert_contents>
<has_size value="191497" delta="10" />
<has_text text="##sequence-region MW460250_1 1 140135" />
</assert_contents>
</output>
</test>
<!-- test input from DB -->
<test expect_num_outputs="3">
<param name="db_cached" value="pharokka_db" />
<param name="fasta" value="SAOMS1.fasta" />
<param name="zip_output" value="true" />
<!-- check file size and text since output is non-deterministic -->
<output name="pharokka_gbk">
<assert_contents>
<has_size value="353875" delta="10" />
<has_text text="VERSION MW460250_1" />
</assert_contents>
</output>
<output name="pharokka_gff">
<assert_contents>
<has_size value="191497" delta="10" />
<has_text text="##sequence-region MW460250_1 1 140135" />
</assert_contents>
</output>
<!-- check created zip -->
<output name="archive_output">
<assert_contents>
<has_archive_member path=".*\/pharokka\.gff" />
<has_archive_member path=".*\/pharokka\.gbk" />
<has_archive_member path=".*\/pharokka.*\.log" />
</assert_contents>
</output>
</test>
</tests>
<help>
<![CDATA[
Pharokka is a rapid standardised annotation tool for bacteriophage genomes and metagenomes.
Pharokka identifies predicted coding sequences (CDS), transfer RNAs (tRNAs),
transfer-messenger RNAs (tmRNAs) and clustered regularly interspaced short
palindromic repeats (CRISPRs), providing functional annotation for CDS using the PHROGs database.
Pharokka requires assembled DNA sequences in FASTA format.
For phage isolates, this usually consists of one complete contig,
but Pharokka can also annotate incomplete assemblies or metavirome samples with multiple
contigs in the multiFASTA format.
Furthermore, metagenomically assembled phage genomes and genomic contigs,
derived using programs such as Virstorter2, Hecatomb and Cenote-taker 2,
are also suitable to be annotated using Pharokka using meta mode.
If you are looking for rapid standardised annotation of bacterial genomes, please use bakta.
]]>
</help>
<expand macro="citations" />
<expand macro="creator" />
</tool>
Loading

0 comments on commit b5b9d62

Please sign in to comment.