-
Notifications
You must be signed in to change notification settings - Fork 117
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1040 from taylorpaisie/tkp-rdp
RDP classifier
- Loading branch information
Showing
7 changed files
with
177 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Global build argument: RDP Classifier release to download.
# Declared before FROM, so it must be re-declared inside each stage that uses it.
ARG RDP_VER="2.14"

# ---------------------------------------------------------------------------
# app stage: runtime image containing the RDP Classifier and a launcher script
# ---------------------------------------------------------------------------
FROM ubuntu:jammy AS app
ARG RDP_VER

LABEL base.image="ubuntu:jammy"
LABEL dockerfile.version="1"
LABEL software="RDP Classifier"
LABEL software.version="${RDP_VER}"
LABEL description="The RDP Classifier is a naive Bayesian classifier which was developed to provide rapid taxonomic placement based on rRNA sequence data."
LABEL website="https://github.com/rdpstaff/classifier"
LABEL documentation="https://sourceforge.net/projects/rdp-classifier/"
LABEL license.url="https://github.com/rdpstaff/classifier/blob/master/LICENSE"
LABEL maintainer="Taylor K. Paisie"
LABEL maintainer.email='[email protected]'

# Build-time only: keeps apt non-interactive without leaking the setting into
# the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Install dependencies (Java runtime for the jar, wget/unzip to fetch the release)
RUN apt-get update && apt-get install -y --no-install-recommends \
    openjdk-11-jre \
    unzip \
    wget && \
    apt-get autoclean && rm -rf /var/lib/apt/lists/*

# Install rdp_classifier
# - the zip is deleted in the same layer so it does not stay in the image
# - the launcher forwards its arguments as "$@" (quoted) so that arguments
#   containing spaces reach the jar intact
RUN wget -q https://sourceforge.net/projects/rdp-classifier/files/rdp-classifier/rdp_classifier_${RDP_VER}.zip && \
    unzip -q rdp_classifier_${RDP_VER}.zip && \
    rm rdp_classifier_${RDP_VER}.zip && \
    mv /rdp_classifier_${RDP_VER} /rdp_classifier && \
    printf '%s\n' \
      '#!/bin/bash' \
      'exec java -jar /rdp_classifier/dist/classifier.jar "$@"' \
      > /rdp_classifier/dist/classifier && \
    chmod +x /rdp_classifier/dist/classifier

ENV PATH="${PATH}:/rdp_classifier/dist" LC_ALL=C

# Exec form: no intermediate /bin/sh, so the launcher receives signals directly.
CMD ["classifier"]

# WORKDIR creates the directory if it does not exist; no separate mkdir needed.
WORKDIR /data

# ---------------------------------------------------------------------------
# test stage: runs RDP on test controls; not part of the final app image
# ---------------------------------------------------------------------------
FROM app AS test
ARG DEBIAN_FRONTEND=noninteractive

WORKDIR /test

# running help to ensure executable is in path
RUN classifier

# testing on real files
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 \
    wget && \
    rm -rf /var/lib/apt/lists/*

# COPY creates /tests; the unit tests call /tests/scripts/run_controls.sh and
# read their output files from the current working directory (/test).
COPY tests/ /tests/
RUN python3 -m unittest discover -v -s /tests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# RDP Classifier | ||
|
||
Main tool: [RDP Classifier](https://sourceforge.net/projects/rdp-classifier/) | ||
|
||
Code repository: https://github.com/rdpstaff/classifier | ||
|
||
Basic information on how to use this tool: | ||
- executable: | | ||
``` | ||
classify - classify one or multiple samples | ||
crossvalidate - cross validate accuracy testing | ||
libcompare - compare two samples | ||
loot - leave one (sequence or taxon) out accuracy testing | ||
merge-detail - merge classification detail result files to create a taxon assignment counts file | ||
merge-count - merge multiple taxon assignment count files into one count file | ||
random-sample - randomly select a subset or subregion of sequences | ||
rm-dupseq - remove identical or any sequence contained by another sequence | ||
rm-partialseq - remove partial sequences | ||
taxa-sim - calculate and plot the similarities within taxa | ||
train - retrain classifier | ||
``` | ||
|
||
- help: classifier # with no arguments | ||
- version: NA | ||
- description: | | ||
> The RDP Classifier is a naive Bayesian classifier which was developed to provide rapid taxonomic placement based on rRNA sequence data. | ||
|
||
Full documentation: https://sourceforge.net/projects/rdp-classifier/ | ||
|
||
|
||
## Example analysis | ||
Get test data: | ||
``` | ||
# Download test data | ||
wget -nv https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/rdp/2.14/16S_rRNA_gene.Burkholderia_pseudomallei.2002721184.AY305776.1.fasta -O 16S_test.fa | ||
wget -nv https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/rdp/2.14/18S_rRNA_gene.Homo_sapiens.T2T-CHM13v2.0.Chromosome13.fasta -O 18S_test.fa | ||
``` | ||
|
||
Use RDP Classifier to get taxonomic assignments for bacterial and archaeal 16S rRNA sequences: | ||
``` | ||
classifier classify -o taxa_16S_test.txt 16S_test.fa | ||
classifier classify -o taxa_18S_test.txt 18S_test.fa | ||
``` | ||
|
||
## Output | ||
``` | ||
head -2 taxa_16S_test.txt | ||
AY305776.1 Root rootrank 1.0 Bacteria domain 1.0 Pseudomonadota phylum 1.0 Betaproteobacteria class 1.0 Burkholderiales order 1.0 Burkholderiaceae family 1.0 Burkholderia genus 1.0 | ||
``` | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#!/bin/bash
# Runs the RDP Classifier on two control sequences and records the SHA-256
# checksums of the downloaded inputs in the current working directory.

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline, so a broken download or classifier run is
# reported through the exit status instead of being silently skipped.
set -euo pipefail

# Download test data
wget -nv https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/rdp/2.14/16S_rRNA_gene.Burkholderia_pseudomallei.2002721184.AY305776.1.fasta -O 16S_test.fa
wget -nv https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/rdp/2.14/18S_rRNA_gene.Homo_sapiens.T2T-CHM13v2.0.Chromosome13.fasta -O 18S_test.fa

# Get taxonomic assignments for your data
classifier classify -o taxa_16S_test.txt 16S_test.fa
classifier classify -o taxa_18S_test.txt 18S_test.fa

# run checksum on files
sha256sum 16S_test.fa > 16S_checksum.txt
sha256sum 18S_test.fa > 18S_checksum.txt
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import unittest | ||
import subprocess | ||
from subprocess import PIPE | ||
|
||
|
||
class TestControls(unittest.TestCase):
    """Run the control script once, then verify the checksum files it writes."""

    @classmethod
    def setUpClass(cls):
        """Execute the control script before any test in this class runs.

        Raises:
            subprocess.CalledProcessError: if the script exits with a non-zero
                status. Without ``check=True`` a failed run would go unnoticed
                until the tests try to open the checksum files.
        """
        subprocess.run(
            ["bash", "/tests/scripts/run_controls.sh"],
            stdout=subprocess.PIPE,
            check=True,
        )

    @staticmethod
    def _read_checksum(path):
        """Return the first whitespace-separated field of the first line of ``path``."""
        with open(path) as handle:
            return handle.readline().split()[0]

    def test_rdp16S(self):
        """Checksum recorded in 16S_checksum.txt matches the expected value."""
        self.assertEqual(
            self._read_checksum("16S_checksum.txt"),
            "a38342a9ba63946ffb4324c7858f5cc43b873673cb08080437f7500dda351f65",
        )

    def test_rdp18S(self):
        """Checksum recorded in 18S_checksum.txt matches the expected value."""
        self.assertEqual(
            self._read_checksum("18S_checksum.txt"),
            "44bf9c60750ff3b804b3e3a56969dab982307a16faee63f0928b2f54e70b02f7",
        )


if __name__ == "__main__":
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
import unittest | ||
import subprocess | ||
import sys | ||
import re | ||
|
||
|
||
class TestVersion(unittest.TestCase): | ||
def test_python(self): | ||
version = f"{sys.version_info.major}.{sys.version_info.minor}" | ||
self.assertEqual(version, "3.10") # Update this with the expected Python version | ||
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |