Skip to content

Commit

Permalink
Merge pull request #988 from Kincekara/clair
Browse files Browse the repository at this point in the history
add clair3
  • Loading branch information
erinyoung authored Jun 25, 2024
2 parents 8987a2a + c56c817 commit 4ea4db9
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 0 deletions.
1 change: 1 addition & 0 deletions Program_Licenses.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ The licenses of the open-source software that is contained in these Docker image
| Circlator | GNU GPLv3 | https://github.com/sanger-pathogens/circlator/blob/master/LICENSE |
| Circos | GNU GPLv3 | https://circos.ca/ |
| CirculoCov | GNU GPLv3 | https://github.com/erinyoung/CirculoCov/blob/main/LICENSE |
| Clair3 | non-standard | https://github.com/HKU-BAL/Clair3/blob/main/LICENSE.md |
| colorid | MIT | https://github.com/hcdenbakker/colorid/blob/master/LICENSE |
| datasets-sars-cov-2 | Apache 2.0 | https://github.com/CDCgov/datasets-sars-cov-2/blob/master/LICENSE |
| diamond | GNU GPLv3 | https://github.com/bbuchfink/diamond/blob/master/LICENSE |
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ To learn more about the docker pull rate limits and the open source software pro
| [Circlator](https://hub.docker.com/r/staphb/circlator) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/circlator)](https://hub.docker.com/r/staphb/circlator) | <ul><li>1.5.6</li><li>1.5.5</li></ul> | https://github.com/sanger-pathogens/circlator |
| [Circos](https://hub.docker.com/r/staphb/circos) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/circos)](https://hub.docker.com/r/staphb/circos) | <ul><li>[0.69-9](./circos/0.69.9/)</li></ul> | https://circos.ca/ |
| [CirculoCov](https://hub.docker.com/r/staphb/circulocov) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/circulocov)](https://hub.docker.com/r/staphb/circulocov) | <ul><li>[0.1.20240104](./circulocov/0.1.20240104/)</li></ul> | https://github.com/erinyoung/CirculoCov |
| [Clair3](https://hub.docker.com/r/staphb/clair3) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/clair3)](https://hub.docker.com/r/staphb/clair3) | <ul><li>[1.0.9](./clair3/1.0.9/)</li></ul> | https://github.com/HKU-BAL/Clair3 |
| [Clustalo](https://hub.docker.com/r/staphb/clustalo) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/clustalo)](https://hub.docker.com/r/staphb/clustalo) | <ul><li>1.2.4</li></ul> | http://www.clustal.org/omega/ |
| [colorid](https://hub.docker.com/r/staphb/colorid) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/colorid)](https://hub.docker.com/r/staphb/colorid) | <ul><li>0.1.4.3</li></ul> | https://github.com/hcdenbakker/colorid |
| [cutshaw-report-env](https://hub.docker.com/r/staphb/cutshaw-report-env) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/cutshaw-report-env)](https://hub.docker.com/r/staphb/cutshaw-report-env) | <ul><li>1.0.0</li></ul> | https://github.com/VADGS/CutShaw |
Expand Down
119 changes: 119 additions & 0 deletions clair3/1.0.9/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Clair3 release to build. Declared ahead of the first FROM, so every stage
# that needs the value re-declares the ARG to bring it into scope.
ARG CLAIR3_VER="1.0.9"

## Builder ##
# Compiles Clair3 and collects everything the runtime needs under /clair3.
FROM mambaorg/micromamba:1.5.8 AS builder

ARG CLAIR3_VER

USER root

WORKDIR /

# OS-level download and build tools (wget, make and g++ are used further down).
RUN apt-get update && apt-get install -y \
      wget \
      bzip2 \
      make \
      g++ \
      libboost-graph-dev

# Conda packages installed into the base environment for the build.
RUN micromamba install --name base -c conda-forge -c bioconda \
      python=3.9.0 \
      pypy3.6 \
      tensorflow-cpu=2.8.0 \
      pytables \
      pigz \
      cffi=1.14.4 \
      parallel=20191122 \
      zstd \
      samtools=1.15.1 \
      whatshap=1.7 \
      xz \
      zlib \
      bzip2 \
      automake \
      curl \
 && micromamba clean -a -y

ENV PATH="$PATH:/opt/conda/bin/"

# NOTE(review): per the micromamba-docker docs this activates the base
# environment for the RUN instructions that follow - confirm if changing it.
ARG MAMBA_DOCKERFILE_ACTIVATE=1

# Python-level dependencies: mpmath for pypy3, tensorflow-addons for CPython.
RUN pypy3 -m ensurepip \
 && pypy3 -m pip install mpmath==1.2.1 \
 && pip install tensorflow-addons

# Fetch the tagged source, compile the realigner and de Bruijn graph shared
# objects, run the project Makefile, then stage the runtime files in /clair3.
RUN wget https://github.com/HKU-BAL/Clair3/archive/refs/tags/v${CLAIR3_VER}.tar.gz \
 && tar -xvf v${CLAIR3_VER}.tar.gz \
 && cd Clair3-${CLAIR3_VER}/preprocess/realign/ \
 && g++ -std=c++14 -O1 -shared -fPIC -o realigner ssw_cpp.cpp ssw.c realigner.cpp \
 && g++ -std=c++11 -shared -fPIC -o debruijn_graph -O3 debruijn_graph.cpp \
 && cd ../.. \
 && make \
 && mkdir /clair3 \
 && cp -rv clair3 preprocess postprocess scripts shared /clair3 \
 && cp clair3.py run_clair3.sh /clair3 \
 && cp longphase libclair3* /clair3 \
 && cp LICENSE.md /clair3

# Download the pre-trained models and unpack them into /clair3/models.
# NOTE(review): the archive is fetched over plain HTTP with no checksum
# verification - consider pinning a sha256 once a trusted value is available.
RUN mkdir /clair3/models \
 && wget http://www.bio8.cs.hku.hk/clair3/clair3_models/clair3_models.tar.gz \
 && tar --no-same-owner -C /clair3/models -xvf clair3_models.tar.gz

## App ##
# Runtime image: the conda/pip runtime dependencies plus the /clair3 tree
# produced by the builder stage. No compilers are installed here.
FROM mambaorg/micromamba:1.5.8 AS app

ARG CLAIR3_VER

USER root

WORKDIR /

LABEL base.image="mambaorg/micromamba:1.5.8"
LABEL dockerfile.version="1"
LABEL software="CLAIR3"
LABEL software.version="${CLAIR3_VER}"
LABEL description="Clair3 is a germline small variant caller for long-reads."
LABEL website="https://github.com/HKU-BAL/Clair3"
LABEL license="https://github.com/HKU-BAL/Clair3/blob/main/LICENSE.md"
LABEL maintainer="Kutluhan Incekara"
LABEL maintainer.email="[email protected]"

# procps is the only OS package added. The apt lists are removed in the same
# layer so they do not end up in the final image.
RUN apt-get update && apt-get install --no-install-recommends -y \
      procps \
 && apt-get autoclean \
 && rm -rf /var/lib/apt/lists/*

# Runtime conda packages; caches are cleaned in the same layer.
RUN micromamba install --name base -c conda-forge -c bioconda \
      python=3.9.0 \
      numpy=1.24.3 \
      pypy3.6 \
      tensorflow-cpu=2.8.0 \
      pytables \
      pigz \
      cffi=1.14.4 \
      parallel=20191122 \
      zstd \
      samtools=1.15.1 \
      whatshap=1.7 \
 && micromamba clean -a -y \
 && rm -rf /opt/conda/pkgs/

# /clair3 is on PATH so run_clair3.sh can be called by name.
ENV PATH="/opt/conda/bin/:/clair3:${PATH}" \
    LC_ALL=C.UTF-8

# --no-cache-dir is pip's canonical spelling of the option; it keeps the pip
# download cache out of the layer.
RUN pypy3 -m ensurepip \
 && pypy3 -m pip install --no-cache-dir mpmath==1.2.1 \
 && pip install --no-cache-dir tensorflow-addons

COPY --from=builder /clair3 /clair3

# Exec (JSON) form so the command is not wrapped in an extra shell.
CMD ["run_clair3.sh"]

WORKDIR /data

## Test ##
# Runs the ONT quick demo against the app image as a build-time smoke test.
FROM app AS test

# wget is needed by the test script to download the demo data.
RUN apt-get update && apt-get install -y wget

COPY ont_quick_test.sh .

# Invoke the script through bash explicitly: it does not rely on a shebang or
# on the executable bit, and the script uses bash-specific constructs.
RUN bash ./ont_quick_test.sh
32 changes: 32 additions & 0 deletions clair3/1.0.9/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Clair3 container

Main tool: [clair3](https://github.com/HKU-BAL/Clair3)

Code repository: https://github.com/HKU-BAL/Clair3

Basic information on how to use this tool:
- executable: run_clair3.sh
- help: -h, --help
- version: -v, --version
- description: Clair3 is a germline small variant caller for long-reads.

Additional information:

This container includes models in `/clair3/models`

Full documentation: https://github.com/HKU-BAL/Clair3

## Example Usage

```bash
## --platform options: {ont,hifi,ilmn}
## --model_path: absolute model path prefix
## --output: absolute output path prefix
run_clair3.sh \
  --bam_fn=${BAM} \
  --ref_fn=${REF} \
  --threads=${THREADS} \
  --platform="ont" \
  --model_path=${MODEL_PREFIX} \
  --output=${OUTPUT_DIR}

## pileup output file: ${OUTPUT_DIR}/pileup.vcf.gz
## full-alignment output file: ${OUTPUT_DIR}/full_alignment.vcf.gz
## Clair3 final output file: ${OUTPUT_DIR}/merge_output.vcf.gz
```
44 changes: 44 additions & 0 deletions clair3/1.0.9/ont_quick_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
## Adapted from http://www.bio8.cs.hku.hk/clair3/demo/clair3_ont_quick_demo.sh
##
## Smoke test: downloads the ONT quick-demo data set, runs run_clair3.sh on
## chr20:100000-300000 with the bundled ONT model, and fails if any step fails
## or the final VCF is missing.

# Abort on the first failing command, on unset variables and on pipe failures,
# so a failed download is not silently ignored.
set -euo pipefail

# Parameters
PLATFORM='ont'
INPUT_DIR="/data/clair3_ont_quickDemo"
OUTPUT_DIR="${INPUT_DIR}/output"
THREADS=4
DEMO_URL="http://www.bio8.cs.hku.hk/clair3/demo/quick_demo/ont"

# Demo file names
REF="GRCh38_no_alt_chr20.fa"
BAM="HG003_chr20_demo.bam"
BASELINE_VCF_FILE_PATH="HG003_GRCh38_chr20_v4.2.1_benchmark.vcf.gz"
BASELINE_BED_FILE_PATH="HG003_GRCh38_chr20_v4.2.1_benchmark_noinconsistent.bed"
OUTPUT_VCF_FILE_PATH="merge_output.vcf.gz"

# Region to call
CONTIGS="chr20"
START_POS=100000
END_POS=300000

## Create local directory structure
mkdir -p "${INPUT_DIR}" "${OUTPUT_DIR}"

# Download quick demo data: GRCh38_no_alt reference (+ index), the BAM for
# chr20:100000-300000 (+ index), and the GIAB truth VCF (+ index) and BED.
for demo_file in \
    "${REF}" \
    "${REF}.fai" \
    "${BAM}" \
    "${BAM}.bai" \
    "${BASELINE_VCF_FILE_PATH}" \
    "${BASELINE_VCF_FILE_PATH}.tbi" \
    "${BASELINE_BED_FILE_PATH}"
do
    wget -q -P "${INPUT_DIR}" "${DEMO_URL}/${demo_file}"
done

# printf writes the tab-separated BED line without relying on `echo -e`.
printf '%s\t%s\t%s\n' "${CONTIGS}" "${START_POS}" "${END_POS}" > "${INPUT_DIR}/quick_demo.bed"

cd "${OUTPUT_DIR}"
# Run Clair3 using one command
run_clair3.sh \
    --bam_fn="${INPUT_DIR}/${BAM}" \
    --ref_fn="${INPUT_DIR}/${REF}" \
    --threads="${THREADS}" \
    --platform="${PLATFORM}" \
    --model_path="/clair3/models/${PLATFORM}" \
    --output="${OUTPUT_DIR}" \
    --bed_fn="${INPUT_DIR}/quick_demo.bed"

# The run only counts as a pass if the final merged VCF exists and is non-empty.
if [ ! -s "${OUTPUT_DIR}/${OUTPUT_VCF_FILE_PATH}" ]; then
    echo "ERROR: expected output ${OUTPUT_DIR}/${OUTPUT_VCF_FILE_PATH} was not produced" >&2
    exit 1
fi

0 comments on commit 4ea4db9

Please sign in to comment.