-
Notifications
You must be signed in to change notification settings - Fork 119
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #988 from Kincekara/clair
add clair3
- Loading branch information
Showing
5 changed files
with
197 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
# Clair3 release to build. Declared before the first FROM so that every stage
# can re-import it with a bare `ARG CLAIR3_VER`.
ARG CLAIR3_VER="1.0.9"

## Builder ##
# Throw-away stage: compiles Clair3 and downloads the models; only /clair3 is
# copied out of it later.
FROM mambaorg/micromamba:1.5.8 AS builder

# Re-declare the global ARG to make it visible inside this stage.
ARG CLAIR3_VER

# Root is required for apt-get and for creating /clair3.
USER root

WORKDIR /

# Download/extract tools and the C/C++ toolchain used by the build below.
RUN apt-get update && \
    apt-get install -y \
      wget \
      bzip2 \
      make \
      g++ \
      libboost-graph-dev

# Conda environment used while building (Python, PyPy, TensorFlow, samtools,
# whatshap, compression libraries, autotools).
RUN micromamba install --name base -c conda-forge -c bioconda \
      python=3.9.0 \
      pypy3.6 \
      tensorflow-cpu=2.8.0 \
      pytables \
      pigz \
      cffi=1.14.4 \
      parallel=20191122 \
      zstd \
      samtools=1.15.1 \
      whatshap=1.7 \
      xz \
      zlib \
      bzip2 \
      automake \
      curl && \
    micromamba clean -a -y

# Make the conda-installed tools reachable from the following RUN steps.
ENV PATH="$PATH:/opt/conda/bin/"

# NOTE(review): micromamba-docker convention that activates the base env for
# subsequent RUN instructions - confirm against the base image documentation.
ARG MAMBA_DOCKERFILE_ACTIVATE=1

# Extra Python packages for PyPy (mpmath) and CPython (tensorflow-addons).
RUN pypy3 -m ensurepip && \
    pypy3 -m pip install mpmath==1.2.1 && \
    pip install tensorflow-addons

# Fetch the tagged Clair3 source, compile the realigner and de Bruijn graph
# shared objects, run the project Makefile, then collect everything needed at
# runtime under /clair3.
RUN wget https://github.com/HKU-BAL/Clair3/archive/refs/tags/v${CLAIR3_VER}.tar.gz && \
    tar -xvf v${CLAIR3_VER}.tar.gz && \
    cd Clair3-${CLAIR3_VER}/preprocess/realign/ && \
    g++ -std=c++14 -O1 -shared -fPIC -o realigner ssw_cpp.cpp ssw.c realigner.cpp && \
    g++ -std=c++11 -shared -fPIC -o debruijn_graph -O3 debruijn_graph.cpp && \
    cd ../.. && \
    make && \
    mkdir /clair3 && \
    cp -rv clair3 preprocess postprocess scripts shared /clair3 && \
    cp clair3.py run_clair3.sh /clair3 && \
    cp longphase libclair3* /clair3 && \
    cp LICENSE.md /clair3

# Download the pre-trained models and unpack them into /clair3/models.
RUN mkdir /clair3/models && \
    wget http://www.bio8.cs.hku.hk/clair3/clair3_models/clair3_models.tar.gz && \
    tar --no-same-owner -C /clair3/models -xvf clair3_models.tar.gz
|
||
## App ##
# Runtime image: conda environment plus the /clair3 tree produced by the
# builder stage. No compilers or source archives end up here.
FROM mambaorg/micromamba:1.5.8 AS app

# Re-import the global build argument so it can be used in the labels below.
ARG CLAIR3_VER

# Root is required for apt-get and for installing into /opt/conda.
USER root

WORKDIR /

LABEL base.image="mambaorg/micromamba:1.5.8"
LABEL dockerfile.version="1"
LABEL software="CLAIR3"
LABEL software.version="${CLAIR3_VER}"
LABEL description="Clair3 is a germline small variant caller for long-reads."
LABEL website="https://github.com/HKU-BAL/Clair3"
LABEL license="https://github.com/HKU-BAL/Clair3/blob/main/LICENSE.md"
LABEL maintainer="Kutluhan Incekara"
LABEL maintainer.email="[email protected]"

# Install procps and drop the apt package lists in the same layer so they do
# not inflate the shipped image.
RUN apt-get update && \
    apt-get install --no-install-recommends -y \
      procps && \
    rm -rf /var/lib/apt/lists/*

# Runtime conda environment; caches are removed in the same layer.
RUN micromamba install --name base -c conda-forge -c bioconda \
      python=3.9.0 \
      numpy=1.24.3 \
      pypy3.6 \
      tensorflow-cpu=2.8.0 \
      pytables \
      pigz \
      cffi=1.14.4 \
      parallel=20191122 \
      zstd \
      samtools=1.15.1 \
      whatshap=1.7 && \
    micromamba clean -a -y && \
    rm -rf /opt/conda/pkgs/

# Must precede the pip steps below: they rely on /opt/conda/bin being on PATH.
# /clair3 is added so run_clair3.sh can be called by name.
ENV PATH="/opt/conda/bin/:/clair3:${PATH}" \
    LC_ALL=C.UTF-8

# Extra Python packages for PyPy (mpmath) and CPython (tensorflow-addons);
# --no-cache-dir is pip's documented spelling for disabling the wheel cache.
RUN pypy3 -m ensurepip && \
    pypy3 -m pip install --no-cache-dir mpmath==1.2.1 && \
    pip install --no-cache-dir tensorflow-addons

# Compiled Clair3, helper scripts and models from the builder stage.
COPY --from=builder /clair3 /clair3

# Exec form: no intermediate shell, so the process receives signals directly.
CMD ["run_clair3.sh"]

WORKDIR /data
|
||
## Test ##
# Optional stage that runs the ONT quick demo against the app image.
FROM app AS test

# wget is used by ont_quick_test.sh to download the demo inputs.
RUN apt-get update && \
    apt-get install -y wget

# Copied into the current WORKDIR inherited from the app stage.
COPY ont_quick_test.sh .

# Make the demo script executable and run it.
RUN chmod +x ont_quick_test.sh && \
    ./ont_quick_test.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Clair3 container | ||
|
||
Main tool: [clair3](https://github.com/HKU-BAL/Clair3) | ||
|
||
Code repository: https://github.com/HKU-BAL/Clair3 | ||
|
||
Basic information on how to use this tool: | ||
- executable: run_clair3.sh | ||
- help: -h, --help | ||
- version: -v, --version | ||
- description: Clair3 is a germline small variant caller for long-reads. | ||
|
||
Additional information: | ||
|
||
This container includes models in `/clair3/models` | ||
|
||
Full documentation: https://github.com/HKU-BAL/Clair3 | ||
|
||
## Example Usage | ||
|
||
```bash | ||
## --platform options: {ont,hifi,ilmn} | ||
## --model_path: absolute model path prefix, e.g. /clair3/models/ont | ||
## --output: absolute output path prefix | ||
run_clair3.sh \ | ||
--bam_fn=${BAM} \ | ||
--ref_fn=${REF} \ | ||
--threads=${THREADS} \ | ||
--platform="ont" \ | ||
--model_path=${MODEL_PREFIX} \ | ||
--output=${OUTPUT_DIR} | ||
## pileup output file: ${OUTPUT_DIR}/pileup.vcf.gz | ||
## full-alignment output file: ${OUTPUT_DIR}/full_alignment.vcf.gz | ||
## Clair3 final output file: ${OUTPUT_DIR}/merge_output.vcf.gz | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#!/usr/bin/env bash
## Adapted from http://www.bio8.cs.hku.hk/clair3/demo/clair3_ont_quick_demo.sh
##
## Downloads the Clair3 ONT quick-demo data set (chr20:100000-300000), runs
## run_clair3.sh on it with the ONT model under /clair3/models, and fails if
## any step fails or the final VCF is missing.

# Abort on the first failing command, on unset variables and on pipe failures,
# so a broken download or a failed `cd` cannot go unnoticed.
set -euo pipefail

# Parameters
PLATFORM='ont'
INPUT_DIR="/data/clair3_ont_quickDemo"
OUTPUT_DIR="${INPUT_DIR}/output"
THREADS=4
DEMO_URL="http://www.bio8.cs.hku.hk/clair3/demo/quick_demo/ont"

# Demo file names
REF="GRCh38_no_alt_chr20.fa"
BAM="HG003_chr20_demo.bam"
BASELINE_VCF_FILE_PATH="HG003_GRCh38_chr20_v4.2.1_benchmark.vcf.gz"
BASELINE_BED_FILE_PATH="HG003_GRCh38_chr20_v4.2.1_benchmark_noinconsistent.bed"
OUTPUT_VCF_FILE_PATH="merge_output.vcf.gz"

# Region covered by the demo BAM
CONTIGS="chr20"
START_POS=100000
END_POS=300000

## Create local directory structure
mkdir -p "${INPUT_DIR}" "${OUTPUT_DIR}"

# Download quick demo data: GRCh38_no_alt reference and index, the demo BAM
# and index, and the GIAB truth VCF, its index and the BED file.
for demo_file in \
    "${REF}" \
    "${REF}.fai" \
    "${BAM}" \
    "${BAM}.bai" \
    "${BASELINE_VCF_FILE_PATH}" \
    "${BASELINE_VCF_FILE_PATH}.tbi" \
    "${BASELINE_BED_FILE_PATH}"
do
    wget -q -P "${INPUT_DIR}" "${DEMO_URL}/${demo_file}"
done

# printf instead of `echo -e`: tab handling does not depend on the shell.
printf '%s\t%s\t%s\n' "${CONTIGS}" "${START_POS}" "${END_POS}" > "${INPUT_DIR}/quick_demo.bed"

cd "${OUTPUT_DIR}"
# Run Clair3 using one command
run_clair3.sh \
    --bam_fn="${INPUT_DIR}/${BAM}" \
    --ref_fn="${INPUT_DIR}/${REF}" \
    --threads="${THREADS}" \
    --platform="${PLATFORM}" \
    --model_path="/clair3/models/${PLATFORM}" \
    --output="${OUTPUT_DIR}" \
    --bed_fn="${INPUT_DIR}/quick_demo.bed"

# The run only counts as successful if the final merged VCF exists and is
# non-empty.
test -s "${OUTPUT_DIR}/${OUTPUT_VCF_FILE_PATH}"