forked from nextstrain/docker-base
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
426 lines (356 loc) · 17.7 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
# Multi-stage image build.
#
# Stage one ("builder") compiles and downloads everything. Stage two ("final")
# copies only the finished artifacts out of the builder, so tools that are
# needed solely at build time never bloat the published image.

# ---- Stage 1: the temporary builder image ----
FROM python:3.10-slim-bullseye AS builder

# Every RUN goes through bash with -e (stop on error), -u (unset variables are
# errors) and pipefail (a failure anywhere in a pipeline fails the step).
SHELL ["/bin/bash", "-e", "-u", "-o", "pipefail", "-c"]

# System packages needed while building:
#   autoconf, automake : building VCFtools; may be used by package managers to build from source
#   build-essential    : gcc, g++, make, etc. for building various tools; may be used by package managers to build from source
#   ca-certificates    : secure HTTPS connections
#   curl               : downloading source files
#   git                : git pip installs
#   jq                 : used in builder-scripts/latest-augur-release-tag
#   libsqlite3-dev     : building pyfastx (for Augur)
#   pkg-config         : building VCFtools; may be used by package managers to build from source
#   zlib1g-dev         : building VCFtools and pyfastx; may be used by package managers to build from source
#   nodejs             : installing Auspice (installed by a separate step below)
RUN apt-get update \
 && apt-get install --yes --no-install-recommends \
        autoconf \
        automake \
        build-essential \
        ca-certificates \
        curl \
        git \
        jq \
        libsqlite3-dev \
        pkg-config \
        zlib1g-dev
# Node.js is pinned to the 14.x line via the NodeSource setup script, which is
# piped into bash before the nodejs package is installed.
# https://github.com/nodesource/distributions/blob/0d81da75/README.md#installation-instructions
RUN curl --fail --silent --show-error --location https://deb.nodesource.com/setup_14.x | bash - \
 && apt-get update \
 && apt-get install --yes nodejs
# Target platform details, consumed by the platform-specific steps below.
ARG TARGETPLATFORM
ARG TARGETOS
ARG TARGETARCH

# Dependencies follow. Everything should be pinned to a specific version,
# with the exception of Nextstrain-maintained software.
#
# Pathogen-specific workflow dependencies are included too: a single Docker
# image serves all pathogen workflows, so some pathogen-specific functionality
# has to live in this Dockerfile. Such dependencies may be shared by several
# workflows, but each is annotated with the pathogen that first required it.

# Staging tree whose contents are copied into the final stage.
RUN mkdir -p /final/{bin,share,libexec}
# 1. Build programs from source

# RAxML
# The official repository compiles with -mavx and -msse3, which linux/arm64
# does not support, so a Nextstrain fork carrying the needed changes is used
# for now: https://github.com/nextstrain/standard-RAxML/tree/simde
# TODO: Switch back to the official repository if this PR is ever merged:
#       https://github.com/stamatak/standard-RAxML/pull/50
WORKDIR /build/RAxML
RUN curl --fail --silent --show-error --location \
        https://api.github.com/repos/nextstrain/standard-RAxML/tarball/4621552064304a219ff03810f5f0d91e1063b68f \
  | tar xzvpf - --no-same-owner --strip-components=1 \
 && make -f Makefile.AVX.PTHREADS.gcc \
 && cp -p raxmlHPC-PTHREADS-AVX /final/bin
# FastTree: fetch a pinned commit tarball, build the FastTreeDblMP target and
# stage the resulting binary.
WORKDIR /build/FastTree
RUN curl --fail --silent --show-error --location \
        https://api.github.com/repos/tsibley/FastTree/tarball/50c5b098ea085b46de30bfc29da5e3f113353e6f \
  | tar xzvpf - --no-same-owner --strip-components=1 \
 && make FastTreeDblMP \
 && cp -p FastTreeDblMP /final/bin
# vcftools: configure with a private prefix inside the build directory, then
# stage the installed bin/ and share/ contents.
WORKDIR /build/vcftools
RUN curl --fail --silent --show-error --location \
        https://github.com/vcftools/vcftools/releases/download/v0.1.16/vcftools-0.1.16.tar.gz \
  | tar xzvpf - --no-same-owner --strip-components=2 \
 && ./configure --prefix=$PWD/built \
 && make \
 && make install \
 && cp -rp built/bin/* /final/bin \
 && cp -rp built/share/* /final/share
# 2. Download pre-built programs

# MAFFT
# NOTE: On hosts whose processor architecture is not amd64, running this
#       program requires emulation support on the Docker host.
# TODO: Build from source to avoid emulation. Instructions:
#       https://mafft.cbrc.jp/alignment/software/installation_without_root.html
WORKDIR /download/mafft
RUN curl --fail --silent --show-error --location \
        https://mafft.cbrc.jp/alignment/software/mafft-7.475-linux.tgz \
  | tar xzvpf - --no-same-owner --strip-components=2 mafft-linux64/mafftdir/ \
 && cp -p bin/* /final/bin \
 && cp -p libexec/* /final/libexec
# IQ-TREE (the iqtree2 binary is staged under the name "iqtree")
# NOTE: On hosts whose processor architecture is not amd64, running this
#       program requires emulation support on the Docker host.
# TODO: Build from source to avoid emulation. Instructions:
#       http://www.iqtree.org/doc/Compilation-Guide
WORKDIR /download/IQ-TREE
RUN curl --fail --silent --show-error --location \
        https://github.com/iqtree/iqtree2/releases/download/v2.1.2/iqtree-2.1.2-Linux.tar.gz \
  | tar xzvpf - --no-same-owner --strip-components=1 \
 && mv bin/iqtree2 /final/bin/iqtree
# Nextalign v1 and Nextclade v1 (pinned to release 1.11.0, x86_64 builds)
# NOTE: On hosts whose processor architecture is not amd64, running these
#       programs requires emulation support on the Docker host.
# TODO: Build from source to avoid emulation. Example:
#       https://github.com/nextstrain/nextclade/blob/1.11.0/.circleci/config.yml#L183-L223
RUN curl --fail --silent --show-error --location \
        --output /final/bin/nextalign1 \
        https://github.com/nextstrain/nextclade/releases/download/1.11.0/nextalign-Linux-x86_64

RUN curl --fail --silent --show-error --location \
        --output /final/bin/nextclade1 \
        https://github.com/nextstrain/nextclade/releases/download/1.11.0/nextclade-Linux-x86_64
# Download tsv-utils
# NOTE: Running this program requires support for emulation on the Docker host
# if the processor architecture is not amd64.
# TODO: Build from source to avoid emulation. Instructions: https://github.com/eBay/tsv-utils/tree/v2.2.0#build-from-source-files
#
# curl runs with -f so an HTTP error (e.g. a 404 for a moved release asset)
# fails the step right away instead of saving the error page as the tarball,
# and with -sS to match the other downloads in this file.
# Only the archive's */bin/* members are extracted, flattened into /final/bin;
# the temporary tarball is removed in the same layer.
RUN curl -fsSL -o tsv-utils.tar.gz https://github.com/eBay/tsv-utils/releases/download/v2.2.0/tsv-utils-v2.2.0_linux-x86_64_ldc2.tar.gz \
 && tar -x --no-same-owner -v -C /final/bin -z --strip-components 2 --wildcards -f tsv-utils.tar.gz "*/bin/*" \
 && rm -f tsv-utils.tar.gz
# Download csvtk and seqkit
# The release asset is chosen with TARGETOS/TARGETARCH, so the binary matches
# the platform the image is being built for.
# curl runs with -f so an HTTP error fails the step in curl itself rather than
# piping an error page into tar, and with -sS to match the other downloads.

# Download csvtk
RUN curl -fsSL https://github.com/shenwei356/csvtk/releases/download/v0.24.0/csvtk_${TARGETOS}_${TARGETARCH}.tar.gz \
  | tar xz --no-same-owner -C /final/bin

# Download seqkit
RUN curl -fsSL https://github.com/shenwei356/seqkit/releases/download/v2.2.0/seqkit_${TARGETOS}_${TARGETARCH}.tar.gz \
  | tar xz --no-same-owner -C /final/bin
# gofasta (for ncov/Pangolin)
# NOTE: On hosts whose processor architecture is not amd64, running this
#       program requires emulation support on the Docker host.
# TODO: Build from source to avoid emulation. Instructions:
#       https://github.com/virus-evolution/gofasta/tree/v0.0.6#installation
RUN curl --fail --silent --show-error --location \
        --output /final/bin/gofasta \
        https://github.com/virus-evolution/gofasta/releases/download/v0.0.6/gofasta-linux-amd64

# minimap2 (for ncov/Pangolin); only the minimap2 binary is pulled out of the
# release archive.
# NOTE: On hosts whose processor architecture is not amd64, running this
#       program requires emulation support on the Docker host.
# TODO: Build from source to avoid emulation. Instructions:
#       https://github.com/lh3/minimap2/tree/v2.24#install
RUN curl --fail --silent --show-error --location \
        https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 \
  | tar xjvpf - --no-same-owner --strip-components=1 -C /final/bin minimap2-2.24_x64-linux/minimap2
# 3. Install programs via pip

# jaxlib on linux/arm64 only
# jaxlib is an evofr dependency with no official pre-built binaries for
# linux/arm64, so a wheel published from a GitHub user's fork is used instead.
# https://github.com/google/jax/issues/7097#issuecomment-1110730040
# On every other platform this step does nothing.
RUN case "$TARGETPLATFORM" in \
        linux/arm64) \
            pip3 install https://github.com/yoziru/jax/releases/download/jaxlib-v0.3.25/jaxlib-0.3.25-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl \
            ;; \
    esac
# envdir: used by pathogen builds
RUN pip3 install 'envdir==1.0.1'

# awscli: our AWS Batch builds rely on `aws s3`
RUN pip3 install 'awscli==1.18.195'

# Snakemake and related optional dependencies
# Held at 7.24.1 for stability (2023-03-13)
RUN pip3 install 'snakemake==7.24.1'

# google-cloud-storage: needed by Snakemake to fetch remote files addressed by
# Google Storage URIs
RUN pip3 install 'google-cloud-storage==2.7.0'

# epiweeks (for ncov)
RUN pip3 install 'epiweeks==2.1.2'
# Install Pangolin and PangoLEARN + deps (for ncov)
# The cov-lineages projects aren't available on PyPI, so install via git URLs.
# Each URL is pinned to a tag, date-tag or commit with pip's
# `git+https://…/<repo>.git@<ref>` syntax.
#
# NOTE(review): the pangolin, scorpio and constellations refs had been mangled
# into the placeholder "[email protected]" (e-mail obfuscation damage), which is
# not a valid pip VCS URL. The tags below are the restored pins — confirm them
# against the upstream release history before merging.
RUN pip3 install git+https://github.com/cov-lineages/pangolin.git@v3.1.17
RUN pip3 install git+https://github.com/cov-lineages/pangoLEARN.git@2021-12-06
RUN pip3 install git+https://github.com/cov-lineages/scorpio.git@v0.3.16
RUN pip3 install git+https://github.com/cov-lineages/constellations.git@v0.1.1
RUN pip3 install git+https://github.com/cov-lineages/pango-designation.git@19d9a537b9
RUN pip3 install pysam==0.19.1
# Install pango_aliasor (for forecasts-ncov)
RUN pip3 install pango_aliasor==0.3.0
# 4. Add unpinned programs

# Everything from this point on can be forced to rebuild (bypassing the layer
# cache) by invoking
#
#     docker build --build-arg CACHE_DATE="$(date)"
#
# NOTE: All versioned software added below should be checked in
#       devel/validate-platforms.
ARG CACHE_DATE

# The Nextstrain CLI, so builds can do things like `nextstrain deploy`
RUN pip3 install nextstrain-cli

# Helper scripts used by the remaining builder steps
COPY builder-scripts/ /builder-scripts/
# Nextalign v2 and Nextclade v2 are direct downloads like their v1
# counterparts, but they track the latest release, which is why they sit after
# CACHE_DATE. The release asset suffix comes from /builder-scripts/target-triple.

# Nextalign v2; the unversioned "nextalign" name points at v2
RUN curl --fail --silent --show-error --location \
        --output /final/bin/nextalign2 \
        https://github.com/nextstrain/nextclade/releases/latest/download/nextalign-$(/builder-scripts/target-triple) \
 && ln -sv nextalign2 /final/bin/nextalign

# Nextclade v2; the unversioned "nextclade" name points at v2
RUN curl --fail --silent --show-error --location \
        --output /final/bin/nextclade2 \
        https://github.com/nextstrain/nextclade/releases/latest/download/nextclade-$(/builder-scripts/target-triple) \
 && ln -sv nextclade2 /final/bin/nextclade
# Fauna: fetch the master branch into /nextstrain/fauna and install its Python
# requirements
WORKDIR /nextstrain/fauna
RUN /builder-scripts/download-repo https://github.com/nextstrain/fauna master . \
 && pip3 install --requirement requirements.txt

# TreeTime
RUN pip3 install phylo-treetime
# Augur

# CVXOPT on linux/arm64
# CVXOPT is an Augur dependency that ships no pre-built binaries for
# linux/arm64, so on that platform it is built here.
#
# System deps for the build¹:
#   - libopenblas-dev: optimized versions of BLAS and LAPACK.
#   - SuiteSparse: source code is downloaded so it can be built alongside CVXOPT.
#
# The "install" (really a build) is its own step because it needs a special
# environment variable².
#
# ¹ https://cvxopt.org/install/#building-and-installing-from-source
# ² https://cvxopt.org/install/#ubuntu-debian
WORKDIR /cvxopt
RUN if [[ "$TARGETPLATFORM" == linux/arm64 ]]; then \
        apt-get update \
     && apt-get install --yes --no-install-recommends \
            libopenblas-dev \
     && mkdir SuiteSparse \
     && curl --fail --silent --show-error --location \
            https://api.github.com/repos/DrTimothyAldenDavis/SuiteSparse/tarball/v5.8.1 \
          | tar xzvpf - --no-same-owner --strip-components=1 -C SuiteSparse \
     && CVXOPT_SUITESPARSE_SRC_DIR=$(pwd)/SuiteSparse \
            pip3 install cvxopt \
        ; \
    fi
# Augur is installed in editable mode: the copy in the image can then be
# overlaid with --volume=.../augur:/nextstrain/augur while remaining globally
# accessible and importable. The ref to fetch is whatever
# /builder-scripts/latest-augur-release-tag prints.
WORKDIR /nextstrain/augur
RUN /builder-scripts/download-repo \
        https://github.com/nextstrain/augur \
        "$(/builder-scripts/latest-augur-release-tag)" \
        . \
 && pip3 install --editable .
# Auspice
# Fetch the release branch, install the Node deps, build, and link Auspice
# into the global search path. A fresh install takes only ~40 seconds, so no
# effort is made to cache it the way the Python deps are cached. Building
# Auspice lets it run without hot-reloading, which is time-consuming and
# generally unnecessary inside the container image. Linking plays the same
# role as an editable Python install, for the same reasons as for Augur.
WORKDIR /nextstrain/auspice
RUN /builder-scripts/download-repo https://github.com/nextstrain/auspice release . \
 && npm update \
 && npm install \
 && npm run build \
 && npm link
# evofr, for forecasting
RUN pip3 install evofr

# NCBI Datasets command line tools (datasets, dataformat), for access to NCBI
# Datasets Virus Data Packages. The download directory is selected with
# TARGETARCH.
RUN curl --fail --silent --show-error --location \
        --output /final/bin/datasets \
        https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/v2/linux-${TARGETARCH}/datasets
RUN curl --fail --silent --show-error --location \
        --output /final/bin/dataformat \
        https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/v2/linux-${TARGETARCH}/dataformat
# ———————————————————————————————————————————————————————————————————— #
# Now build the final image.
FROM python:3.10-slim-bullseye AS final
# Run every RUN step under bash with -e, -u and pipefail so that errors, unset
# variables and failed pipeline stages abort the build.
SHELL ["/bin/bash", "-e", "-u", "-o", "pipefail", "-c"]
# Add system runtime deps
# bzip2, gzip, xz-utils, zip, unzip, zstd: install compression tools
# ca-certificates: [Dockerfile] for secure HTTPS connections; may be used by workflows
# curl: [Dockerfile] for downloading binaries directly; may be used by workflows
# dos2unix: tsv-utils needs unix line endings
# jq: may be used by workflows
# less: for usability in an interactive prompt
# libgomp1: for running FastTree
# libsqlite3-0: for pyfastx (for Augur)
# perl: for running VCFtools
# ruby: may be used by workflows
# util-linux: NOTE(review): no rationale is recorded for this package — confirm what needs it
# wget: may be used by workflows
# zlib1g: for pyfastx (for Augur)
# nodejs: for running Auspice (installed by a separate step below)
#
# The apt package lists are removed in the same layer so they do not ship in
# the final image; later steps that need apt run `apt-get update` themselves.
RUN apt-get update && apt-get install -y --no-install-recommends \
        bzip2 \
        ca-certificates \
        curl \
        dos2unix \
        gzip \
        jq \
        less \
        libgomp1 \
        libsqlite3-0 \
        perl \
        ruby \
        util-linux \
        wget \
        xz-utils \
        zip unzip \
        zlib1g \
        zstd \
 && rm -rf /var/lib/apt/lists/*
# Install a specific Node.js version (14.x) via the NodeSource setup script.
# https://github.com/nodesource/distributions/blob/0d81da75/README.md#installation-instructions
#
# The apt package lists are removed in the same layer so they do not ship in
# the final image; any later step that needs apt runs `apt-get update` itself.
RUN curl -fsSL https://deb.nodesource.com/setup_14.x | bash - \
 && apt-get update && apt-get install -y nodejs \
 && rm -rf /var/lib/apt/lists/*
# Used for platform-specific instructions
ARG TARGETPLATFORM
# Install CVXOPT deps on linux/arm64
# CVXOPT, an Augur dependency, was built separately above without runtime deps¹
# packaged like they are for the amd64 wheel.
#
# ¹ https://cvxopt.org/install/#building-and-installing-from-source
#
# On other platforms this step does nothing. The apt package lists are removed
# in the same layer so they do not ship in the final image.
RUN if [[ "$TARGETPLATFORM" == linux/arm64 ]]; then \
        apt-get update && apt-get install -y --no-install-recommends \
            libopenblas-base \
        && rm -rf /var/lib/apt/lists/* \
        ; \
    fi
# System-wide bash configuration, for interactive use
COPY bashrc /etc/bash.bashrc

# Bring over everything the builder staged under /final
COPY --from=builder /final/bin/     /usr/local/bin/
COPY --from=builder /final/share/   /usr/local/share/
COPY --from=builder /final/libexec/ /usr/local/libexec/

# Point MAFFT at its helper binaries explicitly
ENV MAFFT_BINARIES=/usr/local/libexec

# Make the programs readable and executable for every container user
RUN chmod a+rx /usr/local/bin/* /usr/local/libexec/*
# Python libraries installed in the builder
COPY --from=builder /usr/local/lib/python3.10/site-packages/ /usr/local/lib/python3.10/site-packages/

# Python entry-point scripts we need, listed one by one.
#
# XXX TODO: This isn't great. The list needs manual upkeep, because a script
# that got installed but is not named here is silently left behind. If that
# maintenance burden becomes troublesome or excessive, consider alternatives:
# installing the deps into an empty prefix tree and copying the whole tree, or
# using pip's installed-files.txt manifests as the set of things to copy.
#   -trs, 15 June 2018
COPY --from=builder \
        /usr/local/bin/augur \
        /usr/local/bin/aws \
        /usr/local/bin/envdir \
        /usr/local/bin/nextstrain \
        /usr/local/bin/pangolin \
        /usr/local/bin/pangolearn.smk \
        /usr/local/bin/scorpio \
        /usr/local/bin/snakemake \
        /usr/local/bin/treetime \
    /usr/local/bin/
# Node libraries installed in the builder
COPY --from=builder /usr/lib/node_modules/ /usr/lib/node_modules/

# Globally linked Auspice script.
#
# The builder image has this symlink, but COPY would copy the _contents_ of
# its target rather than create a symlink, so it is recreated here. Node needs
# the symlink in order to discover Auspice's locally-installed deps correctly.
RUN ln --symbolic --verbose /usr/lib/node_modules/auspice/auspice.js /usr/local/bin/auspice

# Nextstrain components
COPY --from=builder /nextstrain /nextstrain
# Entrypoint scripts, executable by everyone
COPY entrypoint entrypoint-aws-batch /sbin/
RUN chmod a+rx /sbin/entrypoint*

# /nextstrain doubles as a global HOME that any UID may write to (like /tmp)
RUN chmod a+rwXt /nextstrain
ENV HOME=/nextstrain

# A non-root system user, available for optional use
RUN useradd \
        --system \
        --user-group \
        --home-dir /nextstrain \
        --shell /bin/bash \
        --no-log-init \
        nextstrain

# The host is expected to bind mount the pathogen build dir at /nextstrain/build.
WORKDIR /nextstrain/build
RUN chown nextstrain:nextstrain /nextstrain/build

ENTRYPOINT ["/sbin/entrypoint"]
# Finally, add metadata at the end so it doesn't bust cached layers.
#
# Optionally passed in during build. Used by a label below.
ARG GIT_REVISION
# Add some metadata to our image for searching later. The "maintainer" label
# is community convention and comes from the old MAINTAINER command. Other
# labels should be namespaced a la Java classes. We mostly use the keys
# defined in the OpenContainers spec:
#
# https://github.com/opencontainers/image-spec/blob/master/annotations.md#pre-defined-annotation-keys
#
# The custom "org.nextstrain.image.name" label in particular will likely be
# used by nextstrain-cli's image pruning, as labels are the only way to have
# persistent metadata values (tags are removed from old images after pulling a
# new image with the tag).
#
# NOTE(review): the contact address in the two labels below had been mangled
# into the placeholder "[email protected]" (e-mail obfuscation damage); it is
# restored here — confirm it is the team's current contact address.
LABEL maintainer="Nextstrain team <hello@nextstrain.org>"
LABEL org.opencontainers.image.authors="Nextstrain team <hello@nextstrain.org>"
LABEL org.opencontainers.image.source="https://github.com/nextstrain/docker-base"
LABEL org.opencontainers.image.revision="${GIT_REVISION}"
LABEL org.nextstrain.image.name="nextstrain/base"