-
Notifications
You must be signed in to change notification settings - Fork 2.6k
/
Dockerfile.ci
101 lines (82 loc) · 3.13 KB
/
Dockerfile.ci
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# syntax=docker/dockerfile:1-labs
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.07-py3
ARG IMAGE_LABEL
FROM ${BASE_IMAGE}
LABEL "nemo.library"=${IMAGE_LABEL}
ENV TRANSFORMERS_OFFLINE=0
ENV HYDRA_FULL_ERROR=1
ENV PYTHONUNBUFFERED=1
# APT packages
RUN <<"EOF" bash -ex
apt-get update
apt-get install -y bc libsox-fmt-all -y
apt-get clean
EOF
WORKDIR /workspace
# Install Mamba Dependancy
ARG CAUSAL_CONV_TAG=v1.2.2.post1
RUN <<"EOF" bash -ex
# Mamba dependancy installation
git clone --depth 1 --branch ${CAUSAL_CONV_TAG} https://github.com/Dao-AILab/causal-conv1d && \
cd causal-conv1d && \
python setup.py install && \
cd .. && \
rm -rf causal-conv1d
EOF
RUN pip install hatchling # needed to install nemo-run
ARG NEMO_RUN_TAG=34259bd3e752fef94045a9a019e4aaf62bd11ce2
RUN pip install nemo_run@git+https://github.com/NVIDIA/NeMo-Run.git@${NEMO_RUN_TAG}
# Install NeMo requirements
ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea
ARG MODELOPT_VERSION=0.19.0
ARG MCORE_TAG=bd677bfb13ac2f19deaa927adc6da6f9201d66aa
ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c
RUN \
--mount=type=bind,source=requirements,target=requirements \
--mount=type=bind,source=tools,target=tools \
--mount=type=bind,source=setup.py,target=setup.py \
--mount=type=bind,source=nemo/package_info.py,target=nemo/package_info.py \
--mount=type=bind,source=nemo/__init__.py,target=nemo/__init__.py <<"EOF" bash -ex
pip install --no-cache-dir --no-build-isolation --extra-index-url https://pypi.nvidia.com \
"transformer-engine @ git+https://github.com/NVIDIA/TransformerEngine.git@${TE_TAG}" \
"megatron_core @ git+https://github.com/NVIDIA/Megatron-LM.git@${MCORE_TAG}" \
"nvidia-modelopt[torch]~=${MODELOPT_VERSION}" \
"apex @ git+https://github.com/NVIDIA/apex.git@${APEX_TAG}" \
"unstructured==0.14.9" \
"llama-index==0.10.43" \
"onnxscript @ git+https://github.com/microsoft/onnxscript" \
-r tools/ctc_segmentation/requirements.txt \
".[all]"
# Megatron Core installation
git clone https://github.com/NVIDIA/Megatron-LM.git && \
pushd Megatron-LM && \
git checkout ${MCORE_TAG} && \
pushd megatron/core/datasets && \
make && \
popd && \
popd
export PYTHONPATH="${PYTHONPATH}:/workspace/Megatron-LM"
# Install nvidia-resiliency-ext
pip install --no-cache-dir "git+https://github.com/NVIDIA/nvidia-resiliency-ext.git@97aad77609d2e25ed38ac5c99f0c13f93c48464e"
EOF
# Copy over NeMo code
COPY ./ ./
RUN <<"EOF" bash -ex
pip install --no-cache-dir --no-build-isolation ".[all]"
# set permission
chmod 777 -R /workspace
EOF
ENV PYTHONPATH="${PYTHONPATH}:/workspace/Megatron-LM"