Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump ubuntu image version from 16.04 to 18.04 #304

Draft
wants to merge 12 commits into
base: 1.0-1
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions ci/buildspec.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
version: 0.2

phases:
  install:
    runtime-versions:
      # Quoted so YAML does not read the version as a float
      # (e.g. a future "3.10" would otherwise parse as 3.1).
      python: "3.8"
      docker: 19
  pre_build:
    commands:
      - echo Pre-build started on `date`
      - echo Installing dependencies...
      # Fetch Miniconda over HTTPS from the official Anaconda host;
      # http://repo.continuum.io is the legacy host and serves plain HTTP.
      - curl -LO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
      - bash Miniconda3-latest-Linux-x86_64.sh -bfp /miniconda3
      - export PATH=/miniconda3/bin:${PATH}
      - conda install python=3.8
      - conda update -y conda
      # The new pip dependency resolver in 20.2+ can't resolve 1.0-1 and 0.90 dependencies
      - python3 -m pip install pip==20.1
      - python3 -m pip install .[test]
  build:
    commands:
      - echo Build started on `date`
      - echo Docker login...
      # --password-stdin keeps the credential out of `ps` output and build logs;
      # `docker login -p` itself warns that the flag is insecure.
      - echo "$dockerhub_password" | docker login -u "$dockerhub_username" --password-stdin
      - echo Building the Docker image...
      - docker build -t xgboost-container-base:$FRAMEWORK_VERSION-cpu-py3 -f docker/$FRAMEWORK_VERSION/base/Dockerfile.cpu .
      - python3 setup.py bdist_wheel --universal
      - docker build -t preprod-xgboost-container:$FRAMEWORK_VERSION-cpu-py3 -f docker/$FRAMEWORK_VERSION/final/Dockerfile.cpu .
      - echo Running tox...
      # Build a throwaway test image on top of the preprod image so unit tests
      # and flake8 run against the exact container contents.
      - printf "FROM preprod-xgboost-container:$FRAMEWORK_VERSION-cpu-py3\nADD . /app\nWORKDIR /app\nRUN python3 -m pip install .[test]" > Dockerfile.test
      - docker build -t test-xgboost-container -f Dockerfile.test .
      - docker run --rm -t test-xgboost-container sh -c 'pytest --cov=sagemaker_xgboost_container --cov-fail-under=60 test/unit'
      - docker run --rm -t test-xgboost-container sh -c 'flake8 setup.py src test'
      - echo Running container tests...
      - pytest test/integration/local --docker-base-name preprod-xgboost-container --tag $FRAMEWORK_VERSION-cpu-py3 --py-version 3 --framework-version $FRAMEWORK_VERSION
      - docker tag preprod-xgboost-container:$FRAMEWORK_VERSION-cpu-py3 $SM_ALPHA.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:$FRAMEWORK_VERSION-cpu-py3
      - docker tag preprod-xgboost-container:$FRAMEWORK_VERSION-cpu-py3 $SM_ALPHA.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:$FRAMEWORK_VERSION
  post_build:
    commands:
      - echo Build completed on `date`
      # Which tags get pushed depends on the webhook event that triggered the build.
      - |
        case $CODEBUILD_WEBHOOK_EVENT in
          PULL_REQUEST_MERGED)
            echo Logging in to Amazon ECR...
            $(aws ecr get-login --no-include-email --region $AWS_DEFAULT_REGION)
            echo Pushing the Docker image...
            # grep -v keeps the account-qualified registry URL out of the build log.
            docker push $SM_ALPHA.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:$FRAMEWORK_VERSION-cpu-py3 | grep -v -E "[0-9]{12}.dkr.ecr.\S+.amazonaws.com"
            docker push $SM_ALPHA.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:$FRAMEWORK_VERSION | grep -v -E "[0-9]{12}.dkr.ecr.\S+.amazonaws.com"
            ;;
          PULL_REQUEST_CREATED | PULL_REQUEST_UPDATED | PULL_REQUEST_REOPENED)
            echo Logging in to Amazon ECR...
            $(aws ecr get-login --no-include-email --region $AWS_DEFAULT_REGION)
            echo Pushing the Docker image...
            # Pushes a -test tag for manual verification; requires cleanup in ECR every once in a while though.
            TEST_TAG=$SM_ALPHA.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:${FRAMEWORK_VERSION}-cpu-py3-test
            docker tag preprod-xgboost-container:$FRAMEWORK_VERSION-cpu-py3 ${TEST_TAG}
            docker push ${TEST_TAG} | grep -v -E "[0-9]{12}.dkr.ecr.\S+.amazonaws.com"
            ;;
          *)
            echo Undefined behavior for webhook event type $CODEBUILD_WEBHOOK_EVENT
            ;;
        esac
142 changes: 112 additions & 30 deletions docker/1.0-1/base/Dockerfile.cpu
Original file line number Diff line number Diff line change
@@ -1,41 +1,123 @@
FROM ubuntu:16.04
ARG UBUNTU_VERSION=18.04
ARG CUDA_VERSION=10.2
ARG IMAGE_DIGEST=218afa9c2002be9c4629406c07ae4daaf72a3d65eb3c5a5614d9d7110840a46e

# Install python and other runtime dependencies
RUN apt-get update && \
apt-get -y install \
build-essential \
libatlas-dev \
git \
wget \
curl \
nginx \
jq

RUN apt-get update
RUN apt-get clean
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${UBUNTU_VERSION}@sha256:${IMAGE_DIGEST}

RUN apt-get -y install openjdk-8-jdk-headless
ARG MINICONDA_VERSION=4.9.2
ARG CONDA_PY_VERSION=39
ARG CONDA_CHECKSUM="b4e46fcc8029e2cfa731b788f25b1d36"
ARG CONDA_PKG_VERSION=4.10.1
ARG PYTHON_VERSION=3.8.13
ARG PYARROW_VERSION=1.0
ARG MLIO_VERSION=0.7.0
ARG XGBOOST_VERSION=1.0

# Install mlio
RUN echo 'installing miniconda' && \
curl -LO https://repo.anaconda.com/miniconda/Miniconda3-py38_4.8.3-Linux-x86_64.sh && \
echo "d63adf39f2c220950a063e0529d4ff74 Miniconda3-py38_4.8.3-Linux-x86_64.sh" | md5sum -c - && \
bash Miniconda3-py38_4.8.3-Linux-x86_64.sh -bfp /miniconda3 && \
rm Miniconda3-py38_4.8.3-Linux-x86_64.sh

ENV PATH=/miniconda3/bin:${PATH}

RUN conda install -c conda-forge python=3.6.13 && \
conda update -y conda && \
conda install pip=20.1 && \
conda install -c conda-forge pyarrow=0.14.1 && \
conda install -c mlio -c conda-forge mlio-py=0.1
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8

# Python won’t try to write .pyc or .pyo files on the import of source modules
# Force stdin, stdout and stderr to be totally unbuffered. Good for logging
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING='utf-8'

RUN rm /etc/apt/sources.list.d/cuda.list && \
rm /etc/apt/sources.list.d/nvidia-ml.list && \
apt-key del 7fa2af80 && \
apt-get update && apt-get install -y --no-install-recommends wget && \
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb && \
dpkg -i cuda-keyring_1.0-1_all.deb && \
apt-get update && \
apt-get -y upgrade && \
apt-get -y install --no-install-recommends \
build-essential \
curl \
git \
jq \
libatlas-base-dev \
nginx \
openjdk-8-jdk-headless \
unzip \
wget \
&& \
# MLIO build dependencies
# Official Ubuntu APT repositories do not contain an up-to-date version of CMake required to build MLIO.
# Kitware contains the latest version of CMake.
apt-get -y install --no-install-recommends \
apt-transport-https \
ca-certificates \
gnupg \
software-properties-common \
&& \
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | \
gpg --dearmor - | \
tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null && \
echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null && \
apt-get update && \
rm /usr/share/keyrings/kitware-archive-keyring.gpg && \
apt-get install -y --no-install-recommends \
autoconf \
automake \
build-essential \
cmake=3.18.4-0kitware1 \
cmake-data=3.18.4-0kitware1 \
doxygen \
kitware-archive-keyring \
libcurl4-openssl-dev \
libssl-dev \
libtool \
ninja-build \
python3-dev \
python3-distutils \
python3-pip \
zlib1g-dev \
&& \
rm -rf /var/lib/apt/lists/*

# Install conda
RUN cd /tmp && \
curl -L --output /tmp/Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-py${CONDA_PY_VERSION}_${MINICONDA_VERSION}-Linux-x86_64.sh && \
echo "${CONDA_CHECKSUM} /tmp/Miniconda3.sh" | md5sum -c - && \
bash /tmp/Miniconda3.sh -bfp /miniconda3 && \
rm /tmp/Miniconda3.sh

ENV PATH=/miniconda3/bin:${PATH}

# Install MLIO with Apache Arrow integration
# We could install mlio-py from conda, but it comes with extra support such as image reader that increases image size
# which increases training time. We build from source to minimize the image size.
RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \
# Conda configuration see https://conda.io/projects/conda/en/latest/configuration.html
conda config --system --set auto_update_conda false && \
conda config --system --set show_channel_urls true && \
echo "python ${PYTHON_VERSION}.*" >> /miniconda3/conda-meta/pinned && \
conda install -c conda-forge python=${PYTHON_VERSION} && \
conda install conda=${CONDA_PKG_VERSION} && \
conda update -y conda && \
conda install -c conda-forge pyarrow=${PYARROW_VERSION} && \
cd /tmp && \
git clone --branch v${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio && \
cd mlio && \
build-tools/build-dependency build/third-party all && \
mkdir -p build/release && \
cd build/release && \
cmake -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH="$(pwd)/../third-party" ../.. && \
cmake --build . && \
cmake --build . --target install && \
cmake -DMLIO_INCLUDE_PYTHON_EXTENSION=ON -DPYTHON_EXECUTABLE="/miniconda3/bin/python3" \
-DMLIO_INCLUDE_ARROW_INTEGRATION=ON ../.. && \
cmake --build . --target mlio-py && \
cmake --build . --target mlio-arrow && \
cd ../../src/mlio-py && \
python3 setup.py bdist_wheel && \
python3 -m pip install typing && \
python3 -m pip install --upgrade pip && \
python3 -m pip install dist/*.whl && \
cp -r /tmp/mlio/build/third-party/lib/intel64/gcc4.7/* /usr/local/lib/ && \
ldconfig && \
rm -rf /tmp/mlio

# Install latest version of XGBoost
RUN python3 -m pip install --no-cache -I xgboost==1.0
RUN python3 -m pip install --no-cache -I xgboost==${XGBOOST_VERSION}
22 changes: 13 additions & 9 deletions docker/1.0-1/final/Dockerfile.cpu
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
FROM xgboost-container-base:1.0-1-cpu-py3
ENV SAGEMAKER_XGBOOST_VERSION 1.0-1
ARG SAGEMAKER_XGBOOST_VERSION=1.0-1
ARG PYTHON_VERSION=3.8

FROM xgboost-container-base:${SAGEMAKER_XGBOOST_VERSION}-cpu-py3

ARG SAGEMAKER_XGBOOST_VERSION

########################
# Install dependencies #
Expand All @@ -11,20 +15,20 @@ RUN python3 -m pip install -r /requirements.txt && rm /requirements.txt
# Copy wheel to container #
###########################
COPY dist/sagemaker_xgboost_container-2.0-py2.py3-none-any.whl /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl
# https://github.com/googleapis/google-cloud-python/issues/6647
RUN rm -rf /miniconda3/lib/python3.6/site-packages/numpy-1.19.5.dist-info && \
RUN rm -rf /miniconda3/lib/python3.8/site-packages/numpy-1.21.2.dist-info && \
python3 -m pip install --no-cache /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl && \
python3 -m pip uninstall -y typing && \
rm /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl

##############
# DMLC PATCH #
##############
# TODO: remove after making contributions back to xgboost for tracker.py
COPY src/sagemaker_xgboost_container/dmlc_patch/tracker.py \
/miniconda3/lib/python3.6/site-packages/xgboost/dmlc-core/tracker/dmlc_tracker/tracker.py
/miniconda3/lib/python${PYTHON_VERSION}/site-packages/xgboost/dmlc-core/tracker/dmlc_tracker/tracker.py

# Include DMLC python code in PYTHONPATH to use RabitTracker
ENV PYTHONPATH=$PYTHONPATH:/miniconda3/lib/python3.6/site-packages/xgboost/dmlc-core/tracker
ENV PYTHONPATH=$PYTHONPATH:/miniconda3/lib/python${PYTHON_VERSION}/site-packages/xgboost/dmlc-core/tracker

#######
# MMS #
Expand All @@ -34,12 +38,12 @@ RUN useradd -m model-server
RUN mkdir -p /home/model-server/tmp && chown -R model-server /home/model-server

# Copy MMS configs
COPY docker/$SAGEMAKER_XGBOOST_VERSION/resources/mms/config.properties.tmp /home/model-server
COPY docker/${SAGEMAKER_XGBOOST_VERSION}/resources/mms/config.properties.tmp /home/model-server
ENV XGBOOST_MMS_CONFIG=/home/model-server/config.properties

# Copy execution parameters endpoint plugin for MMS
RUN mkdir -p /tmp/plugins
COPY docker/$SAGEMAKER_XGBOOST_VERSION/resources/mms/endpoints-1.0.jar /tmp/plugins
COPY docker/${SAGEMAKER_XGBOOST_VERSION}/resources/mms/endpoints-1.0.jar /tmp/plugins
RUN chmod +x /tmp/plugins/endpoints-1.0.jar

# Create directory for models
Expand Down Expand Up @@ -67,4 +71,4 @@ ENV SAGEMAKER_SERVING_MODULE sagemaker_xgboost_container.serving:main

EXPOSE 8080
ENV TEMP=/home/model-server/tmp
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
38 changes: 22 additions & 16 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
Flask==1.1.1 # sagemaker-containers requires flask 1.1.1
PyYAML==5.4.1
boto3==1.10.14
botocore==1.13.14
gunicorn<20.0.0
cryptography==3.4.6
matplotlib==3.3.2
multi-model-server==1.1.1
Pillow==9.1.0
boto3==1.17.52
botocore==1.20.52
cryptography==35.0.0
gunicorn==19.10.0
itsdangerous==2.0.1
matplotlib==3.4.1
multi-model-server==1.1.2
numpy==1.19.2
pandas==1.1.3
pandas==1.2.4
protobuf==3.20.1
psutil==5.6.7 # sagemaker-containers requires psutil 5.6.7
python-dateutil==2.8.0
requests<2.21
python-dateutil==2.8.1
requests==2.25.1
retrying==1.3.3
sagemaker-containers>=2.8.3,<2.9
sagemaker-inference==1.2.0
scikit-learn==0.23.2
scipy==1.2.2
smdebug==0.4.13
urllib3==1.25.9
wheel
sagemaker-containers==2.8.6.post2
sagemaker-inference==1.5.5
scikit-learn==0.24.1
scipy==1.6.2
smdebug==1.0.10
urllib3==1.26.5
wheel==0.36.2
jinja2==2.11.3
MarkupSafe==1.1.1
Werkzeug==0.15.6
22 changes: 8 additions & 14 deletions src/sagemaker_xgboost_container/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,40 +442,34 @@ def get_parquet_dmatrix(path, is_pipe=False):

def get_recordio_protobuf_dmatrix(path, is_pipe=False):
"""Get Data Matrix from recordio-protobuf data.

:param path: Path where recordio-protobuf formatted training data resides, either directory, file, or SageMaker pipe
:param is_pipe: Boolean to indicate if data is being read in pipe mode
:return: xgb.DMatrix or None
"""
try:
if is_pipe:
dataset = [mlio.SageMakerPipe(path)]
reader = mlio.RecordIOProtobufReader(dataset=dataset,
batch_size=BATCH_SIZE)
pipes_path = path if isinstance(path, list) else [path]
dataset = [mlio.SageMakerPipe(pipe_path) for pipe_path in pipes_path]
else:
dataset = mlio.list_files(path)
reader = mlio.RecordIOProtobufReader(dataset=dataset,
batch_size=BATCH_SIZE)

reader_params = mlio.DataReaderParams(dataset=dataset, batch_size=BATCH_SIZE)
reader = mlio.RecordIOProtobufReader(reader_params)

if reader.peek_example() is not None:
# recordio-protobuf tensor may be dense (use numpy) or sparse (use scipy)
if type(reader.peek_example()['values']) is mlio.core.DenseTensor:
to_matrix = as_numpy
vstack = np.vstack
else:
to_matrix = to_coo_matrix
vstack = scipy_vstack
is_dense_tensor = type(reader.peek_example()['values']) is mlio.DenseTensor

all_features = []
all_labels = []
for example in reader:
features = to_matrix(example['values'])
features = as_numpy(example['values']) if is_dense_tensor else to_coo_matrix(example['values'])
all_features.append(features)

labels = as_numpy(example['label_values'])
all_labels.append(labels)

all_features = vstack(all_features)
all_features = np.vstack(all_features) if is_dense_tensor else scipy_vstack(all_features).tocsr()
all_labels = np.concatenate(all_labels, axis=None)
dmatrix = xgb.DMatrix(all_features, label=all_labels)
return dmatrix
Expand Down
Loading