Skip to content

Commit

Permalink
Update RAPIDS accelerated UDF Dockerfile to better match spark-rapids-jni (#395)
Browse files: browse the repository at this point in the history

Signed-off-by: Jason Lowe <[email protected]>
  • Loading branch information
jlowe committed Jun 24, 2024
1 parent 7c3eab7 commit 01c7256
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 63 deletions.
98 changes: 40 additions & 58 deletions examples/UDF-Examples/RAPIDS-accelerated-UDFs/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,72 +16,54 @@

# A container that can be used to build UDF native code against libcudf
ARG CUDA_VERSION=11.8.0
ARG LINUX_VERSION=ubuntu18.04
ARG LINUX_VERSION=rockylinux8

FROM nvidia/cuda:${CUDA_VERSION}-devel-${LINUX_VERSION}

ARG DEBIAN_FRONTEND=noninteractive
ARG TOOLSET_VERSION=11
ENV TOOLSET_VERSION=11
ARG PARALLEL_LEVEL=10
ENV PARALLEL_LEVEL=10

RUN GCC_VERSION=$(bash -c '\
CUDA_VERSION=$(nvcc --version | head -n4 | tail -n1 | cut -d" " -f5 | cut -d"," -f1); \
CUDA_VERSION_MAJOR=$(echo $CUDA_VERSION | tr -d '.' | cut -c 1-2); \
CUDA_VERSION_MINOR=$(echo $CUDA_VERSION | tr -d '.' | cut -c 3); \
if [[ "$CUDA_VERSION_MAJOR" == 9 ]]; then echo "7"; \
elif [[ "$CUDA_VERSION_MAJOR" == 10 ]]; then echo "8"; \
elif [[ "$CUDA_VERSION_MAJOR" == 11 ]]; then echo "9"; \
else echo "10"; \
fi') \
&& apt update -y \
&& apt install -y software-properties-common \
&& add-apt-repository -y ppa:git-core/ppa \
&& add-apt-repository -y ppa:ubuntu-toolchain-r/test \
&& add-apt-repository ppa:deadsnakes/ppa \
&& apt update -y \
&& apt install -y \
build-essential git rsync wget \
gcc-${GCC_VERSION} g++-${GCC_VERSION} \
openjdk-8-jdk maven tzdata \
# CMake dependencies
curl libssl-dev libcurl4-openssl-dev zlib1g-dev \
&& apt autoremove -y \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
&& update-alternatives \
--install /usr/bin/gcc gcc /usr/bin/gcc-${GCC_VERSION} 100 \
&& update-alternatives \
--install /usr/bin/g++ g++ /usr/bin/g++-${GCC_VERSION} 100 \
# Set gcc-${GCC_VERSION} as the default gcc
&& update-alternatives --set gcc /usr/bin/gcc-${GCC_VERSION} \
# Set gcc-${GCC_VERSION} as the default g++
&& update-alternatives --set g++ /usr/bin/g++-${GCC_VERSION} \
# Set JDK8 as the default Java
&& update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
### Install basic requirements
RUN dnf --enablerepo=powertools install -y \
gcc-toolset-${TOOLSET_VERSION} \
git \
java-1.8.0-openjdk \
maven \
ninja-build \
patch \
python39 \
scl-utils \
tar \
wget \
zlib-devel \
&& alternatives --set python /usr/bin/python3

# 3.22.3: CUDA architecture 'native' support + flexible CMAKE_<LANG>_*_LAUNCHER for ccache
ARG CMAKE_VERSION=3.26.4
# default x86_64 from x86 build, aarch64 cmake for arm build
ARG CMAKE_ARCH=x86_64
RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \
tar zxf cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \
rm cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz
ENV PATH /usr/local/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}/bin:$PATH

# Install CMake
RUN cd /tmp \
&& curl -fsSLO --compressed "https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION.tar.gz" -o /tmp/cmake-$CMAKE_VERSION.tar.gz \
&& tar -xvzf /tmp/cmake-$CMAKE_VERSION.tar.gz && cd /tmp/cmake-$CMAKE_VERSION \
&& /tmp/cmake-$CMAKE_VERSION/bootstrap \
--system-curl \
--parallel=${PARALLEL_LEVEL} \
&& make install -j${PARALLEL_LEVEL} \
&& cd /tmp && rm -rf /tmp/cmake-$CMAKE_VERSION*

# Install ccache
# ccache for interactive builds
ARG CCACHE_VERSION=4.6
RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v${CCACHE_VERSION}/ccache-${CCACHE_VERSION}.tar.gz && \
tar zxf ccache-${CCACHE_VERSION}.tar.gz && \
rm ccache-${CCACHE_VERSION}.tar.gz && \
cd ccache-${CCACHE_VERSION} && \
mkdir build && \
cd build && \
cmake .. \
-DCMAKE_BUILD_TYPE=Release \
-DZSTD_FROM_INTERNET=ON \
-DREDIS_STORAGE_BACKEND=OFF && \
cmake --build . --parallel ${PARALLEL_LEVEL} --target install && \
cd ../.. && \
rm -rf ccache-${CCACHE_VERSION}
tar zxf ccache-${CCACHE_VERSION}.tar.gz && \
rm ccache-${CCACHE_VERSION}.tar.gz && \
cd ccache-${CCACHE_VERSION} && \
mkdir build && \
cd build && \
scl enable gcc-toolset-${TOOLSET_VERSION} \
"cmake .. \
-DCMAKE_BUILD_TYPE=Release \
-DZSTD_FROM_INTERNET=ON \
-DREDIS_STORAGE_BACKEND=OFF && \
cmake --build . --parallel ${PARALLEL_LEVEL} --target install" && \
cd ../.. && \
rm -rf ccache-${CCACHE_VERSION}

ENTRYPOINT /usr/bin/scl enable gcc-toolset-${TOOLSET_VERSION} -- bash
10 changes: 5 additions & 5 deletions examples/UDF-Examples/RAPIDS-accelerated-UDFs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ Run the following commands to build and start a docker

```bash
cd spark-rapids-examples/examples/UDF-Examples/RAPIDS-accelerated-UDFs
docker build -t my-local:my-udf-example-ubuntu .
nvidia-docker run -it my-local:my-udf-example-ubuntu
docker build -t my-local:my-udf-example .
nvidia-docker run -it my-local:my-udf-example
```

### Build the udf-examples jar
Expand All @@ -139,7 +139,7 @@ export CCACHE_DIR="$LOCAL_CCACHE_DIR"
export CMAKE_C_COMPILER_LAUNCHER="ccache"
export CMAKE_CXX_COMPILER_LAUNCHER="ccache"
export CMAKE_CUDA_COMPILER_LAUNCHER="ccache"
export CMAKE_CXX_LINKER_LAUNCHER="ccache
export CMAKE_CXX_LINKER_LAUNCHER="ccache"
mvn clean package -Pudf-native-examples
```

Expand Down Expand Up @@ -206,9 +206,9 @@ $SPARK_HOME/bin/pyspark --master local[*] \

### Test native based UDF

Input the following commands to test wordcount JIN UDF
Input the following commands to test wordcount JNI UDF

```bash
```python
from pyspark.sql.types import *
schema = StructType([
StructField("c1", StringType()),
Expand Down

0 comments on commit 01c7256

Please sign in to comment.