Merge pull request #512 from NVIDIA/branch-23.10

release 23.10 [skip ci]

pxLi authored Nov 8, 2023
2 parents 5dab107 + 83c5f20 commit f6fc5b8

Showing 52 changed files with 2,339 additions and 804 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/auto-merge.yml
@@ -18,7 +18,7 @@ name: auto-merge HEAD to BASE
on:
pull_request_target:
branches:
- branch-23.08
- branch-23.10
types: [closed]

jobs:
@@ -29,14 +29,14 @@ jobs:
steps:
- uses: actions/checkout@v3
with:
ref: branch-23.08 # force to fetch from latest upstream instead of PR ref
ref: branch-23.10 # force to fetch from latest upstream instead of PR ref

- name: auto-merge job
uses: ./.github/workflows/auto-merge
env:
OWNER: NVIDIA
REPO_NAME: spark-rapids-ml
HEAD: branch-23.08
BASE: branch-23.10
HEAD: branch-23.10
BASE: branch-23.12
AUTOMERGE_TOKEN: ${{ secrets.AUTOMERGE_TOKEN }} # use to merge PR

2 changes: 1 addition & 1 deletion ci/Dockerfile
@@ -37,6 +37,6 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
&& conda config --set solver libmamba

# install cuML
ARG CUML_VER=23.08
ARG CUML_VER=23.10
RUN conda install -y -c rapidsai -c conda-forge -c nvidia cuml=$CUML_VER python=3.9 cuda-version=11.8 \
&& conda clean --all -f -y
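
A quick way to confirm the bumped cuML pin actually resolves in a freshly built CI image — the image tag is hypothetical and the build is assumed to run from the repo root:

```bash
# Build the CI image and print the cuML version it installed (hypothetical tag name).
docker build -f ci/Dockerfile -t spark-rapids-ml-ci .
docker run --rm --gpus all spark-rapids-ml-ci \
    python -c "import cuml; print(cuml.__version__)"   # expect a 23.10.x version
```
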
42 changes: 32 additions & 10 deletions ci/docs.sh
@@ -15,25 +15,47 @@
# limitations under the License.
#

# get version tag
TAG=$(git describe --tag)
if [[ $? != 0 ]]; then
echo "Can only deploy from a version tag."
exit 1
if [[ $1 == "nightly" ]]; then
TAG=$(git log -1 --format="%h")
BRANCH=$(git branch --show-current)
else
# get version tag
TAG="v$VERSION"
fi

set -ex

# build and publish docs
pushd docs
make clean
make html
git worktree add --track -b gh-pages _site origin/gh-pages
cp -r build/html/* _site/api/python
cp -r site/* _site

pushd _site
if [[ $1 == "nightly" ]]; then
# draft copy
api_dest=api/python-draft
else
# release copy
api_dest=api/python
# also copy site wide changes for release
cp -r ../site/* .
fi

# in _site
mkdir -p $api_dest
cp -r ../build/html/* $api_dest/

git add --all
git commit -m "${TAG}"
git push origin gh-pages
dff=$(git diff --staged --stat)
repo_url=$(git config --get remote.origin.url)
url=${repo_url#https://}
github_account=${GITHUB_ACCOUNT:-nvauto}
if [[ -n $dff ]]; then
git commit -m "Update draft api docs to commit ${TAG} on ${BRANCH}"
git push -f https://${github_account}:${GITHUB_TOKEN}@${url} gh-pages
fi

popd #_site
git worktree remove _site
git worktree remove _site --force
popd
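
The reworked script now covers two flows: nightly runs publish a draft copy under `api/python-draft` on `gh-pages`, while release runs publish to `api/python` and also refresh the site-wide pages. A hedged sketch of both invocations — the token and version values are placeholders, and the release call assumes `VERSION` is exported by the surrounding release tooling:

```bash
# Nightly (draft) docs, as ci/test.sh invokes it from the repo root:
GITHUB_TOKEN=<token> ci/docs.sh nightly

# Release docs: TAG becomes "v$VERSION" and the build lands in api/python on gh-pages.
VERSION=23.10.0 GITHUB_TOKEN=<token> GITHUB_ACCOUNT=nvauto ci/docs.sh
```
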
18 changes: 11 additions & 7 deletions ci/lint_python.py
@@ -1,11 +1,13 @@
from typing import Dict, List, Tuple

import argparse
from io import StringIO
import os
import subprocess
import sys
from multiprocessing import Pool, cpu_count
from pylint import epylint
from pylint.lint import Run
from pylint.reporters.text import TextReporter

# This script is copied from dmlc/xgboost

@@ -52,14 +52,16 @@ def __init__(self) -> None:
]

def run(self, path: str) -> Tuple[Dict, str, str]:
(pylint_stdout, pylint_stderr) = epylint.py_run(
" ".join([str(path)] + self.pylint_opts), return_std=True
)
emap = {}
err = pylint_stderr.read()

pylint_output = StringIO()
reporter = TextReporter(pylint_output)
Run([str(path)] + self.pylint_opts, reporter=reporter, exit=False)

emap = {}
err = ""

out = []
for line in pylint_stdout:
for line in pylint_output:
out.append(line)
key = line.split(":")[-1].split("(")[0].strip()
if key not in self.pylint_cats:
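
This change swaps the removed `pylint.epylint.py_run` helper for `pylint.lint.Run` with a `TextReporter` writing into an in-memory buffer, so the script keeps parsing plain-text messages. A rough command-line equivalent of what the runner does per path — the target directory and flags are illustrative, not the script's exact `pylint_opts`:

```bash
# Newer pylint releases drop the epylint shim, so use a current version...
pip install --upgrade pylint
# ...and lint the package while collecting every message (roughly mirrors Run(..., exit=False)).
pylint --exit-zero python/src/spark_rapids_ml
```
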
7 changes: 6 additions & 1 deletion ci/test.sh
@@ -22,7 +22,7 @@ case $type in
"pre-merge" | "")
ut_args=""
;;
"nightly")
"nightly" | "release")
ut_args="--runslow"
;;
*)
@@ -45,8 +45,13 @@ pip install -r requirements_dev.txt && pip install -e .
./run_benchmark.sh $bench_args

# check compatibility with Spark 3.3 in nightly run
# also push draft release docs to gh-pages
if [[ $type == "nightly" ]]; then
pip uninstall pyspark -y
pip install pyspark~=3.3.0
./run_benchmark.sh $bench_args
# if everything passed till now update draft release docs in gh-pages
# need to invoke docs.sh from top level of repo
cd .. # top level of repo
ci/docs.sh nightly
fi
2 changes: 1 addition & 1 deletion docker/Dockerfile.pip
@@ -18,7 +18,7 @@ ARG CUDA_VERSION=11.8.0
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04

ARG PYSPARK_VERSION=3.3.1
ARG RAPIDS_VERSION=23.8.0
ARG RAPIDS_VERSION=23.10.0
ARG ARCH=amd64
#ARG ARCH=arm64
# Install packages to build spark-rapids-ml
2 changes: 1 addition & 1 deletion docker/Dockerfile.python
@@ -17,7 +17,7 @@
ARG CUDA_VERSION=11.8.0
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04

ARG CUML_VERSION=23.08
ARG CUML_VERSION=23.10

# Install packages to build spark-rapids-ml
RUN apt update -y \
4 changes: 3 additions & 1 deletion docs/site/_config.yml
@@ -51,4 +51,6 @@ exclude:
- vendor/ruby/

include:
- _static
- _static
- _sphinx*

4 changes: 3 additions & 1 deletion docs/site/api/index.md
@@ -4,4 +4,6 @@ nav_order: 5
---
# API Documentation

- [Python API](python)
- Python API
- [Stable Release](python)
- [Draft](python-draft)
4 changes: 3 additions & 1 deletion docs/source/conf.py
@@ -9,7 +9,7 @@
project = 'spark-rapids-ml'
copyright = '2023, NVIDIA'
author = 'NVIDIA'
release = '23.8.0'
release = '23.10.0'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
@@ -41,6 +41,8 @@

html_theme = 'pydata_sphinx_theme'

html_show_sourcelink = False

import inspect
from spark_rapids_ml.utils import _unsupported_methods_attributes

2 changes: 1 addition & 1 deletion jvm/README.md
@@ -95,7 +95,7 @@ repository, usually in your `~/.m2/repository`.
Add the artifact jar to the Spark, for example:
```bash
ML_JAR="target/rapids-4-spark-ml_2.12-23.08.0-SNAPSHOT.jar"
PLUGIN_JAR="~/.m2/repository/com/nvidia/rapids-4-spark_2.12/23.08.1-SNAPSHOT/rapids-4-spark_2.12-23.08.1-SNAPSHOT.jar"
PLUGIN_JAR="~/.m2/repository/com/nvidia/rapids-4-spark_2.12/23.08.2-SNAPSHOT/rapids-4-spark_2.12-23.08.2-SNAPSHOT.jar"
$SPARK_HOME/bin/spark-shell --master $SPARK_MASTER \
--driver-memory 20G \
2 changes: 1 addition & 1 deletion notebooks/aws-emr/init-bootstrap-action.sh
@@ -8,7 +8,7 @@ sudo chmod a+rwx -R /sys/fs/cgroup/devices
sudo yum install -y gcc openssl-devel bzip2-devel libffi-devel tar gzip wget make mysql-devel
sudo bash -c "wget https://www.python.org/ftp/python/3.9.9/Python-3.9.9.tgz && tar xzf Python-3.9.9.tgz && cd Python-3.9.9 && ./configure --enable-optimizations && make altinstall"

RAPIDS_VERSION=23.8.0
RAPIDS_VERSION=23.10.0

# install scikit-learn
sudo /usr/local/bin/pip3.9 install scikit-learn
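
This bootstrap script is the one handed to EMR at cluster creation. A hedged sketch of wiring it in — the bucket name and release label are placeholders, not values taken from this change:

```bash
# Stage the script in S3 and pass it as a bootstrap action (hypothetical bucket/label).
aws s3 cp notebooks/aws-emr/init-bootstrap-action.sh s3://<your-bucket>/init-bootstrap-action.sh
aws emr create-cluster \
    --name spark-rapids-ml \
    --release-label <emr-release> \
    --bootstrap-actions Name="spark-rapids-ml setup",Path="s3://<your-bucket>/init-bootstrap-action.sh"
    # GPU instance groups and Spark configuration omitted from this sketch
```
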
32 changes: 20 additions & 12 deletions notebooks/databricks/README.md
@@ -1,24 +1,24 @@
## Running notebooks on Databricks

If you already have a Databricks account, you can run the example notebooks on a Databricks cluster, as follows:
- Install the [databricks-cli](https://docs.databricks.com/dev-tools/cli/index.html).
- Install the latest [databricks-cli](https://docs.databricks.com/dev-tools/cli/index.html). Note that Databricks has deprecated the legacy Python-based CLI in favor of a self-contained executable. Make sure the new version is first on the executable search PATH after installation (see the PATH check sketched after these instructions).
- Configure it with your workspace URL and an [access token](https://docs.databricks.com/dev-tools/api/latest/authentication.html). For demonstration purposes, we will configure a new [connection profile](https://docs.databricks.com/dev-tools/cli/index.html#connection-profiles) named `spark-rapids-ml`. If you already have a connection profile, just set the `PROFILE` environment variable accordingly and skip the configure step.
```
```bash
export PROFILE=spark-rapids-ml
databricks configure --token --profile ${PROFILE}
```
- Create a zip file for the `spark-rapids-ml` package.
```
```bash
cd spark-rapids-ml/python/src
zip -r spark_rapids_ml.zip spark_rapids_ml
```
- Copy the zip file to DBFS, setting `SAVE_DIR` to the directory of your choice.
```"
```bash
export SAVE_DIR="/path/to/save/artifacts"
databricks fs cp spark_rapids_ml.zip dbfs:${SAVE_DIR}/spark_rapids_ml.zip --profile ${PROFILE}
```
- Edit the [init-pip-cuda-11.8.sh](init-pip-cuda-11.8.sh) init script to set the `SPARK_RAPIDS_ML_ZIP` variable to the DBFS location used above.
```
```bash
cd spark-rapids-ml/notebooks/databricks
sed -i"" -e "s;/path/to/zip/file;${SAVE_DIR}/spark_rapids_ml.zip;" init-pip-cuda-11.8.sh
```
@@ -28,20 +28,25 @@ If you already have a Databricks account, you can run the example notebooks on a
- updates the CUDA runtime to 11.8 (required for Spark Rapids ML dependencies).
- downloads and installs the [Spark-Rapids](https://github.com/NVIDIA/spark-rapids) plugin for accelerating data loading and Spark SQL.
- installs various `cuXX` dependencies via pip.
- Copy the modified `init-pip-cuda-11.8.sh` init script to DBFS.
```
databricks fs cp init-pip-cuda-11.8.sh dbfs:${SAVE_DIR}/init-pip-cuda-11.8.sh --profile ${PROFILE}

**Note**: as of the last update of this README, Azure Databricks requires a CUDA driver forward-compatibility package; uncomment the designated lines for it in the init script. AWS Databricks does not need the package, so leave those lines commented out in that case.

- Copy the modified `init-pip-cuda-11.8.sh` init script to your *workspace* (not DBFS), e.g. to a workspace directory like `/Users/<databricks-user-name>/init_scripts`.
```bash
export WS_SAVE_DIR="/path/to/directory/in/workspace"
databricks workspace mkdirs ${WS_SAVE_DIR} --profile ${PROFILE}
databricks workspace import --format AUTO --content $(base64 -i init-pip-cuda-11.8.sh) ${WS_SAVE_DIR}/init-pip-cuda-11.8.sh --profile ${PROFILE}
```
- Create a cluster using **Databricks 11.3 LTS ML GPU Runtime** using at least two single-gpu workers and add the following configurations to the **Advanced options**.
- Create a cluster using the **Databricks 12.2 LTS ML GPU Runtime** with at least two single-gpu workers, and add the following configurations under **Advanced options**.
- **Init Scripts**
- add the DBFS path to the uploaded init script, e.g. `dbfs:/path/to/save/artifacts/init-pip-cuda-11.8.sh`.
- add the workspace path to the uploaded init script, e.g. `${WS_SAVE_DIR}/init-pip-cuda-11.8.sh`.
- **Spark**
- **Spark config**
```
spark.task.resource.gpu.amount 1
spark.databricks.delta.preview.enabled true
spark.python.worker.reuse true
spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-23.08.1.jar:/databricks/spark/python
spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-23.08.2.jar:/databricks/spark/python
spark.sql.execution.arrow.maxRecordsPerBatch 100000
spark.rapids.memory.gpu.minAllocFraction 0.0001
spark.plugins com.nvidia.spark.SQLPlugin
@@ -63,8 +68,11 @@ If you already have a Databricks account, you can run the example notebooks on a
- **Environment variables**
```
LIBCUDF_CUFILE_POLICY=OFF
LD_LIBRARY_PATH=/usr/local/cuda/compat:/usr/local/cuda/lib64
NCCL_DEBUG=INFO
```
- **Additional Environment variable for Azure Databricks**
```
LD_LIBRARY_PATH=/usr/local/cuda/compat:/usr/local/cuda/lib64
```
- Start the configured cluster.
- Select your workspace and upload the desired [notebook](../) via `Import` in the drop down menu for your workspace.
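
Related to the CLI note at the top of this file: a quick sanity check that the new self-contained CLI, and not the legacy pip-installed one, is what resolves on the PATH — the exact version output format is an assumption:

```bash
type -a databricks     # the new standalone executable should be listed first
databricks --version   # confirms which CLI the shell actually picks up
```
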
29 changes: 14 additions & 15 deletions notebooks/databricks/init-pip-cuda-11.8.sh
@@ -1,11 +1,11 @@
#!/bin/bash
# set portion of path below after /dbfs/ to dbfs zip file location
SPARK_RAPIDS_ML_ZIP=/dbfs/path/to/zip/file
# IMPORTANT: specify RAPIDS_VERSION fully 23.8.0 and not 23.8
# IMPORTANT: specify RAPIDS_VERSION fully 23.10.0 and not 23.10
# also RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.8.0 and not 23.08.0)
# while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.08.1 and not 23.8.1)
RAPIDS_VERSION=23.8.0
SPARK_RAPIDS_VERSION=23.08.1
# while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.08.2 and not 23.8.2)
RAPIDS_VERSION=23.10.0
SPARK_RAPIDS_VERSION=23.08.2

curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}-cuda11.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar

@@ -14,21 +14,20 @@ wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/
sh cuda_11.8.0_520.61.05_linux.run --silent --toolkit

# install forward compatibility package due to old driver
distro=ubuntu2004
arch=x86_64
apt-key del 7fa2af80
wget https://developer.download.nvidia.com/compute/cuda/repos/$distro/$arch/cuda-keyring_1.0-1_all.deb
dpkg -i cuda-keyring_1.0-1_all.deb
apt-get update
apt-get install -y cuda-compat-11-8

# uncomment below lines on Azure Databricks
# distro=ubuntu2004
# arch=x86_64
# apt-key del 7fa2af80
# wget https://developer.download.nvidia.com/compute/cuda/repos/$distro/$arch/cuda-keyring_1.0-1_all.deb
# dpkg -i cuda-keyring_1.0-1_all.deb
# apt-get update
# apt-get install -y cuda-compat-11-8
# export LD_LIBRARY_PATH=/usr/local/cuda/compat:/usr/local/cuda/lib64
# ldconfig

# reset symlink and update library loading paths
# **** set LD_LIBRARY_PATH as below in env var section of cluster config in DB cluster UI ****
rm /usr/local/cuda
ln -s /usr/local/cuda-11.8 /usr/local/cuda
export LD_LIBRARY_PATH=/usr/local/cuda/compat:/usr/local/cuda/lib64
ldconfig

# upgrade pip
/databricks/python/bin/pip install --upgrade pip
2 changes: 1 addition & 1 deletion notebooks/dataproc/README.md
@@ -29,7 +29,7 @@ If you already have a Dataproc account, you can run the example notebooks on a D
- Create a cluster with at least two single-gpu workers. **Note**: in addition to the initialization script from above, this also uses the standard [initialization actions](https://github.com/GoogleCloudDataproc/initialization-actions) for installing the GPU drivers and RAPIDS:
```
export CUDA_VERSION=11.8
export RAPIDS_VERSION=23.8.0
export RAPIDS_VERSION=23.10.0
gcloud dataproc clusters create $USER-spark-rapids-ml \
--image-version=2.1-ubuntu \
2 changes: 1 addition & 1 deletion notebooks/dataproc/spark_rapids_ml.sh
@@ -1,6 +1,6 @@
#!/bin/bash

RAPIDS_VERSION=23.8.0
RAPIDS_VERSION=23.10.0

# patch existing packages
mamba install "llvmlite<0.40,>=0.39.0dev0" "numba>=0.56.2"