From 880544aa51fe3175cd68dd8c391dd30b2c86dc5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vakaris=20Ba=C5=A1kirov?= <59808854+vakarisbk@users.noreply.github.com> Date: Mon, 7 Oct 2024 17:27:35 +0300 Subject: [PATCH] Add documentation for testing on Kubernetes (#23) * add Dockerfile and docs, modify example * doc fixes --- docs/testing-on-k8s.md | 71 ++++++++++++++++++++++++++++++++++++++++++ examples/tips.py | 5 +-- k8s/Dockerfile | 34 ++++++++++++++++++++ 3 files changed, 106 insertions(+), 4 deletions(-) create mode 100644 docs/testing-on-k8s.md create mode 100644 k8s/Dockerfile diff --git a/docs/testing-on-k8s.md b/docs/testing-on-k8s.md new file mode 100644 index 0000000..f6e2666 --- /dev/null +++ b/docs/testing-on-k8s.md @@ -0,0 +1,71 @@ +# Testing in Kubernetes + +This guide explains how to test DataFusion Ray on Kubernetes during development. It assumes you have an existing Kubernetes cluster. + +## 1. Deploy the KubeRay Operator + +To manage Ray clusters, you need to deploy the KubeRay operator using Helm. This step is required once per Kubernetes cluster. + +```shell +helm repo add kuberay https://ray-project.github.io/kuberay-helm/ +helm repo update + +# Install the Custom Resource Definitions (CRDs) and KubeRay operator +helm install kuberay-operator kuberay/kuberay-operator + +# Verify that the operator is running in the `default` namespace. +kubectl get pods + +# Example output: +# NAME READY STATUS RESTARTS AGE +# kuberay-operator-7fbdbf8c89-pt8bk 1/1 Running 0 27s +``` + +You can customize the operator's settings (e.g., resource limits and requests). For basic testing, the default configuration should suffice. +For more details and customization options, refer to the [KubeRay Helm Chart documentation](https://github.com/ray-project/kuberay-helm/tree/main/helm-chart/kuberay-operator). + +## 2. 
Build a Custom Docker Image +You need to build a custom Docker image containing your local development copy of DataFusion Ray rather than using the default PyPI release. + +Run the following command to build your Docker image: + +```shell +docker build -t [YOUR_IMAGE_NAME]:[YOUR_TAG] -f k8s/Dockerfile . +``` +After building the image, push it to a container registry accessible by your Kubernetes cluster. + +## 3. Deploy a RayCluster +Next, deploy a RayCluster using the custom image. + +```shell +helm repo update +helm install datafusion-ray kuberay/ray-cluster \ + --set 'image.repository=[YOUR_REPOSITORY]' \ + --set 'image.tag=[YOUR_TAG]' \ + --set 'imagePullPolicy=Always' +``` +Make sure you replace *[YOUR_REPOSITORY]* and *[YOUR_TAG]* with the repository (registry plus image name) and tag of the image you pushed in the previous step. + +You can further customize RayCluster settings (such as resource allocations, autoscaling, and more). +For full configuration options, refer to the [RayCluster Helm Chart documentation](https://github.com/ray-project/kuberay-helm/tree/main/helm-chart/ray-cluster). + +## 4. Port Forwarding + +To access Ray's dashboard, set up port forwarding between your local machine and the Ray cluster's head node. The service name is derived from the Helm release name used above (`datafusion-ray`): + +```shell +kubectl port-forward service/datafusion-ray-kuberay-head-svc 8265:8265 +``` + +This makes Ray's dashboard and API available at `http://127.0.0.1:8265`. + + +## 5. 
Run an Example +From the examples directory in your project, you can run a sample job using the following commands: + +``` +export RAY_ADDRESS="http://127.0.0.1:8265" +ray job submit --working-dir ./examples/ -- python3 tips.py +``` + +### Expected output: diff --git a/examples/tips.py b/examples/tips.py index e72425a..a4e499e 100644 --- a/examples/tips.py +++ b/examples/tips.py @@ -22,11 +22,8 @@ SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) -# Start a local cluster -ray.init(resources={"worker": 1}) - # Connect to a cluster -# ray.init() +ray.init() # Create a context and register a table ctx = DatafusionRayContext(2)
diff --git a/k8s/Dockerfile b/k8s/Dockerfile
new file mode 100644
index 0000000..9fb4531
--- /dev/null
+++ b/k8s/Dockerfile
@@ -0,0 +1,53 @@
+# Base image shared by the build and runtime stages; override with
+# `--build-arg RAY_IMAGE=...` to test against a different Ray release.
+ARG RAY_IMAGE=rayproject/ray:2.37.0.cabc24-py312
+
+# Build stage: compile the datafusion_ray wheel with Rust and maturin.
+FROM ${RAY_IMAGE} AS builder
+
+# Use bash explicitly: the steps below rely on `source`, and pipefail makes
+# the `curl | sh` Rust installation fail when the download itself fails.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# sudo is kept from the original recipe (presumably the base image runs as
+# a non-root user -- confirm). ca-certificates is listed explicitly because
+# --no-install-recommends may otherwise skip it, breaking the HTTPS download.
+RUN sudo apt-get update && \
+    sudo apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        curl && \
+    sudo rm -rf /var/lib/apt/lists/*
+
+# Install Rust
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | \
+    sh -s -- --default-toolchain stable -y
+
+WORKDIR /home/ray
+
+# Install the Python build dependencies into a virtual environment
+COPY requirements-in.txt /home/ray/
+RUN python3 -m venv venv && \
+    source venv/bin/activate && \
+    pip3 install --no-cache-dir -r requirements-in.txt
+
+# Add sources. The target directories are still created by a RUN step, as in
+# the original recipe, so they belong to the user that RUN executes as.
+RUN mkdir /home/ray/src /home/ray/datafusion_ray
+COPY src /home/ray/src/
+COPY datafusion_ray /home/ray/datafusion_ray/
+COPY pyproject.toml Cargo.* build.rs README.md /home/ray/
+
+# Build the datafusion_ray wheel; it is picked up from target/wheels below
+RUN source venv/bin/activate && \
+    source /home/ray/.cargo/env && \
+    maturin build --release
+
+# Runtime stage: the stock Ray image plus the freshly built wheel.
+FROM ${RAY_IMAGE}
+
+# Copy whatever wheel maturin produced and install it through a glob, so the
+# image keeps building when the package version, Python ABI tag or CPU
+# architecture differs from 0.6.0 / cp38-abi3 / x86_64.
+COPY --from=builder /home/ray/target/wheels/ /home/ray/wheels/
+RUN pip3 install --no-cache-dir /home/ray/wheels/datafusion_ray-*.whl