diff --git a/.github/workflows/auto-merge.yml b/.github/workflows/auto-merge.yml index f89865c3f..b292dca00 100644 --- a/.github/workflows/auto-merge.yml +++ b/.github/workflows/auto-merge.yml @@ -18,7 +18,7 @@ name: auto-merge HEAD to BASE on: pull_request_target: branches: - - branch-22.06 + - branch-22.08 types: [closed] jobs: @@ -29,13 +29,13 @@ jobs: steps: - uses: actions/checkout@v2 with: - ref: branch-22.06 # force to fetch from latest upstream instead of PR ref + ref: branch-22.08 # force to fetch from latest upstream instead of PR ref - name: auto-merge job uses: ./.github/workflows/auto-merge env: OWNER: NVIDIA REPO_NAME: spark-rapids-examples - HEAD: branch-22.06 - BASE: branch-22.08 + HEAD: branch-22.08 + BASE: branch-22.10 AUTOMERGE_TOKEN: ${{ secrets.AUTOMERGE_TOKEN }} # use to merge PR diff --git a/datasets/mortgage-small.tar.gz b/datasets/mortgage-small.tar.gz deleted file mode 100644 index 2f7c6a016..000000000 Binary files a/datasets/mortgage-small.tar.gz and /dev/null differ diff --git a/docs/get-started/xgboost-examples/building-sample-apps/python.md b/docs/get-started/xgboost-examples/building-sample-apps/python.md index 28f35d40d..53f4e66c6 100644 --- a/docs/get-started/xgboost-examples/building-sample-apps/python.md +++ b/docs/get-started/xgboost-examples/building-sample-apps/python.md @@ -17,7 +17,8 @@ Two files are required by PySpark: + *samples.zip* - the package including all example code + the package including all example code. + Executing the above build commands generates the samples.zip file in 'spark-rapids-examples/examples/XGBoost-Examples' folder + *main.py* diff --git a/docs/get-started/xgboost-examples/csp/databricks/databricks.md b/docs/get-started/xgboost-examples/csp/databricks/databricks.md index 073b1902e..c09576d0e 100644 --- a/docs/get-started/xgboost-examples/csp/databricks/databricks.md +++ b/docs/get-started/xgboost-examples/csp/databricks/databricks.md @@ -49,7 +49,7 @@ cluster. - [Databricks 10.4 LTS ML](https://docs.databricks.com/release-notes/runtime/9.1ml.html#system-environment) has CUDA 11 - installed. Users will need to use 22.06.0 or later on Databricks 10.4 LTS ML. In this case use + installed. Users will need to use 22.04.0 or later on Databricks 10.4 LTS ML. In this case use [generate-init-script-10.4.ipynb](generate-init-script-10.4.ipynb) which will install the RAPIDS Spark plugin. 
diff --git a/docs/get-started/xgboost-examples/csp/databricks/generate-init-script-10.4.ipynb b/docs/get-started/xgboost-examples/csp/databricks/generate-init-script-10.4.ipynb index 53d61c456..3be77a4b5 100644 --- a/docs/get-started/xgboost-examples/csp/databricks/generate-init-script-10.4.ipynb +++ b/docs/get-started/xgboost-examples/csp/databricks/generate-init-script-10.4.ipynb @@ -24,7 +24,7 @@ "source": [ "%sh\n", "cd ../../dbfs/FileStore/jars/\n", - "sudo wget -O rapids-4-spark_2.12-22.06.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.06.0/rapids-4-spark_2.12-22.06.0.jar\n", + "sudo wget -O rapids-4-spark_2.12-22.08.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.08.0/rapids-4-spark_2.12-22.08.0.jar\n", "sudo wget -O xgboost4j-gpu_2.12-1.6.1.jar https://repo1.maven.org/maven2/ml/dmlc/xgboost4j-gpu_2.12/1.6.1/xgboost4j-gpu_2.12-1.6.1.jar\n", "sudo wget -O xgboost4j-spark-gpu_2.12-1.6.1.jar https://repo1.maven.org/maven2/ml/dmlc/xgboost4j-spark-gpu_2.12/1.6.1/xgboost4j-spark-gpu_2.12-1.6.1.jar\n", "ls -ltr\n", @@ -60,7 +60,7 @@ "sudo rm -f /databricks/jars/spark--maven-trees--ml--10.x--xgboost-gpu--ml.dmlc--xgboost4j-spark-gpu_2.12--ml.dmlc__xgboost4j-spark-gpu_2.12__1.5.2.jar\n", "\n", "sudo cp /dbfs/FileStore/jars/xgboost4j-gpu_2.12-1.6.1.jar /databricks/jars/\n", - "sudo cp /dbfs/FileStore/jars/rapids-4-spark_2.12-22.06.0.jar /databricks/jars/\n", + "sudo cp /dbfs/FileStore/jars/rapids-4-spark_2.12-22.08.0.jar /databricks/jars/\n", "sudo cp /dbfs/FileStore/jars/xgboost4j-spark-gpu_2.12-1.6.1.jar /databricks/jars/\"\"\", True)" ] }, @@ -133,7 +133,7 @@ "1. Edit your cluster, adding an initialization script from `dbfs:/databricks/init_scripts/init.sh` in the \"Advanced Options\" under \"Init Scripts\" tab\n", "2. Reboot the cluster\n", "3. Go to \"Libraries\" tab under your cluster and install `dbfs:/FileStore/jars/xgboost4j-spark-gpu_2.12-1.6.1.jar` in your cluster by selecting the \"DBFS\" option for installing jars\n", - "4. Import the mortgage example notebook from `https://github.com/NVIDIA/spark-rapids-examples/blob/branch-22.06/examples/XGBoost-Examples/mortgage/notebooks/python/mortgage-gpu.ipynb`\n", + "4. Import the mortgage example notebook from `https://github.com/NVIDIA/spark-rapids-examples/blob/branch-22.08/examples/XGBoost-Examples/mortgage/notebooks/python/mortgage-gpu.ipynb`\n", "5. 
Inside the mortgage example notebook, update the data paths\n", " `train_data = reader.schema(schema).option('header', True).csv('/data/mortgage/csv/small-train.csv')`\n", " `trans_data = reader.schema(schema).option('header', True).csv('/data/mortgage/csv/small-trans.csv')`" diff --git a/docs/get-started/xgboost-examples/csp/databricks/generate-init-script.ipynb b/docs/get-started/xgboost-examples/csp/databricks/generate-init-script.ipynb index e81611b43..540132062 100644 --- a/docs/get-started/xgboost-examples/csp/databricks/generate-init-script.ipynb +++ b/docs/get-started/xgboost-examples/csp/databricks/generate-init-script.ipynb @@ -24,7 +24,7 @@ "source": [ "%sh\n", "cd ../../dbfs/FileStore/jars/\n", - "sudo wget -O rapids-4-spark_2.12-22.06.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.06.0/rapids-4-spark_2.12-22.06.0.jar\n", + "sudo wget -O rapids-4-spark_2.12-22.08.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.08.0/rapids-4-spark_2.12-22.08.0.jar\n", "sudo wget -O xgboost4j-gpu_2.12-1.6.1.jar https://repo1.maven.org/maven2/ml/dmlc/xgboost4j-gpu_2.12/1.6.1/xgboost4j-gpu_2.12-1.6.1.jar\n", "sudo wget -O xgboost4j-spark-gpu_2.12-1.6.1.jar https://repo1.maven.org/maven2/ml/dmlc/xgboost4j-spark-gpu_2.12/1.6.1/xgboost4j-spark-gpu_2.12-1.6.1.jar\n", "ls -ltr\n", @@ -60,7 +60,7 @@ "sudo rm -f /databricks/jars/spark--maven-trees--ml--9.x--xgboost-gpu--ml.dmlc--xgboost4j-spark-gpu_2.12--ml.dmlc__xgboost4j-spark-gpu_2.12__1.4.1.jar\n", "\n", "sudo cp /dbfs/FileStore/jars/xgboost4j-gpu_2.12-1.6.1.jar /databricks/jars/\n", - "sudo cp /dbfs/FileStore/jars/rapids-4-spark_2.12-22.06.0.jar /databricks/jars/\n", + "sudo cp /dbfs/FileStore/jars/rapids-4-spark_2.12-22.08.0.jar /databricks/jars/\n", "sudo cp /dbfs/FileStore/jars/xgboost4j-spark-gpu_2.12-1.6.1.jar /databricks/jars/\"\"\", True)" ] }, @@ -133,7 +133,7 @@ "1. Edit your cluster, adding an initialization script from `dbfs:/databricks/init_scripts/init.sh` in the \"Advanced Options\" under \"Init Scripts\" tab\n", "2. Reboot the cluster\n", "3. Go to \"Libraries\" tab under your cluster and install `dbfs:/FileStore/jars/xgboost4j-spark-gpu_2.12-1.6.1.jar` in your cluster by selecting the \"DBFS\" option for installing jars\n", - "4. Import the mortgage example notebook from `https://github.com/NVIDIA/spark-rapids-examples/blob/branch-22.06/examples/XGBoost-Examples/mortgage/notebooks/python/mortgage-gpu.ipynb`\n", + "4. Import the mortgage example notebook from `https://github.com/NVIDIA/spark-rapids-examples/blob/branch-22.08/examples/XGBoost-Examples/mortgage/notebooks/python/mortgage-gpu.ipynb`\n", "5. Inside the mortgage example notebook, update the data paths\n", " `train_data = reader.schema(schema).option('header', True).csv('/data/mortgage/csv/small-train.csv')`\n", " `trans_data = reader.schema(schema).option('header', True).csv('/data/mortgage/csv/small-trans.csv')`" diff --git a/docs/get-started/xgboost-examples/dataset/mortgage.md b/docs/get-started/xgboost-examples/dataset/mortgage.md new file mode 100644 index 000000000..1c36155fa --- /dev/null +++ b/docs/get-started/xgboost-examples/dataset/mortgage.md @@ -0,0 +1,22 @@ +# How to download the Mortgage dataset + + + +## Steps to download the data + +1. Go to the [Fannie Mae](https://capitalmarkets.fanniemae.com/credit-risk-transfer/single-family-credit-risk-transfer/fannie-mae-single-family-loan-performance-data) website +2. 
Click on [Single-Family Loan Performance Data](https://datadynamics.fanniemae.com/data-dynamics/?&_ga=2.181456292.2043790680.1657122341-289272350.1655822609#/reportMenu;category=HP) + * Register as a new user if you are using the website for the first time + * Use the credentials to login +3. Select [HP](https://datadynamics.fanniemae.com/data-dynamics/#/reportMenu;category=HP) +4. Click on **Download Data** and choose *Single-Family Loan Performance Data* +5. You will find a tabular list of 'Acquisition and Performance' files sorted based on year and quarter. Click on the file to download (e.g. `2017Q1.zip`) +6. Unzip the downloaded file to extract the csv file (e.g. `2017Q1.csv`) +7. Copy only the csv files to a new folder for the ETL to read + +## Notes +1. Refer to the [Loan Performance Data Tutorial](https://capitalmarkets.fanniemae.com/media/9066/display) for more details. +2. Note that *Single-Family Loan Performance Data* has 2 components. However, the Mortgage ETL requires only the first one (primary dataset) + * Primary Dataset: Acquisition and Performance Files + * HARP Dataset +3. Use the [Resources](https://datadynamics.fanniemae.com/data-dynamics/#/resources/HP) section to learn more about the dataset \ No newline at end of file diff --git a/docs/get-started/xgboost-examples/notebook/python-notebook.md b/docs/get-started/xgboost-examples/notebook/python-notebook.md index 94486d58c..3bfd71174 100644 --- a/docs/get-started/xgboost-examples/notebook/python-notebook.md +++ b/docs/get-started/xgboost-examples/notebook/python-notebook.md @@ -20,6 +20,10 @@ and the home directory for Apache Spark respectively. 3. Launch the notebook: + Note: For ETL jobs, set `spark.task.resource.gpu.amount` to `1/spark.executor.cores`. + + For ETL: + ``` bash PYSPARK_DRIVER_PYTHON=jupyter \ PYSPARK_DRIVER_PYTHON_OPTS=notebook \ @@ -28,14 +32,38 @@ and the home directory for Apache Spark respectively. --jars ${RAPIDS_JAR},${XGBOOST4J_JAR},${XGBOOST4J_SPARK_JAR}\ --py-files ${XGBOOST4J_SPARK_JAR},${SAMPLE_ZIP} \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ - --conf spark.rapids.memory.gpu.pooling.enabled=false \ --conf spark.executor.resource.gpu.amount=1 \ + --conf spark.executor.cores=10 \ + --conf spark.task.resource.gpu.amount=0.1 \ + --conf spark.sql.cache.serializer=com.nvidia.spark.ParquetCachedBatchSerializer \ + --conf spark.rapids.sql.hasNans=false \ + --conf spark.executor.resource.gpu.discoveryScript=./getGpusResources.sh \ + --files $SPARK_HOME/examples/src/main/scripts/getGpusResources.sh + ``` + + For XGBoost: + + ``` bash + PYSPARK_DRIVER_PYTHON=jupyter \ + PYSPARK_DRIVER_PYTHON_OPTS=notebook \ + pyspark \ + --master ${SPARK_MASTER} \ + --jars ${RAPIDS_JAR},${XGBOOST4J_JAR},${XGBOOST4J_SPARK_JAR}\ + --py-files ${XGBOOST4J_SPARK_JAR},${SAMPLE_ZIP} \ + --conf spark.plugins=com.nvidia.spark.SQLPlugin \ + --conf spark.rapids.memory.gpu.pool=NONE \ + --conf spark.executor.resource.gpu.amount=1 \ + --conf spark.executor.cores=10 \ --conf spark.task.resource.gpu.amount=1 \ + --conf spark.rapids.sql.hasNans=false \ --conf spark.executor.resource.gpu.discoveryScript=./getGpusResources.sh \ --files $SPARK_HOME/examples/src/main/scripts/getGpusResources.sh ``` + + 4. Launch ETL Part + - Mortgage ETL Notebook: [Python](../../../../examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL.ipynb) - Taxi ETL Notebook: [Python](../../../../examples/XGBoost-Examples/taxi/notebooks/python/taxi-ETL.ipynb) - Note: Agaricus does not have ETL part. 
diff --git a/docs/get-started/xgboost-examples/notebook/toree.md b/docs/get-started/xgboost-examples/notebook/toree.md index 5be31c180..e338fd909 100644 --- a/docs/get-started/xgboost-examples/notebook/toree.md +++ b/docs/get-started/xgboost-examples/notebook/toree.md @@ -29,18 +29,39 @@ and the home directory for Apache Spark respectively. 4. Install a new kernel with gpu enabled and launch the notebook + Note: For ETL jobs, set `spark.task.resource.gpu.amount` to `1/spark.executor.cores`. + + For ETL: ``` bash jupyter toree install \ --spark_home=${SPARK_HOME} \ --user \ --toree_opts='--nosparkcontext' \ - --kernel_name="XGBoost4j-Spark" \ + --kernel_name="ETL-Spark" \ + --spark_opts='--master ${SPARK_MASTER} \ + --jars ${RAPIDS_JAR},${SAMPLE_JAR} \ + --conf spark.plugins=com.nvidia.spark.SQLPlugin \ + --conf spark.executor.extraClassPath=${RAPIDS_JAR} \ + --conf spark.executor.cores=10 \ + --conf spark.task.resource.gpu.amount=0.1 \ + --conf spark.executor.resource.gpu.discoveryScript=./getGpusResources.sh \ + --files $SPARK_HOME/examples/src/main/scripts/getGpusResources.sh' + ``` + + For XGBoost: + ``` bash + jupyter toree install \ + --spark_home=${SPARK_HOME} \ + --user \ + --toree_opts='--nosparkcontext' \ + --kernel_name="XGBoost-Spark" \ --spark_opts='--master ${SPARK_MASTER} \ --jars ${RAPIDS_JAR},${SAMPLE_JAR} \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ --conf spark.executor.extraClassPath=${RAPIDS_JAR} \ - --conf spark.rapids.memory.gpu.pooling.enabled=false \ + --conf spark.rapids.memory.gpu.pool=NONE \ --conf spark.executor.resource.gpu.amount=1 \ + --conf spark.executor.cores=10 \ --conf spark.task.resource.gpu.amount=1 \ --conf spark.executor.resource.gpu.discoveryScript=./getGpusResources.sh \ --files $SPARK_HOME/examples/src/main/scripts/getGpusResources.sh' diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md b/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md index 887c39d02..11d1fb4dd 100644 --- a/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md +++ b/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md @@ -40,7 +40,7 @@ export SPARK_DOCKER_IMAGE= export SPARK_DOCKER_TAG= pushd ${SPARK_HOME} -wget https://github.com/NVIDIA/spark-rapids-examples/raw/branch-22.06/dockerfile/Dockerfile +wget https://github.com/NVIDIA/spark-rapids-examples/raw/branch-22.08/dockerfile/Dockerfile # Optionally install additional jars into ${SPARK_HOME}/jars/ @@ -60,9 +60,10 @@ on cluster filesystems like HDFS, or in [object stores like S3 and GCS](https:// Note that using [application dependencies](https://spark.apache.org/docs/latest/running-on-kubernetes.html#dependency-management) from the submission client’s local file system is currently not yet supported. -Note: the `mortgage_eval_merged.csv` and `mortgage_train_merged.csv` are not Mortgage raw data, -they are the data produced by Mortgage ETL job. If user wants to use a larger size Mortgage data, please refer to [Launch ETL job](#etl). -Taxi ETL job is the same. But Agaricus does not have ETL process, it is combined with XGBoost as there is just a filter operation. +#### Note: +1. Mortgage and Taxi jobs have ETLs to generate the processed data. +2. For convenience, a subset of the [Taxi](/datasets/) dataset is made available in this repo that can be readily used for launching the XGBoost job. Use [ETL](#etl) to generate larger datasets for training and testing. +3. 
Agaricus does not have an ETL process, it is combined with XGBoost as there is just a filter operation. Save Kubernetes Template Resources ---------------------------------- @@ -89,16 +90,23 @@ to execute using a GPU which is already in use -- causing undefined behavior and Launch Mortgage or Taxi ETL Part --------------------------- +Use the ETL app to process raw Mortgage data. You can either use this ETLed data to split into training and evaluation data or run the ETL on different subsets of the dataset to produce training and evaluation datasets. + +Note: For ETL jobs, Set `spark.task.resource.gpu.amount` to `1/spark.executor.cores`. Run spark-submit ``` bash ${SPARK_HOME}/bin/spark-submit \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ - --conf spark.rapids.memory.gpu.pooling.enabled=false \ --conf spark.executor.resource.gpu.amount=1 \ - --conf spark.task.resource.gpu.amount=1 \ + --conf spark.executor.cores=10 \ + --conf spark.task.resource.gpu.amount=0.1 \ + --conf spark.rapids.sql.incompatibleDateFormats.enabled=true \ + --conf spark.rapids.sql.csv.read.double.enabled=true \ --conf spark.executor.resource.gpu.discoveryScript=./getGpusResources.sh \ + --conf spark.sql.cache.serializer=com.nvidia.spark.ParquetCachedBatchSerializer \ + --conf spark.rapids.sql.hasNans=false \ --files $SPARK_HOME/examples/src/main/scripts/getGpusResources.sh \ --jars ${RAPIDS_JAR} \ --master \ @@ -106,18 +114,17 @@ ${SPARK_HOME}/bin/spark-submit \ --num-executors ${SPARK_NUM_EXECUTORS} \ --driver-memory ${SPARK_DRIVER_MEMORY} \ --executor-memory ${SPARK_EXECUTOR_MEMORY} \ - --class ${EXAMPLE_CLASS} \ --class com.nvidia.spark.examples.mortgage.ETLMain \ $SAMPLE_JAR \ -format=csv \ - -dataPath="perf::${SPARK_XGBOOST_DIR}/mortgage/perf-train/" \ - -dataPath="acq::${SPARK_XGBOOST_DIR}/mortgage/acq-train/" \ - -dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/out/train/" - -# if generating eval data, change the data path to eval as well as the corresponding perf-eval and acq-eval data -# -dataPath="perf::${SPARK_XGBOOST_DIR}/mortgage/perf-eval" -# -dataPath="acq::${SPARK_XGBOOST_DIR}/mortgage/acq-eval" -# -dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/out/eval/" + -dataPath="data::${SPARK_XGBOOST_DIR}/mortgage/input/" \ + -dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/output/train/" \ + -dataPath="tmp::${SPARK_XGBOOST_DIR}/mortgage/output/tmp/" + +# if generating eval data, change the data path to eval +# -dataPath="data::${SPARK_XGBOOST_DIR}/mortgage/input/" +# -dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/output/eval/" +# -dataPath="tmp::${SPARK_XGBOOST_DIR}/mortgage/output/tmp/" # if running Taxi ETL benchmark, change the class and data path params to # -class com.nvidia.spark.examples.taxi.ETLMain # -dataPath="raw::${SPARK_XGBOOST_DIR}/taxi/your-path" @@ -163,9 +170,9 @@ export SPARK_DRIVER_MEMORY=4g export SPARK_EXECUTOR_MEMORY=8g # example class to use -export EXAMPLE_CLASS=com.nvidia.spark.examples.mortgage.GPUMain -# or change to com.nvidia.spark.examples.taxi.GPUMain to run Taxi Xgboost benchmark -# or change to com.nvidia.spark.examples.agaricus.GPUMain to run Agaricus Xgboost benchmark +export EXAMPLE_CLASS=com.nvidia.spark.examples.mortgage.Main +# or change to com.nvidia.spark.examples.taxi.Main to run Taxi Xgboost benchmark +# or change to com.nvidia.spark.examples.agaricus.Main to run Agaricus Xgboost benchmark # tree construction algorithm export TREE_METHOD=gpu_hist @@ -176,9 +183,10 @@ Run spark-submit: ``` bash ${SPARK_HOME}/bin/spark-submit \ --conf spark.plugins=com.nvidia.spark.SQLPlugin 
\ - --conf spark.rapids.memory.gpu.pooling.enabled=false \ + --conf spark.rapids.memory.gpu.pool=NONE \ --conf spark.executor.resource.gpu.amount=1 \ --conf spark.task.resource.gpu.amount=1 \ + --conf spark.rapids.sql.hasNans=false \ --conf spark.executor.resource.gpu.discoveryScript=./getGpusResources.sh \ --files $SPARK_HOME/examples/src/main/scripts/getGpusResources.sh \ --jars ${RAPIDS_JAR} \ @@ -192,9 +200,9 @@ ${SPARK_HOME}/bin/spark-submit --conf spark.kubernetes.executor.podTemplateFile=${TEMPLATE_PATH} \ --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \ ${SAMPLE_JAR} \ - -dataPath=train::${DATA_PATH}/mortgage/csv/train/mortgage_train_merged.csv \ - -dataPath=trans::${DATA_PATH}/mortgage/csv/test/mortgage_eval_merged.csv \ - -format=csv \ + -dataPath=train::${SPARK_XGBOOST_DIR}/mortgage/output/train/ \ + -dataPath=trans::${SPARK_XGBOOST_DIR}/mortgage/output/eval/ \ + -format=parquet \ -numWorkers=${SPARK_NUM_EXECUTORS} \ -treeMethod=${TREE_METHOD} \ -numRound=100 \ diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/standalone-python.md b/docs/get-started/xgboost-examples/on-prem-cluster/standalone-python.md index 55ac2a1c4..6132a7563 100644 --- a/docs/get-started/xgboost-examples/on-prem-cluster/standalone-python.md +++ b/docs/get-started/xgboost-examples/on-prem-cluster/standalone-python.md @@ -53,6 +53,13 @@ Get Application Files, Jar and Dataset Make sure you have prepared the necessary packages and dataset by following this [guide](/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md) + +#### Note: +1. Mortgage and Taxi jobs have ETLs to generate the processed data. +2. For convenience, a subset of the [Taxi](/datasets/) dataset is made available in this repo that can be readily used for launching the XGBoost job. Use [ETL](#etl) to generate larger datasets for training and testing. +3. Agaricus does not have an ETL process, it is combined with XGBoost as there is just a filter operation. + + Launch a Standalone Spark Cluster --------------------------------- @@ -83,30 +90,57 @@ Launch a Standalone Spark Cluster Launch Mortgage or Taxi ETL Part --------------------------- +Use the ETL app to process raw Mortgage data. You can either use this ETLed data to split into training and evaluation data or run the ETL on different subsets of the dataset to produce training and evaluation datasets. -Run spark-submit - +Note: For ETL jobs, set `spark.task.resource.gpu.amount` to `1/spark.executor.cores`. 
+### ETL on GPU ``` bash ${SPARK_HOME}/bin/spark-submit \ --master spark://$HOSTNAME:7077 \ --executor-memory 32G \ --conf spark.executor.resource.gpu.amount=1 \ - --conf spark.task.resource.gpu.amount=1 \ + --conf spark.executor.cores=10 \ + --conf spark.task.resource.gpu.amount=0.1 \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ --conf spark.rapids.sql.incompatibleDateFormats.enabled=true \ --conf spark.rapids.sql.csv.read.double.enabled=true \ + --conf spark.sql.cache.serializer=com.nvidia.spark.ParquetCachedBatchSerializer \ + --conf spark.rapids.sql.hasNans=false \ + --py-files ${SAMPLE_ZIP} \ + main.py \ + --mainClass='com.nvidia.spark.examples.mortgage.etl_main' \ + --format=csv \ + --dataPath="data::${SPARK_XGBOOST_DIR}/mortgage/input/" \ + --dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/output/train/" \ + --dataPath="tmp::${SPARK_XGBOOST_DIR}/mortgage/output/tmp/" + +# if generating eval data, change the data path to eval +# --dataPath="data::${SPARK_XGBOOST_DIR}/mortgage/input/" +# --dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/output/eval/" +# --dataPath="tmp::${SPARK_XGBOOST_DIR}/mortgage/output/tmp/" +# if running Taxi ETL benchmark, change the class and data path params to +# -class com.nvidia.spark.examples.taxi.ETLMain +# -dataPath="raw::${SPARK_XGBOOST_DIR}/taxi/your-path" +# -dataPath="out::${SPARK_XGBOOST_DIR}/taxi/your-path" +``` +### ETL on CPU +```bash +${SPARK_HOME}/bin/spark-submit \ + --master spark://$HOSTNAME:7077 \ + --executor-memory 32G \ + --conf spark.executor.instances=1 \ --py-files ${SAMPLE_ZIP} \ main.py \ --mainClass='com.nvidia.spark.examples.mortgage.etl_main' \ --format=csv \ - --dataPath="perf::${SPARK_XGBOOST_DIR}/mortgage/perf-train/" \ - --dataPath="acq::${SPARK_XGBOOST_DIR}/mortgage/acq-train/" \ - --dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/out/train/" - -# if generating eval data, change the data path to eval as well as the corresponding perf-eval and acq-eval data -# --dataPath="perf::${SPARK_XGBOOST_DIR}/mortgage/perf-eval" -# --dataPath="acq::${SPARK_XGBOOST_DIR}/mortgage/acq-eval" -# --dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/out/eval/" + --dataPath="data::${SPARK_XGBOOST_DIR}/mortgage/input/" \ + --dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/output/train/" \ + --dataPath="tmp::${SPARK_XGBOOST_DIR}/mortgage/output/tmp/" + +# if generating eval data, change the data path to eval +# --dataPath="data::${SPARK_XGBOOST_DIR}/mortgage/input/" +# --dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/output/eval/" +# --dataPath="tmp::${SPARK_XGBOOST_DIR}/mortgage/output/tmp/" # if running Taxi ETL benchmark, change the class and data path params to # -class com.nvidia.spark.examples.taxi.ETLMain # -dataPath="raw::${SPARK_XGBOOST_DIR}/taxi/your-path" @@ -155,9 +189,10 @@ Run spark-submit: ``` bash ${SPARK_HOME}/bin/spark-submit \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ - --conf spark.rapids.memory.gpu.pooling.enabled=false \ + --conf spark.rapids.memory.gpu.pool=NONE \ --conf spark.executor.resource.gpu.amount=1 \ --conf spark.task.resource.gpu.amount=1 \ + --conf spark.rapids.sql.hasNans=false \ --master ${SPARK_MASTER} \ --driver-memory ${SPARK_DRIVER_MEMORY} \ --executor-memory ${SPARK_EXECUTOR_MEMORY} \ @@ -166,8 +201,8 @@ ${SPARK_HOME}/bin/spark-submit --py-files ${XGBOOST4J_SPARK_JAR},${SAMPLE_ZIP} \ ${MAIN_PY} \ --mainClass=${EXAMPLE_CLASS} \ - --dataPath=train::${SPARK_XGBOOST_DIR}/mortgage/out/train/ \ - --dataPath=trans::${SPARK_XGBOOST_DIR}/mortgage/out/eval/ \ + --dataPath=train::${SPARK_XGBOOST_DIR}/mortgage/output/train/ 
\ + --dataPath=trans::${SPARK_XGBOOST_DIR}/mortgage/output/eval/ \ --format=parquet \ --numWorkers=${SPARK_NUM_EXECUTORS} \ --treeMethod=${TREE_METHOD} \ @@ -240,8 +275,8 @@ ${SPARK_HOME}/bin/spark-submit --py-files ${XGBOOST4J_SPARK_JAR},${SAMPLE_ZIP} \ ${SPARK_PYTHON_ENTRYPOINT} \ --mainClass=${EXAMPLE_CLASS} \ - --dataPath=train::${DATA_PATH}/mortgage/out/train/ \ - --dataPath=trans::${DATA_PATH}/mortgage/out/eval/ \ + --dataPath=train::${DATA_PATH}/mortgage/output/train/ \ + --dataPath=trans::${DATA_PATH}/mortgage/output/eval/ \ --format=parquet \ --numWorkers=${SPARK_NUM_EXECUTORS} \ --treeMethod=${TREE_METHOD} \ diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/standalone-scala.md b/docs/get-started/xgboost-examples/on-prem-cluster/standalone-scala.md index 5493340c2..c1b512b07 100644 --- a/docs/get-started/xgboost-examples/on-prem-cluster/standalone-scala.md +++ b/docs/get-started/xgboost-examples/on-prem-cluster/standalone-scala.md @@ -53,9 +53,11 @@ Get Jars and Dataset Make sure you have prepared the necessary packages and dataset by following this [guide](/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md) -Note: the `mortgage_eval_merged.csv` and `mortgage_train_merged.csv` are not Mortgage raw data, -they are the data produced by Mortgage ETL job. If user wants to use a larger size Mortgage data, please refer to [Launch ETL job](#etl). -Taxi ETL job is the same. But Agaricus does not have ETL process, it is combined with XGBoost as there is just a filter operation. +#### Note: +1. Mortgage and Taxi jobs have ETLs to generate the processed data. +2. For convenience, a subset of the [Taxi](/datasets/) dataset is made available in this repo that can be readily used for launching the XGBoost job. Use [ETL](#etl) to generate larger datasets for training and testing. +3. Agaricus does not have an ETL process, it is combined with XGBoost as there is just a filter operation. + Launch a Standalone Spark Cluster --------------------------------- @@ -90,31 +92,59 @@ Launch a Standalone Spark Cluster Launch Mortgage or Taxi ETL Part --------------------------- -If user wants to use a larger size dataset other than the default one, we provide an ETL app to process raw Mortgage data. - +Use the ETL app to process raw Mortgage data. You can either use this ETLed data to split into training and evaluation data or run the ETL on different subsets of the dataset to produce training and evaluation datasets. Run spark-submit +Note: For ETL jobs, set `spark.task.resource.gpu.amount` to `1/spark.executor.cores`. 
+ +### ETL on GPU ``` bash ${SPARK_HOME}/bin/spark-submit \ --master spark://$HOSTNAME:7077 \ --executor-memory 32G \ - --conf spark.rapids.memory.gpu.pooling.enabled=false \ --conf spark.executor.resource.gpu.amount=1 \ - --conf spark.task.resource.gpu.amount=1 \ + --conf spark.executor.cores=10 \ + --conf spark.task.resource.gpu.amount=0.1 \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ --conf spark.rapids.sql.incompatibleDateFormats.enabled=true \ --conf spark.rapids.sql.csv.read.double.enabled=true \ + --conf spark.sql.cache.serializer=com.nvidia.spark.ParquetCachedBatchSerializer \ + --conf spark.rapids.sql.hasNans=false \ --class com.nvidia.spark.examples.mortgage.ETLMain \ $SAMPLE_JAR \ -format=csv \ - -dataPath="perf::${SPARK_XGBOOST_DIR}/mortgage/perf-train/" \ - -dataPath="acq::${SPARK_XGBOOST_DIR}/mortgage/acq-train/" \ - -dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/out/train/" - -# if generating eval data, change the data path to eval as well as the corresponding perf-eval and acq-eval data -# -dataPath="perf::${SPARK_XGBOOST_DIR}/mortgage/perf-eval" -# -dataPath="acq::${SPARK_XGBOOST_DIR}/mortgage/acq-eval" -# -dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/out/eval/" + -dataPath="data::${SPARK_XGBOOST_DIR}/mortgage/input/" \ + -dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/output/train/" \ + -dataPath="tmp::${SPARK_XGBOOST_DIR}/mortgage/output/tmp/" + +# if generating eval data, change the data path to eval +# -dataPath="data::${SPARK_XGBOOST_DIR}/mortgage/input/" +# -dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/output/eval/" +# -dataPath="tmp::${SPARK_XGBOOST_DIR}/mortgage/output/tmp/" +# if running Taxi ETL benchmark, change the class and data path params to +# -class com.nvidia.spark.examples.taxi.ETLMain +# -dataPath="raw::${SPARK_XGBOOST_DIR}/taxi/your-path" +# -dataPath="out::${SPARK_XGBOOST_DIR}/taxi/your-path" +``` + +### ETL on CPU + +```bash +${SPARK_HOME}/bin/spark-submit \ +--master spark://$HOSTNAME:7077 \ +--executor-memory 32G \ +--conf spark.executor.instances=1 \ +--conf spark.sql.broadcastTimeout=700 \ +--class com.nvidia.spark.examples.mortgage.ETLMain \ +$SAMPLE_JAR \ +-format=csv \ +-dataPath="data::${SPARK_XGBOOST_DIR}/mortgage/input/" \ +-dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/output/train/" \ +-dataPath="tmp::${SPARK_XGBOOST_DIR}/mortgage/output/tmp/" + +# if generating eval data, change the data path to eval +# -dataPath="data::${SPARK_XGBOOST_DIR}/mortgage/input/" +# -dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/output/eval/" # if running Taxi ETL benchmark, change the class and data path params to # -class com.nvidia.spark.examples.taxi.ETLMain # -dataPath="raw::${SPARK_XGBOOST_DIR}/taxi/your-path" @@ -150,9 +180,9 @@ export SPARK_DRIVER_MEMORY=4g export SPARK_EXECUTOR_MEMORY=8g # example class to use -export EXAMPLE_CLASS=com.nvidia.spark.examples.mortgage.GPUMain -# or change to com.nvidia.spark.examples.taxi.GPUMain to run Taxi Xgboost benchmark -# or change to com.nvidia.spark.examples.agaricus.GPUMain to run Agaricus Xgboost benchmark +export EXAMPLE_CLASS=com.nvidia.spark.examples.mortgage.Main +# or change to com.nvidia.spark.examples.taxi.Main to run Taxi Xgboost benchmark +# or change to com.nvidia.spark.examples.agaricus.Main to run Agaricus Xgboost benchmark # tree construction algorithm export TREE_METHOD=gpu_hist @@ -163,7 +193,8 @@ Run spark-submit: ``` bash ${SPARK_HOME}/bin/spark-submit \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ - --conf spark.rapids.memory.gpu.pooling.enabled=false \ + --conf 
spark.rapids.memory.gpu.pool=NONE \ + --conf spark.rapids.sql.hasNans=false \ --conf spark.executor.resource.gpu.amount=1 \ --conf spark.task.resource.gpu.amount=1 \ --master ${SPARK_MASTER} \ @@ -172,9 +203,9 @@ ${SPARK_HOME}/bin/spark-submit --conf spark.cores.max=${TOTAL_CORES} \ --class ${EXAMPLE_CLASS} \ ${SAMPLE_JAR} \ - -dataPath=train::${SPARK_XGBOOST_DIR}/mortgage/csv/train/mortgage_train_merged.csv \ - -dataPath=trans::${SPARK_XGBOOST_DIR}/mortgage/csv/test/mortgage_eval_merged.csv \ - -format=csv \ + -dataPath=train::${SPARK_XGBOOST_DIR}/mortgage/output/train/ \ + -dataPath=trans::${SPARK_XGBOOST_DIR}/mortgage/output/eval/ \ + -format=parquet \ -numWorkers=${SPARK_NUM_EXECUTORS} \ -treeMethod=${TREE_METHOD} \ -numRound=100 \ @@ -229,7 +260,7 @@ export SPARK_DRIVER_MEMORY=4g export SPARK_EXECUTOR_MEMORY=8g # example class to use -export EXAMPLE_CLASS=com.nvidia.spark.examples.mortgage.CPUMain +export EXAMPLE_CLASS=com.nvidia.spark.examples.mortgage.Main # Please make sure to change the class while running Taxi or Agaricus benchmark # tree construction algorithm @@ -238,7 +269,7 @@ export TREE_METHOD=hist This is the same command as for the GPU example, repeated for convenience: -``` bash +```bash ${SPARK_HOME}/bin/spark-submit \ --master ${SPARK_MASTER} \ --driver-memory ${SPARK_DRIVER_MEMORY} \ @@ -246,9 +277,9 @@ ${SPARK_HOME}/bin/spark-submit --conf spark.cores.max=${TOTAL_CORES} \ --class ${EXAMPLE_CLASS} \ ${SAMPLE_JAR} \ - -dataPath=train::${SPARK_XGBOOST_DIR}/mortgage/csv/train/mortgage_train_merged.csv \ - -dataPath=trans::${SPARK_XGBOOST_DIR}/mortgage/csv/test/mortgage_eval_merged.csv \ - -format=csv \ + -dataPath=train::${SPARK_XGBOOST_DIR}/mortgage/output/train/ \ + -dataPath=trans::${SPARK_XGBOOST_DIR}/mortgage/output/eval/ \ + -format=parquet \ -numWorkers=${SPARK_NUM_EXECUTORS} \ -treeMethod=${TREE_METHOD} \ -numRound=100 \ diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/yarn-python.md b/docs/get-started/xgboost-examples/on-prem-cluster/yarn-python.md index 7966791a2..9d92da01a 100644 --- a/docs/get-started/xgboost-examples/on-prem-cluster/yarn-python.md +++ b/docs/get-started/xgboost-examples/on-prem-cluster/yarn-python.md @@ -47,25 +47,35 @@ Then create a directory in HDFS, and run below commands, Launch Mortgage or Taxi ETL Part --------------------------- -Run spark-submit: +Use the ETL app to process raw Mortgage data. You can either use this ETLed data to split into training and evaluation data or run the ETL on different subsets of the dataset to produce training and evaluation datasets. + +Note: For ETL jobs, Set `spark.task.resource.gpu.amount` to `1/spark.executor.cores`. 
``` bash # location where data was downloaded export DATA_PATH=hdfs:/tmp/xgboost4j_spark_python/ ${SPARK_HOME}/bin/spark-submit \ --master yarn - --deploy-mode cluster + --master yarn \ + --deploy-mode cluster \ + --conf spark.executor.cores=10 \ + --conf spark.task.resource.gpu.amount=0.1 \ + --conf spark.rapids.sql.incompatibleDateFormats.enabled=true \ + --conf spark.rapids.sql.csv.read.double.enabled=true \ + --conf spark.sql.cache.serializer=com.nvidia.spark.ParquetCachedBatchSerializer \ + --conf spark.rapids.sql.hasNans=false \ --jars ${RAPIDS_JAR}\ ${MAIN_PY} \ --mainClass='com.nvidia.spark.examples.mortgage.etl_main' \ --format=csv \ - --dataPath="perf::${DATA_PATH}/mortgage/data/mortgage/perf/" \ - --dataPath="acq::${DATA_PATH}/mortgage/data/mortgage/acq/" \ - --dataPath="out::${DATA_PATH}/mortgage/data/mortgage/out/train/" - -# if generate eval data, change the data path to eval -# --dataPath="out::${DATA_PATH}/mortgage/data/mortgage/out/eval/ + --dataPath="data::${DATA_PATH}/mortgage/data/mortgage/input/" \ + --dataPath="out::${DATA_PATH}/mortgage/data/mortgage/output/train/" \ + --dataPath="tmp::${SPARK_XGBOOST_DIR}/mortgage/output/tmp/" + +# if generating eval data, change the data path to eval +# --dataPath="data::${SPARK_XGBOOST_DIR}/mortgage/input/" +# --dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/output/eval/" +# --dataPath="tmp::${SPARK_XGBOOST_DIR}/mortgage/output/tmp/" # if running Taxi ETL benchmark, change the class and data path params to # -class com.nvidia.spark.examples.taxi.ETLMain # -dataPath="raw::${SPARK_XGBOOST_DIR}/taxi/your-path" @@ -111,9 +121,10 @@ Run spark-submit: ``` bash ${SPARK_HOME}/bin/spark-submit \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ - --conf spark.rapids.memory.gpu.pooling.enabled=false \ + --conf spark.rapids.memory.gpu.pool=NONE \ --conf spark.executor.resource.gpu.amount=1 \ --conf spark.task.resource.gpu.amount=1 \ + --conf spark.rapids.sql.hasNans=false \ --conf spark.executor.resource.gpu.discoveryScript=./getGpusResources.sh \ --files ${SPARK_HOME}/examples/src/main/scripts/getGpusResources.sh \ --master yarn \ @@ -194,8 +205,8 @@ ${SPARK_HOME}/bin/spark-submit --py-files ${XGBOOST4J_SPARK_JAR},${SAMPLE_ZIP} \ ${MAIN_PY} \ --mainClass=${EXAMPLE_CLASS} \ - --dataPath=train::${DATA_PATH}/mortgage/out/train/ \ - --dataPath=trans::${DATA_PATH}/mortgage/out/eval/ \ + --dataPath=train::${DATA_PATH}/mortgage/output/train/ \ + --dataPath=trans::${DATA_PATH}/mortgage/output/eval/ \ --format=parquet \ --numWorkers=${SPARK_NUM_EXECUTORS} \ --treeMethod=${TREE_METHOD} \ diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/yarn-scala.md b/docs/get-started/xgboost-examples/on-prem-cluster/yarn-scala.md index 9e6e4367b..dc6918ee4 100644 --- a/docs/get-started/xgboost-examples/on-prem-cluster/yarn-scala.md +++ b/docs/get-started/xgboost-examples/on-prem-cluster/yarn-scala.md @@ -35,6 +35,11 @@ Get Jars and Dataset Make sure you have prepared the necessary packages and dataset by following this [guide](/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md) +#### Note: +1. Mortgage and Taxi jobs have ETLs to generate the processed data. +2. For convenience, a subset of the [Taxi](/datasets/) dataset is made available in this repo that can be readily used for launching the XGBoost job. Use [ETL](#etl) to generate larger datasets for training and testing. +3. Agaricus does not have an ETL process, it is combined with XGBoost as there is just a filter operation. 
+ Create a directory in HDFS, and copy: ``` bash @@ -45,19 +50,24 @@ Create a directory in HDFS, and copy: Launch Mortgage or Taxi ETL Part --------------------------- -Note: the `mortgage_eval_merged.csv` and `mortgage_train_merged.csv` are not Mortgage raw data, -they are the data produced by Mortgage ETL job. If user wants to use a larger size Mortgage data, please refer to [Launch ETL job](#etl). -Taxi ETL job is the same. But Agaricus does not have ETL process, it is combined with XGBoost as there is just a filter operation. +Use the ETL app to process raw Mortgage data. You can either use this ETLed data to split into training and evaluation data or run the ETL on different subsets of the dataset to produce training and evaluation datasets. + +Note: For ETL jobs, Set `spark.task.resource.gpu.amount` to `1/spark.executor.cores`. + Run spark-submit ``` bash ${SPARK_HOME}/bin/spark-submit \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ - --conf spark.rapids.memory.gpu.pooling.enabled=false \ --conf spark.executor.resource.gpu.amount=1 \ - --conf spark.task.resource.gpu.amount=1 \ + --conf spark.executor.cores=10 \ + --conf spark.task.resource.gpu.amount=0.1 \ + --conf spark.rapids.sql.incompatibleDateFormats.enabled=true \ + --conf spark.rapids.sql.csv.read.double.enabled=true \ --conf spark.executor.resource.gpu.discoveryScript=./getGpusResources.sh \ + --conf spark.sql.cache.serializer=com.nvidia.spark.ParquetCachedBatchSerializer \ + --conf spark.rapids.sql.hasNans=false \ --files $SPARK_HOME/examples/src/main/scripts/getGpusResources.sh \ --jars ${RAPIDS_JAR} \ --master yarn \ @@ -65,18 +75,17 @@ ${SPARK_HOME}/bin/spark-submit \ --num-executors ${SPARK_NUM_EXECUTORS} \ --driver-memory ${SPARK_DRIVER_MEMORY} \ --executor-memory ${SPARK_EXECUTOR_MEMORY} \ - --class ${EXAMPLE_CLASS} \ --class com.nvidia.spark.examples.mortgage.ETLMain \ $SAMPLE_JAR \ -format=csv \ - -dataPath="perf::${SPARK_XGBOOST_DIR}/mortgage/perf-train/" \ - -dataPath="acq::${SPARK_XGBOOST_DIR}/mortgage/acq-train/" \ - -dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/out/train/" - -# if generating eval data, change the data path to eval as well as the corresponding perf-eval and acq-eval data -# -dataPath="perf::${SPARK_XGBOOST_DIR}/mortgage/perf-eval" -# -dataPath="acq::${SPARK_XGBOOST_DIR}/mortgage/acq-eval" -# -dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/out/eval/" + -dataPath="data::${SPARK_XGBOOST_DIR}/mortgage/input/" \ + -dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/output/train/" \ + -dataPath="tmp::${SPARK_XGBOOST_DIR}/mortgage/output/tmp/" + +# if generating eval data, change the data path to eval +# -dataPath="data::${SPARK_XGBOOST_DIR}/mortgage/input/" +# -dataPath="out::${SPARK_XGBOOST_DIR}/mortgage/output/eval/" +# -dataPath="tmp::${SPARK_XGBOOST_DIR}/mortgage/output/tmp/" # if running Taxi ETL benchmark, change the class and data path params to # -class com.nvidia.spark.examples.taxi.ETLMain # -dataPath="raw::${SPARK_XGBOOST_DIR}/taxi/your-path" @@ -106,9 +115,9 @@ export SPARK_DRIVER_MEMORY=4g export SPARK_EXECUTOR_MEMORY=8g # example class to use -export EXAMPLE_CLASS=com.nvidia.spark.examples.mortgage.GPUMain -# or change to com.nvidia.spark.examples.taxi.GPUMain to run Taxi Xgboost benchmark -# or change to com.nvidia.spark.examples.agaricus.GPUMain to run Agaricus Xgboost benchmark +export EXAMPLE_CLASS=com.nvidia.spark.examples.mortgage.Main +# or change to com.nvidia.spark.examples.taxi.Main to run Taxi Xgboost benchmark +# or change to com.nvidia.spark.examples.agaricus.Main to run Agaricus 
Xgboost benchmark # tree construction algorithm export TREE_METHOD=gpu_hist @@ -119,9 +128,10 @@ Run spark-submit: ``` bash ${SPARK_HOME}/bin/spark-submit \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ - --conf spark.rapids.memory.gpu.pooling.enabled=false \ + --conf spark.rapids.memory.gpu.pool=NONE \ --conf spark.executor.resource.gpu.amount=1 \ --conf spark.task.resource.gpu.amount=1 \ + --conf spark.rapids.sql.hasNans=false \ --conf spark.executor.resource.gpu.discoveryScript=./getGpusResources.sh \ --files $SPARK_HOME/examples/src/main/scripts/getGpusResources.sh \ --jars ${RAPIDS_JAR} \ @@ -132,9 +142,9 @@ ${SPARK_HOME}/bin/spark-submit --executor-memory ${SPARK_EXECUTOR_MEMORY} \ --class ${EXAMPLE_CLASS} \ ${SAMPLE_JAR} \ - -dataPath=train::${DATA_PATH}/mortgage/csv/train/mortgage_train_merged.csv \ - -dataPath=trans::${DATA_PATH}/mortgage/csv/test/mortgage_eval_merged.csv \ - -format=csv \ + -dataPath=train::${SPARK_XGBOOST_DIR}/mortgage/output/train/ \ + -dataPath=trans::${SPARK_XGBOOST_DIR}/mortgage/output/eval/ \ + -format=parquet \ -numWorkers=${SPARK_NUM_EXECUTORS} \ -treeMethod=${TREE_METHOD} \ -numRound=100 \ @@ -181,7 +191,7 @@ export SPARK_DRIVER_MEMORY=4g export SPARK_EXECUTOR_MEMORY=8g # example class to use -export EXAMPLE_CLASS=com.nvidia.spark.examples.mortgage.CPUMain +export EXAMPLE_CLASS=com.nvidia.spark.examples.mortgage.Main # Please make sure to change the class while running Taxi or Agaricus benchmark # tree construction algorithm @@ -199,9 +209,9 @@ ${SPARK_HOME}/bin/spark-submit --executor-memory ${SPARK_EXECUTOR_MEMORY} \ --class ${EXAMPLE_CLASS} \ ${SAMPLE_JAR} \ - -dataPath=train::${DATA_PATH}/mortgage/csv/train/mortgage_train_merged.csv \ - -dataPath=trans::${DATA_PATH}/mortgage/csv/test/mortgage_eval_merged.csv \ - -format=csv \ + -dataPath=train::${SPARK_XGBOOST_DIR}/mortgage/output/train/ \ + -dataPath=trans::${SPARK_XGBOOST_DIR}/mortgage/output/eval/ \ + -format=parquet \ -numWorkers=${SPARK_NUM_EXECUTORS} \ -treeMethod=${TREE_METHOD} \ -numRound=100 \ diff --git a/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md b/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md index cbeeccdbb..6f511be5b 100644 --- a/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md +++ b/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md @@ -9,7 +9,7 @@ For simplicity export the location to these jars. All examples assume the packag * [XGBoost4j-Spark Package](https://repo1.maven.org/maven2/com/nvidia/xgboost4j-spark_3.0/1.4.2-0.3.0/) 2. Download the RAPIDS Accelerator for Apache Spark plugin jar - * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.06.0/rapids-4-spark_2.12-22.06.0.jar) + * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.08.0/rapids-4-spark_2.12-22.08.0.jar) ### Build XGBoost Python Examples @@ -17,15 +17,16 @@ Following this [guide](/docs/get-started/xgboost-examples/building-sample-apps/p ### Download dataset -You need to download Mortgage dataset to `/opt/xgboost` from this [site](https://docs.rapids.ai/datasets/mortgage-data) -, download Taxi dataset from this [site](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page) -, download Agaricus dataset from this [site](https://gust.dev/r/xgboost-agaricus). +You need to copy the dataset to `/opt/xgboost`. Use the following links to download the data. +1. 
[Mortgage dataset](/docs/get-started/xgboost-examples/dataset/mortgage.md) +2. [Taxi dataset](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page) +3. [Agaricus dataset](https://gust.dev/r/xgboost-agaricus) ### Setup environments ``` bash export SPARK_XGBOOST_DIR=/opt/xgboost -export RAPIDS_JAR=${SPARK_XGBOOST_DIR}/rapids-4-spark_2.12-22.06.0.jar +export RAPIDS_JAR=${SPARK_XGBOOST_DIR}/rapids-4-spark_2.12-22.08.0.jar export XGBOOST4J_JAR=${SPARK_XGBOOST_DIR}/xgboost4j_3.0-1.4.2-0.3.0.jar export XGBOOST4J_SPARK_JAR=${SPARK_XGBOOST_DIR}/xgboost4j-spark_3.0-1.4.2-0.3.0.jar export SAMPLE_ZIP=${SPARK_XGBOOST_DIR}/samples.zip diff --git a/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md b/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md index a5f451778..e5bf88571 100644 --- a/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md +++ b/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md @@ -5,7 +5,7 @@ For simplicity export the location to these jars. All examples assume the packag ### Download the jars 1. Download the RAPIDS Accelerator for Apache Spark plugin jar - * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.06.0/rapids-4-spark_2.12-22.06.0.jar) + * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.08.0/rapids-4-spark_2.12-22.08.0.jar) ### Build XGBoost Scala Examples @@ -13,14 +13,15 @@ Following this [guide](/docs/get-started/xgboost-examples/building-sample-apps/s ### Download dataset -You need to download mortgage dataset to `/opt/xgboost` from this [site](https://docs.rapids.ai/datasets/mortgage-data) -, download Taxi dataset from this [site](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page) -, download Agaricus dataset from this [site](https://gust.dev/r/xgboost-agaricus). +You need to copy the dataset to `/opt/xgboost`. Use the following links to download the data. +1. [Mortgage dataset](/docs/get-started/xgboost-examples/dataset/mortgage.md) +2. [Taxi dataset](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page) +3. 
[Agaricus dataset](https://gust.dev/r/xgboost-agaricus) ### Setup environments ``` bash export SPARK_XGBOOST_DIR=/opt/xgboost -export RAPIDS_JAR=${SPARK_XGBOOST_DIR}/rapids-4-spark_2.12-22.06.0.jar +export RAPIDS_JAR=${SPARK_XGBOOST_DIR}/rapids-4-spark_2.12-22.08.0.jar export SAMPLE_JAR=${SPARK_XGBOOST_DIR}/sample_xgboost_apps-0.2.3-jar-with-dependencies.jar ``` diff --git a/docs/img/guides/cuspatial/Nycd-Community-Districts.png b/docs/img/guides/cuspatial/Nycd-Community-Districts.png new file mode 100644 index 000000000..fa96b3b60 Binary files /dev/null and b/docs/img/guides/cuspatial/Nycd-Community-Districts.png differ diff --git a/docs/img/guides/cuspatial/Nyct2000.png b/docs/img/guides/cuspatial/Nyct2000.png new file mode 100644 index 000000000..055f3de8f Binary files /dev/null and b/docs/img/guides/cuspatial/Nyct2000.png differ diff --git a/docs/img/guides/cuspatial/install-jar.png b/docs/img/guides/cuspatial/install-jar.png new file mode 100644 index 000000000..0d11c81ec Binary files /dev/null and b/docs/img/guides/cuspatial/install-jar.png differ diff --git a/docs/img/guides/cuspatial/sample-polygon.png b/docs/img/guides/cuspatial/sample-polygon.png new file mode 100644 index 000000000..f8afb907f Binary files /dev/null and b/docs/img/guides/cuspatial/sample-polygon.png differ diff --git a/docs/img/guides/cuspatial/taxi-zones.png b/docs/img/guides/cuspatial/taxi-zones.png new file mode 100644 index 000000000..a8682cb03 Binary files /dev/null and b/docs/img/guides/cuspatial/taxi-zones.png differ diff --git a/docs/img/guides/mortgage-perf.png b/docs/img/guides/mortgage-perf.png index 0548ffd0e..23715ce9a 100644 Binary files a/docs/img/guides/mortgage-perf.png and b/docs/img/guides/mortgage-perf.png differ diff --git a/examples/ML+DL-Examples/Spark-DL/criteo_train/Dockerfile.conda_db b/examples/ML+DL-Examples/Spark-DL/criteo_train/Dockerfile.conda_db index 135a3328b..475b99149 100644 --- a/examples/ML+DL-Examples/Spark-DL/criteo_train/Dockerfile.conda_db +++ b/examples/ML+DL-Examples/Spark-DL/criteo_train/Dockerfile.conda_db @@ -13,15 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu20.04 - +FROM nvidia/cuda:11.4.3-cudnn8-devel-ubuntu20.04 ENV DEBIAN_FRONTEND=noninteractive # Disable NVIDIA repos to prevent accidental upgrades. 
RUN cd /etc/apt/sources.list.d && \ - mv cuda.list cuda.list.disabled && \ - mv nvidia-ml.list nvidia-ml.list.disabled + mv cuda.list cuda.list.disabled # See https://github.com/databricks/containers/blob/master/ubuntu/minimal/Dockerfile RUN apt-get update && \ @@ -52,21 +49,20 @@ RUN wget -q https://repo.continuum.io/miniconda/Miniconda3-py38_4.9.2-Linux-x86_ conda clean --all # install openjdk8, cmake, openmpi openmpi-mpicc -RUN conda install cmake openmpi openmpi-mpicc -y -RUN pip install jupyter +RUN conda install cmake openmpi openmpi-mpicc -y ENV JAVA_HOME /usr/lib/jvm/java-1.8.0-openjdk-amd64 ENV PATH $PATH:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/bin:/usr/lib/jvm/java-1.8.0-openjdk-amd64/bin -RUN pip uninstall tensorflow -y; pip install tensorflow +RUN conda install -y -c nvidia -c rapidsai -c numba -c conda-forge nvtabular=1.2.2 python=3.8 cudatoolkit=11.4 scikit-learn -RUN HOROVOD_WITH_MPI=1 HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_WITH_TENSORFLOW=1 \ +RUN pip uninstall tensorflow -y; pip install tensorflow-gpu==2.8 +RUN pip install torch==1.11.0+cu115 torchvision==0.12.0+cu115 torchaudio===0.11.0+cu115 -f https://download.pytorch.org/whl/cu115/torch_stable.html +RUN rm -rf /databricks/conda/include/google +RUN HOROVOD_WITH_MPI=1 HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 \ pip install horovod[spark] --no-cache-dir +RUN pip install pynvml jupyter matplotlib -RUN conda install -c nvidia -c rapidsai -c numba -c conda-forge nvtabular=0.9.0 python=3.8 cudatoolkit=11.2 -RUN pip install pynvml -RUN conda install -c conda-forge ipython==7.19.0 matplotlib==3.4.2 jinja2==2.11.3 -RUN pip uninstall pandas -y; pip install pandas==1.1.5 RUN apt-get update && apt-get install wget openssh-client openssh-server \ -y --allow-downgrades --allow-change-held-packages --no-install-recommends RUN useradd --create-home --shell /bin/bash --groups sudo ubuntu @@ -75,6 +71,8 @@ ENV PYSPARK_PYTHON=/databricks/conda/bin/python ENV USER root ENV DEFAULT_DATABRICKS_ROOT_CONDA_ENV=base ENV DATABRICKS_ROOT_CONDA_ENV=base +# disable gds due to errors +ENV LIBCUDF_CUFILE_POLICY=OFF # required by DB RUN pip install virtualenv RUN pip install adlfs diff --git a/examples/ML+DL-Examples/Spark-cuML/pca/Dockerfile b/examples/ML+DL-Examples/Spark-cuML/pca/Dockerfile index 3f0ae80e8..ea40e1ed4 100644 --- a/examples/ML+DL-Examples/Spark-cuML/pca/Dockerfile +++ b/examples/ML+DL-Examples/Spark-cuML/pca/Dockerfile @@ -17,7 +17,7 @@ ARG CUDA_VER=11.5.1 FROM nvidia/cuda:${CUDA_VER}-devel-ubuntu20.04 -ARG BRANCH_VER=22.06 +ARG BRANCH_VER=22.08 RUN apt-get update RUN apt-get install -y wget ninja-build git diff --git a/examples/ML+DL-Examples/Spark-cuML/pca/README.md b/examples/ML+DL-Examples/Spark-cuML/pca/README.md index f844553b5..4c1d9e861 100644 --- a/examples/ML+DL-Examples/Spark-cuML/pca/README.md +++ b/examples/ML+DL-Examples/Spark-cuML/pca/README.md @@ -12,7 +12,7 @@ User can also download the release jar from Maven central: [rapids-4-spark-ml_2.12-22.02.0-cuda11.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark-ml_2.12/22.02.0/rapids-4-spark-ml_2.12-22.02.0-cuda11.jar) -[rapids-4-spark_2.12-22.06.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.06.0/rapids-4-spark_2.12-22.06.0.jar) +[rapids-4-spark_2.12-22.08.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.08.0/rapids-4-spark_2.12-22.08.0.jar) ## Sample code @@ -48,7 +48,7 @@ It is assumed that a Standalone Spark cluster has been set up, the `SPARK_MASTER ``` bash 
RAPIDS_ML_JAR=PATH_TO_rapids-4-spark-ml_2.12-22.02.0-cuda11.jar - PLUGIN_JAR=PATH_TO_rapids-4-spark_2.12-22.06.0.jar + PLUGIN_JAR=PATH_TO_rapids-4-spark_2.12-22.08.0.jar jupyter toree install \ --spark_home=${SPARK_HOME} \ diff --git a/examples/ML+DL-Examples/Spark-cuML/pca/pom.xml b/examples/ML+DL-Examples/Spark-cuML/pca/pom.xml index 6a856d9f9..7a7b399d5 100644 --- a/examples/ML+DL-Examples/Spark-cuML/pca/pom.xml +++ b/examples/ML+DL-Examples/Spark-cuML/pca/pom.xml @@ -21,7 +21,7 @@ com.nvidia PCAExample jar - 22.06.0-SNAPSHOT + 22.08.0-SNAPSHOT 8 @@ -51,7 +51,7 @@ com.nvidia rapids-4-spark-ml_2.12 - 22.06.0-SNAPSHOT + 22.08.0-SNAPSHOT diff --git a/examples/ML+DL-Examples/Spark-cuML/pca/spark-submit.sh b/examples/ML+DL-Examples/Spark-cuML/pca/spark-submit.sh index 4e5f796d1..03381d8e9 100755 --- a/examples/ML+DL-Examples/Spark-cuML/pca/spark-submit.sh +++ b/examples/ML+DL-Examples/Spark-cuML/pca/spark-submit.sh @@ -15,8 +15,8 @@ # limitations under the License. # -ML_JAR=/root/.m2/repository/com/nvidia/rapids-4-spark-ml_2.12/22.06.0-SNAPSHOT/rapids-4-spark-ml_2.12-22.06.0-SNAPSHOT.jar -PLUGIN_JAR=/root/.m2/repository/com/nvidia/rapids-4-spark_2.12/22.06.0-SNAPSHOT/rapids-4-spark_2.12-22.06.0-SNAPSHOT.jar +ML_JAR=/root/.m2/repository/com/nvidia/rapids-4-spark-ml_2.12/22.08.0-SNAPSHOT/rapids-4-spark-ml_2.12-22.08.0-SNAPSHOT.jar +PLUGIN_JAR=/root/.m2/repository/com/nvidia/rapids-4-spark_2.12/22.08.0-SNAPSHOT/rapids-4-spark_2.12-22.08.0-SNAPSHOT.jar $SPARK_HOME/bin/spark-submit \ --master spark://127.0.0.1:7077 \ @@ -38,4 +38,4 @@ $SPARK_HOME/bin/spark-submit \ --conf spark.network.timeout=1000s \ --jars $ML_JAR,$PLUGIN_JAR \ --class com.nvidia.spark.examples.pca.Main \ -/workspace/target/PCAExample-22.06.0-SNAPSHOT.jar +/workspace/target/PCAExample-22.08.0-SNAPSHOT.jar diff --git a/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-cpu.ipynb b/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-cpu.ipynb index a90f57aa6..ce5c5a797 100644 --- a/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-cpu.ipynb +++ b/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-cpu.ipynb @@ -120,7 +120,7 @@ "# By default, Spark will try to distribute the data among all the tasks in the cluster, \n", "# but on large clusters with large parquet files the splittable portions of the parquet files end up not being distributed evenly \n", "# and it is faster to re-partition the data to redistribute it than to deal with skew.\n", - "spark.read.parquet(\"hdfs:///data/tpcds_sf3000-parquet/useDecimal=true,useDate=true,filterNull=false/customer\").repartition(512).createOrReplaceTempView(\"customer\")\n", + "spark.read.parquet(dataRoot + \"/tpcds/customer\").repartition(512).createOrReplaceTempView(\"customer\")\n", "\n", "print(\"-\"*50)" ] diff --git a/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb b/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb index a72b24727..cc1d11331 100644 --- a/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb +++ b/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb @@ -22,7 +22,7 @@ "import os\n", "# Change to your cluster ip:port and directories\n", "SPARK_MASTER_URL = os.getenv(\"SPARK_MASTER_URL\", \"spark:your-ip:port\")\n", - "RAPIDS_JAR = os.getenv(\"RAPIDS_JAR\", \"/your-path/rapids-4-spark_2.12-22.06.0.jar\")\n" + "RAPIDS_JAR = os.getenv(\"RAPIDS_JAR\", 
\"/your-path/rapids-4-spark_2.12-22.08.0.jar\")\n" ] }, { @@ -497,7 +497,7 @@ ], "source": [ "start = time() \n", - "spark.read.parquet(\"hdfs:///data/tpcds_sf3000-parquet/useDecimal=true,useDate=true,filterNull=false/customer\").limit(1000000).write.format(\"parquet\").mode(\"overwrite\").save(\"/data/tmp/customer1m\")\n", + "spark.read.parquet(dataRoot + \"/tpcds/customer\").limit(1000000).write.format(\"parquet\").mode(\"overwrite\").save(\"/data/tmp/customer1m\")\n", "end = time()\n", "# Parquet file scanning and writing will be about 3 times faster running on GPU\n", "print(\"scanning and writing parquet cost : {} seconds\".format(round(end - start, 2)))\n", diff --git a/examples/UDF-Examples/RAPIDS-accelerated-UDFs/README.md b/examples/UDF-Examples/RAPIDS-accelerated-UDFs/README.md index d924f4b06..242719b2e 100644 --- a/examples/UDF-Examples/RAPIDS-accelerated-UDFs/README.md +++ b/examples/UDF-Examples/RAPIDS-accelerated-UDFs/README.md @@ -108,7 +108,7 @@ See above Prerequisites section First finish the steps in "Building with Native Code Examples and run test cases" section, then do the following in the docker. ### Get jars from Maven Central -[rapids-4-spark_2.12-22.06.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.06.0/rapids-4-spark_2.12-22.06.0.jar) +[rapids-4-spark_2.12-22.08.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.08.0/rapids-4-spark_2.12-22.08.0.jar) ### Launch a local mode Spark diff --git a/examples/UDF-Examples/RAPIDS-accelerated-UDFs/pom.xml b/examples/UDF-Examples/RAPIDS-accelerated-UDFs/pom.xml index 237d2d2cb..95f18cbc4 100644 --- a/examples/UDF-Examples/RAPIDS-accelerated-UDFs/pom.xml +++ b/examples/UDF-Examples/RAPIDS-accelerated-UDFs/pom.xml @@ -25,7 +25,7 @@ user defined functions for use with the RAPIDS Accelerator for Apache Spark - 22.06.0-SNAPSHOT + 22.08.0-SNAPSHOT 1.8 @@ -37,7 +37,7 @@ cuda11 2.12 - 22.06.0 + 22.08.0 3.1.1 2.12.15 ${project.build.directory}/cpp-build diff --git a/examples/UDF-Examples/RAPIDS-accelerated-UDFs/src/main/cpp/CMakeLists.txt b/examples/UDF-Examples/RAPIDS-accelerated-UDFs/src/main/cpp/CMakeLists.txt index 11b21ae15..b9b4929d5 100755 --- a/examples/UDF-Examples/RAPIDS-accelerated-UDFs/src/main/cpp/CMakeLists.txt +++ b/examples/UDF-Examples/RAPIDS-accelerated-UDFs/src/main/cpp/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) -file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.06/RAPIDS.cmake +file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.08/RAPIDS.cmake ${CMAKE_BINARY_DIR}/RAPIDS.cmake) include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) @@ -32,7 +32,7 @@ if(DEFINED GPU_ARCHS) endif() rapids_cuda_init_architectures(UDFEXAMPLESJNI) -project(UDFEXAMPLESJNI VERSION 22.06.0 LANGUAGES C CXX CUDA) +project(UDFEXAMPLESJNI VERSION 22.08.0 LANGUAGES C CXX CUDA) option(PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" OFF) option(BUILD_UDF_BENCHMARKS "Build the benchmarks" OFF) @@ -84,10 +84,10 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -w --expt-extended-lambda --expt-relax set(CUDA_USE_STATIC_CUDA_RUNTIME OFF) rapids_cpm_init() -rapids_cpm_find(cudf 22.06.00 +rapids_cpm_find(cudf 22.08.00 CPM_ARGS GIT_REPOSITORY https://github.com/rapidsai/cudf.git - GIT_TAG branch-22.06 + GIT_TAG branch-22.08 GIT_SHALLOW TRUE SOURCE_SUBDIR cpp OPTIONS "BUILD_TESTS OFF" diff --git a/examples/UDF-Examples/Spark-cuSpatial/Dockerfile b/examples/UDF-Examples/Spark-cuSpatial/Dockerfile index 
253ecc290..6d81a260c 100644 --- a/examples/UDF-Examples/Spark-cuSpatial/Dockerfile +++ b/examples/UDF-Examples/Spark-cuSpatial/Dockerfile @@ -18,6 +18,7 @@ ARG CUDA_VER=11.2.2 FROM nvidia/cuda:${CUDA_VER}-devel-ubuntu18.04 +RUN apt-key adv --fetch-keys https://developer.download.nvidia.cn/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub RUN apt-get update RUN apt-get install -y wget ninja-build git @@ -38,11 +39,11 @@ RUN conda --version RUN conda install -c conda-forge openjdk=8 maven=3.8.1 -y # install cuDF dependency. -RUN conda install -c rapidsai-nightly -c nvidia -c conda-forge -c defaults libcuspatial=22.06 python=3.8 -y +RUN conda install -c rapidsai -c nvidia -c conda-forge -c defaults libcuspatial=22.08 python=3.8 -y RUN wget --quiet \ https://github.com/Kitware/CMake/releases/download/v3.21.3/cmake-3.21.3-linux-x86_64.tar.gz \ && tar -xzf cmake-3.21.3-linux-x86_64.tar.gz \ && rm -rf cmake-3.21.3-linux-x86_64.tar.gz -ENV PATH="/cmake-3.21.3-linux-x86_64/bin:${PATH}" +ENV PATH="/cmake-3.21.3-linux-x86_64/bin:${PATH}" \ No newline at end of file diff --git a/examples/UDF-Examples/Spark-cuSpatial/Dockerfile.awsdb b/examples/UDF-Examples/Spark-cuSpatial/Dockerfile.awsdb index 73a3e8646..98839d1ed 100644 --- a/examples/UDF-Examples/Spark-cuSpatial/Dockerfile.awsdb +++ b/examples/UDF-Examples/Spark-cuSpatial/Dockerfile.awsdb @@ -18,6 +18,9 @@ FROM nvidia/cuda:11.2.2-devel-ubuntu18.04 ENV DEBIAN_FRONTEND=noninteractive +# update cuda pub key to avoid GPG error +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub + # See https://github.com/databricks/containers/blob/master/ubuntu/minimal/Dockerfile RUN apt-get update && \ apt-get install --yes --no-install-recommends \ @@ -45,7 +48,7 @@ RUN wget -q https://repo.continuum.io/miniconda/Miniconda3-py38_4.9.2-Linux-x86_ conda config --system --set always_yes True && \ conda clean --all -RUN conda install -c rapidsai-nightly -c nvidia -c conda-forge -c defaults libcuspatial=22.06 +RUN conda install -c rapidsai-nightly -c nvidia -c conda-forge -c defaults libcuspatial=22.08 RUN conda install -c conda-forge libgdal==3.3.1 RUN pip install jupyter ENV JAVA_HOME /usr/lib/jvm/java-1.8.0-openjdk-amd64 diff --git a/examples/UDF-Examples/Spark-cuSpatial/README.md b/examples/UDF-Examples/Spark-cuSpatial/README.md index 49598396e..b90b34be7 100644 --- a/examples/UDF-Examples/Spark-cuSpatial/README.md +++ b/examples/UDF-Examples/Spark-cuSpatial/README.md @@ -5,19 +5,39 @@ It implements a [RapidsUDF](https://nvidia.github.io/spark-rapids/docs/additiona interface to call the cuSpatial functions through JNI. It can be run on a distributed Spark cluster with scalability. ## Performance -We got the end-2-end time as below table when running with 2009 NYC Taxi trip pickup location, -which includes 168,898,952 points, and 3 sets of polygons(taxi_zone, nyct2000, nycd). -The data can be downloaded from [TLC Trip Record Data](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page) -and [NYC Open data](https://www1.nyc.gov/site/planning/data-maps/open-data.page#district_political). -| Environment | Taxi_zones (263 Polygons) | Nyct2000 (2216 Polygons) | Nycd (71 Complex Polygons)| +The table below shows the end-to-end hot run times when running with the 2009 NYC Taxi trip pickup locations, +which include 170,896,055 points, and 3 sets of polygons (taxi_zone, nyct2000, nycd Community-Districts). 
+The point data can be downloaded from [TLC Trip Record Data](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page). +The polygon data can be downloaded from [taxi_zone dataset](https://data.cityofnewyork.us/Transportation/NYC-Taxi-Zones/d3c5-ddgc), +[nyct2000 dataset](https://data.cityofnewyork.us/City-Government/2000-Census-Tracts/ysjj-vb9j) and +[nycd Community-Districts dataset](https://data.cityofnewyork.us/City-Government/Community-Districts/yfnk-k7r4) + +| Environment | Taxi_zones (263 Polygons) | Nyct2000 (2216 Polygons) | Nycd Community-Districts (71 Complex Polygons)| | ----------- | :---------: | :---------: | :---------: | -| 4-core CPU | 1122.9 seconds | 5525.4 seconds| 6642.7 seconds | -| 1 GPU(Titan V) on local | 4.5 seconds | 5.7 seconds | 6.6 seconds| -| 2 GPU(T4) on Databricks | 9.1 seconds | 10.0 seconds | 12.1 seconds | +| 4-core CPU | 3.9 minutes | 4.0 minutes| 4.1 minutes | +| 1 GPU(T4) on Databricks | 25 seconds | 27 seconds | 28 seconds| +| 2 GPU(T4) on Databricks | 15 seconds | 14 seconds | 17 seconds | +| 4 GPU(T4) on Databricks | 11 seconds | 11 seconds | 12 seconds | + +Note: Please update the `x,y` column names to `Start_Lon,Start_Lat` in +the [notebook](./notebooks/cuspatial_sample_db.ipynb) if you test with the downloaded points. + +taxi-zones map: + + + +nyct2000 map: + + + +nyct-community-districts map: + + ## Build -You can build the jar file [in Docker](#build-in-docker) with the provided [Dockerfile](Dockerfile) -or you can build it [in local](#build-in-local) machine after some prerequisites. +First, build the UDF JAR from source before running this demo. +You can build the JAR [in Docker](#build-in-docker) with the provided [Dockerfile](Dockerfile), +or [on a local machine](#build-in-local-machine) after installing the prerequisites. ### Build in Docker 1. Build the docker image [Dockerfile](Dockerfile), then run the container. @@ -25,16 +45,18 @@ docker build -f Dockerfile . -t build-spark-cuspatial docker run -it build-spark-cuspatial bash ``` -2. Get the code, then run "mvn package". +2. Get the code, then run `mvn package`. ```Bash git clone https://github.com/NVIDIA/spark-rapids-examples.git cd spark-rapids-examples/examples/UDF-Examples/Spark-cuSpatial/ mvn package ``` -3. You'll get the jar named like "spark-cuspatial-.jar" in the target folder. +3. You'll get the jar named `spark-cuspatial-.jar` in the target folder. + +Note: The Docker environment is only for building the jar, not for running the application. -### Build in Local: -1. essential build tools: +### Build in local machine: +1. Essential build tools: - [cmake(>=3.20)](https://cmake.org/download/), - [ninja(>=1.10)](https://github.com/ninja-build/ninja/releases), - [gcc(>=9.3)](https://gcc.gnu.org/releases.html) @@ -42,40 +64,42 @@ or you can build it [in local](#build-in-local) machine after some prerequisites 3. conda: use [miniconda](https://docs.conda.io/en/latest/miniconda.html) to maintain header files and cmake dependecies 4. [cuspatial](https://github.com/rapidsai/cuspatial): install libcuspatial ```Bash - # get libcuspatial from conda - conda install -c rapidsai -c nvidia -c conda-forge -c defaults libcuspatial=22.04 + # Install libcuspatial from conda + conda install -c rapidsai -c nvidia -c conda-forge -c defaults libcuspatial=22.06 # or below command for the nightly (aka SNAPSHOT) version. 
- conda install -c rapidsai-nightly -c nvidia -c conda-forge -c defaults libcuspatial=22.06 + conda install -c rapidsai-nightly -c nvidia -c conda-forge -c defaults libcuspatial=22.08 ``` -5. Get the code, then run "mvn package". +5. Build the JAR using `mvn package`. ```Bash git clone https://github.com/NVIDIA/spark-rapids-examples.git cd spark-rapids-examples/examples/Spark-cuSpatial/ mvn package ``` -6. You'll get "spark-cuspatial-.jar" in the target folder. - +6. `spark-cuspatial-.jar` will be generated in the target folder. ## Run -### Run on-premises clusters: standalone +### GPU Demo on Spark Standalone on-premises cluster 1. Install necessary libraries. Besides `cudf` and `cuspatial`, the `gdal` library that is compatible with the installed `cuspatial` may also be needed. - Install it by running the command below. ``` conda install -c conda-forge libgdal=3.3.1 ``` 2. Set up [a standalone cluster](/docs/get-started/xgboost-examples/on-prem-cluster/standalone-scala.md) of Spark. Make sure the conda/lib is included in LD_LIBRARY_PATH, so that spark executors can load libcuspatial.so. -3. Download spark-rapids jars - * [spark-rapids v22.06.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.06.0/rapids-4-spark_2.12-22.06.0.jar) or above -4. Prepare the dataset & jars. Copy the sample dataset from [cuspatial_data](../../../datasets/cuspatial_data.tar.gz) to "/data/cuspatial_data". - Copy spark-rapids & spark-cuspatial-22.06.0-SNAPSHOT.jar to "/data/cuspatial_data/jars". - You can use your own path, but remember to update the paths in "gpu-run.sh" accordingly. -5. Run "gpu-run.sh" +3. Download Spark RAPIDS JAR + * [Spark RAPIDS JAR v22.08.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.08.0/rapids-4-spark_2.12-22.08.0.jar) or above +4. Prepare sample dataset and JARs. Copy the [sample dataset](../../../datasets/cuspatial_data.tar.gz) to `/data/cuspatial_data/`. + Copy Spark RAPIDS JAR and `spark-cuspatial-.jar` to `/data/cuspatial_data/jars/`. + If you build the `spark-cuspatial-.jar` in docker, please copy the jar from docker to local: + ``` + docker cp YOUR_DOCKER_CONTAINER:/PATH/TO/spark-cuspatial-.jar ./YOUR_LOCAL_PATH + ``` + Note: update the paths in `gpu-run.sh` accordingly. +5. Run `gpu-run.sh` ```Bash ./gpu-run.sh ``` -### Run on AWS Databricks -1. Build the customized docker image [Dockerfile.awsdb](Dockerfile.awsdb) and push to dockerhub so that it can be accessible by AWS Databricks. +### GPU Demo on AWS Databricks +1. Build a customized docker image using [Dockerfile.awsdb](Dockerfile.awsdb) and push to a Docker registry such as [Docker Hub](https://hub.docker.com/) which can be accessible by AWS Databricks. ```Bash # replace your dockerhub repo, your tag or any other repo AWS DB can access docker build -f Dockerfile.awsdb . -t : @@ -83,15 +107,15 @@ or you can build it [in local](#build-in-local) machine after some prerequisites ``` 2. Follow the [Spark-rapids get-started document](https://nvidia.github.io/spark-rapids/docs/get-started/getting-started-databricks.html#start-a-databricks-cluster) to create a GPU cluster on AWS Databricks. - Something different from the document. + Below are some different steps since a custom docker image is used with Databricks: * Databricks Runtime Version - You should choose a Standard version of the Runtime version like "Runtime: 9.1 LTS(Scala 2.12, Spark 3.1.2)" and - choose GPU instance type like "g4dn.xlarge". Note that ML runtime does not support customized docker container. 
- If you choose a ML version, it says "Support for Databricks container services requires runtime version 5.3+" - and the "Confirm" button is disabled. + Choose a non-ML Databricks Runtime such as `Runtime: 9.1 LTS(Scala 2.12, Spark 3.1.2)` and + choose GPU AWS instance type such as `g4dn.xlarge`. Note that ML runtime does not support customized docker container with below messages: +`Support for Databricks container services requires runtime version 5.3+` + and the `Confirm` button is disabled. * Use your own Docker container - Input "Docker Image URL" as "your-dockerhub-repo:your-tag" - * For the other configurations, you can follow the get-started document. + Input `Docker Image URL` as `your-dockerhub-repo:your-tag` + * Follow the Databricks get-started document for other steps. 3. Copy the sample [cuspatial_data.tar.gz](../../../datasets/cuspatial_data.tar.gz) or your data to DBFS by using Databricks CLI. ```Bash @@ -103,5 +127,38 @@ or you can build it [in local](#build-in-local) machine after some prerequisites points polygons ``` -4. Import the Library "spark-cuspatial-22.06.0-SNAPSHOT.jar" to the Databricks, then install it to your cluster. -5. Import [cuspatial_sample.ipynb](notebooks/cuspatial_sample_db.ipynb) to your workspace in Databricks. Attach to your cluster, then run it. + The sample points and polygons are randomly generated. + + Sample polygons: + + + +4. Upload `spark-cuspatial-.jar` on dbfs and then install it in Databricks cluster. + + + +5. Import [cuspatial_sample.ipynb](notebooks/cuspatial_sample_db.ipynb) to Databricks workspace, attach it to Databricks cluster and run it. + +### CPU Demo on AWS Databricks +1. Create a Databricks cluster. For example, Databricks Runtime 10.3. + +2. Install the Sedona jars and Sedona Python libs on Databricks using web UI. + The Sedona version should be 1.1.1-incubating or higher. + * Install below jars from Maven Coordinates in Libraries tab: + ```Bash + org.apache.sedona:sedona-python-adapter-3.0_2.12:1.2.0-incubating + org.datasyslab:geotools-wrapper:1.1.0-25.2 + ``` + * To enable python support, install below python lib from PyPI in Libraries tab + ```Bash + apache-sedona + ``` +3. From your cluster configuration (Cluster -> Edit -> Configuration -> Advanced options -> Spark) activate the + Sedona functions and the kryo serializer by adding below to the Spark Config + ```Bash + spark.sql.extensions org.apache.sedona.viz.sql.SedonaVizExtensions,org.apache.sedona.sql.SedonaSqlExtensions + spark.serializer org.apache.spark.serializer.KryoSerializer + spark.kryo.registrator org.apache.sedona.core.serde.SedonaKryoRegistrator + ``` + +4. Upload the sample data files to DBFS, start the cluster, attach the [notebook](notebooks/spacial-cpu-apache-sedona_db.ipynb) to the cluster, and run it. 
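For orientation before opening the notebooks: the GPU demo boils down to shipping the shapefile to the executors, registering the JNI-backed cuSpatial UDF from the jar built above, and calling it from Spark SQL. Below is a minimal PySpark sketch condensed from the sample notebook in this repo; the DBFS paths are placeholders for wherever you copied the data, and it assumes the cluster already has the RAPIDS plugin and `spark-cuspatial` jars installed.

```Python
# Minimal sketch of the GPU point-in-polygon flow (paths are placeholders;
# the config key and UDF class come from the sample notebook in this repo).
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

inputPath = "dbfs:/data/cuspatial_data/points"
outputPath = "dbfs:/data/output"

# Ship the shapefile (.shp and .shx) to the executors and point the UDF at it.
spark.sparkContext.addFile("dbfs:/data/cuspatial_data/polygons/polygons.shp")
spark.sparkContext.addFile("dbfs:/data/cuspatial_data/polygons/polygons.shx")
spark.conf.set("spark.cuspatial.sql.udf.shapeFileName", "polygons.shp")

# Register the JNI-backed cuSpatial UDF and run it over the point data.
spark.udf.registerJavaFunction("point_in_polygon", "com.nvidia.spark.rapids.udf.PointInPolygon", None)
df = spark.read.parquet(inputPath)
df.selectExpr("x", "y", "point_in_polygon(x, y) as ret") \
  .write.mode("overwrite").parquet(outputPath)
```

The `ret` column holds the indices of the polygons that contain each point (an empty list when no polygon matches), which is the same output shown in the sample notebook below.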
diff --git a/examples/UDF-Examples/Spark-cuSpatial/gpu-run.sh b/examples/UDF-Examples/Spark-cuSpatial/gpu-run.sh index 27571388d..987a3ea52 100755 --- a/examples/UDF-Examples/Spark-cuSpatial/gpu-run.sh +++ b/examples/UDF-Examples/Spark-cuSpatial/gpu-run.sh @@ -15,10 +15,10 @@ # # change to your spark folder -SPARK_HOME=/data/spark-3.2.0-bin-hadoop3.2 +SPARK_HOME=${SPARK_HOME:-/data/spark-3.2.0-bin-hadoop3.2} # change this path to your root path for the dataset -ROOT_PATH=/data/cuspatial_data +ROOT_PATH=${ROOT_PATH:-/data/cuspatial_data} # Extract the sample dataset in ../../datasets/cuspatial_data.tar.gz # Copy the polygons and points data into the root path or change the root path to where they are SHAPE_FILE_DIR=$ROOT_PATH/polygons @@ -31,7 +31,7 @@ rm -rf $DATA_OUT_PATH # the path to keep the jars of spark-rapids & spark-cuspatial JARS=$ROOT_PATH/jars -JARS_PATH=$JARS/rapids-4-spark_2.12-22.06.0.jar,$JARS/spark-cuspatial-22.06.0-SNAPSHOT.jar +JARS_PATH=${JARS_PATH:-$JARS/rapids-4-spark_2.12-22.08.0.jar,$JARS/spark-cuspatial-22.08.0-SNAPSHOT.jar} $SPARK_HOME/bin/spark-submit --master spark://$HOSTNAME:7077 \ --name "Gpu Spatial Join UDF" \ diff --git a/examples/UDF-Examples/Spark-cuSpatial/notebooks/cuspatial_sample_db.ipynb b/examples/UDF-Examples/Spark-cuSpatial/notebooks/cuspatial_sample_db.ipynb index ba535f63d..4797ada9c 100644 --- a/examples/UDF-Examples/Spark-cuSpatial/notebooks/cuspatial_sample_db.ipynb +++ b/examples/UDF-Examples/Spark-cuSpatial/notebooks/cuspatial_sample_db.ipynb @@ -1 +1,328 @@ -{"cells":[{"cell_type":"code","source":["#define the input & output path\ninputPath='dbfs:/data/cuspatial_data/points'\noutputPath='dbfs:/data/output'\n\n# add the shapefile(.shp & .shx)\nspark.sparkContext.addFile(\"dbfs:/data/cuspatial_data/polygons/polygons.shp\")\nspark.sparkContext.addFile(\"dbfs:/data/cuspatial_data/polygons/polygons.shx\")\nspark.conf.set(\"spark.cuspatial.sql.udf.shapeFileName\", \"polygons.shp\")"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"4f12a9eb-6580-4575-a6a8-4c08820fe8e0"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"
","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n
"]}}],"execution_count":0},{"cell_type":"code","source":["# register the UDF\nspark.udf.registerJavaFunction(\"point_in_polygon\", \"com.nvidia.spark.rapids.udf.PointInPolygon\", None)\n\ndf = spark.read.parquet(inputPath)\ndf = df.selectExpr('x', 'y', 'point_in_polygon(x, y) as ret')\ndf.write.mode(\"overwrite\").parquet(outputPath)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"741761ee-d5cf-4c43-ae73-30703d246901"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"
","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n
"]}}],"execution_count":0},{"cell_type":"code","source":["print(\"Input rows: \", df.count())"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"592bfcde-ca6f-4b94-aac1-e3b298f25fcb"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"
Input rows: 71\n
","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n
Input rows: 71\n
"]}}],"execution_count":0},{"cell_type":"code","source":["# show the result\nresult = spark.read.parquet(outputPath)\nprint(\"Output rows: \", result.count())\nresult.show(71)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"f1b744a4-13a3-4260-b8f0-efcbbf1c0337"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"
Output rows: 71\n+-------------------+--------------------+---+\n| x| y|ret|\n+-------------------+--------------------+---+\n|0.48171647380517046| 1.9022922214961997| []|\n| 1.2591725716781235| 0.1448705855995005| []|\n| 0.1895259128530169| 0.5431061133894604| []|\n| 3.028362149164369|0.027638405909631958| []|\n| 1.3890664414691907| 1.5177694304735412| []|\n| 3.1907684812039956| 0.2621847215928189| []|\n| 3.7080407833612004| 0.09804238103130436| []|\n| 3.0706987088385853| 0.9376313558467103| []|\n| 2.0697434332621234| 1.1809465376402173| []|\n| 2.175448214220591| 1.2372448404986038| []|\n| 2.113652420701984| 1.2774712415624014| []|\n| 2.9909779614491687| 1.2420487904041893| []|\n| 4.07037627210835| 1.9486902798139454| []|\n| 4.822583857757069| 0.3234041700489503| []|\n| 4.849847745942472| 1.9531893897409585| []|\n| 4.732546857961497| 0.5659923375279095| []|\n| 4.529792124514895| 1.942673409259531| []|\n| 3.2648444465931474| 2.693039435509084| []|\n| 2.1807636574967466| 3.2296461832828114|[3]|\n| 3.7164018490892348| 2.4612194182614333| []|\n| 2.2006520196663066| 3.7672478678985257|[3]|\n| 6.291790729917634| 2.983311357415729|[1]|\n| 2.5104987015171574| 3.0668114607133137|[3]|\n| 2.3007438625108882| 3.6045900851589048|[3]|\n| 6.101327777646798| 2.5239201807166616|[1]|\n| 6.109985464455084| 2.2235950639628523|[1]|\n| 6.4274219368674315| 2.9754616970668213|[1]|\n| 7.886010001346151| 3.538128217886674| []|\n| 7.5085184104988| 3.623862886287816| []|\n| 7.430677191305505| 3.380489849365283| []|\n| 1.7015273093278767| 7.478882372510933| []|\n| 7.769497359206111| 3.253257011908445| []|\n| 3.86008672302403| 7.513564222799629| []|\n| 0.059011873032214| 5.823535317960799| []|\n| 3.154282922203257| 5.788316610960881| []|\n| 2.4264509160270813| 5.188939408363776|[0]|\n| 1.9804558865545805| 1.3472225743317712| []|\n| 0.8178039499335275| 0.8138440641113271| []|\n| 0.2536015260915061| 1.8762161698642947| []|\n| 3.710910700915217| 0.9937713340192049| []|\n| 3.918090468102582| 0.3338651960183463| []|\n| 3.572744183805594| 0.33184908855075124| []|\n| 3.70669993057843| 0.7485845679979923| []|\n| 3.3588457228653024| 0.2346381514128677| []|\n| 2.520755151373394| 1.902015274420646| []|\n| 2.5322042870739683| 1.419555755682142| []|\n| 2.4613232527836137| 1.0484414482621331| []|\n| 4.975578758530645| 0.9606291981013242| []|\n| 4.5584381091040616| 1.8996548860019926| []|\n| 4.300706849071861|0.021365525588281198| []|\n| 3.01954722322135| 2.57810040095543| []|\n| 3.7622247877537456| 2.8709552313924487| []|\n| 4.75489831780737| 0.7800065259479418| []|\n| 2.566986568683904| 3.6607732238530897|[3]|\n| 3.7002781846945347| 2.3345952955903906| []|\n| 2.493975723955388| 3.3999020934055837|[3]|\n| 2.8222482218882474| 3.8159308233351266|[3]|\n| 6.0821276168848994| 2.5470532680258002|[1]|\n| 2.241538022180476| 3.8812819070357545|[3]|\n| 6.325158445513714| 2.8765450351723674|[1]|\n| 6.444584786789386| 2.174562817047202|[1]|\n| 6.6793884701899| 2.5605928243991434|[1]|\n| 7.250745898479374| 3.4154469467473447| []|\n| 7.079453687660189| 3.063690547962938|[1]|\n| 7.897735998643542| 3.380784914178574| []|\n| 2.2065031771469| 6.896038613284851| []|\n| 1.8703303641352362| 4.209727933188015|[3]|\n| 2.7456295127617385| 7.474216636277054| []|\n| 1.9143371250907073| 6.885401350515916| []|\n| 3.7176098065039747| 6.194330707468438| []|\n| 3.1162712022943757| 6.789029097334483| []|\n+-------------------+--------------------+---+\n\n
","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n
Output rows: 71\n+-------------------+--------------------+---+\n x| y|ret|\n+-------------------+--------------------+---+\n0.48171647380517046| 1.9022922214961997| []|\n 1.2591725716781235| 0.1448705855995005| []|\n 0.1895259128530169| 0.5431061133894604| []|\n 3.028362149164369|0.027638405909631958| []|\n 1.3890664414691907| 1.5177694304735412| []|\n 3.1907684812039956| 0.2621847215928189| []|\n 3.7080407833612004| 0.09804238103130436| []|\n 3.0706987088385853| 0.9376313558467103| []|\n 2.0697434332621234| 1.1809465376402173| []|\n 2.175448214220591| 1.2372448404986038| []|\n 2.113652420701984| 1.2774712415624014| []|\n 2.9909779614491687| 1.2420487904041893| []|\n 4.07037627210835| 1.9486902798139454| []|\n 4.822583857757069| 0.3234041700489503| []|\n 4.849847745942472| 1.9531893897409585| []|\n 4.732546857961497| 0.5659923375279095| []|\n 4.529792124514895| 1.942673409259531| []|\n 3.2648444465931474| 2.693039435509084| []|\n 2.1807636574967466| 3.2296461832828114|[3]|\n 3.7164018490892348| 2.4612194182614333| []|\n 2.2006520196663066| 3.7672478678985257|[3]|\n 6.291790729917634| 2.983311357415729|[1]|\n 2.5104987015171574| 3.0668114607133137|[3]|\n 2.3007438625108882| 3.6045900851589048|[3]|\n 6.101327777646798| 2.5239201807166616|[1]|\n 6.109985464455084| 2.2235950639628523|[1]|\n 6.4274219368674315| 2.9754616970668213|[1]|\n 7.886010001346151| 3.538128217886674| []|\n 7.5085184104988| 3.623862886287816| []|\n 7.430677191305505| 3.380489849365283| []|\n 1.7015273093278767| 7.478882372510933| []|\n 7.769497359206111| 3.253257011908445| []|\n 3.86008672302403| 7.513564222799629| []|\n 0.059011873032214| 5.823535317960799| []|\n 3.154282922203257| 5.788316610960881| []|\n 2.4264509160270813| 5.188939408363776|[0]|\n 1.9804558865545805| 1.3472225743317712| []|\n 0.8178039499335275| 0.8138440641113271| []|\n 0.2536015260915061| 1.8762161698642947| []|\n 3.710910700915217| 0.9937713340192049| []|\n 3.918090468102582| 0.3338651960183463| []|\n 3.572744183805594| 0.33184908855075124| []|\n 3.70669993057843| 0.7485845679979923| []|\n 3.3588457228653024| 0.2346381514128677| []|\n 2.520755151373394| 1.902015274420646| []|\n 2.5322042870739683| 1.419555755682142| []|\n 2.4613232527836137| 1.0484414482621331| []|\n 4.975578758530645| 0.9606291981013242| []|\n 4.5584381091040616| 1.8996548860019926| []|\n 4.300706849071861|0.021365525588281198| []|\n 3.01954722322135| 2.57810040095543| []|\n 3.7622247877537456| 2.8709552313924487| []|\n 4.75489831780737| 0.7800065259479418| []|\n 2.566986568683904| 3.6607732238530897|[3]|\n 3.7002781846945347| 2.3345952955903906| []|\n 2.493975723955388| 3.3999020934055837|[3]|\n 2.8222482218882474| 3.8159308233351266|[3]|\n 6.0821276168848994| 2.5470532680258002|[1]|\n 2.241538022180476| 3.8812819070357545|[3]|\n 6.325158445513714| 2.8765450351723674|[1]|\n 6.444584786789386| 2.174562817047202|[1]|\n 6.6793884701899| 2.5605928243991434|[1]|\n 7.250745898479374| 3.4154469467473447| []|\n 7.079453687660189| 3.063690547962938|[1]|\n 7.897735998643542| 3.380784914178574| []|\n 2.2065031771469| 6.896038613284851| []|\n 1.8703303641352362| 4.209727933188015|[3]|\n 2.7456295127617385| 7.474216636277054| []|\n 1.9143371250907073| 6.885401350515916| []|\n 3.7176098065039747| 6.194330707468438| []|\n 3.1162712022943757| 6.789029097334483| []|\n+-------------------+--------------------+---+\n\n
"]}}],"execution_count":0}],"metadata":{"application/vnd.databricks.v1+notebook":{"notebookName":"cuspatial_sample","dashboards":[],"notebookMetadata":{"pythonIndentUnit":2},"language":"python","widgets":{},"notebookOrigID":3352849421916703}},"nbformat":4,"nbformat_minor":0} +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "4f12a9eb-6580-4575-a6a8-4c08820fe8e0", + "showTitle": false, + "title": "" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "arguments": {}, + "data": "
", + "datasetInfos": [], + "metadata": {}, + "removedWidgets": [], + "type": "html" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "#define the input & output path\n", + "inputPath='dbfs:/data/cuspatial_data/points'\n", + "outputPath='dbfs:/data/output'\n", + "\n", + "# add the shapefile(.shp & .shx)\n", + "spark.sparkContext.addFile(\"dbfs:/data/cuspatial_data/polygons/polygons.shp\")\n", + "spark.sparkContext.addFile(\"dbfs:/data/cuspatial_data/polygons/polygons.shx\")\n", + "spark.conf.set(\"spark.cuspatial.sql.udf.shapeFileName\", \"polygons.shp\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "741761ee-d5cf-4c43-ae73-30703d246901", + "showTitle": false, + "title": "" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "arguments": {}, + "data": "
", + "datasetInfos": [], + "metadata": {}, + "removedWidgets": [], + "type": "html" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "# register the UDF\n", + "spark.udf.registerJavaFunction(\"point_in_polygon\", \"com.nvidia.spark.rapids.udf.PointInPolygon\", None)\n", + "\n", + "df = spark.read.parquet(inputPath)\n", + "df = df.selectExpr('x', 'y', 'point_in_polygon(x, y) as ret')\n", + "df.write.mode(\"overwrite\").parquet(outputPath)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "592bfcde-ca6f-4b94-aac1-e3b298f25fcb", + "showTitle": false, + "title": "" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
Input rows: 71\n", + "
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "arguments": {}, + "data": "
Input rows: 71\n
", + "datasetInfos": [], + "metadata": {}, + "removedWidgets": [], + "type": "html" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "print(\"Input rows: \", df.count())" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "f1b744a4-13a3-4260-b8f0-efcbbf1c0337", + "showTitle": false, + "title": "" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
Output rows: 71\n", + "+-------------------+--------------------+---+\n", + " x| y|ret|\n", + "+-------------------+--------------------+---+\n", + "0.48171647380517046| 1.9022922214961997| []|\n", + " 1.2591725716781235| 0.1448705855995005| []|\n", + " 0.1895259128530169| 0.5431061133894604| []|\n", + " 3.028362149164369|0.027638405909631958| []|\n", + " 1.3890664414691907| 1.5177694304735412| []|\n", + " 3.1907684812039956| 0.2621847215928189| []|\n", + " 3.7080407833612004| 0.09804238103130436| []|\n", + " 3.0706987088385853| 0.9376313558467103| []|\n", + " 2.0697434332621234| 1.1809465376402173| []|\n", + " 2.175448214220591| 1.2372448404986038| []|\n", + " 2.113652420701984| 1.2774712415624014| []|\n", + " 2.9909779614491687| 1.2420487904041893| []|\n", + " 4.07037627210835| 1.9486902798139454| []|\n", + " 4.822583857757069| 0.3234041700489503| []|\n", + " 4.849847745942472| 1.9531893897409585| []|\n", + " 4.732546857961497| 0.5659923375279095| []|\n", + " 4.529792124514895| 1.942673409259531| []|\n", + " 3.2648444465931474| 2.693039435509084| []|\n", + " 2.1807636574967466| 3.2296461832828114|[3]|\n", + " 3.7164018490892348| 2.4612194182614333| []|\n", + " 2.2006520196663066| 3.7672478678985257|[3]|\n", + " 6.291790729917634| 2.983311357415729|[1]|\n", + " 2.5104987015171574| 3.0668114607133137|[3]|\n", + " 2.3007438625108882| 3.6045900851589048|[3]|\n", + " 6.101327777646798| 2.5239201807166616|[1]|\n", + " 6.109985464455084| 2.2235950639628523|[1]|\n", + " 6.4274219368674315| 2.9754616970668213|[1]|\n", + " 7.886010001346151| 3.538128217886674| []|\n", + " 7.5085184104988| 3.623862886287816| []|\n", + " 7.430677191305505| 3.380489849365283| []|\n", + " 1.7015273093278767| 7.478882372510933| []|\n", + " 7.769497359206111| 3.253257011908445| []|\n", + " 3.86008672302403| 7.513564222799629| []|\n", + " 0.059011873032214| 5.823535317960799| []|\n", + " 3.154282922203257| 5.788316610960881| []|\n", + " 2.4264509160270813| 5.188939408363776|[0]|\n", + " 1.9804558865545805| 1.3472225743317712| []|\n", + " 0.8178039499335275| 0.8138440641113271| []|\n", + " 0.2536015260915061| 1.8762161698642947| []|\n", + " 3.710910700915217| 0.9937713340192049| []|\n", + " 3.918090468102582| 0.3338651960183463| []|\n", + " 3.572744183805594| 0.33184908855075124| []|\n", + " 3.70669993057843| 0.7485845679979923| []|\n", + " 3.3588457228653024| 0.2346381514128677| []|\n", + " 2.520755151373394| 1.902015274420646| []|\n", + " 2.5322042870739683| 1.419555755682142| []|\n", + " 2.4613232527836137| 1.0484414482621331| []|\n", + " 4.975578758530645| 0.9606291981013242| []|\n", + " 4.5584381091040616| 1.8996548860019926| []|\n", + " 4.300706849071861|0.021365525588281198| []|\n", + " 3.01954722322135| 2.57810040095543| []|\n", + " 3.7622247877537456| 2.8709552313924487| []|\n", + " 4.75489831780737| 0.7800065259479418| []|\n", + " 2.566986568683904| 3.6607732238530897|[3]|\n", + " 3.7002781846945347| 2.3345952955903906| []|\n", + " 2.493975723955388| 3.3999020934055837|[3]|\n", + " 2.8222482218882474| 3.8159308233351266|[3]|\n", + " 6.0821276168848994| 2.5470532680258002|[1]|\n", + " 2.241538022180476| 3.8812819070357545|[3]|\n", + " 6.325158445513714| 2.8765450351723674|[1]|\n", + " 6.444584786789386| 2.174562817047202|[1]|\n", + " 6.6793884701899| 2.5605928243991434|[1]|\n", + " 7.250745898479374| 3.4154469467473447| []|\n", + " 7.079453687660189| 3.063690547962938|[1]|\n", + " 7.897735998643542| 3.380784914178574| []|\n", + " 2.2065031771469| 6.896038613284851| []|\n", + " 1.8703303641352362| 
4.209727933188015|[3]|\n", + " 2.7456295127617385| 7.474216636277054| []|\n", + " 1.9143371250907073| 6.885401350515916| []|\n", + " 3.7176098065039747| 6.194330707468438| []|\n", + " 3.1162712022943757| 6.789029097334483| []|\n", + "+-------------------+--------------------+---+\n", + "\n", + "
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "arguments": {}, + "data": "
Output rows: 71\n+-------------------+--------------------+---+\n| x| y|ret|\n+-------------------+--------------------+---+\n|0.48171647380517046| 1.9022922214961997| []|\n| 1.2591725716781235| 0.1448705855995005| []|\n| 0.1895259128530169| 0.5431061133894604| []|\n| 3.028362149164369|0.027638405909631958| []|\n| 1.3890664414691907| 1.5177694304735412| []|\n| 3.1907684812039956| 0.2621847215928189| []|\n| 3.7080407833612004| 0.09804238103130436| []|\n| 3.0706987088385853| 0.9376313558467103| []|\n| 2.0697434332621234| 1.1809465376402173| []|\n| 2.175448214220591| 1.2372448404986038| []|\n| 2.113652420701984| 1.2774712415624014| []|\n| 2.9909779614491687| 1.2420487904041893| []|\n| 4.07037627210835| 1.9486902798139454| []|\n| 4.822583857757069| 0.3234041700489503| []|\n| 4.849847745942472| 1.9531893897409585| []|\n| 4.732546857961497| 0.5659923375279095| []|\n| 4.529792124514895| 1.942673409259531| []|\n| 3.2648444465931474| 2.693039435509084| []|\n| 2.1807636574967466| 3.2296461832828114|[3]|\n| 3.7164018490892348| 2.4612194182614333| []|\n| 2.2006520196663066| 3.7672478678985257|[3]|\n| 6.291790729917634| 2.983311357415729|[1]|\n| 2.5104987015171574| 3.0668114607133137|[3]|\n| 2.3007438625108882| 3.6045900851589048|[3]|\n| 6.101327777646798| 2.5239201807166616|[1]|\n| 6.109985464455084| 2.2235950639628523|[1]|\n| 6.4274219368674315| 2.9754616970668213|[1]|\n| 7.886010001346151| 3.538128217886674| []|\n| 7.5085184104988| 3.623862886287816| []|\n| 7.430677191305505| 3.380489849365283| []|\n| 1.7015273093278767| 7.478882372510933| []|\n| 7.769497359206111| 3.253257011908445| []|\n| 3.86008672302403| 7.513564222799629| []|\n| 0.059011873032214| 5.823535317960799| []|\n| 3.154282922203257| 5.788316610960881| []|\n| 2.4264509160270813| 5.188939408363776|[0]|\n| 1.9804558865545805| 1.3472225743317712| []|\n| 0.8178039499335275| 0.8138440641113271| []|\n| 0.2536015260915061| 1.8762161698642947| []|\n| 3.710910700915217| 0.9937713340192049| []|\n| 3.918090468102582| 0.3338651960183463| []|\n| 3.572744183805594| 0.33184908855075124| []|\n| 3.70669993057843| 0.7485845679979923| []|\n| 3.3588457228653024| 0.2346381514128677| []|\n| 2.520755151373394| 1.902015274420646| []|\n| 2.5322042870739683| 1.419555755682142| []|\n| 2.4613232527836137| 1.0484414482621331| []|\n| 4.975578758530645| 0.9606291981013242| []|\n| 4.5584381091040616| 1.8996548860019926| []|\n| 4.300706849071861|0.021365525588281198| []|\n| 3.01954722322135| 2.57810040095543| []|\n| 3.7622247877537456| 2.8709552313924487| []|\n| 4.75489831780737| 0.7800065259479418| []|\n| 2.566986568683904| 3.6607732238530897|[3]|\n| 3.7002781846945347| 2.3345952955903906| []|\n| 2.493975723955388| 3.3999020934055837|[3]|\n| 2.8222482218882474| 3.8159308233351266|[3]|\n| 6.0821276168848994| 2.5470532680258002|[1]|\n| 2.241538022180476| 3.8812819070357545|[3]|\n| 6.325158445513714| 2.8765450351723674|[1]|\n| 6.444584786789386| 2.174562817047202|[1]|\n| 6.6793884701899| 2.5605928243991434|[1]|\n| 7.250745898479374| 3.4154469467473447| []|\n| 7.079453687660189| 3.063690547962938|[1]|\n| 7.897735998643542| 3.380784914178574| []|\n| 2.2065031771469| 6.896038613284851| []|\n| 1.8703303641352362| 4.209727933188015|[3]|\n| 2.7456295127617385| 7.474216636277054| []|\n| 1.9143371250907073| 6.885401350515916| []|\n| 3.7176098065039747| 6.194330707468438| []|\n| 3.1162712022943757| 6.789029097334483| []|\n+-------------------+--------------------+---+\n\n
", + "datasetInfos": [], + "metadata": {}, + "removedWidgets": [], + "type": "html" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "# show the result\n", + "result = spark.read.parquet(outputPath)\n", + "print(\"Output rows: \", result.count())\n", + "result.show(71)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "cuspatial_sample", + "notebookOrigID": 3352849421916703, + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/examples/UDF-Examples/Spark-cuSpatial/notebooks/cuspatial_sample_standalone.ipynb b/examples/UDF-Examples/Spark-cuSpatial/notebooks/cuspatial_sample_standalone.ipynb index a53a6d813..f13889ed1 100644 --- a/examples/UDF-Examples/Spark-cuSpatial/notebooks/cuspatial_sample_standalone.ipynb +++ b/examples/UDF-Examples/Spark-cuSpatial/notebooks/cuspatial_sample_standalone.ipynb @@ -8,8 +8,10 @@ "outputs": [], "source": [ "from pyspark.sql import SparkSession\n", + "import os\n", + "jarsPath = os.getenv(\"JARS_PATH\", \"/data/cuspatial_data/jars/rapids-4-spark_2.12-22.08.0.jar,/data/cuspatial_data/jars/spark-cuspatial-22.08.0-SNAPSHOT.jar\")\n", "spark = SparkSession.builder \\\n", - " .config(\"spark.jars\", \"/data/cuspatial_data/jars/rapids-4-spark_2.12-22.06.0.jar,/data/cuspatial_data/jars/spark-cuspatial-22.06.0-SNAPSHOT.jar\") \\\n", + " .config(\"spark.jars\", jarsPath) \\\n", " .config(\"spark.sql.adaptive.enabled\", \"false\") \\\n", " .config(\"spark.executor.memory\", \"20GB\") \\\n", " .config(\"spark.executor.cores\", \"6\") \\\n", @@ -28,11 +30,12 @@ "outputs": [], "source": [ "# prepare shape files\n", - "spark.sparkContext.addFile(\"/data/cuspatial_data/polygons/polygons.shp\")\n", - "spark.sparkContext.addFile(\"/data/cuspatial_data/polygons/polygons.shx\")\n", + "rootPath = os.getenv(\"ROOT_PATH\", \"/data/cuspatial_data\")\n", + "spark.sparkContext.addFile(rootPath + \"/polygons/polygons.shp\")\n", + "spark.sparkContext.addFile(rootPath + \"/polygons/polygons.shx\")\n", "\n", - "inputPath = \"/data/cuspatial_data/points/\"\n", - "outputPath = \"/data/cuspatial_data/output/\"" + "inputPath = rootPath + \"/points/\"\n", + "outputPath = rootPath + \"/output/\"" ] }, { diff --git a/examples/UDF-Examples/Spark-cuSpatial/notebooks/spacial-cpu-apache-sedona_db.ipynb b/examples/UDF-Examples/Spark-cuSpatial/notebooks/spacial-cpu-apache-sedona_db.ipynb new file mode 100644 index 000000000..47cf5e872 --- /dev/null +++ b/examples/UDF-Examples/Spark-cuSpatial/notebooks/spacial-cpu-apache-sedona_db.ipynb @@ -0,0 +1,364 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "61a74001-716b-4411-aecb-77d07058d200", + "showTitle": false, + "title": "" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
Requirement already satisfied: geopandas in /databricks/python3/lib/python3.8/site-packages (0.11.0)\n", + "Requirement already satisfied: pyproj>=2.6.1.post1 in /databricks/python3/lib/python3.8/site-packages (from geopandas) (3.3.1)\n", + "Requirement already satisfied: shapely<2,>=1.7 in /databricks/python3/lib/python3.8/site-packages (from geopandas) (1.8.2)\n", + "Requirement already satisfied: pandas>=1.0.0 in /databricks/python3/lib/python3.8/site-packages (from geopandas) (1.2.4)\n", + "Requirement already satisfied: packaging in /databricks/python3/lib/python3.8/site-packages (from geopandas) (20.9)\n", + "Requirement already satisfied: fiona>=1.8 in /databricks/python3/lib/python3.8/site-packages (from geopandas) (1.8.21)\n", + "Requirement already satisfied: six>=1.7 in /databricks/python3/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (1.15.0)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.8/dist-packages (from fiona>=1.8->geopandas) (52.0.0)\n", + "Requirement already satisfied: click>=4.0 in /databricks/python3/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (8.1.3)\n", + "Requirement already satisfied: certifi in /databricks/python3/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (2020.12.5)\n", + "Requirement already satisfied: munch in /databricks/python3/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (2.5.0)\n", + "Requirement already satisfied: cligj>=0.5 in /databricks/python3/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (0.7.2)\n", + "Requirement already satisfied: click-plugins>=1.0 in /databricks/python3/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (1.1.1)\n", + "Requirement already satisfied: attrs>=17 in /databricks/python3/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (20.3.0)\n", + "Requirement already satisfied: pytz>=2017.3 in /databricks/python3/lib/python3.8/site-packages (from pandas>=1.0.0->geopandas) (2020.5)\n", + "Requirement already satisfied: numpy>=1.16.5 in /databricks/python3/lib/python3.8/site-packages (from pandas>=1.0.0->geopandas) (1.20.1)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /databricks/python3/lib/python3.8/site-packages (from pandas>=1.0.0->geopandas) (2.8.1)\n", + "Requirement already satisfied: pyparsing>=2.0.2 in /databricks/python3/lib/python3.8/site-packages (from packaging->geopandas) (2.4.7)\n", + "WARNING: You are using pip version 21.0.1; however, version 22.1.2 is available.\n", + "You should consider upgrading via the '/databricks/python3/bin/python -m pip install --upgrade pip' command.\n", + "
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "arguments": {}, + "data": "
Requirement already satisfied: geopandas in /databricks/python3/lib/python3.8/site-packages (0.11.0)\nRequirement already satisfied: pyproj>=2.6.1.post1 in /databricks/python3/lib/python3.8/site-packages (from geopandas) (3.3.1)\nRequirement already satisfied: shapely<2,>=1.7 in /databricks/python3/lib/python3.8/site-packages (from geopandas) (1.8.2)\nRequirement already satisfied: pandas>=1.0.0 in /databricks/python3/lib/python3.8/site-packages (from geopandas) (1.2.4)\nRequirement already satisfied: packaging in /databricks/python3/lib/python3.8/site-packages (from geopandas) (20.9)\nRequirement already satisfied: fiona>=1.8 in /databricks/python3/lib/python3.8/site-packages (from geopandas) (1.8.21)\nRequirement already satisfied: six>=1.7 in /databricks/python3/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (1.15.0)\nRequirement already satisfied: setuptools in /usr/local/lib/python3.8/dist-packages (from fiona>=1.8->geopandas) (52.0.0)\nRequirement already satisfied: click>=4.0 in /databricks/python3/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (8.1.3)\nRequirement already satisfied: certifi in /databricks/python3/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (2020.12.5)\nRequirement already satisfied: munch in /databricks/python3/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (2.5.0)\nRequirement already satisfied: cligj>=0.5 in /databricks/python3/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (0.7.2)\nRequirement already satisfied: click-plugins>=1.0 in /databricks/python3/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (1.1.1)\nRequirement already satisfied: attrs>=17 in /databricks/python3/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (20.3.0)\nRequirement already satisfied: pytz>=2017.3 in /databricks/python3/lib/python3.8/site-packages (from pandas>=1.0.0->geopandas) (2020.5)\nRequirement already satisfied: numpy>=1.16.5 in /databricks/python3/lib/python3.8/site-packages (from pandas>=1.0.0->geopandas) (1.20.1)\nRequirement already satisfied: python-dateutil>=2.7.3 in /databricks/python3/lib/python3.8/site-packages (from pandas>=1.0.0->geopandas) (2.8.1)\nRequirement already satisfied: pyparsing>=2.0.2 in /databricks/python3/lib/python3.8/site-packages (from packaging->geopandas) (2.4.7)\nWARNING: You are using pip version 21.0.1; however, version 22.1.2 is available.\nYou should consider upgrading via the '/databricks/python3/bin/python -m pip install --upgrade pip' command.\n
", + "datasetInfos": [], + "metadata": {}, + "removedWidgets": [], + "type": "html" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "%sh \n", + "pip install geopandas" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "4fd97bf3-dc08-495f-9cfe-e9e551f40e16", + "showTitle": false, + "title": "" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "arguments": {}, + "data": "
", + "datasetInfos": [], + "metadata": {}, + "removedWidgets": [], + "type": "html" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "import os\n", + "import geopandas as gpd\n", + "from pyspark.sql.functions import col, expr, when\n", + "from sedona.register import SedonaRegistrator\n", + "from sedona.utils import SedonaKryoRegistrator, KryoSerializer" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "6b15de34-d411-457b-89fb-7232587ae949", + "showTitle": false, + "title": "" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
/databricks/spark/python/pyspark/sql/pandas/conversion.py:340: UserWarning: createDataFrame attempted Arrow optimization because 'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, failed by the reason below:\n", + " Did not pass numpy.dtype object\n", + "Attempting non-optimization as 'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to true.\n", + " warnings.warn(msg)\n", + "Out[9]: 4
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "arguments": {}, + "data": "
/databricks/spark/python/pyspark/sql/pandas/conversion.py:340: UserWarning: createDataFrame attempted Arrow optimization because 'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, failed by the reason below:\n Did not pass numpy.dtype object\nAttempting non-optimization as 'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to true.\n warnings.warn(msg)\nOut[9]: 4
", + "datasetInfos": [], + "metadata": {}, + "removedWidgets": [], + "type": "html" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "# read the shapefile\n", + "polygons = gpd.read_file(\"/dbfs/data/cuspatial_data/polygons/polygons.shp\")\n", + "polygons_df = spark.createDataFrame(\n", + " polygons\n", + ")\n", + "polygons_df.createOrReplaceTempView(\"polygons\")\n", + "polygons_df.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "3a4d1a80-72e4-490d-8152-f6f231cac37f", + "showTitle": false, + "title": "" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
+--------------------+\n", + " mypoint|\n", + "+--------------------+\n", + "POINT (0.48171647...|\n", + "POINT (1.25917257...|\n", + "POINT (0.18952591...|\n", + "POINT (3.02836214...|\n", + "POINT (1.38906644...|\n", + "+--------------------+\n", + "only showing top 5 rows\n", + "\n", + "
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "arguments": {}, + "data": "
+--------------------+\n| mypoint|\n+--------------------+\n|POINT (0.48171647...|\n|POINT (1.25917257...|\n|POINT (0.18952591...|\n|POINT (3.02836214...|\n|POINT (1.38906644...|\n+--------------------+\nonly showing top 5 rows\n\n
", + "datasetInfos": [], + "metadata": {}, + "removedWidgets": [], + "type": "html" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "point_parquet_df = spark.read.format(\"parquet\").\\\n", + " load(\"dbfs:/data/cuspatial_data/points\")\n", + "\n", + "point_parquet_df.createOrReplaceTempView(\"pointtable\")\n", + "point_df = spark.sql(\"select ST_Point(x, y) as mypoint from pointtable\")\n", + "point_df.show(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "8ec977f5-937e-45ce-89d6-46fa3b48cc39", + "showTitle": false, + "title": "" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
+--------------------+------------------+------------------+\n", + " geometry| x| y|\n", + "+--------------------+------------------+------------------+\n", + "POLYGON ((2.08811...|2.1807636574967466|3.2296461832828114|\n", + "POLYGON ((2.08811...|2.2006520196663066|3.7672478678985257|\n", + "POLYGON ((2.08811...|2.5104987015171574|3.0668114607133137|\n", + "POLYGON ((2.08811...|2.3007438625108882|3.6045900851589048|\n", + "POLYGON ((2.08811...| 2.566986568683904|3.6607732238530897|\n", + "POLYGON ((2.08811...| 2.493975723955388|3.3999020934055837|\n", + "POLYGON ((2.08811...|2.8222482218882474|3.8159308233351266|\n", + "POLYGON ((2.08811...| 2.241538022180476|3.8812819070357545|\n", + "POLYGON ((2.08811...|1.8703303641352362| 4.209727933188015|\n", + "POLYGON ((2.48845...|2.4264509160270813| 5.188939408363776|\n", + "POLYGON ((5.03982...| 6.291790729917634| 2.983311357415729|\n", + "POLYGON ((5.03982...| 6.101327777646798|2.5239201807166616|\n", + "POLYGON ((5.03982...| 6.109985464455084|2.2235950639628523|\n", + "POLYGON ((5.03982...|6.4274219368674315|2.9754616970668213|\n", + "POLYGON ((5.03982...|6.0821276168848994|2.5470532680258002|\n", + "POLYGON ((5.03982...| 6.325158445513714|2.8765450351723674|\n", + "POLYGON ((5.03982...| 6.444584786789386| 2.174562817047202|\n", + "POLYGON ((5.03982...| 6.6793884701899|2.5605928243991434|\n", + "POLYGON ((5.03982...| 7.079453687660189| 3.063690547962938|\n", + "+--------------------+------------------+------------------+\n", + "\n", + "
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "arguments": {}, + "data": "
+--------------------+------------------+------------------+\n| geometry| x| y|\n+--------------------+------------------+------------------+\n|POLYGON ((2.08811...|2.1807636574967466|3.2296461832828114|\n|POLYGON ((2.08811...|2.2006520196663066|3.7672478678985257|\n|POLYGON ((2.08811...|2.5104987015171574|3.0668114607133137|\n|POLYGON ((2.08811...|2.3007438625108882|3.6045900851589048|\n|POLYGON ((2.08811...| 2.566986568683904|3.6607732238530897|\n|POLYGON ((2.08811...| 2.493975723955388|3.3999020934055837|\n|POLYGON ((2.08811...|2.8222482218882474|3.8159308233351266|\n|POLYGON ((2.08811...| 2.241538022180476|3.8812819070357545|\n|POLYGON ((2.08811...|1.8703303641352362| 4.209727933188015|\n|POLYGON ((2.48845...|2.4264509160270813| 5.188939408363776|\n|POLYGON ((5.03982...| 6.291790729917634| 2.983311357415729|\n|POLYGON ((5.03982...| 6.101327777646798|2.5239201807166616|\n|POLYGON ((5.03982...| 6.109985464455084|2.2235950639628523|\n|POLYGON ((5.03982...|6.4274219368674315|2.9754616970668213|\n|POLYGON ((5.03982...|6.0821276168848994|2.5470532680258002|\n|POLYGON ((5.03982...| 6.325158445513714|2.8765450351723674|\n|POLYGON ((5.03982...| 6.444584786789386| 2.174562817047202|\n|POLYGON ((5.03982...| 6.6793884701899|2.5605928243991434|\n|POLYGON ((5.03982...| 7.079453687660189| 3.063690547962938|\n+--------------------+------------------+------------------+\n\n
", + "datasetInfos": [], + "metadata": {}, + "removedWidgets": [], + "type": "html" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "result = spark.sql(\"\\\n", + "SELECT polygons.*, pointtable.* FROM polygons ,pointtable \\\n", + "WHERE ST_Contains(polygons.geometry, ST_Point(pointtable.x, pointtable.y)) \\\n", + "\")\n", + "result.show()" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "spacial-cpu-apache-sedona", + "notebookOrigID": 1618423020047086, + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/examples/UDF-Examples/Spark-cuSpatial/pom.xml b/examples/UDF-Examples/Spark-cuSpatial/pom.xml index 1196fd775..6f8f08a49 100644 --- a/examples/UDF-Examples/Spark-cuSpatial/pom.xml +++ b/examples/UDF-Examples/Spark-cuSpatial/pom.xml @@ -24,13 +24,13 @@ UDF of the cuSpatial case for the RAPIDS Accelerator The RAPIDS accelerated user defined function of the cuSpatial case for use with the RAPIDS Accelerator for Apache Spark - 22.06.0-SNAPSHOT + 22.08.0-SNAPSHOT 1.8 1.8 8 - 22.06.0 + 22.08.0 2.12 3.2.0 ${project.build.directory}/cpp-build diff --git a/examples/UDF-Examples/Spark-cuSpatial/src/main/native/CMakeLists.txt b/examples/UDF-Examples/Spark-cuSpatial/src/main/native/CMakeLists.txt index 0ab52a7c8..40eff8c31 100755 --- a/examples/UDF-Examples/Spark-cuSpatial/src/main/native/CMakeLists.txt +++ b/examples/UDF-Examples/Spark-cuSpatial/src/main/native/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) -project(SPATIALUDJNI VERSION 22.06.0 LANGUAGES C CXX CUDA) +project(SPATIALUDJNI VERSION 22.08.0 LANGUAGES C CXX CUDA) ################################################################################################### # - build type ------------------------------------------------------------------------------------ diff --git a/examples/XGBoost-Examples/README.md b/examples/XGBoost-Examples/README.md index 4d064c8d7..f4e654530 100644 --- a/examples/XGBoost-Examples/README.md +++ b/examples/XGBoost-Examples/README.md @@ -6,7 +6,7 @@ For PySpark based XGBoost, please refer to the [Spark-RAPIDS-examples 22.04 bran uses [NVIDIA’s Spark XGBoost version](https://repo1.maven.org/maven2/com/nvidia/xgboost4j-spark_3.0/1.4.2-0.3.0/). Most data scientists spend a lot of time not only on Training models but also processing the large amounts of data needed to train these models. -As you can see below, XGBoost training on GPUs can be upto 7X and data processing using +As you can see below, XGBoost training on GPUs can be up to 10X and data processing using RAPIDS Accelerator can also be accelerated with an end-to-end speed-up of 7X on GPU compared to CPU. In the public cloud, better performance can lead to significantly lower costs as demonstrated in this [blog](https://developer.nvidia.com/blog/gpu-accelerated-spark-xgboost/). 
@@ -96,3 +96,8 @@ otherwise the customized CrossValidator may schedule more than 1 xgboost trainin For XGBoost job, if the number of shuffle stage tasks before training is less than the num_worker, the training tasks will be scheduled to run on part of nodes instead of all nodes due to Spark Data Locality feature. The workaround is to increase the partitions of the shuffle stage by setting `spark.sql.files.maxPartitionBytes=RightNum`. +If you are running XGBoost scala notebooks on Dataproc, please make sure to update below configs to avoid job failure: +``` +spark.dynamicAllocation.enabled=false +spark.task.resource.gpu.amount=1 +``` \ No newline at end of file diff --git a/examples/XGBoost-Examples/agaricus/notebooks/scala/agaricus-gpu.ipynb b/examples/XGBoost-Examples/agaricus/notebooks/scala/agaricus-gpu.ipynb index 738a5a0cb..06efa2ba9 100644 --- a/examples/XGBoost-Examples/agaricus/notebooks/scala/agaricus-gpu.ipynb +++ b/examples/XGBoost-Examples/agaricus/notebooks/scala/agaricus-gpu.ipynb @@ -604,9 +604,9 @@ ], "metadata": { "kernelspec": { - "display_name": "XGBoost4j-Spark-2206 - Scala", + "display_name": "XGBoost4j-Spark - Scala", "language": "scala", - "name": "xgboost4j-spark-2206_scala" + "name": "XGBoost4j-Spark_scala" }, "language_info": { "codemirror_mode": "text/x-scala", diff --git a/examples/XGBoost-Examples/app-parameters/supported_xgboost_parameters_python.md b/examples/XGBoost-Examples/app-parameters/supported_xgboost_parameters_python.md index 10b1c293a..b3cf72ebf 100644 --- a/examples/XGBoost-Examples/app-parameters/supported_xgboost_parameters_python.md +++ b/examples/XGBoost-Examples/app-parameters/supported_xgboost_parameters_python.md @@ -30,6 +30,7 @@ This is a description of all the parameters available when you are running examp * `--dataPath=raw::[path]`: Path to the raw data files to be transformed by taxi/ETLMain. * `--dataPath=perf::[path]`,`-dataPath=acq::[path]`: Paths to the raw data files to be transformed by mortgage/ETLMain. * `--dataPath=out::`: Path where to place the output data files for both mortgage/ETLMain and taxi/ETLMain. + * `--dataPath=tmp::`: Path where to place the output data files for converting raw csv format to parquet. 6. `--modelPath=[path]`: Path to save model after training, or where to load model for transforming only. Required only when mode is 'transform'. 7. `--overwrite=[true|false]`: Whether to overwrite the current model data under 'modelPath'. Default is false. You may need to set to true to avoid IOException when saving the model to a path already exists. 8. `--hasHeader=[true|false]`: Indicate whether the csv file has header. diff --git a/examples/XGBoost-Examples/app-parameters/supported_xgboost_parameters_scala.md b/examples/XGBoost-Examples/app-parameters/supported_xgboost_parameters_scala.md index 5b1d77410..838404342 100644 --- a/examples/XGBoost-Examples/app-parameters/supported_xgboost_parameters_scala.md +++ b/examples/XGBoost-Examples/app-parameters/supported_xgboost_parameters_scala.md @@ -19,6 +19,7 @@ This is a description of all the parameters available when you are running examp * `-dataPath=raw::[path]`: Path to the raw data files to be transformed by taxi/ETLMain. * `-dataPath=perf::[path]`,`-dataPath=acq::[path]`: Paths to the raw data files to be transformed by mortgage/ETLMain. * `-dataPath=out::`: Path where to place the output data files for both mortgage/ETLMain and taxi/ETLMain. + * `-dataPath=tmp::`: Path where to place the output data files for converting raw csv format to parquet. 5. 
`-modelPath=[path]`: Path to save model after training, or where to load model for transforming only. Required only when mode is 'transform'. 6. `-overwrite=[true|false]`: Whether to overwrite the current model data under 'modelPath'. Default is false. You may need to set to true to avoid IOException when saving the model to a path already exists. 7. `-hasHeader=[true|false]`: Indicate whether the csv file has header. diff --git a/examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL+XGBoost.ipynb b/examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL+XGBoost.ipynb index a544f5795..63a11ccfd 100644 --- a/examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL+XGBoost.ipynb +++ b/examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL+XGBoost.ipynb @@ -4,6 +4,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "# Dataset\n", + "\n", + "Dataset is derived from Fannie Mae’s [Single-Family Loan Performance Data](http://www.fanniemae.com/portal/funding-the-market/data/loan-performance-data.html) with all rights reserved by Fannie Mae. Refer to these [instructions](https://github.com/NVIDIA/spark-rapids-examples/blob/branch-22.08/docs/get-started/xgboost-examples/dataset/mortgage.md) to download the dataset.\n", + "\n", "# ETL + XGBoost train & transform\n", "\n", "This notebook is an end-to-end example of ETL + XGBoost Train & Transform by using [Spark-Rapids](https://github.com/NVIDIA/spark-rapids) and [XGBoost](https://github.com/nvidia/spark-xgboost) with GPU accelerated.\n", @@ -50,8 +54,8 @@ "source": [ "# The input path of dataset\n", "dataRoot = os.getenv(\"DATA_ROOT\", \"/data\")\n", - "orig_perf_path=dataRoot + \"/mortgage/Performance/\"\n", - "orig_acq_path=dataRoot + \"/mortgage/Acquisition/\"" + "orig_raw_path = dataRoot + \"/mortgage/input/\"", + "orig_raw_path_csv2parquet = dataRoot + \"/mortgage/output/csv2parquet/\"" ] }, { @@ -72,15 +76,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Set True to save the dataset after ETL\n", + "# Set True to save processed dataset after ETL\n", "# Set False, the dataset after ETL will be directly used in XGBoost train and transform\n", + "\n", "is_save_dataset=True\n", - "# the path to save the train dataset\n", - "output_path_train=dataRoot + \"/mortgage/output/train/\"\n", - "# the path to save the test dataset\n", - "output_path_test=dataRoot + \"/mortgage/output/test/\"\n", + "output_path_data=dataRoot + \"/mortgage/output/data/\"\n", "# the path to save the xgboost model\n", - "output_path_model=dataRoot + \"/mortgage/new-model-path\"" + "output_path_model=dataRoot + \"/mortgage/output/model/\"" ] }, { @@ -97,65 +99,117 @@ "outputs": [], "source": [ "# File schema\n", - "_csv_perf_schema = StructType([\n", - " StructField(\"loan_id\", LongType()),\n", - " StructField(\"monthly_reporting_period\", StringType()),\n", - " StructField(\"servicer\", StringType()),\n", - " StructField(\"interest_rate\", DoubleType()),\n", - " StructField(\"current_actual_upb\", DoubleType()),\n", - " StructField(\"loan_age\", DoubleType()),\n", - " StructField(\"remaining_months_to_legal_maturity\", DoubleType()),\n", - " StructField(\"adj_remaining_months_to_maturity\", DoubleType()),\n", - " StructField(\"maturity_date\", StringType()),\n", - " StructField(\"msa\", DoubleType()),\n", - " StructField(\"current_loan_delinquency_status\", IntegerType()),\n", - " StructField(\"mod_flag\", StringType()),\n", - " StructField(\"zero_balance_code\", StringType()),\n", - " StructField(\"zero_balance_effective_date\", 
StringType()),\n", - " StructField(\"last_paid_installment_date\", StringType()),\n", - " StructField(\"foreclosed_after\", StringType()),\n", - " StructField(\"disposition_date\", StringType()),\n", - " StructField(\"foreclosure_costs\", DoubleType()),\n", - " StructField(\"prop_preservation_and_repair_costs\", DoubleType()),\n", - " StructField(\"asset_recovery_costs\", DoubleType()),\n", - " StructField(\"misc_holding_expenses\", DoubleType()),\n", - " StructField(\"holding_taxes\", DoubleType()),\n", - " StructField(\"net_sale_proceeds\", DoubleType()),\n", - " StructField(\"credit_enhancement_proceeds\", DoubleType()),\n", - " StructField(\"repurchase_make_whole_proceeds\", StringType()),\n", - " StructField(\"other_foreclosure_proceeds\", DoubleType()),\n", - " StructField(\"non_interest_bearing_upb\", DoubleType()),\n", - " StructField(\"principal_forgiveness_upb\", StringType()),\n", - " StructField(\"repurchase_make_whole_proceeds_flag\", StringType()),\n", - " StructField(\"foreclosure_principal_write_off_amount\", StringType()),\n", - " StructField(\"servicing_activity_indicator\", StringType())])\n", "\n", - "_csv_acq_schema = StructType([\n", - " StructField(\"loan_id\", LongType()),\n", - " StructField(\"orig_channel\", StringType()),\n", - " StructField(\"seller_name\", StringType()),\n", - " StructField(\"orig_interest_rate\", DoubleType()),\n", - " StructField(\"orig_upb\", IntegerType()),\n", - " StructField(\"orig_loan_term\", IntegerType()),\n", - " StructField(\"orig_date\", StringType()),\n", - " StructField(\"first_pay_date\", StringType()),\n", - " StructField(\"orig_ltv\", DoubleType()),\n", - " StructField(\"orig_cltv\", DoubleType()),\n", - " StructField(\"num_borrowers\", DoubleType()),\n", - " StructField(\"dti\", DoubleType()),\n", - " StructField(\"borrower_credit_score\", DoubleType()),\n", - " StructField(\"first_home_buyer\", StringType()),\n", - " StructField(\"loan_purpose\", StringType()),\n", - " StructField(\"property_type\", StringType()),\n", - " StructField(\"num_units\", IntegerType()),\n", - " StructField(\"occupancy_status\", StringType()),\n", - " StructField(\"property_state\", StringType()),\n", - " StructField(\"zip\", IntegerType()),\n", - " StructField(\"mortgage_insurance_percent\", DoubleType()),\n", - " StructField(\"product_type\", StringType()),\n", - " StructField(\"coborrow_credit_score\", DoubleType()),\n", - " StructField(\"mortgage_insurance_type\", DoubleType()),\n", - " StructField(\"relocation_mortgage_indicator\", StringType())])" + "_csv_raw_schema = StructType([\n", + " StructField(\"reference_pool_id\", StringType()),\n", + " StructField(\"loan_id\", LongType()),\n", + " StructField(\"monthly_reporting_period\", StringType()),\n", + " StructField(\"orig_channel\", StringType()),\n", + " StructField(\"seller_name\", StringType()),\n", + " StructField(\"servicer\", StringType()),\n", + " StructField(\"master_servicer\", StringType()),\n", + " StructField(\"orig_interest_rate\", DoubleType()),\n", + " StructField(\"interest_rate\", DoubleType()),\n", + " StructField(\"orig_upb\", DoubleType()),\n", + " StructField(\"upb_at_issuance\", StringType()),\n", + " StructField(\"current_actual_upb\", DoubleType()),\n", + " StructField(\"orig_loan_term\", IntegerType()),\n", + " StructField(\"orig_date\", StringType()),\n", + " StructField(\"first_pay_date\", StringType()), \n", + " StructField(\"loan_age\", DoubleType()),\n", + " StructField(\"remaining_months_to_legal_maturity\", DoubleType()),\n", + " 
StructField(\"adj_remaining_months_to_maturity\", DoubleType()),\n", + " StructField(\"maturity_date\", StringType()),\n", + " StructField(\"orig_ltv\", DoubleType()),\n", + " StructField(\"orig_cltv\", DoubleType()),\n", + " StructField(\"num_borrowers\", DoubleType()),\n", + " StructField(\"dti\", DoubleType()),\n", + " StructField(\"borrower_credit_score\", DoubleType()),\n", + " StructField(\"coborrow_credit_score\", DoubleType()),\n", + " StructField(\"first_home_buyer\", StringType()),\n", + " StructField(\"loan_purpose\", StringType()),\n", + " StructField(\"property_type\", StringType()),\n", + " StructField(\"num_units\", IntegerType()),\n", + " StructField(\"occupancy_status\", StringType()),\n", + " StructField(\"property_state\", StringType()),\n", + " StructField(\"msa\", DoubleType()),\n", + " StructField(\"zip\", IntegerType()),\n", + " StructField(\"mortgage_insurance_percent\", DoubleType()),\n", + " StructField(\"product_type\", StringType()),\n", + " StructField(\"prepayment_penalty_indicator\", StringType()),\n", + " StructField(\"interest_only_loan_indicator\", StringType()),\n", + " StructField(\"interest_only_first_principal_and_interest_payment_date\", StringType()),\n", + " StructField(\"months_to_amortization\", StringType()),\n", + " StructField(\"current_loan_delinquency_status\", IntegerType()),\n", + " StructField(\"loan_payment_history\", StringType()),\n", + " StructField(\"mod_flag\", StringType()),\n", + " StructField(\"mortgage_insurance_cancellation_indicator\", StringType()),\n", + " StructField(\"zero_balance_code\", StringType()),\n", + " StructField(\"zero_balance_effective_date\", StringType()),\n", + " StructField(\"upb_at_the_time_of_removal\", StringType()),\n", + " StructField(\"repurchase_date\", StringType()),\n", + " StructField(\"scheduled_principal_current\", StringType()),\n", + " StructField(\"total_principal_current\", StringType()),\n", + " StructField(\"unscheduled_principal_current\", StringType()),\n", + " StructField(\"last_paid_installment_date\", StringType()),\n", + " StructField(\"foreclosed_after\", StringType()),\n", + " StructField(\"disposition_date\", StringType()),\n", + " StructField(\"foreclosure_costs\", DoubleType()),\n", + " StructField(\"prop_preservation_and_repair_costs\", DoubleType()),\n", + " StructField(\"asset_recovery_costs\", DoubleType()),\n", + " StructField(\"misc_holding_expenses\", DoubleType()),\n", + " StructField(\"holding_taxes\", DoubleType()),\n", + " StructField(\"net_sale_proceeds\", DoubleType()),\n", + " StructField(\"credit_enhancement_proceeds\", DoubleType()),\n", + " StructField(\"repurchase_make_whole_proceeds\", StringType()),\n", + " StructField(\"other_foreclosure_proceeds\", DoubleType()),\n", + " StructField(\"non_interest_bearing_upb\", DoubleType()),\n", + " StructField(\"principal_forgiveness_upb\", StringType()),\n", + " StructField(\"original_list_start_date\", StringType()),\n", + " StructField(\"original_list_price\", StringType()),\n", + " StructField(\"current_list_start_date\", StringType()),\n", + " StructField(\"current_list_price\", StringType()),\n", + " StructField(\"borrower_credit_score_at_issuance\", StringType()),\n", + " StructField(\"co-borrower_credit_score_at_issuance\", StringType()),\n", + " StructField(\"borrower_credit_score_current\", StringType()),\n", + " StructField(\"co-Borrower_credit_score_current\", StringType()),\n", + " StructField(\"mortgage_insurance_type\", DoubleType()),\n", + " StructField(\"servicing_activity_indicator\", StringType()),\n", 
+ " StructField(\"current_period_modification_loss_amount\", StringType()),\n", + " StructField(\"cumulative_modification_loss_amount\", StringType()),\n", + " StructField(\"current_period_credit_event_net_gain_or_loss\", StringType()),\n", + " StructField(\"cumulative_credit_event_net_gain_or_loss\", StringType()),\n", + " StructField(\"homeready_program_indicator\", StringType()),\n", + " StructField(\"foreclosure_principal_write_off_amount\", StringType()),\n", + " StructField(\"relocation_mortgage_indicator\", StringType()),\n", + " StructField(\"zero_balance_code_change_date\", StringType()),\n", + " StructField(\"loan_holdback_indicator\", StringType()),\n", + " StructField(\"loan_holdback_effective_date\", StringType()),\n", + " StructField(\"delinquent_accrued_interest\", StringType()),\n", + " StructField(\"property_valuation_method\", StringType()),\n", + " StructField(\"high_balance_loan_indicator\", StringType()),\n", + " StructField(\"arm_initial_fixed-rate_period_lt_5_yr_indicator\", StringType()),\n", + " StructField(\"arm_product_type\", StringType()),\n", + " StructField(\"initial_fixed-rate_period\", StringType()),\n", + " StructField(\"interest_rate_adjustment_frequency\", StringType()),\n", + " StructField(\"next_interest_rate_adjustment_date\", StringType()),\n", + " StructField(\"next_payment_change_date\", StringType()),\n", + " StructField(\"index\", StringType()),\n", + " StructField(\"arm_cap_structure\", StringType()),\n", + " StructField(\"initial_interest_rate_cap_up_percent\", StringType()),\n", + " StructField(\"periodic_interest_rate_cap_up_percent\", StringType()),\n", + " StructField(\"lifetime_interest_rate_cap_up_percent\", StringType()),\n", + " StructField(\"mortgage_margin\", StringType()),\n", + " StructField(\"arm_balloon_indicator\", StringType()),\n", + " StructField(\"arm_plan_number\", StringType()),\n", + " StructField(\"borrower_assistance_plan\", StringType()),\n", + " StructField(\"hltv_refinance_option_indicator\", StringType()),\n", + " StructField(\"deal_name\", StringType()),\n", + " StructField(\"repurchase_make_whole_proceeds_flag\", StringType()),\n", + " StructField(\"alternative_delinquency_resolution\", StringType()),\n", + " StructField(\"alternative_delinquency_resolution_count\", StringType()),\n", + " StructField(\"total_deferral_amount\", StringType())\n", + " ])" ] }, { @@ -312,14 +366,14 @@ "outputs": [], "source": [ "def _get_quarter_from_csv_file_name():\n", - " return substring_index(substring_index(input_file_name(), \".\", 1), \"_\", -1)" + " return substring_index(substring_index(input_file_name(), \".\", 1), \"/\", -1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "* Define function to read Performance CSV data file" + "* Define function to read raw CSV data file" ] }, { @@ -328,37 +382,98 @@ "metadata": {}, "outputs": [], "source": [ - "def read_perf_csv(spark, path):\n", - " return spark.read.format(\"csv\") \\\n", - " .option(\"nullValue\", \"\") \\\n", - " .option(\"header\", \"false\") \\\n", - " .option(\"delimiter\", \"|\") \\\n", - " .schema(_csv_perf_schema) \\\n", + "def read_raw_csv(spark, path):\n", + " return spark.read.format('csv') \\\n", + " .option('nullValue', '') \\\n", + " .option('header', False) \\\n", + " .option('delimiter', '|') \\\n", + " .schema(_csv_raw_schema) \\\n", " .load(path) \\\n", - " .withColumn(\"quarter\", _get_quarter_from_csv_file_name())" + " .withColumn('quarter', _get_quarter_from_csv_file_name())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ 
- "* Define function to read Acquisition CSV file" + "* Functions to extract perf and acq columns from raw schema" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "def read_acq_csv(spark, path):\n", - " return spark.read.format(\"csv\") \\\n", - " .option(\"nullValue\", \"\") \\\n", - " .option(\"header\", \"false\") \\\n", - " .option(\"delimiter\", \"|\") \\\n", - " .schema(_csv_acq_schema) \\\n", - " .load(path) \\\n", - " .withColumn(\"quarter\", _get_quarter_from_csv_file_name())" + "def extract_perf_columns(rawDf):\n", + " perfDf = rawDf.select(\n", + " col(\"loan_id\"),\n", + " date_format(to_date(col(\"monthly_reporting_period\"),\"MMyyyy\"), \"MM/dd/yyyy\").alias(\"monthly_reporting_period\"),\n", + " upper(col(\"servicer\")).alias(\"servicer\"),\n", + " col(\"interest_rate\"),\n", + " col(\"current_actual_upb\"),\n", + " col(\"loan_age\"),\n", + " col(\"remaining_months_to_legal_maturity\"),\n", + " col(\"adj_remaining_months_to_maturity\"),\n", + " date_format(to_date(col(\"maturity_date\"),\"MMyyyy\"), \"MM/yyyy\").alias(\"maturity_date\"),\n", + " col(\"msa\"),\n", + " col(\"current_loan_delinquency_status\"),\n", + " col(\"mod_flag\"),\n", + " col(\"zero_balance_code\"),\n", + " date_format(to_date(col(\"zero_balance_effective_date\"),\"MMyyyy\"), \"MM/yyyy\").alias(\"zero_balance_effective_date\"),\n", + " date_format(to_date(col(\"last_paid_installment_date\"),\"MMyyyy\"), \"MM/dd/yyyy\").alias(\"last_paid_installment_date\"),\n", + " date_format(to_date(col(\"foreclosed_after\"),\"MMyyyy\"), \"MM/dd/yyyy\").alias(\"foreclosed_after\"),\n", + " date_format(to_date(col(\"disposition_date\"),\"MMyyyy\"), \"MM/dd/yyyy\").alias(\"disposition_date\"),\n", + " col(\"foreclosure_costs\"),\n", + " col(\"prop_preservation_and_repair_costs\"),\n", + " col(\"asset_recovery_costs\"),\n", + " col(\"misc_holding_expenses\"),\n", + " col(\"holding_taxes\"),\n", + " col(\"net_sale_proceeds\"),\n", + " col(\"credit_enhancement_proceeds\"),\n", + " col(\"repurchase_make_whole_proceeds\"),\n", + " col(\"other_foreclosure_proceeds\"),\n", + " col(\"non_interest_bearing_upb\"),\n", + " col(\"principal_forgiveness_upb\"),\n", + " col(\"repurchase_make_whole_proceeds_flag\"),\n", + " col(\"foreclosure_principal_write_off_amount\"),\n", + " col(\"servicing_activity_indicator\"),\n", + " col('quarter')\n", + " )\n", + " return perfDf.select(\"*\").filter(\"current_actual_upb != 0.0\")\n", + "\n", + "def extract_acq_columns(rawDf):\n", + " acqDf = rawDf.select(\n", + " col(\"loan_id\"),\n", + " col(\"orig_channel\"),\n", + " upper(col(\"seller_name\")).alias(\"seller_name\"),\n", + " col(\"orig_interest_rate\"),\n", + " col(\"orig_upb\"),\n", + " col(\"orig_loan_term\"),\n", + " date_format(to_date(col(\"orig_date\"),\"MMyyyy\"), \"MM/yyyy\").alias(\"orig_date\"),\n", + " date_format(to_date(col(\"first_pay_date\"),\"MMyyyy\"), \"MM/yyyy\").alias(\"first_pay_date\"),\n", + " col(\"orig_ltv\"),\n", + " col(\"orig_cltv\"),\n", + " col(\"num_borrowers\"),\n", + " col(\"dti\"),\n", + " col(\"borrower_credit_score\"),\n", + " col(\"first_home_buyer\"),\n", + " col(\"loan_purpose\"),\n", + " col(\"property_type\"),\n", + " col(\"num_units\"),\n", + " col(\"occupancy_status\"),\n", + " col(\"property_state\"),\n", + " col(\"zip\"),\n", + " col(\"mortgage_insurance_percent\"),\n", + " col(\"product_type\"),\n", + " col(\"coborrow_credit_score\"),\n", + " col(\"mortgage_insurance_type\"),\n", + " 
col(\"relocation_mortgage_indicator\"),\n", + " dense_rank().over(Window.partitionBy(\"loan_id\").orderBy(to_date(col(\"monthly_reporting_period\"),\"MMyyyy\"))).alias(\"rank\"),\n", + " col('quarter')\n", + " )\n", + "\n", + " return acqDf.select(\"*\").filter(col(\"rank\")==1)" ] }, { @@ -372,7 +487,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -398,7 +513,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -477,7 +592,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -524,7 +639,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -548,7 +663,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -583,7 +698,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -610,7 +725,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -619,12 +734,9 @@ "# CPU run, set to false, it can only make ETL run on CPU when is_save_dataset=True.\n", "# spark.conf.set(\"spark.rapids.sql.enabled\", \"false\")\n", "spark.conf.set(\"spark.sql.files.maxPartitionBytes\", \"1G\")\n", - "spark.conf.set(\"spark.sql.shuffle.partitions\", \"192\")\n", "spark.conf.set(\"spark.rapids.sql.explain\", \"ALL\")\n", - "spark.conf.set(\"spark.rapids.sql.incompatibleOps.enabled\", \"true\")\n", "spark.conf.set(\"spark.rapids.sql.batchSizeBytes\", \"512M\")\n", "spark.conf.set(\"spark.rapids.sql.reader.batchSizeBytes\", \"768M\")\n", - "spark.conf.set(\"spark.rapids.sql.incompatibleDateFormats.enabled\", \"true\")\n", "spark.conf.set(\"spark.rapids.sql.hasNans\", \"false\")\n", "# use GPU to read CSV\n", "spark.conf.set(\"spark.rapids.sql.csv.read.double.enabled\", \"true\")" @@ -639,7 +751,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "metadata": { "scrolled": false }, @@ -648,27 +760,27 @@ "name": "stdout", "output_type": "stream", "text": [ - "ETL takes 41.10439682006836\n" + "ETL takes 135.9117729663849\n" ] } ], "source": [ "\n", "# read raw dataset\n", - "perf = read_perf_csv(spark, orig_perf_path)\n", - "acq = read_acq_csv(spark, orig_acq_path)\n", + "rawDf = read_raw_csv(spark, orig_raw_path)\n", + "rawDf.write.parquet(orig_raw_path_csv2parquet, mode='overwrite')\n", + "rawDf = spark.read.parquet(orig_raw_path_csv2parquet)\n", + "\n", + "acq = extract_acq_columns(rawDf)\n", + "perf = extract_perf_columns(rawDf)\n", "\n", "# run main function to process data\n", "out = run_mortgage(spark, perf, acq)\n", "\n", - "# split 80% for training, 20% for test\n", - "splits = out.randomSplit([0.8, 0.2])\n", - "\n", "# save processed data\n", "if is_save_dataset:\n", " start = time.time()\n", - " splits[0].write.parquet(output_path_train, mode=\"overwrite\")\n", - " splits[1].write.parquet(output_path_test, mode=\"overwrite\")\n", + " out.write.parquet(output_path_data, mode=\"overwrite\")\n", " end = time.time()\n", " print(\"ETL takes {}\".format(end - start))" ] @@ -689,7 +801,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -706,7 +818,7 @@ }, { "cell_type": "code", - "execution_count": 20, + 
"execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -725,7 +837,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -742,7 +854,7 @@ " StructField(\"seller_name\", FloatType()),\n", " StructField(\"mod_flag\", FloatType()),\n", " StructField(\"orig_interest_rate\", FloatType()),\n", - " StructField(\"orig_upb\", IntegerType()),\n", + " StructField(\"orig_upb\", DoubleType()),\n", " StructField(\"orig_loan_term\", IntegerType()),\n", " StructField(\"orig_ltv\", FloatType()),\n", " StructField(\"orig_cltv\", FloatType()),\n", @@ -764,17 +876,20 @@ "\n", "if is_save_dataset:\n", " # load dataset from file\n", - " train_data = reader.parquet(output_path_train)\n", - " test_data = reader.parquet(output_path_test)\n", + " etlDf = reader.parquet(output_path_data)\n", + " splits = etlDf.randomSplit([0.8, 0.2])\n", + " train_data = splits[0]\n", + " test_data = splits[1]\n", "else:\n", " # use Dataframe from ETL directly\n", + " splits = out.randomSplit([0.8, 0.2])\n", " train_data = splits[0]\n", " test_data = splits[1]" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -785,21 +900,21 @@ " \"growPolicy\": \"depthwise\",\n", " \"nthread\": 1,\n", " \"numRound\": 100,\n", - " \"numWorkers\": 2,\n", + " \"numWorkers\": 1,\n", "}\n", "classifier = XGBoostClassifier(**params).setLabelCol(label).setFeaturesCols(features)" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Training takes 23.666603565216064 seconds\n" + "Training takes 18.92583155632019 seconds\n" ] } ], @@ -815,7 +930,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -825,22 +940,22 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Transformation takes 10.464573383331299 seconds\n", + "Transformation takes 8.959877967834473 seconds\n", "+--------------+--------------------+--------------------+----------+\n", "|delinquency_12| rawPrediction| probability|prediction|\n", "+--------------+--------------------+--------------------+----------+\n", - "| 0|[11.3724613189697...|[0.99998849205439...| 0.0|\n", - "| 0|[8.75509834289550...|[0.99984236936143...| 0.0|\n", - "| 0|[8.56840324401855...|[0.99981002029380...| 0.0|\n", - "| 0|[8.45872020721435...|[0.99978800168901...| 0.0|\n", - "| 0|[8.45872020721435...|[0.99978800168901...| 0.0|\n", + "| 0|[7.92072248458862...|[0.99963699193904...| 0.0|\n", + "| 0|[7.92072248458862...|[0.99963699193904...| 0.0|\n", + "| 0|[8.43130302429199...|[0.99978211015695...| 0.0|\n", + "| 0|[8.20779895782470...|[0.99972755435737...| 0.0|\n", + "| 0|[8.885986328125,-...|[0.99986170543706...| 0.0|\n", "+--------------+--------------------+--------------------+----------+\n", "only showing top 5 rows\n", "\n" @@ -858,15 +973,15 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Evaluation takes 0.770418643951416 seconds\n", - "Accuracy is 0.9881320119084719\n" + "Evaluation takes 0.6158628463745117 seconds\n", + "Accuracy is 0.9861453808970397\n" ] } ], @@ -879,7 +994,7 @@ }, { "cell_type": "code", - "execution_count": 27, + 
"execution_count": 30, "metadata": {}, "outputs": [], "source": [ diff --git a/examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL.ipynb b/examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL.ipynb index d36474176..433d35880 100644 --- a/examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL.ipynb +++ b/examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL.ipynb @@ -6,10 +6,10 @@ "source": [ "## Prerequirement\n", "### 1. Download data\n", - "All data could be found at https://docs.rapids.ai/datasets/mortgage-data\n", + "Dataset is derived from Fannie Mae’s [Single-Family Loan Performance Data](http://www.fanniemae.com/portal/funding-the-market/data/loan-performance-data.html) with all rights reserved by Fannie Mae. Refer to these [instructions](https://github.com/NVIDIA/spark-rapids-examples/blob/branch-22.08/docs/get-started/xgboost-examples/dataset/mortgage.md) to download the dataset.\n", "\n", "### 2. Download needed jars\n", - "* [rapids-4-spark_2.12-22.06.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.06.0/rapids-4-spark_2.12-22.06.0.jar)\n", + "* [rapids-4-spark_2.12-22.08.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.08.0/rapids-4-spark_2.12-22.08.0.jar)\n", "\n", "\n", "### 3. Start Spark Standalone\n", @@ -17,7 +17,7 @@ "\n", "### 4. Add ENV\n", "```\n", - "$ export SPARK_JARS=rapids-4-spark_2.12-22.06.0.jar\n", + "$ export SPARK_JARS=rapids-4-spark_2.12-22.08.0.jar\n", "$ export PYSPARK_DRIVER_PYTHON=jupyter \n", "$ export PYSPARK_DRIVER_PYTHON_OPTS=notebook\n", "```\n", @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -60,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -82,70 +82,121 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# File schema\n", - "_csv_perf_schema = StructType([\n", - " StructField('loan_id', LongType()),\n", - " StructField('monthly_reporting_period', StringType()),\n", - " StructField('servicer', StringType()),\n", - " StructField('interest_rate', DoubleType()),\n", - " StructField('current_actual_upb', DoubleType()),\n", - " StructField('loan_age', DoubleType()),\n", - " StructField('remaining_months_to_legal_maturity', DoubleType()),\n", - " StructField('adj_remaining_months_to_maturity', DoubleType()),\n", - " StructField('maturity_date', StringType()),\n", - " StructField('msa', DoubleType()),\n", - " StructField('current_loan_delinquency_status', IntegerType()),\n", - " StructField('mod_flag', StringType()),\n", - " StructField('zero_balance_code', StringType()),\n", - " StructField('zero_balance_effective_date', StringType()),\n", - " StructField('last_paid_installment_date', StringType()),\n", - " StructField('foreclosed_after', StringType()),\n", - " StructField('disposition_date', StringType()),\n", - " StructField('foreclosure_costs', DoubleType()),\n", - " StructField('prop_preservation_and_repair_costs', DoubleType()),\n", - " StructField('asset_recovery_costs', DoubleType()),\n", - " StructField('misc_holding_expenses', DoubleType()),\n", - " StructField('holding_taxes', DoubleType()),\n", - " StructField('net_sale_proceeds', DoubleType()),\n", - " StructField('credit_enhancement_proceeds', DoubleType()),\n", - " StructField('repurchase_make_whole_proceeds', StringType()),\n", - " 
StructField('other_foreclosure_proceeds', DoubleType()),\n", - " StructField('non_interest_bearing_upb', DoubleType()),\n", - " StructField('principal_forgiveness_upb', StringType()),\n", - " StructField('repurchase_make_whole_proceeds_flag', StringType()),\n", - " StructField('foreclosure_principal_write_off_amount', StringType()),\n", - " StructField('servicing_activity_indicator', StringType())])\n", - "\n", - "_csv_acq_schema = StructType([\n", - " StructField('loan_id', LongType()),\n", - " StructField('orig_channel', StringType()),\n", - " StructField('seller_name', StringType()),\n", - " StructField('orig_interest_rate', DoubleType()),\n", - " StructField('orig_upb', IntegerType()),\n", - " StructField('orig_loan_term', IntegerType()),\n", - " StructField('orig_date', StringType()),\n", - " StructField('first_pay_date', StringType()),\n", - " StructField('orig_ltv', DoubleType()),\n", - " StructField('orig_cltv', DoubleType()),\n", - " StructField('num_borrowers', DoubleType()),\n", - " StructField('dti', DoubleType()),\n", - " StructField('borrower_credit_score', DoubleType()),\n", - " StructField('first_home_buyer', StringType()),\n", - " StructField('loan_purpose', StringType()),\n", - " StructField('property_type', StringType()),\n", - " StructField('num_units', IntegerType()),\n", - " StructField('occupancy_status', StringType()),\n", - " StructField('property_state', StringType()),\n", - " StructField('zip', IntegerType()),\n", - " StructField('mortgage_insurance_percent', DoubleType()),\n", - " StructField('product_type', StringType()),\n", - " StructField('coborrow_credit_score', DoubleType()),\n", - " StructField('mortgage_insurance_type', DoubleType()),\n", - " StructField('relocation_mortgage_indicator', StringType())])" + "_csv_raw_schema = StructType([\n", + " StructField(\"reference_pool_id\", StringType()),\n", + " StructField(\"loan_id\", LongType()),\n", + " StructField(\"monthly_reporting_period\", StringType()),\n", + " StructField(\"orig_channel\", StringType()),\n", + " StructField(\"seller_name\", StringType()),\n", + " StructField(\"servicer\", StringType()),\n", + " StructField(\"master_servicer\", StringType()),\n", + " StructField(\"orig_interest_rate\", DoubleType()),\n", + " StructField(\"interest_rate\", DoubleType()),\n", + " StructField(\"orig_upb\", DoubleType()),\n", + " StructField(\"upb_at_issuance\", StringType()),\n", + " StructField(\"current_actual_upb\", DoubleType()),\n", + " StructField(\"orig_loan_term\", IntegerType()),\n", + " StructField(\"orig_date\", StringType()),\n", + " StructField(\"first_pay_date\", StringType()), \n", + " StructField(\"loan_age\", DoubleType()),\n", + " StructField(\"remaining_months_to_legal_maturity\", DoubleType()),\n", + " StructField(\"adj_remaining_months_to_maturity\", DoubleType()),\n", + " StructField(\"maturity_date\", StringType()),\n", + " StructField(\"orig_ltv\", DoubleType()),\n", + " StructField(\"orig_cltv\", DoubleType()),\n", + " StructField(\"num_borrowers\", DoubleType()),\n", + " StructField(\"dti\", DoubleType()),\n", + " StructField(\"borrower_credit_score\", DoubleType()),\n", + " StructField(\"coborrow_credit_score\", DoubleType()),\n", + " StructField(\"first_home_buyer\", StringType()),\n", + " StructField(\"loan_purpose\", StringType()),\n", + " StructField(\"property_type\", StringType()),\n", + " StructField(\"num_units\", IntegerType()),\n", + " StructField(\"occupancy_status\", StringType()),\n", + " StructField(\"property_state\", StringType()),\n", + " StructField(\"msa\", 
DoubleType()),\n", + " StructField(\"zip\", IntegerType()),\n", + " StructField(\"mortgage_insurance_percent\", DoubleType()),\n", + " StructField(\"product_type\", StringType()),\n", + " StructField(\"prepayment_penalty_indicator\", StringType()),\n", + " StructField(\"interest_only_loan_indicator\", StringType()),\n", + " StructField(\"interest_only_first_principal_and_interest_payment_date\", StringType()),\n", + " StructField(\"months_to_amortization\", StringType()),\n", + " StructField(\"current_loan_delinquency_status\", IntegerType()),\n", + " StructField(\"loan_payment_history\", StringType()),\n", + " StructField(\"mod_flag\", StringType()),\n", + " StructField(\"mortgage_insurance_cancellation_indicator\", StringType()),\n", + " StructField(\"zero_balance_code\", StringType()),\n", + " StructField(\"zero_balance_effective_date\", StringType()),\n", + " StructField(\"upb_at_the_time_of_removal\", StringType()),\n", + " StructField(\"repurchase_date\", StringType()),\n", + " StructField(\"scheduled_principal_current\", StringType()),\n", + " StructField(\"total_principal_current\", StringType()),\n", + " StructField(\"unscheduled_principal_current\", StringType()),\n", + " StructField(\"last_paid_installment_date\", StringType()),\n", + " StructField(\"foreclosed_after\", StringType()),\n", + " StructField(\"disposition_date\", StringType()),\n", + " StructField(\"foreclosure_costs\", DoubleType()),\n", + " StructField(\"prop_preservation_and_repair_costs\", DoubleType()),\n", + " StructField(\"asset_recovery_costs\", DoubleType()),\n", + " StructField(\"misc_holding_expenses\", DoubleType()),\n", + " StructField(\"holding_taxes\", DoubleType()),\n", + " StructField(\"net_sale_proceeds\", DoubleType()),\n", + " StructField(\"credit_enhancement_proceeds\", DoubleType()),\n", + " StructField(\"repurchase_make_whole_proceeds\", StringType()),\n", + " StructField(\"other_foreclosure_proceeds\", DoubleType()),\n", + " StructField(\"non_interest_bearing_upb\", DoubleType()),\n", + " StructField(\"principal_forgiveness_upb\", StringType()),\n", + " StructField(\"original_list_start_date\", StringType()),\n", + " StructField(\"original_list_price\", StringType()),\n", + " StructField(\"current_list_start_date\", StringType()),\n", + " StructField(\"current_list_price\", StringType()),\n", + " StructField(\"borrower_credit_score_at_issuance\", StringType()),\n", + " StructField(\"co-borrower_credit_score_at_issuance\", StringType()),\n", + " StructField(\"borrower_credit_score_current\", StringType()),\n", + " StructField(\"co-Borrower_credit_score_current\", StringType()),\n", + " StructField(\"mortgage_insurance_type\", DoubleType()),\n", + " StructField(\"servicing_activity_indicator\", StringType()),\n", + " StructField(\"current_period_modification_loss_amount\", StringType()),\n", + " StructField(\"cumulative_modification_loss_amount\", StringType()),\n", + " StructField(\"current_period_credit_event_net_gain_or_loss\", StringType()),\n", + " StructField(\"cumulative_credit_event_net_gain_or_loss\", StringType()),\n", + " StructField(\"homeready_program_indicator\", StringType()),\n", + " StructField(\"foreclosure_principal_write_off_amount\", StringType()),\n", + " StructField(\"relocation_mortgage_indicator\", StringType()),\n", + " StructField(\"zero_balance_code_change_date\", StringType()),\n", + " StructField(\"loan_holdback_indicator\", StringType()),\n", + " StructField(\"loan_holdback_effective_date\", StringType()),\n", + " StructField(\"delinquent_accrued_interest\", 
StringType()),\n", + " StructField(\"property_valuation_method\", StringType()),\n", + " StructField(\"high_balance_loan_indicator\", StringType()),\n", + " StructField(\"arm_initial_fixed-rate_period_lt_5_yr_indicator\", StringType()),\n", + " StructField(\"arm_product_type\", StringType()),\n", + " StructField(\"initial_fixed-rate_period\", StringType()),\n", + " StructField(\"interest_rate_adjustment_frequency\", StringType()),\n", + " StructField(\"next_interest_rate_adjustment_date\", StringType()),\n", + " StructField(\"next_payment_change_date\", StringType()),\n", + " StructField(\"index\", StringType()),\n", + " StructField(\"arm_cap_structure\", StringType()),\n", + " StructField(\"initial_interest_rate_cap_up_percent\", StringType()),\n", + " StructField(\"periodic_interest_rate_cap_up_percent\", StringType()),\n", + " StructField(\"lifetime_interest_rate_cap_up_percent\", StringType()),\n", + " StructField(\"mortgage_margin\", StringType()),\n", + " StructField(\"arm_balloon_indicator\", StringType()),\n", + " StructField(\"arm_plan_number\", StringType()),\n", + " StructField(\"borrower_assistance_plan\", StringType()),\n", + " StructField(\"hltv_refinance_option_indicator\", StringType()),\n", + " StructField(\"deal_name\", StringType()),\n", + " StructField(\"repurchase_make_whole_proceeds_flag\", StringType()),\n", + " StructField(\"alternative_delinquency_resolution\", StringType()),\n", + " StructField(\"alternative_delinquency_resolution_count\", StringType()),\n", + " StructField(\"total_deferral_amount\", StringType())\n", + " ])" ] }, { @@ -157,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -254,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -300,67 +351,129 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 2. 
Define ETL Process\n", - "\n", - "Define the function to do the ETL process\n", - "\n", - "#### 2.1 Define Functions to Read Raw CSV File\n", - "\n", - "* Define function to get quarter from input CSV file name" + "* Functions to extract perf and acq columns from raw schema" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ - "def _get_quarter_from_csv_file_name():\n", - " return substring_index(substring_index(input_file_name(), '.', 1), '_', -1)" + "def extract_perf_columns(rawDf):\n", + " perfDf = rawDf.select(\n", + " col(\"loan_id\"),\n", + " date_format(to_date(col(\"monthly_reporting_period\"),\"MMyyyy\"), \"MM/dd/yyyy\").alias(\"monthly_reporting_period\"),\n", + " upper(col(\"servicer\")).alias(\"servicer\"),\n", + " col(\"interest_rate\"),\n", + " col(\"current_actual_upb\"),\n", + " col(\"loan_age\"),\n", + " col(\"remaining_months_to_legal_maturity\"),\n", + " col(\"adj_remaining_months_to_maturity\"),\n", + " date_format(to_date(col(\"maturity_date\"),\"MMyyyy\"), \"MM/yyyy\").alias(\"maturity_date\"),\n", + " col(\"msa\"),\n", + " col(\"current_loan_delinquency_status\"),\n", + " col(\"mod_flag\"),\n", + " col(\"zero_balance_code\"),\n", + " date_format(to_date(col(\"zero_balance_effective_date\"),\"MMyyyy\"), \"MM/yyyy\").alias(\"zero_balance_effective_date\"),\n", + " date_format(to_date(col(\"last_paid_installment_date\"),\"MMyyyy\"), \"MM/dd/yyyy\").alias(\"last_paid_installment_date\"),\n", + " date_format(to_date(col(\"foreclosed_after\"),\"MMyyyy\"), \"MM/dd/yyyy\").alias(\"foreclosed_after\"),\n", + " date_format(to_date(col(\"disposition_date\"),\"MMyyyy\"), \"MM/dd/yyyy\").alias(\"disposition_date\"),\n", + " col(\"foreclosure_costs\"),\n", + " col(\"prop_preservation_and_repair_costs\"),\n", + " col(\"asset_recovery_costs\"),\n", + " col(\"misc_holding_expenses\"),\n", + " col(\"holding_taxes\"),\n", + " col(\"net_sale_proceeds\"),\n", + " col(\"credit_enhancement_proceeds\"),\n", + " col(\"repurchase_make_whole_proceeds\"),\n", + " col(\"other_foreclosure_proceeds\"),\n", + " col(\"non_interest_bearing_upb\"),\n", + " col(\"principal_forgiveness_upb\"),\n", + " col(\"repurchase_make_whole_proceeds_flag\"),\n", + " col(\"foreclosure_principal_write_off_amount\"),\n", + " col(\"servicing_activity_indicator\"),\n", + " col('quarter')\n", + " )\n", + "\n", + " return perfDf.select(\"*\").filter(\"current_actual_upb != 0.0\")\n", + "\n", + "def extract_acq_columns(rawDf):\n", + " acqDf = rawDf.select(\n", + " col(\"loan_id\"),\n", + " col(\"orig_channel\"),\n", + " upper(col(\"seller_name\")).alias(\"seller_name\"),\n", + " col(\"orig_interest_rate\"),\n", + " col(\"orig_upb\"),\n", + " col(\"orig_loan_term\"),\n", + " date_format(to_date(col(\"orig_date\"),\"MMyyyy\"), \"MM/yyyy\").alias(\"orig_date\"),\n", + " date_format(to_date(col(\"first_pay_date\"),\"MMyyyy\"), \"MM/yyyy\").alias(\"first_pay_date\"),\n", + " col(\"orig_ltv\"),\n", + " col(\"orig_cltv\"),\n", + " col(\"num_borrowers\"),\n", + " col(\"dti\"),\n", + " col(\"borrower_credit_score\"),\n", + " col(\"first_home_buyer\"),\n", + " col(\"loan_purpose\"),\n", + " col(\"property_type\"),\n", + " col(\"num_units\"),\n", + " col(\"occupancy_status\"),\n", + " col(\"property_state\"),\n", + " col(\"zip\"),\n", + " col(\"mortgage_insurance_percent\"),\n", + " col(\"product_type\"),\n", + " col(\"coborrow_credit_score\"),\n", + " col(\"mortgage_insurance_type\"),\n", + " col(\"relocation_mortgage_indicator\"),\n", + " 
dense_rank().over(Window.partitionBy(\"loan_id\").orderBy(to_date(col(\"monthly_reporting_period\"),\"MMyyyy\"))).alias(\"rank\"),\n", + " col('quarter')\n", + " )\n", + "\n", + " return acqDf.select(\"*\").filter(col(\"rank\")==1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "* Define function to read Performance CSV data file" + "### 2. Define ETL Process\n", + "\n", + "Define the function to do the ETL process\n", + "\n", + "#### 2.1 Define Functions to Read Raw CSV File\n", + "\n", + "* Define function to get quarter from input CSV file name" ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ - "def read_perf_csv(spark, path):\n", - " return spark.read.format('csv') \\\n", - " .option('nullValue', '') \\\n", - " .option('header', 'false') \\\n", - " .option('delimiter', '|') \\\n", - " .schema(_csv_perf_schema) \\\n", - " .load(path) \\\n", - " .withColumn('quarter', _get_quarter_from_csv_file_name())" + "def _get_quarter_from_csv_file_name():\n", + " return substring_index(substring_index(input_file_name(), '.', 1), '/', -1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "* Define function to read Acquisition CSV file" + "* Define function to read raw CSV data file" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ - "def read_acq_csv(spark, path):\n", + "def read_raw_csv(spark, path):\n", " return spark.read.format('csv') \\\n", " .option('nullValue', '') \\\n", - " .option('header', 'false') \\\n", + " .option('header', False) \\\n", " .option('delimiter', '|') \\\n", - " .schema(_csv_acq_schema) \\\n", + " .schema(_csv_raw_schema) \\\n", " .load(path) \\\n", " .withColumn('quarter', _get_quarter_from_csv_file_name())" ] @@ -376,7 +489,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ @@ -402,7 +515,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 49, "metadata": {}, "outputs": [], "source": [ @@ -481,7 +594,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -528,7 +641,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ @@ -552,7 +665,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ @@ -587,7 +700,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -615,31 +728,13 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "# You need to update them to your real paths!\n", "dataRoot = os.getenv(\"DATA_ROOT\", \"/data\")\n", - "orig_perf_path=dataRoot + '/mortgage/Performance/'\n", - "orig_acq_path=dataRoot + '/mortgage/Acquisition/'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* Define temporary folder path " - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [], - "source": [ - "tmp_perf_path=dataRoot + '/mortgage/perf/'\n", - "tmp_acq_path=dataRoot + '/mortgage/acq/'" + "orig_raw_path = dataRoot + '/mortgage/input/'" ] }, { @@ -651,11 +746,15 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ - 
"output_path=dataRoot + '/mortgage/output/'" + "output_path = dataRoot + '/mortgage/output/data/'\n", + "output_csv2parquet = dataRoot + '/mortgage/output/csv2parquet/'\n", + "output_path_train = dataRoot + '/mortgage/output/train/'\n", + "output_path_eval = dataRoot + '/mortgage/output/eval/'\n", + "save_train_eval_dataset = True" ] }, { @@ -667,12 +766,11 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "spark.conf.set('spark.rapids.sql.explain', 'ALL')\n", - "spark.conf.set('spark.rapids.sql.incompatibleOps.enabled', 'true')\n", "spark.conf.set('spark.rapids.sql.batchSizeBytes', '512M')\n", "spark.conf.set('spark.rapids.sql.reader.batchSizeBytes', '768M')" ] @@ -681,50 +779,28 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Run Part\n", - "### Read Raw File and Transcode Data\n", - "#### 1. Add additional Spark settings" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [], - "source": [ - "# we want a few big files instead of lots of small files\n", - "spark.conf.set('spark.sql.files.maxPartitionBytes', '200G')" + "## Run Part" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### 2. Read Raw File and Transcode to Parquet" + "### Read Raw File" ] }, { "cell_type": "code", - "execution_count": 62, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6.568682670593262\n" - ] - } - ], + "outputs": [], "source": [ - "start = time.time()\n", - "# read data and transcode to qarquet\n", - "acq = read_acq_csv(spark, orig_acq_path)\n", - "acq.repartition(12).write.parquet(tmp_acq_path, mode='overwrite')\n", - "perf = read_perf_csv(spark, orig_perf_path)\n", - "perf.coalesce(96).write.parquet(tmp_perf_path, mode='overwrite')\n", - "end = time.time()\n", - "print(end - start)" + "rawDf = read_raw_csv(spark, orig_raw_path)\n", + "rawDf.write.parquet(output_csv2parquet, mode='overwrite')\n", + "rawDf = spark.read.parquet(output_csv2parquet)\n", + "\n", + "acq = extract_acq_columns(rawDf)\n", + "perf = extract_perf_columns(rawDf)" ] }, { @@ -737,7 +813,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ @@ -746,7 +822,9 @@ "# CPU run, set to false\n", "# spark.conf.set('spark.rapids.sql.enabled', 'false')\n", "spark.conf.set('spark.sql.files.maxPartitionBytes', '1G')\n", - "spark.conf.set('spark.sql.shuffle.partitions', '192')" + "spark.conf.set(\"spark.rapids.sql.hasNans\", \"false\")\n", + "# use GPU to read CSV\n", + "spark.conf.set(\"spark.rapids.sql.csv.read.double.enabled\", \"true\")" ] }, { @@ -758,7 +836,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 61, "metadata": {}, "outputs": [ { @@ -766,786 +844,881 @@ "output_type": "stream", "text": [ "== Physical Plan ==\n", - "*(5) GpuColumnarToRow false\n", - "+- !GpuProject [gpucoalesce(orig_channel#27851, 0) AS orig_channel#29615, gpucoalesce(first_home_buyer#28053, 0) AS first_home_buyer#29616, gpucoalesce(loan_purpose#28255, 0) AS loan_purpose#29617, gpucoalesce(property_type#28457, 0) AS property_type#29618, gpucoalesce(occupancy_status#28659, 0) AS occupancy_status#29619, gpucoalesce(property_state#28861, 0) AS property_state#29620, gpucoalesce(relocation_mortgage_indicator#29063, 0) AS relocation_mortgage_indicator#29621, gpucoalesce(seller_name#29265, 0) AS seller_name#29622, gpucoalesce(id#27657, 0) AS mod_flag#29623, 
gpucoalesce(gpunanvl(orig_interest_rate#26291, null), 0.0) AS orig_interest_rate#29624, gpucoalesce(orig_upb#26292, 0) AS orig_upb#29625, gpucoalesce(orig_loan_term#26293, 0) AS orig_loan_term#29626, gpucoalesce(gpunanvl(orig_ltv#26296, null), 0.0) AS orig_ltv#29627, gpucoalesce(gpunanvl(orig_cltv#26297, null), 0.0) AS orig_cltv#29628, gpucoalesce(gpunanvl(num_borrowers#26298, null), 0.0) AS num_borrowers#29629, gpucoalesce(gpunanvl(dti#26299, null), 0.0) AS dti#29630, gpucoalesce(gpunanvl(borrower_credit_score#26300, null), 0.0) AS borrower_credit_score#29631, gpucoalesce(num_units#26304, 0) AS num_units#29632, gpucoalesce(zip#26307, 0) AS zip#29633, gpucoalesce(gpunanvl(mortgage_insurance_percent#26308, null), 0.0) AS mortgage_insurance_percent#29634, gpucoalesce(current_loan_delinquency_status#26234, 0) AS current_loan_delinquency_status#29635, gpucoalesce(gpunanvl(current_actual_upb#26228, null), 0.0) AS current_actual_upb#29636, gpucoalesce(gpunanvl(interest_rate#26227, null), 0.0) AS interest_rate#29637, gpucoalesce(gpunanvl(loan_age#26229, null), 0.0) AS loan_age#29638, ... 3 more fields]\n", - " +- !GpuBroadcastHashJoin [mod_flag#26235], [mod_flag#29333], LeftOuter, BuildRight\n", - " :- !GpuProject [interest_rate#26227, current_actual_upb#26228, loan_age#26229, msa#26233, current_loan_delinquency_status#26234, mod_flag#26235, non_interest_bearing_upb#26250, delinquency_12#27036, orig_interest_rate#26291, orig_upb#26292, orig_loan_term#26293, orig_ltv#26296, orig_cltv#26297, num_borrowers#26298, dti#26299, borrower_credit_score#26300, num_units#26304, zip#26307, mortgage_insurance_percent#26308, orig_channel#27851, first_home_buyer#28053, loan_purpose#28255, property_type#28457, occupancy_status#28659, ... 3 more fields]\n", - " : +- !GpuBroadcastHashJoin [seller_name#27396], [seller_name#29131], LeftOuter, BuildRight\n", - " : :- !GpuProject [interest_rate#26227, current_actual_upb#26228, loan_age#26229, msa#26233, current_loan_delinquency_status#26234, mod_flag#26235, non_interest_bearing_upb#26250, delinquency_12#27036, seller_name#27396, orig_interest_rate#26291, orig_upb#26292, orig_loan_term#26293, orig_ltv#26296, orig_cltv#26297, num_borrowers#26298, dti#26299, borrower_credit_score#26300, num_units#26304, zip#26307, mortgage_insurance_percent#26308, orig_channel#27851, first_home_buyer#28053, loan_purpose#28255, property_type#28457, ... 3 more fields]\n", - " : : +- !GpuBroadcastHashJoin [relocation_mortgage_indicator#26312], [relocation_mortgage_indicator#28929], LeftOuter, BuildRight\n", - " : : :- !GpuProject [interest_rate#26227, current_actual_upb#26228, loan_age#26229, msa#26233, current_loan_delinquency_status#26234, mod_flag#26235, non_interest_bearing_upb#26250, delinquency_12#27036, seller_name#27396, orig_interest_rate#26291, orig_upb#26292, orig_loan_term#26293, orig_ltv#26296, orig_cltv#26297, num_borrowers#26298, dti#26299, borrower_credit_score#26300, num_units#26304, zip#26307, mortgage_insurance_percent#26308, relocation_mortgage_indicator#26312, orig_channel#27851, first_home_buyer#28053, loan_purpose#28255, ... 
3 more fields]\n", - " : : : +- !GpuBroadcastHashJoin [property_state#26306], [property_state#28727], LeftOuter, BuildRight\n", - " : : : :- !GpuProject [interest_rate#26227, current_actual_upb#26228, loan_age#26229, msa#26233, current_loan_delinquency_status#26234, mod_flag#26235, non_interest_bearing_upb#26250, delinquency_12#27036, seller_name#27396, orig_interest_rate#26291, orig_upb#26292, orig_loan_term#26293, orig_ltv#26296, orig_cltv#26297, num_borrowers#26298, dti#26299, borrower_credit_score#26300, num_units#26304, property_state#26306, zip#26307, mortgage_insurance_percent#26308, relocation_mortgage_indicator#26312, orig_channel#27851, first_home_buyer#28053, ... 3 more fields]\n", - " : : : : +- !GpuBroadcastHashJoin [occupancy_status#26305], [occupancy_status#28525], LeftOuter, BuildRight\n", - " : : : : :- !GpuProject [interest_rate#26227, current_actual_upb#26228, loan_age#26229, msa#26233, current_loan_delinquency_status#26234, mod_flag#26235, non_interest_bearing_upb#26250, delinquency_12#27036, seller_name#27396, orig_interest_rate#26291, orig_upb#26292, orig_loan_term#26293, orig_ltv#26296, orig_cltv#26297, num_borrowers#26298, dti#26299, borrower_credit_score#26300, num_units#26304, occupancy_status#26305, property_state#26306, zip#26307, mortgage_insurance_percent#26308, relocation_mortgage_indicator#26312, orig_channel#27851, ... 3 more fields]\n", - " : : : : : +- !GpuBroadcastHashJoin [property_type#26303], [property_type#28323], LeftOuter, BuildRight\n", - " : : : : : :- !GpuProject [interest_rate#26227, current_actual_upb#26228, loan_age#26229, msa#26233, current_loan_delinquency_status#26234, mod_flag#26235, non_interest_bearing_upb#26250, delinquency_12#27036, seller_name#27396, orig_interest_rate#26291, orig_upb#26292, orig_loan_term#26293, orig_ltv#26296, orig_cltv#26297, num_borrowers#26298, dti#26299, borrower_credit_score#26300, property_type#26303, num_units#26304, occupancy_status#26305, property_state#26306, zip#26307, mortgage_insurance_percent#26308, relocation_mortgage_indicator#26312, ... 3 more fields]\n", - " : : : : : : +- !GpuBroadcastHashJoin [loan_purpose#26302], [loan_purpose#28121], LeftOuter, BuildRight\n", - " : : : : : : :- !GpuProject [interest_rate#26227, current_actual_upb#26228, loan_age#26229, msa#26233, current_loan_delinquency_status#26234, mod_flag#26235, non_interest_bearing_upb#26250, delinquency_12#27036, seller_name#27396, orig_interest_rate#26291, orig_upb#26292, orig_loan_term#26293, orig_ltv#26296, orig_cltv#26297, num_borrowers#26298, dti#26299, borrower_credit_score#26300, loan_purpose#26302, property_type#26303, num_units#26304, occupancy_status#26305, property_state#26306, zip#26307, mortgage_insurance_percent#26308, ... 3 more fields]\n", - " : : : : : : : +- !GpuBroadcastHashJoin [first_home_buyer#26301], [first_home_buyer#27919], LeftOuter, BuildRight\n", - " : : : : : : : :- !GpuProject [interest_rate#26227, current_actual_upb#26228, loan_age#26229, msa#26233, current_loan_delinquency_status#26234, mod_flag#26235, non_interest_bearing_upb#26250, delinquency_12#27036, seller_name#27396, orig_interest_rate#26291, orig_upb#26292, orig_loan_term#26293, orig_ltv#26296, orig_cltv#26297, num_borrowers#26298, dti#26299, borrower_credit_score#26300, first_home_buyer#26301, loan_purpose#26302, property_type#26303, num_units#26304, occupancy_status#26305, property_state#26306, zip#26307, ... 
3 more fields]\n", - " : : : : : : : : +- !GpuBroadcastHashJoin [orig_channel#26289], [orig_channel#27717], LeftOuter, BuildRight\n", - " : : : : : : : : :- !GpuProject [interest_rate#26227, current_actual_upb#26228, loan_age#26229, msa#26233, current_loan_delinquency_status#26234, mod_flag#26235, non_interest_bearing_upb#26250, delinquency_12#27036, orig_channel#26289, seller_name#27396, orig_interest_rate#26291, orig_upb#26292, orig_loan_term#26293, orig_ltv#26296, orig_cltv#26297, num_borrowers#26298, dti#26299, borrower_credit_score#26300, first_home_buyer#26301, loan_purpose#26302, property_type#26303, num_units#26304, occupancy_status#26305, property_state#26306, ... 3 more fields]\n", - " : : : : : : : : : +- !GpuShuffledHashJoin [loan_id#26224L, quarter#26255], [loan_id#26288L, quarter#26313], Inner, BuildRight\n", - " : : : : : : : : : :- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : : : : : : +- !GpuColumnarExchange gpuhashpartitioning(loan_id#26224L, quarter#26255, 192), true, [id=#17112]\n", - " : : : : : : : : : : +- !GpuProject [quarter#26255, loan_id#26224L, interest_rate#26227, current_actual_upb#26228, loan_age#26229, msa#26233, current_loan_delinquency_status#26234, mod_flag#26235, non_interest_bearing_upb#26250, delinquency_12#27036]\n", - " : : : : : : : : : : +- !GpuShuffledHashJoin [quarter#26255, loan_id#26224L, cast(timestamp_year#27100 as bigint), cast(timestamp_month#27064 as bigint)], [quarter#27167, loan_id#27136L, timestamp_year#26990L, timestamp_month#27019L], LeftOuter, BuildRight\n", - " : : : : : : : : : : :- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : : : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#26255, loan_id#26224L, cast(timestamp_year#27100 as bigint), cast(timestamp_month#27064 as bigint), 192), true, [id=#17081]\n", - " : : : : : : : : : : : +- GpuRowToColumnar TargetSize(536870912)\n", - " : : : : : : : : : : : +- *(1) Project [loan_id#26224L, interest_rate#26227, current_actual_upb#26228, loan_age#26229, msa#26233, current_loan_delinquency_status#26234, mod_flag#26235, non_interest_bearing_upb#26250, quarter#26255, month(cast(cast(unix_timestamp(monthly_reporting_period#26225, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_month#27064, year(cast(cast(unix_timestamp(monthly_reporting_period#26225, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_year#27100]\n", - " : : : : : : : : : : : +- *(1) GpuColumnarToRow false\n", - " : : : : : : : : : : : +- !GpuFilter (gpuisnotnull(loan_id#26224L) AND gpuisnotnull(quarter#26255))\n", - " : : : : : : : : : : : +- GpuFileScan parquet [loan_id#26224L,monthly_reporting_period#26225,interest_rate#26227,current_actual_upb#26228,loan_age#26229,msa#26233,current_loan_delinquency_status#26234,mod_flag#26235,non_interest_bearing_upb#26250,quarter#26255] Batched: true, DataFilters: [isnotnull(loan_id#26224L), isnotnull(quarter#26255)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct= 1) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_30#26658, CASE WHEN (current_loan_delinquency_status#26895 >= 3) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_90#26659, CASE WHEN (current_loan_delinquency_status#26895 >= 6) THEN 
cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_180#26660]\n", - " : : : : : : : : : : +- *(3) GpuColumnarToRow false\n", - " : : : : : : : : : : +- !GpuFilter (gpuisnotnull(loan_id#26885L) AND gpuisnotnull(quarter#26916))\n", - " : : : : : : : : : : +- GpuFileScan parquet [loan_id#26885L,monthly_reporting_period#26886,current_loan_delinquency_status#26895,quarter#26916] Batched: true, DataFilters: [isnotnull(loan_id#26885L), isnotnull(quarter#26916)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : : : : : : : : +- GpuCoalesceBatches RequireSingleBatch\n", - " : : : : : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, timestamp_year#26990L, timestamp_month#27019L, 192), true, [id=#15140]\n", - " : : : : : : : : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : : : : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931, 192), true, [id=#15137]\n", - " : : : : : : : : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : : : : : : : : +- !GpuProject [quarter#27167, FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) AS josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, loan_id#27136L, month_y#26931]\n", - " : : : : : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : : : : : +- !GpuFilter (gpuisnotnull(CASE WHEN ((((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) = 0) THEN 12 ELSE (((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) END) AND gpuisnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast((month_y#26931 - 1) as bigint)) as double) / 12.0))))\n", - " : : : : : : : : : +- GpuGenerate false, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680], [month_y#26931]\n", - " : : : : : : : : : +- !GpuProject [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680]\n", - " : : : : : : : : : +- !GpuBroadcastHashJoin [loan_id#27136L, quarter#27167], [loan_id#26885L, 
quarter#26916], LeftOuter, BuildRight\n", - " : : : : : : : : : :- GpuRowToColumnar TargetSize(536870912)\n", - " : : : : : : : : : : +- *(2) Project [quarter#27167, loan_id#27136L, month(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_month#26734, year(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_year#26770]\n", - " : : : : : : : : : : +- *(2) GpuColumnarToRow false\n", - " : : : : : : : : : : +- !GpuFilter (gpuisnotnull(loan_id#27136L) AND gpuisnotnull(quarter#27167))\n", - " : : : : : : : : : : +- GpuFileScan parquet [loan_id#27136L,monthly_reporting_period#27137,quarter#27167] Batched: true, DataFilters: [isnotnull(loan_id#27136L), isnotnull(quarter#27167)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : : : : : : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true])), [id=#15129]\n", - " : : : : : : : : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[gpumax(current_loan_delinquency_status#26895), gpumin(delinquency_30#26658), gpumin(delinquency_90#26659), gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : : : : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#26916, loan_id#26885L, 192), true, [id=#15126]\n", - " : : : : : : : : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[partial_gpumax(current_loan_delinquency_status#26895), partial_gpumin(delinquency_30#26658), partial_gpumin(delinquency_90#26659), partial_gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : : : : : : : : +- GpuRowToColumnar TargetSize(536870912)\n", - " : : : : : : : : : +- *(3) Project [quarter#26916, loan_id#26885L, current_loan_delinquency_status#26895, CASE WHEN (current_loan_delinquency_status#26895 >= 1) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_30#26658, CASE WHEN (current_loan_delinquency_status#26895 >= 3) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_90#26659, CASE WHEN (current_loan_delinquency_status#26895 >= 6) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_180#26660]\n", - " : : : : : : : : : +- *(3) GpuColumnarToRow false\n", - " : : : : : : : : : +- !GpuFilter (gpuisnotnull(loan_id#26885L) AND gpuisnotnull(quarter#26916))\n", - " : : : : : : : : : +- GpuFileScan parquet [loan_id#26885L,monthly_reporting_period#26886,current_loan_delinquency_status#26895,quarter#26916] Batched: true, DataFilters: [isnotnull(loan_id#26885L), isnotnull(quarter#26916)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : : : : : : : +- GpuCoalesceBatches RequireSingleBatch\n", - " : : : : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, timestamp_year#26990L, timestamp_month#27019L, 192), true, 
[id=#15140]\n", - " : : : : : : : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : : : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931, 192), true, [id=#15137]\n", - " : : : : : : : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : : : : : : : +- !GpuProject [quarter#27167, FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) AS josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, loan_id#27136L, month_y#26931]\n", - " : : : : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : : : : +- !GpuFilter (gpuisnotnull(CASE WHEN ((((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) = 0) THEN 12 ELSE (((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) END) AND gpuisnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast((month_y#26931 - 1) as bigint)) as double) / 12.0))))\n", - " : : : : : : : : +- GpuGenerate false, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680], [month_y#26931]\n", - " : : : : : : : : +- !GpuProject [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680]\n", - " : : : : : : : : +- !GpuBroadcastHashJoin [loan_id#27136L, quarter#27167], [loan_id#26885L, quarter#26916], LeftOuter, BuildRight\n", - " : : : : : : : : :- GpuRowToColumnar TargetSize(536870912)\n", - " : : : : : : : : : +- *(2) Project [quarter#27167, loan_id#27136L, month(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_month#26734, year(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_year#26770]\n", - " : : : : : : : : : +- *(2) GpuColumnarToRow false\n", - " : : : : : : : : : +- !GpuFilter (gpuisnotnull(loan_id#27136L) AND gpuisnotnull(quarter#27167))\n", - " : : : : : : : : : +- GpuFileScan parquet [loan_id#27136L,monthly_reporting_period#27137,quarter#27167] Batched: true, DataFilters: [isnotnull(loan_id#27136L), isnotnull(quarter#27167)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), 
IsNotNull(quarter)], ReadSchema: struct\n", - " : : : : : : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true])), [id=#15129]\n", - " : : : : : : : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[gpumax(current_loan_delinquency_status#26895), gpumin(delinquency_30#26658), gpumin(delinquency_90#26659), gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : : : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#26916, loan_id#26885L, 192), true, [id=#15126]\n", - " : : : : : : : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[partial_gpumax(current_loan_delinquency_status#26895), partial_gpumin(delinquency_30#26658), partial_gpumin(delinquency_90#26659), partial_gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : : : : : : : +- GpuRowToColumnar TargetSize(536870912)\n", - " : : : : : : : : +- *(3) Project [quarter#26916, loan_id#26885L, current_loan_delinquency_status#26895, CASE WHEN (current_loan_delinquency_status#26895 >= 1) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_30#26658, CASE WHEN (current_loan_delinquency_status#26895 >= 3) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_90#26659, CASE WHEN (current_loan_delinquency_status#26895 >= 6) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_180#26660]\n", - " : : : : : : : : +- *(3) GpuColumnarToRow false\n", - " : : : : : : : : +- !GpuFilter (gpuisnotnull(loan_id#26885L) AND gpuisnotnull(quarter#26916))\n", - " : : : : : : : : +- GpuFileScan parquet [loan_id#26885L,monthly_reporting_period#26886,current_loan_delinquency_status#26895,quarter#26916] Batched: true, DataFilters: [isnotnull(loan_id#26885L), isnotnull(quarter#26916)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : : : : : : +- GpuCoalesceBatches RequireSingleBatch\n", - " : : : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, timestamp_year#26990L, timestamp_month#27019L, 192), true, [id=#15140]\n", - " : : : : : : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931, 192), true, [id=#15137]\n", - " : : : : : : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : : : : : : +- !GpuProject [quarter#27167, FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - 
month_y#26931) as double) / 12.0)) AS josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, loan_id#27136L, month_y#26931]\n", - " : : : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : : : +- !GpuFilter (gpuisnotnull(CASE WHEN ((((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) = 0) THEN 12 ELSE (((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) END) AND gpuisnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast((month_y#26931 - 1) as bigint)) as double) / 12.0))))\n", - " : : : : : : : +- GpuGenerate false, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680], [month_y#26931]\n", - " : : : : : : : +- !GpuProject [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680]\n", - " : : : : : : : +- !GpuBroadcastHashJoin [loan_id#27136L, quarter#27167], [loan_id#26885L, quarter#26916], LeftOuter, BuildRight\n", - " : : : : : : : :- GpuRowToColumnar TargetSize(536870912)\n", - " : : : : : : : : +- *(2) Project [quarter#27167, loan_id#27136L, month(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_month#26734, year(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_year#26770]\n", - " : : : : : : : : +- *(2) GpuColumnarToRow false\n", - " : : : : : : : : +- !GpuFilter (gpuisnotnull(loan_id#27136L) AND gpuisnotnull(quarter#27167))\n", - " : : : : : : : : +- GpuFileScan parquet [loan_id#27136L,monthly_reporting_period#27137,quarter#27167] Batched: true, DataFilters: [isnotnull(loan_id#27136L), isnotnull(quarter#27167)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : : : : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true])), [id=#15129]\n", - " : : : : : : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[gpumax(current_loan_delinquency_status#26895), gpumin(delinquency_30#26658), gpumin(delinquency_90#26659), gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#26916, loan_id#26885L, 192), true, [id=#15126]\n", - " : : : : : : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[partial_gpumax(current_loan_delinquency_status#26895), partial_gpumin(delinquency_30#26658), partial_gpumin(delinquency_90#26659), partial_gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : : : : : : +- GpuRowToColumnar TargetSize(536870912)\n", - " : : : : : : : +- *(3) 
Project [quarter#26916, loan_id#26885L, current_loan_delinquency_status#26895, CASE WHEN (current_loan_delinquency_status#26895 >= 1) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_30#26658, CASE WHEN (current_loan_delinquency_status#26895 >= 3) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_90#26659, CASE WHEN (current_loan_delinquency_status#26895 >= 6) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_180#26660]\n", - " : : : : : : : +- *(3) GpuColumnarToRow false\n", - " : : : : : : : +- !GpuFilter (gpuisnotnull(loan_id#26885L) AND gpuisnotnull(quarter#26916))\n", - " : : : : : : : +- GpuFileScan parquet [loan_id#26885L,monthly_reporting_period#26886,current_loan_delinquency_status#26895,quarter#26916] Batched: true, DataFilters: [isnotnull(loan_id#26885L), isnotnull(quarter#26916)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : : : : : +- GpuCoalesceBatches RequireSingleBatch\n", - " : : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, timestamp_year#26990L, timestamp_month#27019L, 192), true, [id=#15140]\n", - " : : : : : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931, 192), true, [id=#15137]\n", - " : : : : : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : : : : : +- !GpuProject [quarter#27167, FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) AS josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, loan_id#27136L, month_y#26931]\n", - " : : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : : +- !GpuFilter (gpuisnotnull(CASE WHEN ((((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) = 0) THEN 12 ELSE (((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) END) AND gpuisnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast((month_y#26931 - 1) as bigint)) as double) / 12.0))))\n", - " : : : : : : +- GpuGenerate false, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, 
ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680], [month_y#26931]\n", - " : : : : : : +- !GpuProject [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680]\n", - " : : : : : : +- !GpuBroadcastHashJoin [loan_id#27136L, quarter#27167], [loan_id#26885L, quarter#26916], LeftOuter, BuildRight\n", - " : : : : : : :- GpuRowToColumnar TargetSize(536870912)\n", - " : : : : : : : +- *(2) Project [quarter#27167, loan_id#27136L, month(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_month#26734, year(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_year#26770]\n", - " : : : : : : : +- *(2) GpuColumnarToRow false\n", - " : : : : : : : +- !GpuFilter (gpuisnotnull(loan_id#27136L) AND gpuisnotnull(quarter#27167))\n", - " : : : : : : : +- GpuFileScan parquet [loan_id#27136L,monthly_reporting_period#27137,quarter#27167] Batched: true, DataFilters: [isnotnull(loan_id#27136L), isnotnull(quarter#27167)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : : : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true])), [id=#15129]\n", - " : : : : : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[gpumax(current_loan_delinquency_status#26895), gpumin(delinquency_30#26658), gpumin(delinquency_90#26659), gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#26916, loan_id#26885L, 192), true, [id=#15126]\n", - " : : : : : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[partial_gpumax(current_loan_delinquency_status#26895), partial_gpumin(delinquency_30#26658), partial_gpumin(delinquency_90#26659), partial_gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : : : : : +- GpuRowToColumnar TargetSize(536870912)\n", - " : : : : : : +- *(3) Project [quarter#26916, loan_id#26885L, current_loan_delinquency_status#26895, CASE WHEN (current_loan_delinquency_status#26895 >= 1) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_30#26658, CASE WHEN (current_loan_delinquency_status#26895 >= 3) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_90#26659, CASE WHEN (current_loan_delinquency_status#26895 >= 6) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_180#26660]\n", - " : : : : : : +- *(3) GpuColumnarToRow false\n", - " : : : : : : +- !GpuFilter (gpuisnotnull(loan_id#26885L) AND gpuisnotnull(quarter#26916))\n", - " : : : : : : +- GpuFileScan parquet [loan_id#26885L,monthly_reporting_period#26886,current_loan_delinquency_status#26895,quarter#26916] Batched: true, DataFilters: [isnotnull(loan_id#26885L), isnotnull(quarter#26916)], Format: Parquet, Location: 
InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : : : : +- GpuCoalesceBatches RequireSingleBatch\n", - " : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, timestamp_year#26990L, timestamp_month#27019L, 192), true, [id=#15140]\n", - " : : : : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931, 192), true, [id=#15137]\n", - " : : : : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : : : : +- !GpuProject [quarter#27167, FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) AS josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, loan_id#27136L, month_y#26931]\n", - " : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : +- !GpuFilter (gpuisnotnull(CASE WHEN ((((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) = 0) THEN 12 ELSE (((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) END) AND gpuisnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast((month_y#26931 - 1) as bigint)) as double) / 12.0))))\n", - " : : : : : +- GpuGenerate false, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680], [month_y#26931]\n", - " : : : : : +- !GpuProject [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680]\n", - " : : : : : +- !GpuBroadcastHashJoin [loan_id#27136L, quarter#27167], [loan_id#26885L, quarter#26916], LeftOuter, BuildRight\n", - " : : : : : :- GpuRowToColumnar TargetSize(536870912)\n", - " : : : : : : +- *(2) Project [quarter#27167, loan_id#27136L, month(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_month#26734, year(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_year#26770]\n", - " : : : : : : +- *(2) GpuColumnarToRow false\n", - " : : : : : : +- !GpuFilter (gpuisnotnull(loan_id#27136L) AND gpuisnotnull(quarter#27167))\n", - " : : : : : : +- GpuFileScan parquet 
[loan_id#27136L,monthly_reporting_period#27137,quarter#27167] Batched: true, DataFilters: [isnotnull(loan_id#27136L), isnotnull(quarter#27167)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true])), [id=#15129]\n", - " : : : : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[gpumax(current_loan_delinquency_status#26895), gpumin(delinquency_30#26658), gpumin(delinquency_90#26659), gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#26916, loan_id#26885L, 192), true, [id=#15126]\n", - " : : : : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[partial_gpumax(current_loan_delinquency_status#26895), partial_gpumin(delinquency_30#26658), partial_gpumin(delinquency_90#26659), partial_gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : : : : +- GpuRowToColumnar TargetSize(536870912)\n", - " : : : : : +- *(3) Project [quarter#26916, loan_id#26885L, current_loan_delinquency_status#26895, CASE WHEN (current_loan_delinquency_status#26895 >= 1) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_30#26658, CASE WHEN (current_loan_delinquency_status#26895 >= 3) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_90#26659, CASE WHEN (current_loan_delinquency_status#26895 >= 6) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_180#26660]\n", - " : : : : : +- *(3) GpuColumnarToRow false\n", - " : : : : : +- !GpuFilter (gpuisnotnull(loan_id#26885L) AND gpuisnotnull(quarter#26916))\n", - " : : : : : +- GpuFileScan parquet [loan_id#26885L,monthly_reporting_period#26886,current_loan_delinquency_status#26895,quarter#26916] Batched: true, DataFilters: [isnotnull(loan_id#26885L), isnotnull(quarter#26916)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : : : +- GpuCoalesceBatches RequireSingleBatch\n", - " : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, timestamp_year#26990L, timestamp_month#27019L, 192), true, [id=#15140]\n", - " : : : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931, 192), true, [id=#15137]\n", - " : : : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, 
delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : : : +- !GpuProject [quarter#27167, FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) AS josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, loan_id#27136L, month_y#26931]\n", - " : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : +- !GpuFilter (gpuisnotnull(CASE WHEN ((((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) = 0) THEN 12 ELSE (((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) END) AND gpuisnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast((month_y#26931 - 1) as bigint)) as double) / 12.0))))\n", - " : : : : +- GpuGenerate false, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680], [month_y#26931]\n", - " : : : : +- !GpuProject [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680]\n", - " : : : : +- !GpuBroadcastHashJoin [loan_id#27136L, quarter#27167], [loan_id#26885L, quarter#26916], LeftOuter, BuildRight\n", - " : : : : :- GpuRowToColumnar TargetSize(536870912)\n", - " : : : : : +- *(2) Project [quarter#27167, loan_id#27136L, month(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_month#26734, year(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_year#26770]\n", - " : : : : : +- *(2) GpuColumnarToRow false\n", - " : : : : : +- !GpuFilter (gpuisnotnull(loan_id#27136L) AND gpuisnotnull(quarter#27167))\n", - " : : : : : +- GpuFileScan parquet [loan_id#27136L,monthly_reporting_period#27137,quarter#27167] Batched: true, DataFilters: [isnotnull(loan_id#27136L), isnotnull(quarter#27167)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true])), [id=#15129]\n", - " : : : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[gpumax(current_loan_delinquency_status#26895), gpumin(delinquency_30#26658), gpumin(delinquency_90#26659), gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#26916, loan_id#26885L, 192), true, [id=#15126]\n", - " : : : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[partial_gpumax(current_loan_delinquency_status#26895), partial_gpumin(delinquency_30#26658), partial_gpumin(delinquency_90#26659), partial_gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, 
None, None))\n", - " : : : : +- GpuRowToColumnar TargetSize(536870912)\n", - " : : : : +- *(3) Project [quarter#26916, loan_id#26885L, current_loan_delinquency_status#26895, CASE WHEN (current_loan_delinquency_status#26895 >= 1) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_30#26658, CASE WHEN (current_loan_delinquency_status#26895 >= 3) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_90#26659, CASE WHEN (current_loan_delinquency_status#26895 >= 6) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_180#26660]\n", - " : : : : +- *(3) GpuColumnarToRow false\n", - " : : : : +- !GpuFilter (gpuisnotnull(loan_id#26885L) AND gpuisnotnull(quarter#26916))\n", - " : : : : +- GpuFileScan parquet [loan_id#26885L,monthly_reporting_period#26886,current_loan_delinquency_status#26895,quarter#26916] Batched: true, DataFilters: [isnotnull(loan_id#26885L), isnotnull(quarter#26916)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : : +- GpuCoalesceBatches RequireSingleBatch\n", - " : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, timestamp_year#26990L, timestamp_month#27019L, 192), true, [id=#15140]\n", - " : : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931, 192), true, [id=#15137]\n", - " : : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : : +- !GpuProject [quarter#27167, FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) AS josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, loan_id#27136L, month_y#26931]\n", - " : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : +- !GpuFilter (gpuisnotnull(CASE WHEN ((((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) = 0) THEN 12 ELSE (((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) END) AND gpuisnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast((month_y#26931 - 1) as bigint)) as double) / 12.0))))\n", - " : : : +- GpuGenerate false, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [loan_id#27136L, quarter#27167, timestamp_month#26734, 
timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680], [month_y#26931]\n", - " : : : +- !GpuProject [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680]\n", - " : : : +- !GpuBroadcastHashJoin [loan_id#27136L, quarter#27167], [loan_id#26885L, quarter#26916], LeftOuter, BuildRight\n", - " : : : :- GpuRowToColumnar TargetSize(536870912)\n", - " : : : : +- *(2) Project [quarter#27167, loan_id#27136L, month(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_month#26734, year(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_year#26770]\n", - " : : : : +- *(2) GpuColumnarToRow false\n", - " : : : : +- !GpuFilter (gpuisnotnull(loan_id#27136L) AND gpuisnotnull(quarter#27167))\n", - " : : : : +- GpuFileScan parquet [loan_id#27136L,monthly_reporting_period#27137,quarter#27167] Batched: true, DataFilters: [isnotnull(loan_id#27136L), isnotnull(quarter#27167)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true])), [id=#15129]\n", - " : : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[gpumax(current_loan_delinquency_status#26895), gpumin(delinquency_30#26658), gpumin(delinquency_90#26659), gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#26916, loan_id#26885L, 192), true, [id=#15126]\n", - " : : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[partial_gpumax(current_loan_delinquency_status#26895), partial_gpumin(delinquency_30#26658), partial_gpumin(delinquency_90#26659), partial_gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : : +- GpuRowToColumnar TargetSize(536870912)\n", - " : : : +- *(3) Project [quarter#26916, loan_id#26885L, current_loan_delinquency_status#26895, CASE WHEN (current_loan_delinquency_status#26895 >= 1) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_30#26658, CASE WHEN (current_loan_delinquency_status#26895 >= 3) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_90#26659, CASE WHEN (current_loan_delinquency_status#26895 >= 6) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_180#26660]\n", - " : : : +- *(3) GpuColumnarToRow false\n", - " : : : +- !GpuFilter (gpuisnotnull(loan_id#26885L) AND gpuisnotnull(quarter#26916))\n", - " : : : +- GpuFileScan parquet [loan_id#26885L,monthly_reporting_period#26886,current_loan_delinquency_status#26895,quarter#26916] Batched: true, DataFilters: [isnotnull(loan_id#26885L), isnotnull(quarter#26916)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: 
[IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : +- GpuCoalesceBatches RequireSingleBatch\n", - " : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, timestamp_year#26990L, timestamp_month#27019L, 192), true, [id=#15140]\n", - " : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931, 192), true, [id=#15137]\n", - " : : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : : +- !GpuProject [quarter#27167, FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) AS josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, loan_id#27136L, month_y#26931]\n", - " : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : +- !GpuFilter (gpuisnotnull(CASE WHEN ((((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) = 0) THEN 12 ELSE (((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) END) AND gpuisnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast((month_y#26931 - 1) as bigint)) as double) / 12.0))))\n", - " : : +- GpuGenerate false, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680], [month_y#26931]\n", - " : : +- !GpuProject [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680]\n", - " : : +- !GpuBroadcastHashJoin [loan_id#27136L, quarter#27167], [loan_id#26885L, quarter#26916], LeftOuter, BuildRight\n", - " : : :- GpuRowToColumnar TargetSize(536870912)\n", - " : : : +- *(2) Project [quarter#27167, loan_id#27136L, month(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_month#26734, year(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_year#26770]\n", - " : : : +- *(2) GpuColumnarToRow false\n", - " : : : +- !GpuFilter (gpuisnotnull(loan_id#27136L) AND gpuisnotnull(quarter#27167))\n", - " : : : +- GpuFileScan parquet [loan_id#27136L,monthly_reporting_period#27137,quarter#27167] Batched: true, DataFilters: [isnotnull(loan_id#27136L), isnotnull(quarter#27167)], Format: Parquet, Location: 
InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true])), [id=#15129]\n", - " : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[gpumax(current_loan_delinquency_status#26895), gpumin(delinquency_30#26658), gpumin(delinquency_90#26659), gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : : +- !GpuColumnarExchange gpuhashpartitioning(quarter#26916, loan_id#26885L, 192), true, [id=#15126]\n", - " : : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[partial_gpumax(current_loan_delinquency_status#26895), partial_gpumin(delinquency_30#26658), partial_gpumin(delinquency_90#26659), partial_gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : : +- GpuRowToColumnar TargetSize(536870912)\n", - " : : +- *(3) Project [quarter#26916, loan_id#26885L, current_loan_delinquency_status#26895, CASE WHEN (current_loan_delinquency_status#26895 >= 1) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_30#26658, CASE WHEN (current_loan_delinquency_status#26895 >= 3) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_90#26659, CASE WHEN (current_loan_delinquency_status#26895 >= 6) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_180#26660]\n", - " : : +- *(3) GpuColumnarToRow false\n", - " : : +- !GpuFilter (gpuisnotnull(loan_id#26885L) AND gpuisnotnull(quarter#26916))\n", - " : : +- GpuFileScan parquet [loan_id#26885L,monthly_reporting_period#26886,current_loan_delinquency_status#26895,quarter#26916] Batched: true, DataFilters: [isnotnull(loan_id#26885L), isnotnull(quarter#26916)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : +- GpuCoalesceBatches RequireSingleBatch\n", - " : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, timestamp_year#26990L, timestamp_month#27019L, 192), true, [id=#15140]\n", - " : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : +- !GpuColumnarExchange gpuhashpartitioning(quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931, 192), true, [id=#15137]\n", - " : +- GpuHashAggregate(keys=[quarter#27167, loan_id#27136L, josh_mody_n#26947L, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, month_y#26931], functions=[]), filters=ArrayBuffer())\n", - " : +- !GpuProject [quarter#27167, FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) AS josh_mody_n#26947L, ever_30#26687, 
ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680, loan_id#27136L, month_y#26931]\n", - " : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : +- !GpuFilter (gpuisnotnull(CASE WHEN ((((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) = 0) THEN 12 ELSE (((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast(month_y#26931 as bigint)) pmod 12) END) AND gpuisnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#26770 * 12) + timestamp_month#26734) - 24000) - month_y#26931) as double) / 12.0)) * 12)) + cast((month_y#26931 - 1) as bigint)) as double) / 12.0))))\n", - " : +- GpuGenerate false, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680], [month_y#26931]\n", - " : +- !GpuProject [loan_id#27136L, quarter#27167, timestamp_month#26734, timestamp_year#26770, ever_30#26687, ever_90#26688, ever_180#26689, delinquency_30#26676, delinquency_90#26678, delinquency_180#26680]\n", - " : +- !GpuBroadcastHashJoin [loan_id#27136L, quarter#27167], [loan_id#26885L, quarter#26916], LeftOuter, BuildRight\n", - " : :- GpuRowToColumnar TargetSize(536870912)\n", - " : : +- *(2) Project [quarter#27167, loan_id#27136L, month(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_month#26734, year(cast(cast(unix_timestamp(monthly_reporting_period#27137, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date)) AS timestamp_year#26770]\n", - " : : +- *(2) GpuColumnarToRow false\n", - " : : +- !GpuFilter (gpuisnotnull(loan_id#27136L) AND gpuisnotnull(quarter#27167))\n", - " : : +- GpuFileScan parquet [loan_id#27136L,monthly_reporting_period#27137,quarter#27167] Batched: true, DataFilters: [isnotnull(loan_id#27136L), isnotnull(quarter#27167)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", - " : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true])), [id=#15129]\n", - " : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[gpumax(current_loan_delinquency_status#26895), gpumin(delinquency_30#26658), gpumin(delinquency_90#26659), gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : +- GpuCoalesceBatches TargetSize(536870912)\n", - " : +- !GpuColumnarExchange gpuhashpartitioning(quarter#26916, loan_id#26885L, 192), true, [id=#15126]\n", - " : +- GpuHashAggregate(keys=[quarter#26916, loan_id#26885L], functions=[partial_gpumax(current_loan_delinquency_status#26895), partial_gpumin(delinquency_30#26658), partial_gpumin(delinquency_90#26659), partial_gpumin(delinquency_180#26660)]), filters=ArrayBuffer(None, None, None, None))\n", - " : +- GpuRowToColumnar TargetSize(536870912)\n", - " : +- *(3) Project [quarter#26916, loan_id#26885L, current_loan_delinquency_status#26895, CASE WHEN (current_loan_delinquency_status#26895 >= 1) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_30#26658, 
CASE WHEN (current_loan_delinquency_status#26895 >= 3) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_90#26659, CASE WHEN (current_loan_delinquency_status#26895 >= 6) THEN cast(cast(unix_timestamp(monthly_reporting_period#26886, MM/dd/yyyy, Some(Asia/Shanghai)) as timestamp) as date) END AS delinquency_180#26660]\n", - " : +- *(3) GpuColumnarToRow false\n", - " : +- !GpuFilter (gpuisnotnull(loan_id#26885L) AND gpuisnotnull(quarter#26916))\n", - " : +- GpuFileScan parquet [loan_id#26885L,monthly_reporting_period#26886,current_loan_delinquency_status#26895,quarter#26916] Batched: true, DataFilters: [isnotnull(loan_id#26885L), isnotnull(quarter#26916)], Format: Parquet, Location: InMemoryFileIndex[file:/home/mengmengg/xgboost4j_spark/data/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct= 1) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_30#1975, CASE WHEN (current_loan_delinquency_status#2212 >= 3) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_90#1976, CASE WHEN (current_loan_delinquency_status#2212 >= 6) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_180#1977]\n", + " : : : : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : : : : +- GpuFilter (gpuisnotnull(loan_id#2202L) AND gpuisnotnull(quarter#2233)), true\n", + " : : : : : : : : : : : +- GpuFileGpuScan parquet [loan_id#2202L,monthly_reporting_period#2203,current_loan_delinquency_status#2212,quarter#2233] Batched: true, DataFilters: [isnotnull(loan_id#2202L), isnotnull(quarter#2233)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", + " : : : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : : : +- GpuSort [quarter#2484 ASC NULLS FIRST, loan_id#2453L ASC NULLS FIRST, timestamp_year#2307L ASC NULLS FIRST, timestamp_month#2336L ASC NULLS FIRST], false, com.nvidia.spark.rapids.OutOfCoreSort$@163d9f7d\n", + " : : : : : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, timestamp_year#2307L, timestamp_month#2336L, 192), ENSURE_REQUIREMENTS, [id=#1114]\n", + " : : : : : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : : : : +- *(6) HashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[])\n", + " : : : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248, 192), ENSURE_REQUIREMENTS, [id=#1107]\n", + " : : : : : : : : : : +- GpuHashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[]), filters=ArrayBuffer())\n", + " 
: : : : : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : : : : +- *(5) Project [quarter#2484, FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) AS josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, loan_id#2453L, month_y#2248]\n", + " : : : : : : : : : : +- *(5) Filter (isnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast((month_y#2248 - 1) as bigint)) as double) / 12.0))) AND isnotnull(CASE WHEN (pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) = 0) THEN 12 ELSE pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) END))\n", + " : : : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : : : +- GpuGenerate gpuexplode([0,1,2,3,4,5,6,7,8,9,10,11]), [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997], false, [month_y#2248]\n", + " : : : : : : : : : : +- GpuProject [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997]\n", + " : : : : : : : : : : +- GpuBroadcastHashJoin [loan_id#2453L, quarter#2484], [loan_id#2202L, quarter#2233], LeftOuter, GpuBuildRight\n", + " : : : : : : : : : : :- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : : : : : +- *(3) Project [quarter#2484, loan_id#2453L, month(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_month#2051, year(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_year#2087]\n", + " : : : : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : : : : +- GpuFilter (gpuisnotnull(quarter#2484) AND gpuisnotnull(loan_id#2453L)), true\n", + " : : : : : : : : : : : +- GpuFileGpuScan parquet [loan_id#2453L,monthly_reporting_period#2454,quarter#2484] Batched: true, DataFilters: [isnotnull(quarter#2484), isnotnull(loan_id#2453L)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(quarter), IsNotNull(loan_id)], ReadSchema: struct\n", + " : : : : : : : : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true]),false), [id=#1096]\n", + " : : : : : : : : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[gpumax(current_loan_delinquency_status#2212), gpumin(delinquency_30#1975), gpumin(delinquency_90#1976), gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : : : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2233, loan_id#2202L, 192), ENSURE_REQUIREMENTS, [id=#1093]\n", + " : : : : : : : : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[partial_gpumax(current_loan_delinquency_status#2212), partial_gpumin(delinquency_30#1975), partial_gpumin(delinquency_90#1976), 
partial_gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : : : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : : : : +- *(4) Project [quarter#2233, loan_id#2202L, current_loan_delinquency_status#2212, CASE WHEN (current_loan_delinquency_status#2212 >= 1) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_30#1975, CASE WHEN (current_loan_delinquency_status#2212 >= 3) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_90#1976, CASE WHEN (current_loan_delinquency_status#2212 >= 6) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_180#1977]\n", + " : : : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : : : +- GpuFilter (gpuisnotnull(loan_id#2202L) AND gpuisnotnull(quarter#2233)), true\n", + " : : : : : : : : : : +- GpuFileGpuScan parquet [loan_id#2202L,monthly_reporting_period#2203,current_loan_delinquency_status#2212,quarter#2233] Batched: true, DataFilters: [isnotnull(loan_id#2202L), isnotnull(quarter#2233)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", + " : : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : : +- GpuSort [quarter#2484 ASC NULLS FIRST, loan_id#2453L ASC NULLS FIRST, timestamp_year#2307L ASC NULLS FIRST, timestamp_month#2336L ASC NULLS FIRST], false, com.nvidia.spark.rapids.OutOfCoreSort$@163d9f7d\n", + " : : : : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, timestamp_year#2307L, timestamp_month#2336L, 192), ENSURE_REQUIREMENTS, [id=#1114]\n", + " : : : : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : : : +- *(6) HashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[])\n", + " : : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248, 192), ENSURE_REQUIREMENTS, [id=#1107]\n", + " : : : : : : : : : +- GpuHashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[]), filters=ArrayBuffer())\n", + " : : : : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : : : +- *(5) Project [quarter#2484, FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) AS josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, loan_id#2453L, month_y#2248]\n", + " : : : : : : : : : +- *(5) Filter (isnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast((month_y#2248 - 1) as bigint)) as double) / 12.0))) AND 
isnotnull(CASE WHEN (pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) = 0) THEN 12 ELSE pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) END))\n", + " : : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : : +- GpuGenerate gpuexplode([0,1,2,3,4,5,6,7,8,9,10,11]), [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997], false, [month_y#2248]\n", + " : : : : : : : : : +- GpuProject [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997]\n", + " : : : : : : : : : +- GpuBroadcastHashJoin [loan_id#2453L, quarter#2484], [loan_id#2202L, quarter#2233], LeftOuter, GpuBuildRight\n", + " : : : : : : : : : :- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : : : : +- *(3) Project [quarter#2484, loan_id#2453L, month(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_month#2051, year(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_year#2087]\n", + " : : : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : : : +- GpuFilter (gpuisnotnull(quarter#2484) AND gpuisnotnull(loan_id#2453L)), true\n", + " : : : : : : : : : : +- GpuFileGpuScan parquet [loan_id#2453L,monthly_reporting_period#2454,quarter#2484] Batched: true, DataFilters: [isnotnull(quarter#2484), isnotnull(loan_id#2453L)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(quarter), IsNotNull(loan_id)], ReadSchema: struct\n", + " : : : : : : : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true]),false), [id=#1096]\n", + " : : : : : : : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[gpumax(current_loan_delinquency_status#2212), gpumin(delinquency_30#1975), gpumin(delinquency_90#1976), gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2233, loan_id#2202L, 192), ENSURE_REQUIREMENTS, [id=#1093]\n", + " : : : : : : : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[partial_gpumax(current_loan_delinquency_status#2212), partial_gpumin(delinquency_30#1975), partial_gpumin(delinquency_90#1976), partial_gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : : : +- *(4) Project [quarter#2233, loan_id#2202L, current_loan_delinquency_status#2212, CASE WHEN (current_loan_delinquency_status#2212 >= 1) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_30#1975, CASE WHEN (current_loan_delinquency_status#2212 >= 3) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_90#1976, CASE WHEN 
(current_loan_delinquency_status#2212 >= 6) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_180#1977]\n", + " : : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : : +- GpuFilter (gpuisnotnull(loan_id#2202L) AND gpuisnotnull(quarter#2233)), true\n", + " : : : : : : : : : +- GpuFileGpuScan parquet [loan_id#2202L,monthly_reporting_period#2203,current_loan_delinquency_status#2212,quarter#2233] Batched: true, DataFilters: [isnotnull(loan_id#2202L), isnotnull(quarter#2233)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", + " : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : +- GpuSort [quarter#2484 ASC NULLS FIRST, loan_id#2453L ASC NULLS FIRST, timestamp_year#2307L ASC NULLS FIRST, timestamp_month#2336L ASC NULLS FIRST], false, com.nvidia.spark.rapids.OutOfCoreSort$@163d9f7d\n", + " : : : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, timestamp_year#2307L, timestamp_month#2336L, 192), ENSURE_REQUIREMENTS, [id=#1114]\n", + " : : : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : : +- *(6) HashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[])\n", + " : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248, 192), ENSURE_REQUIREMENTS, [id=#1107]\n", + " : : : : : : : : +- GpuHashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[]), filters=ArrayBuffer())\n", + " : : : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : : +- *(5) Project [quarter#2484, FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) AS josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, loan_id#2453L, month_y#2248]\n", + " : : : : : : : : +- *(5) Filter (isnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast((month_y#2248 - 1) as bigint)) as double) / 12.0))) AND isnotnull(CASE WHEN (pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) = 0) THEN 12 ELSE pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) END))\n", + " : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : +- GpuGenerate gpuexplode([0,1,2,3,4,5,6,7,8,9,10,11]), [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997], 
false, [month_y#2248]\n", + " : : : : : : : : +- GpuProject [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997]\n", + " : : : : : : : : +- GpuBroadcastHashJoin [loan_id#2453L, quarter#2484], [loan_id#2202L, quarter#2233], LeftOuter, GpuBuildRight\n", + " : : : : : : : : :- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : : : +- *(3) Project [quarter#2484, loan_id#2453L, month(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_month#2051, year(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_year#2087]\n", + " : : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : : +- GpuFilter (gpuisnotnull(quarter#2484) AND gpuisnotnull(loan_id#2453L)), true\n", + " : : : : : : : : : +- GpuFileGpuScan parquet [loan_id#2453L,monthly_reporting_period#2454,quarter#2484] Batched: true, DataFilters: [isnotnull(quarter#2484), isnotnull(loan_id#2453L)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(quarter), IsNotNull(loan_id)], ReadSchema: struct\n", + " : : : : : : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true]),false), [id=#1096]\n", + " : : : : : : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[gpumax(current_loan_delinquency_status#2212), gpumin(delinquency_30#1975), gpumin(delinquency_90#1976), gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2233, loan_id#2202L, 192), ENSURE_REQUIREMENTS, [id=#1093]\n", + " : : : : : : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[partial_gpumax(current_loan_delinquency_status#2212), partial_gpumin(delinquency_30#1975), partial_gpumin(delinquency_90#1976), partial_gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : : +- *(4) Project [quarter#2233, loan_id#2202L, current_loan_delinquency_status#2212, CASE WHEN (current_loan_delinquency_status#2212 >= 1) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_30#1975, CASE WHEN (current_loan_delinquency_status#2212 >= 3) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_90#1976, CASE WHEN (current_loan_delinquency_status#2212 >= 6) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_180#1977]\n", + " : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : +- GpuFilter (gpuisnotnull(loan_id#2202L) AND gpuisnotnull(quarter#2233)), true\n", + " : : : : : : : : +- GpuFileGpuScan parquet [loan_id#2202L,monthly_reporting_period#2203,current_loan_delinquency_status#2212,quarter#2233] Batched: true, DataFilters: [isnotnull(loan_id#2202L), isnotnull(quarter#2233)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: 
struct\n", + " : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : +- GpuSort [quarter#2484 ASC NULLS FIRST, loan_id#2453L ASC NULLS FIRST, timestamp_year#2307L ASC NULLS FIRST, timestamp_month#2336L ASC NULLS FIRST], false, com.nvidia.spark.rapids.OutOfCoreSort$@163d9f7d\n", + " : : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, timestamp_year#2307L, timestamp_month#2336L, 192), ENSURE_REQUIREMENTS, [id=#1114]\n", + " : : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : +- *(6) HashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[])\n", + " : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248, 192), ENSURE_REQUIREMENTS, [id=#1107]\n", + " : : : : : : : +- GpuHashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[]), filters=ArrayBuffer())\n", + " : : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : +- *(5) Project [quarter#2484, FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) AS josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, loan_id#2453L, month_y#2248]\n", + " : : : : : : : +- *(5) Filter (isnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast((month_y#2248 - 1) as bigint)) as double) / 12.0))) AND isnotnull(CASE WHEN (pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) = 0) THEN 12 ELSE pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) END))\n", + " : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : +- GpuGenerate gpuexplode([0,1,2,3,4,5,6,7,8,9,10,11]), [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997], false, [month_y#2248]\n", + " : : : : : : : +- GpuProject [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997]\n", + " : : : : : : : +- GpuBroadcastHashJoin [loan_id#2453L, quarter#2484], [loan_id#2202L, quarter#2233], LeftOuter, GpuBuildRight\n", + " : : : : : : : :- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : : +- *(3) Project [quarter#2484, loan_id#2453L, month(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_month#2051, year(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_year#2087]\n", + 
" : : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : : +- GpuFilter (gpuisnotnull(quarter#2484) AND gpuisnotnull(loan_id#2453L)), true\n", + " : : : : : : : : +- GpuFileGpuScan parquet [loan_id#2453L,monthly_reporting_period#2454,quarter#2484] Batched: true, DataFilters: [isnotnull(quarter#2484), isnotnull(loan_id#2453L)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(quarter), IsNotNull(loan_id)], ReadSchema: struct\n", + " : : : : : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true]),false), [id=#1096]\n", + " : : : : : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[gpumax(current_loan_delinquency_status#2212), gpumin(delinquency_30#1975), gpumin(delinquency_90#1976), gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2233, loan_id#2202L, 192), ENSURE_REQUIREMENTS, [id=#1093]\n", + " : : : : : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[partial_gpumax(current_loan_delinquency_status#2212), partial_gpumin(delinquency_30#1975), partial_gpumin(delinquency_90#1976), partial_gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : +- *(4) Project [quarter#2233, loan_id#2202L, current_loan_delinquency_status#2212, CASE WHEN (current_loan_delinquency_status#2212 >= 1) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_30#1975, CASE WHEN (current_loan_delinquency_status#2212 >= 3) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_90#1976, CASE WHEN (current_loan_delinquency_status#2212 >= 6) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_180#1977]\n", + " : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : +- GpuFilter (gpuisnotnull(loan_id#2202L) AND gpuisnotnull(quarter#2233)), true\n", + " : : : : : : : +- GpuFileGpuScan parquet [loan_id#2202L,monthly_reporting_period#2203,current_loan_delinquency_status#2212,quarter#2233] Batched: true, DataFilters: [isnotnull(loan_id#2202L), isnotnull(quarter#2233)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", + " : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : +- GpuSort [quarter#2484 ASC NULLS FIRST, loan_id#2453L ASC NULLS FIRST, timestamp_year#2307L ASC NULLS FIRST, timestamp_month#2336L ASC NULLS FIRST], false, com.nvidia.spark.rapids.OutOfCoreSort$@163d9f7d\n", + " : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, timestamp_year#2307L, timestamp_month#2336L, 192), ENSURE_REQUIREMENTS, [id=#1114]\n", + " : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : +- *(6) HashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[])\n", + " : : : : : : +- 
GpuColumnarToRow false\n", + " : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248, 192), ENSURE_REQUIREMENTS, [id=#1107]\n", + " : : : : : : +- GpuHashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[]), filters=ArrayBuffer())\n", + " : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : +- *(5) Project [quarter#2484, FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) AS josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, loan_id#2453L, month_y#2248]\n", + " : : : : : : +- *(5) Filter (isnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast((month_y#2248 - 1) as bigint)) as double) / 12.0))) AND isnotnull(CASE WHEN (pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) = 0) THEN 12 ELSE pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) END))\n", + " : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : +- GpuGenerate gpuexplode([0,1,2,3,4,5,6,7,8,9,10,11]), [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997], false, [month_y#2248]\n", + " : : : : : : +- GpuProject [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997]\n", + " : : : : : : +- GpuBroadcastHashJoin [loan_id#2453L, quarter#2484], [loan_id#2202L, quarter#2233], LeftOuter, GpuBuildRight\n", + " : : : : : : :- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : : +- *(3) Project [quarter#2484, loan_id#2453L, month(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_month#2051, year(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_year#2087]\n", + " : : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : : +- GpuFilter (gpuisnotnull(quarter#2484) AND gpuisnotnull(loan_id#2453L)), true\n", + " : : : : : : : +- GpuFileGpuScan parquet [loan_id#2453L,monthly_reporting_period#2454,quarter#2484] Batched: true, DataFilters: [isnotnull(quarter#2484), isnotnull(loan_id#2453L)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(quarter), IsNotNull(loan_id)], ReadSchema: struct\n", + " : : : : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true]),false), [id=#1096]\n", + " : : : : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[gpumax(current_loan_delinquency_status#2212), gpumin(delinquency_30#1975), gpumin(delinquency_90#1976), 
gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2233, loan_id#2202L, 192), ENSURE_REQUIREMENTS, [id=#1093]\n", + " : : : : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[partial_gpumax(current_loan_delinquency_status#2212), partial_gpumin(delinquency_30#1975), partial_gpumin(delinquency_90#1976), partial_gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : +- *(4) Project [quarter#2233, loan_id#2202L, current_loan_delinquency_status#2212, CASE WHEN (current_loan_delinquency_status#2212 >= 1) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_30#1975, CASE WHEN (current_loan_delinquency_status#2212 >= 3) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_90#1976, CASE WHEN (current_loan_delinquency_status#2212 >= 6) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_180#1977]\n", + " : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : +- GpuFilter (gpuisnotnull(loan_id#2202L) AND gpuisnotnull(quarter#2233)), true\n", + " : : : : : : +- GpuFileGpuScan parquet [loan_id#2202L,monthly_reporting_period#2203,current_loan_delinquency_status#2212,quarter#2233] Batched: true, DataFilters: [isnotnull(loan_id#2202L), isnotnull(quarter#2233)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", + " : : : : : +- GpuColumnarToRow false\n", + " : : : : : +- GpuSort [quarter#2484 ASC NULLS FIRST, loan_id#2453L ASC NULLS FIRST, timestamp_year#2307L ASC NULLS FIRST, timestamp_month#2336L ASC NULLS FIRST], false, com.nvidia.spark.rapids.OutOfCoreSort$@163d9f7d\n", + " : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, timestamp_year#2307L, timestamp_month#2336L, 192), ENSURE_REQUIREMENTS, [id=#1114]\n", + " : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : +- *(6) HashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[])\n", + " : : : : : +- GpuColumnarToRow false\n", + " : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248, 192), ENSURE_REQUIREMENTS, [id=#1107]\n", + " : : : : : +- GpuHashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[]), filters=ArrayBuffer())\n", + " : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : +- *(5) Project [quarter#2484, FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) AS josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, 
delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, loan_id#2453L, month_y#2248]\n", + " : : : : : +- *(5) Filter (isnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast((month_y#2248 - 1) as bigint)) as double) / 12.0))) AND isnotnull(CASE WHEN (pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) = 0) THEN 12 ELSE pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) END))\n", + " : : : : : +- GpuColumnarToRow false\n", + " : : : : : +- GpuGenerate gpuexplode([0,1,2,3,4,5,6,7,8,9,10,11]), [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997], false, [month_y#2248]\n", + " : : : : : +- GpuProject [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997]\n", + " : : : : : +- GpuBroadcastHashJoin [loan_id#2453L, quarter#2484], [loan_id#2202L, quarter#2233], LeftOuter, GpuBuildRight\n", + " : : : : : :- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : : +- *(3) Project [quarter#2484, loan_id#2453L, month(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_month#2051, year(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_year#2087]\n", + " : : : : : : +- GpuColumnarToRow false\n", + " : : : : : : +- GpuFilter (gpuisnotnull(quarter#2484) AND gpuisnotnull(loan_id#2453L)), true\n", + " : : : : : : +- GpuFileGpuScan parquet [loan_id#2453L,monthly_reporting_period#2454,quarter#2484] Batched: true, DataFilters: [isnotnull(quarter#2484), isnotnull(loan_id#2453L)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(quarter), IsNotNull(loan_id)], ReadSchema: struct\n", + " : : : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true]),false), [id=#1096]\n", + " : : : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[gpumax(current_loan_delinquency_status#2212), gpumin(delinquency_30#1975), gpumin(delinquency_90#1976), gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2233, loan_id#2202L, 192), ENSURE_REQUIREMENTS, [id=#1093]\n", + " : : : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[partial_gpumax(current_loan_delinquency_status#2212), partial_gpumin(delinquency_30#1975), partial_gpumin(delinquency_90#1976), partial_gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : +- *(4) Project [quarter#2233, loan_id#2202L, current_loan_delinquency_status#2212, CASE WHEN (current_loan_delinquency_status#2212 >= 1) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_30#1975, CASE WHEN 
(current_loan_delinquency_status#2212 >= 3) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_90#1976, CASE WHEN (current_loan_delinquency_status#2212 >= 6) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_180#1977]\n", + " : : : : : +- GpuColumnarToRow false\n", + " : : : : : +- GpuFilter (gpuisnotnull(loan_id#2202L) AND gpuisnotnull(quarter#2233)), true\n", + " : : : : : +- GpuFileGpuScan parquet [loan_id#2202L,monthly_reporting_period#2203,current_loan_delinquency_status#2212,quarter#2233] Batched: true, DataFilters: [isnotnull(loan_id#2202L), isnotnull(quarter#2233)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", + " : : : : +- GpuColumnarToRow false\n", + " : : : : +- GpuSort [quarter#2484 ASC NULLS FIRST, loan_id#2453L ASC NULLS FIRST, timestamp_year#2307L ASC NULLS FIRST, timestamp_month#2336L ASC NULLS FIRST], false, com.nvidia.spark.rapids.OutOfCoreSort$@163d9f7d\n", + " : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, timestamp_year#2307L, timestamp_month#2336L, 192), ENSURE_REQUIREMENTS, [id=#1114]\n", + " : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : +- *(6) HashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[])\n", + " : : : : +- GpuColumnarToRow false\n", + " : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248, 192), ENSURE_REQUIREMENTS, [id=#1107]\n", + " : : : : +- GpuHashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[]), filters=ArrayBuffer())\n", + " : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : +- *(5) Project [quarter#2484, FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) AS josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, loan_id#2453L, month_y#2248]\n", + " : : : : +- *(5) Filter (isnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast((month_y#2248 - 1) as bigint)) as double) / 12.0))) AND isnotnull(CASE WHEN (pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) = 0) THEN 12 ELSE pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) END))\n", + " : : : : +- GpuColumnarToRow false\n", + " : : : : +- GpuGenerate gpuexplode([0,1,2,3,4,5,6,7,8,9,10,11]), [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, 
delinquency_30#1993, delinquency_90#1995, delinquency_180#1997], false, [month_y#2248]\n", + " : : : : +- GpuProject [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997]\n", + " : : : : +- GpuBroadcastHashJoin [loan_id#2453L, quarter#2484], [loan_id#2202L, quarter#2233], LeftOuter, GpuBuildRight\n", + " : : : : :- GpuRowToColumnar targetsize(536870912)\n", + " : : : : : +- *(3) Project [quarter#2484, loan_id#2453L, month(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_month#2051, year(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_year#2087]\n", + " : : : : : +- GpuColumnarToRow false\n", + " : : : : : +- GpuFilter (gpuisnotnull(quarter#2484) AND gpuisnotnull(loan_id#2453L)), true\n", + " : : : : : +- GpuFileGpuScan parquet [loan_id#2453L,monthly_reporting_period#2454,quarter#2484] Batched: true, DataFilters: [isnotnull(quarter#2484), isnotnull(loan_id#2453L)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(quarter), IsNotNull(loan_id)], ReadSchema: struct\n", + " : : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true]),false), [id=#1096]\n", + " : : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[gpumax(current_loan_delinquency_status#2212), gpumin(delinquency_30#1975), gpumin(delinquency_90#1976), gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : : +- GpuShuffleCoalesce 536870912\n", + " : : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2233, loan_id#2202L, 192), ENSURE_REQUIREMENTS, [id=#1093]\n", + " : : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[partial_gpumax(current_loan_delinquency_status#2212), partial_gpumin(delinquency_30#1975), partial_gpumin(delinquency_90#1976), partial_gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : : +- *(4) Project [quarter#2233, loan_id#2202L, current_loan_delinquency_status#2212, CASE WHEN (current_loan_delinquency_status#2212 >= 1) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_30#1975, CASE WHEN (current_loan_delinquency_status#2212 >= 3) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_90#1976, CASE WHEN (current_loan_delinquency_status#2212 >= 6) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_180#1977]\n", + " : : : : +- GpuColumnarToRow false\n", + " : : : : +- GpuFilter (gpuisnotnull(loan_id#2202L) AND gpuisnotnull(quarter#2233)), true\n", + " : : : : +- GpuFileGpuScan parquet [loan_id#2202L,monthly_reporting_period#2203,current_loan_delinquency_status#2212,quarter#2233] Batched: true, DataFilters: [isnotnull(loan_id#2202L), isnotnull(quarter#2233)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", + " : : : +- GpuColumnarToRow false\n", + " : : : +- GpuSort 
[quarter#2484 ASC NULLS FIRST, loan_id#2453L ASC NULLS FIRST, timestamp_year#2307L ASC NULLS FIRST, timestamp_month#2336L ASC NULLS FIRST], false, com.nvidia.spark.rapids.OutOfCoreSort$@163d9f7d\n", + " : : : +- GpuShuffleCoalesce 536870912\n", + " : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, timestamp_year#2307L, timestamp_month#2336L, 192), ENSURE_REQUIREMENTS, [id=#1114]\n", + " : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : +- *(6) HashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[])\n", + " : : : +- GpuColumnarToRow false\n", + " : : : +- GpuShuffleCoalesce 536870912\n", + " : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248, 192), ENSURE_REQUIREMENTS, [id=#1107]\n", + " : : : +- GpuHashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[]), filters=ArrayBuffer())\n", + " : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : +- *(5) Project [quarter#2484, FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) AS josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, loan_id#2453L, month_y#2248]\n", + " : : : +- *(5) Filter (isnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast((month_y#2248 - 1) as bigint)) as double) / 12.0))) AND isnotnull(CASE WHEN (pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) = 0) THEN 12 ELSE pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) END))\n", + " : : : +- GpuColumnarToRow false\n", + " : : : +- GpuGenerate gpuexplode([0,1,2,3,4,5,6,7,8,9,10,11]), [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997], false, [month_y#2248]\n", + " : : : +- GpuProject [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997]\n", + " : : : +- GpuBroadcastHashJoin [loan_id#2453L, quarter#2484], [loan_id#2202L, quarter#2233], LeftOuter, GpuBuildRight\n", + " : : : :- GpuRowToColumnar targetsize(536870912)\n", + " : : : : +- *(3) Project [quarter#2484, loan_id#2453L, month(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_month#2051, year(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_year#2087]\n", + " : : : : +- GpuColumnarToRow false\n", + " : : : : +- GpuFilter (gpuisnotnull(quarter#2484) AND gpuisnotnull(loan_id#2453L)), true\n", + " : : : : +- GpuFileGpuScan parquet 
[loan_id#2453L,monthly_reporting_period#2454,quarter#2484] Batched: true, DataFilters: [isnotnull(quarter#2484), isnotnull(loan_id#2453L)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(quarter), IsNotNull(loan_id)], ReadSchema: struct\n", + " : : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true]),false), [id=#1096]\n", + " : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[gpumax(current_loan_delinquency_status#2212), gpumin(delinquency_30#1975), gpumin(delinquency_90#1976), gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : +- GpuShuffleCoalesce 536870912\n", + " : : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2233, loan_id#2202L, 192), ENSURE_REQUIREMENTS, [id=#1093]\n", + " : : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[partial_gpumax(current_loan_delinquency_status#2212), partial_gpumin(delinquency_30#1975), partial_gpumin(delinquency_90#1976), partial_gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : : +- *(4) Project [quarter#2233, loan_id#2202L, current_loan_delinquency_status#2212, CASE WHEN (current_loan_delinquency_status#2212 >= 1) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_30#1975, CASE WHEN (current_loan_delinquency_status#2212 >= 3) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_90#1976, CASE WHEN (current_loan_delinquency_status#2212 >= 6) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_180#1977]\n", + " : : : +- GpuColumnarToRow false\n", + " : : : +- GpuFilter (gpuisnotnull(loan_id#2202L) AND gpuisnotnull(quarter#2233)), true\n", + " : : : +- GpuFileGpuScan parquet [loan_id#2202L,monthly_reporting_period#2203,current_loan_delinquency_status#2212,quarter#2233] Batched: true, DataFilters: [isnotnull(loan_id#2202L), isnotnull(quarter#2233)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", + " : : +- GpuColumnarToRow false\n", + " : : +- GpuSort [quarter#2484 ASC NULLS FIRST, loan_id#2453L ASC NULLS FIRST, timestamp_year#2307L ASC NULLS FIRST, timestamp_month#2336L ASC NULLS FIRST], false, com.nvidia.spark.rapids.OutOfCoreSort$@163d9f7d\n", + " : : +- GpuShuffleCoalesce 536870912\n", + " : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, timestamp_year#2307L, timestamp_month#2336L, 192), ENSURE_REQUIREMENTS, [id=#1114]\n", + " : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : +- *(6) HashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[])\n", + " : : +- GpuColumnarToRow false\n", + " : : +- GpuShuffleCoalesce 536870912\n", + " : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248, 192), ENSURE_REQUIREMENTS, [id=#1107]\n", + 
" : : +- GpuHashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[]), filters=ArrayBuffer())\n", + " : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : +- *(5) Project [quarter#2484, FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) AS josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, loan_id#2453L, month_y#2248]\n", + " : : +- *(5) Filter (isnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast((month_y#2248 - 1) as bigint)) as double) / 12.0))) AND isnotnull(CASE WHEN (pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) = 0) THEN 12 ELSE pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) END))\n", + " : : +- GpuColumnarToRow false\n", + " : : +- GpuGenerate gpuexplode([0,1,2,3,4,5,6,7,8,9,10,11]), [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997], false, [month_y#2248]\n", + " : : +- GpuProject [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997]\n", + " : : +- GpuBroadcastHashJoin [loan_id#2453L, quarter#2484], [loan_id#2202L, quarter#2233], LeftOuter, GpuBuildRight\n", + " : : :- GpuRowToColumnar targetsize(536870912)\n", + " : : : +- *(3) Project [quarter#2484, loan_id#2453L, month(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_month#2051, year(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_year#2087]\n", + " : : : +- GpuColumnarToRow false\n", + " : : : +- GpuFilter (gpuisnotnull(quarter#2484) AND gpuisnotnull(loan_id#2453L)), true\n", + " : : : +- GpuFileGpuScan parquet [loan_id#2453L,monthly_reporting_period#2454,quarter#2484] Batched: true, DataFilters: [isnotnull(quarter#2484), isnotnull(loan_id#2453L)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(quarter), IsNotNull(loan_id)], ReadSchema: struct\n", + " : : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true]),false), [id=#1096]\n", + " : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[gpumax(current_loan_delinquency_status#2212), gpumin(delinquency_30#1975), gpumin(delinquency_90#1976), gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : +- GpuShuffleCoalesce 536870912\n", + " : : +- GpuColumnarExchange gpuhashpartitioning(quarter#2233, loan_id#2202L, 192), ENSURE_REQUIREMENTS, [id=#1093]\n", + " : : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[partial_gpumax(current_loan_delinquency_status#2212), partial_gpumin(delinquency_30#1975), partial_gpumin(delinquency_90#1976), 
partial_gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : : +- GpuRowToColumnar targetsize(536870912)\n", + " : : +- *(4) Project [quarter#2233, loan_id#2202L, current_loan_delinquency_status#2212, CASE WHEN (current_loan_delinquency_status#2212 >= 1) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_30#1975, CASE WHEN (current_loan_delinquency_status#2212 >= 3) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_90#1976, CASE WHEN (current_loan_delinquency_status#2212 >= 6) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_180#1977]\n", + " : : +- GpuColumnarToRow false\n", + " : : +- GpuFilter (gpuisnotnull(loan_id#2202L) AND gpuisnotnull(quarter#2233)), true\n", + " : : +- GpuFileGpuScan parquet [loan_id#2202L,monthly_reporting_period#2203,current_loan_delinquency_status#2212,quarter#2233] Batched: true, DataFilters: [isnotnull(loan_id#2202L), isnotnull(quarter#2233)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", + " : +- GpuColumnarToRow false\n", + " : +- GpuSort [quarter#2484 ASC NULLS FIRST, loan_id#2453L ASC NULLS FIRST, timestamp_year#2307L ASC NULLS FIRST, timestamp_month#2336L ASC NULLS FIRST], false, com.nvidia.spark.rapids.OutOfCoreSort$@163d9f7d\n", + " : +- GpuShuffleCoalesce 536870912\n", + " : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, timestamp_year#2307L, timestamp_month#2336L, 192), ENSURE_REQUIREMENTS, [id=#1114]\n", + " : +- GpuRowToColumnar targetsize(536870912)\n", + " : +- *(6) HashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[])\n", + " : +- GpuColumnarToRow false\n", + " : +- GpuShuffleCoalesce 536870912\n", + " : +- GpuColumnarExchange gpuhashpartitioning(quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248, 192), ENSURE_REQUIREMENTS, [id=#1107]\n", + " : +- GpuHashAggregate(keys=[quarter#2484, loan_id#2453L, josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, month_y#2248], functions=[]), filters=ArrayBuffer())\n", + " : +- GpuRowToColumnar targetsize(536870912)\n", + " : +- *(5) Project [quarter#2484, FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) AS josh_mody_n#2264L, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997, loan_id#2453L, month_y#2248]\n", + " : +- *(5) Filter (isnotnull(FLOOR((cast(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast((month_y#2248 - 1) as bigint)) as double) / 12.0))) AND isnotnull(CASE WHEN (pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) = 0) THEN 12 ELSE pmod(((24000 + (FLOOR((cast(((((timestamp_year#2087 * 12) + timestamp_month#2051) - 
24000) - month_y#2248) as double) / 12.0)) * 12)) + cast(month_y#2248 as bigint)), 12) END))\n", + " : +- GpuColumnarToRow false\n", + " : +- GpuGenerate gpuexplode([0,1,2,3,4,5,6,7,8,9,10,11]), [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997], false, [month_y#2248]\n", + " : +- GpuProject [loan_id#2453L, quarter#2484, timestamp_month#2051, timestamp_year#2087, ever_30#2004, ever_90#2005, ever_180#2006, delinquency_30#1993, delinquency_90#1995, delinquency_180#1997]\n", + " : +- GpuBroadcastHashJoin [loan_id#2453L, quarter#2484], [loan_id#2202L, quarter#2233], LeftOuter, GpuBuildRight\n", + " : :- GpuRowToColumnar targetsize(536870912)\n", + " : : +- *(3) Project [quarter#2484, loan_id#2453L, month(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_month#2051, year(cast(gettimestamp(monthly_reporting_period#2454, MM/dd/yyyy, Some(America/Los_Angeles), false) as date)) AS timestamp_year#2087]\n", + " : : +- GpuColumnarToRow false\n", + " : : +- GpuFilter (gpuisnotnull(quarter#2484) AND gpuisnotnull(loan_id#2453L)), true\n", + " : : +- GpuFileGpuScan parquet [loan_id#2453L,monthly_reporting_period#2454,quarter#2484] Batched: true, DataFilters: [isnotnull(quarter#2484), isnotnull(loan_id#2453L)], Format: Parquet, Location: InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(quarter), IsNotNull(loan_id)], ReadSchema: struct\n", + " : +- GpuBroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true], input[0, string, true]),false), [id=#1096]\n", + " : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[gpumax(current_loan_delinquency_status#2212), gpumin(delinquency_30#1975), gpumin(delinquency_90#1976), gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : +- GpuShuffleCoalesce 536870912\n", + " : +- GpuColumnarExchange gpuhashpartitioning(quarter#2233, loan_id#2202L, 192), ENSURE_REQUIREMENTS, [id=#1093]\n", + " : +- GpuHashAggregate(keys=[quarter#2233, loan_id#2202L], functions=[partial_gpumax(current_loan_delinquency_status#2212), partial_gpumin(delinquency_30#1975), partial_gpumin(delinquency_90#1976), partial_gpumin(delinquency_180#1977)]), filters=ArrayBuffer(None, None, None, None))\n", + " : +- GpuRowToColumnar targetsize(536870912)\n", + " : +- *(4) Project [quarter#2233, loan_id#2202L, current_loan_delinquency_status#2212, CASE WHEN (current_loan_delinquency_status#2212 >= 1) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_30#1975, CASE WHEN (current_loan_delinquency_status#2212 >= 3) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_90#1976, CASE WHEN (current_loan_delinquency_status#2212 >= 6) THEN cast(gettimestamp(monthly_reporting_period#2203, MM/dd/yyyy, Some(America/Los_Angeles), false) as date) END AS delinquency_180#1977]\n", + " : +- GpuColumnarToRow false\n", + " : +- GpuFilter (gpuisnotnull(loan_id#2202L) AND gpuisnotnull(quarter#2233)), true\n", + " : +- GpuFileGpuScan parquet [loan_id#2202L,monthly_reporting_period#2203,current_loan_delinquency_status#2212,quarter#2233] Batched: true, DataFilters: [isnotnull(loan_id#2202L), isnotnull(quarter#2233)], Format: Parquet, Location: 
InMemoryFileIndex[file:/local/saralihalli/HOME/mortgage/perf], PartitionFilters: [], PushedFilters: [IsNotNull(loan_id), IsNotNull(quarter)], ReadSchema: struct\n", + "Refer to these [instructions](https://github.com/NVIDIA/spark-rapids-examples/blob/branch-22.08/docs/get-started/xgboost-examples/dataset/mortgage.md) to download the dataset.\n", "\n", "### 2. Download needed jars\n", - "* [rapids-4-spark_2.12-22.06.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.06.0/rapids-4-spark_2.12-22.06.0.jar)\n", + "* [rapids-4-spark_2.12-22.08.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.08.0/rapids-4-spark_2.12-22.08.0.jar)\n", "\n", "### 3. Start Spark Standalone\n", "Before Running the script, please setup Spark standalone mode\n", "\n", "### 4. Add ENV\n", "```\n", - "$ export SPARK_JARS=rapids-4-spark_2.12-22.06.0.jar\n", + "$ export SPARK_JARS=rapids-4-spark_2.12-22.08.0.jar\n", "\n", "```\n", "\n", - "### 5.Start Jupyter Notebook with spylon-kernal or toree\n", + "### 5.Start Jupyter Notebook with spylon-kernel or toree\n", "\n", "```\n", "$ jupyter notebook --allow-root --notebook-dir=${your-dir} --config=${your-configs}\n", @@ -66,38 +67,17 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "674771a8", + "execution_count": null, + "id": "b2834c06", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dataRoot = /data\n", - "perfPath = /data/mortgage/Performance/\n", - "acqPath = /data/mortgage/Acquisition/\n", - "outPath = /data/mortgage/output/\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "/data" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "val dataRoot = sys.env.getOrElse(\"DATA_ROOT\", \"/data\")\n", - "val perfPath = dataRoot + \"/mortgage/Performance/\"\n", - "val acqPath = dataRoot + \"/mortgage/Acquisition/\"\n", - "val outPath = dataRoot + \"/mortgage/output/\"\n" + "val dataOut = sys.env.getOrElse(\"DATA_OUT\", \"/data\")\n", + "val dataPath = dataRoot + \"/mortgage/input\"\n", + "val outPath = dataOut + \"/mortgage/output\"\n", + "val output_csv2parquet = dataOut + \"/mortgage/output/csv2parquet/\"\n", + "val saveTrainEvalDataset = true" ] }, { @@ -120,7 +100,7 @@ { "data": { "text/plain": [ - "performanceSchema = StructType(StructField(loan_id,LongType,true), StructField(monthly_reporting_period,StringType,true), StructField(servicer,StringType,true), StructField(interest_rate,DoubleType,true), StructField(current_actual_upb,DoubleType,true), StructField(loan_age,DoubleType,true), StructField(remaining_months_to_legal_maturity,DoubleType,true), StructField(adj_remaining_months_to_maturity,DoubleType,true), StructField(maturity_date,StringType,true), StructField(msa,DoubleType,true), StructField(current_loan_delinquency_status,IntegerType,true), StructField(mod_flag,StringType,true), StructField(zero_balance_code,StringType,true), StructField(zero_balance_effective_date,StringType,true), StructField(last_paid_installment_date,StringType,t...\n" + "rawSchema = StructType(StructField(reference_pool_id,StringType,true), StructField(loan_id,LongType,true), StructField(monthly_reporting_period,StringType,true), StructField(orig_channel,StringType,true), StructField(seller_name,StringType,true), StructField(servicer,StringType,true), StructField(master_servicer,StringType,true), StructField(orig_interest_rate,DoubleType,true), StructField(interest_rate,DoubleType,true), 
StructField(orig_upb,IntegerType,true), StructField(upb_at_issuance,StringType,true), StructField(current_actual_upb,DoubleType,true), StructField(orig_loan_term,IntegerType,true), StructField(orig_date,StringType,true), StructField(first_pay_date,StringType,true), StructField(loan_age,DoubleType,true), StructField(remaining_months...\n" ] }, "metadata": {}, @@ -129,7 +109,7 @@ { "data": { "text/plain": [ - "StructType(StructField(loan_id,LongType,true), StructField(monthly_reporting_period,StringType,true), StructField(servicer,StringType,true), StructField(interest_rate,DoubleType,true), StructField(current_actual_upb,DoubleType,true), StructField(loan_age,DoubleType,true), StructField(remaining_months_to_legal_maturity,DoubleType,true), StructField(adj_remaining_months_to_maturity,DoubleType,true), StructField(maturity_date,StringType,true), StructField(msa,DoubleType,true), StructField(current_loan_delinquency_status,IntegerType,true), StructField(mod_flag,StringType,true), StructField(zero_balance_code,StringType,true), StructField(zero_balance_effective_date,StringType,true), StructField(last_paid_installment_date,StringType,t..." + "StructType(StructField(reference_pool_id,StringType,true), StructField(loan_id,LongType,true), StructField(monthly_reporting_period,StringType,true), StructField(orig_channel,StringType,true), StructField(seller_name,StringType,true), StructField(servicer,StringType,true), StructField(master_servicer,StringType,true), StructField(orig_interest_rate,DoubleType,true), StructField(interest_rate,DoubleType,true), StructField(orig_upb,IntegerType,true), StructField(upb_at_issuance,StringType,true), StructField(current_actual_upb,DoubleType,true), StructField(orig_loan_term,IntegerType,true), StructField(orig_date,StringType,true), StructField(first_pay_date,StringType,true), StructField(loan_age,DoubleType,true), StructField(remaining_months..." 
] }, "execution_count": 3, @@ -139,21 +119,57 @@ ], "source": [ "// File schema\n", - "val performanceSchema = StructType(Array(\n", + "val rawSchema = StructType(Array(\n", + " StructField(\"reference_pool_id\", StringType),\n", " StructField(\"loan_id\", LongType),\n", " StructField(\"monthly_reporting_period\", StringType),\n", + " StructField(\"orig_channel\", StringType),\n", + " StructField(\"seller_name\", StringType),\n", " StructField(\"servicer\", StringType),\n", + " StructField(\"master_servicer\", StringType),\n", + " StructField(\"orig_interest_rate\", DoubleType),\n", " StructField(\"interest_rate\", DoubleType),\n", + " StructField(\"orig_upb\", DoubleType),\n", + " StructField(\"upb_at_issuance\", StringType),\n", " StructField(\"current_actual_upb\", DoubleType),\n", + " StructField(\"orig_loan_term\", IntegerType),\n", + " StructField(\"orig_date\", StringType),\n", + " StructField(\"first_pay_date\", StringType), \n", " StructField(\"loan_age\", DoubleType),\n", " StructField(\"remaining_months_to_legal_maturity\", DoubleType),\n", " StructField(\"adj_remaining_months_to_maturity\", DoubleType),\n", " StructField(\"maturity_date\", StringType),\n", + " StructField(\"orig_ltv\", DoubleType),\n", + " StructField(\"orig_cltv\", DoubleType),\n", + " StructField(\"num_borrowers\", DoubleType),\n", + " StructField(\"dti\", DoubleType),\n", + " StructField(\"borrower_credit_score\", DoubleType),\n", + " StructField(\"coborrow_credit_score\", DoubleType),\n", + " StructField(\"first_home_buyer\", StringType),\n", + " StructField(\"loan_purpose\", StringType),\n", + " StructField(\"property_type\", StringType),\n", + " StructField(\"num_units\", IntegerType),\n", + " StructField(\"occupancy_status\", StringType),\n", + " StructField(\"property_state\", StringType),\n", " StructField(\"msa\", DoubleType),\n", + " StructField(\"zip\", IntegerType),\n", + " StructField(\"mortgage_insurance_percent\", DoubleType),\n", + " StructField(\"product_type\", StringType),\n", + " StructField(\"prepayment_penalty_indicator\", StringType),\n", + " StructField(\"interest_only_loan_indicator\", StringType),\n", + " StructField(\"interest_only_first_principal_and_interest_payment_date\", StringType),\n", + " StructField(\"months_to_amortization\", StringType),\n", " StructField(\"current_loan_delinquency_status\", IntegerType),\n", + " StructField(\"loan_payment_history\", StringType),\n", " StructField(\"mod_flag\", StringType),\n", + " StructField(\"mortgage_insurance_cancellation_indicator\", StringType),\n", " StructField(\"zero_balance_code\", StringType),\n", " StructField(\"zero_balance_effective_date\", StringType),\n", + " StructField(\"upb_at_the_time_of_removal\", StringType),\n", + " StructField(\"repurchase_date\", StringType),\n", + " StructField(\"scheduled_principal_current\", StringType),\n", + " StructField(\"total_principal_current\", StringType),\n", + " StructField(\"unscheduled_principal_current\", StringType),\n", " StructField(\"last_paid_installment_date\", StringType),\n", " StructField(\"foreclosed_after\", StringType),\n", " StructField(\"disposition_date\", StringType),\n", @@ -168,37 +184,51 @@ " StructField(\"other_foreclosure_proceeds\", DoubleType),\n", " StructField(\"non_interest_bearing_upb\", DoubleType),\n", " StructField(\"principal_forgiveness_upb\", StringType),\n", - " StructField(\"repurchase_make_whole_proceeds_flag\", StringType),\n", - " StructField(\"foreclosure_principal_write_off_amount\", StringType),\n", - " 
StructField(\"servicing_activity_indicator\", StringType))\n", - " )\n", - "\n", - "val acquisitionSchema = StructType(Array(\n", - " StructField(\"loan_id\", LongType),\n", - " StructField(\"orig_channel\", StringType),\n", - " StructField(\"seller_name\", StringType),\n", - " StructField(\"orig_interest_rate\", DoubleType),\n", - " StructField(\"orig_upb\", IntegerType),\n", - " StructField(\"orig_loan_term\", IntegerType),\n", - " StructField(\"orig_date\", StringType),\n", - " StructField(\"first_pay_date\", StringType),\n", - " StructField(\"orig_ltv\", DoubleType),\n", - " StructField(\"orig_cltv\", DoubleType),\n", - " StructField(\"num_borrowers\", DoubleType),\n", - " StructField(\"dti\", DoubleType),\n", - " StructField(\"borrower_credit_score\", DoubleType),\n", - " StructField(\"first_home_buyer\", StringType),\n", - " StructField(\"loan_purpose\", StringType),\n", - " StructField(\"property_type\", StringType),\n", - " StructField(\"num_units\", IntegerType),\n", - " StructField(\"occupancy_status\", StringType),\n", - " StructField(\"property_state\", StringType),\n", - " StructField(\"zip\", IntegerType),\n", - " StructField(\"mortgage_insurance_percent\", DoubleType),\n", - " StructField(\"product_type\", StringType),\n", - " StructField(\"coborrow_credit_score\", DoubleType),\n", + " StructField(\"original_list_start_date\", StringType),\n", + " StructField(\"original_list_price\", StringType),\n", + " StructField(\"current_list_start_date\", StringType),\n", + " StructField(\"current_list_price\", StringType),\n", + " StructField(\"borrower_credit_score_at_issuance\", StringType),\n", + " StructField(\"co-borrower_credit_score_at_issuance\", StringType),\n", + " StructField(\"borrower_credit_score_current\", StringType),\n", + " StructField(\"co-Borrower_credit_score_current\", StringType),\n", " StructField(\"mortgage_insurance_type\", DoubleType),\n", - " StructField(\"relocation_mortgage_indicator\", StringType))\n", + " StructField(\"servicing_activity_indicator\", StringType),\n", + " StructField(\"current_period_modification_loss_amount\", StringType),\n", + " StructField(\"cumulative_modification_loss_amount\", StringType),\n", + " StructField(\"current_period_credit_event_net_gain_or_loss\", StringType),\n", + " StructField(\"cumulative_credit_event_net_gain_or_loss\", StringType),\n", + " StructField(\"homeready_program_indicator\", StringType),\n", + " StructField(\"foreclosure_principal_write_off_amount\", StringType),\n", + " StructField(\"relocation_mortgage_indicator\", StringType),\n", + " StructField(\"zero_balance_code_change_date\", StringType),\n", + " StructField(\"loan_holdback_indicator\", StringType),\n", + " StructField(\"loan_holdback_effective_date\", StringType),\n", + " StructField(\"delinquent_accrued_interest\", StringType),\n", + " StructField(\"property_valuation_method\", StringType),\n", + " StructField(\"high_balance_loan_indicator\", StringType),\n", + " StructField(\"arm_initial_fixed-rate_period_lt_5_yr_indicator\", StringType),\n", + " StructField(\"arm_product_type\", StringType),\n", + " StructField(\"initial_fixed-rate_period\", StringType),\n", + " StructField(\"interest_rate_adjustment_frequency\", StringType),\n", + " StructField(\"next_interest_rate_adjustment_date\", StringType),\n", + " StructField(\"next_payment_change_date\", StringType),\n", + " StructField(\"index\", StringType),\n", + " StructField(\"arm_cap_structure\", StringType),\n", + " StructField(\"initial_interest_rate_cap_up_percent\", StringType),\n", + " 
StructField(\"periodic_interest_rate_cap_up_percent\", StringType),\n", + " StructField(\"lifetime_interest_rate_cap_up_percent\", StringType),\n", + " StructField(\"mortgage_margin\", StringType),\n", + " StructField(\"arm_balloon_indicator\", StringType),\n", + " StructField(\"arm_plan_number\", StringType),\n", + " StructField(\"borrower_assistance_plan\", StringType),\n", + " StructField(\"hltv_refinance_option_indicator\", StringType),\n", + " StructField(\"deal_name\", StringType),\n", + " StructField(\"repurchase_make_whole_proceeds_flag\", StringType),\n", + " StructField(\"alternative_delinquency_resolution\", StringType),\n", + " StructField(\"alternative_delinquency_resolution_count\", StringType),\n", + " StructField(\"total_deferral_amount\", StringType)\n", + " )\n", " )" ] }, @@ -356,7 +386,7 @@ " // So we strip off the .txt and everything after it\n", " // and then take everything after the last remaining _\n", " def apply(): Column = substring_index(\n", - " substring_index(input_file_name(), \".\", 1), \"_\", -1)\n", + " substring_index(input_file_name(), \".\", 1), \"/\", -1)\n", "}" ] }, @@ -413,7 +443,7 @@ "\n", "val numericCols = List(\n", " (\"orig_interest_rate\", FloatType),\n", - " (\"orig_upb\", IntegerType),\n", + " (\"orig_upb\", DoubleType),\n", " (\"orig_loan_term\", IntegerType),\n", " (\"orig_ltv\", FloatType),\n", " (\"orig_cltv\", FloatType),\n", @@ -556,6 +586,120 @@ " }" ] }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9e1fbb61", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defined object extractPerfColumns\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "object extractPerfColumns{\n", + " def apply(rawDf : DataFrame) : DataFrame = {\n", + " val perfDf = rawDf.select(\n", + " col(\"loan_id\"),\n", + " date_format(to_date(col(\"monthly_reporting_period\"),\"MMyyyy\"), \"MM/dd/yyyy\").as(\"monthly_reporting_period\"),\n", + " upper(col(\"servicer\")).as(\"servicer\"),\n", + " col(\"interest_rate\"),\n", + " col(\"current_actual_upb\"),\n", + " col(\"loan_age\"),\n", + " col(\"remaining_months_to_legal_maturity\"),\n", + " col(\"adj_remaining_months_to_maturity\"),\n", + " date_format(to_date(col(\"maturity_date\"),\"MMyyyy\"), \"MM/yyyy\").as(\"maturity_date\"),\n", + " col(\"msa\"),\n", + " col(\"current_loan_delinquency_status\"),\n", + " col(\"mod_flag\"),\n", + " col(\"zero_balance_code\"),\n", + " date_format(to_date(col(\"zero_balance_effective_date\"),\"MMyyyy\"), \"MM/yyyy\").as(\"zero_balance_effective_date\"),\n", + " date_format(to_date(col(\"last_paid_installment_date\"),\"MMyyyy\"), \"MM/dd/yyyy\").as(\"last_paid_installment_date\"),\n", + " date_format(to_date(col(\"foreclosed_after\"),\"MMyyyy\"), \"MM/dd/yyyy\").as(\"foreclosed_after\"),\n", + " date_format(to_date(col(\"disposition_date\"),\"MMyyyy\"), \"MM/dd/yyyy\").as(\"disposition_date\"),\n", + " col(\"foreclosure_costs\"),\n", + " col(\"prop_preservation_and_repair_costs\"),\n", + " col(\"asset_recovery_costs\"),\n", + " col(\"misc_holding_expenses\"),\n", + " col(\"holding_taxes\"),\n", + " col(\"net_sale_proceeds\"),\n", + " col(\"credit_enhancement_proceeds\"),\n", + " col(\"repurchase_make_whole_proceeds\"),\n", + " col(\"other_foreclosure_proceeds\"),\n", + " col(\"non_interest_bearing_upb\"),\n", + " col(\"principal_forgiveness_upb\"),\n", + " col(\"repurchase_make_whole_proceeds_flag\"),\n", + " col(\"foreclosure_principal_write_off_amount\"),\n", + " col(\"servicing_activity_indicator\"),\n", + " 
col(\"quarter\")\n", + " )\n", + " \n", + " perfDf.select(\"*\").filter(\"current_actual_upb != 0.0\")\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ce429163", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defined object extractAcqColumns\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "object extractAcqColumns{\n", + " def apply(rawDf : DataFrame) : DataFrame = {\n", + " val acqDf = rawDf.select(\n", + " col(\"loan_id\"),\n", + " col(\"orig_channel\"),\n", + " upper(col(\"seller_name\")).as(\"seller_name\"),\n", + " col(\"orig_interest_rate\"),\n", + " col(\"orig_upb\"),\n", + " col(\"orig_loan_term\"),\n", + " date_format(to_date(col(\"orig_date\"),\"MMyyyy\"), \"MM/yyyy\").as(\"orig_date\"),\n", + " date_format(to_date(col(\"first_pay_date\"),\"MMyyyy\"), \"MM/yyyy\").as(\"first_pay_date\"),\n", + " col(\"orig_ltv\"),\n", + " col(\"orig_cltv\"),\n", + " col(\"num_borrowers\"),\n", + " col(\"dti\"),\n", + " col(\"borrower_credit_score\"),\n", + " col(\"first_home_buyer\"),\n", + " col(\"loan_purpose\"),\n", + " col(\"property_type\"),\n", + " col(\"num_units\"),\n", + " col(\"occupancy_status\"),\n", + " col(\"property_state\"),\n", + " col(\"zip\"),\n", + " col(\"mortgage_insurance_percent\"),\n", + " col(\"product_type\"),\n", + " col(\"coborrow_credit_score\"),\n", + " col(\"mortgage_insurance_type\"),\n", + " col(\"relocation_mortgage_indicator\"),\n", + " col(\"quarter\"),\n", + " dense_rank().over(Window.partitionBy(\"loan_id\").orderBy(to_date(col(\"monthly_reporting_period\"),\"MMyyyy\"))).as(\"rank\")\n", + " )\n", + "\n", + " acqDf.select(\"*\").filter(col(\"rank\") === 1)\n", + " }\n", + "\n", + "}" + ] + }, { "cell_type": "markdown", "id": "37c64d85", @@ -566,15 +710,15 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "id": "98d37174", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "sparkSession = org.apache.spark.sql.SparkSession@1d87c1c2\n", - "reader = org.apache.spark.sql.DataFrameReader@2e8a7a69\n" + "sparkSession = org.apache.spark.sql.SparkSession@694178ec\n", + "reader = org.apache.spark.sql.DataFrameReader@4b2afd51\n" ] }, "metadata": {}, @@ -583,18 +727,30 @@ { "data": { "text/plain": [ - "org.apache.spark.sql.DataFrameReader@2e8a7a69" + "org.apache.spark.sql.DataFrameReader@4b2afd51" ] }, - "execution_count": 9, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Build the spark session and data reader as usual\n", - "val sparkSession = SparkSession.builder.appName(\"mortgage-gpu\").getOrCreate\n", - "val reader = sparkSession.read.option(\"header\", true).schema(performanceSchema)" + "val sparkSession = SparkSession.builder.appName(\"mortgage-gpu\").config(\"spark.sql.cache.serializer\", \"com.nvidia.spark.ParquetCachedBatchSerializer\").getOrCreate\n", + "\n", + "// GPU run, set to true\n", + "sparkSession.conf.set(\"spark.rapids.sql.enabled\", true)\n", + "// CPU run, set to false\n", + "// sparkSession.conf.set('spark.rapids.sql.enabled', 'false')\n", + "// remove config(\"spark.sql.cache.serializer\", \"com.nvidia.spark.ParquetCachedBatchSerializer\") for CPU\n", + "sparkSession.conf.set(\"spark.sql.files.maxPartitionBytes\", \"1G\")\n", + "sparkSession.conf.set(\"spark.sql.broadcastTimeout\", 700)\n", + "sparkSession.conf.set(\"spark.rapids.sql.hasNans\", false)\n", + "// use GPU to read CSV\n", + "sparkSession.conf.set(\"spark.rapids.sql.csv.read.double.enabled\", 
true)\n", + "\n", + "val reader = sparkSession.read.schema(rawSchema)" ] }, { @@ -607,7 +763,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "id": "5bac2301", "metadata": {}, "outputs": [ @@ -615,8 +771,9 @@ "data": { "text/plain": [ "optionsMap = Map(header -> true)\n", + "rawDf = [reference_pool_id: string, loan_id: bigint ... 107 more fields]\n", "perfSet = [loan_id: bigint, monthly_reporting_period: string ... 30 more fields]\n", - "acqSet = [loan_id: bigint, orig_channel: string ... 24 more fields]\n" + "acqSet = [loan_id: bigint, orig_channel: string ... 25 more fields]\n" ] }, "metadata": {}, @@ -625,28 +782,28 @@ { "data": { "text/plain": [ - "[loan_id: bigint, orig_channel: string ... 24 more fields]" + "[loan_id: bigint, orig_channel: string ... 25 more fields]" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "val optionsMap = Map(\"header\" -> \"true\")\n", - "val perfSet = reader.options(optionsMap)\n", + "val rawDf_csv = reader.option(\"header\", false)\n", " .option(\"nullValue\", \"\")\n", " .option(\"delimiter\", \"|\")\n", " .option(\"parserLib\", \"univocity\")\n", - " .schema(performanceSchema)\n", - " .csv(perfPath)\n", + " .schema(rawSchema)\n", + " .csv(dataPath)\n", " .withColumn(\"quarter\", GetQuarterFromCsvFileName())\n", - "val acqSet = reader.options(optionsMap)\n", - " .option(\"delimiter\", \"|\")\n", - " .schema(acquisitionSchema)\n", - " .csv(acqPath)\n", - " .withColumn(\"quarter\", GetQuarterFromCsvFileName())" + "\n", + "rawDf_csv.write.mode(\"overwrite\").parquet(output_csv2parquet)\n", + "val rawDf = spark.read.parquet(output_csv2parquet)\n", + "\n", + "val perfSet = extractPerfColumns(rawDf)\n", + "val acqSet = extractAcqColumns(rawDf)" ] }, { @@ -659,7 +816,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "id": "a16155cb", "metadata": {}, "outputs": [ @@ -681,7 +838,7 @@ "List(orig_channel, first_home_buyer, loan_purpose, property_type, occupancy_status, property_state, product_type, relocation_mortgage_indicator, seller_name, mod_flag, orig_interest_rate, orig_upb, orig_loan_term, orig_ltv, orig_cltv, num_borrowers, dti, borrower_credit_score, num_units, zip, mortgage_insurance_percent, current_loan_delinquency_status, current_actual_upb, interest_rate, loan_age, msa, non_interest_bearing_upb, delinquency_12)" ] }, - "execution_count": 11, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -816,7 +973,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "id": "78b76252", "metadata": {}, "outputs": [ @@ -859,7 +1016,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "id": "ffdb0a62", "metadata": {}, "outputs": [ @@ -867,16 +1024,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "Elapsed time : 35.638s\n" + "Elapsed time : 399.241s\n" ] }, { "data": { "text/plain": [ - "t0 = 1654138715501\n", + "t0 = 1656695479451\n", "optionsMap = Map(header -> true)\n", "rawDF = [orig_channel: int, first_home_buyer: int ... 
26 more fields]\n", - "t1 = 1654138751139\n" + "t1 = 1656695878692\n" ] }, "metadata": {}, @@ -885,42 +1042,47 @@ { "data": { "text/plain": [ - "1654138751139" + "1656695878692" ] }, - "execution_count": 13, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val t0 = System.currentTimeMillis\n", - "val optionsMap = Map(\"header\" -> \"true\")\n", "val rawDF = transform(\n", " perfSet,\n", " acqSet,\n", " sparkSession\n", " )\n", - "rawDF.write.mode(\"overwrite\").parquet(new Path(outPath, \"data\").toString)\n", + "\n", + "val etlDataPath = new Path(outPath, \"data\").toString\n", + "rawDF.write.mode(\"overwrite\").parquet(etlDataPath)\n", + "\n", + "if(saveTrainEvalDataset == true)\n", + "{\n", + " val etlDf = sparkSession.read.parquet(etlDataPath)\n", + " val sets = etlDf.randomSplit(Array[Double](0.8, 0.2))\n", + " val train = sets(0)\n", + " val eval = sets(1)\n", + " train.write.mode(\"overwrite\").parquet(new Path(outPath, \"train\").toString)\n", + " eval.write.mode(\"overwrite\").parquet(new Path(outPath, \"eval\").toString)\n", + "}\n", + "\n", + "\n", "val t1 = System.currentTimeMillis\n", "println(\"Elapsed time : \" + ((t1 - t0).toFloat / 1000) + \"s\")\n", "sparkSession.stop()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4388fe96", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "XGBoost4j-Spark-2206 - Scala", + "display_name": "XGBoost4j-Spark Scala", "language": "scala", - "name": "xgboost4j-spark-2206_scala" + "name": "XGBoost4j-Spark_scala" }, "language_info": { "codemirror_mode": "text/x-scala", @@ -933,4 +1095,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/examples/XGBoost-Examples/mortgage/notebooks/scala/mortgage-gpu.ipynb b/examples/XGBoost-Examples/mortgage/notebooks/scala/mortgage-gpu.ipynb index 6eaec8c72..872e09c2d 100644 --- a/examples/XGBoost-Examples/mortgage/notebooks/scala/mortgage-gpu.ipynb +++ b/examples/XGBoost-Examples/mortgage/notebooks/scala/mortgage-gpu.ipynb @@ -47,39 +47,15 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dataRoot = /data\n", - "trainPath = /data/mortgage/csv/train/\n", - "evalPath = /data/mortgage/csv/test/\n", - "transPath = /data/mortgage/csv/test/\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "/data/mortgage/csv/test/" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "// You need to update them to your real paths! The input data files can be the output of mortgage-etl jobs, or you can\n", - "// just use the provided sample datasets upder datasets path. \n", + "// You need to update them to your real paths! 
The input data files are the output of the mortgage-etl job\n", "val dataRoot = sys.env.getOrElse(\"DATA_ROOT\", \"/data\")\n", - "val trainPath = dataRoot + \"/mortgage/csv/train/\"\n", - "val evalPath = dataRoot + \"/mortgage/csv/test/\"\n", - "val transPath = dataRoot + \"/mortgage/csv/test/\"" + "val trainPath = dataRoot + \"/mortgage/output/train/\"\n", + "val evalPath = dataRoot + \"/mortgage/output/eval/\"\n", + "val transPath = dataRoot + \"/mortgage/output/eval/\"" ] }, { @@ -132,7 +108,7 @@ " StructField(\"seller_name\", DoubleType),\n", " StructField(\"mod_flag\", DoubleType),\n", " StructField(\"orig_interest_rate\", DoubleType),\n", - " StructField(\"orig_upb\", IntegerType),\n", + " StructField(\"orig_upb\", DoubleType),\n", " StructField(\"orig_loan_term\", IntegerType),\n", " StructField(\"orig_ltv\", DoubleType),\n", " StructField(\"orig_cltv\", DoubleType),\n", @@ -208,7 +184,8 @@ "source": [ "// Build the spark session and data reader as usual\n", "val sparkSession = SparkSession.builder.appName(\"mortgage-gpu\").getOrCreate\n", - "val reader = sparkSession.read.option(\"header\", true).schema(schema)" + "sparkSession.conf.set(\"spark.rapids.sql.hasNans\", false)\n", + "val reader = sparkSession.read" ] }, { @@ -239,10 +216,9 @@ } ], "source": [ - "// Please make sure to change the api to reader.parquet if you load parquet files.\n", - "val trainSet = reader.csv(trainPath)\n", - "val evalSet = reader.csv(evalPath)\n", - "val transSet = reader.csv(transPath)" + "val trainSet = reader.parquet(trainPath)\n", + "val evalSet = reader.parquet(evalPath)\n", + "val transSet = reader.parquet(transPath)" ] }, { @@ -588,9 +564,9 @@ } ], "source": [ - "xgbClassificationModel.write.overwrite.save(dataRoot + \"/model/mortgage\")\n", + "xgbClassificationModel.write.overwrite.save(dataRoot + \"/mortgage/model/\")\n", "\n", - "val modelFromDisk = XGBoostClassificationModel.load(dataRoot + \"/model/mortgage\")\n", + "val modelFromDisk = XGBoostClassificationModel.load(dataRoot + \"/mortgage/model/\")\n", "\n", "val (results2, _) = Benchmark.time(\"transform2\") {\n", " modelFromDisk.transform(transSet)\n", @@ -606,20 +582,13 @@ "source": [ "sparkSession.close()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "XGBoost4j-Spark-2206 - Scala", + "display_name": "XGBoost4j-Spark - Scala", "language": "scala", - "name": "xgboost4j-spark-2206_scala" + "name": "XGBoost4j-Spark_scala" }, "language_info": { "codemirror_mode": "text/x-scala", @@ -632,4 +601,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/XGBoost-Examples/mortgage/notebooks/scala/mortgage_gpu_crossvalidation.ipynb b/examples/XGBoost-Examples/mortgage/notebooks/scala/mortgage_gpu_crossvalidation.ipynb index a83c2bcdb..812436087 100644 --- a/examples/XGBoost-Examples/mortgage/notebooks/scala/mortgage_gpu_crossvalidation.ipynb +++ b/examples/XGBoost-Examples/mortgage/notebooks/scala/mortgage_gpu_crossvalidation.ipynb @@ -23,7 +23,7 @@ "import org.apache.spark.sql.SparkSession\n", "import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator\n", "import org.apache.spark.ml.tuning.{ParamGridBuilder,CrossValidator}\n", - "import org.apache.spark.sql.types.{FloatType, IntegerType, StructField, StructType}" + "import org.apache.spark.sql.types.{FloatType, IntegerType, StructField, StructType, DoubleType}" ] }, { @@ -42,36 +42,14 @@ }, { "cell_type": "code", - 
"execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dataRoot = /data\n", - "trainParquetPath = /data/mortgage/parquet/train\n", - "evalParquetPath = /data/mortgage/parquet/eval\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "/data/mortgage/parquet/eval" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "// You need to update them to your real paths!\n", "val dataRoot = sys.env.getOrElse(\"DATA_ROOT\", \"/data\")\n", - "val trainParquetPath=dataRoot + \"/mortgage/parquet/train\"\n", - "val evalParquetPath=dataRoot + \"/mortgage/parquet/eval\"" + "val trainParquetPath=dataRoot + \"/mortgage/output/train\"\n", + "val evalParquetPath=dataRoot + \"/mortgage/output/eval\"" ] }, { @@ -83,30 +61,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "labelColName = delinquency_12\n", - "schema = StructType(StructField(orig_channel,FloatType,true), StructField(first_home_buyer,FloatType,true), StructField(loan_purpose,FloatType,true), StructField(property_type,FloatType,true), StructField(occupancy_status,FloatType,true), StructField(property_state,FloatType,true), StructField(product_type,FloatType,true), StructField(relocation_mortgage_indicator,FloatType,true), StructField(seller_name,FloatType,true), StructField(mod_flag,FloatType,true), StructField(orig_interest_rate,FloatType,true), StructField(orig_upb,IntegerType,true), StructField(orig_loan_term,IntegerType,true), StructField(orig_ltv,FloatType,true), StructField(orig_cltv,FloatType,true), StructField(num_borrowers,FloatType,true), Str...\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "StructType(StructField(orig_channel,FloatType,true), StructField(first_home_buyer,FloatType,true), StructField(loan_purpose,FloatType,true), StructField(property_type,FloatType,true), StructField(occupancy_status,FloatType,true), StructField(property_state,FloatType,true), StructField(product_type,FloatType,true), StructField(relocation_mortgage_indicator,FloatType,true), StructField(seller_name,FloatType,true), StructField(mod_flag,FloatType,true), StructField(orig_interest_rate,FloatType,true), StructField(orig_upb,IntegerType,true), StructField(orig_loan_term,IntegerType,true), StructField(orig_ltv,FloatType,true), StructField(orig_cltv,FloatType,true), StructField(num_borrowers,FloatType,true), Str..." 
- ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "val labelColName = \"delinquency_12\"\n", "val schema = StructType(List(\n", @@ -121,7 +78,7 @@ " StructField(\"seller_name\", FloatType),\n", " StructField(\"mod_flag\", FloatType),\n", " StructField(\"orig_interest_rate\", FloatType),\n", - " StructField(\"orig_upb\", IntegerType),\n", + " StructField(\"orig_upb\", DoubleType),\n", " StructField(\"orig_loan_term\", IntegerType),\n", " StructField(\"orig_ltv\", FloatType),\n", " StructField(\"orig_cltv\", FloatType),\n", @@ -480,20 +437,13 @@ "source": [ "spark.close()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "XGBoost4j-Spark-2206 - Scala", + "display_name": "XGBoost4j-Spark - Scala", "language": "scala", - "name": "xgboost4j-spark-2206_scala" + "name": "XGBoost4j-Spark_scala" }, "language_info": { "codemirror_mode": "text/x-scala", @@ -506,4 +456,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/XGBoost-Examples/mortgage/python/com/nvidia/spark/examples/mortgage/consts.py b/examples/XGBoost-Examples/mortgage/python/com/nvidia/spark/examples/mortgage/consts.py index 7782d84c5..1cca6e6d8 100644 --- a/examples/XGBoost-Examples/mortgage/python/com/nvidia/spark/examples/mortgage/consts.py +++ b/examples/XGBoost-Examples/mortgage/python/com/nvidia/spark/examples/mortgage/consts.py @@ -30,7 +30,7 @@ StructField('seller_name', FloatType()), StructField('mod_flag', FloatType()), StructField('orig_interest_rate', FloatType()), - StructField('orig_upb', IntegerType()), + StructField('orig_upb', DoubleType()), StructField('orig_loan_term', IntegerType()), StructField('orig_ltv', FloatType()), StructField('orig_cltv', FloatType()), @@ -133,67 +133,117 @@ 'Other REFINANCE': 'OTHER REFINANCE', } -performance_schema = StructType([ - StructField('loan_id', LongType()), - StructField('monthly_reporting_period', StringType()), - StructField('servicer', StringType()), - StructField('interest_rate', DoubleType()), - StructField('current_actual_upb', DoubleType()), - StructField('loan_age', DoubleType()), - StructField('remaining_months_to_legal_maturity', DoubleType()), - StructField('adj_remaining_months_to_maturity', DoubleType()), - StructField('maturity_date', StringType()), - StructField('msa', DoubleType()), - StructField('current_loan_delinquency_status', IntegerType()), - StructField('mod_flag', StringType()), - StructField('zero_balance_code', StringType()), - StructField('zero_balance_effective_date', StringType()), - StructField('last_paid_installment_date', StringType()), - StructField('foreclosed_after', StringType()), - StructField('disposition_date', StringType()), - StructField('foreclosure_costs', DoubleType()), - StructField('prop_preservation_and_repair_costs', DoubleType()), - StructField('asset_recovery_costs', DoubleType()), - StructField('misc_holding_expenses', DoubleType()), - StructField('holding_taxes', DoubleType()), - StructField('net_sale_proceeds', DoubleType()), - StructField('credit_enhancement_proceeds', DoubleType()), - StructField('repurchase_make_whole_proceeds', StringType()), - StructField('other_foreclosure_proceeds', DoubleType()), - StructField('non_interest_bearing_upb', DoubleType()), - StructField('principal_forgiveness_upb', StringType()), - StructField('repurchase_make_whole_proceeds_flag', StringType()), - 
StructField('foreclosure_principal_write_off_amount', StringType()), - StructField('servicing_activity_indicator', StringType()), -]) -acquisition_schema = StructType([ - StructField('loan_id', LongType()), - StructField('orig_channel', StringType()), - StructField('seller_name', StringType()), - StructField('orig_interest_rate', DoubleType()), - StructField('orig_upb', IntegerType()), - StructField('orig_loan_term', IntegerType()), - StructField('orig_date', StringType()), - StructField('first_pay_date', StringType()), - StructField('orig_ltv', DoubleType()), - StructField('orig_cltv', DoubleType()), - StructField('num_borrowers', DoubleType()), - StructField('dti', DoubleType()), - StructField('borrower_credit_score', DoubleType()), - StructField('first_home_buyer', StringType()), - StructField('loan_purpose', StringType()), - StructField('property_type', StringType()), - StructField('num_units', IntegerType()), - StructField('occupancy_status', StringType()), - StructField('property_state', StringType()), - StructField('zip', IntegerType()), - StructField('mortgage_insurance_percent', DoubleType()), - StructField('product_type', StringType()), - StructField('coborrow_credit_score', DoubleType()), - StructField('mortgage_insurance_type', DoubleType()), - StructField('relocation_mortgage_indicator', StringType()), -]) +rawSchema = StructType([ + StructField("reference_pool_id", StringType()), + StructField("loan_id", LongType()), + StructField("monthly_reporting_period", StringType()), + StructField("orig_channel", StringType()), + StructField("seller_name", StringType()), + StructField("servicer", StringType()), + StructField("master_servicer", StringType()), + StructField("orig_interest_rate", DoubleType()), + StructField("interest_rate", DoubleType()), + StructField("orig_upb", DoubleType()), + StructField("upb_at_issuance", StringType()), + StructField("current_actual_upb", DoubleType()), + StructField("orig_loan_term", IntegerType()), + StructField("orig_date", StringType()), + StructField("first_pay_date", StringType()), + StructField("loan_age", DoubleType()), + StructField("remaining_months_to_legal_maturity", DoubleType()), + StructField("adj_remaining_months_to_maturity", DoubleType()), + StructField("maturity_date", StringType()), + StructField("orig_ltv", DoubleType()), + StructField("orig_cltv", DoubleType()), + StructField("num_borrowers", DoubleType()), + StructField("dti", DoubleType()), + StructField("borrower_credit_score", DoubleType()), + StructField("coborrow_credit_score", DoubleType()), + StructField("first_home_buyer", StringType()), + StructField("loan_purpose", StringType()), + StructField("property_type", StringType()), + StructField("num_units", IntegerType()), + StructField("occupancy_status", StringType()), + StructField("property_state", StringType()), + StructField("msa", DoubleType()), + StructField("zip", IntegerType()), + StructField("mortgage_insurance_percent", DoubleType()), + StructField("product_type", StringType()), + StructField("prepayment_penalty_indicator", StringType()), + StructField("interest_only_loan_indicator", StringType()), + StructField("interest_only_first_principal_and_interest_payment_date", StringType()), + StructField("months_to_amortization", StringType()), + StructField("current_loan_delinquency_status", IntegerType()), + StructField("loan_payment_history", StringType()), + StructField("mod_flag", StringType()), + StructField("mortgage_insurance_cancellation_indicator", StringType()), + StructField("zero_balance_code", 
StringType()), + StructField("zero_balance_effective_date", StringType()), + StructField("upb_at_the_time_of_removal", StringType()), + StructField("repurchase_date", StringType()), + StructField("scheduled_principal_current", StringType()), + StructField("total_principal_current", StringType()), + StructField("unscheduled_principal_current", StringType()), + StructField("last_paid_installment_date", StringType()), + StructField("foreclosed_after", StringType()), + StructField("disposition_date", StringType()), + StructField("foreclosure_costs", DoubleType()), + StructField("prop_preservation_and_repair_costs", DoubleType()), + StructField("asset_recovery_costs", DoubleType()), + StructField("misc_holding_expenses", DoubleType()), + StructField("holding_taxes", DoubleType()), + StructField("net_sale_proceeds", DoubleType()), + StructField("credit_enhancement_proceeds", DoubleType()), + StructField("repurchase_make_whole_proceeds", StringType()), + StructField("other_foreclosure_proceeds", DoubleType()), + StructField("non_interest_bearing_upb", DoubleType()), + StructField("principal_forgiveness_upb", StringType()), + StructField("original_list_start_date", StringType()), + StructField("original_list_price", StringType()), + StructField("current_list_start_date", StringType()), + StructField("current_list_price", StringType()), + StructField("borrower_credit_score_at_issuance", StringType()), + StructField("co-borrower_credit_score_at_issuance", StringType()), + StructField("borrower_credit_score_current", StringType()), + StructField("co-Borrower_credit_score_current", StringType()), + StructField("mortgage_insurance_type", DoubleType()), + StructField("servicing_activity_indicator", StringType()), + StructField("current_period_modification_loss_amount", StringType()), + StructField("cumulative_modification_loss_amount", StringType()), + StructField("current_period_credit_event_net_gain_or_loss", StringType()), + StructField("cumulative_credit_event_net_gain_or_loss", StringType()), + StructField("homeready_program_indicator", StringType()), + StructField("foreclosure_principal_write_off_amount", StringType()), + StructField("relocation_mortgage_indicator", StringType()), + StructField("zero_balance_code_change_date", StringType()), + StructField("loan_holdback_indicator", StringType()), + StructField("loan_holdback_effective_date", StringType()), + StructField("delinquent_accrued_interest", StringType()), + StructField("property_valuation_method", StringType()), + StructField("high_balance_loan_indicator", StringType()), + StructField("arm_initial_fixed-rate_period_lt_5_yr_indicator", StringType()), + StructField("arm_product_type", StringType()), + StructField("initial_fixed-rate_period", StringType()), + StructField("interest_rate_adjustment_frequency", StringType()), + StructField("next_interest_rate_adjustment_date", StringType()), + StructField("next_payment_change_date", StringType()), + StructField("index", StringType()), + StructField("arm_cap_structure", StringType()), + StructField("initial_interest_rate_cap_up_percent", StringType()), + StructField("periodic_interest_rate_cap_up_percent", StringType()), + StructField("lifetime_interest_rate_cap_up_percent", StringType()), + StructField("mortgage_margin", StringType()), + StructField("arm_balloon_indicator", StringType()), + StructField("arm_plan_number", StringType()), + StructField("borrower_assistance_plan", StringType()), + StructField("hltv_refinance_option_indicator", StringType()), + StructField("deal_name", 
StringType()), + StructField("repurchase_make_whole_proceeds_flag", StringType()), + StructField("alternative_delinquency_resolution", StringType()), + StructField("alternative_delinquency_resolution_count", StringType()), + StructField("total_deferral_amount", StringType()) + ]) categorical_columns = [ 'orig_channel', diff --git a/examples/XGBoost-Examples/mortgage/python/com/nvidia/spark/examples/mortgage/etl.py b/examples/XGBoost-Examples/mortgage/python/com/nvidia/spark/examples/mortgage/etl.py index eb3f40aef..47052737c 100644 --- a/examples/XGBoost-Examples/mortgage/python/com/nvidia/spark/examples/mortgage/etl.py +++ b/examples/XGBoost-Examples/mortgage/python/com/nvidia/spark/examples/mortgage/etl.py @@ -16,9 +16,10 @@ from com.nvidia.spark.examples.mortgage.consts import * from pyspark.sql.functions import * from pyspark.sql.types import * +from pyspark.sql.window import Window from sys import exit -get_quarter = udf(lambda path: path.split(r'.')[0].split('_')[-1], StringType()) +get_quarter = udf(lambda path: path.split(r'.')[0].split('/')[-1], StringType()) standardize_name = udf(lambda name: name_mapping.get(name), StringType()) def load_data(spark, paths, schema, args, extra_csv_opts={}): @@ -31,18 +32,60 @@ def load_data(spark, paths, schema, args, extra_csv_opts={}): (reader .schema(schema) .option('delimiter', '|') - .option('header', args.hasHeader)) + .option('header', False)) for k, v in extra_csv_opts.items(): reader.option(k, v) return reader.load(paths) -def prepare_performance(spark, args): +def prepare_rawDf(spark, args): extra_csv_options = { 'nullValue': '', 'parserLib': 'univocity', } - paths = extract_paths(args.dataPaths, 'perf::') - performance = (load_data(spark, paths, performance_schema, args, extra_csv_options) + paths = extract_paths(args.dataPaths, 'data::') + rawDf = load_data(spark, paths, rawSchema, args, extra_csv_options) + + return rawDf + +def extract_perf_columns(rawDf): + perfDf = rawDf.select( + col("loan_id"), + date_format(to_date(col("monthly_reporting_period"),"MMyyyy"), "MM/dd/yyyy").alias("monthly_reporting_period"), + upper(col("servicer")).alias("servicer"), + col("interest_rate"), + col("current_actual_upb"), + col("loan_age"), + col("remaining_months_to_legal_maturity"), + col("adj_remaining_months_to_maturity"), + date_format(to_date(col("maturity_date"),"MMyyyy"), "MM/yyyy").alias("maturity_date"), + col("msa"), + col("current_loan_delinquency_status"), + col("mod_flag"), + col("zero_balance_code"), + date_format(to_date(col("zero_balance_effective_date"),"MMyyyy"), "MM/yyyy").alias("zero_balance_effective_date"), + date_format(to_date(col("last_paid_installment_date"),"MMyyyy"), "MM/dd/yyyy").alias("last_paid_installment_date"), + date_format(to_date(col("foreclosed_after"),"MMyyyy"), "MM/dd/yyyy").alias("foreclosed_after"), + date_format(to_date(col("disposition_date"),"MMyyyy"), "MM/dd/yyyy").alias("disposition_date"), + col("foreclosure_costs"), + col("prop_preservation_and_repair_costs"), + col("asset_recovery_costs"), + col("misc_holding_expenses"), + col("holding_taxes"), + col("net_sale_proceeds"), + col("credit_enhancement_proceeds"), + col("repurchase_make_whole_proceeds"), + col("other_foreclosure_proceeds"), + col("non_interest_bearing_upb"), + col("principal_forgiveness_upb"), + col("repurchase_make_whole_proceeds_flag"), + col("foreclosure_principal_write_off_amount"), + col("servicing_activity_indicator")) + + return perfDf.select("*").filter("current_actual_upb != 0.0") + + +def prepare_performance(spark, args, 
rawDf): + performance = (extract_perf_columns(rawDf) .withColumn('quarter', get_quarter(input_file_name())) .withColumn('timestamp', to_date(col('monthly_reporting_period'), 'MM/dd/yyyy')) .withColumn('timestamp_year', year(col('timestamp'))) @@ -133,8 +176,42 @@ def prepare_performance(spark, args): .join(to_join, ['quarter', 'loan_id', 'timestamp_year', 'timestamp_month'], 'left') .drop('timestamp_year', 'timestamp_month')) -def prepare_acquisition(spark, args): - return (load_data(spark, extract_paths(args.dataPaths, 'acq::'), acquisition_schema, args) +def extract_acq_columns(rawDf): + acqDf = rawDf.select( + col("loan_id"), + col("orig_channel"), + upper(col("seller_name")).alias("seller_name"), + col("orig_interest_rate"), + col("orig_upb"), + col("orig_loan_term"), + date_format(to_date(col("orig_date"),"MMyyyy"), "MM/yyyy").alias("orig_date"), + date_format(to_date(col("first_pay_date"),"MMyyyy"), "MM/yyyy").alias("first_pay_date"), + col("orig_ltv"), + col("orig_cltv"), + col("num_borrowers"), + col("dti"), + col("borrower_credit_score"), + col("first_home_buyer"), + col("loan_purpose"), + col("property_type"), + col("num_units"), + col("occupancy_status"), + col("property_state"), + col("zip"), + col("mortgage_insurance_percent"), + col("product_type"), + col("coborrow_credit_score"), + col("mortgage_insurance_type"), + col("relocation_mortgage_indicator"), + dense_rank().over(Window.partitionBy("loan_id").orderBy(to_date(col("monthly_reporting_period"),"MMyyyy"))).alias("rank") + ) + + return acqDf.select("*").filter(col("rank")==1) + + + +def prepare_acquisition(spark, args, rawDf): + return (extract_acq_columns(rawDf) .withColumn('quarter', get_quarter(input_file_name())) .withColumn('seller_name', standardize_name(col('seller_name')))) @@ -147,8 +224,12 @@ def extract_paths(paths, prefix): return results def etl(spark, args): - performance = prepare_performance(spark, args) - acquisition = prepare_acquisition(spark, args) + rawDf = prepare_rawDf(spark, args) + rawDf.write.parquet(extract_paths(args.dataPaths, 'tmp::')[0], mode='overwrite') + rawDf = spark.read.parquet(extract_paths(args.dataPaths, 'tmp::')[0]) + + performance = prepare_performance(spark, args, rawDf) + acquisition = prepare_acquisition(spark, args, rawDf) return (performance .join(acquisition, ['loan_id', 'quarter'], 'left_outer') .select( diff --git a/examples/XGBoost-Examples/mortgage/python/com/nvidia/spark/examples/mortgage/etl_main.py b/examples/XGBoost-Examples/mortgage/python/com/nvidia/spark/examples/mortgage/etl_main.py index 6002f5056..55f5df5fc 100644 --- a/examples/XGBoost-Examples/mortgage/python/com/nvidia/spark/examples/mortgage/etl_main.py +++ b/examples/XGBoost-Examples/mortgage/python/com/nvidia/spark/examples/mortgage/etl_main.py @@ -16,7 +16,6 @@ from com.nvidia.spark.examples.mortgage.consts import * from com.nvidia.spark.examples.mortgage.etl import etl, extract_paths from com.nvidia.spark.examples.utility.utils import * -from ml.dmlc.xgboost4j.scala.spark import * from pyspark.sql import SparkSession def main(args, xgboost_args): diff --git a/examples/XGBoost-Examples/mortgage/scala/src/com/nvidia/spark/examples/mortgage/ETLMain.scala b/examples/XGBoost-Examples/mortgage/scala/src/com/nvidia/spark/examples/mortgage/ETLMain.scala index f54d3d67c..d6b5db30a 100644 --- a/examples/XGBoost-Examples/mortgage/scala/src/com/nvidia/spark/examples/mortgage/ETLMain.scala +++ b/examples/XGBoost-Examples/mortgage/scala/src/com/nvidia/spark/examples/mortgage/ETLMain.scala @@ -31,17 +31,17 @@ object 
ETLMain extends Mortgage { val spark = SparkSession.builder().appName(appInfo.mkString("-")).getOrCreate() try { - val (perfPaths, acqPaths, outPath) = checkAndGetPaths(xgbArgs.dataPaths) + val (dataPaths, outPath, tmpPath) = checkAndGetPaths(xgbArgs.dataPaths) println("\n------ Start ETL ------") benchmark.time("ETL") { // ETL the raw data val rawDF = xgbArgs.format match { - case "csv" => XGBoostETL.csv(spark, perfPaths, acqPaths, xgbArgs.hasHeader) - case "orc" => XGBoostETL.orc(spark, perfPaths, acqPaths) - case "parquet" => XGBoostETL.parquet(spark, perfPaths, acqPaths) + case "csv" => XGBoostETL.csv(spark, dataPaths, tmpPath, false) + case "orc" => XGBoostETL.orc(spark, dataPaths) + case "parquet" => XGBoostETL.parquet(spark, dataPaths) case _ => throw new IllegalArgumentException("Unsupported data file format!") } - rawDF.write.mode("overwrite").parquet(new Path(outPath, "data").toString) + rawDF.write.mode("overwrite").parquet(outPath) } if (xgbArgs.saveDict) { XGBoostETL.saveDictTable(new Path(outPath, ".dict").toString) @@ -52,32 +52,32 @@ object ETLMain extends Mortgage { } } - private def checkAndGetPaths(paths: Seq[String]): (Seq[String], Seq[String], String) = { - val prefixes = Array("perf::", "acq::", "out::") + def checkAndGetPaths(paths: Seq[String]): (Seq[String], String, String) = { + val prefixes = Array("data::", "out::", "tmp::") val validPaths = paths.filter(_.nonEmpty).map(_.trim) // get and check perf data paths - val perfPaths = validPaths.filter(_.startsWith(prefixes.head)) - require(perfPaths.nonEmpty, s"$appName ETL requires at least one path for performance data file." + - s" Please specify it by '-dataPath=perf::your_perf_path'") - - // get and check acq data paths - val acqPaths = validPaths.filter(_.startsWith(prefixes(1))) - require(acqPaths.nonEmpty, s"$appName ETL requires at least one path for acquisition data file." + - s" Please specify it by '-dataPath=acq::your_acq_path'") + val dataPaths = validPaths.filter(_.startsWith(prefixes.head)) + require(dataPaths.nonEmpty, s"$appName ETL requires at least one path for data file." + + s" Please specify it by '-dataPath=data::your_data_path'") // get and check out path - val outPath = validPaths.filter(_.startsWith(prefixes(2))) + val outPath = validPaths.filter(_.startsWith(prefixes(1))) require(outPath.nonEmpty, s"$appName ETL requires a path to save the ETLed data file. Please specify it" + " by '-dataPath=out::your_out_path', only the first path is used if multiple paths are found.") + + // get and check tmp path + val tmpPath = validPaths.filter(_.startsWith(prefixes(2))) + require(tmpPath.nonEmpty, s"$appName ETL requires a path to save the temp parquet files. 
Please specify it" + + " by '-dataPath=tmp::your_tmp_path'.") // check data paths not specified type val unknownPaths = validPaths.filterNot(p => prefixes.exists(p.contains(_))) require(unknownPaths.isEmpty, s"Unknown type for data path: ${unknownPaths.head}, $appName requires to specify" + - " the type for each data path by adding the prefix 'perf::' or 'acq::' or 'out::'.") + " the type for each data path by adding the prefix 'data::', 'out::' or 'tmp::'.") - (perfPaths.map(_.stripPrefix(prefixes.head)), - acqPaths.map(_.stripPrefix(prefixes(1))), - outPath.head.stripPrefix(prefixes(2))) + (dataPaths.map(_.stripPrefix(prefixes.head)), + outPath.head.stripPrefix(prefixes(1)), + tmpPath.head.stripPrefix(prefixes(2))) } } diff --git a/examples/XGBoost-Examples/mortgage/scala/src/com/nvidia/spark/examples/mortgage/Mortgage.scala b/examples/XGBoost-Examples/mortgage/scala/src/com/nvidia/spark/examples/mortgage/Mortgage.scala index 582492006..c051cff07 100644 --- a/examples/XGBoost-Examples/mortgage/scala/src/com/nvidia/spark/examples/mortgage/Mortgage.scala +++ b/examples/XGBoost-Examples/mortgage/scala/src/com/nvidia/spark/examples/mortgage/Mortgage.scala @@ -16,7 +16,7 @@ package com.nvidia.spark.examples.mortgage -import org.apache.spark.sql.types.{FloatType, IntegerType, StructField, StructType} +import org.apache.spark.sql.types.{FloatType, IntegerType, StructField, StructType, DoubleType} private[mortgage] trait Mortgage { val appName = "Mortgage" @@ -37,7 +37,7 @@ private[mortgage] trait Mortgage { protected val numericCols = List( ("orig_interest_rate", FloatType), - ("orig_upb", IntegerType), + ("orig_upb", DoubleType), ("orig_loan_term", IntegerType), ("orig_ltv", FloatType), ("orig_cltv", FloatType), diff --git a/examples/XGBoost-Examples/mortgage/scala/src/com/nvidia/spark/examples/mortgage/XGBoostETL.scala b/examples/XGBoost-Examples/mortgage/scala/src/com/nvidia/spark/examples/mortgage/XGBoostETL.scala index 0ef25ea2e..7c21b9dbe 100644 --- a/examples/XGBoost-Examples/mortgage/scala/src/com/nvidia/spark/examples/mortgage/XGBoostETL.scala +++ b/examples/XGBoost-Examples/mortgage/scala/src/com/nvidia/spark/examples/mortgage/XGBoostETL.scala @@ -27,27 +27,64 @@ object GetQuarterFromCsvFileName { // So we strip off the .txt and everything after it // and then take everything after the last remaining _ def apply(): Column = substring_index( - substring_index(input_file_name(), ".", 1), "_", -1) + substring_index(input_file_name(), ".", 1), "/", -1) } private object CsvReader { - def readPerformance(spark: SparkSession, paths: Seq[String], optionsMap: Map[String, String]): DataFrame = { - val performanceSchema = StructType(Array( + def readRaw(spark: SparkSession, paths: Seq[String], optionsMap: Map[String, String]): DataFrame = { + + val rawSchema = StructType(Array( + StructField("reference_pool_id", StringType), StructField("loan_id", LongType), StructField("monthly_reporting_period", StringType), + StructField("orig_channel", StringType), + StructField("seller_name", StringType), StructField("servicer", StringType), + StructField("master_servicer", StringType), + StructField("orig_interest_rate", DoubleType), StructField("interest_rate", DoubleType), + StructField("orig_upb", DoubleType), + StructField("upb_at_issuance", StringType), StructField("current_actual_upb", DoubleType), + StructField("orig_loan_term", IntegerType), + StructField("orig_date", StringType), + StructField("first_pay_date", StringType), StructField("loan_age", DoubleType), 
StructField("remaining_months_to_legal_maturity", DoubleType), StructField("adj_remaining_months_to_maturity", DoubleType), StructField("maturity_date", StringType), + StructField("orig_ltv", DoubleType), + StructField("orig_cltv", DoubleType), + StructField("num_borrowers", DoubleType), + StructField("dti", DoubleType), + StructField("borrower_credit_score", DoubleType), + StructField("coborrow_credit_score", DoubleType), + StructField("first_home_buyer", StringType), + StructField("loan_purpose", StringType), + StructField("property_type", StringType), + StructField("num_units", IntegerType), + StructField("occupancy_status", StringType), + StructField("property_state", StringType), StructField("msa", DoubleType), + StructField("zip", IntegerType), + StructField("mortgage_insurance_percent", DoubleType), + StructField("product_type", StringType), + StructField("prepayment_penalty_indicator", StringType), + StructField("interest_only_loan_indicator", StringType), + StructField("interest_only_first_principal_and_interest_payment_date", StringType), + StructField("months_to_amortization", StringType), StructField("current_loan_delinquency_status", IntegerType), + StructField("loan_payment_history", StringType), StructField("mod_flag", StringType), + StructField("mortgage_insurance_cancellation_indicator", StringType), StructField("zero_balance_code", StringType), StructField("zero_balance_effective_date", StringType), + StructField("upb_at_the_time_of_removal", StringType), + StructField("repurchase_date", StringType), + StructField("scheduled_principal_current", StringType), + StructField("total_principal_current", StringType), + StructField("unscheduled_principal_current", StringType), StructField("last_paid_installment_date", StringType), StructField("foreclosed_after", StringType), StructField("disposition_date", StringType), @@ -62,59 +99,141 @@ private object CsvReader { StructField("other_foreclosure_proceeds", DoubleType), StructField("non_interest_bearing_upb", DoubleType), StructField("principal_forgiveness_upb", StringType), - StructField("repurchase_make_whole_proceeds_flag", StringType), + StructField("original_list_start_date", StringType), + StructField("original_list_price", StringType), + StructField("current_list_start_date", StringType), + StructField("current_list_price", StringType), + StructField("borrower_credit_score_at_issuance", StringType), + StructField("co-borrower_credit_score_at_issuance", StringType), + StructField("borrower_credit_score_current", StringType), + StructField("co-Borrower_credit_score_current", StringType), + StructField("mortgage_insurance_type", DoubleType), + StructField("servicing_activity_indicator", StringType), + StructField("current_period_modification_loss_amount", StringType), + StructField("cumulative_modification_loss_amount", StringType), + StructField("current_period_credit_event_net_gain_or_loss", StringType), + StructField("cumulative_credit_event_net_gain_or_loss", StringType), + StructField("homeready_program_indicator", StringType), StructField("foreclosure_principal_write_off_amount", StringType), - StructField("servicing_activity_indicator", StringType)) + StructField("relocation_mortgage_indicator", StringType), + StructField("zero_balance_code_change_date", StringType), + StructField("loan_holdback_indicator", StringType), + StructField("loan_holdback_effective_date", StringType), + StructField("delinquent_accrued_interest", StringType), + StructField("property_valuation_method", StringType), + 
StructField("high_balance_loan_indicator", StringType), + StructField("arm_initial_fixed-rate_period_lt_5_yr_indicator", StringType), + StructField("arm_product_type", StringType), + StructField("initial_fixed-rate_period", StringType), + StructField("interest_rate_adjustment_frequency", StringType), + StructField("next_interest_rate_adjustment_date", StringType), + StructField("next_payment_change_date", StringType), + StructField("index", StringType), + StructField("arm_cap_structure", StringType), + StructField("initial_interest_rate_cap_up_percent", StringType), + StructField("periodic_interest_rate_cap_up_percent", StringType), + StructField("lifetime_interest_rate_cap_up_percent", StringType), + StructField("mortgage_margin", StringType), + StructField("arm_balloon_indicator", StringType), + StructField("arm_plan_number", StringType), + StructField("borrower_assistance_plan", StringType), + StructField("hltv_refinance_option_indicator", StringType), + StructField("deal_name", StringType), + StructField("repurchase_make_whole_proceeds_flag", StringType), + StructField("alternative_delinquency_resolution", StringType), + StructField("alternative_delinquency_resolution_count", StringType), + StructField("total_deferral_amount", StringType) + ) ) spark.read .options(optionsMap) .option("nullValue", "") .option("delimiter", "|") - .option("parserLib", "univocity") - .schema(performanceSchema) + .schema(rawSchema) .csv(paths: _*) .withColumn("quarter", GetQuarterFromCsvFileName()) } +} - def readAcquisition(spark: SparkSession, paths: Seq[String], optionsMap: Map[String, String]): DataFrame = { - val acquisitionSchema = StructType(Array( - StructField("loan_id", LongType), - StructField("orig_channel", StringType), - StructField("seller_name", StringType), - StructField("orig_interest_rate", DoubleType), - StructField("orig_upb", IntegerType), - StructField("orig_loan_term", IntegerType), - StructField("orig_date", StringType), - StructField("first_pay_date", StringType), - StructField("orig_ltv", DoubleType), - StructField("orig_cltv", DoubleType), - StructField("num_borrowers", DoubleType), - StructField("dti", DoubleType), - StructField("borrower_credit_score", DoubleType), - StructField("first_home_buyer", StringType), - StructField("loan_purpose", StringType), - StructField("property_type", StringType), - StructField("num_units", IntegerType), - StructField("occupancy_status", StringType), - StructField("property_state", StringType), - StructField("zip", IntegerType), - StructField("mortgage_insurance_percent", DoubleType), - StructField("product_type", StringType), - StructField("coborrow_credit_score", DoubleType), - StructField("mortgage_insurance_type", DoubleType), - StructField("relocation_mortgage_indicator", StringType)) +object extractPerfColumns{ + def apply(rawDf : DataFrame) : DataFrame = { + val perfDf = rawDf.select( + col("loan_id"), + date_format(to_date(col("monthly_reporting_period"),"MMyyyy"), "MM/dd/yyyy").as("monthly_reporting_period"), + upper(col("servicer")).as("servicer"), + col("interest_rate"), + col("current_actual_upb"), + col("loan_age"), + col("remaining_months_to_legal_maturity"), + col("adj_remaining_months_to_maturity"), + date_format(to_date(col("maturity_date"),"MMyyyy"), "MM/yyyy").as("maturity_date"), + col("msa"), + col("current_loan_delinquency_status"), + col("mod_flag"), + col("zero_balance_code"), + date_format(to_date(col("zero_balance_effective_date"),"MMyyyy"), "MM/yyyy").as("zero_balance_effective_date"), + 
date_format(to_date(col("last_paid_installment_date"),"MMyyyy"), "MM/dd/yyyy").as("last_paid_installment_date"), + date_format(to_date(col("foreclosed_after"),"MMyyyy"), "MM/dd/yyyy").as("foreclosed_after"), + date_format(to_date(col("disposition_date"),"MMyyyy"), "MM/dd/yyyy").as("disposition_date"), + col("foreclosure_costs"), + col("prop_preservation_and_repair_costs"), + col("asset_recovery_costs"), + col("misc_holding_expenses"), + col("holding_taxes"), + col("net_sale_proceeds"), + col("credit_enhancement_proceeds"), + col("repurchase_make_whole_proceeds"), + col("other_foreclosure_proceeds"), + col("non_interest_bearing_upb"), + col("principal_forgiveness_upb"), + col("repurchase_make_whole_proceeds_flag"), + col("foreclosure_principal_write_off_amount"), + col("servicing_activity_indicator"), + col("quarter") ) - spark.read - .options(optionsMap) - .option("delimiter", "|") - .schema(acquisitionSchema) - .csv(paths: _*) - .withColumn("quarter", GetQuarterFromCsvFileName()) + perfDf.select("*").filter("current_actual_upb != 0.0") } } +object extractAcqColumns{ + def apply(rawDf : DataFrame) : DataFrame = { + val acqDf = rawDf.select( + col("loan_id"), + col("orig_channel"), + upper(col("seller_name")).as("seller_name"), + col("orig_interest_rate"), + col("orig_upb"), + col("orig_loan_term"), + date_format(to_date(col("orig_date"),"MMyyyy"), "MM/yyyy").as("orig_date"), + date_format(to_date(col("first_pay_date"),"MMyyyy"), "MM/yyyy").as("first_pay_date"), + col("orig_ltv"), + col("orig_cltv"), + col("num_borrowers"), + col("dti"), + col("borrower_credit_score"), + col("first_home_buyer"), + col("loan_purpose"), + col("property_type"), + col("num_units"), + col("occupancy_status"), + col("property_state"), + col("zip"), + col("mortgage_insurance_percent"), + col("product_type"), + col("coborrow_credit_score"), + col("mortgage_insurance_type"), + col("relocation_mortgage_indicator"), + col("quarter"), + dense_rank().over(Window.partitionBy("loan_id").orderBy(to_date(col("monthly_reporting_period"),"MMyyyy"))).as("rank") + ) + + acqDf.select("*").filter(col("rank") === 1).drop("rank") + } + +} + object NameMapping { /** * Returns a dataframe with two columns named based off of the column names passed in. 
@@ -414,28 +533,43 @@ object XGBoostETL extends Mortgage {
     }
   }
-  def csv(spark: SparkSession, perfPaths: Seq[String], acqPaths: Seq[String], hasHeader: Boolean): DataFrame = {
+  def csv(spark: SparkSession, dataPaths: Seq[String], tmpPath: String, hasHeader: Boolean): DataFrame = {
     val optionsMap = Map("header" -> hasHeader.toString)
+    val rawDf_csv = CsvReader.readRaw(spark, dataPaths, optionsMap)
+
+    rawDf_csv.write.mode("overwrite").parquet(tmpPath)
+    val rawDf = spark.read.parquet(tmpPath)
+
+    val perfDf = extractPerfColumns(rawDf)
+    val acqDf = extractAcqColumns(rawDf)
     transform(
-      CsvReader.readPerformance(spark, perfPaths, optionsMap),
-      CsvReader.readAcquisition(spark, acqPaths, optionsMap),
+      perfDf,
+      acqDf,
       spark
     )
   }
-  def parquet(spark: SparkSession, perfPaths: Seq[String], acqPaths: Seq[String]): DataFrame = {
+  def parquet(spark: SparkSession, dataPaths: Seq[String]): DataFrame = {
+    val rawDf = spark.read.parquet(dataPaths: _*)
+    val perfDf = extractPerfColumns(rawDf)
+    val acqDf = extractAcqColumns(rawDf)
     transform(
-      spark.read.parquet(perfPaths: _*),
-      spark.read.parquet(acqPaths: _*),
+      perfDf,
+      acqDf,
       spark
     )
   }
-  def orc(spark: SparkSession, perfPaths: Seq[String], acqPaths: Seq[String]): DataFrame = {
+  def orc(spark: SparkSession, dataPaths: Seq[String]): DataFrame = {
+    val rawDf = spark.read.orc(dataPaths: _*)
+    val perfDf = extractPerfColumns(rawDf)
+    val acqDf = extractAcqColumns(rawDf)
     transform(
-      spark.read.orc(perfPaths: _*),
-      spark.read.orc(acqPaths: _*),
+      perfDf,
+      acqDf,
       spark
     )
   }
+
+
 }
diff --git a/examples/XGBoost-Examples/taxi/notebooks/python/taxi-ETL.ipynb b/examples/XGBoost-Examples/taxi/notebooks/python/taxi-ETL.ipynb
index 7abeac750..171f47f4c 100644
--- a/examples/XGBoost-Examples/taxi/notebooks/python/taxi-ETL.ipynb
+++ b/examples/XGBoost-Examples/taxi/notebooks/python/taxi-ETL.ipynb
@@ -19,14 +19,14 @@
     "All data could be found at https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page\n",
     "\n",
     "### 2. Download needed jars\n",
-    "* [rapids-4-spark_2.12-22.06.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.06.0/rapids-4-spark_2.12-22.06.0.jar)\n",
+    "* [rapids-4-spark_2.12-22.08.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.08.0/rapids-4-spark_2.12-22.08.0.jar)\n",
     "\n",
     "### 3. Start Spark Standalone\n",
     "Before running the script, please setup Spark standalone mode\n",
     "\n",
     "### 4. Add ENV\n",
     "```\n",
-    "$ export SPARK_JARS=rapids-4-spark_2.12-22.06.0.jar\n",
+    "$ export SPARK_JARS=rapids-4-spark_2.12-22.08.0.jar\n",
     "$ export PYSPARK_DRIVER_PYTHON=jupyter \n",
     "$ export PYSPARK_DRIVER_PYTHON_OPTS=notebook\n",
     "```\n",
diff --git a/examples/XGBoost-Examples/taxi/notebooks/scala/taxi-ETL.ipynb b/examples/XGBoost-Examples/taxi/notebooks/scala/taxi-ETL.ipynb
index 6a849b5b1..0f14cdc65 100644
--- a/examples/XGBoost-Examples/taxi/notebooks/scala/taxi-ETL.ipynb
+++ b/examples/XGBoost-Examples/taxi/notebooks/scala/taxi-ETL.ipynb
@@ -19,18 +19,18 @@
     "All data could be found at https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page\n",
     "\n",
     "### 2. Download needed jar\n",
-    "* [rapids-4-spark_2.12-22.06.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.06.0/rapids-4-spark_2.12-22.06.0.jar)\n",
+    "* [rapids-4-spark_2.12-22.08.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.08.0/rapids-4-spark_2.12-22.08.0.jar)\n",
     "\n",
     "### 3. Start Spark Standalone\n",
     "Before running the script, please setup Spark standalone mode\n",
     "\n",
     "### 4. Add ENV\n",
     "```\n",
-    "$ export SPARK_JARS=rapids-4-spark_2.12-22.06.0.jar\n",
+    "$ export SPARK_JARS=rapids-4-spark_2.12-22.08.0.jar\n",
     "\n",
     "```\n",
     "\n",
-    "### 5.Start Jupyter Notebook with spylon-kernal or toree\n",
+    "### 5.Start Jupyter Notebook with spylon-kernel or toree\n",
     "\n",
     "```\n",
     "$ jupyter notebook --allow-root --notebook-dir=${your-dir} --config=${your-configs}\n",
@@ -563,9 +563,9 @@
    ],
    "metadata": {
     "kernelspec": {
-     "display_name": "XGBoost4j-Spark-2206 - Scala",
+     "display_name": "XGBoost4j-Spark - Scala",
      "language": "scala",
-     "name": "xgboost4j-spark-2206_scala"
+     "name": "XGBoost4j-Spark_scala"
     },
     "language_info": {
      "codemirror_mode": "text/x-scala",
@@ -578,4 +578,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
\ No newline at end of file
+}
diff --git a/examples/XGBoost-Examples/taxi/notebooks/scala/taxi-gpu.ipynb b/examples/XGBoost-Examples/taxi/notebooks/scala/taxi-gpu.ipynb
index b59f74473..58dd84eb0 100644
--- a/examples/XGBoost-Examples/taxi/notebooks/scala/taxi-gpu.ipynb
+++ b/examples/XGBoost-Examples/taxi/notebooks/scala/taxi-gpu.ipynb
@@ -594,9 +594,9 @@
    ],
    "metadata": {
     "kernelspec": {
-     "display_name": "XGBoost4j-Spark-2206 - Scala",
+     "display_name": "XGBoost4j-Spark - Scala",
      "language": "scala",
-     "name": "xgboost4j-spark-2206_scala"
+     "name": "XGBoost4j-Spark_scala"
     },
     "language_info": {
      "codemirror_mode": "text/x-scala",
diff --git a/examples/XGBoost-Examples/taxi/notebooks/scala/taxi_gpu_crossvalidation.ipynb b/examples/XGBoost-Examples/taxi/notebooks/scala/taxi_gpu_crossvalidation.ipynb
index b8e524e4d..b5d1da710 100644
--- a/examples/XGBoost-Examples/taxi/notebooks/scala/taxi_gpu_crossvalidation.ipynb
+++ b/examples/XGBoost-Examples/taxi/notebooks/scala/taxi_gpu_crossvalidation.ipynb
@@ -490,9 +490,9 @@
    ],
    "metadata": {
     "kernelspec": {
-     "display_name": "XGBoost4j-Spark-2206 - Scala",
+     "display_name": "XGBoost4j-Spark - Scala",
      "language": "scala",
-     "name": "xgboost4j-spark-2206_scala"
+     "name": "XGBoost4j-Spark_scala"
     },
     "language_info": {
      "codemirror_mode": "text/x-scala",
diff --git a/examples/XGBoost-Examples/utility/scala/src/com/nvidia/spark/examples/utility/XGBoostArgs.scala b/examples/XGBoost-Examples/utility/scala/src/com/nvidia/spark/examples/utility/XGBoostArgs.scala
index 75faaaa5a..d8cca3fcd 100644
--- a/examples/XGBoost-Examples/utility/scala/src/com/nvidia/spark/examples/utility/XGBoostArgs.scala
+++ b/examples/XGBoost-Examples/utility/scala/src/com/nvidia/spark/examples/utility/XGBoostArgs.scala
@@ -97,10 +97,10 @@ object XGBoostArgs {
     println(" -saveDict=value: Boolean\n" +
       " Whether to save the dictionary table for Mortgage ETL. It is saved under '/.dict'. Default is true.\n")
     println(" -rabitTrackerHost=value: String\n" +
-      " Specify rabit tracker host IP address. In some environments XGBoost might fail to resolve
-      the IP address of the rabit tracker, a symptom is user receiving ``OSError: [Errno 99]
-      Cannot assign requested address`` error during training. A quick workaround is to
-      specify the address explicitly.\n")
+      " Specify rabit tracker host IP address. In some environments XGBoost might fail to resolve\n" +
+      "the IP address of the rabit tracker, a symptom is user receiving ``OSError: [Errno 99]\n" +
+      "Cannot assign requested address`` error during training. A quick workaround is to\n" +
+      "specify the address explicitly.\n")
     println("For XGBoost arguments:")
     println(" Now we pass all XGBoost parameters transparently to XGBoost, no longer to verify them.")
     println(" Both of the formats are supported, such as 'numWorkers'. You can pass as either one below:")
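Taken together, the XGBoostETL changes earlier in this diff collapse the old perfPaths/acqPaths pairs into a single dataPaths argument, plus a tmpPath staging directory for the CSV case. A hedged sketch of what call sites look like after this change; the SparkSession and all paths below are illustrative placeholders:

```
// Sketch only: paths are placeholders; the csv/parquet/orc signatures follow the XGBoostETL hunk above.
val fromCsv = XGBoostETL.csv(
  spark,
  Seq("/data/mortgage/csv"),       // single set of raw input paths, no perf/acq split
  "/tmp/mortgage_raw_parquet",     // staging dir: raw CSV is written here as Parquet, then re-read
  hasHeader = false)

// Parquet and ORC inputs take the same single-path form.
val fromParquet = XGBoostETL.parquet(spark, Seq("/data/mortgage/parquet"))
val fromOrc     = XGBoostETL.orc(spark, Seq("/data/mortgage/orc"))
```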