BrooksIan · ifigeneia1989 · Dec 6, 2019 · Dec 6, 2019 · Sep 1, 2020 · Sep 23, 2020
diff --git a/README.md b/README.md
@@ -29,7 +29,7 @@ In this project, there are 5 different supervised classifer models designed for
 1.  In CSDW, download the project using the git url for [here](https://github.com/BrooksIan/ChurnBabyChurn.git) 
 2.  Open a new session, and execute the setup.sh file
 3.  In Experiments, run the following scripts
-    * dsforteko_pyspark.py  - vanilla random forest churn model
+    * dsfortelco_pyspark.py  - vanilla random forest churn model
     * gbt_churn_pyspark.py  - gradient boost tree churn model with normalized variables, hyperparameter tuning, and cross-validation
     * mlp_churn_pyspark.py  - multilayer perceptron churn model with normalized variables, hyperparameter tuning, and cross-validation
     * rf_churn_pyspark.py  -  random forest churn model with normalized variables, hyperparameter tuning, and cross-validation

diff --git a/RapidSetup.sh b/RapidSetup.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+# NOTE: the exports below only reach the calling shell if this file is sourced.
+#Set Up Paths for Rapids Jars
+
+export SPARK_RAPIDS_DIR=/opt/sparkRapidsPlugin
+export SPARK_CUDF_JAR=${SPARK_RAPIDS_DIR}/cudf-0.15-SNAPSHOT-cuda10-1.jar
+export SPARK_RAPIDS_PLUGIN_JAR=${SPARK_RAPIDS_DIR}/rapids-4-spark_2.12-0.2.0-SNAPSHOT.jar
+
+# Set mode 775 on every jar; reuse SPARK_RAPIDS_DIR so the path is defined once.
+chmod 775 "${SPARK_RAPIDS_DIR}"/*.jar
+
+#/opt/sparkRapidsPlugin/cudf-0.15-SNAPSHOT-cuda10-1.jar
+#/opt/sparkRapidsPlugin/rapids-4-spark_2.12-0.2.0-SNAPSHOT.jar
diff --git a/dsfortelco_pyspark.py b/dsfortelco_pyspark.py
@@ -112,7 +112,7 @@
 !rm -r -f models/spark_rf_vanilla.tar
 !hdfs dfs -get models/spark 
 !hdfs dfs -get models/
-!tar -cvf models/spark_rf._vanilla.tar models/spark/vanilla
+!tar -cvf models/spark_rf_vanilla.tar models/spark/vanilla
 
 cdsw.track_file("models/spark_rf_vanilla.tar")
 

diff --git a/spark-defaults.conf b/spark-defaults.conf
@@ -4,4 +4,4 @@ spark.executor.memory 3584m
 spark.executor.cores 1
 spark.yarn.executor.memoryOverhead 512m
 spark.app.name ds-for-telco
-spark.lineage.enabled false
+spark.lineage.enabled false
diff --git a/spark-rapids.conf b/spark-rapids.conf
@@ -0,0 +1,17 @@
+spark.master=k8s://https://172.20.0.1:443
+spark.rapids.sql.concurrentGpuTasks=1
+spark.rapids.sql.format.csv.read.enabled=false
+spark.rapids.sql.enabled=false
+spark.executor.memory=4G
+spark.executor.cores=4
+spark.task.cpus=1
+spark.task.resource.gpu.amount=0.25
+spark.executor.resource.gpu.amount=1
+spark.executor.memoryOverhead=4G
+spark.rapids.memory.pinnedPool.size=2G
+spark.locality.wait=0s
+spark.sql.files.maxPartitionBytes=512m
+spark.sql.shuffle.partitions=10
+spark.plugins=com.nvidia.spark.SQLPlugin
+spark.executor.resource.gpu.discoveryScript=/opt/sparkRapidsPlugin/getGpusResources.sh
+spark.executor.resource.gpu.vendor=nvidia.com