-
Notifications
You must be signed in to change notification settings - Fork 602
/
jsl_sagemaker_setup_3.0.1.sh
35 lines (28 loc) · 1.52 KB
/
jsl_sagemaker_setup_3.0.1.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/bin/bash
#
# Prepares a SageMaker instance for PySpark and Spark NLP:
#   1. switches the system `java` alternative to OpenJDK 8 (best effort),
#   2. downloads and unpacks the Spark distribution matching $PYSPARK,
#   3. exports SPARK_HOME pointing at the unpacked distribution,
#   4. pip-installs pyspark, spark-nlp, findspark and spark-nlp-jsl.
#
# Required environment variables:
#   JSL_VERSION  version of spark-nlp-jsl to install
#   SECRET       path component appended to the pypi.johnsnowlabs.com index URL
#
# `set -e` is intentionally not used; every critical step is checked
# explicitly so that failures produce a clear message.

PYSPARK="3.1.1"
SPARKNLP="3.0.1"
INSTALL_DIR="/home/ec2-user/SageMaker"
SPARKHOME="${INSTALL_DIR}/spark-3.1.1-bin-hadoop2.7"

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Download and unpack a Spark (hadoop2.7 build) distribution.
# Globals:   INSTALL_DIR (read)
# Arguments: $1 - Spark version, e.g. 3.1.1
# Returns:   0 on success, 1 if the download or the extraction failed
#######################################
fetch_spark() {
  local version=$1
  local archive="spark-${version}-bin-hadoop2.7.tgz"
  local tarball="${INSTALL_DIR}/${archive}"
  local url

  mkdir -p -- "$INSTALL_DIR" || return 1

  # downloads.apache.org only carries current releases; superseded versions
  # are moved to archive.apache.org, so fall back to it.
  for url in \
    "https://downloads.apache.org/spark/spark-${version}/${archive}" \
    "https://archive.apache.org/dist/spark/spark-${version}/${archive}"; do
    if wget -q -O "$tarball" "$url"; then
      # Unpack next to the tarball so SPARK_HOME is valid regardless of the
      # directory the script was started from.
      tar -xf "$tarball" -C "$INSTALL_DIR" || return 1
      return 0
    fi
    # wget -O leaves an empty/partial file behind on failure.
    rm -f -- "$tarball"
  done
  return 1
}

echo "setup SageMaker for PySpark $PYSPARK and Spark NLP $SPARKNLP"

# Select OpenJDK 8 as the active java. `alternatives --display` may also print
# indented informational lines mentioning the same path; after `cut` those
# yield an empty first field, so keep only the first absolute path.
JAVA_8=$(alternatives --display java \
  | grep 'jre-1.8.0-openjdk.x86_64/bin/java' \
  | cut -d' ' -f1 \
  | grep -m 1 '^/')
if [[ -n "$JAVA_8" ]]; then
  sudo alternatives --set java "$JAVA_8" \
    || printf 'warning: could not switch java to %s\n' "$JAVA_8" >&2
else
  printf 'warning: OpenJDK 8 alternative not found; leaving java unchanged\n' >&2
fi

# Map the requested PySpark line to the Spark release to download.
case "$PYSPARK" in
  3.0*) SPARK_VERSION="3.0.2" ;;
  2*)   SPARK_VERSION="2.4.7" ;;
  *)    SPARK_VERSION="3.1.1" ;;  # 3.1.x and any unrecognised value
esac

fetch_spark "$SPARK_VERSION" \
  || die "could not download or unpack Spark $SPARK_VERSION"
SPARKHOME="${INSTALL_DIR}/spark-${SPARK_VERSION}-bin-hadoop2.7"

export SPARK_HOME="$SPARKHOME"

# Install pyspark spark-nlp
pip install --upgrade -q "pyspark==$PYSPARK" "spark-nlp==$SPARKNLP" findspark \
  || die "pip install of pyspark/spark-nlp/findspark failed"

# The licensed package needs both the version and the index secret.
[[ -n "${JSL_VERSION:-}" ]] || die "JSL_VERSION must be set"
[[ -n "${SECRET:-}" ]] || die "SECRET must be set"
pip install --upgrade -q "spark-nlp-jsl==$JSL_VERSION" \
  --extra-index-url "https://pypi.johnsnowlabs.com/$SECRET" \
  || die "pip install of spark-nlp-jsl failed"