forked from luisbelloch/data_processing_course
-
Notifications
You must be signed in to change notification settings - Fork 0
/
local_setup.sh
executable file
·40 lines (32 loc) · 1.42 KB
/
local_setup.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/bash
#
# Installs a Spark binary distribution into a local folder.
#
# Environment:
#   SPARK_PKG   Name of the Spark package to fetch
#               (default: spark-2.1.0-bin-hadoop2.7).
#   SPARK_HOME  Destination folder (default: .spark in the current directory).
set -euo pipefail

SPARK_PKG=${SPARK_PKG:-spark-2.1.0-bin-hadoop2.7}
SPARK_HOME=${SPARK_HOME:-$(pwd)/.spark}

# Colour escape sequences used in status messages. They default to empty
# strings so that expanding them is always safe under `set -u`, even when the
# output is redirected or the terminal has no colour support.
colors=''
c_step=''
c_error=''
c_norm=''

# Status messages are written to stderr, so that is the stream whose
# terminal-ness decides whether colours are emitted.
if [[ -t 2 ]]; then
  # tput may be missing or TERM may be unknown; treat that as "no colours"
  # instead of aborting the whole script under `set -e`.
  colors=$(tput colors 2>/dev/null || true)
  if [[ -n "${colors}" ]]; then
    c_step=$(tput setaf 6 2>/dev/null || true)
    c_error=$(tput setaf 1 2>/dev/null || true)
    c_norm=$(tput sgr0 2>/dev/null || true)
  fi
fi
readonly colors c_step c_error c_norm
# Writes its arguments as a single line on stderr, joined by the first
# character of IFS (a space by default). With no arguments it prints an
# empty line. printf with a quoted "$*" keeps embedded whitespace and glob
# characters intact and never interprets an argument as an echo option.
stderr() { printf '%s\n' "$*" >&2; }
# Main flow: download Spark into SPARK_HOME, lower its log level, run a smoke
# test and print the PATH export line on stdout. Progress goes to stderr.
# Colour variables are expanded with `:-` so an unset one cannot trip `set -u`.

# Refuse to touch an existing folder.
if [[ -d "${SPARK_HOME}" ]]; then
  stderr "${c_error:-}ERROR${c_norm:-}: Folder already exists '$SPARK_HOME'"
  stderr "Set SPARK_HOME to an empty folder before running this script or make sure there's no 'spark' folder in current directory."
  exit 1
fi

# SPARK_URL lets the caller point at another mirror; the default is unchanged.
# NOTE(review): confirm the default host still serves Spark releases.
spark_url=${SPARK_URL:-http://d3kbcqa49mib13.cloudfront.net/${SPARK_PKG}.tgz}

stderr "${c_step:-}[0] Destination: ${SPARK_HOME}${c_norm:-}"
stderr "${c_step:-}[1] Downloading and unpacking $SPARK_PKG.tgz${c_norm:-}"
mkdir -p "${SPARK_HOME}"
# -f fails on HTTP errors instead of piping an error page into tar,
# -S still reports errors despite -s, -L follows redirects.
if ! curl -fsSL "${spark_url}" \
  | tar -xz -C "${SPARK_HOME}" --strip-components=1; then
  # Remove the folder created above, otherwise a rerun would stop at the
  # "Folder already exists" check.
  rm -rf -- "${SPARK_HOME:?}"
  stderr "${c_error:-}ERROR${c_norm:-}: Could not download or unpack '${spark_url}'"
  exit 1
fi

stderr "${c_step:-}[2] Reducing log level${c_norm:-}"
# Generate the config straight from the template; this avoids `sed -i`,
# whose syntax differs between GNU and BSD and which leaves a backup file.
sed 's/rootCategory=INFO/rootCategory=ERROR/g' \
  "${SPARK_HOME}/conf/log4j.properties.template" \
  > "${SPARK_HOME}/conf/log4j.properties"

stderr "${c_step:-}[3] Testing setup${c_norm:-}"
"${SPARK_HOME}/bin/spark-shell" <<< 'sc.parallelize(1 to 100).count()'
# Remove derby.log and metastore_db from the current directory after the test.
rm -rf -- derby.log metastore_db

stderr
stderr "${c_step:-}DONE! Local setup completed${c_norm:-}"
stderr "Spark unpacked properly. You can now modify your path:"
# Only this line goes to stdout. %q shell-escapes the path, and the entry
# points at bin/, which is where spark-shell lives.
printf 'export PATH=%q:$PATH\n' "${SPARK_HOME}/bin"