forked from CoorpAcademy/docker-pyspark
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a5b243a
commit 01f793d
Showing
1 changed file
with
49 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
|
||
|
||
# Install Anaconda3 2019.10 (Linux x86_64) into /opt/conda.
#
# Earlier interactive attempt, kept for reference:
#RUN curl -O https://repo.anaconda.com/archive/Anaconda3-2019.10-Linux-x86_64.sh
#RUN yes | bash Anaconda3-2019.10-Linux-x86_64.sh
#RUN source ~/.bashrc
#
# Everything happens in a single layer so the installer script and the conda
# package caches never persist in the image:
#   * bash and curl are installed as the virtual package "build_deps"
#     (bash runs the installer, curl downloads it);
#   * curl -o (lowercase) saves the download as Anaconda.sh; -f makes the
#     step fail on an HTTP error instead of saving the error page;
#   * the installer runs in batch mode (-b) with prefix /opt/conda (-p);
#   * /opt/conda/bin is prepended to PATH in root's .bashrc (the absolute
#     path needs its leading slash; ${PATH} is expanded at build time);
#   * the installer script and conda caches are removed.
# NOTE(review): the base image is not visible here; the Anaconda installer
# ships glibc-linked binaries, so confirm the base image provides glibc.
RUN apk add --no-cache --virtual build_deps bash curl && \
    curl -fsSL -o Anaconda.sh https://repo.anaconda.com/archive/Anaconda3-2019.10-Linux-x86_64.sh && \
    bash Anaconda.sh -b -p /opt/conda && \
    echo "export PATH=/opt/conda/bin:${PATH}" >> /root/.bashrc && \
    rm -f Anaconda.sh && \
    /opt/conda/bin/conda clean -afy
|
||
|
||
# Installing IPython (disabled: the commands below are commented out)
#RUN apk add --no-cache python-dev py-pip
#RUN pip install --upgrade pip
#RUN pip install https://github.com/jonathanslenders/python-prompt-toolkit/archive/master.zip
#RUN pip install ipython
|
||
|
||
# HADOOP
#
# Downloads the Hadoop $HADOOP_VERSION binary distribution from the Apache
# archive and unpacks it to $HADOOP_HOME. HADOOP_HOME/bin is appended to PATH
# and HADOOP_CONF_DIR points at the distribution's etc/hadoop directory.
# Each ENV that references an earlier variable is its own instruction so the
# earlier value is already defined when it is expanded.
ENV HADOOP_VERSION=2.7.2
ENV HADOOP_HOME=/usr/hadoop-$HADOOP_VERSION
ENV HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
ENV PATH=$PATH:$HADOOP_HOME/bin

# Download, extract, and clean up in one layer: the tarball is deleted right
# after extraction so it does not stay in the image, and the bundled
# documentation under share/doc is dropped.
# NOTE(review): the archive is fetched without checksum verification; consider
# verifying it against the published checksum for this release.
RUN wget http://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz && \
    tar -xzf hadoop-$HADOOP_VERSION.tar.gz && \
    rm -f hadoop-$HADOOP_VERSION.tar.gz && \
    mv hadoop-$HADOOP_VERSION $HADOOP_HOME && \
    rm -rf $HADOOP_HOME/share/doc
|
||
# SPARK
#
# Installs the "without-hadoop" Spark build and points it at the Hadoop jars
# through SPARK_DIST_CLASSPATH. The default command starts a standalone
# Spark master.

# CA certificates and wget are needed for the HTTPS download below;
# --no-cache keeps the apk index out of the layer.
RUN apk add --no-cache ca-certificates wget && update-ca-certificates

ENV SPARK_VERSION=2.4.0
ENV SPARK_PACKAGE=spark-$SPARK_VERSION-bin-without-hadoop
ENV SPARK_HOME=/usr/spark-$SPARK_VERSION
# PySpark uses ipython for the driver and python3 for the workers.
ENV PYSPARK_DRIVER_PYTHON=ipython
ENV PYSPARK_PYTHON=python3
# Hadoop configuration and jar directories that Spark adds to its classpath.
ENV SPARK_DIST_CLASSPATH="$HADOOP_HOME/etc/hadoop/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/tools/lib/*"
ENV PATH=$PATH:$SPARK_HOME/bin

# The URL and archive name are derived from SPARK_VERSION / SPARK_PACKAGE so
# that changing the version above is sufficient. With the current values this
# resolves to:
#   https://archive.apache.org/dist/spark/spark-2.4.0/spark-2.4.0-bin-without-hadoop.tgz
# The tarball is removed in the same layer that downloads it, and the
# examples and ec2 directories are deleted.
RUN wget https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/$SPARK_PACKAGE.tgz && \
    tar -xzf $SPARK_PACKAGE.tgz && \
    rm -f $SPARK_PACKAGE.tgz && \
    mv $SPARK_PACKAGE $SPARK_HOME && \
    rm -rf $SPARK_HOME/examples $SPARK_HOME/ec2

# SPARK_HOME is already absolute; prefixing it with "/" would give "//usr/...".
WORKDIR $SPARK_HOME
# Relative to WORKDIR: launches the standalone master via spark-class.
CMD ["bin/spark-class", "org.apache.spark.deploy.master.Master"]