-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
52 lines (37 loc) · 1.77 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Would much prefer to use an official image, but it looks like they're only meant for
# submitting jobs rather than running a cluster?
# https://github.com/apache/spark-docker/blob/master/3.5.1/scala2.12-java17-ubuntu/entrypoint.sh
# Can't seem to find any usage documentation, so I've just been reading the code to figure out
# how they work
# Stage "build": downloads and unpacks the Spark distribution into /opt/$SPARK_VER so the
# download tooling stays out of the runtime image.
FROM ubuntu:22.04 AS build
# update + install share one layer so a cached package index can never be stale relative to the
# install. ca-certificates is listed explicitly because --no-install-recommends could otherwise
# skip it, and the download below is over https.
RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates wget && \
    rm -rf /var/lib/apt/lists/*
# Build-time only: name of the release tarball (minus .tgz) and of the directory it unpacks to.
ARG SPARK_VER=spark-3.5.1-bin-hadoop3-scala2.13
WORKDIR /opt
# Fetch from archive.apache.org: dlcdn.apache.org only serves current releases, so a pinned
# version stops being downloadable from there once it has been superseded.
# TODO should at least check the sha, or copy the pgp check from the official dockerfile
RUN wget -q https://archive.apache.org/dist/spark/spark-3.5.1/$SPARK_VER.tgz && \
    tar -xf $SPARK_VER.tgz && \
    rm $SPARK_VER.tgz
# Runtime stage: JRE base image plus Python, R and the Spark distribution unpacked by the
# "build" stage.
# The links below describe Spark failing on Java 17 with illegal-access errors — presumably the
# reason this stage is pinned to a Java 11 JRE.
# https://stackoverflow.com/questions/73465937/apache-spark-3-3-0-breaks-on-java-17-with-cannot-access-class-sun-nio-ch-direct
# https://stackoverflow.com/questions/72724816/running-unit-tests-with-spark-3-3-0-on-java-17-fails-with-illegalaccesserror-cl
FROM eclipse-temurin:11.0.23_9-jre-jammy
# SPARK_VER must match the directory name produced by the build stage (used by COPY --from below).
ENV SPARK_VER=spark-3.5.1-bin-hadoop3-scala2.13 \
    PYTHON_VER=python3.11
# install from deadsnakes so it's not an rc version
# DEBIAN_FRONTEND is exported inside this RUN only, so package configuration can never block the
# build on a prompt and the setting does not leak into the runtime environment.
# NOTE(review): --no-install-recommends is deliberately not added here; it has not been verified
# that add-apt-repository and the R toolchain still work without their recommended packages.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update && \
    apt-get install -y software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y $PYTHON_VER python3-pip r-base r-base-dev && \
    rm -rf /var/lib/apt/lists/*
# Sanity check: fail the build early if the interpreter is missing.
RUN $PYTHON_VER --version
# Directory that exposes $PYTHON_VER under the name "python3"
# (presumably placed on PATH by entrypoint.sh — confirm).
RUN mkdir -p /opt/py && ln -s /usr/bin/$PYTHON_VER /opt/py/python3
# Make /usr/bin/pip a wrapper that runs pip as a module of $PYTHON_VER. "$@" is quoted so that
# arguments containing spaces survive, and exec hands signals and the exit status straight to pip.
RUN printf '#!/usr/bin/bash\nexec %s -m pip "$@"\n' "$PYTHON_VER" > /usr/bin/pip && \
    chmod a+x /usr/bin/pip
ENV R_HOME=/usr/lib/R
# COPY creates /opt/spark itself, so no separate mkdir is needed.
COPY --from=build /opt/$SPARK_VER/ /opt/spark/
# this doesn't seem to actually work
RUN echo "spark.pyspark.python /usr/bin/$PYTHON_VER" > /opt/spark/conf/spark-defaults.conf
COPY entrypoint.sh /opt/
RUN chmod a+x /opt/entrypoint.sh
ENTRYPOINT ["/opt/entrypoint.sh"]