Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add .env and ARG to ease SPARK_VERSION changes #14

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Spark release used to tag and build every cluster image (see build-images.sh).
SPARK_VERSION=2.4.2
# Host folders bind-mounted into the containers as /opt/spark-apps and
# /opt/spark-data (see the volumes sections in docker-compose.yml).
HOST_APP_FOLDER=/mnt/spark-apps
HOST_DATA_FOLDER=/mnt/spark-data
# Public DNS name advertised by master/workers via SPARK_PUBLIC_DNS.
HOST_NAME=poteng-centos.westus.cloudapp.azure.com
10 changes: 6 additions & 4 deletions build-images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

set -e

docker build -t spark-base:2.3.1 ./docker/base
docker build -t spark-master:2.3.1 ./docker/spark-master
docker build -t spark-worker:2.3.1 ./docker/spark-worker
docker build -t spark-submit:2.3.1 ./docker/spark-submit
# Pull SPARK_VERSION (and friends) from the shared env file.
source ./.env

# Build the shared base image first; all other images build FROM it.
docker build -t "spark-base:${SPARK_VERSION}" ./docker/base --build-arg "SPARK_VERSION=${SPARK_VERSION}"

# Master, worker and submit images all derive from the freshly built base.
for image in spark-master spark-worker spark-submit; do
    docker build -t "${image}:${SPARK_VERSION}" "./docker/${image}" --build-arg "FROM_IMAGE=spark-base:${SPARK_VERSION}"
done
42 changes: 23 additions & 19 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
version: "3.7"

services:
spark-master:
image: spark-master:2.3.1
image: spark-master:${SPARK_VERSION}
container_name: spark-master
hostname: spark-master
ports:
Expand All @@ -11,12 +12,12 @@ services:
spark-network:
ipv4_address: 10.5.0.2
volumes:
- /mnt/spark-apps:/opt/spark-apps
- /mnt/spark-data:/opt/spark-data
- ${HOST_APP_FOLDER}:/opt/spark-apps
- ${HOST_DATA_FOLDER}:/opt/spark-data
environment:
- "SPARK_LOCAL_IP=spark-master"
- "SPARK_PUBLIC_DNS=${HOST_NAME}"
spark-worker-1:
image: spark-worker:2.3.1
image: spark-worker:${SPARK_VERSION}
container_name: spark-worker-1
hostname: spark-worker-1
depends_on:
Expand All @@ -25,51 +26,54 @@ services:
- "8081:8081"
env_file: ./env/spark-worker.sh
environment:
- "SPARK_LOCAL_IP=spark-worker-1"
- "SPARK_PUBLIC_DNS=${HOST_NAME}"
- "SPARK_WORKER_WEBUI_PORT=8081"
networks:
spark-network:
ipv4_address: 10.5.0.3
volumes:
- /mnt/spark-apps:/opt/spark-apps
- /mnt/spark-data:/opt/spark-data
- ${HOST_APP_FOLDER}:/opt/spark-apps
- ${HOST_DATA_FOLDER}:/opt/spark-data
spark-worker-2:
image: spark-worker:2.3.1
image: spark-worker:${SPARK_VERSION}
container_name: spark-worker-2
hostname: spark-worker-2
depends_on:
- spark-master
ports:
- "8082:8081"
- "8082:8082"
env_file: ./env/spark-worker.sh
environment:
- "SPARK_LOCAL_IP=spark-worker-2"
- "SPARK_PUBLIC_DNS=${HOST_NAME}"
- "SPARK_WORKER_WEBUI_PORT=8082"
networks:
spark-network:
ipv4_address: 10.5.0.4
volumes:
- /mnt/spark-apps:/opt/spark-apps
- /mnt/spark-data:/opt/spark-data
- ${HOST_APP_FOLDER}:/opt/spark-apps
- ${HOST_DATA_FOLDER}:/opt/spark-data
spark-worker-3:
image: spark-worker:2.3.1
image: spark-worker:${SPARK_VERSION}
container_name: spark-worker-3
hostname: spark-worker-3
depends_on:
- spark-master
ports:
- "8083:8081"
- "8083:8083"
env_file: ./env/spark-worker.sh
environment:
- "SPARK_LOCAL_IP=spark-worker-3"
- "SPARK_PUBLIC_DNS=${HOST_NAME}"
- "SPARK_WORKER_WEBUI_PORT=8083"
networks:
spark-network:
ipv4_address: 10.5.0.5
volumes:
- /mnt/spark-apps:/opt/spark-apps
- /mnt/spark-data:/opt/spark-data
- ${HOST_APP_FOLDER}:/opt/spark-apps
- ${HOST_DATA_FOLDER}:/opt/spark-data
networks:
spark-network:
driver: bridge
ipam:
driver: default
config:
- subnet: 10.5.0.0/16
- subnet: 10.5.0.0/16
3 changes: 2 additions & 1 deletion docker/base/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
FROM java:8-jdk-alpine

ARG SPARK_VERSION

ENV DAEMON_RUN=true
ENV SPARK_VERSION=2.4.0
ENV HADOOP_VERSION=2.7
ENV SCALA_VERSION=2.12.4
ENV SCALA_HOME=/usr/share/scala
Expand Down
6 changes: 4 additions & 2 deletions docker/spark-master/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
FROM spark-base:2.3.1
ARG FROM_IMAGE

FROM ${FROM_IMAGE}

COPY start-master.sh /

Expand All @@ -8,4 +10,4 @@ ENV SPARK_MASTER_LOG /spark/logs

EXPOSE 8080 7077 6066

CMD ["/bin/bash", "/start-master.sh"]
CMD ["/bin/bash", "/start-master.sh"]
9 changes: 5 additions & 4 deletions docker/spark-submit/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
FROM spark-base:2.3.1
ARG FROM_IMAGE

FROM ${FROM_IMAGE}

COPY spark-submit.sh /

ENV SPARK_MASTER_URL="spark://spark-master:7077"
ENV SPARK_SUBMIT_ARGS=""
ENV SPARK_APPLICATION_ARGS ""
ENV SPARK_APPLICATION_ARGS=""
#ENV SPARK_APPLICATION_JAR_LOCATION /opt/spark-apps/myjar.jar
#ENV SPARK_APPLICATION_MAIN_CLASS my.main.Application


CMD ["/bin/bash", "/spark-submit.sh"]
CMD ["/bin/bash"]
2 changes: 1 addition & 1 deletion docker/spark-submit/spark-submit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
--total-executor-cores 1 \
${SPARK_SUBMIT_ARGS} \
${SPARK_APPLICATION_JAR_LOCATION} \
${SPARK_APPLICATION_ARGS} \
${SPARK_APPLICATION_ARGS} \
6 changes: 4 additions & 2 deletions docker/spark-worker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
FROM spark-base:2.3.1
ARG FROM_IMAGE

FROM ${FROM_IMAGE}

COPY start-worker.sh /

ENV SPARK_WORKER_WEBUI_PORT 8081
ENV SPARK_WORKER_LOG /spark/logs
ENV SPARK_MASTER "spark://spark-master:7077"

EXPOSE 8081
EXPOSE 8081-8083

CMD ["/bin/bash", "/start-worker.sh"]
19 changes: 19 additions & 0 deletions run-spark-submit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Copy the example job jar into the shared host folder and run it on the
# cluster through a one-off spark-submit container.

# Abort on any failure (e.g. a failed cp must not launch the container),
# on unset variables, and on failures inside pipelines.
set -euo pipefail

# Pull SPARK_VERSION and HOST_* folders from the shared env file.
source ./.env

LOCAL_JAR_FOLDER="/data/poteng/workspace/spark-learn/spark/target"
JAR_NAME="spark-examples-1.0-SNAPSHOT.jar"
MAIN_CLASS="spark.Pi"

# Copy files to the host folders defined in .env (created if missing).
mkdir -p "${HOST_APP_FOLDER}" "${HOST_DATA_FOLDER}"
cp "${LOCAL_JAR_FOLDER}/${JAR_NAME}" "${HOST_APP_FOLDER}/"

# Path of the jar as seen from inside the container (volume mount below).
SPARK_APPLICATION_JAR_LOCATION="/opt/spark-apps/${JAR_NAME}"

# --rm removes the finished container so repeated runs do not accumulate.
docker run --rm --network docker-spark-cluster_spark-network \
    -v "${HOST_APP_FOLDER}":/opt/spark-apps \
    --env SPARK_APPLICATION_JAR_LOCATION="${SPARK_APPLICATION_JAR_LOCATION}" \
    --env SPARK_APPLICATION_MAIN_CLASS="${MAIN_CLASS}" \
    "spark-submit:${SPARK_VERSION}" sh spark-submit.sh