From 6dc327631f6a7ff7f1af86a5627d74edc9046cb0 Mon Sep 17 00:00:00 2001 From: Po-Yuan Teng Date: Thu, 25 Apr 2019 08:43:31 +0000 Subject: [PATCH 1/4] Add .env and ARG to ease SPARK_VERSION changes --- .env | 1 + build-images.sh | 10 ++++++---- docker-compose.yml | 11 ++++++----- docker/base/Dockerfile | 3 ++- docker/spark-master/Dockerfile | 6 ++++-- docker/spark-submit/Dockerfile | 4 +++- docker/spark-worker/Dockerfile | 4 +++- 7 files changed, 25 insertions(+), 14 deletions(-) create mode 100644 .env diff --git a/.env b/.env new file mode 100644 index 0000000..4bf3c57 --- /dev/null +++ b/.env @@ -0,0 +1 @@ +SPARK_VERSION=2.4.2 diff --git a/build-images.sh b/build-images.sh index 627fdb3..7100d95 100755 --- a/build-images.sh +++ b/build-images.sh @@ -2,7 +2,9 @@ set -e -docker build -t spark-base:2.3.1 ./docker/base -docker build -t spark-master:2.3.1 ./docker/spark-master -docker build -t spark-worker:2.3.1 ./docker/spark-worker -docker build -t spark-submit:2.3.1 ./docker/spark-submit \ No newline at end of file +source ./.env + +docker build -t spark-base:${SPARK_VERSION} ./docker/base --build-arg SPARK_VERSION=${SPARK_VERSION} +docker build -t spark-master:${SPARK_VERSION} ./docker/spark-master --build-arg FROM_IMAGE=spark-base:${SPARK_VERSION} +docker build -t spark-worker:${SPARK_VERSION} ./docker/spark-worker --build-arg FROM_IMAGE=spark-base:${SPARK_VERSION} +docker build -t spark-submit:${SPARK_VERSION} ./docker/spark-submit --build-arg FROM_IMAGE=spark-base:${SPARK_VERSION} diff --git a/docker-compose.yml b/docker-compose.yml index 5a67659..aa1f6d2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,8 @@ version: "3.7" + services: spark-master: - image: spark-master:2.3.1 + image: spark-master:${SPARK_VERSION} container_name: spark-master hostname: spark-master ports: @@ -16,7 +17,7 @@ services: environment: - "SPARK_LOCAL_IP=spark-master" spark-worker-1: - image: spark-worker:2.3.1 + image: spark-worker:${SPARK_VERSION} container_name: spark-worker-1 hostname: spark-worker-1 depends_on: @@ -33,7 +34,7 @@ services: - /mnt/spark-apps:/opt/spark-apps - /mnt/spark-data:/opt/spark-data spark-worker-2: - image: spark-worker:2.3.1 + image: spark-worker:${SPARK_VERSION} container_name: spark-worker-2 hostname: spark-worker-2 depends_on: @@ -50,7 +51,7 @@ services: - /mnt/spark-apps:/opt/spark-apps - /mnt/spark-data:/opt/spark-data spark-worker-3: - image: spark-worker:2.3.1 + image: spark-worker:${SPARK_VERSION} container_name: spark-worker-3 hostname: spark-worker-3 depends_on: @@ -72,4 +73,4 @@ networks: ipam: driver: default config: - - subnet: 10.5.0.0/16 \ No newline at end of file + - subnet: 10.5.0.0/16 diff --git a/docker/base/Dockerfile b/docker/base/Dockerfile index 686a02c..269beee 100644 --- a/docker/base/Dockerfile +++ b/docker/base/Dockerfile @@ -1,7 +1,8 @@ FROM java:8-jdk-alpine +ARG SPARK_VERSION + ENV DAEMON_RUN=true -ENV SPARK_VERSION=2.4.0 ENV HADOOP_VERSION=2.7 ENV SCALA_VERSION=2.12.4 ENV SCALA_HOME=/usr/share/scala diff --git a/docker/spark-master/Dockerfile b/docker/spark-master/Dockerfile index f47285b..4ca8a51 100644 --- a/docker/spark-master/Dockerfile +++ b/docker/spark-master/Dockerfile @@ -1,4 +1,6 @@ -FROM spark-base:2.3.1 +ARG FROM_IMAGE + +FROM ${FROM_IMAGE} COPY start-master.sh / @@ -8,4 +10,4 @@ ENV SPARK_MASTER_LOG /spark/logs EXPOSE 8080 7077 6066 -CMD ["/bin/bash", "/start-master.sh"] \ No newline at end of file +CMD ["/bin/bash", "/start-master.sh"] diff --git a/docker/spark-submit/Dockerfile b/docker/spark-submit/Dockerfile index 97da3b8..a52d0de 100644 --- a/docker/spark-submit/Dockerfile +++ b/docker/spark-submit/Dockerfile @@ -1,4 +1,6 @@ -FROM spark-base:2.3.1 +ARG FROM_IMAGE + +FROM ${FROM_IMAGE} COPY spark-submit.sh / diff --git a/docker/spark-worker/Dockerfile b/docker/spark-worker/Dockerfile index 744a41e..9b2e509 100644 --- a/docker/spark-worker/Dockerfile +++ b/docker/spark-worker/Dockerfile @@ -1,4 +1,6 @@ -FROM spark-base:2.3.1 +ARG FROM_IMAGE + +FROM ${FROM_IMAGE} COPY start-worker.sh / From 6a2d62655605f9b7848af9156a69630c64b46f99 Mon Sep 17 00:00:00 2001 From: Po-Yuan Teng Date: Fri, 26 Apr 2019 09:44:56 +0000 Subject: [PATCH 2/4] add arg in .env for docker-compose.yml --- .env | 2 ++ docker-compose.yml | 16 ++++++++-------- run-spark-submit.sh | 20 ++++++++++++++++++++ 3 files changed, 30 insertions(+), 8 deletions(-) create mode 100755 run-spark-submit.sh diff --git a/.env b/.env index 4bf3c57..ab2ce14 100644 --- a/.env +++ b/.env @@ -1 +1,3 @@ SPARK_VERSION=2.4.2 +HOST_APP_FOLDER=/mnt/spark-apps +HOST_DATA_FOLDER=/mnt/spark-data diff --git a/docker-compose.yml b/docker-compose.yml index aa1f6d2..0b0ff71 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,8 +12,8 @@ services: spark-network: ipv4_address: 10.5.0.2 volumes: - - /mnt/spark-apps:/opt/spark-apps - - /mnt/spark-data:/opt/spark-data + - ${HOST_APP_FOLDER}:/opt/spark-apps + - ${HOST_DATA_FOLDER}:/opt/spark-data environment: - "SPARK_LOCAL_IP=spark-master" spark-worker-1: @@ -31,8 +31,8 @@ services: spark-network: ipv4_address: 10.5.0.3 volumes: - - /mnt/spark-apps:/opt/spark-apps - - /mnt/spark-data:/opt/spark-data + - ${HOST_APP_FOLDER}:/opt/spark-apps + - ${HOST_DATA_FOLDER}:/opt/spark-data spark-worker-2: image: spark-worker:${SPARK_VERSION} container_name: spark-worker-2 @@ -48,8 +48,8 @@ services: spark-network: ipv4_address: 10.5.0.4 volumes: - - /mnt/spark-apps:/opt/spark-apps - - /mnt/spark-data:/opt/spark-data + - ${HOST_APP_FOLDER}:/opt/spark-apps + - ${HOST_DATA_FOLDER}:/opt/spark-data spark-worker-3: image: spark-worker:${SPARK_VERSION} container_name: spark-worker-3 @@ -65,8 +65,8 @@ services: spark-network: ipv4_address: 10.5.0.5 volumes: - - /mnt/spark-apps:/opt/spark-apps - - /mnt/spark-data:/opt/spark-data + - ${HOST_APP_FOLDER}:/opt/spark-apps + - ${HOST_DATA_FOLDER}:/opt/spark-data networks: spark-network: driver: bridge diff --git a/run-spark-submit.sh b/run-spark-submit.sh new file mode 100755 index 0000000..048efe4 --- /dev/null +++ b/run-spark-submit.sh @@ -0,0 +1,20 @@ +source ./.env + +LOCAL_JAR_FOLDER="/data/poteng/workspace/spark-learn/spark/target" +JAR_NAME="spark-examples-1.0-SNAPSHOT.jar" +MAIN_CLASS="mygroup.spark.Pi" + +# copy files to host folder +mkdir -p ${HOST_APP_FOLDER} +mkdir -p ${HOST_DATA_FOLDER} +cp ${LOCAL_JAR_FOLDER}/${JAR_NAME} ${HOST_APP_FOLDER} + +SPARK_APPLICATION_JAR_LOCATION="/opt/spark-apps/$JAR_NAME" +SPARK_SUBMIT_ARGS="--conf spark.executor.extraJavaOptions='-Dconfig-path=/opt/spark-apps/dev/config.conf'" + +docker run --network docker-spark-cluster_spark-network \ +-v ${HOST_APP_FOLDER}:/opt/spark-apps \ +--env SPARK_APPLICATION_JAR_LOCATION=$SPARK_APPLICATION_JAR_LOCATION \ +--env SPARK_APPLICATION_MAIN_CLASS=$MAIN_CLASS \ +spark-submit:${SPARK_VERSION} + From 2b0426bd1d4ad883d875b05fe81beb7afd9da6bf Mon Sep 17 00:00:00 2001 From: Po-Yuan Teng Date: Fri, 31 May 2019 06:22:31 +0000 Subject: [PATCH 3/4] spark-submit change launching spark-submit.sh to docker run --- docker/spark-submit/Dockerfile | 5 ++--- docker/spark-submit/spark-submit.sh | 2 +- run-spark-submit.sh | 7 +++---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/docker/spark-submit/Dockerfile b/docker/spark-submit/Dockerfile index a52d0de..ad52c8d 100644 --- a/docker/spark-submit/Dockerfile +++ b/docker/spark-submit/Dockerfile @@ -6,9 +6,8 @@ COPY spark-submit.sh / ENV SPARK_MASTER_URL="spark://spark-master:7077" ENV SPARK_SUBMIT_ARGS="" -ENV SPARK_APPLICATION_ARGS "" +ENV SPARK_APPLICATION_ARGS="" #ENV SPARK_APPLICATION_JAR_LOCATION /opt/spark-apps/myjar.jar #ENV SPARK_APPLICATION_MAIN_CLASS my.main.Application - -CMD ["/bin/bash", "/spark-submit.sh"] +CMD ["/bin/bash"] diff --git a/docker/spark-submit/spark-submit.sh b/docker/spark-submit/spark-submit.sh index 15aa483..59d9939 100644 --- a/docker/spark-submit/spark-submit.sh +++ b/docker/spark-submit/spark-submit.sh @@ -7,4 +7,4 @@ --total-executor-cores 1 \ ${SPARK_SUBMIT_ARGS} \ ${SPARK_APPLICATION_JAR_LOCATION} \ - ${SPARK_APPLICATION_ARGS} \ \ No newline at end of file + ${SPARK_APPLICATION_ARGS} \ diff --git a/run-spark-submit.sh b/run-spark-submit.sh index 048efe4..f2fd9ef 100755 --- a/run-spark-submit.sh +++ b/run-spark-submit.sh @@ -2,19 +2,18 @@ source ./.env LOCAL_JAR_FOLDER="/data/poteng/workspace/spark-learn/spark/target" JAR_NAME="spark-examples-1.0-SNAPSHOT.jar" -MAIN_CLASS="mygroup.spark.Pi" +MAIN_CLASS="spark.Pi" -# copy files to host folder +# copy files to host folder defined in .env mkdir -p ${HOST_APP_FOLDER} mkdir -p ${HOST_DATA_FOLDER} cp ${LOCAL_JAR_FOLDER}/${JAR_NAME} ${HOST_APP_FOLDER} SPARK_APPLICATION_JAR_LOCATION="/opt/spark-apps/$JAR_NAME" -SPARK_SUBMIT_ARGS="--conf spark.executor.extraJavaOptions='-Dconfig-path=/opt/spark-apps/dev/config.conf'" docker run --network docker-spark-cluster_spark-network \ -v ${HOST_APP_FOLDER}:/opt/spark-apps \ --env SPARK_APPLICATION_JAR_LOCATION=$SPARK_APPLICATION_JAR_LOCATION \ --env SPARK_APPLICATION_MAIN_CLASS=$MAIN_CLASS \ -spark-submit:${SPARK_VERSION} +spark-submit:${SPARK_VERSION} sh spark-submit.sh From 576659b9ad223662a72bc22736c19d7ca4998e0e Mon Sep 17 00:00:00 2001 From: Po-Yuan Teng Date: Fri, 31 May 2019 09:39:10 +0000 Subject: [PATCH 4/4] set SPARK_PUBLIC_DNS and different ports for UI mapping --- .env | 1 + docker-compose.yml | 15 +++++++++------ docker/spark-worker/Dockerfile | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.env b/.env index ab2ce14..a002197 100644 --- a/.env +++ b/.env @@ -1,3 +1,4 @@ SPARK_VERSION=2.4.2 HOST_APP_FOLDER=/mnt/spark-apps HOST_DATA_FOLDER=/mnt/spark-data +HOST_NAME=poteng-centos.westus.cloudapp.azure.com diff --git a/docker-compose.yml b/docker-compose.yml index 0b0ff71..a7b2ae0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,7 +15,7 @@ services: - ${HOST_APP_FOLDER}:/opt/spark-apps - ${HOST_DATA_FOLDER}:/opt/spark-data environment: - - "SPARK_LOCAL_IP=spark-master" + - "SPARK_PUBLIC_DNS=${HOST_NAME}" spark-worker-1: image: spark-worker:${SPARK_VERSION} container_name: spark-worker-1 @@ -26,7 +26,8 @@ services: - "8081:8081" env_file: ./env/spark-worker.sh environment: - - "SPARK_LOCAL_IP=spark-worker-1" + - "SPARK_PUBLIC_DNS=${HOST_NAME}" + - "SPARK_WORKER_WEBUI_PORT=8081" networks: spark-network: ipv4_address: 10.5.0.3 @@ -40,10 +41,11 @@ services: depends_on: - spark-master ports: - - "8082:8081" + - "8082:8082" env_file: ./env/spark-worker.sh environment: - - "SPARK_LOCAL_IP=spark-worker-2" + - "SPARK_PUBLIC_DNS=${HOST_NAME}" + - "SPARK_WORKER_WEBUI_PORT=8082" networks: spark-network: ipv4_address: 10.5.0.4 @@ -57,10 +59,11 @@ services: depends_on: - spark-master ports: - - "8083:8081" + - "8083:8083" env_file: ./env/spark-worker.sh environment: - - "SPARK_LOCAL_IP=spark-worker-3" + - "SPARK_PUBLIC_DNS=${HOST_NAME}" + - "SPARK_WORKER_WEBUI_PORT=8083" networks: spark-network: ipv4_address: 10.5.0.5 diff --git a/docker/spark-worker/Dockerfile b/docker/spark-worker/Dockerfile index 9b2e509..881e231 100644 --- a/docker/spark-worker/Dockerfile +++ b/docker/spark-worker/Dockerfile @@ -8,6 +8,6 @@ ENV SPARK_WORKER_WEBUI_PORT 8081 ENV SPARK_WORKER_LOG /spark/logs ENV SPARK_MASTER "spark://spark-master:7077" -EXPOSE 8081 +EXPOSE 8081-8083 CMD ["/bin/bash", "/start-worker.sh"]