Skip to content

Commit

Permalink
add github action to build all images
Browse files Browse the repository at this point in the history
  • Loading branch information
thanh-nguyen-dang committed Jun 3, 2024
1 parent 9f0891a commit 8f2fbd3
Show file tree
Hide file tree
Showing 14 changed files with 82 additions and 41 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/image_build_push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ jobs:
OVERRIDE_REPO_NAME: hadoop-base
OVERRIDE_TAG_NAME: 3.3.0
DOCKERFILE_LOCATION: "./hadoop/base/Dockerfile"
DOCKERFILE_BUILD_CONTEXT: "./hadoop/base"
USE_QUAY_ONLY: true
secrets:
ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }}
Expand Down Expand Up @@ -49,7 +50,7 @@ jobs:
uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master
needs: [build-hadoop-base]
with:
OVERRIDE_REPO_NAME: namenode
OVERRIDE_REPO_NAME: datanode
OVERRIDE_TAG_NAME: 3.3.0
DOCKERFILE_LOCATION: "./hadoop/datanode/Dockerfile"
DOCKERFILE_BUILD_CONTEXT: "./hadoop/datanode"
Expand Down
10 changes: 3 additions & 7 deletions hadoop/base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,8 @@ RUN apt-get --only-upgrade install libpq-dev

ENV PATH="${PATH}:${SPARK_HOME}/bin:${SPARK_HOME}/sbin:${HADOOP_HOME}/sbin:${HADOOP_HOME}/bin:${JAVA_HOME}/bin:${SCALA_HOME}/bin"

COPY . /gen3spark
WORKDIR /gen3spark
ADD entrypoint.sh /entrypoint.sh

# ENV TINI_VERSION v0.18.0
# ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
# RUN chmod +x /tini
# ENTRYPOINT ["/tini", "--"]
RUN chmod a+x /entrypoint.sh

CMD ["/usr/sbin/sshd", "-D"]
ENTRYPOINT ["/entrypoint.sh"]
5 changes: 2 additions & 3 deletions hadoop/datanode/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@ FROM quay.io/cdis/hadoop-base:3.3.0

HEALTHCHECK CMD curl -f http://localhost:9864/ || exit 1

ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
ENV HDFS_CONF_dfs_datanode_data_dir=$HADOOP_HDFS_HOME/hdfs/data/dfs/datanode
RUN mkdir -p /hadoop/dfs/data
VOLUME /hadoop/dfs/data

ADD run.sh /run.sh
RUN chmod a+x /run.sh
WORKDIR /gen3spark

EXPOSE 9864

CMD ["/gen3spark/run.sh"]
CMD ["/run.sh"]
9 changes: 4 additions & 5 deletions hadoop/historyserver/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@ FROM quay.io/cdis/hadoop-base:3.3.0

HEALTHCHECK CMD curl -f http://localhost:9870/ || exit 1

ENV HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
ENV HDFS_CONF_dfs_namenode_name_dir=hdfs://namenode:9000/hadoop/dfs/name
RUN mkdir -p /hadoop/dfs/name
VOLUME /hadoop/dfs/name

ADD run.sh /gen3spark/run.sh
RUN chmod a+x /gen3spark/run.sh
WORKDIR /gen3spark
ADD run.sh /run.sh
RUN chmod a+x /run.sh

EXPOSE 9870

CMD ["/gen3spark/run.sh"]
CMD ["/run.sh"]
9 changes: 4 additions & 5 deletions hadoop/namenode/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@ FROM quay.io/cdis/hadoop-base:3.3.0

HEALTHCHECK CMD curl -f http://localhost:9870/ || exit 1

ENV HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
ENV HDFS_CONF_dfs_namenode_name_dir=hdfs://namenode:9000/hadoop/dfs/name
RUN mkdir -p /hadoop/dfs/name
VOLUME /hadoop/dfs/name

ADD run.sh /gen3spark/run.sh
RUN chmod a+x /gen3spark/run.sh
WORKDIR /gen3spark
ADD run.sh /run.sh
RUN chmod a+x /run.sh

EXPOSE 9870

CMD ["/gen3spark/run.sh"]
CMD ["/run.sh"]
5 changes: 2 additions & 3 deletions hadoop/nodemanager/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@ FROM quay.io/cdis/hadoop-base:3.3.0

HEALTHCHECK CMD curl -f http://localhost:9864/ || exit 1

ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
ENV HDFS_CONF_dfs_datanode_data_dir=hdfs://namenode:9000/hadoop/dfs/data
RUN mkdir -p /hadoop/dfs/data
VOLUME /hadoop/dfs/data

ADD run.sh /run.sh
RUN chmod a+x /run.sh
WORKDIR /gen3spark

EXPOSE 9864

CMD ["/gen3spark/run.sh"]
CMD ["/run.sh"]
5 changes: 2 additions & 3 deletions hadoop/resourcemanager/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@ FROM quay.io/cdis/hadoop-base:3.3.0

HEALTHCHECK CMD curl -f http://localhost:9864/ || exit 1

ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
ENV HDFS_CONF_dfs_datanode_data_dir=hdfs://namenode:9000/hadoop/hadoop/dfs/data
RUN mkdir -p /hadoop/dfs/data
VOLUME /hadoop/dfs/data

ADD run.sh /run.sh
RUN chmod a+x /run.sh
WORKDIR /gen3spark

EXPOSE 9864

CMD ["/gen3spark/run.sh"]
CMD ["/run.sh"]
9 changes: 0 additions & 9 deletions spark/base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,4 @@ ENV PATH="${PATH}:${SPARK_HOME}/bin:${SPARK_HOME}/sbin:${HADOOP_HOME}/sbin:${HAD

RUN mkdir -p /var/run/sshd ${HADOOP_HOME}/hdfs ${HADOOP_HOME}/hdfs/data ${HADOOP_HOME}/hdfs/data/dfs ${HADOOP_HOME}/hdfs/data/dfs/namenode ${HADOOP_HOME}/logs

COPY . /gen3spark
WORKDIR /gen3spark

# ENV TINI_VERSION v0.18.0
# ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
# RUN chmod +x /tini
# ENTRYPOINT ["/tini", "--"]

ENV PYTHONHASHSEED 1
CMD ["/usr/sbin/sshd", "-D"]
3 changes: 1 addition & 2 deletions spark/master/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ ENV SPARK_MASTER_LOG /spark/logs

EXPOSE 8080 7077 6066

COPY master.sh /gen3spark/
WORKDIR /gen3spark
COPY master.sh /

CMD ["/bin/bash", "/master.sh"]

16 changes: 16 additions & 0 deletions spark/master/master.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# Entrypoint for the Spark standalone master container.
# SPARK_MASTER_HOST defaults to the container hostname; SPARK_MASTER_PORT and
# SPARK_MASTER_WEBUI_PORT are expected from the image environment
# (presumably set by the base image or spark-config.sh — TODO confirm).

export SPARK_MASTER_HOST="${SPARK_MASTER_HOST:-$(hostname)}"

export SPARK_HOME=/spark

# Pull in Spark's standard config and env (defines SPARK_MASTER_LOG, ports, ...).
. "/spark/sbin/spark-config.sh"

. "/spark/bin/load-spark-env.sh"

mkdir -p "$SPARK_MASTER_LOG"

# Route the master log file to the container's stdout so `docker logs` sees it.
ln -sf /dev/stdout "$SPARK_MASTER_LOG/spark-master.out"

# exec so the master JVM becomes PID 1 and receives SIGTERM on `docker stop`.
cd /spark/bin && exec /spark/bin/spark-class org.apache.spark.deploy.master.Master \
    --ip "$SPARK_MASTER_HOST" --port "$SPARK_MASTER_PORT" --webui-port "$SPARK_MASTER_WEBUI_PORT" >> "$SPARK_MASTER_LOG/spark-master.out"
3 changes: 1 addition & 2 deletions spark/submit/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ LABEL maintainer="Gezim Sejdiu <[email protected]>, Giannis Mouchakis <gmouchak
ENV SPARK_MASTER_NAME spark-master
ENV SPARK_MASTER_PORT 7077

COPY submit.sh /gen3spark/
WORKDIR /gen3spark
COPY submit.sh /

CMD ["/bin/bash", "/submit.sh"]
30 changes: 30 additions & 0 deletions spark/submit/submit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
# Entrypoint for the Spark submit container: waits for its pipeline step,
# then submits either a JAR (SPARK_APPLICATION_JAR_LOCATION) or a Python app
# (SPARK_APPLICATION_PYTHON_LOCATION) to the standalone master.

export SPARK_MASTER_URL="spark://${SPARK_MASTER_NAME}:${SPARK_MASTER_PORT}"
export SPARK_HOME=/spark

# Pipeline-step coordination hooks baked into the image (external scripts).
/wait-for-step.sh
/execute-step.sh

if [ -n "${SPARK_APPLICATION_JAR_LOCATION}" ]; then
    echo "Submit application ${SPARK_APPLICATION_JAR_LOCATION} with main class ${SPARK_APPLICATION_MAIN_CLASS} to Spark master ${SPARK_MASTER_URL}"
    echo "Passing arguments ${SPARK_APPLICATION_ARGS}"
    # NOTE: was "/${SPARK_HOME}/bin/spark-submit", which expands to
    # "//spark/bin/spark-submit"; SPARK_HOME is already absolute.
    "${SPARK_HOME}/bin/spark-submit" \
        --class "${SPARK_APPLICATION_MAIN_CLASS}" \
        --master "${SPARK_MASTER_URL}" \
        ${SPARK_SUBMIT_ARGS} \
        "${SPARK_APPLICATION_JAR_LOCATION}" ${SPARK_APPLICATION_ARGS}
elif [ -n "${SPARK_APPLICATION_PYTHON_LOCATION}" ]; then
    echo "Submit application ${SPARK_APPLICATION_PYTHON_LOCATION} to Spark master ${SPARK_MASTER_URL}"
    echo "Passing arguments ${SPARK_APPLICATION_ARGS}"
    # SPARK_SUBMIT_ARGS / SPARK_APPLICATION_ARGS stay unquoted on purpose:
    # they may contain several whitespace-separated arguments.
    PYSPARK_PYTHON=python3 "${SPARK_HOME}/bin/spark-submit" \
        --master "${SPARK_MASTER_URL}" \
        ${SPARK_SUBMIT_ARGS} \
        "${SPARK_APPLICATION_PYTHON_LOCATION}" ${SPARK_APPLICATION_ARGS}
else
    echo "Not recognized application."
fi

# Signal step completion regardless of which branch ran (external hook).
/finish-step.sh
2 changes: 1 addition & 1 deletion spark/worker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ ENV SPARK_MASTER "spark://spark-master:7077"

EXPOSE 8081

COPY worker.sh /gen3spark/
COPY worker.sh /

CMD ["/bin/bash", "/worker.sh"]
14 changes: 14 additions & 0 deletions spark/worker/worker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# Entrypoint for a Spark standalone worker container.
# SPARK_MASTER and SPARK_WORKER_WEBUI_PORT come from the image environment;
# SPARK_WORKER_LOG is defined by the sourced Spark config scripts.

export SPARK_HOME=/spark

. "/spark/sbin/spark-config.sh"

. "/spark/bin/load-spark-env.sh"

mkdir -p "$SPARK_WORKER_LOG"

# Route the worker log file to the container's stdout so `docker logs` sees it.
ln -sf /dev/stdout "$SPARK_WORKER_LOG/spark-worker.out"

# exec so the worker JVM becomes PID 1 and receives SIGTERM on `docker stop`.
exec /spark/bin/spark-class org.apache.spark.deploy.worker.Worker \
    --webui-port "$SPARK_WORKER_WEBUI_PORT" "$SPARK_MASTER" >> "$SPARK_WORKER_LOG/spark-worker.out"

0 comments on commit 8f2fbd3

Please sign in to comment.