Skip to content

Commit

Permalink
Add docker image to the repo (#876)
Browse files Browse the repository at this point in the history
I think it makes sense to have the Dockerfile
in the repository itself.

Resolves #739

Co-authored-by: Anders <[email protected]>
Co-authored-by: colin-rogers-dbt <[email protected]>
  • Loading branch information
3 people authored Aug 31, 2023
1 parent 01c9fd0 commit efa1b18
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 2 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Security-20230817-145626.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Security
body: Add docker image to the repo
time: 2023-08-17T14:56:26.361208+02:00
custom:
Author: Fokko
PR: "876"
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ version: "3.7"
services:

dbt-spark3-thrift:
image: godatadriven/spark:3.1.1
build: docker/
ports:
- "10000:10000"
- "4040:4040"
Expand All @@ -19,7 +19,7 @@ services:
- WAIT_FOR=dbt-hive-metastore:5432

dbt-hive-metastore:
image: postgres:9.6.17-alpine
image: postgres:9-alpine
volumes:
- ./.hive-metastore/:/var/lib/postgresql/data
environment:
Expand Down
30 changes: 30 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Image that runs Apache Spark through spark-submit on an Eclipse Temurin JRE.
#
# Build arguments:
#   OPENJDK_VERSION  major Java version selecting the eclipse-temurin base tag
#   BUILD_DATE       optional value recorded in the build-date label
#   SPARK_VERSION    Apache Spark release downloaded into the image
#   HADOOP_VERSION   Hadoop profile of the Spark binary distribution
ARG OPENJDK_VERSION=8
FROM eclipse-temurin:${OPENJDK_VERSION}-jre

ARG BUILD_DATE
ARG SPARK_VERSION=3.3.2
ARG HADOOP_VERSION=3

LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
      org.label-schema.build-date="${BUILD_DATE}" \
      org.label-schema.version="${SPARK_VERSION}"

# Two separate ENV instructions: a variable set in an ENV instruction is only
# visible to the instructions that follow it, not to itself.
ENV SPARK_HOME=/usr/spark
ENV PATH="${SPARK_HOME}/bin:${SPARK_HOME}/sbin:${PATH}"

# Install runtime tools, download and unpack Spark, expose the PostgreSQL JDBC
# driver to Spark, and clean up in the same layer so nothing temporary is kept.
#   ca-certificates          - lets wget verify the HTTPS download
#   libpostgresql-jdbc-java  - JDBC driver linked into Spark's jars directory
#   netcat-openbsd           - provides `nc`, used by entrypoint.sh to wait for ports
#                              (the plain `netcat` name is only a virtual package
#                              on newer Debian/Ubuntu bases and fails to install)
#   procps                   - process utilities (presumably needed by Spark's scripts)
#   wget                     - build-time only, removed again below
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        libpostgresql-jdbc-java \
        netcat-openbsd \
        procps \
        wget && \
    wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
    tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
    rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
    mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" "${SPARK_HOME}" && \
    ln -s /usr/share/java/postgresql-jdbc4.jar "${SPARK_HOME}/jars/postgresql-jdbc4.jar" && \
    apt-get remove -y wget && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

COPY entrypoint.sh /scripts/
RUN chmod +x /scripts/entrypoint.sh

# The entrypoint script receives CMD (or the `docker run` arguments) as its
# arguments; with no override the container is started with `--help`.
ENTRYPOINT ["/scripts/entrypoint.sh"]
CMD ["--help"]
15 changes: 15 additions & 0 deletions docker/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
# Container entrypoint: optionally wait for other services to accept TCP
# connections, then replace this shell with spark-submit.
#
# Environment:
#   WAIT_FOR  optional ';'-separated list of host:port pairs, e.g.
#             "metastore:5432;other-host:9083". Each pair is polled with
#             `nc -z` every 2 seconds until the port is reachable.
# Arguments:
#   All arguments are passed unchanged to spark-submit.

if [ -n "$WAIT_FOR" ]; then
  # -r keeps backslashes literal; -a splits the list on ';' into an array.
  IFS=';' read -r -a HOSTPORT_ARRAY <<< "$WAIT_FOR"
  for HOSTPORT in "${HOSTPORT_ARRAY[@]}"
  do
    # Skip empty entries (e.g. "a:1;;b:2"): with no host/port to probe the
    # wait loop below could never finish.
    [ -n "$HOSTPORT" ] || continue

    WAIT_FOR_HOST=${HOSTPORT%:*}   # text before the last ':'
    WAIT_FOR_PORT=${HOSTPORT#*:}   # text after the first ':'

    echo "Waiting for $WAIT_FOR_HOST to listen on $WAIT_FOR_PORT..."
    while ! nc -z "$WAIT_FOR_HOST" "$WAIT_FOR_PORT"; do echo sleeping; sleep 2; done
  done
fi

# exec so spark-submit takes over this process and receives signals directly.
exec spark-submit "$@"

0 comments on commit efa1b18

Please sign in to comment.