Skip to content

Commit 364887d

Browse files
andygrove, comphead, edmondop
authored
chore: Add GitHub workflow to publish Docker image (#847)
* Add workflow to publish Docker images * update workflow name * remove regex check * improve * fix * use maven to get project version * add scalastyle config * fix * remove java distro name * add dev folder * save progress * docker build works * Update kube/Dockerfile Co-authored-by: Oleks V <[email protected]> * Update .github/workflows/docker-publish.yml Co-authored-by: Edmondo Porcu <[email protected]> * address feedback --------- Co-authored-by: Oleks V <[email protected]> Co-authored-by: Edmondo Porcu <[email protected]>
1 parent 27ab86b commit 364887d

File tree

6 files changed

+130
-17
lines changed

6 files changed

+130
-17
lines changed

.dockerignore

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
.git
2+
.github
3+
.idea
4+
bin
5+
conf
6+
docs/build
7+
docs/temp
8+
docs/venv
9+
metastore_db
10+
target
11+
common/target
12+
spark-integration/target
13+
fuzz-testing/target
14+
spark/target
15+
native/target
16+
core/target
17+
spark-warehouse
18+
venv

.github/workflows/docker-publish.yml

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Builds the Comet Docker image from kube/Dockerfile and pushes it to the
# GitHub Container Registry whenever a matching tag is pushed.
name: Publish Docker images

# At most one run per repository/ref/workflow group; a newer run cancels
# the one still in progress.
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true

# Triggered only by pushed tags that match one of these patterns.
on:
  push:
    tags:
      - '*.*.*'
      - '*.*.*-rc*'
      - 'test-docker-publish-*'

jobs:
  docker:
    name: Docker
    runs-on: ubuntu-22.04
    permissions:
      contents: read
      # Needed so GITHUB_TOKEN may push the image to ghcr.io.
      packages: write
    steps:
      # The version-extraction step runs Maven against pom.xml, so the
      # repository must be present in the workspace first.
      - name: Check out repository
        uses: actions/checkout@v4
      - name: Set up Java
        uses: actions/setup-java@v3
        with:
          # setup-java requires the distribution to be named explicitly.
          distribution: 'temurin'
          java-version: '17'
      - name: Extract Comet version
        id: extract_version
        run: |
          COMET_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
          echo "COMET_VERSION=$COMET_VERSION" >> "$GITHUB_ENV"
      - name: Echo Comet version
        run: echo "The current Comet version is ${{ env.COMET_VERSION }}"
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          platforms: linux/amd64,linux/arm64
          push: true
          # The tag must carry the ghcr.io host: an image name without a
          # registry resolves to Docker Hub, which this job never logs in to.
          tags: ghcr.io/apache/datafusion-comet:spark-3.4-scala-2.12-${{ env.COMET_VERSION }}
          file: kube/Dockerfile

docs/source/user-guide/installation.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ Make sure the following requirements are met and software installed on your mach
3232
- JDK 8 and up
3333
- GLIBC 2.17 (Centos 7) and up
3434

35-
## Using a Published Binary Release
35+
## Using a Published Docker Image
36+
37+
Docker images are available at https://github.com/orgs/apache/packages?repo_name=datafusion-comet
38+
39+
## Using a Published JAR File
3640

3741
There are no published binary releases yet.
3842

kube/Dockerfile

+29-6
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ USER root
2121

2222
# Installing JDK11 as the image comes with JRE
2323
RUN apt update \
24-
&& apt install -y git \
2524
&& apt install -y curl \
2625
&& apt install -y openjdk-11-jdk \
2726
&& apt clean
@@ -32,14 +31,38 @@ ENV RUSTFLAGS="-C debuginfo=line-tables-only -C incremental=false"
3231
ENV SPARK_VERSION=3.4
3332
ENV SCALA_VERSION=2.12
3433

34+
# copy source files to Docker image
35+
RUN mkdir /comet
36+
WORKDIR /comet
37+
38+
# build native code first so that this layer can be re-used
39+
# if only Scala code gets modified
40+
COPY rust-toolchain.toml /comet/rust-toolchain.toml
41+
COPY native /comet/native
42+
RUN cd native && RUSTFLAGS="-Ctarget-cpu=native" cargo build --release
43+
44+
# copy the rest of the project
45+
COPY .mvn /comet/.mvn
46+
COPY mvnw /comet/mvnw
47+
COPY common /comet/common
48+
COPY dev /comet/dev
49+
COPY docs /comet/docs
50+
COPY fuzz-testing /comet/fuzz-testing
51+
COPY spark /comet/spark
52+
COPY spark-integration /comet/spark-integration
53+
COPY scalafmt.conf /comet/scalafmt.conf
54+
COPY .scalafix.conf /comet/.scalafix.conf
55+
COPY Makefile /comet/Makefile
56+
COPY pom.xml /comet/pom.xml
57+
3558
# Pick the JDK instead of JRE to compile Comet
36-
RUN cd /opt \
37-
&& git clone https://github.com/apache/datafusion-comet.git \
38-
&& cd datafusion-comet \
39-
&& JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release PROFILES="-Pspark-$SPARK_VERSION -Pscala-$SCALA_VERSION"
59+
RUN cd /comet \
60+
&& JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release-nogit PROFILES="-Pspark-$SPARK_VERSION -Pscala-$SCALA_VERSION"
4061

4162
FROM apache/spark:3.4.2
4263
ENV SPARK_VERSION=3.4
4364
ENV SCALA_VERSION=2.12
4465
USER root
45-
COPY --from=builder /opt/datafusion-comet/spark/target/comet-spark-spark${SPARK_VERSION}_$SCALA_VERSION-0.1.0-SNAPSHOT.jar $SPARK_HOME/jars
66+
67+
# note the use of a wildcard in the file name so that this works with both snapshot and final release versions
68+
COPY --from=builder /comet/spark/target/comet-spark-spark${SPARK_VERSION}_$SCALA_VERSION-0.2.0*.jar $SPARK_HOME/jars

native/Cargo.toml

+9-9
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,15 @@ arrow-buffer = { version = "52.2.0" }
3939
arrow-data = { version = "52.2.0" }
4040
arrow-schema = { version = "52.2.0" }
4141
parquet = { version = "52.2.0", default-features = false, features = ["experimental"] }
42-
datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1" }
43-
datafusion = { default-features = false, git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", features = ["unicode_expressions", "crypto_expressions"] }
44-
datafusion-functions = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", features = ["crypto_expressions"] }
45-
datafusion-functions-nested = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false }
46-
datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false }
47-
datafusion-execution = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false }
48-
datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false }
49-
datafusion-physical-expr-common = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false }
50-
datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false }
42+
datafusion-common = { version = "41.0.0" }
43+
datafusion = { default-features = false, version = "41.0.0", features = ["unicode_expressions", "crypto_expressions"] }
44+
datafusion-functions = { version = "41.0.0", features = ["crypto_expressions"] }
45+
datafusion-functions-nested = { version = "41.0.0", default-features = false }
46+
datafusion-expr = { version = "41.0.0", default-features = false }
47+
datafusion-execution = { version = "41.0.0", default-features = false }
48+
datafusion-physical-plan = { version = "41.0.0", default-features = false }
49+
datafusion-physical-expr-common = { version = "41.0.0", default-features = false }
50+
datafusion-physical-expr = { version = "41.0.0", default-features = false }
5151
datafusion-comet-spark-expr = { path = "spark-expr", version = "0.2.0" }
5252
datafusion-comet-proto = { path = "proto", version = "0.2.0" }
5353
chrono = { version = "0.4", default-features = false, features = ["clock"] }

pom.xml

+6-1
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,10 @@ under the License.
588588
</properties>
589589
</profile>
590590

591+
<profile>
592+
<id>scala-2.12</id>
593+
</profile>
594+
591595
<profile>
592596
<id>scala-2.13</id>
593597
<properties>
@@ -938,6 +942,7 @@ under the License.
938942
<exclude>**/build/**</exclude>
939943
<exclude>**/target/**</exclude>
940944
<exclude>**/apache-spark/**</exclude>
945+
<exclude>.dockerignore</exclude>
941946
<exclude>.git/**</exclude>
942947
<exclude>.github/**</exclude>
943948
<exclude>.gitignore</exclude>
@@ -963,7 +968,7 @@ under the License.
963968
<exclude>docs/source/_static/images/**</exclude>
964969
<exclude>dev/release/rat_exclude_files.txt</exclude>
965970
<exclude>dev/release/requirements.txt</exclude>
966-
<exclude>native/core/src/execution/generated/**</exclude>
971+
<exclude>native/proto/src/generated/**</exclude>
967972
</excludes>
968973
</configuration>
969974
</plugin>

0 commit comments

Comments
 (0)