From f4b504aa53e9c8c107bd51b1b7f4e5abb785fe4c Mon Sep 17 00:00:00 2001
From: xuwenyihust
Date: Mon, 8 Jan 2024 11:00:28 +0800
Subject: [PATCH] Add Docker build and push workflows, and update README and
 deployment configurations

---
Review notes (text in this section is ignored by `git am`):
- build-docker.yml: the registry login and the image pushes now run on push
  events only; pull requests just verify that both images still build.
- build-docker.yml: actions/checkout bumped to v4, docker/login-action to v3.
- build-docker.yml: its "index" header is omitted because the blob id has to
  be regenerated (e.g. with `git format-patch`) after this revision.

 .github/workflows/build-docker.yml            | 57 +++++++++++++++++++
 ...deploy-examples.yml => build-examples.yml} | 11 +---
 README.md                                     | 12 ++++
 .../Dockerfile                                |  0
 .../templates/notebook-deployment.yaml        |  2 +-
 .../spark-history-server-deployment.yaml      |  2 +-
 6 files changed, 72 insertions(+), 12 deletions(-)
 create mode 100644 .github/workflows/build-docker.yml
 rename .github/workflows/{build-deploy-examples.yml => build-examples.yml} (84%)
 rename docker/{spakr-history-server => spark-history-server}/Dockerfile (100%)

diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/build-docker.yml
@@ -0,0 +1,57 @@
+name: Build Docker
+
+# Pushes to master publish both images. Pull requests that touch docker/**
+# only verify that the images still build; they never push.
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    paths:
+      - 'docker/**'
+
+# The job only reads the repository contents.
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      # Only push events publish images, so pull requests skip the login.
+      - name: Log in to Docker Hub
+        if: github.event_name == 'push'
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+
+      - name: Docker Build & Push
+        env:
+          # Secrets are empty on pull requests from forks; fall back to a
+          # placeholder namespace so the image reference stays valid.
+          IMAGE_NAMESPACE: ${{ secrets.DOCKERHUB_USERNAME || 'local' }}
+          PUSH_IMAGES: ${{ github.event_name == 'push' }}
+        run: |
+          timestamp=$(date +"%Y%m%d%H%M%S")
+
+          # Usage: build_image <image-name> <dockerfile> <build-context>
+          # Builds the image under a timestamp tag and :latest, then pushes
+          # both tags when PUSH_IMAGES is "true".
+          build_image() {
+            local image="$IMAGE_NAMESPACE/$1"
+            docker build -t "$image:$timestamp" -t "$image:latest" -f "$2" "$3"
+            if [ "$PUSH_IMAGES" = "true" ]; then
+              docker push "$image:$timestamp"
+              docker push "$image:latest"
+            fi
+          }
+
+          build_image all-spark-notebook \
+            docker/jupyter-notebook/Dockerfile.notebook \
+            ./docker/jupyter-notebook
+          build_image spark-history-server \
+            docker/spark-history-server/Dockerfile \
+            ./docker/spark-history-server
diff --git a/.github/workflows/build-deploy-examples.yml b/.github/workflows/build-examples.yml
similarity index 84%
rename from .github/workflows/build-deploy-examples.yml
rename to .github/workflows/build-examples.yml
index 10be55c..27f2987 100644
--- a/.github/workflows/build-deploy-examples.yml
+++ b/.github/workflows/build-examples.yml
@@ -1,4 +1,4 @@
-name: Build & Deploy Examples
+name: Build Examples
 
 # Controls when the workflow will run
 on:
@@ -54,13 +54,4 @@ jobs:
           docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/word-count:$VERSION_WITH_TIMESTAMP --build-arg VERSION=$VERSION .
           docker push ${{ secrets.DOCKERHUB_USERNAME }}/word-count:$VERSION_WITH_TIMESTAMP
 
-  deploy-examples:
-    needs: build-examples
-    runs-on: ubuntu-latest
-    if: ${{ github.event.inputs.deploy-example != 'None' }}
-
-    steps:
-      - name: Deploy to GKE
-        run: |
-          echo "Deploy ${{ github.event.inputs.deploy-example }} to GKE!"
     
\ No newline at end of file
diff --git a/README.md b/README.md
index b5df10e..efa0463 100644
--- a/README.md
+++ b/README.md
@@ -7,9 +7,21 @@
 GitHub License
 
 
+> ⚠️ Currently in Development: Expect breaking changes and bugs!
+
 ## Overview
 A big data platform for data processing and machine learning based on Kubernetes and Spark.
 
+## Features
+- Spark Application Deployment
+  - Jar Application Submission
+  - PySpark Application Submission
+  - Jupyter Notebook
+    - Customized Integration with PySpark
+- Monitoring
+  - Spark UI
+  - History Server
+
 ## Supported Versions
 - Spark: 3.5.0
 - Scala: 2.12
diff --git a/docker/spakr-history-server/Dockerfile b/docker/spark-history-server/Dockerfile
similarity index 100%
rename from docker/spakr-history-server/Dockerfile
rename to docker/spark-history-server/Dockerfile
diff --git a/helm/data-platform/templates/notebook-deployment.yaml b/helm/data-platform/templates/notebook-deployment.yaml
index 3950a7e..cefacbb 100644
--- a/helm/data-platform/templates/notebook-deployment.yaml
+++ b/helm/data-platform/templates/notebook-deployment.yaml
@@ -15,7 +15,7 @@ spec:
       serviceAccountName: spark
       containers:
         - name: notebook
-          image: wenyixu101/all-spark-notebook:spark-3.5.0_26
+          image: wenyixu101/all-spark-notebook:latest
           imagePullPolicy: Always
           command: ["/bin/bash", "-c", "start-notebook.sh"]
           ports:
diff --git a/helm/data-platform/templates/spark-history-server-deployment.yaml b/helm/data-platform/templates/spark-history-server-deployment.yaml
index 99cd366..09b5a5e 100644
--- a/helm/data-platform/templates/spark-history-server-deployment.yaml
+++ b/helm/data-platform/templates/spark-history-server-deployment.yaml
@@ -14,7 +14,7 @@ spec:
     spec:
       containers:
         - name: spark-history-server
-          image: wenyixu101/spark-history-server:3.5.0
+          image: wenyixu101/spark-history-server:latest
           command: ["/bin/sh", "-c"]
           args: ["/opt/spark/bin/spark-class org.apache.spark.deploy.history.HistoryServer"]
           env: