Merge pull request #542 from Souheil-Yazji/aaw-1855

refactor(docker): clean up #1
StatCan · Oct 24, 2023 · 21ce987 · 21ce987
2 parents 0c7be81 + 027317c
commit 21ce987
Show file tree

Hide file tree

Showing 31 changed files with 603 additions and 648 deletions.
diff --git a/.github/workflows/build_push.yaml b/.github/workflows/build_push.yaml
@@ -22,20 +22,10 @@
 #
 # 2. Change the values for the REGISTRY_NAME, CLUSTER_NAME, CLUSTER_RESOURCE_GROUP and NAMESPACE environment variables (below in build-push).
 name: build_and_push
-on:
-  schedule:
-    # Execute at 2am EST every day
-    - cron:  '0 21 * * *'
-  push:
-    branches:
-      - 'master'
-  pull_request:
-    types:
-      - 'opened'
-      - 'synchronize'
-      - 'reopened'
-env:
-  SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
+on: push
+  # schedule:
+  #   # Execute at 2am EST every day
+  #   - cron:  '0 21 * * *'
 
 jobs:
   # Any checks that run pre-build
@@ -62,7 +52,6 @@ jobs:
       CLUSTER_RESOURCE_GROUP: k8s-cancentral-01-covid-aks
       LOCAL_REPO: localhost:5000
       TRIVY_VERSION: "v0.31.3"
-      HADOLINT_VERSION: "2.12.0"
     strategy:
       fail-fast: false
       matrix:
@@ -83,18 +72,18 @@ jobs:
         ports:
           - 5000:5000
     steps:
-    - name: Set ENV variables for a PR containing the auto-deploy tag
-      if: github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'auto-deploy')
-      run: | 
-        echo "REGISTRY=k8scc01covidacrdev.azurecr.io" >> "$GITHUB_ENV"
-        echo "IMAGE_VERSION=dev" >> "$GITHUB_ENV"
+    # - name: Set ENV variables for a PR containing the auto-deploy tag
+    #   if: github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'auto-deploy')
+    #   run: | 
+    #     echo "REGISTRY=k8scc01covidacrdev.azurecr.io" >> "$GITHUB_ENV"
+    #     echo "IMAGE_VERSION=dev" >> "$GITHUB_ENV"
 
-    - name: Set ENV variables for pushes to master
-      if: github.event_name == 'push' && github.ref == 'refs/heads/master'
-      run: | 
-        echo "REGISTRY=k8scc01covidacr.azurecr.io" >> "$GITHUB_ENV"
-        echo "IMAGE_VERSION=v1" >> "$GITHUB_ENV"
-        echo "IS_LATEST=true" >> "$GITHUB_ENV"
+    # - name: Set ENV variables for pushes to master
+    #   if: github.event_name == 'push' && github.ref == 'refs/heads/master'
+    #   run: | 
+    #     echo "REGISTRY=k8scc01covidacr.azurecr.io" >> "$GITHUB_ENV"
+    #     echo "IMAGE_VERSION=v1" >> "$GITHUB_ENV"
+    #     echo "IS_LATEST=true" >> "$GITHUB_ENV"
 
     - uses: actions/checkout@master
 
@@ -111,32 +100,31 @@ jobs:
       id: notebook-name
       shell: bash
       run: |
-        echo NOTEBOOK_NAME=${{ matrix.notebook }} >> $GITHUB_OUTPUT
+        echo "NOTEBOOK_NAME=${{ matrix.notebook }}" >> "$GITHUB_OUTPUT"  # key must be NOTEBOOK_NAME: later steps read outputs.NOTEBOOK_NAME
 
     # Connect to Azure Container registry (ACR)
-    - uses: azure/docker-login@v1
-      with:
-        login-server: ${{ env.REGISTRY_NAME }}.azurecr.io
-        username: ${{ secrets.REGISTRY_USERNAME }}
-        password: ${{ secrets.REGISTRY_PASSWORD }}
+    # - uses: azure/docker-login@v1
+    #   with:
+    #     login-server: ${{ env.REGISTRY_NAME }}.azurecr.io
+    #     username: ${{ secrets.REGISTRY_USERNAME }}
+    #     password: ${{ secrets.REGISTRY_PASSWORD }}
 
-    # Connect to Azure DEV Container registry (ACR)
-    - uses: azure/docker-login@v1
-      with:
-        login-server: ${{ env.DEV_REGISTRY_NAME }}.azurecr.io
-        username: ${{ secrets.DEV_REGISTRY_USERNAME }}
-        password: ${{ secrets.DEV_REGISTRY_PASSWORD }}
+    # # Connect to Azure DEV Container registry (ACR)
+    # - uses: azure/docker-login@v1
+    #   with:
+    #     login-server: ${{ env.DEV_REGISTRY_NAME }}.azurecr.io
+    #     username: ${{ secrets.DEV_REGISTRY_USERNAME }}
+    #     password: ${{ secrets.DEV_REGISTRY_PASSWORD }}
 
     # Image building/storing locally
     - name: Make Dockerfiles
       run: make generate-dockerfiles
 
-    - name: Run Hadolint
-      run:  |
-        sudo curl -L https://github.com/hadolint/hadolint/releases/download/v${{ env.HADOLINT_VERSION }}/hadolint-Linux-x86_64 --output hadolint
-        sudo chmod +x hadolint
-        ./hadolint output/${{ matrix.notebook }}/Dockerfile --no-fail
-        
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@v2
+      with:
+        buildkitd-flags: --debug
+
     # make build emits full_image_name, image_tag, and image_repo outputs
     - name: Build image
       id: build-image
@@ -150,6 +138,7 @@ jobs:
 
     - name: Push image to local registry (default pushes all tags)
       run: make push/${{ matrix.notebook }} REPO=${{ env.LOCAL_REPO }}
+
     # Image testing
 
     - name: Set Up Python for Test Suite
@@ -165,6 +154,8 @@ jobs:
     - name: Test image
       run: make test/${{ matrix.notebook }} REPO=${{ env.LOCAL_REPO }}
 
+    # Image scanning
+
     # Free up space from build process (containerscan action will run out of space if we don't)
     - run: ./.github/scripts/cleanup_runner.sh
 
@@ -173,43 +164,43 @@ jobs:
     # see https://github.com/StatCan/aaw-private/issues/11 -- should be re-enabled
       if: steps.notebook-name.outputs.NOTEBOOK_NAME != 'sas'
       run: |
-        printf ${{ secrets.CVE_ALLOWLIST }} > .trivyignore
         curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin ${{ env.TRIVY_VERSION }}
         trivy image ${{ steps.build-image.outputs.full_image_name }} --exit-code 1 --timeout=20m --security-checks vuln --severity CRITICAL
+      
 
     # Push image to ACR
     # Pushes if this is a push to master or an update to a PR that has auto-deploy label
-    - name: Test if we should push to ACR
-      id: should-i-push
-      if: |
-        github.event_name == 'push' ||
-        (
-          github.event_name == 'pull_request' &&
-          contains( github.event.pull_request.labels.*.name, 'auto-deploy')
-        )
-      run: echo 'boolean=true' >> $GITHUB_OUTPUT
-
-    # Pull the local image back, then "build" it (will just tag the pulled image)
-    - name: Pull image back from local repo
-      if: steps.should-i-push.outputs.boolean == 'true'
-      run: docker pull ${{ steps.build-image.outputs.full_image_name }}
-
-    # Rename the localhost:5000/imagename:tag built above to use the real repo
-    # (get above's name from build-image's output)
-    - name: Tag images with real repository
-      if: steps.should-i-push.outputs.boolean == 'true'
-      run: > 
-        make post-build/${{ matrix.notebook }} DEFAULT_REPO=$REGISTRY IS_LATEST=$IS_LATEST
-        IMAGE_VERSION=$IMAGE_VERSION SOURCE_FULL_IMAGE_NAME=${{ steps.build-image.outputs.full_image_name }}
-
-    - name: Push image to registry
-      if: steps.should-i-push.outputs.boolean == 'true'
-      run: |
-        make push/${{ matrix.notebook }} DEFAULT_REPO=$REGISTRY
+    # - name: Test if we should push to ACR
+    #   id: should-i-push
+    #   if: |
+    #     github.event_name == 'push' ||
+    #     (
+    #       github.event_name == 'pull_request' &&
+    #       contains( github.event.pull_request.labels.*.name, 'auto-deploy')
+    #     )
+    #   run: echo 'boolean=true' >> $GITHUB_OUTPUT
+
+    # # Pull the local image back, then "build" it (will just tag the pulled image)
+    # - name: Pull image back from local repo
+    #   if: steps.should-i-push.outputs.boolean == 'true'
+    #   run: docker pull ${{ steps.build-image.outputs.full_image_name }}
+
+    # # Rename the localhost:5000/imagename:tag built above to use the real repo
+    # # (get above's name from build-image's output)
+    # - name: Tag images with real repository
+    #   if: steps.should-i-push.outputs.boolean == 'true'
+    #   run: > 
+    #     make post-build/${{ matrix.notebook }} DEFAULT_REPO=$REGISTRY IS_LATEST=$IS_LATEST
+    #     IMAGE_VERSION=$IMAGE_VERSION SOURCE_FULL_IMAGE_NAME=${{ steps.build-image.outputs.full_image_name }}
+
+    # - name: Push image to registry
+    #   if: steps.should-i-push.outputs.boolean == 'true'
+    #   run: |
+    #     make push/${{ matrix.notebook }} DEFAULT_REPO=$REGISTRY
 
-    - name: Slack Notification
-      if: failure() && github.event_name=='schedule'
-      uses: act10ns/slack@v1
-      with: 
-        status: failure
-        message: Build failed. https://github.com/StatCan/aaw-kubeflow-containers/actions/runs/${{github.run_id}}
+    # - name: Slack Notification
+    #   if: failure() && github.event_name=='schedule'
+    #   uses: act10ns/slack@v1
+    #   with: 
+    #     status: failure
+    #     message: Build failed. https://github.com/StatCan/aaw-kubeflow-containers/actions/runs/${{github.run_id}}
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,6 @@ __pycache__
 .venv
 .ipynb_checkpoints/
 .idea
+node_modules
+package-lock.json
+package.json
diff --git a/docker-bits/0_Rocker.Dockerfile b/docker-bits/0_Rocker.Dockerfile
@@ -4,17 +4,29 @@
 FROM rocker/geospatial:4.2.1@sha256:5caca36b8962233f8636540b7c349d3f493f09e864b6e278cb46946ccf60d4d2
 
 # For compatibility with docker stacks
-ARG NB_USER="jovyan"
 ARG HOME=/home/$NB_USER
-ENV NB_UID="1000"
-ENV NB_GID="100"
+ARG NB_USER="jovyan"
+ARG NB_UID="1000"
+ARG NB_GID="100"
+
+# Export the build args as runtime env; NB_USER is set only once so HOME always matches it
+ENV NB_USER="${NB_USER}" \
+    NB_UID=${NB_UID} \
+    NB_GID=${NB_GID} \
+    CONDA_DIR=/opt/conda \
+    PATH=$PATH:/opt/conda/bin \
+    HOME="/home/${NB_USER}"
 
 USER root
 ENV PATH="/home/jovyan/.local/bin/:${PATH}"
 
 #Fix-permissions
 COPY remote-desktop/fix-permissions /usr/bin/fix-permissions
-RUN chmod u+x /usr/bin/fix-permissions
+#clean up
+COPY clean-layer.sh /usr/bin/clean-layer.sh
+
+RUN chmod u+x /usr/bin/fix-permissions \
+    && chmod +x /usr/bin/clean-layer.sh
 
 RUN apt-get update --yes \
     && apt-get install --yes python3-pip tini language-pack-fr \
@@ -26,3 +38,63 @@ RUN /rocker_scripts/install_shiny_server.sh \
 
 # Users should install R packages in their home directory
 RUN chmod 555 /usr/local/lib/R /usr/local/lib/R/site-library/
+
+
+# ARG CONDA_VERSION=py38_4.10.3
+# ARG CONDA_MD5=14da4a9a44b337f7ccb8363537f65b9c
+ARG PYTHON_VERSION=3.11
+
+# #Install Miniconda
+# #Has to be appended, else messes with qgis
+# RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh -O miniconda.sh && \
+#     echo "${CONDA_MD5}  miniconda.sh" > miniconda.md5 && \
+#     if ! md5sum --status -c miniconda.md5; then exit 1; fi && \
+#     mkdir -p /opt && \
+#     sh miniconda.sh -b -p /opt/conda && \
+#     rm miniconda.sh miniconda.md5 && \
+#     ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
+#     echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
+#     echo "conda activate base" >> ~/.bashrc && \
+#     find /opt/conda/ -follow -type f -name '*.a' -delete && \
+#     find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
+#     /opt/conda/bin/conda clean -afy && \
+#     chown -R $NB_UID:$NB_GID /opt/conda
+#
+# Download and install Micromamba, and initialize Conda prefix.
+#   <https://github.com/mamba-org/mamba#micromamba>
+#   Similar projects using Micromamba:
+#     - Micromamba-Docker: <https://github.com/mamba-org/micromamba-docker>
+#     - repo2docker: <https://github.com/jupyterhub/repo2docker>
+# Install Python, Mamba and jupyter_core
+# Cleanup temporary files and remove Micromamba
+# Correct permissions
+# Do all this in a single RUN command to avoid duplicating all of the
+# files across image layers when the permissions change
+COPY initial-condarc "${CONDA_DIR}/.condarc"
+WORKDIR /tmp
+RUN set -x && \
+    arch=$(uname -m) && \
+    if [ "${arch}" = "x86_64" ]; then \
+        # Should be simpler, see <https://github.com/mamba-org/mamba/issues/1437>
+        arch="64"; \
+    fi && \
+    wget --progress=dot:giga -O /tmp/micromamba.tar.bz2 \
+        "https://micromamba.snakepit.net/api/micromamba/linux-${arch}/latest" && \
+    tar -xvjf /tmp/micromamba.tar.bz2 --strip-components=1 bin/micromamba && \
+    rm /tmp/micromamba.tar.bz2 && \
+    PYTHON_SPECIFIER="python=${PYTHON_VERSION}" && \
+    if [ "${PYTHON_VERSION}" = "default" ]; then PYTHON_SPECIFIER="python"; fi && \
+    # Install the packages
+    ./micromamba install \
+        --root-prefix="${CONDA_DIR}" \
+        --prefix="${CONDA_DIR}" \
+        --yes \
+        "${PYTHON_SPECIFIER}" \
+        'mamba' \
+        'jupyter_core' && \
+    rm micromamba && \
+    # Pin major.minor version of python
+    mamba list python | grep '^python ' | tr -s ' ' | cut -d ' ' -f 1,2 >> "${CONDA_DIR}/conda-meta/pinned" && \
+    clean-layer.sh && \
+    fix-permissions "${CONDA_DIR}" && \
+    fix-permissions "/home/${NB_USER}"
diff --git a/docker-bits/0_cpu.Dockerfile b/docker-bits/0_cpu.Dockerfile
@@ -10,13 +10,10 @@ USER root
 
 ENV PATH="/home/jovyan/.local/bin/:${PATH}"
 
+COPY clean-layer.sh /usr/bin/clean-layer.sh
+
 RUN apt-get update --yes \
     && apt-get install --yes language-pack-fr \
     && apt-get upgrade --yes libwebp7 \
-    && rm -rf /var/lib/apt/lists/*
-
-#updates package to fix CVE-2023-0286 https://github.com/StatCan/aaw-private/issues/57
-#TODO: Evaluate if this is still necessary when updating the base image
-RUN pip install --force-reinstall cryptography==39.0.1 && \
-   fix-permissions $CONDA_DIR && \
-   fix-permissions /home/$NB_USER
+    && rm -rf /var/lib/apt/lists/* \
+    && chmod +x /usr/bin/clean-layer.sh
diff --git a/docker-bits/0_cpu_sas.Dockerfile b/docker-bits/0_cpu_sas.Dockerfile
@@ -12,12 +12,15 @@ USER root
 
 ENV PATH="/home/jovyan/.local/bin/:${PATH}"
 
+COPY clean-layer.sh /usr/bin/clean-layer.sh
+
 RUN apt-get update --yes \
     && apt-get install --yes language-pack-fr \
-    && rm -rf /var/lib/apt/lists/*
+    && rm -rf /var/lib/apt/lists/* \
+    && chmod +x /usr/bin/clean-layer.sh
 
 #updates package to fix CVE-2023-0286 https://github.com/StatCan/aaw-private/issues/57
 #TODO: Evaluate if this is still necessary when updating the base image
 RUN pip install --force-reinstall cryptography==39.0.1 && \
    fix-permissions $CONDA_DIR && \
-   fix-permissions /home/$NB_USER
+   fix-permissions /home/$NB_USER
diff --git a/docker-bits/2_cpu.Dockerfile b/docker-bits/2_cpu.Dockerfile
@@ -1,6 +1,6 @@
 # Create conda environment (CPU only) with many useful packages.
 
-RUN conda create -n pycpu --yes \
+RUN mamba create -n pycpu --yes \
       python==3.11.0 ipython==8.11.0 sphinx==6.1.3 \
       boto==2.49.0 s3fs==2023.3.0 \
       dos2unix==7.4.1 parallel==20230122 \
@@ -12,6 +12,6 @@ RUN conda create -n pycpu --yes \
       gensim==4.3.0 nltk==3.8.1 spacy==3.5.0 \
       pytorch==1.13.1 torchaudio==0.13.1 torchvision==0.14.1 cpuonly==2.0 \
       -c pytorch -c conda-forge && \
-    conda clean --all -f -y && \
+    clean-layer.sh && \
     fix-permissions $CONDA_DIR && \
     fix-permissions /home/$NB_USER
diff --git a/docker-bits/2_pytorch.Dockerfile b/docker-bits/2_pytorch.Dockerfile
@@ -10,7 +10,7 @@ RUN mamba create -n torch && \
         gputil \
         # pytorch-cuda are the nvidia cuda drivers
         pytorch-cuda=11.8 && \
-    mamba clean --all -f -y && \
+    clean-layer.sh && \
     fix-permissions $CONDA_DIR && \
     fix-permissions /home/$NB_USER && \
     python -m ipykernel install --user --name torch --display-name "PyTorch"

diff --git a/docker-bits/2_tensorflow.Dockerfile b/docker-bits/2_tensorflow.Dockerfile
@@ -1,8 +1,9 @@
 # Install Tensorflow
-RUN pip install --quiet \
-        'tensorflow' \
-        'keras' \
-        'ipykernel==6.21.3' \
+RUN mamba install --quiet --yes \
+        tensorflow \
+        keras \
+        ipykernel \
     && \
+    clean-layer.sh && \
     fix-permissions $CONDA_DIR && \
     fix-permissions /home/$NB_USER