diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml index 966b05d6..51e88649 100644 --- a/.github/workflows/pipeline.yaml +++ b/.github/workflows/pipeline.yaml @@ -71,7 +71,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3.5.3 + uses: actions/checkout@v3.6.0 with: # We need all Git history for testing credentials fetch-depth: 0 @@ -79,7 +79,7 @@ jobs: submodules: recursive - name: SAST - Credentials - uses: trufflesecurity/trufflehog@v3.47.0 + uses: trufflesecurity/trufflehog@v3.53.0 with: base: ${{ github.event.repository.default_branch }} head: HEAD @@ -94,7 +94,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3.5.3 + uses: actions/checkout@v3.6.0 with: # We need all Git history for "version.sh" fetch-depth: 0 @@ -114,7 +114,7 @@ jobs: # Required for running "npx" CLI - name: Setup Node - uses: actions/setup-node@v3.7.0 + uses: actions/setup-node@v3.8.1 with: node-version: ${{ env.NODE_VERSION }} @@ -170,7 +170,7 @@ jobs: snyk.sarif - name: Upload results to GitHub Security - uses: github/codeql-action/upload-sarif@v2.21.3 + uses: github/codeql-action/upload-sarif@v2.21.4 with: sarif_file: merged.sarif @@ -185,7 +185,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3.5.3 + uses: actions/checkout@v3.6.0 with: # Chart Releaser needs to have local access to "gh-pages" plus current branch fetch-depth: 0 @@ -214,11 +214,11 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3.5.3 + uses: actions/checkout@v3.6.0 # Required for running "npx" CLI - name: Setup Node - uses: actions/setup-node@v3.7.0 + uses: actions/setup-node@v3.8.1 with: node-version: ${{ env.NODE_VERSION }} @@ -255,7 +255,7 @@ jobs: arch: linux/amd64,linux/arm64 steps: - name: Checkout - uses: actions/checkout@v3.5.3 + uses: actions/checkout@v3.6.0 with: # We need all Git history for "version.sh" fetch-depth: 0 @@ -288,7 +288,7 @@ jobs: # Required 
for running "npx" CLI - name: Setup Node - uses: actions/setup-node@v3.7.0 + uses: actions/setup-node@v3.8.1 with: node-version: ${{ env.NODE_VERSION }} @@ -405,7 +405,7 @@ jobs: snyk-*.sarif - name: Upload results to GitHub Security - uses: github/codeql-action/upload-sarif@v2.21.3 + uses: github/codeql-action/upload-sarif@v2.21.4 with: sarif_file: merged.sarif @@ -426,7 +426,7 @@ jobs: runs-on: windows-2019 steps: - name: Checkout - uses: actions/checkout@v3.5.3 + uses: actions/checkout@v3.6.0 with: # We need all Git history for "version.sh" fetch-depth: 0 @@ -579,7 +579,7 @@ jobs: ${{ steps.tag.outputs.tag }} - name: Upload results to GitHub Security - uses: github/codeql-action/upload-sarif@v2.21.3 + uses: github/codeql-action/upload-sarif@v2.21.4 with: sarif_file: snyk.sarif @@ -590,7 +590,7 @@ jobs: image: returntocorp/semgrep steps: - name: Checkout - uses: actions/checkout@v3.5.3 + uses: actions/checkout@v3.6.0 - name: Run tests # Semgrep can be used to break the build when it detects security issues. 
In this case we want to upload the issues to GitHub Security @@ -600,7 +600,7 @@ jobs: run: semgrep ci --sarif --output=semgrep.sarif - name: Upload results to GitHub Security - uses: github/codeql-action/upload-sarif@v2.21.3 + uses: github/codeql-action/upload-sarif@v2.21.4 with: sarif_file: semgrep.sarif @@ -611,7 +611,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3.5.3 + uses: actions/checkout@v3.6.0 - name: Setup ORAS uses: oras-project/setup-oras@v1.0.0 diff --git a/README.md b/README.md index 64bcdda4..dfe69d10 100644 --- a/README.md +++ b/README.md @@ -94,15 +94,8 @@ pipelines: capabilities: - arch_arm64 -affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/arch - operator: In - values: - - arm64 +extraNodeSelectors: + kubernetes.io/arch: arm64 ``` Deploy the Helm instance: @@ -392,7 +385,7 @@ extraVolumeMounts: | `pipelines.tmpdir.volumeEnabled` | Enabled by default, can be disabled if your CSI driver doesn't support ephemeral storage ([exhaustive list](https://kubernetes-csi.github.io/docs/drivers.html)). If disabled, it is advised to allow >= 10Gi of ephemeral storage usage (see `resources`). | `true` | | `podSecurityContext` | Security rules applied to the Pod ([more details](https://kubernetes.io/docs/concepts/security/pod-security-standards)). | `{}` | | `replicaCount` | Default fixed amount of agents deployed. Those are not auto-scaled. 
| `3` | -| `resources` | Resource limits | `{ "resources": { "limits": { "cpu": 2, "memory": "4Gi", "ephemeral-storage": "4Gi" }, "requests": { "cpu": 1, "memory": "2Gi", "ephemeral-storage": "2Gi" }}}` | +| `resources` | Resource limits | `{ "resources": { "limits": { "cpu": 2, "memory": "4Gi", "ephemeral-storage": "8Gi" }, "requests": { "cpu": 1, "memory": "2Gi", "ephemeral-storage": "2Gi" }}}` | | `secret.create` | Create Secret, must contains `personalAccessToken` and `organizationURL` variables. | `true` | | `secret.name` | Secret name | _Release name_ | | `securityContext` | Security rules applied to the container ([more details](https://kubernetes.io/docs/concepts/security/pod-security-standards)). | `{}` | @@ -410,6 +403,11 @@ These actions can enhance your system performance: - SSD volumes are used for both cache (see `pipelines.cache`) and system temporary directory (see `pipelines.tmpdir`). For exemple, in Azure, the `managed-csi-premium` volume type is a high-performance SSD. - The network bewteen Azure DevOps server and agents has a low latency. +BuildKit specifics: + +- Choose an ephemeral disk for the cache in `/app-root/.local/share/buildkit`, instead of an emptyDir. +- Use a high-performance disk for the cache, for example `managed-csi-premium` in Azure. + ### Proxy If you need to use a proxy, you can set the following environment variables. See [this documentation](https://github.com/microsoft/azure-pipelines-agent/blob/master/docs/start/proxyconfig.md) for more details. diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index 059f7bba..8ab29b31 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -1,11 +1,49 @@ # Troubleshooting +## Pods are evicted by Kubernetes with the message `Pod ephemeral local storage usage exceeds the total limit of containers` + +This error is due to the fact that the default ephemeral storage limit is set to a lower value than the one used by the pipeline.
You can fix it by setting `resources.limits.ephemeral-storage` to a value higher than the default. + +This error notably happens when using BuildKit with an `emptyDir` and a large number of layers. + +```yaml +# values.yaml (extract) +resources: + limits: + ephemeral-storage: 16Gi +``` + +## Pods are started but never selected by Azure DevOps when using multiple architectures + +Prefer hardcoding the architecture in both the pipeline and the Helm values. This way, KEDA will be able to select the right pods matching the architecture. Otherwise, there is a possibility that the deployment selected by KEDA does not match the requested architecture. + +```yaml +# azure-pipelines.yaml (extract) +stages: + - stage: test + jobs: + - job: test + pool: + demands: + - arch_arm64 +``` + +```yaml +# values.yaml (extract) +extraNodeSelectors: + kubernetes.io/arch: arm64 + +pipelines: + capabilities: + - arch_arm64 +``` + ## Container fails to a `ContainerStatusUnknown` state Error is often due to two things: - Kubernetes is not able to pull the image: check the image name and the credentials, if you are using the public registry, mind the domain whitelist -- Pod has been ecivted by Kubernetes due to the excessive local storage usage: parameter `ephemeral-storage` in `resources` Helm values is set to 4Gi by default, you can increase it to 10Gi for example +- Pod has been evicted by Kubernetes due to the excessive local storage usage: parameter `ephemeral-storage` in `resources` Helm values is set to `8Gi` by default, you can increase it to `16Gi` for example ## Namespaces must be set to a non-zero value diff --git a/example/helm/container-build.yaml b/example/helm/container-build.yaml index e93a097e..3b635c65 100644 --- a/example/helm/container-build.yaml +++ b/example/helm/container-build.yaml @@ -6,8 +6,16 @@ extraVolumeMounts: name: buildkitd extraVolumes: - - emptyDir: {} - name: buildkitd + - name: buildkitd + # emptyDir: {} + ephemeral: + volumeClaimTemplate: + spec: +
accessModes: ["ReadWriteOnce"] + storageClassName: "managed-csi-premium" + resources: + requests: + storage: 16Gi securityContext: seccompProfile: @@ -29,7 +37,11 @@ resources: pipelines: capabilities: + - arch_x64 - buildkit personalAccessToken: your-pat poolName: private_kube organizationURL: https://dev.azure.com/shopping-cart-devops-demo + +extraNodeSelectors: + kubernetes.io/arch: amd64 diff --git a/example/helm/windows.yaml b/example/helm/windows.yaml index ca7559c1..46bd380b 100644 --- a/example/helm/windows.yaml +++ b/example/helm/windows.yaml @@ -1,6 +1,4 @@ pipelines: - capabilities: - - arch_x64 personalAccessToken: your-pat poolName: private_kube organizationURL: https://dev.azure.com/shopping-cart-devops-demo diff --git a/src/docker/Dockerfile-bookworm b/src/docker/Dockerfile-bookworm index d5db1598..30d38ee6 100644 --- a/src/docker/Dockerfile-bookworm +++ b/src/docker/Dockerfile-bookworm @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/dotnet/aspnet:6.0-bookworm-slim@sha256:85f215d6225222ed9c6350787c7b65fdb05bf98c48f116ce70ba7261736581cd as base +FROM mcr.microsoft.com/dotnet/aspnet:6.0-bookworm-slim@sha256:d9c46e7265ab5dacd41ab10253da89639afe63db10265912bfd779395ea5ad02 as base # Force apt-get to not use TTY ENV DEBIAN_FRONTEND noninteractive diff --git a/src/docker/Dockerfile-bullseye b/src/docker/Dockerfile-bullseye index a037efbe..7b8064c0 100644 --- a/src/docker/Dockerfile-bullseye +++ b/src/docker/Dockerfile-bullseye @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/dotnet/aspnet:6.0-bullseye-slim@sha256:03aae52deb58521b0368987571f85872af90e7e04496f7927fe84968d2bd3d49 as base +FROM mcr.microsoft.com/dotnet/aspnet:6.0-bullseye-slim@sha256:39f2c3efb84d744c63f43ee1c206d560d67444858e9622a9c5db93d5ef221dc8 as base # Force apt-get to not use TTY ENV DEBIAN_FRONTEND noninteractive diff --git a/src/helm/azure-pipelines-agent/values.yaml b/src/helm/azure-pipelines-agent/values.yaml index d3ea5863..af007371 100644 --- a/src/helm/azure-pipelines-agent/values.yaml +++ 
b/src/helm/azure-pipelines-agent/values.yaml @@ -63,7 +63,7 @@ securityContext: {} resources: limits: cpu: 2 - ephemeral-storage: 4Gi + ephemeral-storage: 8Gi memory: 4Gi requests: cpu: 1