From 86bbd64524e8d3c743f86368886c6e9cc82bb553 Mon Sep 17 00:00:00 2001 From: Marlon Saglia Date: Wed, 7 Aug 2024 09:49:49 +0200 Subject: [PATCH 1/2] build(link-checker): Add GitHub Actions workflow for link checking on a schedule and pull requests --- .github/workflows/link-checker.yml | 66 ++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 .github/workflows/link-checker.yml diff --git a/.github/workflows/link-checker.yml b/.github/workflows/link-checker.yml new file mode 100644 index 0000000000..18616ebf1f --- /dev/null +++ b/.github/workflows/link-checker.yml @@ -0,0 +1,66 @@ +name: Link Checker + +on: + workflow_dispatch: + schedule: + - cron: "2 0 * * *" + + # START Temporary for testing. + pull_request: + branches: [main] + push: + branches: ["link-checker-workflow-configuration"] + # END Temporary for testing. + +defaults: + run: + # Specify to ensure "pipefail and errexit" are set. + # Ref: https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#defaultsrunshell + shell: bash + +jobs: + link-checker-documentation: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Setup Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: 3.1 + bundler-cache: true + + - name: check links + env: + LANG: "C.UTF-8" + run: | + bundle exec jekyll build + # + # Remove the redirect-files before link-check + find _site/en _site/documentation -name \*.html | \ + xargs grep -l "Click here if you are not redirected." 
| xargs rm + # + # htmlproofer does not check links inside <code>-elements + find _site -name \*.html | xargs sed -i.orig 's/<code[^>]*>//g; s/<\/code>//g; s/<pre[^>]*>//g; s/<\/pre>//g;' + find _site -name \*.orig | xargs rm + # + bundle exec htmlproofer \ + --assume-extension .html \ + --no-enforce-https \ + --no-check-external-hash \ + --allow-missing-href \ + --ignore-files '/playground/index.html/' \ + --ignore-urls '\ + /localhost:8080/,\ + /docs.vespa.ai/playground/,\ + /javadoc.io.*#/,\ + /readthedocs.io.*#/,\ + /linux.die.net/,\ + /arxiv.org/,\ + /hub.docker.com/r/,\ + /platform.openai.com/' \ + --typhoeus '{"connecttimeout": 10, "timeout": 30, "accept_encoding": "zstd,br,gzip,deflate"}' \ + --hydra '{"max_concurrency": 1}' \ + --swap-urls '(https\://github.com.*/master/.*)#.*:\1,(https\://github.com.*/main/.*)#.*:\1' \ + _site From c207ad453c908fe834b85c50efe7c8d3f6910a74 Mon Sep 17 00:00:00 2001 From: Marlon Saglia Date: Wed, 7 Aug 2024 10:34:11 +0200 Subject: [PATCH 2/2] feat: Remove link checker job in screwdriver --- screwdriver.yaml | 45 ++------------------------------------------- 1 file changed, 2 insertions(+), 43 deletions(-) diff --git a/screwdriver.yaml b/screwdriver.yaml index 2ce97bd3eb..a5d0bbd9a3 100644 --- a/screwdriver.yaml +++ b/screwdriver.yaml @@ -19,47 +19,6 @@ shared: ln -sf /opt/vespa-cli_${VESPA_CLI_VERSION}_linux_amd64/bin/vespa /usr/local/bin/ jobs: - link-checker-documentation: - image: ruby:3.1 - annotations: - screwdriver.cd/buildPeriodically: H 2 * * * - steps: - - install-bundler: | - gem update --system 3.3.3 - gem install bundler - - check-links: | - export LANG=C.UTF-8 - bundle install - bundle exec jekyll build - # - # Remove the redirect-files before link-check - find _site/en _site/documentation -name \*.html | \ - xargs grep -l "Click here if you are not redirected." 
| xargs rm - # - # htmlproofer does not check links inside <code>-elements - find _site -name \*.html | xargs sed -i.orig 's/<code[^>]*>//g; s/<\/code>//g; s/<pre[^>]*>//g; s/<\/pre>//g;' - find _site -name \*.orig | xargs rm - # - bundle exec htmlproofer \ - --assume-extension .html \ - --no-enforce-https \ - --no-check-external-hash \ - --allow-missing-href \ - --ignore-files '/playground/index.html/' \ - --ignore-urls '\ - /localhost:8080/,\ - /docs.vespa.ai/playground/,\ - /javadoc.io.*#/,\ - /readthedocs.io.*#/,\ - /linux.die.net/,\ - /arxiv.org/,\ - /hub.docker.com/r/,\ - /platform.openai.com/' \ - --typhoeus '{"connecttimeout": 10, "timeout": 30, "accept_encoding": "zstd,br,gzip,deflate"}' \ - --hydra '{"max_concurrency": 1}' \ - --swap-urls '(https\://github.com.*/master/.*)#.*:\1,(https\://github.com.*/main/.*)#.*:\1' \ - _site - verify-guides: requires: [~pr, ~commit] image: vespaengine/vespa-build-almalinux-8:latest @@ -109,7 +68,7 @@ jobs: secrets: - VESPA_DOC_DEPLOY_KEY environment: - GIT_SSH_COMMAND: "ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" + GIT_SSH_COMMAND: "ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" steps: - update-to-latest: | # must checkout the repo again using ssh for the credentials to work @@ -190,7 +149,7 @@ jobs: secrets: - VESPA_DOC_DEPLOY_KEY environment: - GIT_SSH_COMMAND: "ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" + GIT_SSH_COMMAND: "ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" steps: - update-to-latest: | # Ref https://github.com/vespa-engine/vespa/blob/master/metrics/src/main/java/ai/vespa/metrics/docs/MetricDocumentation.java