From c8fafc9180d76d1e2b6ec04acfb6ced171d941bb Mon Sep 17 00:00:00 2001 From: John Bampton Date: Fri, 26 Apr 2024 03:47:00 +1000 Subject: [PATCH 1/5] [DOCS] Add `lychee` link checker with `pre-commit` Clean up and improve the docs. Update the Makefile. https://github.com/lycheeverse/lychee "Fast, async, stream-based link checker written in Rust. Finds broken URLs and mail addresses inside Markdown, HTML, reStructuredText, websites and more!" --- .github/linters/lychee.toml | 13 +++++++ .github/workflows/lint.yml | 30 ++++++++++++++-- .pre-commit-config.yaml | 15 +++++--- Makefile | 6 +++- docs/api/flink/Function.md | 2 +- docs/api/snowflake/vector-data/Function.md | 2 +- docs/api/sql/Function.md | 2 +- docs/download.md | 6 ++-- docs/setup/compile.md | 41 +++++++++++++++------- docs/tutorial/sql.md | 4 +-- 10 files changed, 94 insertions(+), 27 deletions(-) create mode 100644 .github/linters/lychee.toml diff --git a/.github/linters/lychee.toml b/.github/linters/lychee.toml new file mode 100644 index 0000000000..5e7c88f80d --- /dev/null +++ b/.github/linters/lychee.toml @@ -0,0 +1,13 @@ +exclude = ["docs/api/javadoc/sql/org/apache/sedona/sql/utils/index.html$", + "docs/api/scaladoc/spark/org/apache/sedona/sql/utils/index.html$", + "docs/javadoc/spark", + "docs/scaladoc/spark/org/apache/sedona/sql/utils/index.html$", + "spark://localhost:.*", + "https://issues.apache.org/jira.*", + "https://keyserver.pgp.com/.*", + "http://localhost:.*", + "https://whimsy.apache.org/officers/acreq.*", + "https://whimsy.apache.org/roster/pmc/sedona.*", + "https://www.datasyslab.net/", + "https://mvnrepository.com/.*", + "https://www.researchgate.net/figure/Relation-between-the-cartesian-axes-x-y-and-i-j-axes-of-the-pixels_fig3_313860913"] diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 8ee034f12c..21af1b5434 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,3 +1,4 @@ +# https://pre-commit.com/ name: Lint on: [pull_request] @@ -6,8 
+7,33 @@ permissions: contents: read jobs: + lychee: + name: Check links with lychee + runs-on: ubuntu-latest + steps: + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.x' + architecture: 'x64' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pre-commit + curl -sSf 'https://sh.rustup.rs' | sh + sudo apt install gcc pkg-config libc6-dev libssl-dev + cargo install lychee + - name: set PY + run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> $GITHUB_ENV + - uses: actions/cache@v3 + with: + path: ~/.cache/pre-commit + key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }} + - name: Run pre-commit + run: pre-commit run lychee --all-files pre-commit: - name: Run pre-commit # https://pre-commit.com/ + name: Run pre-commit runs-on: ubuntu-latest steps: - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" @@ -27,4 +53,4 @@ jobs: path: ~/.cache/pre-commit key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }} - name: Run pre-commit - run: pre-commit run --all-files + run: SKIP=lychee pre-commit run --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a05147331c..a0a8a1029f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,12 +14,12 @@ repos: rev: v2.2.6 hooks: - id: codespell - name: Run codespell + name: run codespell description: Check spelling with codespell args: [--ignore-words=.github/linters/codespell.txt] exclude: ^docs/image|^spark/common/src/test/resources|^docs/usecases - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: check-ast - id: check-builtin-literals @@ -53,11 +53,18 @@ repos: - id: trailing-whitespace args: [--markdown-linebreak-ext=md] exclude: ^docs-overrides/main\.html$|\.Rd$ + - repo: https://github.com/lycheeverse/lychee.git + rev: v0.15.0 + hooks: + - id: 
lychee + name: run lychee + args: [--config=.github/linters/lychee.toml] + files: \.md$ - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.38.0 + rev: v0.39.0 hooks: - id: markdownlint - name: Run markdownlint + name: run markdownlint description: Check Markdown files with markdownlint args: [--config=.github/linters/.markdown-lint.yml] exclude: ^\.github/.*$ diff --git a/Makefile b/Makefile index 775d1d01c3..4d6c9488ca 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,15 @@ check : - pre-commit run --all-files + SKIP=lychee pre-commit run --all-files .PHONY : check checkinstall : pre-commit install .PHONY : checkinstall +checklinks : + pre-commit run lychee --all-files +.PHONY : checklinks + checkupdate : pre-commit autoupdate .PHONY : checkupdate diff --git a/docs/api/flink/Function.md b/docs/api/flink/Function.md index 5c6b2f6f32..9e5f48ce64 100644 --- a/docs/api/flink/Function.md +++ b/docs/api/flink/Function.md @@ -2501,7 +2501,7 @@ LINESTRING(0 0, 1 0) ## ST_S2CellIDs Introduction: Cover the geometry with Google S2 Cells, return the corresponding cell IDs with the given level. -The level indicates the [size of cells](https://s2geometry.io/resources/s2cell_statistics.html). With a bigger level, +The level indicates the [size of cells](http://s2geometry.io/resources/s2cell_statistics.html). With a bigger level, the cells will be smaller, the coverage will be more accurate, but the result size will be exponentially increasing. Format: `ST_S2CellIDs(geom: Geometry, level: Integer)` diff --git a/docs/api/snowflake/vector-data/Function.md b/docs/api/snowflake/vector-data/Function.md index b8df7cd1a3..dbe7ce76a7 100644 --- a/docs/api/snowflake/vector-data/Function.md +++ b/docs/api/snowflake/vector-data/Function.md @@ -1953,7 +1953,7 @@ Result: ## ST_S2CellIDs Introduction: Cover the geometry with Google S2 Cells, return the corresponding cell IDs with the given level. 
-The level indicates the [size of cells](https://s2geometry.io/resources/s2cell_statistics.html). With a bigger level, +The level indicates the [size of cells](http://s2geometry.io/resources/s2cell_statistics.html). With a bigger level, the cells will be smaller, the coverage will be more accurate, but the result size will be exponentially increasing. Format: `ST_S2CellIDs(geom: geometry, level: Int)` diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md index 7406fe1331..c347ab9364 100644 --- a/docs/api/sql/Function.md +++ b/docs/api/sql/Function.md @@ -2507,7 +2507,7 @@ LINESTRING (3 6, 2 4, 1 2, 0 0) ## ST_S2CellIDs Introduction: Cover the geometry with Google S2 Cells, return the corresponding cell IDs with the given level. -The level indicates the [size of cells](https://s2geometry.io/resources/s2cell_statistics.html). With a bigger level, +The level indicates the [size of cells](http://s2geometry.io/resources/s2cell_statistics.html). With a bigger level, the cells will be smaller, the coverage will be more accurate, but the result size will be exponentially increasing. 
Format: `ST_S2CellIDs(geom: Geometry, level: Integer)` diff --git a/docs/download.md b/docs/download.md index 2e61bef0e2..f5c41267f9 100644 --- a/docs/download.md +++ b/docs/download.md @@ -19,21 +19,21 @@ Automatically generated binary JARs (per each Master branch commit): [GitHub Act | | Download from ASF | Checksum | Signature | |:-----------------:|:----------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------:| | Source code | [src](https://www.apache.org/dyn/closer.lua/sedona/1.5.1/apache-sedona-1.5.1-src.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.5.1/apache-sedona-1.5.1-src.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.5.1/apache-sedona-1.5.1-src.tar.gz.asc) | -| Binary | [bin](https://www.apache.org/dyn/closer.lua/sedona/1.5.1/apache-sedona-1.5.1-bin.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.5.1/apache-sedona-1.5.1-bin.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.5.1/apache-sedona-1.5.1-bin.tar.gz.asc) +| Binary | [bin](https://www.apache.org/dyn/closer.lua/sedona/1.5.1/apache-sedona-1.5.1-bin.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.5.1/apache-sedona-1.5.1-bin.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.5.1/apache-sedona-1.5.1-bin.tar.gz.asc) | ### 1.5.0 | | Download from ASF | Checksum | Signature | |:-----------------:|:--------:|:--------:|:---------:| | Source code | [src](https://www.apache.org/dyn/closer.lua/sedona/1.5.0/apache-sedona-1.5.0-src.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.5.0/apache-sedona-1.5.0-src.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.5.0/apache-sedona-1.5.0-src.tar.gz.asc) | -| Binary | [bin](https://www.apache.org/dyn/closer.lua/sedona/1.5.0/apache-sedona-1.5.0-bin.tar.gz) | 
[sha512](https://downloads.apache.org/sedona/1.5.0/apache-sedona-1.5.0-bin.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.5.0/apache-sedona-1.5.0-bin.tar.gz.asc) +| Binary | [bin](https://www.apache.org/dyn/closer.lua/sedona/1.5.0/apache-sedona-1.5.0-bin.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.5.0/apache-sedona-1.5.0-bin.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.5.0/apache-sedona-1.5.0-bin.tar.gz.asc) | ### 1.4.1 | | Download from ASF | Checksum | Signature | |:-----------------:|:--------:|:--------:|:---------:| | Source code | [src](https://www.apache.org/dyn/closer.lua/sedona/1.4.1/apache-sedona-1.4.1-src.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.4.1/apache-sedona-1.4.1-src.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.4.1/apache-sedona-1.4.1-src.tar.gz.asc) | -| Binary | [bin](https://www.apache.org/dyn/closer.lua/sedona/1.4.1/apache-sedona-1.4.1-bin.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.4.1/apache-sedona-1.4.1-bin.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.4.1/apache-sedona-1.4.1-bin.tar.gz.asc) +| Binary | [bin](https://www.apache.org/dyn/closer.lua/sedona/1.4.1/apache-sedona-1.4.1-bin.tar.gz) | [sha512](https://downloads.apache.org/sedona/1.4.1/apache-sedona-1.4.1-bin.tar.gz.sha512) | [asc](https://downloads.apache.org/sedona/1.4.1/apache-sedona-1.4.1-bin.tar.gz.asc) | ### Past releases diff --git a/docs/setup/compile.md b/docs/setup/compile.md index 7d2fd8d966..91d6155c62 100644 --- a/docs/setup/compile.md +++ b/docs/setup/compile.md @@ -161,22 +161,39 @@ mike serve ## pre-commit -We run [pre-commit](https://pre-commit.com/) with GitHub Actions so installation on -your local machine is currently optional. +We run [pre-commit](https://pre-commit.com/) with GitHub Actions so installation on your local machine is currently +optional. 
-The pre-commit [configuration file](https://github.com/apache/sedona/blob/master/.pre-commit-config.yaml) -is in the repository root. Before you can run the hooks, you need to have pre-commit installed. +The pre-commit [configuration file](https://github.com/apache/sedona/blob/master/.pre-commit-config.yaml) is in the +repository root. Before you can run the hooks, you need to have pre-commit installed. -The hooks run when running `git commit`. Some of the hooks will auto fix the code after the hook fails -whilst most will print error messages from the linters. - -If you want to test all hooks against all files and when you are adding a new hook -you should always run: +The hooks run when running `git commit` and also from the command line with `pre-commit`. Some of the hooks will auto +fix the code after the hooks fail whilst most will print error messages from the linters. If a hook fails the overall +commit will fail, and you will need to fix the issues, then `git add` and `git commit` again. On `git commit` +the hooks mostly run only against modified files, so if you want to test all hooks against all files, or when you +are adding a new hook, you should always run: `pre-commit run --all-files` -Sometimes you might need to skip a hook to commit for example: +We are also using pre-commit to check our links with [lychee](https://github.com/lycheeverse/lychee). The lychee hook +is not self-contained and requires a local lychee installation. + +Sometimes you might need to skip a hook to commit because the hook is stopping you from committing or your computer +might not have all the installation requirements for all the hooks.
The `SKIP` variable is comma separated for two or +more hooks: + +`SKIP=lychee git commit -m "foo"` + +The same applies when running pre-commit: + +`SKIP=lychee pre-commit run --all-files` + +If you just want to run one hook for example just run the `markdownlint` hook: + +`pre-commit run markdownlint --all-files` -`SKIP=markdownlint git commit -m "foo"` +We have a [Makefile](https://github.com/apache/sedona/blob/master/Makefile) in the repository root which has four +pre-commit convenience commands. For example to run [Make](https://en.wikipedia.org/wiki/Make_(software)) to check the +spelling with lychee run: -We have a [Makefile](https://github.com/apache/sedona/blob/master/Makefile) in the repository root which has three pre-commit convenience commands. +`make checkspelling` diff --git a/docs/tutorial/sql.md b/docs/tutorial/sql.md index 6d28998a26..00090b849e 100644 --- a/docs/tutorial/sql.md +++ b/docs/tutorial/sql.md @@ -26,7 +26,7 @@ SedonaSQL supports SQL/MM Part3 Spatial SQL Standard. It includes four kinds of myDataFrame.createOrReplaceTempView("spatialDf") ``` -Detailed SedonaSQL APIs are available here: [SedonaSQL API](../api/sql/Overview.md). You can find example county data (i.e., `county_small.tsv`) in [Sedona GitHub repo](https://github.com/apache/sedona/tree/master/core/src/test/resources). +Detailed SedonaSQL APIs are available here: [SedonaSQL API](../api/sql/Overview.md). You can find example county data (i.e., `county_small.tsv`) in [Sedona GitHub repo](https://github.com/apache/sedona/tree/master/spark/common/src/test/resources). ## Set up dependencies @@ -578,7 +578,7 @@ Sedona provides `SedonaPyDeck` and `SedonaKepler` wrappers, both of which expose Spatial query results can be visualized in a Jupyter lab/notebook environment using SedonaPyDeck. 
-SedonaPyDeck exposes APIs to create interactive map visualizations using [pydeck](https://pydeck.gl/index.html#) based on [deck.gl](https://deck.gl/) +SedonaPyDeck exposes APIs to create interactive map visualizations using [pydeck](https://pydeck.gl/) based on [deck.gl](https://deck.gl/) !!!Note To use SedonaPyDeck, install sedona with the `pydeck-map` extra: From 1735ba8287b2e3b99949fa165c2809d661147599 Mon Sep 17 00:00:00 2001 From: John Bampton Date: Fri, 26 Apr 2024 04:28:15 +1000 Subject: [PATCH 2/5] Fix up --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 21af1b5434..7f10e9f9cd 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -22,7 +22,7 @@ jobs: python -m pip install --upgrade pip pip install pre-commit curl -sSf 'https://sh.rustup.rs' | sh - sudo apt install gcc pkg-config libc6-dev libssl-dev + sudo apt install -y gcc pkg-config libc6-dev libssl-dev cargo install lychee - name: set PY run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> $GITHUB_ENV From 47f3b53be691ae22c635c97b4fd80d561ab4abba Mon Sep 17 00:00:00 2001 From: John Bampton Date: Fri, 26 Apr 2024 04:32:39 +1000 Subject: [PATCH 3/5] Fix up --- .github/workflows/lint.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 7f10e9f9cd..797b3b70ac 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -21,8 +21,8 @@ jobs: run: | python -m pip install --upgrade pip pip install pre-commit - curl -sSf 'https://sh.rustup.rs' | sh - sudo apt install -y gcc pkg-config libc6-dev libssl-dev + curl -sSf 'https://sh.rustup.rs' | sh -s -- -y + sudo apt install -y gcc pkg-config libc6-dev libssl-dev cargo install lychee - name: set PY run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> $GITHUB_ENV From 1bfdf71d384ec0deefecb6be478e7c54066b8e56 Mon Sep 17 00:00:00
2001 From: John Bampton Date: Fri, 26 Apr 2024 05:12:05 +1000 Subject: [PATCH 4/5] Update docs/setup/compile.md --- docs/setup/compile.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/setup/compile.md b/docs/setup/compile.md index 91d6155c62..0971f565c8 100644 --- a/docs/setup/compile.md +++ b/docs/setup/compile.md @@ -196,4 +196,4 @@ We have a [Makefile](https://github.com/apache/sedona/blob/master/Makefile) in t pre-commit convenience commands. For example to run [Make](https://en.wikipedia.org/wiki/Make_(software)) to check the spelling with lychee run: -`make checkspelling` +`make checklinks` From 410f59825d1187d356940d68e5badb75ca78bccc Mon Sep 17 00:00:00 2001 From: John Bampton Date: Fri, 26 Apr 2024 05:12:16 +1000 Subject: [PATCH 5/5] Update docs/setup/compile.md --- docs/setup/compile.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/setup/compile.md b/docs/setup/compile.md index 0971f565c8..cbdbdcd088 100644 --- a/docs/setup/compile.md +++ b/docs/setup/compile.md @@ -194,6 +194,6 @@ If you just want to run one hook for example just run the `markdownlint` hook: We have a [Makefile](https://github.com/apache/sedona/blob/master/Makefile) in the repository root which has four pre-commit convenience commands. For example to run [Make](https://en.wikipedia.org/wiki/Make_(software)) to check the -spelling with lychee run: +links with lychee run: `make checklinks`